github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"fmt"
    36  	"log"
    37  	"strings"
    38  )
    39  
    40  // Instruction layout.
    41  
    42  const (
    43  	MaxAlign = 32 // max data alignment
    44  
    45  	// Loop alignment constants:
    46  	// want to align loop entry to LoopAlign-byte boundary,
    47  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    48  	// We define a loop entry as the target of a backward jump.
    49  	//
    50  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    51  	// and it aligns all jump targets, not just backward jump targets.
    52  	//
    53  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    54  	// is very slight but negative, so the alignment is disabled by
    55  	// setting MaxLoopPad = 0. The code is here for reference and
    56  	// for future experiments.
    57  	//
    58  	LoopAlign  = 16
    59  	MaxLoopPad = 0
    60  	FuncAlign  = 16
    61  )
    62  
// Optab is one row of the instruction table (optab). It ties an instruction
// to the operand patterns it accepts and to the opcode bytes used to encode
// each of those patterns.
type Optab struct {
	as     int16     // instruction code; optab is scanned for the entry matching p->as
	ytab   []ytab    // operand patterns accepted by the instruction, scanned line by line
	prefix uint8     // one of the P* constants; controls the prefix bytes to emit
	op     [23]uint8 // opcode byte list that the z pointer walks while ytab is scanned
}
    69  
    70  type ytab struct {
    71  	from    uint8
    72  	from3   uint8
    73  	to      uint8
    74  	zcase   uint8
    75  	zoffset uint8
    76  }
    77  
    78  type Movtab struct {
    79  	as   int16
    80  	ft   uint8
    81  	f3t  uint8
    82  	tt   uint8
    83  	code uint8
    84  	op   [4]uint8
    85  }
    86  
// Operand classes (Ytypes). These are the values stored in the from, from3
// and to columns of the ytab tables below.
//
// The values come from iota, so the order of this list is significant:
// ycover is sized Ymax*Ymax and indexed as ycover[a*Ymax+b]. Add new
// classes before Ymax.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32 // signed 32; Yi0, Yi1 and Yi8 also count as Ys32 via ycover
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Ytls
	Ytextsize
	Yindir
	Ymax // number of operand classes; must stay last
)
   158  
// Encoding cases (Ztypes). These are the values stored in the zcase column
// of the ytab tables. Together with the opcode bytes from Optab.op, the Z
// case of the matching ytab line determines the bytes laid down for an
// instruction.
//
// The values come from iota, so the order of this list is significant.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m // opcode byte, encoded addressing mode, then a single immediate byte
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m // like Zibo_m but with a long (32-bit) immediate
	Ziqo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_3d
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zmax // number of Z cases; must stay last
)
   210  
   211  const (
   212  	Px  = 0
   213  	Px1 = 1    // symbolic; exact value doesn't matter
   214  	P32 = 0x32 /* 32-bit only */
   215  	Pe  = 0x66 /* operand escape */
   216  	Pm  = 0x0f /* 2byte opcode escape */
   217  	Pq  = 0xff /* both escapes: 66 0f */
   218  	Pb  = 0xfe /* byte operands */
   219  	Pf2 = 0xf2 /* xmm escape 1: f2 0f */
   220  	Pf3 = 0xf3 /* xmm escape 2: f3 0f */
   221  	Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
   222  	Pw  = 0x48 /* Rex.w */
   223  	Pw8 = 0x90 // symbolic; exact value doesn't matter
   224  	Py  = 0x80 /* defaults to 64-bit mode */
   225  	Py1 = 0x81 // symbolic; exact value doesn't matter
   226  	Py3 = 0x83 // symbolic; exact value doesn't matter
   227  
   228  	Rxf = 1 << 9 /* internal flag for Rxr on from */
   229  	Rxt = 1 << 8 /* internal flag for Rxr on to */
   230  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   231  	Rxr = 1 << 2 /* extend modrm reg */
   232  	Rxx = 1 << 1 /* extend sib index */
   233  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   234  
   235  	Maxand = 10 /* in -a output width of the byte codes */
   236  )
   237  
   238  var ycover [Ymax * Ymax]uint8
   239  
   240  var reg [MAXREG]int
   241  
   242  var regrex [MAXREG + 1]int
   243  
   244  var ynone = []ytab{
   245  	{Ynone, Ynone, Ynone, Zlit, 1},
   246  }
   247  
   248  var ysahf = []ytab{
   249  	{Ynone, Ynone, Ynone, Zlit, 2},
   250  	{Ynone, Ynone, Ynone, Zlit, 1},
   251  }
   252  
   253  var ytext = []ytab{
   254  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   255  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   256  }
   257  
   258  var ynop = []ytab{
   259  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   260  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   261  	{Ynone, Ynone, Yml, Zpseudo, 0},
   262  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   263  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   264  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   265  	{Yml, Ynone, Ynone, Zpseudo, 0},
   266  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   267  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   268  }
   269  
   270  var yfuncdata = []ytab{
   271  	{Yi32, Ynone, Ym, Zpseudo, 0},
   272  }
   273  
   274  var ypcdata = []ytab{
   275  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   276  }
   277  
   278  var yxorb = []ytab{
   279  	{Yi32, Ynone, Yal, Zib_, 1},
   280  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   281  	{Yrb, Ynone, Ymb, Zr_m, 1},
   282  	{Ymb, Ynone, Yrb, Zm_r, 1},
   283  }
   284  
   285  var yxorl = []ytab{
   286  	{Yi8, Ynone, Yml, Zibo_m, 2},
   287  	{Yi32, Ynone, Yax, Zil_, 1},
   288  	{Yi32, Ynone, Yml, Zilo_m, 2},
   289  	{Yrl, Ynone, Yml, Zr_m, 1},
   290  	{Yml, Ynone, Yrl, Zm_r, 1},
   291  }
   292  
   293  var yaddl = []ytab{
   294  	{Yi8, Ynone, Yml, Zibo_m, 2},
   295  	{Yi32, Ynone, Yax, Zil_, 1},
   296  	{Yi32, Ynone, Yml, Zilo_m, 2},
   297  	{Yrl, Ynone, Yml, Zr_m, 1},
   298  	{Yml, Ynone, Yrl, Zm_r, 1},
   299  }
   300  
   301  var yincb = []ytab{
   302  	{Ynone, Ynone, Ymb, Zo_m, 2},
   303  }
   304  
   305  var yincw = []ytab{
   306  	{Ynone, Ynone, Yml, Zo_m, 2},
   307  }
   308  
   309  var yincl = []ytab{
   310  	{Ynone, Ynone, Yrl, Z_rp, 1},
   311  	{Ynone, Ynone, Yml, Zo_m, 2},
   312  }
   313  
   314  var yincq = []ytab{
   315  	{Ynone, Ynone, Yml, Zo_m, 2},
   316  }
   317  
   318  var ycmpb = []ytab{
   319  	{Yal, Ynone, Yi32, Z_ib, 1},
   320  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   321  	{Ymb, Ynone, Yrb, Zm_r, 1},
   322  	{Yrb, Ynone, Ymb, Zr_m, 1},
   323  }
   324  
   325  var ycmpl = []ytab{
   326  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   327  	{Yax, Ynone, Yi32, Z_il, 1},
   328  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   329  	{Yml, Ynone, Yrl, Zm_r, 1},
   330  	{Yrl, Ynone, Yml, Zr_m, 1},
   331  }
   332  
   333  var yshb = []ytab{
   334  	{Yi1, Ynone, Ymb, Zo_m, 2},
   335  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   336  	{Ycx, Ynone, Ymb, Zo_m, 2},
   337  }
   338  
   339  var yshl = []ytab{
   340  	{Yi1, Ynone, Yml, Zo_m, 2},
   341  	{Yi32, Ynone, Yml, Zibo_m, 2},
   342  	{Ycl, Ynone, Yml, Zo_m, 2},
   343  	{Ycx, Ynone, Yml, Zo_m, 2},
   344  }
   345  
   346  var ytestb = []ytab{
   347  	{Yi32, Ynone, Yal, Zib_, 1},
   348  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   349  	{Yrb, Ynone, Ymb, Zr_m, 1},
   350  	{Ymb, Ynone, Yrb, Zm_r, 1},
   351  }
   352  
   353  var ytestl = []ytab{
   354  	{Yi32, Ynone, Yax, Zil_, 1},
   355  	{Yi32, Ynone, Yml, Zilo_m, 2},
   356  	{Yrl, Ynone, Yml, Zr_m, 1},
   357  	{Yml, Ynone, Yrl, Zm_r, 1},
   358  }
   359  
   360  var ymovb = []ytab{
   361  	{Yrb, Ynone, Ymb, Zr_m, 1},
   362  	{Ymb, Ynone, Yrb, Zm_r, 1},
   363  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   364  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   365  }
   366  
   367  var ymbs = []ytab{
   368  	{Ymb, Ynone, Ynone, Zm_o, 2},
   369  }
   370  
   371  var ybtl = []ytab{
   372  	{Yi8, Ynone, Yml, Zibo_m, 2},
   373  	{Yrl, Ynone, Yml, Zr_m, 1},
   374  }
   375  
   376  var ymovw = []ytab{
   377  	{Yrl, Ynone, Yml, Zr_m, 1},
   378  	{Yml, Ynone, Yrl, Zm_r, 1},
   379  	{Yi0, Ynone, Yrl, Zclr, 1},
   380  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   381  	{Yi32, Ynone, Yml, Zilo_m, 2},
   382  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   383  }
   384  
   385  var ymovl = []ytab{
   386  	{Yrl, Ynone, Yml, Zr_m, 1},
   387  	{Yml, Ynone, Yrl, Zm_r, 1},
   388  	{Yi0, Ynone, Yrl, Zclr, 1},
   389  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   390  	{Yi32, Ynone, Yml, Zilo_m, 2},
   391  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   392  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   393  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   394  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   395  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   396  }
   397  
   398  var yret = []ytab{
   399  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   400  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   401  }
   402  
   403  var ymovq = []ytab{
   404  	// valid in 32-bit mode
   405  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   406  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   407  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   408  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   409  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   410  
   411  	// valid only in 64-bit mode, usually with 64-bit prefix
   412  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   413  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   414  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   415  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   416  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   417  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   418  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   419  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   420  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   421  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   422  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   423  }
   424  
   425  var ym_rl = []ytab{
   426  	{Ym, Ynone, Yrl, Zm_r, 1},
   427  }
   428  
   429  var yrl_m = []ytab{
   430  	{Yrl, Ynone, Ym, Zr_m, 1},
   431  }
   432  
   433  var ymb_rl = []ytab{
   434  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   435  }
   436  
   437  var yml_rl = []ytab{
   438  	{Yml, Ynone, Yrl, Zm_r, 1},
   439  }
   440  
   441  var yrl_ml = []ytab{
   442  	{Yrl, Ynone, Yml, Zr_m, 1},
   443  }
   444  
   445  var yml_mb = []ytab{
   446  	{Yrb, Ynone, Ymb, Zr_m, 1},
   447  	{Ymb, Ynone, Yrb, Zm_r, 1},
   448  }
   449  
   450  var yrb_mb = []ytab{
   451  	{Yrb, Ynone, Ymb, Zr_m, 1},
   452  }
   453  
   454  var yxchg = []ytab{
   455  	{Yax, Ynone, Yrl, Z_rp, 1},
   456  	{Yrl, Ynone, Yax, Zrp_, 1},
   457  	{Yrl, Ynone, Yml, Zr_m, 1},
   458  	{Yml, Ynone, Yrl, Zm_r, 1},
   459  }
   460  
   461  var ydivl = []ytab{
   462  	{Yml, Ynone, Ynone, Zm_o, 2},
   463  }
   464  
   465  var ydivb = []ytab{
   466  	{Ymb, Ynone, Ynone, Zm_o, 2},
   467  }
   468  
   469  var yimul = []ytab{
   470  	{Yml, Ynone, Ynone, Zm_o, 2},
   471  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   472  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   473  	{Yml, Ynone, Yrl, Zm_r, 2},
   474  }
   475  
   476  var yimul3 = []ytab{
   477  	{Yi8, Yml, Yrl, Zibm_r, 2},
   478  }
   479  
   480  var ybyte = []ytab{
   481  	{Yi64, Ynone, Ynone, Zbyte, 1},
   482  }
   483  
   484  var yin = []ytab{
   485  	{Yi32, Ynone, Ynone, Zib_, 1},
   486  	{Ynone, Ynone, Ynone, Zlit, 1},
   487  }
   488  
   489  var yint = []ytab{
   490  	{Yi32, Ynone, Ynone, Zib_, 1},
   491  }
   492  
   493  var ypushl = []ytab{
   494  	{Yrl, Ynone, Ynone, Zrp_, 1},
   495  	{Ym, Ynone, Ynone, Zm_o, 2},
   496  	{Yi8, Ynone, Ynone, Zib_, 1},
   497  	{Yi32, Ynone, Ynone, Zil_, 1},
   498  }
   499  
   500  var ypopl = []ytab{
   501  	{Ynone, Ynone, Yrl, Z_rp, 1},
   502  	{Ynone, Ynone, Ym, Zo_m, 2},
   503  }
   504  
   505  var ybswap = []ytab{
   506  	{Ynone, Ynone, Yrl, Z_rp, 2},
   507  }
   508  
   509  var yscond = []ytab{
   510  	{Ynone, Ynone, Ymb, Zo_m, 2},
   511  }
   512  
   513  var yjcond = []ytab{
   514  	{Ynone, Ynone, Ybr, Zbr, 0},
   515  	{Yi0, Ynone, Ybr, Zbr, 0},
   516  	{Yi1, Ynone, Ybr, Zbr, 1},
   517  }
   518  
   519  var yloop = []ytab{
   520  	{Ynone, Ynone, Ybr, Zloop, 1},
   521  }
   522  
   523  var ycall = []ytab{
   524  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   525  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   526  	{Ynone, Ynone, Yindir, Zcallind, 2},
   527  	{Ynone, Ynone, Ybr, Zcall, 0},
   528  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   529  }
   530  
   531  var yduff = []ytab{
   532  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   533  }
   534  
   535  var yjmp = []ytab{
   536  	{Ynone, Ynone, Yml, Zo_m64, 2},
   537  	{Ynone, Ynone, Ybr, Zjmp, 0},
   538  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   539  }
   540  
   541  var yfmvd = []ytab{
   542  	{Ym, Ynone, Yf0, Zm_o, 2},
   543  	{Yf0, Ynone, Ym, Zo_m, 2},
   544  	{Yrf, Ynone, Yf0, Zm_o, 2},
   545  	{Yf0, Ynone, Yrf, Zo_m, 2},
   546  }
   547  
   548  var yfmvdp = []ytab{
   549  	{Yf0, Ynone, Ym, Zo_m, 2},
   550  	{Yf0, Ynone, Yrf, Zo_m, 2},
   551  }
   552  
   553  var yfmvf = []ytab{
   554  	{Ym, Ynone, Yf0, Zm_o, 2},
   555  	{Yf0, Ynone, Ym, Zo_m, 2},
   556  }
   557  
   558  var yfmvx = []ytab{
   559  	{Ym, Ynone, Yf0, Zm_o, 2},
   560  }
   561  
   562  var yfmvp = []ytab{
   563  	{Yf0, Ynone, Ym, Zo_m, 2},
   564  }
   565  
   566  var yfcmv = []ytab{
   567  	{Yrf, Ynone, Yf0, Zm_o, 2},
   568  }
   569  
   570  var yfadd = []ytab{
   571  	{Ym, Ynone, Yf0, Zm_o, 2},
   572  	{Yrf, Ynone, Yf0, Zm_o, 2},
   573  	{Yf0, Ynone, Yrf, Zo_m, 2},
   574  }
   575  
   576  var yfaddp = []ytab{
   577  	{Yf0, Ynone, Yrf, Zo_m, 2},
   578  }
   579  
   580  var yfxch = []ytab{
   581  	{Yf0, Ynone, Yrf, Zo_m, 2},
   582  	{Yrf, Ynone, Yf0, Zm_o, 2},
   583  }
   584  
   585  var ycompp = []ytab{
   586  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   587  }
   588  
   589  var ystsw = []ytab{
   590  	{Ynone, Ynone, Ym, Zo_m, 2},
   591  	{Ynone, Ynone, Yax, Zlit, 1},
   592  }
   593  
   594  var ystcw = []ytab{
   595  	{Ynone, Ynone, Ym, Zo_m, 2},
   596  	{Ym, Ynone, Ynone, Zm_o, 2},
   597  }
   598  
   599  var ysvrs = []ytab{
   600  	{Ynone, Ynone, Ym, Zo_m, 2},
   601  	{Ym, Ynone, Ynone, Zm_o, 2},
   602  }
   603  
   604  var ymm = []ytab{
   605  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   606  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   607  }
   608  
   609  var yxm = []ytab{
   610  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   611  }
   612  
   613  var yxcvm1 = []ytab{
   614  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   615  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   616  }
   617  
   618  var yxcvm2 = []ytab{
   619  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   620  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   621  }
   622  
   623  /*
   624  var yxmq = []ytab{
   625  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   626  }
   627  */
   628  
   629  var yxr = []ytab{
   630  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   631  }
   632  
   633  var yxr_ml = []ytab{
   634  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   635  }
   636  
   637  var ymr = []ytab{
   638  	{Ymr, Ynone, Ymr, Zm_r, 1},
   639  }
   640  
   641  var ymr_ml = []ytab{
   642  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   643  }
   644  
   645  var yxcmp = []ytab{
   646  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   647  }
   648  
   649  var yxcmpi = []ytab{
   650  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   651  }
   652  
   653  var yxmov = []ytab{
   654  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   655  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   656  }
   657  
   658  var yxcvfl = []ytab{
   659  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   660  }
   661  
   662  var yxcvlf = []ytab{
   663  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   664  }
   665  
   666  var yxcvfq = []ytab{
   667  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   668  }
   669  
   670  var yxcvqf = []ytab{
   671  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   672  }
   673  
   674  var yps = []ytab{
   675  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   676  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   677  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   678  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   679  }
   680  
   681  var yxrrl = []ytab{
   682  	{Yxr, Ynone, Yrl, Zm_r, 1},
   683  }
   684  
   685  var ymfp = []ytab{
   686  	{Ymm, Ynone, Ymr, Zm_r_3d, 1},
   687  }
   688  
   689  var ymrxr = []ytab{
   690  	{Ymr, Ynone, Yxr, Zm_r, 1},
   691  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   692  }
   693  
   694  var ymshuf = []ytab{
   695  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   696  }
   697  
   698  var ymshufb = []ytab{
   699  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   700  }
   701  
   702  var yxshuf = []ytab{
   703  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   704  }
   705  
   706  var yextrw = []ytab{
   707  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   708  }
   709  
   710  var yinsrw = []ytab{
   711  	{Yu8, Yml, Yxr, Zibm_r, 2},
   712  }
   713  
   714  var yinsr = []ytab{
   715  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   716  }
   717  
   718  var ypsdq = []ytab{
   719  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   720  }
   721  
   722  var ymskb = []ytab{
   723  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   724  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   725  }
   726  
   727  var ycrc32l = []ytab{
   728  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   729  }
   730  
   731  var yprefetch = []ytab{
   732  	{Ym, Ynone, Ynone, Zm_o, 2},
   733  }
   734  
   735  var yaes = []ytab{
   736  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   737  }
   738  
   739  var yaes2 = []ytab{
   740  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   741  }
   742  
   743  /*
   744   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
 * and p->from and p->to as operands (Addr*).  The linker scans optab to find
 * the entry with the given p->as and then looks through the ytable for that
 * instruction (the second field in the Optab struct) for a line whose from
 * and to fields match the Ytypes of the p->from and p->to operands (the
 * from3 field, Ynone in most lines, sits between them).  The function
   749   * oclass in span.c computes the specific Ytype of an operand and then the set
   750   * of more general Ytypes that it satisfies is implied by the ycover table, set
   751   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   752   * from the more general 8-bit constants, but instinit says
   753   *
   754   *        ycover[Yi0*Ymax + Ys32] = 1;
   755   *        ycover[Yi1*Ymax + Ys32] = 1;
   756   *        ycover[Yi8*Ymax + Ys32] = 1;
   757   *
   758   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   759   * if that's what an instruction can handle.
   760   *
 * In parallel with the scan through the ytable for the appropriate line, there
 * is a z pointer that starts out pointing at the strange magic byte list in
 * the Optab struct (the op field).  With each step past a non-matching ytable
 * line, z advances by that line's zoffset (the last entry in the line).  When
 * a matching line is found, that z pointer has the extra data to use in laying
 * down the instruction bytes.  The actual bytes laid down are a function of
 * the line's zcase entry (that is, the Ztype) and the z bytes.
   768   *
 * For example, let's look at AADDL.  The optab line says:
 *        Optab{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
 *
 * and yaddl says
 *        var yaddl = []ytab{
 *                {Yi8, Ynone, Yml, Zibo_m, 2},
 *                {Yi32, Ynone, Yax, Zil_, 1},
 *                {Yi32, Ynone, Yml, Zilo_m, 2},
 *                {Yrl, Ynone, Yml, Zr_m, 1},
 *                {Yml, Ynone, Yrl, Zm_r, 1},
 *        }
   782   *
   783   * so there are 5 possible types of ADDL instruction that can be laid down, and
   784   * possible states used to lay them down (Ztype and z pointer, assuming z
   785   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   786   *
   787   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   788   *        Yi32, Yax -> Zil_, z+2 (0x05)
   789   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   790   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   791   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   792   *
   793   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   794   * relatively straightforward as this program goes.
   795   *
 * The switch on the Ztype (the zcase field of the matching ytab line) in
 * doasm implements the various Z cases.  Zibo_m, for example, is an opcode
 * byte (z[0]) then an asmando (which is some kind of encoded addressing mode
 * for the Yml arg), and then a single immediate byte.  Zilo_m is the same
 * but a long (32-bit) immediate.
   800   */
   801  var optab =
   802  /*	as, ytab, andproto, opcode */
   803  []Optab{
   804  	Optab{obj.AXXX, nil, 0, [23]uint8{}},
   805  	Optab{AAAA, ynone, P32, [23]uint8{0x37}},
   806  	Optab{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   807  	Optab{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   808  	Optab{AAAS, ynone, P32, [23]uint8{0x3f}},
   809  	Optab{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   810  	Optab{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   811  	Optab{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   812  	Optab{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   813  	Optab{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   814  	Optab{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   815  	Optab{AADDPD, yxm, Pq, [23]uint8{0x58}},
   816  	Optab{AADDPS, yxm, Pm, [23]uint8{0x58}},
   817  	Optab{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   818  	Optab{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   819  	Optab{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   820  	Optab{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   821  	Optab{AADJSP, nil, 0, [23]uint8{}},
   822  	Optab{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   823  	Optab{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   824  	Optab{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   825  	Optab{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   826  	Optab{AANDPD, yxm, Pq, [23]uint8{0x54}},
   827  	Optab{AANDPS, yxm, Pq, [23]uint8{0x54}},
   828  	Optab{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   829  	Optab{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   830  	Optab{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   831  	Optab{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   832  	Optab{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   833  	Optab{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   834  	Optab{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   835  	Optab{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   836  	Optab{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   837  	Optab{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   838  	Optab{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   839  	Optab{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   840  	Optab{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   841  	Optab{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   842  	Optab{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   843  	Optab{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   844  	Optab{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   845  	Optab{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   846  	Optab{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   847  	Optab{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   848  	Optab{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   849  	Optab{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   850  	Optab{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   851  	Optab{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   852  	Optab{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   853  	Optab{ABYTE, ybyte, Px, [23]uint8{1}},
   854  	Optab{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   855  	Optab{ACDQ, ynone, Px, [23]uint8{0x99}},
   856  	Optab{ACLC, ynone, Px, [23]uint8{0xf8}},
   857  	Optab{ACLD, ynone, Px, [23]uint8{0xfc}},
   858  	Optab{ACLI, ynone, Px, [23]uint8{0xfa}},
   859  	Optab{ACLTS, ynone, Pm, [23]uint8{0x06}},
   860  	Optab{ACMC, ynone, Px, [23]uint8{0xf5}},
   861  	Optab{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
   862  	Optab{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
   863  	Optab{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
   864  	Optab{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
   865  	Optab{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
   866  	Optab{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
   867  	Optab{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
   868  	Optab{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
   869  	Optab{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
   870  	Optab{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
   871  	Optab{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
   872  	Optab{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
   873  	Optab{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
   874  	Optab{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
   875  	Optab{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
   876  	Optab{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
   877  	Optab{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
   878  	Optab{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
   879  	Optab{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
   880  	Optab{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
   881  	Optab{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
   882  	Optab{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
   883  	Optab{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
   884  	Optab{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
   885  	Optab{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
   886  	Optab{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
   887  	Optab{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
   888  	Optab{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
   889  	Optab{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
   890  	Optab{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
   891  	Optab{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
   892  	Optab{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
   893  	Optab{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
   894  	Optab{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
   895  	Optab{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
   896  	Optab{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
   897  	Optab{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
   898  	Optab{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
   899  	Optab{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
   900  	Optab{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
   901  	Optab{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
   902  	Optab{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
   903  	Optab{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
   904  	Optab{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
   905  	Optab{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
   906  	Optab{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
   907  	Optab{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
   908  	Optab{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
   909  	Optab{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
   910  	Optab{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   911  	Optab{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
   912  	Optab{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
   913  	Optab{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   914  	Optab{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
   915  	Optab{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
   916  	Optab{ACMPSL, ynone, Px, [23]uint8{0xa7}},
   917  	Optab{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
   918  	Optab{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
   919  	Optab{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
   920  	Optab{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   921  	Optab{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
   922  	Optab{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
   923  	Optab{ACPUID, ynone, Pm, [23]uint8{0xa2}},
   924  	Optab{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
   925  	Optab{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
   926  	Optab{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
   927  	Optab{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
   928  	Optab{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
   929  	Optab{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
   930  	Optab{API2FW, ymfp, Px, [23]uint8{0x0c}},
   931  	Optab{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
   932  	Optab{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
   933  	Optab{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
   934  	Optab{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
   935  	Optab{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
   936  	Optab{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
   937  	Optab{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
   938  	Optab{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
   939  	Optab{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
   940  	Optab{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
   941  	Optab{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
   942  	Optab{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
   943  	Optab{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
   944  	Optab{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
   945  	Optab{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
   946  	Optab{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
   947  	Optab{ACWD, ynone, Pe, [23]uint8{0x99}},
   948  	Optab{ACQO, ynone, Pw, [23]uint8{0x99}},
   949  	Optab{ADAA, ynone, P32, [23]uint8{0x27}},
   950  	Optab{ADAS, ynone, P32, [23]uint8{0x2f}},
   951  	Optab{obj.ADATA, nil, 0, [23]uint8{}},
   952  	Optab{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
   953  	Optab{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
   954  	Optab{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
   955  	Optab{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
   956  	Optab{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
   957  	Optab{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
   958  	Optab{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
   959  	Optab{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
   960  	Optab{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
   961  	Optab{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
   962  	Optab{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
   963  	Optab{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
   964  	Optab{AEMMS, ynone, Pm, [23]uint8{0x77}},
   965  	Optab{AENTER, nil, 0, [23]uint8{}}, /* botch */
   966  	Optab{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
   967  	Optab{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
   968  	Optab{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
   969  	Optab{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
   970  	Optab{obj.AGLOBL, nil, 0, [23]uint8{}},
   971  	Optab{AHLT, ynone, Px, [23]uint8{0xf4}},
   972  	Optab{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
   973  	Optab{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
   974  	Optab{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
   975  	Optab{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
   976  	Optab{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
   977  	Optab{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   978  	Optab{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   979  	Optab{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   980  	Optab{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
   981  	Optab{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
   982  	Optab{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
   983  	Optab{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
   984  	Optab{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
   985  	Optab{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
   986  	Optab{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
   987  	Optab{AINSB, ynone, Pb, [23]uint8{0x6c}},
   988  	Optab{AINSL, ynone, Px, [23]uint8{0x6d}},
   989  	Optab{AINSW, ynone, Pe, [23]uint8{0x6d}},
   990  	Optab{AINT, yint, Px, [23]uint8{0xcd}},
   991  	Optab{AINTO, ynone, P32, [23]uint8{0xce}},
   992  	Optab{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
   993  	Optab{AIRETL, ynone, Px, [23]uint8{0xcf}},
   994  	Optab{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
   995  	Optab{AIRETW, ynone, Pe, [23]uint8{0xcf}},
   996  	Optab{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
   997  	Optab{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
   998  	Optab{AJCXZL, yloop, Px, [23]uint8{0xe3}},
   999  	Optab{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1000  	Optab{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1001  	Optab{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1002  	Optab{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1003  	Optab{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1004  	Optab{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1005  	Optab{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1006  	Optab{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1007  	Optab{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1008  	Optab{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1009  	Optab{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1010  	Optab{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1011  	Optab{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1012  	Optab{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1013  	Optab{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1014  	Optab{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1015  	Optab{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1016  	Optab{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1017  	Optab{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1018  	Optab{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1019  	Optab{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1020  	Optab{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1021  	Optab{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1022  	Optab{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1023  	Optab{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1024  	Optab{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1025  	Optab{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1026  	Optab{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1027  	Optab{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1028  	Optab{ALODSL, ynone, Px, [23]uint8{0xad}},
  1029  	Optab{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1030  	Optab{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1031  	Optab{ALONG, ybyte, Px, [23]uint8{4}},
  1032  	Optab{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1033  	Optab{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1034  	Optab{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1035  	Optab{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1036  	Optab{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1037  	Optab{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1038  	Optab{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1039  	Optab{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1040  	Optab{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1041  	Optab{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1042  	Optab{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1043  	Optab{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1044  	Optab{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1045  	Optab{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1046  	Optab{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1047  	Optab{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1048  	Optab{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1049  	Optab{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1050  	Optab{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1051  	Optab{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1052  	Optab{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1053  	Optab{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1054  	Optab{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1055  	Optab{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1056  	Optab{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1057  	Optab{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1058  	Optab{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1059  	Optab{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1060  	Optab{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1061  	Optab{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1062  	Optab{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1063  	Optab{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1064  	Optab{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1065  	Optab{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1066  	Optab{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1067  	Optab{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1068  	Optab{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1069  	Optab{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1070  	Optab{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1071  	Optab{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1072  	Optab{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1073  	Optab{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1074  	Optab{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1075  	Optab{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1076  	Optab{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1077  	Optab{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1078  	Optab{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1079  	Optab{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1080  	Optab{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1081  	Optab{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1082  	Optab{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1083  	Optab{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1084  	Optab{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1085  	Optab{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1086  	Optab{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1087  	Optab{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1088  	Optab{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1089  	Optab{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1090  	Optab{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1091  	Optab{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1092  	Optab{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1093  	Optab{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1094  	Optab{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1095  	Optab{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1096  	Optab{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1097  	Optab{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1098  	Optab{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1099  	Optab{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1100  	Optab{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1101  	Optab{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1102  	Optab{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1103  	Optab{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1104  	Optab{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1105  	Optab{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1106  	Optab{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1107  	Optab{AORPD, yxm, Pq, [23]uint8{0x56}},
  1108  	Optab{AORPS, yxm, Pm, [23]uint8{0x56}},
  1109  	Optab{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1110  	Optab{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1111  	Optab{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1112  	Optab{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1113  	Optab{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1114  	Optab{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1115  	Optab{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1116  	Optab{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1117  	Optab{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1118  	Optab{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1119  	Optab{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1120  	Optab{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1121  	Optab{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1122  	Optab{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1123  	Optab{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1124  	Optab{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1125  	Optab{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1126  	Optab{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1127  	Optab{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1128  	Optab{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1129  	Optab{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1130  	Optab{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1131  	Optab{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1132  	Optab{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1133  	Optab{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1134  	Optab{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1135  	Optab{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1136  	Optab{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1137  	Optab{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1138  	Optab{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1139  	Optab{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1140  	Optab{APF2IL, ymfp, Px, [23]uint8{0x1d}},
  1141  	Optab{APF2IW, ymfp, Px, [23]uint8{0x1c}},
  1142  	Optab{API2FL, ymfp, Px, [23]uint8{0x0d}},
  1143  	Optab{APFACC, ymfp, Px, [23]uint8{0xae}},
  1144  	Optab{APFADD, ymfp, Px, [23]uint8{0x9e}},
  1145  	Optab{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
  1146  	Optab{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
  1147  	Optab{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
  1148  	Optab{APFMAX, ymfp, Px, [23]uint8{0xa4}},
  1149  	Optab{APFMIN, ymfp, Px, [23]uint8{0x94}},
  1150  	Optab{APFMUL, ymfp, Px, [23]uint8{0xb4}},
  1151  	Optab{APFNACC, ymfp, Px, [23]uint8{0x8a}},
  1152  	Optab{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
  1153  	Optab{APFRCP, ymfp, Px, [23]uint8{0x96}},
  1154  	Optab{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
  1155  	Optab{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
  1156  	Optab{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
  1157  	Optab{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
  1158  	Optab{APFSUB, ymfp, Px, [23]uint8{0x9a}},
  1159  	Optab{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
  1160  	Optab{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1161  	Optab{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1162  	Optab{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1163  	Optab{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1164  	Optab{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1165  	Optab{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1166  	Optab{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1167  	Optab{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1168  	Optab{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1169  	Optab{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
  1170  	Optab{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1171  	Optab{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1172  	Optab{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1173  	Optab{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1174  	Optab{APOPAL, ynone, P32, [23]uint8{0x61}},
  1175  	Optab{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1176  	Optab{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1177  	Optab{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1178  	Optab{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1179  	Optab{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1180  	Optab{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1181  	Optab{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1182  	Optab{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1183  	Optab{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1184  	Optab{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1185  	Optab{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1186  	Optab{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1187  	Optab{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1188  	Optab{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1189  	Optab{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1190  	Optab{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1191  	Optab{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1192  	Optab{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1193  	Optab{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1194  	Optab{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1195  	Optab{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1196  	Optab{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1197  	Optab{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1198  	Optab{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xe1, Pe, 0x71, 02}},
  1199  	Optab{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1200  	Optab{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1201  	Optab{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1202  	Optab{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1203  	Optab{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1204  	Optab{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1205  	Optab{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1206  	Optab{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1207  	Optab{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
  1208  	Optab{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1209  	Optab{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1210  	Optab{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1211  	Optab{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1212  	Optab{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1213  	Optab{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1214  	Optab{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1215  	Optab{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1216  	Optab{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1217  	Optab{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1218  	Optab{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1219  	Optab{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1220  	Optab{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1221  	Optab{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1222  	Optab{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1223  	Optab{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1224  	Optab{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1225  	Optab{AQUAD, ybyte, Px, [23]uint8{8}},
  1226  	Optab{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1227  	Optab{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1228  	Optab{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1229  	Optab{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1230  	Optab{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1231  	Optab{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1232  	Optab{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1233  	Optab{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1234  	Optab{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1235  	Optab{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1236  	Optab{AREP, ynone, Px, [23]uint8{0xf3}},
  1237  	Optab{AREPN, ynone, Px, [23]uint8{0xf2}},
  1238  	Optab{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1239  	Optab{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1240  	Optab{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1241  	Optab{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1242  	Optab{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1243  	Optab{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1244  	Optab{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1245  	Optab{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1246  	Optab{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1247  	Optab{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1248  	Optab{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1249  	Optab{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1250  	Optab{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1251  	Optab{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1252  	Optab{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1253  	Optab{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1254  	Optab{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1255  	Optab{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1256  	Optab{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1257  	Optab{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1258  	Optab{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1259  	Optab{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1260  	Optab{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1261  	Optab{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1262  	Optab{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1263  	Optab{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1264  	Optab{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1265  	Optab{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1266  	Optab{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1267  	Optab{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1268  	Optab{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1269  	Optab{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1270  	Optab{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1271  	Optab{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1272  	Optab{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1273  	Optab{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1274  	Optab{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1275  	Optab{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1276  	Optab{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1277  	Optab{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1278  	Optab{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1279  	Optab{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1280  	Optab{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1281  	Optab{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1282  	Optab{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1283  	Optab{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1284  	Optab{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1285  	Optab{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1286  	Optab{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1287  	Optab{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1288  	Optab{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1289  	Optab{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1290  	Optab{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1291  	Optab{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1292  	Optab{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1293  	Optab{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1294  	Optab{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1295  	Optab{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1296  	Optab{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1297  	Optab{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1298  	Optab{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1299  	Optab{ASTC, ynone, Px, [23]uint8{0xf9}},
  1300  	Optab{ASTD, ynone, Px, [23]uint8{0xfd}},
  1301  	Optab{ASTI, ynone, Px, [23]uint8{0xfb}},
  1302  	Optab{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1303  	Optab{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1304  	Optab{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1305  	Optab{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1306  	Optab{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1307  	Optab{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1308  	Optab{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1309  	Optab{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1310  	Optab{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1311  	Optab{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1312  	Optab{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1313  	Optab{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1314  	Optab{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1315  	Optab{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1316  	Optab{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1317  	Optab{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1318  	Optab{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1319  	Optab{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1320  	Optab{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1321  	Optab{obj.ATEXT, ytext, Px, [23]uint8{}},
  1322  	Optab{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1323  	Optab{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1324  	Optab{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1325  	Optab{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1326  	Optab{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1327  	Optab{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1328  	Optab{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1329  	Optab{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1330  	Optab{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1331  	Optab{AWORD, ybyte, Px, [23]uint8{2}},
  1332  	Optab{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1333  	Optab{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1334  	Optab{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1335  	Optab{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1336  	Optab{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1337  	Optab{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1338  	Optab{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1339  	Optab{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1340  	Optab{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1341  	Optab{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1342  	Optab{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1343  	Optab{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1344  	Optab{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1345  	Optab{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1346  	Optab{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1347  	Optab{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1348  	Optab{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1349  	Optab{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1350  	Optab{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1351  	Optab{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1352  	Optab{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1353  	Optab{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1354  	Optab{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1355  	Optab{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1356  	Optab{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1357  	Optab{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1358  	Optab{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1359  	Optab{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1360  	Optab{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1361  	Optab{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1362  	Optab{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1363  	Optab{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1364  	Optab{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1365  	Optab{AFCOMB, nil, 0, [23]uint8{}},
  1366  	Optab{AFCOMBP, nil, 0, [23]uint8{}},
  1367  	Optab{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1368  	Optab{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1369  	Optab{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1370  	Optab{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1371  	Optab{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1372  	Optab{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1373  	Optab{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1374  	Optab{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1375  	Optab{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1376  	Optab{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1377  	Optab{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1378  	Optab{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1379  	Optab{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1380  	Optab{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1381  	Optab{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1382  	Optab{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1383  	Optab{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1384  	Optab{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1385  	Optab{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1386  	Optab{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1387  	Optab{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1388  	Optab{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1389  	Optab{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1390  	Optab{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1391  	Optab{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1392  	Optab{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1393  	Optab{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1394  	Optab{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1395  	Optab{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1396  	Optab{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1397  	Optab{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1398  	Optab{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1399  	Optab{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1400  	Optab{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1401  	Optab{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1402  	Optab{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1403  	Optab{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1404  	Optab{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1405  	Optab{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1406  	Optab{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1407  	Optab{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1408  	Optab{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1409  	Optab{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1410  	Optab{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1411  	Optab{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1412  	Optab{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1413  	Optab{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1414  	Optab{AFFREE, nil, 0, [23]uint8{}},
  1415  	Optab{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1416  	Optab{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1417  	Optab{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1418  	Optab{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1419  	Optab{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1420  	Optab{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1421  	Optab{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1422  	Optab{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1423  	Optab{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1424  	Optab{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1425  	Optab{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1426  	Optab{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1427  	Optab{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1428  	Optab{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1429  	Optab{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1430  	Optab{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1431  	Optab{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1432  	Optab{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1433  	Optab{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1434  	Optab{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1435  	Optab{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1436  	Optab{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1437  	Optab{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1438  	Optab{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1439  	Optab{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1440  	Optab{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1441  	Optab{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1442  	Optab{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1443  	Optab{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1444  	Optab{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1445  	Optab{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1446  	Optab{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1447  	Optab{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1448  	Optab{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1449  	Optab{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1450  	Optab{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1451  	Optab{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1452  	Optab{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1453  	Optab{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1454  	Optab{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1455  	Optab{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1456  	Optab{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1457  	Optab{AINVD, ynone, Pm, [23]uint8{0x08}},
  1458  	Optab{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1459  	Optab{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1460  	Optab{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1461  	Optab{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1462  	Optab{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1463  	Optab{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1464  	Optab{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1465  	Optab{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1466  	Optab{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1467  	Optab{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1468  	Optab{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1469  	Optab{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1470  	Optab{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1471  	Optab{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1472  	Optab{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1473  	Optab{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1474  	Optab{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1475  	Optab{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1476  	Optab{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1477  	Optab{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1478  	Optab{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1479  	Optab{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1480  	Optab{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1481  	Optab{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1482  	Optab{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1483  	Optab{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1484  	Optab{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1485  	Optab{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1486  	Optab{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1487  	Optab{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1488  	Optab{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1489  	Optab{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1490  	Optab{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1491  	Optab{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1492  	Optab{obj.ATYPE, nil, 0, [23]uint8{}},
  1493  	Optab{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1494  	Optab{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1495  	Optab{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1496  	Optab{obj.AVARDEF, nil, 0, [23]uint8{}},
  1497  	Optab{obj.AVARKILL, nil, 0, [23]uint8{}},
  1498  	Optab{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1499  	Optab{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1500  	Optab{obj.AEND, nil, 0, [23]uint8{}},
  1501  	Optab{0, nil, 0, [23]uint8{}},
  1502  }
  1503  
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1505  
  1506  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1507  // This happens on systems like Solaris that call .so functions instead of system calls.
  1508  // It does not seem to be necessary for any other systems. This is probably working
  1509  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1510  // what that bug is. And this does fix it.
  1511  func isextern(s *obj.LSym) bool {
  1512  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1513  	return strings.HasPrefix(s.Name, "libc_")
  1514  }
  1515  
  1516  // single-instruction no-ops of various lengths.
  1517  // constructed by hand and disassembled with gdb to verify.
  1518  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1519  var nop = [][16]uint8{
  1520  	[16]uint8{0x90},
  1521  	[16]uint8{0x66, 0x90},
  1522  	[16]uint8{0x0F, 0x1F, 0x00},
  1523  	[16]uint8{0x0F, 0x1F, 0x40, 0x00},
  1524  	[16]uint8{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1525  	[16]uint8{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1526  	[16]uint8{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1527  	[16]uint8{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1528  	[16]uint8{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1529  }
  1530  
  1531  // Native Client rejects the repeated 0x66 prefix.
  1532  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1533  func fillnop(p []byte, n int) {
  1534  	var m int
  1535  
  1536  	for n > 0 {
  1537  		m = n
  1538  		if m > len(nop) {
  1539  			m = len(nop)
  1540  		}
  1541  		copy(p[:m], nop[m-1][:m])
  1542  		p = p[m:]
  1543  		n -= m
  1544  	}
  1545  }
  1546  
  1547  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1548  	obj.Symgrow(ctxt, s, int64(c)+int64(pad))
  1549  	fillnop(s.P[c:], int(pad))
  1550  	return c + pad
  1551  }
  1552  
  1553  func spadjop(ctxt *obj.Link, p *obj.Prog, l int, q int) int {
  1554  	if p.Mode != 64 || ctxt.Arch.Ptrsize == 4 {
  1555  		return l
  1556  	}
  1557  	return q
  1558  }
  1559  
  1560  func span6(ctxt *obj.Link, s *obj.LSym) {
  1561  	ctxt.Cursym = s
  1562  
  1563  	if s.P != nil {
  1564  		return
  1565  	}
  1566  
  1567  	if ycover[0] == 0 {
  1568  		instinit()
  1569  	}
  1570  
  1571  	var v int32
  1572  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1573  		if p.To.Type == obj.TYPE_BRANCH {
  1574  			if p.Pcond == nil {
  1575  				p.Pcond = p
  1576  			}
  1577  		}
  1578  		if p.As == AADJSP {
  1579  			p.To.Type = obj.TYPE_REG
  1580  			p.To.Reg = REG_SP
  1581  			v = int32(-p.From.Offset)
  1582  			p.From.Offset = int64(v)
  1583  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1584  			if v < 0 {
  1585  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1586  				v = -v
  1587  				p.From.Offset = int64(v)
  1588  			}
  1589  
  1590  			if v == 0 {
  1591  				p.As = obj.ANOP
  1592  			}
  1593  		}
  1594  	}
  1595  
  1596  	var q *obj.Prog
  1597  	for p := s.Text; p != nil; p = p.Link {
  1598  		p.Back = 2 // use short branches first time through
  1599  		q = p.Pcond
  1600  		if q != nil && (q.Back&2 != 0) {
  1601  			p.Back |= 1 // backward jump
  1602  			q.Back |= 4 // loop head
  1603  		}
  1604  
  1605  		if p.As == AADJSP {
  1606  			p.To.Type = obj.TYPE_REG
  1607  			p.To.Reg = REG_SP
  1608  			v = int32(-p.From.Offset)
  1609  			p.From.Offset = int64(v)
  1610  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1611  			if v < 0 {
  1612  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1613  				v = -v
  1614  				p.From.Offset = int64(v)
  1615  			}
  1616  
  1617  			if v == 0 {
  1618  				p.As = obj.ANOP
  1619  			}
  1620  		}
  1621  	}
  1622  
  1623  	n := 0
  1624  	var bp []byte
  1625  	var c int32
  1626  	var i int
  1627  	var loop int32
  1628  	var m int
  1629  	var p *obj.Prog
  1630  	for {
  1631  		loop = 0
  1632  		for i = 0; i < len(s.R); i++ {
  1633  			s.R[i] = obj.Reloc{}
  1634  		}
  1635  		s.R = s.R[:0]
  1636  		s.P = s.P[:0]
  1637  		c = 0
  1638  		for p = s.Text; p != nil; p = p.Link {
  1639  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1640  				var deferreturn *obj.LSym
  1641  
  1642  				if deferreturn == nil {
  1643  					deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1644  				}
  1645  
  1646  				// pad everything to avoid crossing 32-byte boundary
  1647  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1648  					c = naclpad(ctxt, s, c, -c&31)
  1649  				}
  1650  
  1651  				// pad call deferreturn to start at 32-byte boundary
  1652  				// so that subtracting 5 in jmpdefer will jump back
  1653  				// to that boundary and rerun the call.
  1654  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1655  					c = naclpad(ctxt, s, c, -c&31)
  1656  				}
  1657  
  1658  				// pad call to end at 32-byte boundary
  1659  				if p.As == obj.ACALL {
  1660  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1661  				}
  1662  
  1663  				// the linker treats REP and STOSQ as different instructions
  1664  				// but in fact the REP is a prefix on the STOSQ.
  1665  				// make sure REP has room for 2 more bytes, so that
  1666  				// padding will not be inserted before the next instruction.
  1667  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1668  					c = naclpad(ctxt, s, c, -c&31)
  1669  				}
  1670  
  1671  				// same for LOCK.
  1672  				// various instructions follow; the longest is 4 bytes.
  1673  				// give ourselves 8 bytes so as to avoid surprises.
  1674  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1675  					c = naclpad(ctxt, s, c, -c&31)
  1676  				}
  1677  			}
  1678  
  1679  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1680  				// pad with NOPs
  1681  				v = -c & (LoopAlign - 1)
  1682  
  1683  				if v <= MaxLoopPad {
  1684  					obj.Symgrow(ctxt, s, int64(c)+int64(v))
  1685  					fillnop(s.P[c:], int(v))
  1686  					c += v
  1687  				}
  1688  			}
  1689  
  1690  			p.Pc = int64(c)
  1691  
  1692  			// process forward jumps to p
  1693  			for q = p.Rel; q != nil; q = q.Forwd {
  1694  				v = int32(p.Pc - (q.Pc + int64(q.Mark)))
  1695  				if q.Back&2 != 0 { // short
  1696  					if v > 127 {
  1697  						loop++
  1698  						q.Back ^= 2
  1699  					}
  1700  
  1701  					if q.As == AJCXZL {
  1702  						s.P[q.Pc+2] = byte(v)
  1703  					} else {
  1704  						s.P[q.Pc+1] = byte(v)
  1705  					}
  1706  				} else {
  1707  					bp = s.P[q.Pc+int64(q.Mark)-4:]
  1708  					bp[0] = byte(v)
  1709  					bp = bp[1:]
  1710  					bp[0] = byte(v >> 8)
  1711  					bp = bp[1:]
  1712  					bp[0] = byte(v >> 16)
  1713  					bp = bp[1:]
  1714  					bp[0] = byte(v >> 24)
  1715  				}
  1716  			}
  1717  
  1718  			p.Rel = nil
  1719  
  1720  			p.Pc = int64(c)
  1721  			asmins(ctxt, p)
  1722  			m = -cap(ctxt.Andptr) + cap(ctxt.And[:])
  1723  			if int(p.Isize) != m {
  1724  				p.Isize = uint8(m)
  1725  				loop++
  1726  			}
  1727  
  1728  			obj.Symgrow(ctxt, s, p.Pc+int64(m))
  1729  			copy(s.P[p.Pc:][:m], ctxt.And[:m])
  1730  			p.Mark = uint16(m)
  1731  			c += int32(m)
  1732  		}
  1733  
  1734  		n++
  1735  		if n > 20 {
  1736  			ctxt.Diag("span must be looping")
  1737  			log.Fatalf("loop")
  1738  		}
  1739  		if loop == 0 {
  1740  			break
  1741  		}
  1742  	}
  1743  
  1744  	if ctxt.Headtype == obj.Hnacl {
  1745  		c = naclpad(ctxt, s, c, -c&31)
  1746  	}
  1747  
  1748  	c += -c & (FuncAlign - 1)
  1749  	s.Size = int64(c)
  1750  
  1751  	if false { /* debug['a'] > 1 */
  1752  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1753  		var i int
  1754  		for i = 0; i < len(s.P); i++ {
  1755  			fmt.Printf(" %.2x", s.P[i])
  1756  			if i%16 == 15 {
  1757  				fmt.Printf("\n  %.6x", uint(i+1))
  1758  			}
  1759  		}
  1760  
  1761  		if i%16 != 0 {
  1762  			fmt.Printf("\n")
  1763  		}
  1764  
  1765  		for i := 0; i < len(s.R); i++ {
  1766  			r := &s.R[i]
  1767  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1768  		}
  1769  	}
  1770  }
  1771  
  1772  func instinit() {
  1773  	var c int
  1774  
  1775  	for i := 1; optab[i].as != 0; i++ {
  1776  		c = int(optab[i].as)
  1777  		if opindex[c&obj.AMask] != nil {
  1778  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  1779  		}
  1780  		opindex[c&obj.AMask] = &optab[i]
  1781  	}
  1782  
  1783  	for i := 0; i < Ymax; i++ {
  1784  		ycover[i*Ymax+i] = 1
  1785  	}
  1786  
  1787  	ycover[Yi0*Ymax+Yi8] = 1
  1788  	ycover[Yi1*Ymax+Yi8] = 1
  1789  	ycover[Yu7*Ymax+Yi8] = 1
  1790  
  1791  	ycover[Yi0*Ymax+Yu7] = 1
  1792  	ycover[Yi1*Ymax+Yu7] = 1
  1793  
  1794  	ycover[Yi0*Ymax+Yu8] = 1
  1795  	ycover[Yi1*Ymax+Yu8] = 1
  1796  	ycover[Yu7*Ymax+Yu8] = 1
  1797  
  1798  	ycover[Yi0*Ymax+Ys32] = 1
  1799  	ycover[Yi1*Ymax+Ys32] = 1
  1800  	ycover[Yu7*Ymax+Ys32] = 1
  1801  	ycover[Yu8*Ymax+Ys32] = 1
  1802  	ycover[Yi8*Ymax+Ys32] = 1
  1803  
  1804  	ycover[Yi0*Ymax+Yi32] = 1
  1805  	ycover[Yi1*Ymax+Yi32] = 1
  1806  	ycover[Yu7*Ymax+Yi32] = 1
  1807  	ycover[Yu8*Ymax+Yi32] = 1
  1808  	ycover[Yi8*Ymax+Yi32] = 1
  1809  	ycover[Ys32*Ymax+Yi32] = 1
  1810  
  1811  	ycover[Yi0*Ymax+Yi64] = 1
  1812  	ycover[Yi1*Ymax+Yi64] = 1
  1813  	ycover[Yu7*Ymax+Yi64] = 1
  1814  	ycover[Yu8*Ymax+Yi64] = 1
  1815  	ycover[Yi8*Ymax+Yi64] = 1
  1816  	ycover[Ys32*Ymax+Yi64] = 1
  1817  	ycover[Yi32*Ymax+Yi64] = 1
  1818  
  1819  	ycover[Yal*Ymax+Yrb] = 1
  1820  	ycover[Ycl*Ymax+Yrb] = 1
  1821  	ycover[Yax*Ymax+Yrb] = 1
  1822  	ycover[Ycx*Ymax+Yrb] = 1
  1823  	ycover[Yrx*Ymax+Yrb] = 1
  1824  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  1825  
  1826  	ycover[Ycl*Ymax+Ycx] = 1
  1827  
  1828  	ycover[Yax*Ymax+Yrx] = 1
  1829  	ycover[Ycx*Ymax+Yrx] = 1
  1830  
  1831  	ycover[Yax*Ymax+Yrl] = 1
  1832  	ycover[Ycx*Ymax+Yrl] = 1
  1833  	ycover[Yrx*Ymax+Yrl] = 1
  1834  	ycover[Yrl32*Ymax+Yrl] = 1
  1835  
  1836  	ycover[Yf0*Ymax+Yrf] = 1
  1837  
  1838  	ycover[Yal*Ymax+Ymb] = 1
  1839  	ycover[Ycl*Ymax+Ymb] = 1
  1840  	ycover[Yax*Ymax+Ymb] = 1
  1841  	ycover[Ycx*Ymax+Ymb] = 1
  1842  	ycover[Yrx*Ymax+Ymb] = 1
  1843  	ycover[Yrb*Ymax+Ymb] = 1
  1844  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  1845  	ycover[Ym*Ymax+Ymb] = 1
  1846  
  1847  	ycover[Yax*Ymax+Yml] = 1
  1848  	ycover[Ycx*Ymax+Yml] = 1
  1849  	ycover[Yrx*Ymax+Yml] = 1
  1850  	ycover[Yrl*Ymax+Yml] = 1
  1851  	ycover[Yrl32*Ymax+Yml] = 1
  1852  	ycover[Ym*Ymax+Yml] = 1
  1853  
  1854  	ycover[Yax*Ymax+Ymm] = 1
  1855  	ycover[Ycx*Ymax+Ymm] = 1
  1856  	ycover[Yrx*Ymax+Ymm] = 1
  1857  	ycover[Yrl*Ymax+Ymm] = 1
  1858  	ycover[Yrl32*Ymax+Ymm] = 1
  1859  	ycover[Ym*Ymax+Ymm] = 1
  1860  	ycover[Ymr*Ymax+Ymm] = 1
  1861  
  1862  	ycover[Ym*Ymax+Yxm] = 1
  1863  	ycover[Yxr*Ymax+Yxm] = 1
  1864  
  1865  	for i := 0; i < MAXREG; i++ {
  1866  		reg[i] = -1
  1867  		if i >= REG_AL && i <= REG_R15B {
  1868  			reg[i] = (i - REG_AL) & 7
  1869  			if i >= REG_SPB && i <= REG_DIB {
  1870  				regrex[i] = 0x40
  1871  			}
  1872  			if i >= REG_R8B && i <= REG_R15B {
  1873  				regrex[i] = Rxr | Rxx | Rxb
  1874  			}
  1875  		}
  1876  
  1877  		if i >= REG_AH && i <= REG_BH {
  1878  			reg[i] = 4 + ((i - REG_AH) & 7)
  1879  		}
  1880  		if i >= REG_AX && i <= REG_R15 {
  1881  			reg[i] = (i - REG_AX) & 7
  1882  			if i >= REG_R8 {
  1883  				regrex[i] = Rxr | Rxx | Rxb
  1884  			}
  1885  		}
  1886  
  1887  		if i >= REG_F0 && i <= REG_F0+7 {
  1888  			reg[i] = (i - REG_F0) & 7
  1889  		}
  1890  		if i >= REG_M0 && i <= REG_M0+7 {
  1891  			reg[i] = (i - REG_M0) & 7
  1892  		}
  1893  		if i >= REG_X0 && i <= REG_X0+15 {
  1894  			reg[i] = (i - REG_X0) & 7
  1895  			if i >= REG_X0+8 {
  1896  				regrex[i] = Rxr | Rxx | Rxb
  1897  			}
  1898  		}
  1899  
  1900  		if i >= REG_CR+8 && i <= REG_CR+15 {
  1901  			regrex[i] = Rxr
  1902  		}
  1903  	}
  1904  }
  1905  
  1906  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  1907  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  1908  		return 0
  1909  	}
  1910  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  1911  		switch a.Reg {
  1912  		case REG_CS:
  1913  			return 0x2e
  1914  
  1915  		case REG_DS:
  1916  			return 0x3e
  1917  
  1918  		case REG_ES:
  1919  			return 0x26
  1920  
  1921  		case REG_FS:
  1922  			return 0x64
  1923  
  1924  		case REG_GS:
  1925  			return 0x65
  1926  
  1927  		case REG_TLS:
  1928  			// NOTE: Systems listed here should be only systems that
  1929  			// support direct TLS references like 8(TLS) implemented as
  1930  			// direct references from FS or GS. Systems that require
  1931  			// the initial-exec model, where you load the TLS base into
  1932  			// a register and then index from that register, do not reach
  1933  			// this code and should not be listed.
  1934  			if p.Mode == 32 {
  1935  				switch ctxt.Headtype {
  1936  				default:
  1937  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1938  
  1939  				case obj.Hdarwin,
  1940  					obj.Hdragonfly,
  1941  					obj.Hfreebsd,
  1942  					obj.Hnetbsd,
  1943  					obj.Hopenbsd:
  1944  					return 0x65 // GS
  1945  				}
  1946  			}
  1947  
  1948  			switch ctxt.Headtype {
  1949  			default:
  1950  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1951  
  1952  			case obj.Hlinux:
  1953  				if ctxt.Flag_shared != 0 {
  1954  					log.Fatalf("unknown TLS base register for linux with -shared")
  1955  				} else {
  1956  					return 0x64 // FS
  1957  				}
  1958  
  1959  			case obj.Hdragonfly,
  1960  				obj.Hfreebsd,
  1961  				obj.Hnetbsd,
  1962  				obj.Hopenbsd,
  1963  				obj.Hsolaris:
  1964  				return 0x64 // FS
  1965  
  1966  			case obj.Hdarwin:
  1967  				return 0x65 // GS
  1968  			}
  1969  		}
  1970  	}
  1971  
  1972  	if p.Mode == 32 {
  1973  		return 0
  1974  	}
  1975  
  1976  	switch a.Index {
  1977  	case REG_CS:
  1978  		return 0x2e
  1979  
  1980  	case REG_DS:
  1981  		return 0x3e
  1982  
  1983  	case REG_ES:
  1984  		return 0x26
  1985  
  1986  	case REG_TLS:
  1987  		if ctxt.Flag_shared != 0 {
  1988  			// When building for inclusion into a shared library, an instruction of the form
  1989  			//     MOV 0(CX)(TLS*1), AX
  1990  			// becomes
  1991  			//     mov %fs:(%rcx), %rax
  1992  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  1993  			// there is only one TLS variable -- g -- so this is OK). When not building for
  1994  			// a shared library the instruction does not require a prefix.
  1995  			if a.Offset != 0 {
  1996  				log.Fatalf("cannot handle non-0 offsets to TLS")
  1997  			}
  1998  			return 0x64
  1999  		}
  2000  
  2001  	case REG_FS:
  2002  		return 0x64
  2003  
  2004  	case REG_GS:
  2005  		return 0x65
  2006  	}
  2007  
  2008  	return 0
  2009  }
  2010  
  2011  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2012  	switch a.Type {
  2013  	case obj.TYPE_NONE:
  2014  		return Ynone
  2015  
  2016  	case obj.TYPE_BRANCH:
  2017  		return Ybr
  2018  
  2019  	case obj.TYPE_INDIR:
  2020  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2021  			return Yindir
  2022  		}
  2023  		return Yxxx
  2024  
  2025  	case obj.TYPE_MEM:
  2026  		return Ym
  2027  
  2028  	case obj.TYPE_ADDR:
  2029  		switch a.Name {
  2030  		case obj.NAME_EXTERN,
  2031  			obj.NAME_GOTREF,
  2032  			obj.NAME_STATIC:
  2033  			if a.Sym != nil && isextern(a.Sym) || p.Mode == 32 {
  2034  				return Yi32
  2035  			}
  2036  			return Yiauto // use pc-relative addressing
  2037  
  2038  		case obj.NAME_AUTO,
  2039  			obj.NAME_PARAM:
  2040  			return Yiauto
  2041  		}
  2042  
  2043  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2044  		// and got Yi32 in an earlier version of this code.
  2045  		// Keep doing that until we fix yduff etc.
  2046  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2047  			return Yi32
  2048  		}
  2049  
  2050  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2051  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2052  		}
  2053  		fallthrough
  2054  
  2055  		// fall through
  2056  
  2057  	case obj.TYPE_CONST:
  2058  		if a.Sym != nil {
  2059  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2060  		}
  2061  
  2062  		v := a.Offset
  2063  		if p.Mode == 32 {
  2064  			v = int64(int32(v))
  2065  		}
  2066  		if v == 0 {
  2067  			return Yi0
  2068  		}
  2069  		if v == 1 {
  2070  			return Yi1
  2071  		}
  2072  		if v >= 0 && v <= 127 {
  2073  			return Yu7
  2074  		}
  2075  		if v >= 0 && v <= 255 {
  2076  			return Yu8
  2077  		}
  2078  		if v >= -128 && v <= 127 {
  2079  			return Yi8
  2080  		}
  2081  		if p.Mode == 32 {
  2082  			return Yi32
  2083  		}
  2084  		l := int32(v)
  2085  		if int64(l) == v {
  2086  			return Ys32 /* can sign extend */
  2087  		}
  2088  		if v>>32 == 0 {
  2089  			return Yi32 /* unsigned */
  2090  		}
  2091  		return Yi64
  2092  
  2093  	case obj.TYPE_TEXTSIZE:
  2094  		return Ytextsize
  2095  	}
  2096  
  2097  	if a.Type != obj.TYPE_REG {
  2098  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2099  		return Yxxx
  2100  	}
  2101  
  2102  	switch a.Reg {
  2103  	case REG_AL:
  2104  		return Yal
  2105  
  2106  	case REG_AX:
  2107  		return Yax
  2108  
  2109  		/*
  2110  			case REG_SPB:
  2111  		*/
  2112  	case REG_BPB,
  2113  		REG_SIB,
  2114  		REG_DIB,
  2115  		REG_R8B,
  2116  		REG_R9B,
  2117  		REG_R10B,
  2118  		REG_R11B,
  2119  		REG_R12B,
  2120  		REG_R13B,
  2121  		REG_R14B,
  2122  		REG_R15B:
  2123  		if ctxt.Asmode != 64 {
  2124  			return Yxxx
  2125  		}
  2126  		fallthrough
  2127  
  2128  	case REG_DL,
  2129  		REG_BL,
  2130  		REG_AH,
  2131  		REG_CH,
  2132  		REG_DH,
  2133  		REG_BH:
  2134  		return Yrb
  2135  
  2136  	case REG_CL:
  2137  		return Ycl
  2138  
  2139  	case REG_CX:
  2140  		return Ycx
  2141  
  2142  	case REG_DX, REG_BX:
  2143  		return Yrx
  2144  
  2145  	case REG_R8, /* not really Yrl */
  2146  		REG_R9,
  2147  		REG_R10,
  2148  		REG_R11,
  2149  		REG_R12,
  2150  		REG_R13,
  2151  		REG_R14,
  2152  		REG_R15:
  2153  		if ctxt.Asmode != 64 {
  2154  			return Yxxx
  2155  		}
  2156  		fallthrough
  2157  
  2158  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2159  		if p.Mode == 32 {
  2160  			return Yrl32
  2161  		}
  2162  		return Yrl
  2163  
  2164  	case REG_F0 + 0:
  2165  		return Yf0
  2166  
  2167  	case REG_F0 + 1,
  2168  		REG_F0 + 2,
  2169  		REG_F0 + 3,
  2170  		REG_F0 + 4,
  2171  		REG_F0 + 5,
  2172  		REG_F0 + 6,
  2173  		REG_F0 + 7:
  2174  		return Yrf
  2175  
  2176  	case REG_M0 + 0,
  2177  		REG_M0 + 1,
  2178  		REG_M0 + 2,
  2179  		REG_M0 + 3,
  2180  		REG_M0 + 4,
  2181  		REG_M0 + 5,
  2182  		REG_M0 + 6,
  2183  		REG_M0 + 7:
  2184  		return Ymr
  2185  
  2186  	case REG_X0 + 0,
  2187  		REG_X0 + 1,
  2188  		REG_X0 + 2,
  2189  		REG_X0 + 3,
  2190  		REG_X0 + 4,
  2191  		REG_X0 + 5,
  2192  		REG_X0 + 6,
  2193  		REG_X0 + 7,
  2194  		REG_X0 + 8,
  2195  		REG_X0 + 9,
  2196  		REG_X0 + 10,
  2197  		REG_X0 + 11,
  2198  		REG_X0 + 12,
  2199  		REG_X0 + 13,
  2200  		REG_X0 + 14,
  2201  		REG_X0 + 15:
  2202  		return Yxr
  2203  
  2204  	case REG_CS:
  2205  		return Ycs
  2206  	case REG_SS:
  2207  		return Yss
  2208  	case REG_DS:
  2209  		return Yds
  2210  	case REG_ES:
  2211  		return Yes
  2212  	case REG_FS:
  2213  		return Yfs
  2214  	case REG_GS:
  2215  		return Ygs
  2216  	case REG_TLS:
  2217  		return Ytls
  2218  
  2219  	case REG_GDTR:
  2220  		return Ygdtr
  2221  	case REG_IDTR:
  2222  		return Yidtr
  2223  	case REG_LDTR:
  2224  		return Yldtr
  2225  	case REG_MSW:
  2226  		return Ymsw
  2227  	case REG_TASK:
  2228  		return Ytask
  2229  
  2230  	case REG_CR + 0:
  2231  		return Ycr0
  2232  	case REG_CR + 1:
  2233  		return Ycr1
  2234  	case REG_CR + 2:
  2235  		return Ycr2
  2236  	case REG_CR + 3:
  2237  		return Ycr3
  2238  	case REG_CR + 4:
  2239  		return Ycr4
  2240  	case REG_CR + 5:
  2241  		return Ycr5
  2242  	case REG_CR + 6:
  2243  		return Ycr6
  2244  	case REG_CR + 7:
  2245  		return Ycr7
  2246  	case REG_CR + 8:
  2247  		return Ycr8
  2248  
  2249  	case REG_DR + 0:
  2250  		return Ydr0
  2251  	case REG_DR + 1:
  2252  		return Ydr1
  2253  	case REG_DR + 2:
  2254  		return Ydr2
  2255  	case REG_DR + 3:
  2256  		return Ydr3
  2257  	case REG_DR + 4:
  2258  		return Ydr4
  2259  	case REG_DR + 5:
  2260  		return Ydr5
  2261  	case REG_DR + 6:
  2262  		return Ydr6
  2263  	case REG_DR + 7:
  2264  		return Ydr7
  2265  
  2266  	case REG_TR + 0:
  2267  		return Ytr0
  2268  	case REG_TR + 1:
  2269  		return Ytr1
  2270  	case REG_TR + 2:
  2271  		return Ytr2
  2272  	case REG_TR + 3:
  2273  		return Ytr3
  2274  	case REG_TR + 4:
  2275  		return Ytr4
  2276  	case REG_TR + 5:
  2277  		return Ytr5
  2278  	case REG_TR + 6:
  2279  		return Ytr6
  2280  	case REG_TR + 7:
  2281  		return Ytr7
  2282  	}
  2283  
  2284  	return Yxxx
  2285  }
  2286  
  2287  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2288  	var i int
  2289  
  2290  	switch index {
  2291  	default:
  2292  		goto bad
  2293  
  2294  	case REG_NONE:
  2295  		i = 4 << 3
  2296  		goto bas
  2297  
  2298  	case REG_R8,
  2299  		REG_R9,
  2300  		REG_R10,
  2301  		REG_R11,
  2302  		REG_R12,
  2303  		REG_R13,
  2304  		REG_R14,
  2305  		REG_R15:
  2306  		if ctxt.Asmode != 64 {
  2307  			goto bad
  2308  		}
  2309  		fallthrough
  2310  
  2311  	case REG_AX,
  2312  		REG_CX,
  2313  		REG_DX,
  2314  		REG_BX,
  2315  		REG_BP,
  2316  		REG_SI,
  2317  		REG_DI:
  2318  		i = reg[index] << 3
  2319  	}
  2320  
  2321  	switch scale {
  2322  	default:
  2323  		goto bad
  2324  
  2325  	case 1:
  2326  		break
  2327  
  2328  	case 2:
  2329  		i |= 1 << 6
  2330  
  2331  	case 4:
  2332  		i |= 2 << 6
  2333  
  2334  	case 8:
  2335  		i |= 3 << 6
  2336  	}
  2337  
  2338  bas:
  2339  	switch base {
  2340  	default:
  2341  		goto bad
  2342  
  2343  	case REG_NONE: /* must be mod=00 */
  2344  		i |= 5
  2345  
  2346  	case REG_R8,
  2347  		REG_R9,
  2348  		REG_R10,
  2349  		REG_R11,
  2350  		REG_R12,
  2351  		REG_R13,
  2352  		REG_R14,
  2353  		REG_R15:
  2354  		if ctxt.Asmode != 64 {
  2355  			goto bad
  2356  		}
  2357  		fallthrough
  2358  
  2359  	case REG_AX,
  2360  		REG_CX,
  2361  		REG_DX,
  2362  		REG_BX,
  2363  		REG_SP,
  2364  		REG_BP,
  2365  		REG_SI,
  2366  		REG_DI:
  2367  		i |= reg[base]
  2368  	}
  2369  
  2370  	ctxt.Andptr[0] = byte(i)
  2371  	ctxt.Andptr = ctxt.Andptr[1:]
  2372  	return
  2373  
  2374  bad:
  2375  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2376  	ctxt.Andptr[0] = 0
  2377  	ctxt.Andptr = ctxt.Andptr[1:]
  2378  	return
  2379  }
  2380  
  2381  func put4(ctxt *obj.Link, v int32) {
  2382  	ctxt.Andptr[0] = byte(v)
  2383  	ctxt.Andptr[1] = byte(v >> 8)
  2384  	ctxt.Andptr[2] = byte(v >> 16)
  2385  	ctxt.Andptr[3] = byte(v >> 24)
  2386  	ctxt.Andptr = ctxt.Andptr[4:]
  2387  }
  2388  
  2389  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2390  	var rel obj.Reloc
  2391  
  2392  	v := vaddr(ctxt, p, a, &rel)
  2393  	if rel.Siz != 0 {
  2394  		if rel.Siz != 4 {
  2395  			ctxt.Diag("bad reloc")
  2396  		}
  2397  		r := obj.Addrel(ctxt.Cursym)
  2398  		*r = rel
  2399  		r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2400  	}
  2401  
  2402  	put4(ctxt, int32(v))
  2403  }
  2404  
  2405  func put8(ctxt *obj.Link, v int64) {
  2406  	ctxt.Andptr[0] = byte(v)
  2407  	ctxt.Andptr[1] = byte(v >> 8)
  2408  	ctxt.Andptr[2] = byte(v >> 16)
  2409  	ctxt.Andptr[3] = byte(v >> 24)
  2410  	ctxt.Andptr[4] = byte(v >> 32)
  2411  	ctxt.Andptr[5] = byte(v >> 40)
  2412  	ctxt.Andptr[6] = byte(v >> 48)
  2413  	ctxt.Andptr[7] = byte(v >> 56)
  2414  	ctxt.Andptr = ctxt.Andptr[8:]
  2415  }
  2416  
  2417  /*
  2418  static void
  2419  relput8(Prog *p, Addr *a)
  2420  {
  2421  	vlong v;
  2422  	Reloc rel, *r;
  2423  
  2424  	v = vaddr(ctxt, p, a, &rel);
  2425  	if(rel.siz != 0) {
  2426  		r = addrel(ctxt->cursym);
  2427  		*r = rel;
  2428  		r->siz = 8;
  2429  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2430  	}
  2431  	put8(ctxt, v);
  2432  }
  2433  */
  2434  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2435  	if r != nil {
  2436  		*r = obj.Reloc{}
  2437  	}
  2438  
  2439  	switch a.Name {
  2440  	case obj.NAME_STATIC,
  2441  		obj.NAME_GOTREF,
  2442  		obj.NAME_EXTERN:
  2443  		s := a.Sym
  2444  		if r == nil {
  2445  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2446  			log.Fatalf("reloc")
  2447  		}
  2448  
  2449  		if a.Name == obj.NAME_GOTREF {
  2450  			r.Siz = 4
  2451  			r.Type = obj.R_GOTPCREL
  2452  		} else if isextern(s) || p.Mode != 64 {
  2453  			r.Siz = 4
  2454  			r.Type = obj.R_ADDR
  2455  		} else {
  2456  			r.Siz = 4
  2457  			r.Type = obj.R_PCREL
  2458  		}
  2459  
  2460  		r.Off = -1 // caller must fill in
  2461  		r.Sym = s
  2462  		r.Add = a.Offset
  2463  
  2464  		return 0
  2465  	}
  2466  
  2467  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2468  		if r == nil {
  2469  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2470  			log.Fatalf("reloc")
  2471  		}
  2472  
  2473  		r.Type = obj.R_TLS_LE
  2474  		r.Siz = 4
  2475  		r.Off = -1 // caller must fill in
  2476  		r.Add = a.Offset
  2477  		return 0
  2478  	}
  2479  
  2480  	return a.Offset
  2481  }
  2482  
// asmandsz emits the addressing bytes for operand a at ctxt.Andptr: a
// first byte holding r in bits 3-5 together with the addressing mode,
// followed where needed by a scaled-index byte (see asmidx) and an 8-bit
// or 32-bit displacement. It ORs the REX bits required by the registers
// of a, plus the 0x40 and Rxr bits of rex, into ctxt.Rexflag. When the
// displacement refers to a symbol or to TLS, a relocation is added to
// ctxt.Cursym. The m64 parameter is not used.
//
// Operands that cannot be encoded are reported through ctxt.Diag.
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Only these two bits of the caller's rex are passed on.
	rex &= 0x40 | Rxr
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// Address operands are never encodable here; the extra
		// diagnostics only narrow down what went wrong.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register operand: top two bits are 3, low three bits name the register.
		if a.Reg < REG_AL || REG_X0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ctxt.Andptr[0] = byte(3<<6 | reg[a.Reg]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always uses the
	// scaled-index byte emitted by asmidx.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			// An indexed symbol reference is rejected in 64-bit mode
			// unless the symbol is one isextern accepts.
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			base = REG_NONE
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: a 32-bit displacement always follows.
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement at all; BP and R13 are excluded from this form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in a signed byte.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		// 32-bit displacement.
		ctxt.Andptr[0] = byte(2<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with REG_TLS as index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		base = REG_NONE
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	// Absolute, segment-based or TLS-based reference: there is no base
	// register to encode, only a 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ctxt.Andptr[0] = byte(0<<6 | 5<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			goto putrelv
		}

		/* temporary */
		ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:] /* sib present */
		ctxt.Andptr[0] = 0<<6 | 4<<3 | 5<<0
		ctxt.Andptr = ctxt.Andptr[1:] /* DS:d32 */
		goto putrelv
	}

	// SP and R12 as base always go through asmidx with no index register.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general register as base.
	if REG_AX <= base && base <= REG_R15 {
		// With TLS as the index and not building shared, the offset is
		// moved into a TLS relocation and the encoded displacement is zero.
		if a.Index == REG_TLS && ctxt.Flag_shared == 0 {
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// No displacement; BP and R13 are excluded from this form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		// Displacement fits in a signed byte.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr[1] = byte(v)
			ctxt.Andptr = ctxt.Andptr[2:]
			return
		}

		// 32-bit displacement.
		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		goto putrelv
	}

	goto bad

	// putrelv emits the 32-bit displacement v, first recording the pending
	// relocation (if any) at the offset where the displacement is written.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// Note: this r shadows the parameter r.
		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		r.Off = int32(ctxt.Curp.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
	}

	put4(ctxt, v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  2676  
  2677  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2678  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2679  }
  2680  
  2681  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2682  	asmandsz(ctxt, p, a, o, 0, 0)
  2683  }
  2684  
  2685  func bytereg(a *obj.Addr, t *uint8) {
  2686  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2687  		a.Reg += REG_AL - REG_AX
  2688  		*t = 0
  2689  	}
  2690  }
  2691  
  2692  func unbytereg(a *obj.Addr, t *uint8) {
  2693  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2694  		a.Reg += REG_AX - REG_AL
  2695  		*t = 0
  2696  	}
  2697  }
  2698  
  2699  const (
  2700  	E = 0xff
  2701  )
  2702  
  2703  var ymovtab = []Movtab{
  2704  	/* push */
  2705  	Movtab{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
  2706  	Movtab{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
  2707  	Movtab{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
  2708  	Movtab{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
  2709  	Movtab{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
  2710  	Movtab{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
  2711  	Movtab{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
  2712  	Movtab{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
  2713  	Movtab{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
  2714  	Movtab{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
  2715  	Movtab{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
  2716  	Movtab{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
  2717  	Movtab{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
  2718  	Movtab{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},
  2719  
  2720  	/* pop */
  2721  	Movtab{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
  2722  	Movtab{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
  2723  	Movtab{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
  2724  	Movtab{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
  2725  	Movtab{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
  2726  	Movtab{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
  2727  	Movtab{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
  2728  	Movtab{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
  2729  	Movtab{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
  2730  	Movtab{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
  2731  	Movtab{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
  2732  	Movtab{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},
  2733  
  2734  	/* mov seg */
  2735  	Movtab{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
  2736  	Movtab{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
  2737  	Movtab{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
  2738  	Movtab{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
  2739  	Movtab{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
  2740  	Movtab{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
  2741  	Movtab{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
  2742  	Movtab{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
  2743  	Movtab{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
  2744  	Movtab{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
  2745  	Movtab{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
  2746  	Movtab{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},
  2747  
  2748  	/* mov cr */
  2749  	Movtab{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
  2750  	Movtab{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
  2751  	Movtab{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
  2752  	Movtab{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
  2753  	Movtab{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
  2754  	Movtab{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
  2755  	Movtab{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
  2756  	Movtab{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
  2757  	Movtab{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
  2758  	Movtab{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
  2759  	Movtab{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
  2760  	Movtab{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
  2761  	Movtab{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
  2762  	Movtab{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
  2763  	Movtab{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
  2764  	Movtab{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
  2765  	Movtab{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
  2766  	Movtab{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
  2767  	Movtab{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
  2768  	Movtab{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
  2769  
  2770  	/* mov dr */
  2771  	Movtab{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
  2772  	Movtab{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
  2773  	Movtab{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
  2774  	Movtab{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
  2775  	Movtab{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
  2776  	Movtab{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
  2777  	Movtab{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
  2778  	Movtab{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
  2779  	Movtab{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
  2780  	Movtab{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
  2781  	Movtab{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
  2782  	Movtab{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
  2783  
  2784  	/* mov tr */
  2785  	Movtab{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
  2786  	Movtab{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
  2787  	Movtab{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
  2788  	Movtab{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},
  2789  
  2790  	/* lgdt, sgdt, lidt, sidt */
  2791  	Movtab{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
  2792  	Movtab{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
  2793  	Movtab{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
  2794  	Movtab{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
  2795  	Movtab{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
  2796  	Movtab{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
  2797  	Movtab{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
  2798  	Movtab{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
  2799  
  2800  	/* lldt, sldt */
  2801  	Movtab{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
  2802  	Movtab{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},
  2803  
  2804  	/* lmsw, smsw */
  2805  	Movtab{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
  2806  	Movtab{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},
  2807  
  2808  	/* ltr, str */
  2809  	Movtab{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
  2810  	Movtab{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},
  2811  
  2812  	/* load full pointer - unsupported
  2813  	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
  2814  	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
  2815  	*/
  2816  
  2817  	/* double shift */
  2818  	Movtab{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2819  	Movtab{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2820  	Movtab{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2821  	Movtab{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2822  	Movtab{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2823  	Movtab{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2824  	Movtab{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2825  	Movtab{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2826  	Movtab{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2827  	Movtab{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2828  	Movtab{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2829  	Movtab{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2830  	Movtab{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2831  	Movtab{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2832  	Movtab{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2833  	Movtab{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2834  	Movtab{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2835  	Movtab{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2836  
  2837  	/* load TLS base */
  2838  	Movtab{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
  2839  	Movtab{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
  2840  	Movtab{0, 0, 0, 0, 0, [4]uint8{}},
  2841  }
  2842  
  2843  func isax(a *obj.Addr) bool {
  2844  	switch a.Reg {
  2845  	case REG_AX, REG_AL, REG_AH:
  2846  		return true
  2847  	}
  2848  
  2849  	if a.Index == REG_AX {
  2850  		return true
  2851  	}
  2852  	return false
  2853  }
  2854  
  2855  func subreg(p *obj.Prog, from int, to int) {
  2856  	if false { /* debug['Q'] */
  2857  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  2858  	}
  2859  
  2860  	if int(p.From.Reg) == from {
  2861  		p.From.Reg = int16(to)
  2862  		p.Ft = 0
  2863  	}
  2864  
  2865  	if int(p.To.Reg) == from {
  2866  		p.To.Reg = int16(to)
  2867  		p.Tt = 0
  2868  	}
  2869  
  2870  	if int(p.From.Index) == from {
  2871  		p.From.Index = int16(to)
  2872  		p.Ft = 0
  2873  	}
  2874  
  2875  	if int(p.To.Index) == from {
  2876  		p.To.Index = int16(to)
  2877  		p.Tt = 0
  2878  	}
  2879  
  2880  	if false { /* debug['Q'] */
  2881  		fmt.Printf("%v\n", p)
  2882  	}
  2883  }
  2884  
  2885  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  2886  	switch op {
  2887  	case Pm, Pe, Pf2, Pf3:
  2888  		if osize != 1 {
  2889  			if op != Pm {
  2890  				ctxt.Andptr[0] = byte(op)
  2891  				ctxt.Andptr = ctxt.Andptr[1:]
  2892  			}
  2893  			ctxt.Andptr[0] = Pm
  2894  			ctxt.Andptr = ctxt.Andptr[1:]
  2895  			z++
  2896  			op = int(o.op[z])
  2897  			break
  2898  		}
  2899  		fallthrough
  2900  
  2901  	default:
  2902  		if -cap(ctxt.Andptr) == -cap(ctxt.And) || ctxt.And[-cap(ctxt.Andptr)+cap(ctxt.And[:])-1] != Pm {
  2903  			ctxt.Andptr[0] = Pm
  2904  			ctxt.Andptr = ctxt.Andptr[1:]
  2905  		}
  2906  	}
  2907  
  2908  	ctxt.Andptr[0] = byte(op)
  2909  	ctxt.Andptr = ctxt.Andptr[1:]
  2910  	return z
  2911  }
  2912  
  2913  var bpduff1 = []byte{
  2914  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  2915  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  2916  }
  2917  
  2918  var bpduff2 = []byte{
  2919  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  2920  }
  2921  
  2922  func doasm(ctxt *obj.Link, p *obj.Prog) {
  2923  	ctxt.Curp = p // TODO
  2924  
  2925  	o := opindex[p.As&obj.AMask]
  2926  
  2927  	if o == nil {
  2928  		ctxt.Diag("asmins: missing op %v", p)
  2929  		return
  2930  	}
  2931  
  2932  	pre := prefixof(ctxt, p, &p.From)
  2933  	if pre != 0 {
  2934  		ctxt.Andptr[0] = byte(pre)
  2935  		ctxt.Andptr = ctxt.Andptr[1:]
  2936  	}
  2937  	pre = prefixof(ctxt, p, &p.To)
  2938  	if pre != 0 {
  2939  		ctxt.Andptr[0] = byte(pre)
  2940  		ctxt.Andptr = ctxt.Andptr[1:]
  2941  	}
  2942  
  2943  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  2944  	// which encodes as SHRQ $32(DX*0), AX.
  2945  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  2946  	// Change encoding generated by assemblers and compilers and remove.
  2947  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  2948  		p.From3 = new(obj.Addr)
  2949  		p.From3.Type = obj.TYPE_REG
  2950  		p.From3.Reg = p.From.Index
  2951  		p.From.Index = 0
  2952  	}
  2953  
  2954  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  2955  	// Change encoding generated by assemblers and compilers (if any) and remove.
  2956  	switch p.As {
  2957  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  2958  		if p.From3Type() == obj.TYPE_NONE {
  2959  			p.From3 = new(obj.Addr)
  2960  			*p.From3 = p.From
  2961  			p.From = obj.Addr{}
  2962  			p.From.Type = obj.TYPE_CONST
  2963  			p.From.Offset = p.To.Offset
  2964  			p.To.Offset = 0
  2965  		}
  2966  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  2967  		if p.From3Type() == obj.TYPE_NONE {
  2968  			p.From3 = new(obj.Addr)
  2969  			*p.From3 = p.To
  2970  			p.To = obj.Addr{}
  2971  			p.To.Type = obj.TYPE_CONST
  2972  			p.To.Offset = p.From3.Offset
  2973  			p.From3.Offset = 0
  2974  		}
  2975  	}
  2976  
  2977  	if p.Ft == 0 {
  2978  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  2979  	}
  2980  	if p.Tt == 0 {
  2981  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  2982  	}
  2983  
  2984  	ft := int(p.Ft) * Ymax
  2985  	f3t := Ynone * Ymax
  2986  	if p.From3 != nil {
  2987  		f3t = oclass(ctxt, p, p.From3) * Ymax
  2988  	}
  2989  	tt := int(p.Tt) * Ymax
  2990  
  2991  	xo := obj.Bool2int(o.op[0] == 0x0f)
  2992  	z := 0
  2993  	var a *obj.Addr
  2994  	var l int
  2995  	var op int
  2996  	var q *obj.Prog
  2997  	var r *obj.Reloc
  2998  	var rel obj.Reloc
  2999  	var v int64
  3000  	for i := range o.ytab {
  3001  		yt := &o.ytab[i]
  3002  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3003  			switch o.prefix {
  3004  			case Px1: /* first option valid only in 32-bit mode */
  3005  				if ctxt.Mode == 64 && z == 0 {
  3006  					z += int(yt.zoffset) + xo
  3007  					continue
  3008  				}
  3009  			case Pq: /* 16 bit escape and opcode escape */
  3010  				ctxt.Andptr[0] = Pe
  3011  				ctxt.Andptr = ctxt.Andptr[1:]
  3012  
  3013  				ctxt.Andptr[0] = Pm
  3014  				ctxt.Andptr = ctxt.Andptr[1:]
  3015  
  3016  			case Pq3: /* 16 bit escape, Rex.w, and opcode escape */
  3017  				ctxt.Andptr[0] = Pe
  3018  				ctxt.Andptr = ctxt.Andptr[1:]
  3019  
  3020  				ctxt.Andptr[0] = Pw
  3021  				ctxt.Andptr = ctxt.Andptr[1:]
  3022  				ctxt.Andptr[0] = Pm
  3023  				ctxt.Andptr = ctxt.Andptr[1:]
  3024  
  3025  			case Pf2, /* xmm opcode escape */
  3026  				Pf3:
  3027  				ctxt.Andptr[0] = byte(o.prefix)
  3028  				ctxt.Andptr = ctxt.Andptr[1:]
  3029  
  3030  				ctxt.Andptr[0] = Pm
  3031  				ctxt.Andptr = ctxt.Andptr[1:]
  3032  
  3033  			case Pm: /* opcode escape */
  3034  				ctxt.Andptr[0] = Pm
  3035  				ctxt.Andptr = ctxt.Andptr[1:]
  3036  
  3037  			case Pe: /* 16 bit escape */
  3038  				ctxt.Andptr[0] = Pe
  3039  				ctxt.Andptr = ctxt.Andptr[1:]
  3040  
  3041  			case Pw: /* 64-bit escape */
  3042  				if p.Mode != 64 {
  3043  					ctxt.Diag("asmins: illegal 64: %v", p)
  3044  				}
  3045  				ctxt.Rexflag |= Pw
  3046  
  3047  			case Pw8: /* 64-bit escape if z >= 8 */
  3048  				if z >= 8 {
  3049  					if p.Mode != 64 {
  3050  						ctxt.Diag("asmins: illegal 64: %v", p)
  3051  					}
  3052  					ctxt.Rexflag |= Pw
  3053  				}
  3054  
  3055  			case Pb: /* botch */
  3056  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3057  					goto bad
  3058  				}
  3059  				// NOTE(rsc): This is probably safe to do always,
  3060  				// but when enabled it chooses different encodings
  3061  				// than the old cmd/internal/obj/i386 code did,
  3062  				// which breaks our "same bits out" checks.
  3063  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3064  				// in the original obj/i386, and it would encode
  3065  				// (using a valid, shorter form) as 3c 00 if we enabled
  3066  				// the call to bytereg here.
  3067  				if p.Mode == 64 {
  3068  					bytereg(&p.From, &p.Ft)
  3069  					bytereg(&p.To, &p.Tt)
  3070  				}
  3071  
  3072  			case P32: /* 32 bit but illegal if 64-bit mode */
  3073  				if p.Mode == 64 {
  3074  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3075  				}
  3076  
  3077  			case Py: /* 64-bit only, no prefix */
  3078  				if p.Mode != 64 {
  3079  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3080  				}
  3081  
  3082  			case Py1: /* 64-bit only if z < 1, no prefix */
  3083  				if z < 1 && p.Mode != 64 {
  3084  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3085  				}
  3086  
  3087  			case Py3: /* 64-bit only if z < 3, no prefix */
  3088  				if z < 3 && p.Mode != 64 {
  3089  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3090  				}
  3091  			}
  3092  
  3093  			if z >= len(o.op) {
  3094  				log.Fatalf("asmins bad table %v", p)
  3095  			}
  3096  			op = int(o.op[z])
  3097  			if op == 0x0f {
  3098  				ctxt.Andptr[0] = byte(op)
  3099  				ctxt.Andptr = ctxt.Andptr[1:]
  3100  				z++
  3101  				op = int(o.op[z])
  3102  			}
  3103  
  3104  			switch yt.zcase {
  3105  			default:
  3106  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3107  				return
  3108  
  3109  			case Zpseudo:
  3110  				break
  3111  
  3112  			case Zlit:
  3113  				for ; ; z++ {
  3114  					op = int(o.op[z])
  3115  					if op == 0 {
  3116  						break
  3117  					}
  3118  					ctxt.Andptr[0] = byte(op)
  3119  					ctxt.Andptr = ctxt.Andptr[1:]
  3120  				}
  3121  
  3122  			case Zlitm_r:
  3123  				for ; ; z++ {
  3124  					op = int(o.op[z])
  3125  					if op == 0 {
  3126  						break
  3127  					}
  3128  					ctxt.Andptr[0] = byte(op)
  3129  					ctxt.Andptr = ctxt.Andptr[1:]
  3130  				}
  3131  				asmand(ctxt, p, &p.From, &p.To)
  3132  
  3133  			case Zmb_r:
  3134  				bytereg(&p.From, &p.Ft)
  3135  				fallthrough
  3136  
  3137  				/* fall through */
  3138  			case Zm_r:
  3139  				ctxt.Andptr[0] = byte(op)
  3140  				ctxt.Andptr = ctxt.Andptr[1:]
  3141  
  3142  				asmand(ctxt, p, &p.From, &p.To)
  3143  
  3144  			case Zm2_r:
  3145  				ctxt.Andptr[0] = byte(op)
  3146  				ctxt.Andptr = ctxt.Andptr[1:]
  3147  				ctxt.Andptr[0] = byte(o.op[z+1])
  3148  				ctxt.Andptr = ctxt.Andptr[1:]
  3149  				asmand(ctxt, p, &p.From, &p.To)
  3150  
  3151  			case Zm_r_xm:
  3152  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3153  				asmand(ctxt, p, &p.From, &p.To)
  3154  
  3155  			case Zm_r_xm_nr:
  3156  				ctxt.Rexflag = 0
  3157  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3158  				asmand(ctxt, p, &p.From, &p.To)
  3159  
  3160  			case Zm_r_i_xm:
  3161  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3162  				asmand(ctxt, p, &p.From, p.From3)
  3163  				ctxt.Andptr[0] = byte(p.To.Offset)
  3164  				ctxt.Andptr = ctxt.Andptr[1:]
  3165  
  3166  			case Zm_r_3d:
  3167  				ctxt.Andptr[0] = 0x0f
  3168  				ctxt.Andptr = ctxt.Andptr[1:]
  3169  				ctxt.Andptr[0] = 0x0f
  3170  				ctxt.Andptr = ctxt.Andptr[1:]
  3171  				asmand(ctxt, p, &p.From, &p.To)
  3172  				ctxt.Andptr[0] = byte(op)
  3173  				ctxt.Andptr = ctxt.Andptr[1:]
  3174  
  3175  			case Zibm_r:
  3176  				for {
  3177  					tmp1 := z
  3178  					z++
  3179  					op = int(o.op[tmp1])
  3180  					if op == 0 {
  3181  						break
  3182  					}
  3183  					ctxt.Andptr[0] = byte(op)
  3184  					ctxt.Andptr = ctxt.Andptr[1:]
  3185  				}
  3186  				asmand(ctxt, p, p.From3, &p.To)
  3187  				ctxt.Andptr[0] = byte(p.From.Offset)
  3188  				ctxt.Andptr = ctxt.Andptr[1:]
  3189  
  3190  			case Zaut_r:
  3191  				ctxt.Andptr[0] = 0x8d
  3192  				ctxt.Andptr = ctxt.Andptr[1:] /* leal */
  3193  				if p.From.Type != obj.TYPE_ADDR {
  3194  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3195  				}
  3196  				p.From.Type = obj.TYPE_MEM
  3197  				asmand(ctxt, p, &p.From, &p.To)
  3198  				p.From.Type = obj.TYPE_ADDR
  3199  
  3200  			case Zm_o:
  3201  				ctxt.Andptr[0] = byte(op)
  3202  				ctxt.Andptr = ctxt.Andptr[1:]
  3203  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3204  
  3205  			case Zr_m:
  3206  				ctxt.Andptr[0] = byte(op)
  3207  				ctxt.Andptr = ctxt.Andptr[1:]
  3208  				asmand(ctxt, p, &p.To, &p.From)
  3209  
  3210  			case Zr_m_xm:
  3211  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3212  				asmand(ctxt, p, &p.To, &p.From)
  3213  
  3214  			case Zr_m_xm_nr:
  3215  				ctxt.Rexflag = 0
  3216  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3217  				asmand(ctxt, p, &p.To, &p.From)
  3218  
  3219  			case Zo_m:
  3220  				ctxt.Andptr[0] = byte(op)
  3221  				ctxt.Andptr = ctxt.Andptr[1:]
  3222  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3223  
  3224  			case Zcallindreg:
  3225  				r = obj.Addrel(ctxt.Cursym)
  3226  				r.Off = int32(p.Pc)
  3227  				r.Type = obj.R_CALLIND
  3228  				r.Siz = 0
  3229  				fallthrough
  3230  
  3231  			case Zo_m64:
  3232  				ctxt.Andptr[0] = byte(op)
  3233  				ctxt.Andptr = ctxt.Andptr[1:]
  3234  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3235  
  3236  			case Zm_ibo:
  3237  				ctxt.Andptr[0] = byte(op)
  3238  				ctxt.Andptr = ctxt.Andptr[1:]
  3239  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3240  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.To, nil))
  3241  				ctxt.Andptr = ctxt.Andptr[1:]
  3242  
  3243  			case Zibo_m:
  3244  				ctxt.Andptr[0] = byte(op)
  3245  				ctxt.Andptr = ctxt.Andptr[1:]
  3246  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3247  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3248  				ctxt.Andptr = ctxt.Andptr[1:]
  3249  
  3250  			case Zibo_m_xm:
  3251  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3252  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3253  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3254  				ctxt.Andptr = ctxt.Andptr[1:]
  3255  
  3256  			case Z_ib, Zib_:
  3257  				if yt.zcase == Zib_ {
  3258  					a = &p.From
  3259  				} else {
  3260  					a = &p.To
  3261  				}
  3262  				ctxt.Andptr[0] = byte(op)
  3263  				ctxt.Andptr = ctxt.Andptr[1:]
  3264  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, a, nil))
  3265  				ctxt.Andptr = ctxt.Andptr[1:]
  3266  
  3267  			case Zib_rp:
  3268  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3269  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3270  				ctxt.Andptr = ctxt.Andptr[1:]
  3271  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3272  				ctxt.Andptr = ctxt.Andptr[1:]
  3273  
  3274  			case Zil_rp:
  3275  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3276  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3277  				ctxt.Andptr = ctxt.Andptr[1:]
  3278  				if o.prefix == Pe {
  3279  					v = vaddr(ctxt, p, &p.From, nil)
  3280  					ctxt.Andptr[0] = byte(v)
  3281  					ctxt.Andptr = ctxt.Andptr[1:]
  3282  					ctxt.Andptr[0] = byte(v >> 8)
  3283  					ctxt.Andptr = ctxt.Andptr[1:]
  3284  				} else {
  3285  					relput4(ctxt, p, &p.From)
  3286  				}
  3287  
  3288  			case Zo_iw:
  3289  				ctxt.Andptr[0] = byte(op)
  3290  				ctxt.Andptr = ctxt.Andptr[1:]
  3291  				if p.From.Type != obj.TYPE_NONE {
  3292  					v = vaddr(ctxt, p, &p.From, nil)
  3293  					ctxt.Andptr[0] = byte(v)
  3294  					ctxt.Andptr = ctxt.Andptr[1:]
  3295  					ctxt.Andptr[0] = byte(v >> 8)
  3296  					ctxt.Andptr = ctxt.Andptr[1:]
  3297  				}
  3298  
  3299  			case Ziq_rp:
  3300  				v = vaddr(ctxt, p, &p.From, &rel)
  3301  				l = int(v >> 32)
  3302  				if l == 0 && rel.Siz != 8 {
  3303  					//p->mark |= 0100;
  3304  					//print("zero: %llux %v\n", v, p);
  3305  					ctxt.Rexflag &^= (0x40 | Rxw)
  3306  
  3307  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3308  					ctxt.Andptr[0] = byte(0xb8 + reg[p.To.Reg])
  3309  					ctxt.Andptr = ctxt.Andptr[1:]
  3310  					if rel.Type != 0 {
  3311  						r = obj.Addrel(ctxt.Cursym)
  3312  						*r = rel
  3313  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3314  					}
  3315  
  3316  					put4(ctxt, int32(v))
  3317  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3318  
  3319  					//p->mark |= 0100;
  3320  					//print("sign: %llux %v\n", v, p);
  3321  					ctxt.Andptr[0] = 0xc7
  3322  					ctxt.Andptr = ctxt.Andptr[1:]
  3323  
  3324  					asmando(ctxt, p, &p.To, 0)
  3325  					put4(ctxt, int32(v)) /* need all 8 */
  3326  				} else {
  3327  					//print("all: %llux %v\n", v, p);
  3328  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3329  
  3330  					ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3331  					ctxt.Andptr = ctxt.Andptr[1:]
  3332  					if rel.Type != 0 {
  3333  						r = obj.Addrel(ctxt.Cursym)
  3334  						*r = rel
  3335  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3336  					}
  3337  
  3338  					put8(ctxt, v)
  3339  				}
  3340  
  3341  			case Zib_rr:
  3342  				ctxt.Andptr[0] = byte(op)
  3343  				ctxt.Andptr = ctxt.Andptr[1:]
  3344  				asmand(ctxt, p, &p.To, &p.To)
  3345  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3346  				ctxt.Andptr = ctxt.Andptr[1:]
  3347  
  3348  			case Z_il, Zil_:
  3349  				if yt.zcase == Zil_ {
  3350  					a = &p.From
  3351  				} else {
  3352  					a = &p.To
  3353  				}
  3354  				ctxt.Andptr[0] = byte(op)
  3355  				ctxt.Andptr = ctxt.Andptr[1:]
  3356  				if o.prefix == Pe {
  3357  					v = vaddr(ctxt, p, a, nil)
  3358  					ctxt.Andptr[0] = byte(v)
  3359  					ctxt.Andptr = ctxt.Andptr[1:]
  3360  					ctxt.Andptr[0] = byte(v >> 8)
  3361  					ctxt.Andptr = ctxt.Andptr[1:]
  3362  				} else {
  3363  					relput4(ctxt, p, a)
  3364  				}
  3365  
  3366  			case Zm_ilo, Zilo_m:
  3367  				ctxt.Andptr[0] = byte(op)
  3368  				ctxt.Andptr = ctxt.Andptr[1:]
  3369  				if yt.zcase == Zilo_m {
  3370  					a = &p.From
  3371  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3372  				} else {
  3373  					a = &p.To
  3374  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3375  				}
  3376  
  3377  				if o.prefix == Pe {
  3378  					v = vaddr(ctxt, p, a, nil)
  3379  					ctxt.Andptr[0] = byte(v)
  3380  					ctxt.Andptr = ctxt.Andptr[1:]
  3381  					ctxt.Andptr[0] = byte(v >> 8)
  3382  					ctxt.Andptr = ctxt.Andptr[1:]
  3383  				} else {
  3384  					relput4(ctxt, p, a)
  3385  				}
  3386  
  3387  			case Zil_rr:
  3388  				ctxt.Andptr[0] = byte(op)
  3389  				ctxt.Andptr = ctxt.Andptr[1:]
  3390  				asmand(ctxt, p, &p.To, &p.To)
  3391  				if o.prefix == Pe {
  3392  					v = vaddr(ctxt, p, &p.From, nil)
  3393  					ctxt.Andptr[0] = byte(v)
  3394  					ctxt.Andptr = ctxt.Andptr[1:]
  3395  					ctxt.Andptr[0] = byte(v >> 8)
  3396  					ctxt.Andptr = ctxt.Andptr[1:]
  3397  				} else {
  3398  					relput4(ctxt, p, &p.From)
  3399  				}
  3400  
  3401  			case Z_rp:
  3402  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3403  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3404  				ctxt.Andptr = ctxt.Andptr[1:]
  3405  
  3406  			case Zrp_:
  3407  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3408  				ctxt.Andptr[0] = byte(op + reg[p.From.Reg])
  3409  				ctxt.Andptr = ctxt.Andptr[1:]
  3410  
  3411  			case Zclr:
  3412  				ctxt.Rexflag &^= Pw
  3413  				ctxt.Andptr[0] = byte(op)
  3414  				ctxt.Andptr = ctxt.Andptr[1:]
  3415  				asmand(ctxt, p, &p.To, &p.To)
  3416  
  3417  			case Zcallcon, Zjmpcon:
  3418  				if yt.zcase == Zcallcon {
  3419  					ctxt.Andptr[0] = byte(op)
  3420  					ctxt.Andptr = ctxt.Andptr[1:]
  3421  				} else {
  3422  					ctxt.Andptr[0] = byte(o.op[z+1])
  3423  					ctxt.Andptr = ctxt.Andptr[1:]
  3424  				}
  3425  				r = obj.Addrel(ctxt.Cursym)
  3426  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3427  				r.Type = obj.R_PCREL
  3428  				r.Siz = 4
  3429  				r.Add = p.To.Offset
  3430  				put4(ctxt, 0)
  3431  
  3432  			case Zcallind:
  3433  				ctxt.Andptr[0] = byte(op)
  3434  				ctxt.Andptr = ctxt.Andptr[1:]
  3435  				ctxt.Andptr[0] = byte(o.op[z+1])
  3436  				ctxt.Andptr = ctxt.Andptr[1:]
  3437  				r = obj.Addrel(ctxt.Cursym)
  3438  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3439  				r.Type = obj.R_ADDR
  3440  				r.Siz = 4
  3441  				r.Add = p.To.Offset
  3442  				r.Sym = p.To.Sym
  3443  				put4(ctxt, 0)
  3444  
  3445  			case Zcall, Zcallduff:
  3446  				if p.To.Sym == nil {
  3447  					ctxt.Diag("call without target")
  3448  					log.Fatalf("bad code")
  3449  				}
  3450  
  3451  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3452  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3453  				}
  3454  
  3455  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3456  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3457  					// (the call jumps into the middle of the function).
  3458  					// This makes it possible to see call sites for duffcopy/duffzero in
  3459  					// BP-based profiling tools like Linux perf (which is the
  3460  					// whole point of obj.Framepointer_enabled).
  3461  					// MOVQ BP, -16(SP)
  3462  					// LEAQ -16(SP), BP
  3463  					copy(ctxt.Andptr, bpduff1)
  3464  					ctxt.Andptr = ctxt.Andptr[len(bpduff1):]
  3465  				}
  3466  				ctxt.Andptr[0] = byte(op)
  3467  				ctxt.Andptr = ctxt.Andptr[1:]
  3468  				r = obj.Addrel(ctxt.Cursym)
  3469  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3470  				r.Sym = p.To.Sym
  3471  				r.Add = p.To.Offset
  3472  				r.Type = obj.R_CALL
  3473  				r.Siz = 4
  3474  				put4(ctxt, 0)
  3475  
  3476  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3477  					// Pop BP pushed above.
  3478  					// MOVQ 0(BP), BP
  3479  					copy(ctxt.Andptr, bpduff2)
  3480  					ctxt.Andptr = ctxt.Andptr[len(bpduff2):]
  3481  				}
  3482  
  3483  			// TODO: jump across functions needs reloc
  3484  			case Zbr, Zjmp, Zloop:
  3485  				if p.To.Sym != nil {
  3486  					if yt.zcase != Zjmp {
  3487  						ctxt.Diag("branch to ATEXT")
  3488  						log.Fatalf("bad code")
  3489  					}
  3490  
  3491  					ctxt.Andptr[0] = byte(o.op[z+1])
  3492  					ctxt.Andptr = ctxt.Andptr[1:]
  3493  					r = obj.Addrel(ctxt.Cursym)
  3494  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3495  					r.Sym = p.To.Sym
  3496  					r.Type = obj.R_PCREL
  3497  					r.Siz = 4
  3498  					put4(ctxt, 0)
  3499  					break
  3500  				}
  3501  
  3502  				// Assumes q is in this function.
  3503  				// TODO: Check in input, preserve in brchain.
  3504  
  3505  				// Fill in backward jump now.
  3506  				q = p.Pcond
  3507  
  3508  				if q == nil {
  3509  					ctxt.Diag("jmp/branch/loop without target")
  3510  					log.Fatalf("bad code")
  3511  				}
  3512  
  3513  				if p.Back&1 != 0 {
  3514  					v = q.Pc - (p.Pc + 2)
  3515  					if v >= -128 {
  3516  						if p.As == AJCXZL {
  3517  							ctxt.Andptr[0] = 0x67
  3518  							ctxt.Andptr = ctxt.Andptr[1:]
  3519  						}
  3520  						ctxt.Andptr[0] = byte(op)
  3521  						ctxt.Andptr = ctxt.Andptr[1:]
  3522  						ctxt.Andptr[0] = byte(v)
  3523  						ctxt.Andptr = ctxt.Andptr[1:]
  3524  					} else if yt.zcase == Zloop {
  3525  						ctxt.Diag("loop too far: %v", p)
  3526  					} else {
  3527  						v -= 5 - 2
  3528  						if yt.zcase == Zbr {
  3529  							ctxt.Andptr[0] = 0x0f
  3530  							ctxt.Andptr = ctxt.Andptr[1:]
  3531  							v--
  3532  						}
  3533  
  3534  						ctxt.Andptr[0] = byte(o.op[z+1])
  3535  						ctxt.Andptr = ctxt.Andptr[1:]
  3536  						ctxt.Andptr[0] = byte(v)
  3537  						ctxt.Andptr = ctxt.Andptr[1:]
  3538  						ctxt.Andptr[0] = byte(v >> 8)
  3539  						ctxt.Andptr = ctxt.Andptr[1:]
  3540  						ctxt.Andptr[0] = byte(v >> 16)
  3541  						ctxt.Andptr = ctxt.Andptr[1:]
  3542  						ctxt.Andptr[0] = byte(v >> 24)
  3543  						ctxt.Andptr = ctxt.Andptr[1:]
  3544  					}
  3545  
  3546  					break
  3547  				}
  3548  
  3549  				// Annotate target; will fill in later.
  3550  				p.Forwd = q.Rel
  3551  
  3552  				q.Rel = p
  3553  				if p.Back&2 != 0 { // short
  3554  					if p.As == AJCXZL {
  3555  						ctxt.Andptr[0] = 0x67
  3556  						ctxt.Andptr = ctxt.Andptr[1:]
  3557  					}
  3558  					ctxt.Andptr[0] = byte(op)
  3559  					ctxt.Andptr = ctxt.Andptr[1:]
  3560  					ctxt.Andptr[0] = 0
  3561  					ctxt.Andptr = ctxt.Andptr[1:]
  3562  				} else if yt.zcase == Zloop {
  3563  					ctxt.Diag("loop too far: %v", p)
  3564  				} else {
  3565  					if yt.zcase == Zbr {
  3566  						ctxt.Andptr[0] = 0x0f
  3567  						ctxt.Andptr = ctxt.Andptr[1:]
  3568  					}
  3569  					ctxt.Andptr[0] = byte(o.op[z+1])
  3570  					ctxt.Andptr = ctxt.Andptr[1:]
  3571  					ctxt.Andptr[0] = 0
  3572  					ctxt.Andptr = ctxt.Andptr[1:]
  3573  					ctxt.Andptr[0] = 0
  3574  					ctxt.Andptr = ctxt.Andptr[1:]
  3575  					ctxt.Andptr[0] = 0
  3576  					ctxt.Andptr = ctxt.Andptr[1:]
  3577  					ctxt.Andptr[0] = 0
  3578  					ctxt.Andptr = ctxt.Andptr[1:]
  3579  				}
  3580  
  3581  				break
  3582  
  3583  			/*
  3584  				v = q->pc - p->pc - 2;
  3585  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3586  					*ctxt->andptr++ = op;
  3587  					*ctxt->andptr++ = v;
  3588  				} else {
  3589  					v -= 5-2;
  3590  					if(yt.zcase == Zbr) {
  3591  						*ctxt->andptr++ = 0x0f;
  3592  						v--;
  3593  					}
  3594  					*ctxt->andptr++ = o->op[z+1];
  3595  					*ctxt->andptr++ = v;
  3596  					*ctxt->andptr++ = v>>8;
  3597  					*ctxt->andptr++ = v>>16;
  3598  					*ctxt->andptr++ = v>>24;
  3599  				}
  3600  			*/
  3601  
  3602  			case Zbyte:
  3603  				v = vaddr(ctxt, p, &p.From, &rel)
  3604  				if rel.Siz != 0 {
  3605  					rel.Siz = uint8(op)
  3606  					r = obj.Addrel(ctxt.Cursym)
  3607  					*r = rel
  3608  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3609  				}
  3610  
  3611  				ctxt.Andptr[0] = byte(v)
  3612  				ctxt.Andptr = ctxt.Andptr[1:]
  3613  				if op > 1 {
  3614  					ctxt.Andptr[0] = byte(v >> 8)
  3615  					ctxt.Andptr = ctxt.Andptr[1:]
  3616  					if op > 2 {
  3617  						ctxt.Andptr[0] = byte(v >> 16)
  3618  						ctxt.Andptr = ctxt.Andptr[1:]
  3619  						ctxt.Andptr[0] = byte(v >> 24)
  3620  						ctxt.Andptr = ctxt.Andptr[1:]
  3621  						if op > 4 {
  3622  							ctxt.Andptr[0] = byte(v >> 32)
  3623  							ctxt.Andptr = ctxt.Andptr[1:]
  3624  							ctxt.Andptr[0] = byte(v >> 40)
  3625  							ctxt.Andptr = ctxt.Andptr[1:]
  3626  							ctxt.Andptr[0] = byte(v >> 48)
  3627  							ctxt.Andptr = ctxt.Andptr[1:]
  3628  							ctxt.Andptr[0] = byte(v >> 56)
  3629  							ctxt.Andptr = ctxt.Andptr[1:]
  3630  						}
  3631  					}
  3632  				}
  3633  			}
  3634  
  3635  			return
  3636  		}
  3637  		z += int(yt.zoffset) + xo
  3638  	}
  3639  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3640  		var pp obj.Prog
  3641  		var t []byte
  3642  		if p.As == mo[0].as {
  3643  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3644  				t = mo[0].op[:]
  3645  				switch mo[0].code {
  3646  				default:
  3647  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3648  
  3649  				case 0: /* lit */
  3650  					for z = 0; t[z] != E; z++ {
  3651  						ctxt.Andptr[0] = t[z]
  3652  						ctxt.Andptr = ctxt.Andptr[1:]
  3653  					}
  3654  
  3655  				case 1: /* r,m */
  3656  					ctxt.Andptr[0] = t[0]
  3657  					ctxt.Andptr = ctxt.Andptr[1:]
  3658  
  3659  					asmando(ctxt, p, &p.To, int(t[1]))
  3660  
  3661  				case 2: /* m,r */
  3662  					ctxt.Andptr[0] = t[0]
  3663  					ctxt.Andptr = ctxt.Andptr[1:]
  3664  
  3665  					asmando(ctxt, p, &p.From, int(t[1]))
  3666  
  3667  				case 3: /* r,m - 2op */
  3668  					ctxt.Andptr[0] = t[0]
  3669  					ctxt.Andptr = ctxt.Andptr[1:]
  3670  
  3671  					ctxt.Andptr[0] = t[1]
  3672  					ctxt.Andptr = ctxt.Andptr[1:]
  3673  					asmando(ctxt, p, &p.To, int(t[2]))
  3674  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3675  
  3676  				case 4: /* m,r - 2op */
  3677  					ctxt.Andptr[0] = t[0]
  3678  					ctxt.Andptr = ctxt.Andptr[1:]
  3679  
  3680  					ctxt.Andptr[0] = t[1]
  3681  					ctxt.Andptr = ctxt.Andptr[1:]
  3682  					asmando(ctxt, p, &p.From, int(t[2]))
  3683  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3684  
  3685  				case 5: /* load full pointer, trash heap */
  3686  					if t[0] != 0 {
  3687  						ctxt.Andptr[0] = t[0]
  3688  						ctxt.Andptr = ctxt.Andptr[1:]
  3689  					}
  3690  					switch p.To.Index {
  3691  					default:
  3692  						goto bad
  3693  
  3694  					case REG_DS:
  3695  						ctxt.Andptr[0] = 0xc5
  3696  						ctxt.Andptr = ctxt.Andptr[1:]
  3697  
  3698  					case REG_SS:
  3699  						ctxt.Andptr[0] = 0x0f
  3700  						ctxt.Andptr = ctxt.Andptr[1:]
  3701  						ctxt.Andptr[0] = 0xb2
  3702  						ctxt.Andptr = ctxt.Andptr[1:]
  3703  
  3704  					case REG_ES:
  3705  						ctxt.Andptr[0] = 0xc4
  3706  						ctxt.Andptr = ctxt.Andptr[1:]
  3707  
  3708  					case REG_FS:
  3709  						ctxt.Andptr[0] = 0x0f
  3710  						ctxt.Andptr = ctxt.Andptr[1:]
  3711  						ctxt.Andptr[0] = 0xb4
  3712  						ctxt.Andptr = ctxt.Andptr[1:]
  3713  
  3714  					case REG_GS:
  3715  						ctxt.Andptr[0] = 0x0f
  3716  						ctxt.Andptr = ctxt.Andptr[1:]
  3717  						ctxt.Andptr[0] = 0xb5
  3718  						ctxt.Andptr = ctxt.Andptr[1:]
  3719  					}
  3720  
  3721  					asmand(ctxt, p, &p.From, &p.To)
  3722  
  3723  				case 6: /* double shift */
  3724  					if t[0] == Pw {
  3725  						if p.Mode != 64 {
  3726  							ctxt.Diag("asmins: illegal 64: %v", p)
  3727  						}
  3728  						ctxt.Rexflag |= Pw
  3729  						t = t[1:]
  3730  					} else if t[0] == Pe {
  3731  						ctxt.Andptr[0] = Pe
  3732  						ctxt.Andptr = ctxt.Andptr[1:]
  3733  						t = t[1:]
  3734  					}
  3735  
  3736  					switch p.From.Type {
  3737  					default:
  3738  						goto bad
  3739  
  3740  					case obj.TYPE_CONST:
  3741  						ctxt.Andptr[0] = 0x0f
  3742  						ctxt.Andptr = ctxt.Andptr[1:]
  3743  						ctxt.Andptr[0] = t[0]
  3744  						ctxt.Andptr = ctxt.Andptr[1:]
  3745  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3746  						ctxt.Andptr[0] = byte(p.From.Offset)
  3747  						ctxt.Andptr = ctxt.Andptr[1:]
  3748  
  3749  					case obj.TYPE_REG:
  3750  						switch p.From.Reg {
  3751  						default:
  3752  							goto bad
  3753  
  3754  						case REG_CL, REG_CX:
  3755  							ctxt.Andptr[0] = 0x0f
  3756  							ctxt.Andptr = ctxt.Andptr[1:]
  3757  							ctxt.Andptr[0] = t[1]
  3758  							ctxt.Andptr = ctxt.Andptr[1:]
  3759  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3760  						}
  3761  					}
  3762  
  3763  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3764  				// where you load the TLS base register into a register and then index off that
  3765  				// register to access the actual TLS variables. Systems that allow direct TLS access
  3766  				// are handled in prefixof above and should not be listed here.
  3767  				case 7: /* mov tls, r */
  3768  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  3769  						ctxt.Diag("invalid load of TLS: %v", p)
  3770  					}
  3771  
  3772  					if p.Mode == 32 {
  3773  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3774  						// where you load the TLS base register into a register and then index off that
  3775  						// register to access the actual TLS variables. Systems that allow direct TLS access
  3776  						// are handled in prefixof above and should not be listed here.
  3777  						switch ctxt.Headtype {
  3778  						default:
  3779  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3780  
  3781  						case obj.Hlinux,
  3782  							obj.Hnacl:
  3783  							// ELF TLS base is 0(GS).
  3784  							pp.From = p.From
  3785  
  3786  							pp.From.Type = obj.TYPE_MEM
  3787  							pp.From.Reg = REG_GS
  3788  							pp.From.Offset = 0
  3789  							pp.From.Index = REG_NONE
  3790  							pp.From.Scale = 0
  3791  							ctxt.Andptr[0] = 0x65
  3792  							ctxt.Andptr = ctxt.Andptr[1:] // GS
  3793  							ctxt.Andptr[0] = 0x8B
  3794  							ctxt.Andptr = ctxt.Andptr[1:]
  3795  							asmand(ctxt, p, &pp.From, &p.To)
  3796  
  3797  						case obj.Hplan9:
  3798  							if ctxt.Plan9privates == nil {
  3799  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3800  							}
  3801  							pp.From = obj.Addr{}
  3802  							pp.From.Type = obj.TYPE_MEM
  3803  							pp.From.Name = obj.NAME_EXTERN
  3804  							pp.From.Sym = ctxt.Plan9privates
  3805  							pp.From.Offset = 0
  3806  							pp.From.Index = REG_NONE
  3807  							ctxt.Andptr[0] = 0x8B
  3808  							ctxt.Andptr = ctxt.Andptr[1:]
  3809  							asmand(ctxt, p, &pp.From, &p.To)
  3810  
  3811  						case obj.Hwindows:
  3812  							// Windows TLS base is always 0x14(FS).
  3813  							pp.From = p.From
  3814  
  3815  							pp.From.Type = obj.TYPE_MEM
  3816  							pp.From.Reg = REG_FS
  3817  							pp.From.Offset = 0x14
  3818  							pp.From.Index = REG_NONE
  3819  							pp.From.Scale = 0
  3820  							ctxt.Andptr[0] = 0x64
  3821  							ctxt.Andptr = ctxt.Andptr[1:] // FS
  3822  							ctxt.Andptr[0] = 0x8B
  3823  							ctxt.Andptr = ctxt.Andptr[1:]
  3824  							asmand(ctxt, p, &pp.From, &p.To)
  3825  						}
  3826  						break
  3827  					}
  3828  
  3829  					switch ctxt.Headtype {
  3830  					default:
  3831  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3832  
  3833  					case obj.Hlinux:
  3834  						if ctxt.Flag_shared == 0 {
  3835  							log.Fatalf("unknown TLS base location for linux without -shared")
  3836  						}
  3837  						// Note that this is not generating the same insn as the other cases.
  3838  						//     MOV TLS, R_to
  3839  						// becomes
  3840  						//     movq g@gottpoff(%rip), R_to
  3841  						// which is encoded as
  3842  						//     movq 0(%rip), R_to
  3843  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  3844  						// is g, which we can't check here, but will when we assemble the second
  3845  						// instruction.
  3846  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  3847  
  3848  						ctxt.Andptr[0] = 0x8B
  3849  						ctxt.Andptr = ctxt.Andptr[1:]
  3850  						ctxt.Andptr[0] = byte(0x05 | (reg[p.To.Reg] << 3))
  3851  						ctxt.Andptr = ctxt.Andptr[1:]
  3852  						r = obj.Addrel(ctxt.Cursym)
  3853  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3854  						r.Type = obj.R_TLS_IE
  3855  						r.Siz = 4
  3856  						r.Add = -4
  3857  						put4(ctxt, 0)
  3858  
  3859  					case obj.Hplan9:
  3860  						if ctxt.Plan9privates == nil {
  3861  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3862  						}
  3863  						pp.From = obj.Addr{}
  3864  						pp.From.Type = obj.TYPE_MEM
  3865  						pp.From.Name = obj.NAME_EXTERN
  3866  						pp.From.Sym = ctxt.Plan9privates
  3867  						pp.From.Offset = 0
  3868  						pp.From.Index = REG_NONE
  3869  						ctxt.Rexflag |= Pw
  3870  						ctxt.Andptr[0] = 0x8B
  3871  						ctxt.Andptr = ctxt.Andptr[1:]
  3872  						asmand(ctxt, p, &pp.From, &p.To)
  3873  
  3874  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  3875  						// TLS base is 0(FS).
  3876  						pp.From = p.From
  3877  
  3878  						pp.From.Type = obj.TYPE_MEM
  3879  						pp.From.Name = obj.NAME_NONE
  3880  						pp.From.Reg = REG_NONE
  3881  						pp.From.Offset = 0
  3882  						pp.From.Index = REG_NONE
  3883  						pp.From.Scale = 0
  3884  						ctxt.Rexflag |= Pw
  3885  						ctxt.Andptr[0] = 0x64
  3886  						ctxt.Andptr = ctxt.Andptr[1:] // FS
  3887  						ctxt.Andptr[0] = 0x8B
  3888  						ctxt.Andptr = ctxt.Andptr[1:]
  3889  						asmand(ctxt, p, &pp.From, &p.To)
  3890  
  3891  					case obj.Hwindows:
  3892  						// Windows TLS base is always 0x28(GS).
  3893  						pp.From = p.From
  3894  
  3895  						pp.From.Type = obj.TYPE_MEM
  3896  						pp.From.Name = obj.NAME_NONE
  3897  						pp.From.Reg = REG_GS
  3898  						pp.From.Offset = 0x28
  3899  						pp.From.Index = REG_NONE
  3900  						pp.From.Scale = 0
  3901  						ctxt.Rexflag |= Pw
  3902  						ctxt.Andptr[0] = 0x65
  3903  						ctxt.Andptr = ctxt.Andptr[1:] // GS
  3904  						ctxt.Andptr[0] = 0x8B
  3905  						ctxt.Andptr = ctxt.Andptr[1:]
  3906  						asmand(ctxt, p, &pp.From, &p.To)
  3907  					}
  3908  				}
  3909  				return
  3910  			}
  3911  		}
  3912  	}
  3913  	goto bad
  3914  
  3915  bad:
  3916  	if p.Mode != 64 {
  3917  		/*
  3918  		 * here, the assembly has failed.
  3919  		 * if its a byte instruction that has
  3920  		 * unaddressable registers, try to
  3921  		 * exchange registers and reissue the
  3922  		 * instruction with the operands renamed.
  3923  		 */
  3924  		pp := *p
  3925  
  3926  		unbytereg(&pp.From, &pp.Ft)
  3927  		unbytereg(&pp.To, &pp.Tt)
  3928  
  3929  		z := int(p.From.Reg)
  3930  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  3931  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  3932  			// For now, different to keep bit-for-bit compatibility.
  3933  			if p.Mode == 32 {
  3934  				breg := byteswapreg(ctxt, &p.To)
  3935  				if breg != REG_AX {
  3936  					ctxt.Andptr[0] = 0x87
  3937  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3938  					asmando(ctxt, p, &p.From, reg[breg])
  3939  					subreg(&pp, z, breg)
  3940  					doasm(ctxt, &pp)
  3941  					ctxt.Andptr[0] = 0x87
  3942  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3943  					asmando(ctxt, p, &p.From, reg[breg])
  3944  				} else {
  3945  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3946  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3947  					subreg(&pp, z, REG_AX)
  3948  					doasm(ctxt, &pp)
  3949  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3950  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3951  				}
  3952  				return
  3953  			}
  3954  
  3955  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  3956  				// We certainly don't want to exchange
  3957  				// with AX if the op is MUL or DIV.
  3958  				ctxt.Andptr[0] = 0x87
  3959  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3960  				asmando(ctxt, p, &p.From, reg[REG_BX])
  3961  				subreg(&pp, z, REG_BX)
  3962  				doasm(ctxt, &pp)
  3963  				ctxt.Andptr[0] = 0x87
  3964  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3965  				asmando(ctxt, p, &p.From, reg[REG_BX])
  3966  			} else {
  3967  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  3968  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3969  				subreg(&pp, z, REG_AX)
  3970  				doasm(ctxt, &pp)
  3971  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  3972  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3973  			}
  3974  			return
  3975  		}
  3976  
  3977  		z = int(p.To.Reg)
  3978  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  3979  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  3980  			// For now, different to keep bit-for-bit compatibility.
  3981  			if p.Mode == 32 {
  3982  				breg := byteswapreg(ctxt, &p.From)
  3983  				if breg != REG_AX {
  3984  					ctxt.Andptr[0] = 0x87
  3985  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  3986  					asmando(ctxt, p, &p.To, reg[breg])
  3987  					subreg(&pp, z, breg)
  3988  					doasm(ctxt, &pp)
  3989  					ctxt.Andptr[0] = 0x87
  3990  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  3991  					asmando(ctxt, p, &p.To, reg[breg])
  3992  				} else {
  3993  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3994  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  3995  					subreg(&pp, z, REG_AX)
  3996  					doasm(ctxt, &pp)
  3997  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3998  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  3999  				}
  4000  				return
  4001  			}
  4002  
  4003  			if isax(&p.From) {
  4004  				ctxt.Andptr[0] = 0x87
  4005  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4006  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4007  				subreg(&pp, z, REG_BX)
  4008  				doasm(ctxt, &pp)
  4009  				ctxt.Andptr[0] = 0x87
  4010  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4011  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4012  			} else {
  4013  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4014  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4015  				subreg(&pp, z, REG_AX)
  4016  				doasm(ctxt, &pp)
  4017  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4018  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4019  			}
  4020  			return
  4021  		}
  4022  	}
  4023  
  4024  	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4025  	return
  4026  }
  4027  
  4028  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4029  // which is not referenced in a.
  4030  // If a is empty, it returns BX to account for MULB-like instructions
  4031  // that might use DX and AX.
  4032  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4033  	cand := 1
  4034  	canc := cand
  4035  	canb := canc
  4036  	cana := canb
  4037  
  4038  	if a.Type == obj.TYPE_NONE {
  4039  		cand = 0
  4040  		cana = cand
  4041  	}
  4042  
  4043  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4044  		switch a.Reg {
  4045  		case REG_NONE:
  4046  			cand = 0
  4047  			cana = cand
  4048  
  4049  		case REG_AX, REG_AL, REG_AH:
  4050  			cana = 0
  4051  
  4052  		case REG_BX, REG_BL, REG_BH:
  4053  			canb = 0
  4054  
  4055  		case REG_CX, REG_CL, REG_CH:
  4056  			canc = 0
  4057  
  4058  		case REG_DX, REG_DL, REG_DH:
  4059  			cand = 0
  4060  		}
  4061  	}
  4062  
  4063  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4064  		switch a.Index {
  4065  		case REG_AX:
  4066  			cana = 0
  4067  
  4068  		case REG_BX:
  4069  			canb = 0
  4070  
  4071  		case REG_CX:
  4072  			canc = 0
  4073  
  4074  		case REG_DX:
  4075  			cand = 0
  4076  		}
  4077  	}
  4078  
  4079  	if cana != 0 {
  4080  		return REG_AX
  4081  	}
  4082  	if canb != 0 {
  4083  		return REG_BX
  4084  	}
  4085  	if canc != 0 {
  4086  		return REG_CX
  4087  	}
  4088  	if cand != 0 {
  4089  		return REG_DX
  4090  	}
  4091  
  4092  	ctxt.Diag("impossible byte register")
  4093  	log.Fatalf("bad code")
  4094  	return 0
  4095  }
  4096  
  4097  func isbadbyte(a *obj.Addr) bool {
  4098  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4099  }
  4100  
  4101  var naclret = []uint8{
  4102  	0x5e, // POPL SI
  4103  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4104  	0x83,
  4105  	0xe6,
  4106  	0xe0, // ANDL $~31, SI
  4107  	0x4c,
  4108  	0x01,
  4109  	0xfe, // ADDQ R15, SI
  4110  	0xff,
  4111  	0xe6, // JMP SI
  4112  }
  4113  
  4114  var naclret8 = []uint8{
  4115  	0x5d, // POPL BP
  4116  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4117  	0x83,
  4118  	0xe5,
  4119  	0xe0, // ANDL $~31, BP
  4120  	0xff,
  4121  	0xe5, // JMP BP
  4122  }
  4123  
  4124  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4125  
  4126  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4127  
  4128  var naclmovs = []uint8{
  4129  	0x89,
  4130  	0xf6, // MOVL SI, SI
  4131  	0x49,
  4132  	0x8d,
  4133  	0x34,
  4134  	0x37, // LEAQ (R15)(SI*1), SI
  4135  	0x89,
  4136  	0xff, // MOVL DI, DI
  4137  	0x49,
  4138  	0x8d,
  4139  	0x3c,
  4140  	0x3f, // LEAQ (R15)(DI*1), DI
  4141  }
  4142  
  4143  var naclstos = []uint8{
  4144  	0x89,
  4145  	0xff, // MOVL DI, DI
  4146  	0x49,
  4147  	0x8d,
  4148  	0x3c,
  4149  	0x3f, // LEAQ (R15)(DI*1), DI
  4150  }
  4151  
  4152  func nacltrunc(ctxt *obj.Link, reg int) {
  4153  	if reg >= REG_R8 {
  4154  		ctxt.Andptr[0] = 0x45
  4155  		ctxt.Andptr = ctxt.Andptr[1:]
  4156  	}
  4157  	reg = (reg - REG_AX) & 7
  4158  	ctxt.Andptr[0] = 0x89
  4159  	ctxt.Andptr = ctxt.Andptr[1:]
  4160  	ctxt.Andptr[0] = byte(3<<6 | reg<<3 | reg)
  4161  	ctxt.Andptr = ctxt.Andptr[1:]
  4162  }
  4163  
  4164  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4165  	ctxt.Andptr = ctxt.And[:]
  4166  	ctxt.Asmode = int(p.Mode)
  4167  
  4168  	if p.As == obj.AUSEFIELD {
  4169  		r := obj.Addrel(ctxt.Cursym)
  4170  		r.Off = 0
  4171  		r.Siz = 0
  4172  		r.Sym = p.From.Sym
  4173  		r.Type = obj.R_USEFIELD
  4174  		return
  4175  	}
  4176  
  4177  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4178  		switch p.As {
  4179  		case obj.ARET:
  4180  			copy(ctxt.Andptr, naclret8)
  4181  			ctxt.Andptr = ctxt.Andptr[len(naclret8):]
  4182  			return
  4183  
  4184  		case obj.ACALL,
  4185  			obj.AJMP:
  4186  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4187  				ctxt.Andptr[0] = 0x83
  4188  				ctxt.Andptr = ctxt.Andptr[1:]
  4189  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4190  				ctxt.Andptr = ctxt.Andptr[1:]
  4191  				ctxt.Andptr[0] = 0xe0
  4192  				ctxt.Andptr = ctxt.Andptr[1:]
  4193  			}
  4194  
  4195  		case AINT:
  4196  			ctxt.Andptr[0] = 0xf4
  4197  			ctxt.Andptr = ctxt.Andptr[1:]
  4198  			return
  4199  		}
  4200  	}
  4201  
  4202  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4203  		if p.As == AREP {
  4204  			ctxt.Rep++
  4205  			return
  4206  		}
  4207  
  4208  		if p.As == AREPN {
  4209  			ctxt.Repn++
  4210  			return
  4211  		}
  4212  
  4213  		if p.As == ALOCK {
  4214  			ctxt.Lock++
  4215  			return
  4216  		}
  4217  
  4218  		if p.As != ALEAQ && p.As != ALEAL {
  4219  			if p.From.Index != obj.TYPE_NONE && p.From.Scale > 0 {
  4220  				nacltrunc(ctxt, int(p.From.Index))
  4221  			}
  4222  			if p.To.Index != obj.TYPE_NONE && p.To.Scale > 0 {
  4223  				nacltrunc(ctxt, int(p.To.Index))
  4224  			}
  4225  		}
  4226  
  4227  		switch p.As {
  4228  		case obj.ARET:
  4229  			copy(ctxt.Andptr, naclret)
  4230  			ctxt.Andptr = ctxt.Andptr[len(naclret):]
  4231  			return
  4232  
  4233  		case obj.ACALL,
  4234  			obj.AJMP:
  4235  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4236  				// ANDL $~31, reg
  4237  				ctxt.Andptr[0] = 0x83
  4238  				ctxt.Andptr = ctxt.Andptr[1:]
  4239  
  4240  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4241  				ctxt.Andptr = ctxt.Andptr[1:]
  4242  				ctxt.Andptr[0] = 0xe0
  4243  				ctxt.Andptr = ctxt.Andptr[1:]
  4244  
  4245  				// ADDQ R15, reg
  4246  				ctxt.Andptr[0] = 0x4c
  4247  				ctxt.Andptr = ctxt.Andptr[1:]
  4248  
  4249  				ctxt.Andptr[0] = 0x01
  4250  				ctxt.Andptr = ctxt.Andptr[1:]
  4251  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_AX))
  4252  				ctxt.Andptr = ctxt.Andptr[1:]
  4253  			}
  4254  
  4255  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4256  				// ANDL $~31, reg
  4257  				ctxt.Andptr[0] = 0x41
  4258  				ctxt.Andptr = ctxt.Andptr[1:]
  4259  
  4260  				ctxt.Andptr[0] = 0x83
  4261  				ctxt.Andptr = ctxt.Andptr[1:]
  4262  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_R8))
  4263  				ctxt.Andptr = ctxt.Andptr[1:]
  4264  				ctxt.Andptr[0] = 0xe0
  4265  				ctxt.Andptr = ctxt.Andptr[1:]
  4266  
  4267  				// ADDQ R15, reg
  4268  				ctxt.Andptr[0] = 0x4d
  4269  				ctxt.Andptr = ctxt.Andptr[1:]
  4270  
  4271  				ctxt.Andptr[0] = 0x01
  4272  				ctxt.Andptr = ctxt.Andptr[1:]
  4273  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_R8))
  4274  				ctxt.Andptr = ctxt.Andptr[1:]
  4275  			}
  4276  
  4277  		case AINT:
  4278  			ctxt.Andptr[0] = 0xf4
  4279  			ctxt.Andptr = ctxt.Andptr[1:]
  4280  			return
  4281  
  4282  		case ASCASB,
  4283  			ASCASW,
  4284  			ASCASL,
  4285  			ASCASQ,
  4286  			ASTOSB,
  4287  			ASTOSW,
  4288  			ASTOSL,
  4289  			ASTOSQ:
  4290  			copy(ctxt.Andptr, naclstos)
  4291  			ctxt.Andptr = ctxt.Andptr[len(naclstos):]
  4292  
  4293  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4294  			copy(ctxt.Andptr, naclmovs)
  4295  			ctxt.Andptr = ctxt.Andptr[len(naclmovs):]
  4296  		}
  4297  
  4298  		if ctxt.Rep != 0 {
  4299  			ctxt.Andptr[0] = 0xf3
  4300  			ctxt.Andptr = ctxt.Andptr[1:]
  4301  			ctxt.Rep = 0
  4302  		}
  4303  
  4304  		if ctxt.Repn != 0 {
  4305  			ctxt.Andptr[0] = 0xf2
  4306  			ctxt.Andptr = ctxt.Andptr[1:]
  4307  			ctxt.Repn = 0
  4308  		}
  4309  
  4310  		if ctxt.Lock != 0 {
  4311  			ctxt.Andptr[0] = 0xf0
  4312  			ctxt.Andptr = ctxt.Andptr[1:]
  4313  			ctxt.Lock = 0
  4314  		}
  4315  	}
  4316  
  4317  	ctxt.Rexflag = 0
  4318  	and0 := ctxt.Andptr
  4319  	ctxt.Asmode = int(p.Mode)
  4320  	doasm(ctxt, p)
  4321  	if ctxt.Rexflag != 0 {
  4322  		/*
  4323  		 * as befits the whole approach of the architecture,
  4324  		 * the rex prefix must appear before the first opcode byte
  4325  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4326  		 * before the 0f opcode escape!), or it might be ignored.
  4327  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4328  		 */
  4329  		if p.Mode != 64 {
  4330  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4331  		}
  4332  		n := -cap(ctxt.Andptr) + cap(and0)
  4333  		var c int
  4334  		var np int
  4335  		for np = 0; np < n; np++ {
  4336  			c = int(and0[np])
  4337  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4338  				break
  4339  			}
  4340  		}
  4341  
  4342  		copy(and0[np+1:], and0[np:n])
  4343  		and0[np] = byte(0x40 | ctxt.Rexflag)
  4344  		ctxt.Andptr = ctxt.Andptr[1:]
  4345  	}
  4346  
  4347  	n := -cap(ctxt.Andptr) + cap(ctxt.And[:])
  4348  	var r *obj.Reloc
  4349  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4350  		r = &ctxt.Cursym.R[i:][0]
  4351  		if int64(r.Off) < p.Pc {
  4352  			break
  4353  		}
  4354  		if ctxt.Rexflag != 0 {
  4355  			r.Off++
  4356  		}
  4357  		if r.Type == obj.R_PCREL {
  4358  			// PC-relative addressing is relative to the end of the instruction,
  4359  			// but the relocations applied by the linker are relative to the end
  4360  			// of the relocation. Because immediate instruction
  4361  			// arguments can follow the PC-relative memory reference in the
  4362  			// instruction encoding, the two may not coincide. In this case,
  4363  			// adjust addend so that linker can keep relocating relative to the
  4364  			// end of the relocation.
  4365  			r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4366  		}
  4367  	}
  4368  
  4369  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4370  		switch p.To.Reg {
  4371  		case REG_SP:
  4372  			copy(ctxt.Andptr, naclspfix)
  4373  			ctxt.Andptr = ctxt.Andptr[len(naclspfix):]
  4374  
  4375  		case REG_BP:
  4376  			copy(ctxt.Andptr, naclbpfix)
  4377  			ctxt.Andptr = ctxt.Andptr[len(naclbpfix):]
  4378  		}
  4379  	}
  4380  }