rsc.io/go@v0.0.0-20150416155037-e040fd465409/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"fmt"
    36  	"log"
    37  	"strings"
    38  )
    39  
    40  // Instruction layout.
    41  
    42  const (
    43  	MaxAlign = 32 // max data alignment
    44  
    45  	// Loop alignment constants:
    46  	// want to align loop entry to LoopAlign-byte boundary,
    47  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    48  	// We define a loop entry as the target of a backward jump.
    49  	//
    50  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    51  	// and it aligns all jump targets, not just backward jump targets.
    52  	//
    53  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    54  	// is very slight but negative, so the alignment is disabled by
    55  	// setting MaxLoopPad = 0. The code is here for reference and
    56  	// for future experiments.
    57  	//
    58  	LoopAlign  = 16
    59  	MaxLoopPad = 0
    60  	FuncAlign  = 16
    61  )
    62  
// Optab is one entry of the optab instruction table: it ties an instruction
// to the operand forms it accepts and to the bytes used to encode them.
// The entries of optab are written positionally, so the field order matters.
type Optab struct {
	as     int16     // instruction, e.g. AADDL
	ytab   []ytab    // operand-class rows accepted by the instruction
	prefix uint8     // one of the P* prefix constants (Px, Pe, Pw, ...)
	op     [23]uint8 // opcode bytes; each ytab row consumes zoffset of them in turn
}
    69  
    70  type ytab struct {
    71  	from    uint8
    72  	from3   uint8
    73  	to      uint8
    74  	zcase   uint8
    75  	zoffset uint8
    76  }
    77  
    78  type Movtab struct {
    79  	as   int16
    80  	ft   uint8
    81  	f3t  uint8
    82  	tt   uint8
    83  	code uint8
    84  	op   [4]uint8
    85  }
    86  
// Operand classes ("Ytypes").
//
// The from, from3 and to fields of a ytab row hold these values, and the
// ycover table (sized Ymax*Ymax) records which classes also satisfy a more
// general class. The values are assigned by iota, so the order of the names
// fixes their numeric values; Ymax must stay last.
const (
	Yxxx = iota
	Ynone
	// Immediate constants.
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	// NOTE(review): the next four names suggest the fixed registers
	// AL, CL, AX and CX — confirm against oclass.
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	// NOTE(review): presumably segment, descriptor-table, control, debug
	// and test registers, judging by the names — confirm against oclass.
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	// Per the MOVQ comments in ymovq, Yxr is an XMM register and Yxm an XMM
	// register or memory operand. Ymr and Ymm are presumably the MMX
	// counterparts — confirm.
	Ymr
	Ymm
	Yxr
	Yxm
	Ytls
	Ytextsize
	Yindir
	Ymax // number of classes; sizes ycover
)
   158  
// Encoding cases ("Ztypes").
//
// The zcase field of a ytab row holds one of these values. Together with the
// row's op bytes it determines which instruction bytes are laid down; see the
// comment preceding optab for a worked example. The values are assigned by
// iota, so the order of the names fixes their numeric values.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m // opcode byte, encoded addressing mode, then a single immediate byte
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m // like Zibo_m but with a long (32-bit) immediate
	Ziqo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_3d
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zmax // last value of the enumeration
)
   210  
   211  const (
   212  	Px  = 0
   213  	Px1 = 1    // symbolic; exact value doesn't matter
   214  	P32 = 0x32 /* 32-bit only */
   215  	Pe  = 0x66 /* operand escape */
   216  	Pm  = 0x0f /* 2byte opcode escape */
   217  	Pq  = 0xff /* both escapes: 66 0f */
   218  	Pb  = 0xfe /* byte operands */
   219  	Pf2 = 0xf2 /* xmm escape 1: f2 0f */
   220  	Pf3 = 0xf3 /* xmm escape 2: f3 0f */
   221  	Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
   222  	Pw  = 0x48 /* Rex.w */
   223  	Pw8 = 0x90 // symbolic; exact value doesn't matter
   224  	Py  = 0x80 /* defaults to 64-bit mode */
   225  	Py1 = 0x81 // symbolic; exact value doesn't matter
   226  	Py3 = 0x83 // symbolic; exact value doesn't matter
   227  
   228  	Rxf = 1 << 9 /* internal flag for Rxr on from */
   229  	Rxt = 1 << 8 /* internal flag for Rxr on to */
   230  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   231  	Rxr = 1 << 2 /* extend modrm reg */
   232  	Rxx = 1 << 1 /* extend sib index */
   233  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   234  
   235  	Maxand = 10 /* in -a output width of the byte codes */
   236  )
   237  
   238  var ycover [Ymax * Ymax]uint8
   239  
   240  var reg [MAXREG]int
   241  
   242  var regrex [MAXREG + 1]int
   243  
   244  var ynone = []ytab{
   245  	{Ynone, Ynone, Ynone, Zlit, 1},
   246  }
   247  
   248  var ysahf = []ytab{
   249  	{Ynone, Ynone, Ynone, Zlit, 2},
   250  	{Ynone, Ynone, Ynone, Zlit, 1},
   251  }
   252  
   253  var ytext = []ytab{
   254  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   255  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   256  }
   257  
   258  var ynop = []ytab{
   259  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   260  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   261  	{Ynone, Ynone, Yml, Zpseudo, 0},
   262  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   263  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   264  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   265  	{Yml, Ynone, Ynone, Zpseudo, 0},
   266  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   267  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   268  }
   269  
   270  var yfuncdata = []ytab{
   271  	{Yi32, Ynone, Ym, Zpseudo, 0},
   272  }
   273  
   274  var ypcdata = []ytab{
   275  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   276  }
   277  
   278  var yxorb = []ytab{
   279  	{Yi32, Ynone, Yal, Zib_, 1},
   280  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   281  	{Yrb, Ynone, Ymb, Zr_m, 1},
   282  	{Ymb, Ynone, Yrb, Zm_r, 1},
   283  }
   284  
   285  var yxorl = []ytab{
   286  	{Yi8, Ynone, Yml, Zibo_m, 2},
   287  	{Yi32, Ynone, Yax, Zil_, 1},
   288  	{Yi32, Ynone, Yml, Zilo_m, 2},
   289  	{Yrl, Ynone, Yml, Zr_m, 1},
   290  	{Yml, Ynone, Yrl, Zm_r, 1},
   291  }
   292  
   293  var yaddl = []ytab{
   294  	{Yi8, Ynone, Yml, Zibo_m, 2},
   295  	{Yi32, Ynone, Yax, Zil_, 1},
   296  	{Yi32, Ynone, Yml, Zilo_m, 2},
   297  	{Yrl, Ynone, Yml, Zr_m, 1},
   298  	{Yml, Ynone, Yrl, Zm_r, 1},
   299  }
   300  
   301  var yincb = []ytab{
   302  	{Ynone, Ynone, Ymb, Zo_m, 2},
   303  }
   304  
   305  var yincw = []ytab{
   306  	{Ynone, Ynone, Yml, Zo_m, 2},
   307  }
   308  
   309  var yincl = []ytab{
   310  	{Ynone, Ynone, Yrl, Z_rp, 1},
   311  	{Ynone, Ynone, Yml, Zo_m, 2},
   312  }
   313  
   314  var yincq = []ytab{
   315  	{Ynone, Ynone, Yml, Zo_m, 2},
   316  }
   317  
   318  var ycmpb = []ytab{
   319  	{Yal, Ynone, Yi32, Z_ib, 1},
   320  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   321  	{Ymb, Ynone, Yrb, Zm_r, 1},
   322  	{Yrb, Ynone, Ymb, Zr_m, 1},
   323  }
   324  
   325  var ycmpl = []ytab{
   326  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   327  	{Yax, Ynone, Yi32, Z_il, 1},
   328  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   329  	{Yml, Ynone, Yrl, Zm_r, 1},
   330  	{Yrl, Ynone, Yml, Zr_m, 1},
   331  }
   332  
   333  var yshb = []ytab{
   334  	{Yi1, Ynone, Ymb, Zo_m, 2},
   335  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   336  	{Ycx, Ynone, Ymb, Zo_m, 2},
   337  }
   338  
   339  var yshl = []ytab{
   340  	{Yi1, Ynone, Yml, Zo_m, 2},
   341  	{Yi32, Ynone, Yml, Zibo_m, 2},
   342  	{Ycl, Ynone, Yml, Zo_m, 2},
   343  	{Ycx, Ynone, Yml, Zo_m, 2},
   344  }
   345  
   346  var ytestb = []ytab{
   347  	{Yi32, Ynone, Yal, Zib_, 1},
   348  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   349  	{Yrb, Ynone, Ymb, Zr_m, 1},
   350  	{Ymb, Ynone, Yrb, Zm_r, 1},
   351  }
   352  
   353  var ytestl = []ytab{
   354  	{Yi32, Ynone, Yax, Zil_, 1},
   355  	{Yi32, Ynone, Yml, Zilo_m, 2},
   356  	{Yrl, Ynone, Yml, Zr_m, 1},
   357  	{Yml, Ynone, Yrl, Zm_r, 1},
   358  }
   359  
   360  var ymovb = []ytab{
   361  	{Yrb, Ynone, Ymb, Zr_m, 1},
   362  	{Ymb, Ynone, Yrb, Zm_r, 1},
   363  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   364  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   365  }
   366  
   367  var ymbs = []ytab{
   368  	{Ymb, Ynone, Ynone, Zm_o, 2},
   369  }
   370  
   371  var ybtl = []ytab{
   372  	{Yi8, Ynone, Yml, Zibo_m, 2},
   373  	{Yrl, Ynone, Yml, Zr_m, 1},
   374  }
   375  
   376  var ymovw = []ytab{
   377  	{Yrl, Ynone, Yml, Zr_m, 1},
   378  	{Yml, Ynone, Yrl, Zm_r, 1},
   379  	{Yi0, Ynone, Yrl, Zclr, 1},
   380  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   381  	{Yi32, Ynone, Yml, Zilo_m, 2},
   382  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   383  }
   384  
   385  var ymovl = []ytab{
   386  	{Yrl, Ynone, Yml, Zr_m, 1},
   387  	{Yml, Ynone, Yrl, Zm_r, 1},
   388  	{Yi0, Ynone, Yrl, Zclr, 1},
   389  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   390  	{Yi32, Ynone, Yml, Zilo_m, 2},
   391  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   392  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   393  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   394  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   395  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   396  }
   397  
   398  var yret = []ytab{
   399  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   400  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   401  }
   402  
   403  var ymovq = []ytab{
   404  	// valid in 32-bit mode
   405  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   406  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   407  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   408  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   409  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   410  
   411  	// valid only in 64-bit mode, usually with 64-bit prefix
   412  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   413  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   414  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   415  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   416  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   417  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   418  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   419  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   420  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   421  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   422  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   423  }
   424  
   425  var ym_rl = []ytab{
   426  	{Ym, Ynone, Yrl, Zm_r, 1},
   427  }
   428  
   429  var yrl_m = []ytab{
   430  	{Yrl, Ynone, Ym, Zr_m, 1},
   431  }
   432  
   433  var ymb_rl = []ytab{
   434  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   435  }
   436  
   437  var yml_rl = []ytab{
   438  	{Yml, Ynone, Yrl, Zm_r, 1},
   439  }
   440  
   441  var yrl_ml = []ytab{
   442  	{Yrl, Ynone, Yml, Zr_m, 1},
   443  }
   444  
   445  var yml_mb = []ytab{
   446  	{Yrb, Ynone, Ymb, Zr_m, 1},
   447  	{Ymb, Ynone, Yrb, Zm_r, 1},
   448  }
   449  
   450  var yrb_mb = []ytab{
   451  	{Yrb, Ynone, Ymb, Zr_m, 1},
   452  }
   453  
   454  var yxchg = []ytab{
   455  	{Yax, Ynone, Yrl, Z_rp, 1},
   456  	{Yrl, Ynone, Yax, Zrp_, 1},
   457  	{Yrl, Ynone, Yml, Zr_m, 1},
   458  	{Yml, Ynone, Yrl, Zm_r, 1},
   459  }
   460  
   461  var ydivl = []ytab{
   462  	{Yml, Ynone, Ynone, Zm_o, 2},
   463  }
   464  
   465  var ydivb = []ytab{
   466  	{Ymb, Ynone, Ynone, Zm_o, 2},
   467  }
   468  
   469  var yimul = []ytab{
   470  	{Yml, Ynone, Ynone, Zm_o, 2},
   471  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   472  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   473  	{Yml, Ynone, Yrl, Zm_r, 2},
   474  }
   475  
   476  var yimul3 = []ytab{
   477  	{Yi8, Yml, Yrl, Zibm_r, 2},
   478  }
   479  
   480  var ybyte = []ytab{
   481  	{Yi64, Ynone, Ynone, Zbyte, 1},
   482  }
   483  
   484  var yin = []ytab{
   485  	{Yi32, Ynone, Ynone, Zib_, 1},
   486  	{Ynone, Ynone, Ynone, Zlit, 1},
   487  }
   488  
   489  var yint = []ytab{
   490  	{Yi32, Ynone, Ynone, Zib_, 1},
   491  }
   492  
   493  var ypushl = []ytab{
   494  	{Yrl, Ynone, Ynone, Zrp_, 1},
   495  	{Ym, Ynone, Ynone, Zm_o, 2},
   496  	{Yi8, Ynone, Ynone, Zib_, 1},
   497  	{Yi32, Ynone, Ynone, Zil_, 1},
   498  }
   499  
   500  var ypopl = []ytab{
   501  	{Ynone, Ynone, Yrl, Z_rp, 1},
   502  	{Ynone, Ynone, Ym, Zo_m, 2},
   503  }
   504  
   505  var ybswap = []ytab{
   506  	{Ynone, Ynone, Yrl, Z_rp, 2},
   507  }
   508  
   509  var yscond = []ytab{
   510  	{Ynone, Ynone, Ymb, Zo_m, 2},
   511  }
   512  
   513  var yjcond = []ytab{
   514  	{Ynone, Ynone, Ybr, Zbr, 0},
   515  	{Yi0, Ynone, Ybr, Zbr, 0},
   516  	{Yi1, Ynone, Ybr, Zbr, 1},
   517  }
   518  
   519  var yloop = []ytab{
   520  	{Ynone, Ynone, Ybr, Zloop, 1},
   521  }
   522  
   523  var ycall = []ytab{
   524  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   525  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   526  	{Ynone, Ynone, Yindir, Zcallind, 2},
   527  	{Ynone, Ynone, Ybr, Zcall, 0},
   528  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   529  }
   530  
   531  var yduff = []ytab{
   532  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   533  }
   534  
   535  var yjmp = []ytab{
   536  	{Ynone, Ynone, Yml, Zo_m64, 2},
   537  	{Ynone, Ynone, Ybr, Zjmp, 0},
   538  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   539  }
   540  
   541  var yfmvd = []ytab{
   542  	{Ym, Ynone, Yf0, Zm_o, 2},
   543  	{Yf0, Ynone, Ym, Zo_m, 2},
   544  	{Yrf, Ynone, Yf0, Zm_o, 2},
   545  	{Yf0, Ynone, Yrf, Zo_m, 2},
   546  }
   547  
   548  var yfmvdp = []ytab{
   549  	{Yf0, Ynone, Ym, Zo_m, 2},
   550  	{Yf0, Ynone, Yrf, Zo_m, 2},
   551  }
   552  
   553  var yfmvf = []ytab{
   554  	{Ym, Ynone, Yf0, Zm_o, 2},
   555  	{Yf0, Ynone, Ym, Zo_m, 2},
   556  }
   557  
   558  var yfmvx = []ytab{
   559  	{Ym, Ynone, Yf0, Zm_o, 2},
   560  }
   561  
   562  var yfmvp = []ytab{
   563  	{Yf0, Ynone, Ym, Zo_m, 2},
   564  }
   565  
   566  var yfcmv = []ytab{
   567  	{Yrf, Ynone, Yf0, Zm_o, 2},
   568  }
   569  
   570  var yfadd = []ytab{
   571  	{Ym, Ynone, Yf0, Zm_o, 2},
   572  	{Yrf, Ynone, Yf0, Zm_o, 2},
   573  	{Yf0, Ynone, Yrf, Zo_m, 2},
   574  }
   575  
   576  var yfaddp = []ytab{
   577  	{Yf0, Ynone, Yrf, Zo_m, 2},
   578  }
   579  
   580  var yfxch = []ytab{
   581  	{Yf0, Ynone, Yrf, Zo_m, 2},
   582  	{Yrf, Ynone, Yf0, Zm_o, 2},
   583  }
   584  
   585  var ycompp = []ytab{
   586  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   587  }
   588  
   589  var ystsw = []ytab{
   590  	{Ynone, Ynone, Ym, Zo_m, 2},
   591  	{Ynone, Ynone, Yax, Zlit, 1},
   592  }
   593  
   594  var ystcw = []ytab{
   595  	{Ynone, Ynone, Ym, Zo_m, 2},
   596  	{Ym, Ynone, Ynone, Zm_o, 2},
   597  }
   598  
   599  var ysvrs = []ytab{
   600  	{Ynone, Ynone, Ym, Zo_m, 2},
   601  	{Ym, Ynone, Ynone, Zm_o, 2},
   602  }
   603  
   604  var ymm = []ytab{
   605  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   606  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   607  }
   608  
   609  var yxm = []ytab{
   610  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   611  }
   612  
   613  var yxcvm1 = []ytab{
   614  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   615  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   616  }
   617  
   618  var yxcvm2 = []ytab{
   619  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   620  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   621  }
   622  
   623  /*
   624  var yxmq = []ytab{
   625  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   626  }
   627  */
   628  
   629  var yxr = []ytab{
   630  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   631  }
   632  
   633  var yxr_ml = []ytab{
   634  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   635  }
   636  
   637  var ymr = []ytab{
   638  	{Ymr, Ynone, Ymr, Zm_r, 1},
   639  }
   640  
   641  var ymr_ml = []ytab{
   642  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   643  }
   644  
   645  var yxcmp = []ytab{
   646  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   647  }
   648  
   649  var yxcmpi = []ytab{
   650  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   651  }
   652  
   653  var yxmov = []ytab{
   654  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   655  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   656  }
   657  
   658  var yxcvfl = []ytab{
   659  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   660  }
   661  
   662  var yxcvlf = []ytab{
   663  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   664  }
   665  
   666  var yxcvfq = []ytab{
   667  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   668  }
   669  
   670  var yxcvqf = []ytab{
   671  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   672  }
   673  
   674  var yps = []ytab{
   675  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   676  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   677  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   678  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   679  }
   680  
   681  var yxrrl = []ytab{
   682  	{Yxr, Ynone, Yrl, Zm_r, 1},
   683  }
   684  
   685  var ymfp = []ytab{
   686  	{Ymm, Ynone, Ymr, Zm_r_3d, 1},
   687  }
   688  
   689  var ymrxr = []ytab{
   690  	{Ymr, Ynone, Yxr, Zm_r, 1},
   691  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   692  }
   693  
   694  var ymshuf = []ytab{
   695  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   696  }
   697  
   698  var ymshufb = []ytab{
   699  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   700  }
   701  
   702  var yxshuf = []ytab{
   703  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   704  }
   705  
   706  var yextrw = []ytab{
   707  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   708  }
   709  
   710  var yinsrw = []ytab{
   711  	{Yu8, Yml, Yxr, Zibm_r, 2},
   712  }
   713  
   714  var yinsr = []ytab{
   715  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   716  }
   717  
   718  var ypsdq = []ytab{
   719  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   720  }
   721  
   722  var ymskb = []ytab{
   723  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   724  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   725  }
   726  
   727  var ycrc32l = []ytab{
   728  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   729  }
   730  
   731  var yprefetch = []ytab{
   732  	{Ym, Ynone, Ynone, Zm_o, 2},
   733  }
   734  
   735  var yaes = []ytab{
   736  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   737  }
   738  
   739  var yaes2 = []ytab{
   740  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   741  }
   742  
   743  /*
 * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
 * and p->from and p->to as operands (Addr*).  The linker scans optab to find
 * the entry with the given p->as and then looks through the ytable for that
 * instruction (the second field in the Optab struct) for a line whose operand
 * class fields (from, from3 and to in the ytab struct; the original C table
 * had only from and to) match the Ytypes of the instruction's operands.  The
 * function oclass (from span.c in the original C linker) computes the
 * specific Ytype of an operand and then the set of more general Ytypes that
 * it satisfies is implied by the ycover table, set
 * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   752   * from the more general 8-bit constants, but instinit says
   753   *
   754   *        ycover[Yi0*Ymax + Ys32] = 1;
   755   *        ycover[Yi1*Ymax + Ys32] = 1;
   756   *        ycover[Yi8*Ymax + Ys32] = 1;
   757   *
   758   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   759   * if that's what an instruction can handle.
   760   *
 * In parallel with the scan through the ytable for the appropriate line, there
 * is a z pointer that starts out pointing at the strange magic byte list in
 * the Optab struct (the op field).  With each step past a non-matching ytable
 * line, z advances by that line's zoffset (its last entry; the 4th entry in
 * the C layout shown below).  When a matching line is found, that z pointer
 * has the extra data to use in laying down the instruction bytes.  The actual
 * bytes laid down are a function of the line's zcase (that is, the Ztype; the
 * 3rd entry in the C layout shown below) and the z bytes.
   768   *
   769   * For example, let's look at AADDL.  The optab line says:
   770   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   771   *
 * and yaddl (shown here in the original C layout, which has no from3 column) says
   773   *        uchar   yaddl[] =
   774   *        {
   775   *                Yi8,    Yml,    Zibo_m, 2,
   776   *                Yi32,   Yax,    Zil_,   1,
   777   *                Yi32,   Yml,    Zilo_m, 2,
   778   *                Yrl,    Yml,    Zr_m,   1,
   779   *                Yml,    Yrl,    Zm_r,   1,
   780   *                0
   781   *        };
   782   *
   783   * so there are 5 possible types of ADDL instruction that can be laid down, and
   784   * possible states used to lay them down (Ztype and z pointer, assuming z
   785   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   786   *
   787   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   788   *        Yi32, Yax -> Zil_, z+2 (0x05)
   789   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   790   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   791   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   792   *
   793   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   794   * relatively straightforward as this program goes.
   795   *
 * The switch on the Ztype in doasm (t[2] in the C layout; the zcase field of
 * a ytab line) implements the various Z cases.  Zibo_m, for example, is an
 * opcode byte (z[0]) then an asmando (which is some kind of encoded addressing
 * mode for the Yml arg), and then a single immediate byte.
 * Zilo_m is the same but a long (32-bit) immediate.
   800   */
   801  var optab =
   802  /*	as, ytab, andproto, opcode */
   803  []Optab{
   804  	Optab{obj.AXXX, nil, 0, [23]uint8{}},
   805  	Optab{AAAA, ynone, P32, [23]uint8{0x37}},
   806  	Optab{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   807  	Optab{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   808  	Optab{AAAS, ynone, P32, [23]uint8{0x3f}},
   809  	Optab{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   810  	Optab{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   811  	Optab{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   812  	Optab{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   813  	Optab{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   814  	Optab{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   815  	Optab{AADDPD, yxm, Pq, [23]uint8{0x58}},
   816  	Optab{AADDPS, yxm, Pm, [23]uint8{0x58}},
   817  	Optab{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   818  	Optab{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   819  	Optab{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   820  	Optab{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   821  	Optab{AADJSP, nil, 0, [23]uint8{}},
   822  	Optab{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   823  	Optab{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   824  	Optab{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   825  	Optab{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   826  	Optab{AANDPD, yxm, Pq, [23]uint8{0x54}},
   827  	Optab{AANDPS, yxm, Pq, [23]uint8{0x54}},
   828  	Optab{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   829  	Optab{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   830  	Optab{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   831  	Optab{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   832  	Optab{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   833  	Optab{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   834  	Optab{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   835  	Optab{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   836  	Optab{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   837  	Optab{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   838  	Optab{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   839  	Optab{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   840  	Optab{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   841  	Optab{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   842  	Optab{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   843  	Optab{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   844  	Optab{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   845  	Optab{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   846  	Optab{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   847  	Optab{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   848  	Optab{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   849  	Optab{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   850  	Optab{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   851  	Optab{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   852  	Optab{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   853  	Optab{ABYTE, ybyte, Px, [23]uint8{1}},
   854  	Optab{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   855  	Optab{ACDQ, ynone, Px, [23]uint8{0x99}},
   856  	Optab{ACLC, ynone, Px, [23]uint8{0xf8}},
   857  	Optab{ACLD, ynone, Px, [23]uint8{0xfc}},
   858  	Optab{ACLI, ynone, Px, [23]uint8{0xfa}},
   859  	Optab{ACLTS, ynone, Pm, [23]uint8{0x06}},
   860  	Optab{ACMC, ynone, Px, [23]uint8{0xf5}},
   861  	Optab{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
   862  	Optab{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
   863  	Optab{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
   864  	Optab{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
   865  	Optab{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
   866  	Optab{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
   867  	Optab{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
   868  	Optab{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
   869  	Optab{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
   870  	Optab{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
   871  	Optab{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
   872  	Optab{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
   873  	Optab{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
   874  	Optab{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
   875  	Optab{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
   876  	Optab{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
   877  	Optab{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
   878  	Optab{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
   879  	Optab{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
   880  	Optab{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
   881  	Optab{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
   882  	Optab{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
   883  	Optab{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
   884  	Optab{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
   885  	Optab{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
   886  	Optab{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
   887  	Optab{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
   888  	Optab{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
   889  	Optab{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
   890  	Optab{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
   891  	Optab{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
   892  	Optab{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
   893  	Optab{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
   894  	Optab{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
   895  	Optab{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
   896  	Optab{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
   897  	Optab{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
   898  	Optab{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
   899  	Optab{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
   900  	Optab{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
   901  	Optab{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
   902  	Optab{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
   903  	Optab{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
   904  	Optab{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
   905  	Optab{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
   906  	Optab{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
   907  	Optab{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
   908  	Optab{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
   909  	Optab{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
   910  	Optab{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   911  	Optab{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
   912  	Optab{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
   913  	Optab{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   914  	Optab{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
   915  	Optab{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
   916  	Optab{ACMPSL, ynone, Px, [23]uint8{0xa7}},
   917  	Optab{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
   918  	Optab{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
   919  	Optab{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
   920  	Optab{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   921  	Optab{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
   922  	Optab{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
   923  	Optab{ACPUID, ynone, Pm, [23]uint8{0xa2}},
   924  	Optab{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
   925  	Optab{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
   926  	Optab{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
   927  	Optab{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
   928  	Optab{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
   929  	Optab{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
   930  	Optab{API2FW, ymfp, Px, [23]uint8{0x0c}},
   931  	Optab{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
   932  	Optab{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
   933  	Optab{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
   934  	Optab{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
   935  	Optab{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
   936  	Optab{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
   937  	Optab{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
   938  	Optab{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
   939  	Optab{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
   940  	Optab{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
   941  	Optab{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
   942  	Optab{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
   943  	Optab{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
   944  	Optab{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
   945  	Optab{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
   946  	Optab{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
   947  	Optab{ACWD, ynone, Pe, [23]uint8{0x99}},
   948  	Optab{ACQO, ynone, Pw, [23]uint8{0x99}},
   949  	Optab{ADAA, ynone, P32, [23]uint8{0x27}},
   950  	Optab{ADAS, ynone, P32, [23]uint8{0x2f}},
   951  	Optab{obj.ADATA, nil, 0, [23]uint8{}},
   952  	Optab{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
   953  	Optab{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
   954  	Optab{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
   955  	Optab{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
   956  	Optab{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
   957  	Optab{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
   958  	Optab{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
   959  	Optab{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
   960  	Optab{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
   961  	Optab{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
   962  	Optab{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
   963  	Optab{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
   964  	Optab{AEMMS, ynone, Pm, [23]uint8{0x77}},
   965  	Optab{AENTER, nil, 0, [23]uint8{}}, /* botch */
   966  	Optab{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
   967  	Optab{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
   968  	Optab{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
   969  	Optab{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
   970  	Optab{obj.AGLOBL, nil, 0, [23]uint8{}},
   971  	Optab{AHLT, ynone, Px, [23]uint8{0xf4}},
   972  	Optab{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
   973  	Optab{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
   974  	Optab{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
   975  	Optab{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
   976  	Optab{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
   977  	Optab{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   978  	Optab{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   979  	Optab{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   980  	Optab{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
   981  	Optab{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
   982  	Optab{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
   983  	Optab{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
   984  	Optab{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
   985  	Optab{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
   986  	Optab{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
   987  	Optab{AINSB, ynone, Pb, [23]uint8{0x6c}},
   988  	Optab{AINSL, ynone, Px, [23]uint8{0x6d}},
   989  	Optab{AINSW, ynone, Pe, [23]uint8{0x6d}},
   990  	Optab{AINT, yint, Px, [23]uint8{0xcd}},
   991  	Optab{AINTO, ynone, P32, [23]uint8{0xce}},
   992  	Optab{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
   993  	Optab{AIRETL, ynone, Px, [23]uint8{0xcf}},
   994  	Optab{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
   995  	Optab{AIRETW, ynone, Pe, [23]uint8{0xcf}},
   996  	Optab{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
   997  	Optab{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
   998  	Optab{AJCXZL, yloop, Px, [23]uint8{0xe3}},
   999  	Optab{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1000  	Optab{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1001  	Optab{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1002  	Optab{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1003  	Optab{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1004  	Optab{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1005  	Optab{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1006  	Optab{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1007  	Optab{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1008  	Optab{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1009  	Optab{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1010  	Optab{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1011  	Optab{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1012  	Optab{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1013  	Optab{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1014  	Optab{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1015  	Optab{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1016  	Optab{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1017  	Optab{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1018  	Optab{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1019  	Optab{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1020  	Optab{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1021  	Optab{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1022  	Optab{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1023  	Optab{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1024  	Optab{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1025  	Optab{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1026  	Optab{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1027  	Optab{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1028  	Optab{ALODSL, ynone, Px, [23]uint8{0xad}},
  1029  	Optab{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1030  	Optab{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1031  	Optab{ALONG, ybyte, Px, [23]uint8{4}},
  1032  	Optab{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1033  	Optab{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1034  	Optab{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1035  	Optab{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1036  	Optab{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1037  	Optab{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1038  	Optab{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1039  	Optab{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1040  	Optab{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1041  	Optab{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1042  	Optab{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1043  	Optab{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1044  	Optab{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1045  	Optab{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1046  	Optab{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1047  	Optab{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1048  	Optab{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1049  	Optab{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1050  	Optab{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1051  	Optab{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1052  	Optab{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1053  	Optab{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1054  	Optab{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1055  	Optab{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1056  	Optab{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1057  	Optab{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1058  	Optab{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1059  	Optab{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1060  	Optab{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1061  	Optab{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1062  	Optab{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1063  	Optab{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1064  	Optab{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1065  	Optab{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1066  	Optab{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1067  	Optab{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1068  	Optab{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1069  	Optab{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1070  	Optab{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1071  	Optab{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1072  	Optab{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1073  	Optab{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1074  	Optab{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1075  	Optab{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1076  	Optab{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1077  	Optab{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1078  	Optab{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1079  	Optab{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1080  	Optab{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1081  	Optab{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1082  	Optab{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1083  	Optab{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1084  	Optab{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1085  	Optab{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1086  	Optab{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1087  	Optab{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1088  	Optab{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1089  	Optab{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1090  	Optab{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1091  	Optab{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1092  	Optab{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1093  	Optab{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1094  	Optab{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1095  	Optab{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1096  	Optab{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1097  	Optab{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1098  	Optab{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1099  	Optab{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1100  	Optab{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1101  	Optab{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1102  	Optab{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1103  	Optab{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1104  	Optab{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1105  	Optab{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1106  	Optab{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1107  	Optab{AORPD, yxm, Pq, [23]uint8{0x56}},
  1108  	Optab{AORPS, yxm, Pm, [23]uint8{0x56}},
  1109  	Optab{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1110  	Optab{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1111  	Optab{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1112  	Optab{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1113  	Optab{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1114  	Optab{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1115  	Optab{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1116  	Optab{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1117  	Optab{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1118  	Optab{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1119  	Optab{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1120  	Optab{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1121  	Optab{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1122  	Optab{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1123  	Optab{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1124  	Optab{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1125  	Optab{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1126  	Optab{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1127  	Optab{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1128  	Optab{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1129  	Optab{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1130  	Optab{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1131  	Optab{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1132  	Optab{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1133  	Optab{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1134  	Optab{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1135  	Optab{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1136  	Optab{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1137  	Optab{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1138  	Optab{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1139  	Optab{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1140  	Optab{APF2IL, ymfp, Px, [23]uint8{0x1d}},
  1141  	Optab{APF2IW, ymfp, Px, [23]uint8{0x1c}},
  1142  	Optab{API2FL, ymfp, Px, [23]uint8{0x0d}},
  1143  	Optab{APFACC, ymfp, Px, [23]uint8{0xae}},
  1144  	Optab{APFADD, ymfp, Px, [23]uint8{0x9e}},
  1145  	Optab{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
  1146  	Optab{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
  1147  	Optab{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
  1148  	Optab{APFMAX, ymfp, Px, [23]uint8{0xa4}},
  1149  	Optab{APFMIN, ymfp, Px, [23]uint8{0x94}},
  1150  	Optab{APFMUL, ymfp, Px, [23]uint8{0xb4}},
  1151  	Optab{APFNACC, ymfp, Px, [23]uint8{0x8a}},
  1152  	Optab{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
  1153  	Optab{APFRCP, ymfp, Px, [23]uint8{0x96}},
  1154  	Optab{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
  1155  	Optab{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
  1156  	Optab{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
  1157  	Optab{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
  1158  	Optab{APFSUB, ymfp, Px, [23]uint8{0x9a}},
  1159  	Optab{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
  1160  	Optab{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1161  	Optab{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1162  	Optab{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1163  	Optab{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1164  	Optab{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1165  	Optab{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1166  	Optab{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1167  	Optab{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1168  	Optab{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1169  	Optab{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
  1170  	Optab{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1171  	Optab{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1172  	Optab{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1173  	Optab{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1174  	Optab{APOPAL, ynone, P32, [23]uint8{0x61}},
  1175  	Optab{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1176  	Optab{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1177  	Optab{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1178  	Optab{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1179  	Optab{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1180  	Optab{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1181  	Optab{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1182  	Optab{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1183  	Optab{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1184  	Optab{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1185  	Optab{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1186  	Optab{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1187  	Optab{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1188  	Optab{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1189  	Optab{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1190  	Optab{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1191  	Optab{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1192  	Optab{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1193  	Optab{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1194  	Optab{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1195  	Optab{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1196  	Optab{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1197  	Optab{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1198  	Optab{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xe1, Pe, 0x71, 02}},
  1199  	Optab{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1200  	Optab{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1201  	Optab{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1202  	Optab{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1203  	Optab{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1204  	Optab{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1205  	Optab{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1206  	Optab{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1207  	Optab{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
  1208  	Optab{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1209  	Optab{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1210  	Optab{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1211  	Optab{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1212  	Optab{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1213  	Optab{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1214  	Optab{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1215  	Optab{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1216  	Optab{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1217  	Optab{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1218  	Optab{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1219  	Optab{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1220  	Optab{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1221  	Optab{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1222  	Optab{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1223  	Optab{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1224  	Optab{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1225  	Optab{AQUAD, ybyte, Px, [23]uint8{8}},
  1226  	Optab{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1227  	Optab{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1228  	Optab{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1229  	Optab{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1230  	Optab{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1231  	Optab{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1232  	Optab{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1233  	Optab{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1234  	Optab{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1235  	Optab{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1236  	Optab{AREP, ynone, Px, [23]uint8{0xf3}},
  1237  	Optab{AREPN, ynone, Px, [23]uint8{0xf2}},
  1238  	Optab{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1239  	Optab{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1240  	Optab{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1241  	Optab{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1242  	Optab{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1243  	Optab{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1244  	Optab{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1245  	Optab{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1246  	Optab{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1247  	Optab{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1248  	Optab{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1249  	Optab{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1250  	Optab{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1251  	Optab{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1252  	Optab{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1253  	Optab{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1254  	Optab{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1255  	Optab{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1256  	Optab{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1257  	Optab{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1258  	Optab{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1259  	Optab{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1260  	Optab{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1261  	Optab{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1262  	Optab{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1263  	Optab{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1264  	Optab{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1265  	Optab{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1266  	Optab{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1267  	Optab{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1268  	Optab{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1269  	Optab{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1270  	Optab{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1271  	Optab{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1272  	Optab{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1273  	Optab{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1274  	Optab{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1275  	Optab{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1276  	Optab{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1277  	Optab{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1278  	Optab{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1279  	Optab{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1280  	Optab{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1281  	Optab{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1282  	Optab{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1283  	Optab{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1284  	Optab{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1285  	Optab{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1286  	Optab{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1287  	Optab{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1288  	Optab{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1289  	Optab{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1290  	Optab{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1291  	Optab{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1292  	Optab{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1293  	Optab{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1294  	Optab{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1295  	Optab{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1296  	Optab{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1297  	Optab{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1298  	Optab{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1299  	Optab{ASTC, ynone, Px, [23]uint8{0xf9}},
  1300  	Optab{ASTD, ynone, Px, [23]uint8{0xfd}},
  1301  	Optab{ASTI, ynone, Px, [23]uint8{0xfb}},
  1302  	Optab{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1303  	Optab{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1304  	Optab{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1305  	Optab{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1306  	Optab{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1307  	Optab{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1308  	Optab{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1309  	Optab{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1310  	Optab{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1311  	Optab{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1312  	Optab{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1313  	Optab{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1314  	Optab{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1315  	Optab{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1316  	Optab{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1317  	Optab{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1318  	Optab{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1319  	Optab{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1320  	Optab{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1321  	Optab{obj.ATEXT, ytext, Px, [23]uint8{}},
  1322  	Optab{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1323  	Optab{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1324  	Optab{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1325  	Optab{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1326  	Optab{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1327  	Optab{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1328  	Optab{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1329  	Optab{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1330  	Optab{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1331  	Optab{AWORD, ybyte, Px, [23]uint8{2}},
  1332  	Optab{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1333  	Optab{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1334  	Optab{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1335  	Optab{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1336  	Optab{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1337  	Optab{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1338  	Optab{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1339  	Optab{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1340  	Optab{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1341  	Optab{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1342  	Optab{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1343  	Optab{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1344  	Optab{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1345  	Optab{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1346  	Optab{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1347  	Optab{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1348  	Optab{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1349  	Optab{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1350  	Optab{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1351  	Optab{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1352  	Optab{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1353  	Optab{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1354  	Optab{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1355  	Optab{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1356  	Optab{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1357  	Optab{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1358  	Optab{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1359  	Optab{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1360  	Optab{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1361  	Optab{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1362  	Optab{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1363  	Optab{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1364  	Optab{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1365  	Optab{AFCOMB, nil, 0, [23]uint8{}},
  1366  	Optab{AFCOMBP, nil, 0, [23]uint8{}},
  1367  	Optab{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1368  	Optab{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1369  	Optab{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1370  	Optab{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1371  	Optab{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1372  	Optab{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1373  	Optab{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1374  	Optab{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1375  	Optab{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1376  	Optab{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1377  	Optab{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1378  	Optab{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1379  	Optab{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1380  	Optab{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1381  	Optab{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1382  	Optab{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1383  	Optab{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1384  	Optab{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1385  	Optab{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1386  	Optab{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1387  	Optab{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1388  	Optab{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1389  	Optab{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1390  	Optab{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1391  	Optab{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1392  	Optab{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1393  	Optab{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1394  	Optab{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1395  	Optab{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1396  	Optab{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1397  	Optab{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1398  	Optab{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1399  	Optab{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1400  	Optab{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1401  	Optab{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1402  	Optab{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1403  	Optab{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1404  	Optab{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1405  	Optab{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1406  	Optab{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1407  	Optab{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1408  	Optab{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1409  	Optab{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1410  	Optab{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1411  	Optab{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1412  	Optab{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1413  	Optab{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1414  	Optab{AFFREE, nil, 0, [23]uint8{}},
  1415  	Optab{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1416  	Optab{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1417  	Optab{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1418  	Optab{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1419  	Optab{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1420  	Optab{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1421  	Optab{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1422  	Optab{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1423  	Optab{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1424  	Optab{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1425  	Optab{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1426  	Optab{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1427  	Optab{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1428  	Optab{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1429  	Optab{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1430  	Optab{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1431  	Optab{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1432  	Optab{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1433  	Optab{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1434  	Optab{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1435  	Optab{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1436  	Optab{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1437  	Optab{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1438  	Optab{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1439  	Optab{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1440  	Optab{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1441  	Optab{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1442  	Optab{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1443  	Optab{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1444  	Optab{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1445  	Optab{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1446  	Optab{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1447  	Optab{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1448  	Optab{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1449  	Optab{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1450  	Optab{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1451  	Optab{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1452  	Optab{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1453  	Optab{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1454  	Optab{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1455  	Optab{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1456  	Optab{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1457  	Optab{AINVD, ynone, Pm, [23]uint8{0x08}},
  1458  	Optab{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1459  	Optab{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1460  	Optab{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1461  	Optab{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1462  	Optab{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1463  	Optab{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1464  	Optab{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1465  	Optab{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1466  	Optab{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1467  	Optab{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1468  	Optab{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1469  	Optab{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1470  	Optab{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1471  	Optab{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1472  	Optab{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1473  	Optab{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1474  	Optab{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1475  	Optab{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1476  	Optab{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1477  	Optab{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1478  	Optab{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1479  	Optab{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1480  	Optab{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1481  	Optab{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1482  	Optab{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1483  	Optab{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1484  	Optab{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1485  	Optab{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1486  	Optab{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1487  	Optab{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1488  	Optab{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1489  	Optab{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1490  	Optab{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1491  	Optab{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1492  	Optab{obj.ATYPE, nil, 0, [23]uint8{}},
  1493  	Optab{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1494  	Optab{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1495  	Optab{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1496  	Optab{obj.AVARDEF, nil, 0, [23]uint8{}},
  1497  	Optab{obj.AVARKILL, nil, 0, [23]uint8{}},
  1498  	Optab{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1499  	Optab{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1500  	Optab{obj.AEND, nil, 0, [23]uint8{}},
  1501  	Optab{0, nil, 0, [23]uint8{}},
  1502  }
  1503  
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1505  
  1506  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1507  // This happens on systems like Solaris that call .so functions instead of system calls.
  1508  // It does not seem to be necessary for any other systems. This is probably working
  1509  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1510  // what that bug is. And this does fix it.
  1511  func isextern(s *obj.LSym) bool {
  1512  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1513  	return strings.HasPrefix(s.Name, "libc_")
  1514  }
  1515  
  1516  // single-instruction no-ops of various lengths.
  1517  // constructed by hand and disassembled with gdb to verify.
  1518  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1519  var nop = [][16]uint8{
  1520  	[16]uint8{0x90},
  1521  	[16]uint8{0x66, 0x90},
  1522  	[16]uint8{0x0F, 0x1F, 0x00},
  1523  	[16]uint8{0x0F, 0x1F, 0x40, 0x00},
  1524  	[16]uint8{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1525  	[16]uint8{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1526  	[16]uint8{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1527  	[16]uint8{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1528  	[16]uint8{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1529  }
  1530  
  1531  // Native Client rejects the repeated 0x66 prefix.
  1532  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1533  func fillnop(p []byte, n int) {
  1534  	var m int
  1535  
  1536  	for n > 0 {
  1537  		m = n
  1538  		if m > len(nop) {
  1539  			m = len(nop)
  1540  		}
  1541  		copy(p[:m], nop[m-1][:m])
  1542  		p = p[m:]
  1543  		n -= m
  1544  	}
  1545  }
  1546  
  1547  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1548  	obj.Symgrow(ctxt, s, int64(c)+int64(pad))
  1549  	fillnop(s.P[c:], int(pad))
  1550  	return c + pad
  1551  }
  1552  
  1553  func spadjop(ctxt *obj.Link, p *obj.Prog, l int, q int) int {
  1554  	if p.Mode != 64 || ctxt.Arch.Ptrsize == 4 {
  1555  		return l
  1556  	}
  1557  	return q
  1558  }
  1559  
  1560  func span6(ctxt *obj.Link, s *obj.LSym) {
  1561  	ctxt.Cursym = s
  1562  
  1563  	if s.P != nil {
  1564  		return
  1565  	}
  1566  
  1567  	if ycover[0] == 0 {
  1568  		instinit()
  1569  	}
  1570  
  1571  	var v int32
  1572  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1573  		if p.To.Type == obj.TYPE_BRANCH {
  1574  			if p.Pcond == nil {
  1575  				p.Pcond = p
  1576  			}
  1577  		}
  1578  		if p.As == AADJSP {
  1579  			p.To.Type = obj.TYPE_REG
  1580  			p.To.Reg = REG_SP
  1581  			v = int32(-p.From.Offset)
  1582  			p.From.Offset = int64(v)
  1583  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1584  			if v < 0 {
  1585  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1586  				v = -v
  1587  				p.From.Offset = int64(v)
  1588  			}
  1589  
  1590  			if v == 0 {
  1591  				p.As = obj.ANOP
  1592  			}
  1593  		}
  1594  	}
  1595  
  1596  	var q *obj.Prog
  1597  	for p := s.Text; p != nil; p = p.Link {
  1598  		p.Back = 2 // use short branches first time through
  1599  		q = p.Pcond
  1600  		if q != nil && (q.Back&2 != 0) {
  1601  			p.Back |= 1 // backward jump
  1602  			q.Back |= 4 // loop head
  1603  		}
  1604  
  1605  		if p.As == AADJSP {
  1606  			p.To.Type = obj.TYPE_REG
  1607  			p.To.Reg = REG_SP
  1608  			v = int32(-p.From.Offset)
  1609  			p.From.Offset = int64(v)
  1610  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1611  			if v < 0 {
  1612  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1613  				v = -v
  1614  				p.From.Offset = int64(v)
  1615  			}
  1616  
  1617  			if v == 0 {
  1618  				p.As = obj.ANOP
  1619  			}
  1620  		}
  1621  	}
  1622  
  1623  	n := 0
  1624  	var bp []byte
  1625  	var c int32
  1626  	var i int
  1627  	var loop int32
  1628  	var m int
  1629  	var p *obj.Prog
  1630  	for {
  1631  		loop = 0
  1632  		for i = 0; i < len(s.R); i++ {
  1633  			s.R[i] = obj.Reloc{}
  1634  		}
  1635  		s.R = s.R[:0]
  1636  		s.P = s.P[:0]
  1637  		c = 0
  1638  		for p = s.Text; p != nil; p = p.Link {
  1639  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1640  				var deferreturn *obj.LSym
  1641  
  1642  				if deferreturn == nil {
  1643  					deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1644  				}
  1645  
  1646  				// pad everything to avoid crossing 32-byte boundary
  1647  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1648  					c = naclpad(ctxt, s, c, -c&31)
  1649  				}
  1650  
  1651  				// pad call deferreturn to start at 32-byte boundary
  1652  				// so that subtracting 5 in jmpdefer will jump back
  1653  				// to that boundary and rerun the call.
  1654  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1655  					c = naclpad(ctxt, s, c, -c&31)
  1656  				}
  1657  
  1658  				// pad call to end at 32-byte boundary
  1659  				if p.As == obj.ACALL {
  1660  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1661  				}
  1662  
  1663  				// the linker treats REP and STOSQ as different instructions
  1664  				// but in fact the REP is a prefix on the STOSQ.
  1665  				// make sure REP has room for 2 more bytes, so that
  1666  				// padding will not be inserted before the next instruction.
  1667  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1668  					c = naclpad(ctxt, s, c, -c&31)
  1669  				}
  1670  
  1671  				// same for LOCK.
  1672  				// various instructions follow; the longest is 4 bytes.
  1673  				// give ourselves 8 bytes so as to avoid surprises.
  1674  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1675  					c = naclpad(ctxt, s, c, -c&31)
  1676  				}
  1677  			}
  1678  
  1679  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1680  				// pad with NOPs
  1681  				v = -c & (LoopAlign - 1)
  1682  
  1683  				if v <= MaxLoopPad {
  1684  					obj.Symgrow(ctxt, s, int64(c)+int64(v))
  1685  					fillnop(s.P[c:], int(v))
  1686  					c += v
  1687  				}
  1688  			}
  1689  
  1690  			p.Pc = int64(c)
  1691  
  1692  			// process forward jumps to p
  1693  			for q = p.Comefrom; q != nil; q = q.Forwd {
  1694  				v = int32(p.Pc - (q.Pc + int64(q.Mark)))
  1695  				if q.Back&2 != 0 { // short
  1696  					if v > 127 {
  1697  						loop++
  1698  						q.Back ^= 2
  1699  					}
  1700  
  1701  					if q.As == AJCXZL {
  1702  						s.P[q.Pc+2] = byte(v)
  1703  					} else {
  1704  						s.P[q.Pc+1] = byte(v)
  1705  					}
  1706  				} else {
  1707  					bp = s.P[q.Pc+int64(q.Mark)-4:]
  1708  					bp[0] = byte(v)
  1709  					bp = bp[1:]
  1710  					bp[0] = byte(v >> 8)
  1711  					bp = bp[1:]
  1712  					bp[0] = byte(v >> 16)
  1713  					bp = bp[1:]
  1714  					bp[0] = byte(v >> 24)
  1715  				}
  1716  			}
  1717  
  1718  			p.Comefrom = nil
  1719  
  1720  			p.Pc = int64(c)
  1721  			asmins(ctxt, p)
  1722  			m = -cap(ctxt.Andptr) + cap(ctxt.And[:])
  1723  			if int(p.Isize) != m {
  1724  				p.Isize = uint8(m)
  1725  				loop++
  1726  			}
  1727  
  1728  			obj.Symgrow(ctxt, s, p.Pc+int64(m))
  1729  			copy(s.P[p.Pc:][:m], ctxt.And[:m])
  1730  			p.Mark = uint16(m)
  1731  			c += int32(m)
  1732  		}
  1733  
  1734  		n++
  1735  		if n > 20 {
  1736  			ctxt.Diag("span must be looping")
  1737  			log.Fatalf("loop")
  1738  		}
  1739  		if loop == 0 {
  1740  			break
  1741  		}
  1742  	}
  1743  
  1744  	if ctxt.Headtype == obj.Hnacl {
  1745  		c = naclpad(ctxt, s, c, -c&31)
  1746  	}
  1747  
  1748  	c += -c & (FuncAlign - 1)
  1749  	s.Size = int64(c)
  1750  
  1751  	if false { /* debug['a'] > 1 */
  1752  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1753  		var i int
  1754  		for i = 0; i < len(s.P); i++ {
  1755  			fmt.Printf(" %.2x", s.P[i])
  1756  			if i%16 == 15 {
  1757  				fmt.Printf("\n  %.6x", uint(i+1))
  1758  			}
  1759  		}
  1760  
  1761  		if i%16 != 0 {
  1762  			fmt.Printf("\n")
  1763  		}
  1764  
  1765  		for i := 0; i < len(s.R); i++ {
  1766  			r := &s.R[i]
  1767  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1768  		}
  1769  	}
  1770  }
  1771  
  1772  func instinit() {
  1773  	var c int
  1774  
  1775  	for i := 1; optab[i].as != 0; i++ {
  1776  		c = int(optab[i].as)
  1777  		if opindex[c&obj.AMask] != nil {
  1778  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  1779  		}
  1780  		opindex[c&obj.AMask] = &optab[i]
  1781  	}
  1782  
  1783  	for i := 0; i < Ymax; i++ {
  1784  		ycover[i*Ymax+i] = 1
  1785  	}
  1786  
  1787  	ycover[Yi0*Ymax+Yi8] = 1
  1788  	ycover[Yi1*Ymax+Yi8] = 1
  1789  	ycover[Yu7*Ymax+Yi8] = 1
  1790  
  1791  	ycover[Yi0*Ymax+Yu7] = 1
  1792  	ycover[Yi1*Ymax+Yu7] = 1
  1793  
  1794  	ycover[Yi0*Ymax+Yu8] = 1
  1795  	ycover[Yi1*Ymax+Yu8] = 1
  1796  	ycover[Yu7*Ymax+Yu8] = 1
  1797  
  1798  	ycover[Yi0*Ymax+Ys32] = 1
  1799  	ycover[Yi1*Ymax+Ys32] = 1
  1800  	ycover[Yu7*Ymax+Ys32] = 1
  1801  	ycover[Yu8*Ymax+Ys32] = 1
  1802  	ycover[Yi8*Ymax+Ys32] = 1
  1803  
  1804  	ycover[Yi0*Ymax+Yi32] = 1
  1805  	ycover[Yi1*Ymax+Yi32] = 1
  1806  	ycover[Yu7*Ymax+Yi32] = 1
  1807  	ycover[Yu8*Ymax+Yi32] = 1
  1808  	ycover[Yi8*Ymax+Yi32] = 1
  1809  	ycover[Ys32*Ymax+Yi32] = 1
  1810  
  1811  	ycover[Yi0*Ymax+Yi64] = 1
  1812  	ycover[Yi1*Ymax+Yi64] = 1
  1813  	ycover[Yu7*Ymax+Yi64] = 1
  1814  	ycover[Yu8*Ymax+Yi64] = 1
  1815  	ycover[Yi8*Ymax+Yi64] = 1
  1816  	ycover[Ys32*Ymax+Yi64] = 1
  1817  	ycover[Yi32*Ymax+Yi64] = 1
  1818  
  1819  	ycover[Yal*Ymax+Yrb] = 1
  1820  	ycover[Ycl*Ymax+Yrb] = 1
  1821  	ycover[Yax*Ymax+Yrb] = 1
  1822  	ycover[Ycx*Ymax+Yrb] = 1
  1823  	ycover[Yrx*Ymax+Yrb] = 1
  1824  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  1825  
  1826  	ycover[Ycl*Ymax+Ycx] = 1
  1827  
  1828  	ycover[Yax*Ymax+Yrx] = 1
  1829  	ycover[Ycx*Ymax+Yrx] = 1
  1830  
  1831  	ycover[Yax*Ymax+Yrl] = 1
  1832  	ycover[Ycx*Ymax+Yrl] = 1
  1833  	ycover[Yrx*Ymax+Yrl] = 1
  1834  	ycover[Yrl32*Ymax+Yrl] = 1
  1835  
  1836  	ycover[Yf0*Ymax+Yrf] = 1
  1837  
  1838  	ycover[Yal*Ymax+Ymb] = 1
  1839  	ycover[Ycl*Ymax+Ymb] = 1
  1840  	ycover[Yax*Ymax+Ymb] = 1
  1841  	ycover[Ycx*Ymax+Ymb] = 1
  1842  	ycover[Yrx*Ymax+Ymb] = 1
  1843  	ycover[Yrb*Ymax+Ymb] = 1
  1844  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  1845  	ycover[Ym*Ymax+Ymb] = 1
  1846  
  1847  	ycover[Yax*Ymax+Yml] = 1
  1848  	ycover[Ycx*Ymax+Yml] = 1
  1849  	ycover[Yrx*Ymax+Yml] = 1
  1850  	ycover[Yrl*Ymax+Yml] = 1
  1851  	ycover[Yrl32*Ymax+Yml] = 1
  1852  	ycover[Ym*Ymax+Yml] = 1
  1853  
  1854  	ycover[Yax*Ymax+Ymm] = 1
  1855  	ycover[Ycx*Ymax+Ymm] = 1
  1856  	ycover[Yrx*Ymax+Ymm] = 1
  1857  	ycover[Yrl*Ymax+Ymm] = 1
  1858  	ycover[Yrl32*Ymax+Ymm] = 1
  1859  	ycover[Ym*Ymax+Ymm] = 1
  1860  	ycover[Ymr*Ymax+Ymm] = 1
  1861  
  1862  	ycover[Ym*Ymax+Yxm] = 1
  1863  	ycover[Yxr*Ymax+Yxm] = 1
  1864  
  1865  	for i := 0; i < MAXREG; i++ {
  1866  		reg[i] = -1
  1867  		if i >= REG_AL && i <= REG_R15B {
  1868  			reg[i] = (i - REG_AL) & 7
  1869  			if i >= REG_SPB && i <= REG_DIB {
  1870  				regrex[i] = 0x40
  1871  			}
  1872  			if i >= REG_R8B && i <= REG_R15B {
  1873  				regrex[i] = Rxr | Rxx | Rxb
  1874  			}
  1875  		}
  1876  
  1877  		if i >= REG_AH && i <= REG_BH {
  1878  			reg[i] = 4 + ((i - REG_AH) & 7)
  1879  		}
  1880  		if i >= REG_AX && i <= REG_R15 {
  1881  			reg[i] = (i - REG_AX) & 7
  1882  			if i >= REG_R8 {
  1883  				regrex[i] = Rxr | Rxx | Rxb
  1884  			}
  1885  		}
  1886  
  1887  		if i >= REG_F0 && i <= REG_F0+7 {
  1888  			reg[i] = (i - REG_F0) & 7
  1889  		}
  1890  		if i >= REG_M0 && i <= REG_M0+7 {
  1891  			reg[i] = (i - REG_M0) & 7
  1892  		}
  1893  		if i >= REG_X0 && i <= REG_X0+15 {
  1894  			reg[i] = (i - REG_X0) & 7
  1895  			if i >= REG_X0+8 {
  1896  				regrex[i] = Rxr | Rxx | Rxb
  1897  			}
  1898  		}
  1899  
  1900  		if i >= REG_CR+8 && i <= REG_CR+15 {
  1901  			regrex[i] = Rxr
  1902  		}
  1903  	}
  1904  }
  1905  
  1906  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  1907  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  1908  		return 0
  1909  	}
  1910  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  1911  		switch a.Reg {
  1912  		case REG_CS:
  1913  			return 0x2e
  1914  
  1915  		case REG_DS:
  1916  			return 0x3e
  1917  
  1918  		case REG_ES:
  1919  			return 0x26
  1920  
  1921  		case REG_FS:
  1922  			return 0x64
  1923  
  1924  		case REG_GS:
  1925  			return 0x65
  1926  
  1927  		case REG_TLS:
  1928  			// NOTE: Systems listed here should be only systems that
  1929  			// support direct TLS references like 8(TLS) implemented as
  1930  			// direct references from FS or GS. Systems that require
  1931  			// the initial-exec model, where you load the TLS base into
  1932  			// a register and then index from that register, do not reach
  1933  			// this code and should not be listed.
  1934  			if p.Mode == 32 {
  1935  				switch ctxt.Headtype {
  1936  				default:
  1937  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1938  
  1939  				case obj.Hdarwin,
  1940  					obj.Hdragonfly,
  1941  					obj.Hfreebsd,
  1942  					obj.Hnetbsd,
  1943  					obj.Hopenbsd:
  1944  					return 0x65 // GS
  1945  				}
  1946  			}
  1947  
  1948  			switch ctxt.Headtype {
  1949  			default:
  1950  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1951  
  1952  			case obj.Hlinux:
  1953  				if ctxt.Flag_shared != 0 {
  1954  					log.Fatalf("unknown TLS base register for linux with -shared")
  1955  				} else {
  1956  					return 0x64 // FS
  1957  				}
  1958  
  1959  			case obj.Hdragonfly,
  1960  				obj.Hfreebsd,
  1961  				obj.Hnetbsd,
  1962  				obj.Hopenbsd,
  1963  				obj.Hsolaris:
  1964  				return 0x64 // FS
  1965  
  1966  			case obj.Hdarwin:
  1967  				return 0x65 // GS
  1968  			}
  1969  		}
  1970  	}
  1971  
  1972  	if p.Mode == 32 {
  1973  		return 0
  1974  	}
  1975  
  1976  	switch a.Index {
  1977  	case REG_CS:
  1978  		return 0x2e
  1979  
  1980  	case REG_DS:
  1981  		return 0x3e
  1982  
  1983  	case REG_ES:
  1984  		return 0x26
  1985  
  1986  	case REG_TLS:
  1987  		if ctxt.Flag_shared != 0 {
  1988  			// When building for inclusion into a shared library, an instruction of the form
  1989  			//     MOV 0(CX)(TLS*1), AX
  1990  			// becomes
  1991  			//     mov %fs:(%rcx), %rax
  1992  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  1993  			// there is only one TLS variable -- g -- so this is OK). When not building for
  1994  			// a shared library the instruction does not require a prefix.
  1995  			if a.Offset != 0 {
  1996  				log.Fatalf("cannot handle non-0 offsets to TLS")
  1997  			}
  1998  			return 0x64
  1999  		}
  2000  
  2001  	case REG_FS:
  2002  		return 0x64
  2003  
  2004  	case REG_GS:
  2005  		return 0x65
  2006  	}
  2007  
  2008  	return 0
  2009  }
  2010  
  2011  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2012  	switch a.Type {
  2013  	case obj.TYPE_NONE:
  2014  		return Ynone
  2015  
  2016  	case obj.TYPE_BRANCH:
  2017  		return Ybr
  2018  
  2019  	case obj.TYPE_INDIR:
  2020  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2021  			return Yindir
  2022  		}
  2023  		return Yxxx
  2024  
  2025  	case obj.TYPE_MEM:
  2026  		return Ym
  2027  
  2028  	case obj.TYPE_ADDR:
  2029  		switch a.Name {
  2030  		case obj.NAME_EXTERN,
  2031  			obj.NAME_GOTREF,
  2032  			obj.NAME_STATIC:
  2033  			if a.Sym != nil && isextern(a.Sym) || p.Mode == 32 {
  2034  				return Yi32
  2035  			}
  2036  			return Yiauto // use pc-relative addressing
  2037  
  2038  		case obj.NAME_AUTO,
  2039  			obj.NAME_PARAM:
  2040  			return Yiauto
  2041  		}
  2042  
  2043  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2044  		// and got Yi32 in an earlier version of this code.
  2045  		// Keep doing that until we fix yduff etc.
  2046  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2047  			return Yi32
  2048  		}
  2049  
  2050  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2051  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2052  		}
  2053  		fallthrough
  2054  
  2055  		// fall through
  2056  
  2057  	case obj.TYPE_CONST:
  2058  		if a.Sym != nil {
  2059  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2060  		}
  2061  
  2062  		v := a.Offset
  2063  		if p.Mode == 32 {
  2064  			v = int64(int32(v))
  2065  		}
  2066  		if v == 0 {
  2067  			return Yi0
  2068  		}
  2069  		if v == 1 {
  2070  			return Yi1
  2071  		}
  2072  		if v >= 0 && v <= 127 {
  2073  			return Yu7
  2074  		}
  2075  		if v >= 0 && v <= 255 {
  2076  			return Yu8
  2077  		}
  2078  		if v >= -128 && v <= 127 {
  2079  			return Yi8
  2080  		}
  2081  		if p.Mode == 32 {
  2082  			return Yi32
  2083  		}
  2084  		l := int32(v)
  2085  		if int64(l) == v {
  2086  			return Ys32 /* can sign extend */
  2087  		}
  2088  		if v>>32 == 0 {
  2089  			return Yi32 /* unsigned */
  2090  		}
  2091  		return Yi64
  2092  
  2093  	case obj.TYPE_TEXTSIZE:
  2094  		return Ytextsize
  2095  	}
  2096  
  2097  	if a.Type != obj.TYPE_REG {
  2098  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2099  		return Yxxx
  2100  	}
  2101  
  2102  	switch a.Reg {
  2103  	case REG_AL:
  2104  		return Yal
  2105  
  2106  	case REG_AX:
  2107  		return Yax
  2108  
  2109  		/*
  2110  			case REG_SPB:
  2111  		*/
  2112  	case REG_BPB,
  2113  		REG_SIB,
  2114  		REG_DIB,
  2115  		REG_R8B,
  2116  		REG_R9B,
  2117  		REG_R10B,
  2118  		REG_R11B,
  2119  		REG_R12B,
  2120  		REG_R13B,
  2121  		REG_R14B,
  2122  		REG_R15B:
  2123  		if ctxt.Asmode != 64 {
  2124  			return Yxxx
  2125  		}
  2126  		fallthrough
  2127  
  2128  	case REG_DL,
  2129  		REG_BL,
  2130  		REG_AH,
  2131  		REG_CH,
  2132  		REG_DH,
  2133  		REG_BH:
  2134  		return Yrb
  2135  
  2136  	case REG_CL:
  2137  		return Ycl
  2138  
  2139  	case REG_CX:
  2140  		return Ycx
  2141  
  2142  	case REG_DX, REG_BX:
  2143  		return Yrx
  2144  
  2145  	case REG_R8, /* not really Yrl */
  2146  		REG_R9,
  2147  		REG_R10,
  2148  		REG_R11,
  2149  		REG_R12,
  2150  		REG_R13,
  2151  		REG_R14,
  2152  		REG_R15:
  2153  		if ctxt.Asmode != 64 {
  2154  			return Yxxx
  2155  		}
  2156  		fallthrough
  2157  
  2158  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2159  		if p.Mode == 32 {
  2160  			return Yrl32
  2161  		}
  2162  		return Yrl
  2163  
  2164  	case REG_F0 + 0:
  2165  		return Yf0
  2166  
  2167  	case REG_F0 + 1,
  2168  		REG_F0 + 2,
  2169  		REG_F0 + 3,
  2170  		REG_F0 + 4,
  2171  		REG_F0 + 5,
  2172  		REG_F0 + 6,
  2173  		REG_F0 + 7:
  2174  		return Yrf
  2175  
  2176  	case REG_M0 + 0,
  2177  		REG_M0 + 1,
  2178  		REG_M0 + 2,
  2179  		REG_M0 + 3,
  2180  		REG_M0 + 4,
  2181  		REG_M0 + 5,
  2182  		REG_M0 + 6,
  2183  		REG_M0 + 7:
  2184  		return Ymr
  2185  
  2186  	case REG_X0 + 0,
  2187  		REG_X0 + 1,
  2188  		REG_X0 + 2,
  2189  		REG_X0 + 3,
  2190  		REG_X0 + 4,
  2191  		REG_X0 + 5,
  2192  		REG_X0 + 6,
  2193  		REG_X0 + 7,
  2194  		REG_X0 + 8,
  2195  		REG_X0 + 9,
  2196  		REG_X0 + 10,
  2197  		REG_X0 + 11,
  2198  		REG_X0 + 12,
  2199  		REG_X0 + 13,
  2200  		REG_X0 + 14,
  2201  		REG_X0 + 15:
  2202  		return Yxr
  2203  
  2204  	case REG_CS:
  2205  		return Ycs
  2206  	case REG_SS:
  2207  		return Yss
  2208  	case REG_DS:
  2209  		return Yds
  2210  	case REG_ES:
  2211  		return Yes
  2212  	case REG_FS:
  2213  		return Yfs
  2214  	case REG_GS:
  2215  		return Ygs
  2216  	case REG_TLS:
  2217  		return Ytls
  2218  
  2219  	case REG_GDTR:
  2220  		return Ygdtr
  2221  	case REG_IDTR:
  2222  		return Yidtr
  2223  	case REG_LDTR:
  2224  		return Yldtr
  2225  	case REG_MSW:
  2226  		return Ymsw
  2227  	case REG_TASK:
  2228  		return Ytask
  2229  
  2230  	case REG_CR + 0:
  2231  		return Ycr0
  2232  	case REG_CR + 1:
  2233  		return Ycr1
  2234  	case REG_CR + 2:
  2235  		return Ycr2
  2236  	case REG_CR + 3:
  2237  		return Ycr3
  2238  	case REG_CR + 4:
  2239  		return Ycr4
  2240  	case REG_CR + 5:
  2241  		return Ycr5
  2242  	case REG_CR + 6:
  2243  		return Ycr6
  2244  	case REG_CR + 7:
  2245  		return Ycr7
  2246  	case REG_CR + 8:
  2247  		return Ycr8
  2248  
  2249  	case REG_DR + 0:
  2250  		return Ydr0
  2251  	case REG_DR + 1:
  2252  		return Ydr1
  2253  	case REG_DR + 2:
  2254  		return Ydr2
  2255  	case REG_DR + 3:
  2256  		return Ydr3
  2257  	case REG_DR + 4:
  2258  		return Ydr4
  2259  	case REG_DR + 5:
  2260  		return Ydr5
  2261  	case REG_DR + 6:
  2262  		return Ydr6
  2263  	case REG_DR + 7:
  2264  		return Ydr7
  2265  
  2266  	case REG_TR + 0:
  2267  		return Ytr0
  2268  	case REG_TR + 1:
  2269  		return Ytr1
  2270  	case REG_TR + 2:
  2271  		return Ytr2
  2272  	case REG_TR + 3:
  2273  		return Ytr3
  2274  	case REG_TR + 4:
  2275  		return Ytr4
  2276  	case REG_TR + 5:
  2277  		return Ytr5
  2278  	case REG_TR + 6:
  2279  		return Ytr6
  2280  	case REG_TR + 7:
  2281  		return Ytr7
  2282  	}
  2283  
  2284  	return Yxxx
  2285  }
  2286  
  2287  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2288  	var i int
  2289  
  2290  	switch index {
  2291  	default:
  2292  		goto bad
  2293  
  2294  	case REG_NONE:
  2295  		i = 4 << 3
  2296  		goto bas
  2297  
  2298  	case REG_R8,
  2299  		REG_R9,
  2300  		REG_R10,
  2301  		REG_R11,
  2302  		REG_R12,
  2303  		REG_R13,
  2304  		REG_R14,
  2305  		REG_R15:
  2306  		if ctxt.Asmode != 64 {
  2307  			goto bad
  2308  		}
  2309  		fallthrough
  2310  
  2311  	case REG_AX,
  2312  		REG_CX,
  2313  		REG_DX,
  2314  		REG_BX,
  2315  		REG_BP,
  2316  		REG_SI,
  2317  		REG_DI:
  2318  		i = reg[index] << 3
  2319  	}
  2320  
  2321  	switch scale {
  2322  	default:
  2323  		goto bad
  2324  
  2325  	case 1:
  2326  		break
  2327  
  2328  	case 2:
  2329  		i |= 1 << 6
  2330  
  2331  	case 4:
  2332  		i |= 2 << 6
  2333  
  2334  	case 8:
  2335  		i |= 3 << 6
  2336  	}
  2337  
  2338  bas:
  2339  	switch base {
  2340  	default:
  2341  		goto bad
  2342  
  2343  	case REG_NONE: /* must be mod=00 */
  2344  		i |= 5
  2345  
  2346  	case REG_R8,
  2347  		REG_R9,
  2348  		REG_R10,
  2349  		REG_R11,
  2350  		REG_R12,
  2351  		REG_R13,
  2352  		REG_R14,
  2353  		REG_R15:
  2354  		if ctxt.Asmode != 64 {
  2355  			goto bad
  2356  		}
  2357  		fallthrough
  2358  
  2359  	case REG_AX,
  2360  		REG_CX,
  2361  		REG_DX,
  2362  		REG_BX,
  2363  		REG_SP,
  2364  		REG_BP,
  2365  		REG_SI,
  2366  		REG_DI:
  2367  		i |= reg[base]
  2368  	}
  2369  
  2370  	ctxt.Andptr[0] = byte(i)
  2371  	ctxt.Andptr = ctxt.Andptr[1:]
  2372  	return
  2373  
  2374  bad:
  2375  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2376  	ctxt.Andptr[0] = 0
  2377  	ctxt.Andptr = ctxt.Andptr[1:]
  2378  	return
  2379  }
  2380  
  2381  func put4(ctxt *obj.Link, v int32) {
  2382  	ctxt.Andptr[0] = byte(v)
  2383  	ctxt.Andptr[1] = byte(v >> 8)
  2384  	ctxt.Andptr[2] = byte(v >> 16)
  2385  	ctxt.Andptr[3] = byte(v >> 24)
  2386  	ctxt.Andptr = ctxt.Andptr[4:]
  2387  }
  2388  
  2389  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2390  	var rel obj.Reloc
  2391  
  2392  	v := vaddr(ctxt, p, a, &rel)
  2393  	if rel.Siz != 0 {
  2394  		if rel.Siz != 4 {
  2395  			ctxt.Diag("bad reloc")
  2396  		}
  2397  		r := obj.Addrel(ctxt.Cursym)
  2398  		*r = rel
  2399  		r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2400  	}
  2401  
  2402  	put4(ctxt, int32(v))
  2403  }
  2404  
  2405  func put8(ctxt *obj.Link, v int64) {
  2406  	ctxt.Andptr[0] = byte(v)
  2407  	ctxt.Andptr[1] = byte(v >> 8)
  2408  	ctxt.Andptr[2] = byte(v >> 16)
  2409  	ctxt.Andptr[3] = byte(v >> 24)
  2410  	ctxt.Andptr[4] = byte(v >> 32)
  2411  	ctxt.Andptr[5] = byte(v >> 40)
  2412  	ctxt.Andptr[6] = byte(v >> 48)
  2413  	ctxt.Andptr[7] = byte(v >> 56)
  2414  	ctxt.Andptr = ctxt.Andptr[8:]
  2415  }
  2416  
  2417  /*
  2418  static void
  2419  relput8(Prog *p, Addr *a)
  2420  {
  2421  	vlong v;
  2422  	Reloc rel, *r;
  2423  
  2424  	v = vaddr(ctxt, p, a, &rel);
  2425  	if(rel.siz != 0) {
  2426  		r = addrel(ctxt->cursym);
  2427  		*r = rel;
  2428  		r->siz = 8;
  2429  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2430  	}
  2431  	put8(ctxt, v);
  2432  }
  2433  */
  2434  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2435  	if r != nil {
  2436  		*r = obj.Reloc{}
  2437  	}
  2438  
  2439  	switch a.Name {
  2440  	case obj.NAME_STATIC,
  2441  		obj.NAME_GOTREF,
  2442  		obj.NAME_EXTERN:
  2443  		s := a.Sym
  2444  		if r == nil {
  2445  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2446  			log.Fatalf("reloc")
  2447  		}
  2448  
  2449  		if a.Name == obj.NAME_GOTREF {
  2450  			r.Siz = 4
  2451  			r.Type = obj.R_GOTPCREL
  2452  		} else if isextern(s) || p.Mode != 64 {
  2453  			r.Siz = 4
  2454  			r.Type = obj.R_ADDR
  2455  		} else {
  2456  			r.Siz = 4
  2457  			r.Type = obj.R_PCREL
  2458  		}
  2459  
  2460  		r.Off = -1 // caller must fill in
  2461  		r.Sym = s
  2462  		r.Add = a.Offset
  2463  		if s.Type == obj.STLSBSS {
  2464  			r.Xadd = r.Add - int64(r.Siz)
  2465  			r.Type = obj.R_TLS
  2466  			r.Xsym = s
  2467  		}
  2468  
  2469  		return 0
  2470  	}
  2471  
  2472  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2473  		if r == nil {
  2474  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2475  			log.Fatalf("reloc")
  2476  		}
  2477  
  2478  		r.Type = obj.R_TLS_LE
  2479  		r.Siz = 4
  2480  		r.Off = -1 // caller must fill in
  2481  		r.Add = a.Offset
  2482  		return 0
  2483  	}
  2484  
  2485  	return a.Offset
  2486  }
  2487  
  2488  func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  2489  	var base int
  2490  	var rel obj.Reloc
  2491  
  2492  	rex &= 0x40 | Rxr
  2493  	v := int32(a.Offset)
  2494  	rel.Siz = 0
  2495  
  2496  	switch a.Type {
  2497  	case obj.TYPE_ADDR:
  2498  		if a.Name == obj.NAME_NONE {
  2499  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  2500  		}
  2501  		if a.Index == REG_TLS {
  2502  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  2503  		}
  2504  		goto bad
  2505  
  2506  	case obj.TYPE_REG:
  2507  		if a.Reg < REG_AL || REG_X0+15 < a.Reg {
  2508  			goto bad
  2509  		}
  2510  		if v != 0 {
  2511  			goto bad
  2512  		}
  2513  		ctxt.Andptr[0] = byte(3<<6 | reg[a.Reg]<<0 | r<<3)
  2514  		ctxt.Andptr = ctxt.Andptr[1:]
  2515  		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  2516  		return
  2517  	}
  2518  
  2519  	if a.Type != obj.TYPE_MEM {
  2520  		goto bad
  2521  	}
  2522  
  2523  	if a.Index != REG_NONE && a.Index != REG_TLS {
  2524  		base := int(a.Reg)
  2525  		switch a.Name {
  2526  		case obj.NAME_EXTERN,
  2527  			obj.NAME_GOTREF,
  2528  			obj.NAME_STATIC:
  2529  			if !isextern(a.Sym) && p.Mode == 64 {
  2530  				goto bad
  2531  			}
  2532  			base = REG_NONE
  2533  			v = int32(vaddr(ctxt, p, a, &rel))
  2534  
  2535  		case obj.NAME_AUTO,
  2536  			obj.NAME_PARAM:
  2537  			base = REG_SP
  2538  		}
  2539  
  2540  		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  2541  		if base == REG_NONE {
  2542  			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
  2543  			ctxt.Andptr = ctxt.Andptr[1:]
  2544  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2545  			goto putrelv
  2546  		}
  2547  
  2548  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  2549  			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
  2550  			ctxt.Andptr = ctxt.Andptr[1:]
  2551  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2552  			return
  2553  		}
  2554  
  2555  		if v >= -128 && v < 128 && rel.Siz == 0 {
  2556  			ctxt.Andptr[0] = byte(1<<6 | 4<<0 | r<<3)
  2557  			ctxt.Andptr = ctxt.Andptr[1:]
  2558  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2559  			ctxt.Andptr[0] = byte(v)
  2560  			ctxt.Andptr = ctxt.Andptr[1:]
  2561  			return
  2562  		}
  2563  
  2564  		ctxt.Andptr[0] = byte(2<<6 | 4<<0 | r<<3)
  2565  		ctxt.Andptr = ctxt.Andptr[1:]
  2566  		asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2567  		goto putrelv
  2568  	}
  2569  
  2570  	base = int(a.Reg)
  2571  	switch a.Name {
  2572  	case obj.NAME_STATIC,
  2573  		obj.NAME_GOTREF,
  2574  		obj.NAME_EXTERN:
  2575  		if a.Sym == nil {
  2576  			ctxt.Diag("bad addr: %v", p)
  2577  		}
  2578  		base = REG_NONE
  2579  		v = int32(vaddr(ctxt, p, a, &rel))
  2580  
  2581  	case obj.NAME_AUTO,
  2582  		obj.NAME_PARAM:
  2583  		base = REG_SP
  2584  	}
  2585  
  2586  	if base == REG_TLS {
  2587  		v = int32(vaddr(ctxt, p, a, &rel))
  2588  	}
  2589  
  2590  	ctxt.Rexflag |= regrex[base]&Rxb | rex
  2591  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  2592  		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
  2593  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  2594  				ctxt.Diag("%v has offset against gotref", p)
  2595  			}
  2596  			ctxt.Andptr[0] = byte(0<<6 | 5<<0 | r<<3)
  2597  			ctxt.Andptr = ctxt.Andptr[1:]
  2598  			goto putrelv
  2599  		}
  2600  
  2601  		/* temporary */
  2602  		ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
  2603  		ctxt.Andptr = ctxt.Andptr[1:] /* sib present */
  2604  		ctxt.Andptr[0] = 0<<6 | 4<<3 | 5<<0
  2605  		ctxt.Andptr = ctxt.Andptr[1:] /* DS:d32 */
  2606  		goto putrelv
  2607  	}
  2608  
  2609  	if base == REG_SP || base == REG_R12 {
  2610  		if v == 0 {
  2611  			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
  2612  			ctxt.Andptr = ctxt.Andptr[1:]
  2613  			asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2614  			return
  2615  		}
  2616  
  2617  		if v >= -128 && v < 128 {
  2618  			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
  2619  			ctxt.Andptr = ctxt.Andptr[1:]
  2620  			asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2621  			ctxt.Andptr[0] = byte(v)
  2622  			ctxt.Andptr = ctxt.Andptr[1:]
  2623  			return
  2624  		}
  2625  
  2626  		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
  2627  		ctxt.Andptr = ctxt.Andptr[1:]
  2628  		asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2629  		goto putrelv
  2630  	}
  2631  
  2632  	if REG_AX <= base && base <= REG_R15 {
  2633  		if a.Index == REG_TLS && ctxt.Flag_shared == 0 {
  2634  			rel = obj.Reloc{}
  2635  			rel.Type = obj.R_TLS_IE
  2636  			rel.Siz = 4
  2637  			rel.Sym = nil
  2638  			rel.Add = int64(v)
  2639  			v = 0
  2640  		}
  2641  
  2642  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  2643  			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
  2644  			ctxt.Andptr = ctxt.Andptr[1:]
  2645  			return
  2646  		}
  2647  
  2648  		if v >= -128 && v < 128 && rel.Siz == 0 {
  2649  			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
  2650  			ctxt.Andptr[1] = byte(v)
  2651  			ctxt.Andptr = ctxt.Andptr[2:]
  2652  			return
  2653  		}
  2654  
  2655  		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
  2656  		ctxt.Andptr = ctxt.Andptr[1:]
  2657  		goto putrelv
  2658  	}
  2659  
  2660  	goto bad
  2661  
  2662  putrelv:
  2663  	if rel.Siz != 0 {
  2664  		if rel.Siz != 4 {
  2665  			ctxt.Diag("bad rel")
  2666  			goto bad
  2667  		}
  2668  
  2669  		r := obj.Addrel(ctxt.Cursym)
  2670  		*r = rel
  2671  		r.Off = int32(ctxt.Curp.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2672  	}
  2673  
  2674  	put4(ctxt, v)
  2675  	return
  2676  
  2677  bad:
  2678  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  2679  	return
  2680  }
  2681  
  2682  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2683  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2684  }
  2685  
  2686  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2687  	asmandsz(ctxt, p, a, o, 0, 0)
  2688  }
  2689  
  2690  func bytereg(a *obj.Addr, t *uint8) {
  2691  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2692  		a.Reg += REG_AL - REG_AX
  2693  		*t = 0
  2694  	}
  2695  }
  2696  
  2697  func unbytereg(a *obj.Addr, t *uint8) {
  2698  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2699  		a.Reg += REG_AX - REG_AL
  2700  		*t = 0
  2701  	}
  2702  }
  2703  
  2704  const (
  2705  	E = 0xff
  2706  )
  2707  
  2708  var ymovtab = []Movtab{
  2709  	/* push */
  2710  	Movtab{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
  2711  	Movtab{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
  2712  	Movtab{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
  2713  	Movtab{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
  2714  	Movtab{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
  2715  	Movtab{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
  2716  	Movtab{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
  2717  	Movtab{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
  2718  	Movtab{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
  2719  	Movtab{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
  2720  	Movtab{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
  2721  	Movtab{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
  2722  	Movtab{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
  2723  	Movtab{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},
  2724  
  2725  	/* pop */
  2726  	Movtab{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
  2727  	Movtab{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
  2728  	Movtab{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
  2729  	Movtab{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
  2730  	Movtab{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
  2731  	Movtab{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
  2732  	Movtab{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
  2733  	Movtab{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
  2734  	Movtab{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
  2735  	Movtab{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
  2736  	Movtab{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
  2737  	Movtab{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},
  2738  
  2739  	/* mov seg */
  2740  	Movtab{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
  2741  	Movtab{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
  2742  	Movtab{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
  2743  	Movtab{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
  2744  	Movtab{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
  2745  	Movtab{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
  2746  	Movtab{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
  2747  	Movtab{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
  2748  	Movtab{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
  2749  	Movtab{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
  2750  	Movtab{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
  2751  	Movtab{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},
  2752  
  2753  	/* mov cr */
  2754  	Movtab{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
  2755  	Movtab{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
  2756  	Movtab{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
  2757  	Movtab{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
  2758  	Movtab{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
  2759  	Movtab{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
  2760  	Movtab{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
  2761  	Movtab{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
  2762  	Movtab{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
  2763  	Movtab{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
  2764  	Movtab{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
  2765  	Movtab{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
  2766  	Movtab{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
  2767  	Movtab{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
  2768  	Movtab{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
  2769  	Movtab{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
  2770  	Movtab{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
  2771  	Movtab{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
  2772  	Movtab{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
  2773  	Movtab{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
  2774  
  2775  	/* mov dr */
  2776  	Movtab{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
  2777  	Movtab{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
  2778  	Movtab{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
  2779  	Movtab{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
  2780  	Movtab{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
  2781  	Movtab{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
  2782  	Movtab{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
  2783  	Movtab{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
  2784  	Movtab{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
  2785  	Movtab{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
  2786  	Movtab{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
  2787  	Movtab{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
  2788  
  2789  	/* mov tr */
  2790  	Movtab{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
  2791  	Movtab{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
  2792  	Movtab{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
  2793  	Movtab{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},
  2794  
  2795  	/* lgdt, sgdt, lidt, sidt */
  2796  	Movtab{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
  2797  	Movtab{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
  2798  	Movtab{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
  2799  	Movtab{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
  2800  	Movtab{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
  2801  	Movtab{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
  2802  	Movtab{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
  2803  	Movtab{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
  2804  
  2805  	/* lldt, sldt */
  2806  	Movtab{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
  2807  	Movtab{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},
  2808  
  2809  	/* lmsw, smsw */
  2810  	Movtab{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
  2811  	Movtab{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},
  2812  
  2813  	/* ltr, str */
  2814  	Movtab{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
  2815  	Movtab{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},
  2816  
  2817  	/* load full pointer - unsupported
  2818  	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
  2819  	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
  2820  	*/
  2821  
  2822  	/* double shift */
  2823  	Movtab{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2824  	Movtab{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2825  	Movtab{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2826  	Movtab{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2827  	Movtab{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2828  	Movtab{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2829  	Movtab{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2830  	Movtab{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2831  	Movtab{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2832  	Movtab{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2833  	Movtab{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2834  	Movtab{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2835  	Movtab{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2836  	Movtab{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2837  	Movtab{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2838  	Movtab{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2839  	Movtab{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2840  	Movtab{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2841  
  2842  	/* load TLS base */
  2843  	Movtab{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
  2844  	Movtab{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
  2845  	Movtab{0, 0, 0, 0, 0, [4]uint8{}},
  2846  }
  2847  
  2848  func isax(a *obj.Addr) bool {
  2849  	switch a.Reg {
  2850  	case REG_AX, REG_AL, REG_AH:
  2851  		return true
  2852  	}
  2853  
  2854  	if a.Index == REG_AX {
  2855  		return true
  2856  	}
  2857  	return false
  2858  }
  2859  
  2860  func subreg(p *obj.Prog, from int, to int) {
  2861  	if false { /* debug['Q'] */
  2862  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  2863  	}
  2864  
  2865  	if int(p.From.Reg) == from {
  2866  		p.From.Reg = int16(to)
  2867  		p.Ft = 0
  2868  	}
  2869  
  2870  	if int(p.To.Reg) == from {
  2871  		p.To.Reg = int16(to)
  2872  		p.Tt = 0
  2873  	}
  2874  
  2875  	if int(p.From.Index) == from {
  2876  		p.From.Index = int16(to)
  2877  		p.Ft = 0
  2878  	}
  2879  
  2880  	if int(p.To.Index) == from {
  2881  		p.To.Index = int16(to)
  2882  		p.Tt = 0
  2883  	}
  2884  
  2885  	if false { /* debug['Q'] */
  2886  		fmt.Printf("%v\n", p)
  2887  	}
  2888  }
  2889  
  2890  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  2891  	switch op {
  2892  	case Pm, Pe, Pf2, Pf3:
  2893  		if osize != 1 {
  2894  			if op != Pm {
  2895  				ctxt.Andptr[0] = byte(op)
  2896  				ctxt.Andptr = ctxt.Andptr[1:]
  2897  			}
  2898  			ctxt.Andptr[0] = Pm
  2899  			ctxt.Andptr = ctxt.Andptr[1:]
  2900  			z++
  2901  			op = int(o.op[z])
  2902  			break
  2903  		}
  2904  		fallthrough
  2905  
  2906  	default:
  2907  		if -cap(ctxt.Andptr) == -cap(ctxt.And) || ctxt.And[-cap(ctxt.Andptr)+cap(ctxt.And[:])-1] != Pm {
  2908  			ctxt.Andptr[0] = Pm
  2909  			ctxt.Andptr = ctxt.Andptr[1:]
  2910  		}
  2911  	}
  2912  
  2913  	ctxt.Andptr[0] = byte(op)
  2914  	ctxt.Andptr = ctxt.Andptr[1:]
  2915  	return z
  2916  }
  2917  
  2918  var bpduff1 = []byte{
  2919  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  2920  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  2921  }
  2922  
  2923  var bpduff2 = []byte{
  2924  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  2925  }
  2926  
  2927  func doasm(ctxt *obj.Link, p *obj.Prog) {
  2928  	ctxt.Curp = p // TODO
  2929  
  2930  	o := opindex[p.As&obj.AMask]
  2931  
  2932  	if o == nil {
  2933  		ctxt.Diag("asmins: missing op %v", p)
  2934  		return
  2935  	}
  2936  
  2937  	pre := prefixof(ctxt, p, &p.From)
  2938  	if pre != 0 {
  2939  		ctxt.Andptr[0] = byte(pre)
  2940  		ctxt.Andptr = ctxt.Andptr[1:]
  2941  	}
  2942  	pre = prefixof(ctxt, p, &p.To)
  2943  	if pre != 0 {
  2944  		ctxt.Andptr[0] = byte(pre)
  2945  		ctxt.Andptr = ctxt.Andptr[1:]
  2946  	}
  2947  
  2948  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  2949  	// which encodes as SHRQ $32(DX*0), AX.
  2950  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  2951  	// Change encoding generated by assemblers and compilers and remove.
  2952  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  2953  		p.From3.Type = obj.TYPE_REG
  2954  		p.From3.Reg = p.From.Index
  2955  		p.From.Index = 0
  2956  	}
  2957  
  2958  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  2959  	// Change encoding generated by assemblers and compilers (if any) and remove.
  2960  	switch p.As {
  2961  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  2962  		if p.From3.Type == obj.TYPE_NONE {
  2963  			p.From3 = p.From
  2964  			p.From = obj.Addr{}
  2965  			p.From.Type = obj.TYPE_CONST
  2966  			p.From.Offset = p.To.Offset
  2967  			p.To.Offset = 0
  2968  		}
  2969  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  2970  		if p.From3.Type == obj.TYPE_NONE {
  2971  			p.From3 = p.To
  2972  			p.To = obj.Addr{}
  2973  			p.To.Type = obj.TYPE_CONST
  2974  			p.To.Offset = p.From3.Offset
  2975  			p.From3.Offset = 0
  2976  		}
  2977  	}
  2978  
  2979  	if p.Ft == 0 {
  2980  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  2981  	}
  2982  	if p.F3t == 0 {
  2983  		p.F3t = uint8(oclass(ctxt, p, &p.From3))
  2984  	}
  2985  	if p.Tt == 0 {
  2986  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  2987  	}
  2988  
  2989  	ft := int(p.Ft) * Ymax
  2990  	f3t := int(p.F3t) * Ymax
  2991  	tt := int(p.Tt) * Ymax
  2992  
  2993  	xo := bool2int(o.op[0] == 0x0f)
  2994  	z := 0
  2995  	var a *obj.Addr
  2996  	var l int
  2997  	var op int
  2998  	var q *obj.Prog
  2999  	var r *obj.Reloc
  3000  	var rel obj.Reloc
  3001  	var v int64
  3002  	for i := range o.ytab {
  3003  		yt := &o.ytab[i]
  3004  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3005  			switch o.prefix {
  3006  			case Px1: /* first option valid only in 32-bit mode */
  3007  				if ctxt.Mode == 64 && z == 0 {
  3008  					z += int(yt.zoffset) + xo
  3009  					continue
  3010  				}
  3011  			case Pq: /* 16 bit escape and opcode escape */
  3012  				ctxt.Andptr[0] = Pe
  3013  				ctxt.Andptr = ctxt.Andptr[1:]
  3014  
  3015  				ctxt.Andptr[0] = Pm
  3016  				ctxt.Andptr = ctxt.Andptr[1:]
  3017  
  3018  			case Pq3: /* 16 bit escape, Rex.w, and opcode escape */
  3019  				ctxt.Andptr[0] = Pe
  3020  				ctxt.Andptr = ctxt.Andptr[1:]
  3021  
  3022  				ctxt.Andptr[0] = Pw
  3023  				ctxt.Andptr = ctxt.Andptr[1:]
  3024  				ctxt.Andptr[0] = Pm
  3025  				ctxt.Andptr = ctxt.Andptr[1:]
  3026  
  3027  			case Pf2, /* xmm opcode escape */
  3028  				Pf3:
  3029  				ctxt.Andptr[0] = byte(o.prefix)
  3030  				ctxt.Andptr = ctxt.Andptr[1:]
  3031  
  3032  				ctxt.Andptr[0] = Pm
  3033  				ctxt.Andptr = ctxt.Andptr[1:]
  3034  
  3035  			case Pm: /* opcode escape */
  3036  				ctxt.Andptr[0] = Pm
  3037  				ctxt.Andptr = ctxt.Andptr[1:]
  3038  
  3039  			case Pe: /* 16 bit escape */
  3040  				ctxt.Andptr[0] = Pe
  3041  				ctxt.Andptr = ctxt.Andptr[1:]
  3042  
  3043  			case Pw: /* 64-bit escape */
  3044  				if p.Mode != 64 {
  3045  					ctxt.Diag("asmins: illegal 64: %v", p)
  3046  				}
  3047  				ctxt.Rexflag |= Pw
  3048  
  3049  			case Pw8: /* 64-bit escape if z >= 8 */
  3050  				if z >= 8 {
  3051  					if p.Mode != 64 {
  3052  						ctxt.Diag("asmins: illegal 64: %v", p)
  3053  					}
  3054  					ctxt.Rexflag |= Pw
  3055  				}
  3056  
  3057  			case Pb: /* botch */
  3058  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3059  					goto bad
  3060  				}
  3061  				// NOTE(rsc): This is probably safe to do always,
  3062  				// but when enabled it chooses different encodings
  3063  				// than the old cmd/internal/obj/i386 code did,
  3064  				// which breaks our "same bits out" checks.
  3065  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3066  				// in the original obj/i386, and it would encode
  3067  				// (using a valid, shorter form) as 3c 00 if we enabled
  3068  				// the call to bytereg here.
  3069  				if p.Mode == 64 {
  3070  					bytereg(&p.From, &p.Ft)
  3071  					bytereg(&p.To, &p.Tt)
  3072  				}
  3073  
  3074  			case P32: /* 32 bit but illegal if 64-bit mode */
  3075  				if p.Mode == 64 {
  3076  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3077  				}
  3078  
  3079  			case Py: /* 64-bit only, no prefix */
  3080  				if p.Mode != 64 {
  3081  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3082  				}
  3083  
  3084  			case Py1: /* 64-bit only if z < 1, no prefix */
  3085  				if z < 1 && p.Mode != 64 {
  3086  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3087  				}
  3088  
  3089  			case Py3: /* 64-bit only if z < 3, no prefix */
  3090  				if z < 3 && p.Mode != 64 {
  3091  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3092  				}
  3093  			}
  3094  
  3095  			if z >= len(o.op) {
  3096  				log.Fatalf("asmins bad table %v", p)
  3097  			}
  3098  			op = int(o.op[z])
  3099  			if op == 0x0f {
  3100  				ctxt.Andptr[0] = byte(op)
  3101  				ctxt.Andptr = ctxt.Andptr[1:]
  3102  				z++
  3103  				op = int(o.op[z])
  3104  			}
  3105  
  3106  			switch yt.zcase {
  3107  			default:
  3108  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3109  				return
  3110  
  3111  			case Zpseudo:
  3112  				break
  3113  
  3114  			case Zlit:
  3115  				for ; ; z++ {
  3116  					op = int(o.op[z])
  3117  					if op == 0 {
  3118  						break
  3119  					}
  3120  					ctxt.Andptr[0] = byte(op)
  3121  					ctxt.Andptr = ctxt.Andptr[1:]
  3122  				}
  3123  
  3124  			case Zlitm_r:
  3125  				for ; ; z++ {
  3126  					op = int(o.op[z])
  3127  					if op == 0 {
  3128  						break
  3129  					}
  3130  					ctxt.Andptr[0] = byte(op)
  3131  					ctxt.Andptr = ctxt.Andptr[1:]
  3132  				}
  3133  				asmand(ctxt, p, &p.From, &p.To)
  3134  
  3135  			case Zmb_r:
  3136  				bytereg(&p.From, &p.Ft)
  3137  				fallthrough
  3138  
  3139  				/* fall through */
  3140  			case Zm_r:
  3141  				ctxt.Andptr[0] = byte(op)
  3142  				ctxt.Andptr = ctxt.Andptr[1:]
  3143  
  3144  				asmand(ctxt, p, &p.From, &p.To)
  3145  
  3146  			case Zm2_r:
  3147  				ctxt.Andptr[0] = byte(op)
  3148  				ctxt.Andptr = ctxt.Andptr[1:]
  3149  				ctxt.Andptr[0] = byte(o.op[z+1])
  3150  				ctxt.Andptr = ctxt.Andptr[1:]
  3151  				asmand(ctxt, p, &p.From, &p.To)
  3152  
  3153  			case Zm_r_xm:
  3154  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3155  				asmand(ctxt, p, &p.From, &p.To)
  3156  
  3157  			case Zm_r_xm_nr:
  3158  				ctxt.Rexflag = 0
  3159  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3160  				asmand(ctxt, p, &p.From, &p.To)
  3161  
  3162  			case Zm_r_i_xm:
  3163  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3164  				asmand(ctxt, p, &p.From, &p.From3)
  3165  				ctxt.Andptr[0] = byte(p.To.Offset)
  3166  				ctxt.Andptr = ctxt.Andptr[1:]
  3167  
  3168  			case Zm_r_3d:
  3169  				ctxt.Andptr[0] = 0x0f
  3170  				ctxt.Andptr = ctxt.Andptr[1:]
  3171  				ctxt.Andptr[0] = 0x0f
  3172  				ctxt.Andptr = ctxt.Andptr[1:]
  3173  				asmand(ctxt, p, &p.From, &p.To)
  3174  				ctxt.Andptr[0] = byte(op)
  3175  				ctxt.Andptr = ctxt.Andptr[1:]
  3176  
  3177  			case Zibm_r:
  3178  				for {
  3179  					tmp1 := z
  3180  					z++
  3181  					op = int(o.op[tmp1])
  3182  					if op == 0 {
  3183  						break
  3184  					}
  3185  					ctxt.Andptr[0] = byte(op)
  3186  					ctxt.Andptr = ctxt.Andptr[1:]
  3187  				}
  3188  				asmand(ctxt, p, &p.From3, &p.To)
  3189  				ctxt.Andptr[0] = byte(p.From.Offset)
  3190  				ctxt.Andptr = ctxt.Andptr[1:]
  3191  
  3192  			case Zaut_r:
  3193  				ctxt.Andptr[0] = 0x8d
  3194  				ctxt.Andptr = ctxt.Andptr[1:] /* leal */
  3195  				if p.From.Type != obj.TYPE_ADDR {
  3196  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3197  				}
  3198  				p.From.Type = obj.TYPE_MEM
  3199  				asmand(ctxt, p, &p.From, &p.To)
  3200  				p.From.Type = obj.TYPE_ADDR
  3201  
  3202  			case Zm_o:
  3203  				ctxt.Andptr[0] = byte(op)
  3204  				ctxt.Andptr = ctxt.Andptr[1:]
  3205  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3206  
  3207  			case Zr_m:
  3208  				ctxt.Andptr[0] = byte(op)
  3209  				ctxt.Andptr = ctxt.Andptr[1:]
  3210  				asmand(ctxt, p, &p.To, &p.From)
  3211  
  3212  			case Zr_m_xm:
  3213  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3214  				asmand(ctxt, p, &p.To, &p.From)
  3215  
  3216  			case Zr_m_xm_nr:
  3217  				ctxt.Rexflag = 0
  3218  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3219  				asmand(ctxt, p, &p.To, &p.From)
  3220  
  3221  			case Zo_m:
  3222  				ctxt.Andptr[0] = byte(op)
  3223  				ctxt.Andptr = ctxt.Andptr[1:]
  3224  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3225  
  3226  			case Zcallindreg:
  3227  				r = obj.Addrel(ctxt.Cursym)
  3228  				r.Off = int32(p.Pc)
  3229  				r.Type = obj.R_CALLIND
  3230  				r.Siz = 0
  3231  				fallthrough
  3232  
  3233  			case Zo_m64:
  3234  				ctxt.Andptr[0] = byte(op)
  3235  				ctxt.Andptr = ctxt.Andptr[1:]
  3236  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3237  
  3238  			case Zm_ibo:
  3239  				ctxt.Andptr[0] = byte(op)
  3240  				ctxt.Andptr = ctxt.Andptr[1:]
  3241  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3242  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.To, nil))
  3243  				ctxt.Andptr = ctxt.Andptr[1:]
  3244  
  3245  			case Zibo_m:
  3246  				ctxt.Andptr[0] = byte(op)
  3247  				ctxt.Andptr = ctxt.Andptr[1:]
  3248  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3249  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3250  				ctxt.Andptr = ctxt.Andptr[1:]
  3251  
  3252  			case Zibo_m_xm:
  3253  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3254  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3255  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3256  				ctxt.Andptr = ctxt.Andptr[1:]
  3257  
  3258  			case Z_ib, Zib_:
  3259  				if yt.zcase == Zib_ {
  3260  					a = &p.From
  3261  				} else {
  3262  					a = &p.To
  3263  				}
  3264  				ctxt.Andptr[0] = byte(op)
  3265  				ctxt.Andptr = ctxt.Andptr[1:]
  3266  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, a, nil))
  3267  				ctxt.Andptr = ctxt.Andptr[1:]
  3268  
  3269  			case Zib_rp:
  3270  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3271  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3272  				ctxt.Andptr = ctxt.Andptr[1:]
  3273  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3274  				ctxt.Andptr = ctxt.Andptr[1:]
  3275  
  3276  			case Zil_rp:
  3277  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3278  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3279  				ctxt.Andptr = ctxt.Andptr[1:]
  3280  				if o.prefix == Pe {
  3281  					v = vaddr(ctxt, p, &p.From, nil)
  3282  					ctxt.Andptr[0] = byte(v)
  3283  					ctxt.Andptr = ctxt.Andptr[1:]
  3284  					ctxt.Andptr[0] = byte(v >> 8)
  3285  					ctxt.Andptr = ctxt.Andptr[1:]
  3286  				} else {
  3287  					relput4(ctxt, p, &p.From)
  3288  				}
  3289  
  3290  			case Zo_iw:
  3291  				ctxt.Andptr[0] = byte(op)
  3292  				ctxt.Andptr = ctxt.Andptr[1:]
  3293  				if p.From.Type != obj.TYPE_NONE {
  3294  					v = vaddr(ctxt, p, &p.From, nil)
  3295  					ctxt.Andptr[0] = byte(v)
  3296  					ctxt.Andptr = ctxt.Andptr[1:]
  3297  					ctxt.Andptr[0] = byte(v >> 8)
  3298  					ctxt.Andptr = ctxt.Andptr[1:]
  3299  				}
  3300  
  3301  			case Ziq_rp:
  3302  				v = vaddr(ctxt, p, &p.From, &rel)
  3303  				l = int(v >> 32)
  3304  				if l == 0 && rel.Siz != 8 {
  3305  					//p->mark |= 0100;
  3306  					//print("zero: %llux %P\n", v, p);
  3307  					ctxt.Rexflag &^= (0x40 | Rxw)
  3308  
  3309  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3310  					ctxt.Andptr[0] = byte(0xb8 + reg[p.To.Reg])
  3311  					ctxt.Andptr = ctxt.Andptr[1:]
  3312  					if rel.Type != 0 {
  3313  						r = obj.Addrel(ctxt.Cursym)
  3314  						*r = rel
  3315  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3316  					}
  3317  
  3318  					put4(ctxt, int32(v))
  3319  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3320  
  3321  					//p->mark |= 0100;
  3322  					//print("sign: %llux %P\n", v, p);
  3323  					ctxt.Andptr[0] = 0xc7
  3324  					ctxt.Andptr = ctxt.Andptr[1:]
  3325  
  3326  					asmando(ctxt, p, &p.To, 0)
  3327  					put4(ctxt, int32(v)) /* need all 8 */
  3328  				} else {
  3329  					//print("all: %llux %P\n", v, p);
  3330  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3331  
  3332  					ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3333  					ctxt.Andptr = ctxt.Andptr[1:]
  3334  					if rel.Type != 0 {
  3335  						r = obj.Addrel(ctxt.Cursym)
  3336  						*r = rel
  3337  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3338  					}
  3339  
  3340  					put8(ctxt, v)
  3341  				}
  3342  
  3343  			case Zib_rr:
  3344  				ctxt.Andptr[0] = byte(op)
  3345  				ctxt.Andptr = ctxt.Andptr[1:]
  3346  				asmand(ctxt, p, &p.To, &p.To)
  3347  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3348  				ctxt.Andptr = ctxt.Andptr[1:]
  3349  
  3350  			case Z_il, Zil_:
  3351  				if yt.zcase == Zil_ {
  3352  					a = &p.From
  3353  				} else {
  3354  					a = &p.To
  3355  				}
  3356  				ctxt.Andptr[0] = byte(op)
  3357  				ctxt.Andptr = ctxt.Andptr[1:]
  3358  				if o.prefix == Pe {
  3359  					v = vaddr(ctxt, p, a, nil)
  3360  					ctxt.Andptr[0] = byte(v)
  3361  					ctxt.Andptr = ctxt.Andptr[1:]
  3362  					ctxt.Andptr[0] = byte(v >> 8)
  3363  					ctxt.Andptr = ctxt.Andptr[1:]
  3364  				} else {
  3365  					relput4(ctxt, p, a)
  3366  				}
  3367  
  3368  			case Zm_ilo, Zilo_m:
  3369  				ctxt.Andptr[0] = byte(op)
  3370  				ctxt.Andptr = ctxt.Andptr[1:]
  3371  				if yt.zcase == Zilo_m {
  3372  					a = &p.From
  3373  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3374  				} else {
  3375  					a = &p.To
  3376  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3377  				}
  3378  
  3379  				if o.prefix == Pe {
  3380  					v = vaddr(ctxt, p, a, nil)
  3381  					ctxt.Andptr[0] = byte(v)
  3382  					ctxt.Andptr = ctxt.Andptr[1:]
  3383  					ctxt.Andptr[0] = byte(v >> 8)
  3384  					ctxt.Andptr = ctxt.Andptr[1:]
  3385  				} else {
  3386  					relput4(ctxt, p, a)
  3387  				}
  3388  
  3389  			case Zil_rr:
  3390  				ctxt.Andptr[0] = byte(op)
  3391  				ctxt.Andptr = ctxt.Andptr[1:]
  3392  				asmand(ctxt, p, &p.To, &p.To)
  3393  				if o.prefix == Pe {
  3394  					v = vaddr(ctxt, p, &p.From, nil)
  3395  					ctxt.Andptr[0] = byte(v)
  3396  					ctxt.Andptr = ctxt.Andptr[1:]
  3397  					ctxt.Andptr[0] = byte(v >> 8)
  3398  					ctxt.Andptr = ctxt.Andptr[1:]
  3399  				} else {
  3400  					relput4(ctxt, p, &p.From)
  3401  				}
  3402  
  3403  			case Z_rp:
  3404  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3405  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3406  				ctxt.Andptr = ctxt.Andptr[1:]
  3407  
  3408  			case Zrp_:
  3409  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3410  				ctxt.Andptr[0] = byte(op + reg[p.From.Reg])
  3411  				ctxt.Andptr = ctxt.Andptr[1:]
  3412  
  3413  			case Zclr:
  3414  				ctxt.Rexflag &^= Pw
  3415  				ctxt.Andptr[0] = byte(op)
  3416  				ctxt.Andptr = ctxt.Andptr[1:]
  3417  				asmand(ctxt, p, &p.To, &p.To)
  3418  
  3419  			case Zcallcon, Zjmpcon:
  3420  				if yt.zcase == Zcallcon {
  3421  					ctxt.Andptr[0] = byte(op)
  3422  					ctxt.Andptr = ctxt.Andptr[1:]
  3423  				} else {
  3424  					ctxt.Andptr[0] = byte(o.op[z+1])
  3425  					ctxt.Andptr = ctxt.Andptr[1:]
  3426  				}
  3427  				r = obj.Addrel(ctxt.Cursym)
  3428  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3429  				r.Type = obj.R_PCREL
  3430  				r.Siz = 4
  3431  				r.Add = p.To.Offset
  3432  				put4(ctxt, 0)
  3433  
  3434  			case Zcallind:
  3435  				ctxt.Andptr[0] = byte(op)
  3436  				ctxt.Andptr = ctxt.Andptr[1:]
  3437  				ctxt.Andptr[0] = byte(o.op[z+1])
  3438  				ctxt.Andptr = ctxt.Andptr[1:]
  3439  				r = obj.Addrel(ctxt.Cursym)
  3440  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3441  				r.Type = obj.R_ADDR
  3442  				r.Siz = 4
  3443  				r.Add = p.To.Offset
  3444  				r.Sym = p.To.Sym
  3445  				put4(ctxt, 0)
  3446  
  3447  			case Zcall, Zcallduff:
  3448  				if p.To.Sym == nil {
  3449  					ctxt.Diag("call without target")
  3450  					log.Fatalf("bad code")
  3451  				}
  3452  
  3453  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3454  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3455  				}
  3456  
  3457  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3458  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3459  					// (the call jumps into the middle of the function).
  3460  					// This makes it possible to see call sites for duffcopy/duffzero in
  3461  					// BP-based profiling tools like Linux perf (which is the
  3462  					// whole point of obj.Framepointer_enabled).
  3463  					// MOVQ BP, -16(SP)
  3464  					// LEAQ -16(SP), BP
  3465  					copy(ctxt.Andptr, bpduff1)
  3466  					ctxt.Andptr = ctxt.Andptr[len(bpduff1):]
  3467  				}
  3468  				ctxt.Andptr[0] = byte(op)
  3469  				ctxt.Andptr = ctxt.Andptr[1:]
  3470  				r = obj.Addrel(ctxt.Cursym)
  3471  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3472  				r.Sym = p.To.Sym
  3473  				r.Add = p.To.Offset
  3474  				r.Type = obj.R_CALL
  3475  				r.Siz = 4
  3476  				put4(ctxt, 0)
  3477  
  3478  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3479  					// Pop BP pushed above.
  3480  					// MOVQ 0(BP), BP
  3481  					copy(ctxt.Andptr, bpduff2)
  3482  					ctxt.Andptr = ctxt.Andptr[len(bpduff2):]
  3483  				}
  3484  
  3485  			// TODO: jump across functions needs reloc
  3486  			case Zbr, Zjmp, Zloop:
  3487  				if p.To.Sym != nil {
  3488  					if yt.zcase != Zjmp {
  3489  						ctxt.Diag("branch to ATEXT")
  3490  						log.Fatalf("bad code")
  3491  					}
  3492  
  3493  					ctxt.Andptr[0] = byte(o.op[z+1])
  3494  					ctxt.Andptr = ctxt.Andptr[1:]
  3495  					r = obj.Addrel(ctxt.Cursym)
  3496  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3497  					r.Sym = p.To.Sym
  3498  					r.Type = obj.R_PCREL
  3499  					r.Siz = 4
  3500  					put4(ctxt, 0)
  3501  					break
  3502  				}
  3503  
  3504  				// Assumes q is in this function.
  3505  				// TODO: Check in input, preserve in brchain.
  3506  
  3507  				// Fill in backward jump now.
  3508  				q = p.Pcond
  3509  
  3510  				if q == nil {
  3511  					ctxt.Diag("jmp/branch/loop without target")
  3512  					log.Fatalf("bad code")
  3513  				}
  3514  
  3515  				if p.Back&1 != 0 {
  3516  					v = q.Pc - (p.Pc + 2)
  3517  					if v >= -128 {
  3518  						if p.As == AJCXZL {
  3519  							ctxt.Andptr[0] = 0x67
  3520  							ctxt.Andptr = ctxt.Andptr[1:]
  3521  						}
  3522  						ctxt.Andptr[0] = byte(op)
  3523  						ctxt.Andptr = ctxt.Andptr[1:]
  3524  						ctxt.Andptr[0] = byte(v)
  3525  						ctxt.Andptr = ctxt.Andptr[1:]
  3526  					} else if yt.zcase == Zloop {
  3527  						ctxt.Diag("loop too far: %v", p)
  3528  					} else {
  3529  						v -= 5 - 2
  3530  						if yt.zcase == Zbr {
  3531  							ctxt.Andptr[0] = 0x0f
  3532  							ctxt.Andptr = ctxt.Andptr[1:]
  3533  							v--
  3534  						}
  3535  
  3536  						ctxt.Andptr[0] = byte(o.op[z+1])
  3537  						ctxt.Andptr = ctxt.Andptr[1:]
  3538  						ctxt.Andptr[0] = byte(v)
  3539  						ctxt.Andptr = ctxt.Andptr[1:]
  3540  						ctxt.Andptr[0] = byte(v >> 8)
  3541  						ctxt.Andptr = ctxt.Andptr[1:]
  3542  						ctxt.Andptr[0] = byte(v >> 16)
  3543  						ctxt.Andptr = ctxt.Andptr[1:]
  3544  						ctxt.Andptr[0] = byte(v >> 24)
  3545  						ctxt.Andptr = ctxt.Andptr[1:]
  3546  					}
  3547  
  3548  					break
  3549  				}
  3550  
  3551  				// Annotate target; will fill in later.
  3552  				p.Forwd = q.Comefrom
  3553  
  3554  				q.Comefrom = p
  3555  				if p.Back&2 != 0 { // short
  3556  					if p.As == AJCXZL {
  3557  						ctxt.Andptr[0] = 0x67
  3558  						ctxt.Andptr = ctxt.Andptr[1:]
  3559  					}
  3560  					ctxt.Andptr[0] = byte(op)
  3561  					ctxt.Andptr = ctxt.Andptr[1:]
  3562  					ctxt.Andptr[0] = 0
  3563  					ctxt.Andptr = ctxt.Andptr[1:]
  3564  				} else if yt.zcase == Zloop {
  3565  					ctxt.Diag("loop too far: %v", p)
  3566  				} else {
  3567  					if yt.zcase == Zbr {
  3568  						ctxt.Andptr[0] = 0x0f
  3569  						ctxt.Andptr = ctxt.Andptr[1:]
  3570  					}
  3571  					ctxt.Andptr[0] = byte(o.op[z+1])
  3572  					ctxt.Andptr = ctxt.Andptr[1:]
  3573  					ctxt.Andptr[0] = 0
  3574  					ctxt.Andptr = ctxt.Andptr[1:]
  3575  					ctxt.Andptr[0] = 0
  3576  					ctxt.Andptr = ctxt.Andptr[1:]
  3577  					ctxt.Andptr[0] = 0
  3578  					ctxt.Andptr = ctxt.Andptr[1:]
  3579  					ctxt.Andptr[0] = 0
  3580  					ctxt.Andptr = ctxt.Andptr[1:]
  3581  				}
  3582  
  3583  				break
  3584  
  3585  			/*
  3586  				v = q->pc - p->pc - 2;
  3587  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3588  					*ctxt->andptr++ = op;
  3589  					*ctxt->andptr++ = v;
  3590  				} else {
  3591  					v -= 5-2;
  3592  					if(yt.zcase == Zbr) {
  3593  						*ctxt->andptr++ = 0x0f;
  3594  						v--;
  3595  					}
  3596  					*ctxt->andptr++ = o->op[z+1];
  3597  					*ctxt->andptr++ = v;
  3598  					*ctxt->andptr++ = v>>8;
  3599  					*ctxt->andptr++ = v>>16;
  3600  					*ctxt->andptr++ = v>>24;
  3601  				}
  3602  			*/
  3603  
  3604  			case Zbyte:
  3605  				v = vaddr(ctxt, p, &p.From, &rel)
  3606  				if rel.Siz != 0 {
  3607  					rel.Siz = uint8(op)
  3608  					r = obj.Addrel(ctxt.Cursym)
  3609  					*r = rel
  3610  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3611  				}
  3612  
  3613  				ctxt.Andptr[0] = byte(v)
  3614  				ctxt.Andptr = ctxt.Andptr[1:]
  3615  				if op > 1 {
  3616  					ctxt.Andptr[0] = byte(v >> 8)
  3617  					ctxt.Andptr = ctxt.Andptr[1:]
  3618  					if op > 2 {
  3619  						ctxt.Andptr[0] = byte(v >> 16)
  3620  						ctxt.Andptr = ctxt.Andptr[1:]
  3621  						ctxt.Andptr[0] = byte(v >> 24)
  3622  						ctxt.Andptr = ctxt.Andptr[1:]
  3623  						if op > 4 {
  3624  							ctxt.Andptr[0] = byte(v >> 32)
  3625  							ctxt.Andptr = ctxt.Andptr[1:]
  3626  							ctxt.Andptr[0] = byte(v >> 40)
  3627  							ctxt.Andptr = ctxt.Andptr[1:]
  3628  							ctxt.Andptr[0] = byte(v >> 48)
  3629  							ctxt.Andptr = ctxt.Andptr[1:]
  3630  							ctxt.Andptr[0] = byte(v >> 56)
  3631  							ctxt.Andptr = ctxt.Andptr[1:]
  3632  						}
  3633  					}
  3634  				}
  3635  			}
  3636  
  3637  			return
  3638  		}
  3639  		z += int(yt.zoffset) + xo
  3640  	}
  3641  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3642  		var pp obj.Prog
  3643  		var t []byte
  3644  		if p.As == mo[0].as {
  3645  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3646  				t = mo[0].op[:]
  3647  				switch mo[0].code {
  3648  				default:
  3649  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3650  
  3651  				case 0: /* lit */
  3652  					for z = 0; t[z] != E; z++ {
  3653  						ctxt.Andptr[0] = t[z]
  3654  						ctxt.Andptr = ctxt.Andptr[1:]
  3655  					}
  3656  
  3657  				case 1: /* r,m */
  3658  					ctxt.Andptr[0] = t[0]
  3659  					ctxt.Andptr = ctxt.Andptr[1:]
  3660  
  3661  					asmando(ctxt, p, &p.To, int(t[1]))
  3662  
  3663  				case 2: /* m,r */
  3664  					ctxt.Andptr[0] = t[0]
  3665  					ctxt.Andptr = ctxt.Andptr[1:]
  3666  
  3667  					asmando(ctxt, p, &p.From, int(t[1]))
  3668  
  3669  				case 3: /* r,m - 2op */
  3670  					ctxt.Andptr[0] = t[0]
  3671  					ctxt.Andptr = ctxt.Andptr[1:]
  3672  
  3673  					ctxt.Andptr[0] = t[1]
  3674  					ctxt.Andptr = ctxt.Andptr[1:]
  3675  					asmando(ctxt, p, &p.To, int(t[2]))
  3676  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3677  
  3678  				case 4: /* m,r - 2op */
  3679  					ctxt.Andptr[0] = t[0]
  3680  					ctxt.Andptr = ctxt.Andptr[1:]
  3681  
  3682  					ctxt.Andptr[0] = t[1]
  3683  					ctxt.Andptr = ctxt.Andptr[1:]
  3684  					asmando(ctxt, p, &p.From, int(t[2]))
  3685  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3686  
  3687  				case 5: /* load full pointer, trash heap */
  3688  					if t[0] != 0 {
  3689  						ctxt.Andptr[0] = t[0]
  3690  						ctxt.Andptr = ctxt.Andptr[1:]
  3691  					}
  3692  					switch p.To.Index {
  3693  					default:
  3694  						goto bad
  3695  
  3696  					case REG_DS:
  3697  						ctxt.Andptr[0] = 0xc5
  3698  						ctxt.Andptr = ctxt.Andptr[1:]
  3699  
  3700  					case REG_SS:
  3701  						ctxt.Andptr[0] = 0x0f
  3702  						ctxt.Andptr = ctxt.Andptr[1:]
  3703  						ctxt.Andptr[0] = 0xb2
  3704  						ctxt.Andptr = ctxt.Andptr[1:]
  3705  
  3706  					case REG_ES:
  3707  						ctxt.Andptr[0] = 0xc4
  3708  						ctxt.Andptr = ctxt.Andptr[1:]
  3709  
  3710  					case REG_FS:
  3711  						ctxt.Andptr[0] = 0x0f
  3712  						ctxt.Andptr = ctxt.Andptr[1:]
  3713  						ctxt.Andptr[0] = 0xb4
  3714  						ctxt.Andptr = ctxt.Andptr[1:]
  3715  
  3716  					case REG_GS:
  3717  						ctxt.Andptr[0] = 0x0f
  3718  						ctxt.Andptr = ctxt.Andptr[1:]
  3719  						ctxt.Andptr[0] = 0xb5
  3720  						ctxt.Andptr = ctxt.Andptr[1:]
  3721  					}
  3722  
  3723  					asmand(ctxt, p, &p.From, &p.To)
  3724  
  3725  				case 6: /* double shift */
  3726  					if t[0] == Pw {
  3727  						if p.Mode != 64 {
  3728  							ctxt.Diag("asmins: illegal 64: %v", p)
  3729  						}
  3730  						ctxt.Rexflag |= Pw
  3731  						t = t[1:]
  3732  					} else if t[0] == Pe {
  3733  						ctxt.Andptr[0] = Pe
  3734  						ctxt.Andptr = ctxt.Andptr[1:]
  3735  						t = t[1:]
  3736  					}
  3737  
  3738  					switch p.From.Type {
  3739  					default:
  3740  						goto bad
  3741  
  3742  					case obj.TYPE_CONST:
  3743  						ctxt.Andptr[0] = 0x0f
  3744  						ctxt.Andptr = ctxt.Andptr[1:]
  3745  						ctxt.Andptr[0] = t[0]
  3746  						ctxt.Andptr = ctxt.Andptr[1:]
  3747  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3748  						ctxt.Andptr[0] = byte(p.From.Offset)
  3749  						ctxt.Andptr = ctxt.Andptr[1:]
  3750  
  3751  					case obj.TYPE_REG:
  3752  						switch p.From.Reg {
  3753  						default:
  3754  							goto bad
  3755  
  3756  						case REG_CL, REG_CX:
  3757  							ctxt.Andptr[0] = 0x0f
  3758  							ctxt.Andptr = ctxt.Andptr[1:]
  3759  							ctxt.Andptr[0] = t[1]
  3760  							ctxt.Andptr = ctxt.Andptr[1:]
  3761  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3762  						}
  3763  					}
  3764  
  3765  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3766  				// where you load the TLS base register into a register and then index off that
  3767  				// register to access the actual TLS variables. Systems that allow direct TLS access
  3768  				// are handled in prefixof above and should not be listed here.
  3769  				case 7: /* mov tls, r */
  3770  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  3771  						ctxt.Diag("invalid load of TLS: %v", p)
  3772  					}
  3773  
  3774  					if p.Mode == 32 {
  3775  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3776  						// where you load the TLS base register into a register and then index off that
  3777  						// register to access the actual TLS variables. Systems that allow direct TLS access
  3778  						// are handled in prefixof above and should not be listed here.
  3779  						switch ctxt.Headtype {
  3780  						default:
  3781  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3782  
  3783  						case obj.Hlinux,
  3784  							obj.Hnacl:
  3785  							// ELF TLS base is 0(GS).
  3786  							pp.From = p.From
  3787  
  3788  							pp.From.Type = obj.TYPE_MEM
  3789  							pp.From.Reg = REG_GS
  3790  							pp.From.Offset = 0
  3791  							pp.From.Index = REG_NONE
  3792  							pp.From.Scale = 0
  3793  							ctxt.Andptr[0] = 0x65
  3794  							ctxt.Andptr = ctxt.Andptr[1:] // GS
  3795  							ctxt.Andptr[0] = 0x8B
  3796  							ctxt.Andptr = ctxt.Andptr[1:]
  3797  							asmand(ctxt, p, &pp.From, &p.To)
  3798  
  3799  						case obj.Hplan9:
  3800  							if ctxt.Plan9privates == nil {
  3801  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3802  							}
  3803  							pp.From = obj.Addr{}
  3804  							pp.From.Type = obj.TYPE_MEM
  3805  							pp.From.Name = obj.NAME_EXTERN
  3806  							pp.From.Sym = ctxt.Plan9privates
  3807  							pp.From.Offset = 0
  3808  							pp.From.Index = REG_NONE
  3809  							ctxt.Andptr[0] = 0x8B
  3810  							ctxt.Andptr = ctxt.Andptr[1:]
  3811  							asmand(ctxt, p, &pp.From, &p.To)
  3812  
  3813  						case obj.Hwindows:
  3814  							// Windows TLS base is always 0x14(FS).
  3815  							pp.From = p.From
  3816  
  3817  							pp.From.Type = obj.TYPE_MEM
  3818  							pp.From.Reg = REG_FS
  3819  							pp.From.Offset = 0x14
  3820  							pp.From.Index = REG_NONE
  3821  							pp.From.Scale = 0
  3822  							ctxt.Andptr[0] = 0x64
  3823  							ctxt.Andptr = ctxt.Andptr[1:] // FS
  3824  							ctxt.Andptr[0] = 0x8B
  3825  							ctxt.Andptr = ctxt.Andptr[1:]
  3826  							asmand(ctxt, p, &pp.From, &p.To)
  3827  						}
  3828  						break
  3829  					}
  3830  
  3831  					switch ctxt.Headtype {
  3832  					default:
  3833  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3834  
  3835  					case obj.Hlinux:
  3836  						if ctxt.Flag_shared == 0 {
  3837  							log.Fatalf("unknown TLS base location for linux without -shared")
  3838  						}
  3839  						// Note that this is not generating the same insn as the other cases.
  3840  						//     MOV TLS, R_to
  3841  						// becomes
  3842  						//     movq g@gottpoff(%rip), R_to
  3843  						// which is encoded as
  3844  						//     movq 0(%rip), R_to
  3845  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  3846  						// is g, which we can't check here, but will when we assemble the second
  3847  						// instruction.
  3848  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  3849  
  3850  						ctxt.Andptr[0] = 0x8B
  3851  						ctxt.Andptr = ctxt.Andptr[1:]
  3852  						ctxt.Andptr[0] = byte(0x05 | (reg[p.To.Reg] << 3))
  3853  						ctxt.Andptr = ctxt.Andptr[1:]
  3854  						r = obj.Addrel(ctxt.Cursym)
  3855  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3856  						r.Type = obj.R_TLS_IE
  3857  						r.Siz = 4
  3858  						r.Add = -4
  3859  						put4(ctxt, 0)
  3860  
  3861  					case obj.Hplan9:
  3862  						if ctxt.Plan9privates == nil {
  3863  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3864  						}
  3865  						pp.From = obj.Addr{}
  3866  						pp.From.Type = obj.TYPE_MEM
  3867  						pp.From.Name = obj.NAME_EXTERN
  3868  						pp.From.Sym = ctxt.Plan9privates
  3869  						pp.From.Offset = 0
  3870  						pp.From.Index = REG_NONE
  3871  						ctxt.Rexflag |= Pw
  3872  						ctxt.Andptr[0] = 0x8B
  3873  						ctxt.Andptr = ctxt.Andptr[1:]
  3874  						asmand(ctxt, p, &pp.From, &p.To)
  3875  
  3876  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  3877  						// TLS base is 0(FS).
  3878  						pp.From = p.From
  3879  
  3880  						pp.From.Type = obj.TYPE_MEM
  3881  						pp.From.Name = obj.NAME_NONE
  3882  						pp.From.Reg = REG_NONE
  3883  						pp.From.Offset = 0
  3884  						pp.From.Index = REG_NONE
  3885  						pp.From.Scale = 0
  3886  						ctxt.Rexflag |= Pw
  3887  						ctxt.Andptr[0] = 0x64
  3888  						ctxt.Andptr = ctxt.Andptr[1:] // FS
  3889  						ctxt.Andptr[0] = 0x8B
  3890  						ctxt.Andptr = ctxt.Andptr[1:]
  3891  						asmand(ctxt, p, &pp.From, &p.To)
  3892  
  3893  					case obj.Hwindows:
  3894  						// Windows TLS base is always 0x28(GS).
  3895  						pp.From = p.From
  3896  
  3897  						pp.From.Type = obj.TYPE_MEM
  3898  						pp.From.Name = obj.NAME_NONE
  3899  						pp.From.Reg = REG_GS
  3900  						pp.From.Offset = 0x28
  3901  						pp.From.Index = REG_NONE
  3902  						pp.From.Scale = 0
  3903  						ctxt.Rexflag |= Pw
  3904  						ctxt.Andptr[0] = 0x65
  3905  						ctxt.Andptr = ctxt.Andptr[1:] // GS
  3906  						ctxt.Andptr[0] = 0x8B
  3907  						ctxt.Andptr = ctxt.Andptr[1:]
  3908  						asmand(ctxt, p, &pp.From, &p.To)
  3909  					}
  3910  				}
  3911  				return
  3912  			}
  3913  		}
  3914  	}
  3915  	goto bad
  3916  
  3917  bad:
  3918  	if p.Mode != 64 {
  3919  		/*
  3920  		 * here, the assembly has failed.
  3921  		 * if its a byte instruction that has
  3922  		 * unaddressable registers, try to
  3923  		 * exchange registers and reissue the
  3924  		 * instruction with the operands renamed.
  3925  		 */
  3926  		pp := *p
  3927  
  3928  		unbytereg(&pp.From, &pp.Ft)
  3929  		unbytereg(&pp.To, &pp.Tt)
  3930  
  3931  		z := int(p.From.Reg)
  3932  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  3933  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  3934  			// For now, different to keep bit-for-bit compatibility.
  3935  			if p.Mode == 32 {
  3936  				breg := byteswapreg(ctxt, &p.To)
  3937  				if breg != REG_AX {
  3938  					ctxt.Andptr[0] = 0x87
  3939  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3940  					asmando(ctxt, p, &p.From, reg[breg])
  3941  					subreg(&pp, z, breg)
  3942  					doasm(ctxt, &pp)
  3943  					ctxt.Andptr[0] = 0x87
  3944  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3945  					asmando(ctxt, p, &p.From, reg[breg])
  3946  				} else {
  3947  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3948  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3949  					subreg(&pp, z, REG_AX)
  3950  					doasm(ctxt, &pp)
  3951  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3952  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3953  				}
  3954  				return
  3955  			}
  3956  
  3957  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  3958  				// We certainly don't want to exchange
  3959  				// with AX if the op is MUL or DIV.
  3960  				ctxt.Andptr[0] = 0x87
  3961  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3962  				asmando(ctxt, p, &p.From, reg[REG_BX])
  3963  				subreg(&pp, z, REG_BX)
  3964  				doasm(ctxt, &pp)
  3965  				ctxt.Andptr[0] = 0x87
  3966  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3967  				asmando(ctxt, p, &p.From, reg[REG_BX])
  3968  			} else {
  3969  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  3970  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3971  				subreg(&pp, z, REG_AX)
  3972  				doasm(ctxt, &pp)
  3973  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  3974  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3975  			}
  3976  			return
  3977  		}
  3978  
  3979  		z = int(p.To.Reg)
  3980  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  3981  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  3982  			// For now, different to keep bit-for-bit compatibility.
  3983  			if p.Mode == 32 {
  3984  				breg := byteswapreg(ctxt, &p.From)
  3985  				if breg != REG_AX {
  3986  					ctxt.Andptr[0] = 0x87
  3987  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  3988  					asmando(ctxt, p, &p.To, reg[breg])
  3989  					subreg(&pp, z, breg)
  3990  					doasm(ctxt, &pp)
  3991  					ctxt.Andptr[0] = 0x87
  3992  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  3993  					asmando(ctxt, p, &p.To, reg[breg])
  3994  				} else {
  3995  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3996  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  3997  					subreg(&pp, z, REG_AX)
  3998  					doasm(ctxt, &pp)
  3999  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4000  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4001  				}
  4002  				return
  4003  			}
  4004  
  4005  			if isax(&p.From) {
  4006  				ctxt.Andptr[0] = 0x87
  4007  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4008  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4009  				subreg(&pp, z, REG_BX)
  4010  				doasm(ctxt, &pp)
  4011  				ctxt.Andptr[0] = 0x87
  4012  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4013  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4014  			} else {
  4015  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4016  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4017  				subreg(&pp, z, REG_AX)
  4018  				doasm(ctxt, &pp)
  4019  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4020  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4021  			}
  4022  			return
  4023  		}
  4024  	}
  4025  
  4026  	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4027  	return
  4028  }
  4029  
  4030  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4031  // which is not referenced in a.
  4032  // If a is empty, it returns BX to account for MULB-like instructions
  4033  // that might use DX and AX.
  4034  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4035  	cand := 1
  4036  	canc := cand
  4037  	canb := canc
  4038  	cana := canb
  4039  
  4040  	if a.Type == obj.TYPE_NONE {
  4041  		cand = 0
  4042  		cana = cand
  4043  	}
  4044  
  4045  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4046  		switch a.Reg {
  4047  		case REG_NONE:
  4048  			cand = 0
  4049  			cana = cand
  4050  
  4051  		case REG_AX, REG_AL, REG_AH:
  4052  			cana = 0
  4053  
  4054  		case REG_BX, REG_BL, REG_BH:
  4055  			canb = 0
  4056  
  4057  		case REG_CX, REG_CL, REG_CH:
  4058  			canc = 0
  4059  
  4060  		case REG_DX, REG_DL, REG_DH:
  4061  			cand = 0
  4062  		}
  4063  	}
  4064  
  4065  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4066  		switch a.Index {
  4067  		case REG_AX:
  4068  			cana = 0
  4069  
  4070  		case REG_BX:
  4071  			canb = 0
  4072  
  4073  		case REG_CX:
  4074  			canc = 0
  4075  
  4076  		case REG_DX:
  4077  			cand = 0
  4078  		}
  4079  	}
  4080  
  4081  	if cana != 0 {
  4082  		return REG_AX
  4083  	}
  4084  	if canb != 0 {
  4085  		return REG_BX
  4086  	}
  4087  	if canc != 0 {
  4088  		return REG_CX
  4089  	}
  4090  	if cand != 0 {
  4091  		return REG_DX
  4092  	}
  4093  
  4094  	ctxt.Diag("impossible byte register")
  4095  	log.Fatalf("bad code")
  4096  	return 0
  4097  }
  4098  
  4099  func isbadbyte(a *obj.Addr) bool {
  4100  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4101  }
  4102  
  4103  var naclret = []uint8{
  4104  	0x5e, // POPL SI
  4105  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4106  	0x83,
  4107  	0xe6,
  4108  	0xe0, // ANDL $~31, SI
  4109  	0x4c,
  4110  	0x01,
  4111  	0xfe, // ADDQ R15, SI
  4112  	0xff,
  4113  	0xe6, // JMP SI
  4114  }
  4115  
  4116  var naclret8 = []uint8{
  4117  	0x5d, // POPL BP
  4118  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4119  	0x83,
  4120  	0xe5,
  4121  	0xe0, // ANDL $~31, BP
  4122  	0xff,
  4123  	0xe5, // JMP BP
  4124  }
  4125  
  4126  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4127  
  4128  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4129  
  4130  var naclmovs = []uint8{
  4131  	0x89,
  4132  	0xf6, // MOVL SI, SI
  4133  	0x49,
  4134  	0x8d,
  4135  	0x34,
  4136  	0x37, // LEAQ (R15)(SI*1), SI
  4137  	0x89,
  4138  	0xff, // MOVL DI, DI
  4139  	0x49,
  4140  	0x8d,
  4141  	0x3c,
  4142  	0x3f, // LEAQ (R15)(DI*1), DI
  4143  }
  4144  
  4145  var naclstos = []uint8{
  4146  	0x89,
  4147  	0xff, // MOVL DI, DI
  4148  	0x49,
  4149  	0x8d,
  4150  	0x3c,
  4151  	0x3f, // LEAQ (R15)(DI*1), DI
  4152  }
  4153  
  4154  func nacltrunc(ctxt *obj.Link, reg int) {
  4155  	if reg >= REG_R8 {
  4156  		ctxt.Andptr[0] = 0x45
  4157  		ctxt.Andptr = ctxt.Andptr[1:]
  4158  	}
  4159  	reg = (reg - REG_AX) & 7
  4160  	ctxt.Andptr[0] = 0x89
  4161  	ctxt.Andptr = ctxt.Andptr[1:]
  4162  	ctxt.Andptr[0] = byte(3<<6 | reg<<3 | reg)
  4163  	ctxt.Andptr = ctxt.Andptr[1:]
  4164  }
  4165  
  4166  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4167  	ctxt.Andptr = ctxt.And[:]
  4168  	ctxt.Asmode = int(p.Mode)
  4169  
  4170  	if p.As == obj.AUSEFIELD {
  4171  		r := obj.Addrel(ctxt.Cursym)
  4172  		r.Off = 0
  4173  		r.Siz = 0
  4174  		r.Sym = p.From.Sym
  4175  		r.Type = obj.R_USEFIELD
  4176  		return
  4177  	}
  4178  
  4179  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4180  		switch p.As {
  4181  		case obj.ARET:
  4182  			copy(ctxt.Andptr, naclret8)
  4183  			ctxt.Andptr = ctxt.Andptr[len(naclret8):]
  4184  			return
  4185  
  4186  		case obj.ACALL,
  4187  			obj.AJMP:
  4188  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4189  				ctxt.Andptr[0] = 0x83
  4190  				ctxt.Andptr = ctxt.Andptr[1:]
  4191  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4192  				ctxt.Andptr = ctxt.Andptr[1:]
  4193  				ctxt.Andptr[0] = 0xe0
  4194  				ctxt.Andptr = ctxt.Andptr[1:]
  4195  			}
  4196  
  4197  		case AINT:
  4198  			ctxt.Andptr[0] = 0xf4
  4199  			ctxt.Andptr = ctxt.Andptr[1:]
  4200  			return
  4201  		}
  4202  	}
  4203  
  4204  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4205  		if p.As == AREP {
  4206  			ctxt.Rep++
  4207  			return
  4208  		}
  4209  
  4210  		if p.As == AREPN {
  4211  			ctxt.Repn++
  4212  			return
  4213  		}
  4214  
  4215  		if p.As == ALOCK {
  4216  			ctxt.Lock++
  4217  			return
  4218  		}
  4219  
  4220  		if p.As != ALEAQ && p.As != ALEAL {
  4221  			if p.From.Index != obj.TYPE_NONE && p.From.Scale > 0 {
  4222  				nacltrunc(ctxt, int(p.From.Index))
  4223  			}
  4224  			if p.To.Index != obj.TYPE_NONE && p.To.Scale > 0 {
  4225  				nacltrunc(ctxt, int(p.To.Index))
  4226  			}
  4227  		}
  4228  
  4229  		switch p.As {
  4230  		case obj.ARET:
  4231  			copy(ctxt.Andptr, naclret)
  4232  			ctxt.Andptr = ctxt.Andptr[len(naclret):]
  4233  			return
  4234  
  4235  		case obj.ACALL,
  4236  			obj.AJMP:
  4237  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4238  				// ANDL $~31, reg
  4239  				ctxt.Andptr[0] = 0x83
  4240  				ctxt.Andptr = ctxt.Andptr[1:]
  4241  
  4242  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4243  				ctxt.Andptr = ctxt.Andptr[1:]
  4244  				ctxt.Andptr[0] = 0xe0
  4245  				ctxt.Andptr = ctxt.Andptr[1:]
  4246  
  4247  				// ADDQ R15, reg
  4248  				ctxt.Andptr[0] = 0x4c
  4249  				ctxt.Andptr = ctxt.Andptr[1:]
  4250  
  4251  				ctxt.Andptr[0] = 0x01
  4252  				ctxt.Andptr = ctxt.Andptr[1:]
  4253  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_AX))
  4254  				ctxt.Andptr = ctxt.Andptr[1:]
  4255  			}
  4256  
  4257  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4258  				// ANDL $~31, reg
  4259  				ctxt.Andptr[0] = 0x41
  4260  				ctxt.Andptr = ctxt.Andptr[1:]
  4261  
  4262  				ctxt.Andptr[0] = 0x83
  4263  				ctxt.Andptr = ctxt.Andptr[1:]
  4264  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_R8))
  4265  				ctxt.Andptr = ctxt.Andptr[1:]
  4266  				ctxt.Andptr[0] = 0xe0
  4267  				ctxt.Andptr = ctxt.Andptr[1:]
  4268  
  4269  				// ADDQ R15, reg
  4270  				ctxt.Andptr[0] = 0x4d
  4271  				ctxt.Andptr = ctxt.Andptr[1:]
  4272  
  4273  				ctxt.Andptr[0] = 0x01
  4274  				ctxt.Andptr = ctxt.Andptr[1:]
  4275  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_R8))
  4276  				ctxt.Andptr = ctxt.Andptr[1:]
  4277  			}
  4278  
  4279  		case AINT:
  4280  			ctxt.Andptr[0] = 0xf4
  4281  			ctxt.Andptr = ctxt.Andptr[1:]
  4282  			return
  4283  
  4284  		case ASCASB,
  4285  			ASCASW,
  4286  			ASCASL,
  4287  			ASCASQ,
  4288  			ASTOSB,
  4289  			ASTOSW,
  4290  			ASTOSL,
  4291  			ASTOSQ:
  4292  			copy(ctxt.Andptr, naclstos)
  4293  			ctxt.Andptr = ctxt.Andptr[len(naclstos):]
  4294  
  4295  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4296  			copy(ctxt.Andptr, naclmovs)
  4297  			ctxt.Andptr = ctxt.Andptr[len(naclmovs):]
  4298  		}
  4299  
  4300  		if ctxt.Rep != 0 {
  4301  			ctxt.Andptr[0] = 0xf3
  4302  			ctxt.Andptr = ctxt.Andptr[1:]
  4303  			ctxt.Rep = 0
  4304  		}
  4305  
  4306  		if ctxt.Repn != 0 {
  4307  			ctxt.Andptr[0] = 0xf2
  4308  			ctxt.Andptr = ctxt.Andptr[1:]
  4309  			ctxt.Repn = 0
  4310  		}
  4311  
  4312  		if ctxt.Lock != 0 {
  4313  			ctxt.Andptr[0] = 0xf0
  4314  			ctxt.Andptr = ctxt.Andptr[1:]
  4315  			ctxt.Lock = 0
  4316  		}
  4317  	}
  4318  
  4319  	ctxt.Rexflag = 0
  4320  	and0 := ctxt.Andptr
  4321  	ctxt.Asmode = int(p.Mode)
  4322  	doasm(ctxt, p)
  4323  	if ctxt.Rexflag != 0 {
  4324  		/*
  4325  		 * as befits the whole approach of the architecture,
  4326  		 * the rex prefix must appear before the first opcode byte
  4327  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4328  		 * before the 0f opcode escape!), or it might be ignored.
  4329  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4330  		 */
  4331  		if p.Mode != 64 {
  4332  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4333  		}
  4334  		n := -cap(ctxt.Andptr) + cap(and0)
  4335  		var c int
  4336  		var np int
  4337  		for np = 0; np < n; np++ {
  4338  			c = int(and0[np])
  4339  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4340  				break
  4341  			}
  4342  		}
  4343  
  4344  		copy(and0[np+1:], and0[np:n])
  4345  		and0[np] = byte(0x40 | ctxt.Rexflag)
  4346  		ctxt.Andptr = ctxt.Andptr[1:]
  4347  	}
  4348  
  4349  	n := -cap(ctxt.Andptr) + cap(ctxt.And[:])
  4350  	var r *obj.Reloc
  4351  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4352  		r = &ctxt.Cursym.R[i:][0]
  4353  		if int64(r.Off) < p.Pc {
  4354  			break
  4355  		}
  4356  		if ctxt.Rexflag != 0 {
  4357  			r.Off++
  4358  		}
  4359  		if r.Type == obj.R_PCREL {
  4360  			// PC-relative addressing is relative to the end of the instruction,
  4361  			// but the relocations applied by the linker are relative to the end
  4362  			// of the relocation. Because immediate instruction
  4363  			// arguments can follow the PC-relative memory reference in the
  4364  			// instruction encoding, the two may not coincide. In this case,
  4365  			// adjust addend so that linker can keep relocating relative to the
  4366  			// end of the relocation.
  4367  			r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4368  		}
  4369  	}
  4370  
  4371  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4372  		switch p.To.Reg {
  4373  		case REG_SP:
  4374  			copy(ctxt.Andptr, naclspfix)
  4375  			ctxt.Andptr = ctxt.Andptr[len(naclspfix):]
  4376  
  4377  		case REG_BP:
  4378  			copy(ctxt.Andptr, naclbpfix)
  4379  			ctxt.Andptr = ctxt.Andptr[len(naclbpfix):]
  4380  		}
  4381  	}
  4382  }