github.com/d4l3k/go@v0.0.0-20151015000803-65fc379daeda/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"fmt"
    36  	"log"
    37  	"strings"
    38  )
    39  
    40  // Instruction layout.
    41  
    42  const (
    43  	// Loop alignment constants:
    44  	// want to align loop entry to LoopAlign-byte boundary,
    45  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    46  	// We define a loop entry as the target of a backward jump.
    47  	//
    48  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    49  	// and it aligns all jump targets, not just backward jump targets.
    50  	//
    51  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    52  	// is very slight but negative, so the alignment is disabled by
    53  	// setting MaxLoopPad = 0. The code is here for reference and
    54  	// for future experiments.
    55  	//
    56  	LoopAlign  = 16 // boundary, in bytes, that a loop entry would be aligned to
    57  	MaxLoopPad = 0 // maximum NOP bytes inserted to reach LoopAlign; 0 turns loop alignment off
    58  	FuncAlign  = 16 // presumably the alignment, in bytes, of function entries; its use is outside this view — confirm
    59  )
    60  
    61  type Optab struct { // one row of the optab instruction table: how a single opcode is encoded
    62  	as     int16 // instruction opcode the row applies to (e.g. AADDL)
    63  	ytab   []ytab // operand forms the instruction accepts, scanned in order; nil for rows with no encoding
    64  	prefix uint8 // one of the P* constants (Px, Pe, Pm, Pw, ...)
    65  	op     [23]uint8 // opcode bytes shared out among the ytab lines via their zoffset; unused tail stays zero
    66  }
    67  
    68  type ytab struct { // one accepted operand form of an instruction
    69  	from    uint8 // Y* class required of the from operand (Ynone when there is none)
    70  	from3   uint8 // Y* class required of the additional middle operand; Ynone in two-operand forms
    71  	to      uint8 // Y* class required of the to operand (Ynone when there is none)
    72  	zcase   uint8 // Z* constant selecting how the form is laid down
    73  	zoffset uint8 // how many Optab.op bytes belong to this form; added to the op index when the form is skipped
    74  }
    75  
    76  type Movtab struct { // NOTE(review): no Movtab values or users are visible in this part of the file; the field notes are inferred from names — confirm
    77  	as   int16 // presumably the instruction opcode, as in Optab.as
    78  	ft   uint8 // presumably the Y* class of the from operand
    79  	f3t  uint8 // presumably the Y* class of the from3 operand
    80  	tt   uint8 // presumably the Y* class of the to operand
    81  	code uint8 // presumably selects how op is interpreted — confirm against the consumer
    82  	op   [4]uint8 // up to four encoding bytes
    83  }
    84  
    85  const ( // Y*: operand classes; these are the values stored in the from/from3/to fields of ytab lines
    86  	Yxxx = iota
    87  	Ynone // used in ytab lines for an operand position that is absent
    88  	Yi0 // $0
    89  	Yi1 // $1
    90  	Yi8 // $x, x fits in int8
    91  	Yu8 // $x, x fits in uint8
    92  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
    93  	Ys32
    94  	Yi32
    95  	Yi64
    96  	Yiauto
    97  	Yal
    98  	Ycl
    99  	Yax
   100  	Ycx
   101  	Yrb
   102  	Yrl
   103  	Yrl32 // Yrl on 32-bit system
   104  	Yrf
   105  	Yf0
   106  	Yrx
   107  	Ymb
   108  	Yml
   109  	Ym
   110  	Ybr
   111  	Ycs
   112  	Yss
   113  	Yds
   114  	Yes
   115  	Yfs
   116  	Ygs
   117  	Ygdtr
   118  	Yidtr
   119  	Yldtr
   120  	Ymsw
   121  	Ytask
   122  	Ycr0
   123  	Ycr1
   124  	Ycr2
   125  	Ycr3
   126  	Ycr4
   127  	Ycr5
   128  	Ycr6
   129  	Ycr7
   130  	Ycr8
   131  	Ydr0
   132  	Ydr1
   133  	Ydr2
   134  	Ydr3
   135  	Ydr4
   136  	Ydr5
   137  	Ydr6
   138  	Ydr7
   139  	Ytr0
   140  	Ytr1
   141  	Ytr2
   142  	Ytr3
   143  	Ytr4
   144  	Ytr5
   145  	Ytr6
   146  	Ytr7
   147  	Ymr
   148  	Ymm
   149  	Yxr
   150  	Yxm
   151  	Ytls
   152  	Ytextsize
   153  	Yindir
   154  	Ymax // number of Y classes; ycover is a Ymax*Ymax matrix
   155  )
   156  
   157  const ( // Z*: encoding cases; these are the values stored in the zcase field of ytab lines
   158  	Zxxx = iota
   159  	Zlit
   160  	Zlitm_r
   161  	Z_rp
   162  	Zbr
   163  	Zcall
   164  	Zcallcon
   165  	Zcallduff
   166  	Zcallind
   167  	Zcallindreg
   168  	Zib_
   169  	Zib_rp
   170  	Zibo_m // per the comment before optab: opcode byte, addressing-mode bytes, then one immediate byte
   171  	Zibo_m_xm
   172  	Zil_
   173  	Zil_rp
   174  	Ziq_rp
   175  	Zilo_m // per the comment before optab: like Zibo_m but with a 32-bit immediate
   176  	Zjmp
   177  	Zjmpcon
   178  	Zloop
   179  	Zo_iw
   180  	Zm_o
   181  	Zm_r
   182  	Zm2_r
   183  	Zm_r_xm
   184  	Zm_r_xm_vex
   185  	Zm_r_i_xm
   186  	Zm_r_3d
   187  	Zm_r_xm_nr
   188  	Zr_m_xm_nr
   189  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   190  	Zmb_r
   191  	Zaut_r
   192  	Zo_m
   193  	Zo_m64
   194  	Zpseudo
   195  	Zr_m
   196  	Zr_m_xm
   197  	Zr_m_xm_vex
   198  	Zrp_
   199  	Z_ib
   200  	Z_il
   201  	Zm_ibo
   202  	Zm_ilo
   203  	Zib_rr
   204  	Zil_rr
   205  	Zclr
   206  	Zbyte
   207  	Zmax // one past the last Z case
   208  )
   209  
   210  const ( // P*: values for Optab.prefix; some (Pe, Pm, Pf2, Pf3) also appear as leading bytes inside Optab.op, e.g. in the ACMPPD and AIMULL rows
   211  	Px    = 0
   212  	Px1   = 1    // symbolic; exact value doesn't matter
   213  	P32   = 0x32 /* 32-bit only */
   214  	Pe    = 0x66 /* operand escape */
   215  	Pm    = 0x0f /* 2byte opcode escape */
   216  	Pq    = 0xff /* both escapes: 66 0f */
   217  	Pb    = 0xfe /* byte operands */
   218  	Pf2   = 0xf2 /* xmm escape 1: f2 0f */
   219  	Pf3   = 0xf3 /* xmm escape 2: f3 0f */
   220  	Pq3   = 0x67 /* xmm escape 3: 66 48 0f */
   221  	Pvex1 = 0xc5 /* 66 escape, vex encoding */
   222  	Pvex2 = 0xc6 /* f3 escape, vex encoding */
   223  	Pw    = 0x48 /* Rex.w */
   224  	Pw8   = 0x90 // symbolic; exact value doesn't matter
   225  	Py    = 0x80 /* defaults to 64-bit mode */
   226  	Py1   = 0x81 // symbolic; exact value doesn't matter
   227  	Py3   = 0x83 // symbolic; exact value doesn't matter
   228  
   229  	Rxw = 1 << 3 /* =1, 64-bit operand size */ // Rx*: single-bit masks; judging by the Rex.w note on Pw these are the REX prefix W/R/X/B bits — confirm
   230  	Rxr = 1 << 2 /* extend modrm reg */
   231  	Rxx = 1 << 1 /* extend sib index */
   232  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   233  )
   234  
   235  var ycover [Ymax * Ymax]uint8 // flattened Ymax x Ymax matrix: ycover[a*Ymax+b] set means an operand of class a also counts as class b (see the comment before optab)
   236  
   237  var reg [MAXREG]int // one slot per register number; filled in outside this view — meaning of the values to be confirmed
   238  
   239  var regrex [MAXREG + 1]int // one slot more than reg; presumably per-register Rx* bits — filled in outside this view, confirm
   240  
   241  var ynone = []ytab{ // no operands, Zlit case; used by the operand-less optab rows such as AAAA, ACLC, ACPUID and AHLT
   242  	{Ynone, Ynone, Ynone, Zlit, 1},
   243  }
   244  
   245  var ytext = []ytab{ // Ymb from, Ytextsize to, optionally a Yi32 from3; both forms are Zpseudo (no optab user is visible in this view)
   246  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   247  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   248  }
   249  
   250  var ynop = []ytab{ // no operands, or a single Yiauto/Yml/Yrf/Yxr operand on either side; every form is Zpseudo
   251  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   252  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   253  	{Ynone, Ynone, Yml, Zpseudo, 0},
   254  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   255  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   256  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   257  	{Yml, Ynone, Ynone, Zpseudo, 0},
   258  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   259  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   260  }
   261  
   262  var yfuncdata = []ytab{ // Yi32 from, Ym to; Zpseudo (no optab user is visible in this view)
   263  	{Yi32, Ynone, Ym, Zpseudo, 0},
   264  }
   265  
   266  var ypcdata = []ytab{ // Yi32 on both sides; Zpseudo (no optab user is visible in this view)
   267  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   268  }
   269  
   270  var yxorb = []ytab{ // used by AADCB, AADDB and AANDB; zoffsets 1+2+1+1 match the five op bytes of those rows
   271  	{Yi32, Ynone, Yal, Zib_, 1},
   272  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   273  	{Yrb, Ynone, Ymb, Zr_m, 1},
   274  	{Ymb, Ynone, Yrb, Zm_r, 1},
   275  }
   276  
   277  var yxorl = []ytab{ // used by AADCL/Q/W and AANDL/Q/W; zoffsets 2+1+2+1+1 match the seven op bytes of those rows
   278  	{Yi8, Ynone, Yml, Zibo_m, 2}, // listed first, so an immediate that fits in int8 takes this form before the Yi32 ones
   279  	{Yi32, Ynone, Yax, Zil_, 1},
   280  	{Yi32, Ynone, Yml, Zilo_m, 2},
   281  	{Yrl, Ynone, Yml, Zr_m, 1},
   282  	{Yml, Ynone, Yrl, Zm_r, 1},
   283  }
   284  
   285  var yaddl = []ytab{ // used by AADDL/Q/W; the rows are identical to yxorl
   286  	{Yi8, Ynone, Yml, Zibo_m, 2},
   287  	{Yi32, Ynone, Yax, Zil_, 1},
   288  	{Yi32, Ynone, Yml, Zilo_m, 2},
   289  	{Yrl, Ynone, Yml, Zr_m, 1},
   290  	{Yml, Ynone, Yrl, Zm_r, 1},
   291  }
   292  
   293  var yincb = []ytab{ // single Ymb destination; used by ADECB and AINCB
   294  	{Ynone, Ynone, Ymb, Zo_m, 2},
   295  }
   296  
   297  var yincw = []ytab{ // single Yml destination; used by ADECW and AINCW
   298  	{Ynone, Ynone, Yml, Zo_m, 2},
   299  }
   300  
   301  var yincl = []ytab{ // used by ADECL and AINCL: the Yrl form owns the first op byte, the Yml form the following two
   302  	{Ynone, Ynone, Yrl, Z_rp, 1},
   303  	{Ynone, Ynone, Yml, Zo_m, 2},
   304  }
   305  
   306  var yincq = []ytab{ // single Yml destination; used by ADECQ and AINCQ
   307  	{Ynone, Ynone, Yml, Zo_m, 2},
   308  }
   309  
   310  var ycmpb = []ytab{ // used by ACMPB; same shapes as yxorb but with the immediate in the to position
   311  	{Yal, Ynone, Yi32, Z_ib, 1},
   312  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   313  	{Ymb, Ynone, Yrb, Zm_r, 1},
   314  	{Yrb, Ynone, Ymb, Zr_m, 1},
   315  }
   316  
   317  var ycmpl = []ytab{ // used by ACMPL/Q/W; same shapes as yxorl but with the immediate in the to position
   318  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   319  	{Yax, Ynone, Yi32, Z_il, 1},
   320  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   321  	{Yml, Ynone, Yrl, Zm_r, 1},
   322  	{Yrl, Ynone, Yml, Zr_m, 1},
   323  }
   324  
   325  var yshb = []ytab{ // Yi1, Yi32 or Ycx count applied to a Ymb destination (optab user not visible here). NOTE(review): unlike yshl there is no Ycl row — confirm that is intended
   326  	{Yi1, Ynone, Ymb, Zo_m, 2},
   327  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   328  	{Ycx, Ynone, Ymb, Zo_m, 2},
   329  }
   330  
   331  var yshl = []ytab{ // Yi1, Yi32, Ycl or Ycx count applied to a Yml destination (optab user not visible here)
   332  	{Yi1, Ynone, Yml, Zo_m, 2},
   333  	{Yi32, Ynone, Yml, Zibo_m, 2},
   334  	{Ycl, Ynone, Yml, Zo_m, 2},
   335  	{Ycx, Ynone, Yml, Zo_m, 2},
   336  }
   337  
   338  var ytestb = []ytab{ // rows identical to yxorb (optab user not visible here)
   339  	{Yi32, Ynone, Yal, Zib_, 1},
   340  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   341  	{Yrb, Ynone, Ymb, Zr_m, 1},
   342  	{Ymb, Ynone, Yrb, Zm_r, 1},
   343  }
   344  
   345  var ytestl = []ytab{ // yxorl without its leading Yi8 form (optab user not visible here)
   346  	{Yi32, Ynone, Yax, Zil_, 1},
   347  	{Yi32, Ynone, Yml, Zilo_m, 2},
   348  	{Yrl, Ynone, Yml, Zr_m, 1},
   349  	{Yml, Ynone, Yrl, Zm_r, 1},
   350  }
   351  
   352  var ymovb = []ytab{ // Yrb/Ymb transfers in both directions plus Yi32 into Yrb or Ymb (optab user not visible here)
   353  	{Yrb, Ynone, Ymb, Zr_m, 1},
   354  	{Ymb, Ynone, Yrb, Zm_r, 1},
   355  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   356  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   357  }
   358  
   359  var ymbs = []ytab{ // a lone Ymb from operand (optab user not visible here)
   360  	{Ymb, Ynone, Ynone, Zm_o, 2},
   361  }
   362  
   363  var ybtl = []ytab{ // used by the ABT*/ABTC*/ABTR*/ABTS* rows: the Yi8 form owns the first two op bytes, the Yrl form the next one
   364  	{Yi8, Ynone, Yml, Zibo_m, 2},
   365  	{Yrl, Ynone, Yml, Zr_m, 1},
   366  }
   367  
   368  var ymovw = []ytab{ // ymovl without its MMX/XMM rows (optab user not visible here)
   369  	{Yrl, Ynone, Yml, Zr_m, 1},
   370  	{Yml, Ynone, Yrl, Zm_r, 1},
   371  	{Yi0, Ynone, Yrl, Zclr, 1}, // $0 into a register has its own case (Zclr), listed before the general Yi32 form
   372  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   373  	{Yi32, Ynone, Yml, Zilo_m, 2},
   374  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   375  }
   376  
   377  var ymovl = []ytab{ // register/memory/immediate forms followed by MMX and XMM MOVD forms (optab user not visible here)
   378  	{Yrl, Ynone, Yml, Zr_m, 1},
   379  	{Yml, Ynone, Yrl, Zm_r, 1},
   380  	{Yi0, Ynone, Yrl, Zclr, 1},
   381  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   382  	{Yi32, Ynone, Yml, Zilo_m, 2},
   383  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   384  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   385  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   386  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   387  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   388  }
   389  
   390  var yret = []ytab{ // no operand, or a Yi32 from operand; both forms are Zo_iw (optab user not visible here)
   391  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   392  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   393  }
   394  
   395  var ymovq = []ytab{ // two groups of forms, marked by the comments below; the per-row notes give the prefix/opcode each form draws from its op bytes (optab user not visible here)
   396  	// valid in 32-bit mode
   397  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   398  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   399  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   400  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   401  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   402  
   403  	// valid only in 64-bit mode, usually with 64-bit prefix
   404  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   405  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   406  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   407  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   408  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   409  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   410  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   411  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   412  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   413  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   414  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   415  }
   416  
   417  var ym_rl = []ytab{ // Ym from, Yrl to (optab user not visible here)
   418  	{Ym, Ynone, Yrl, Zm_r, 1},
   419  }
   420  
   421  var yrl_m = []ytab{ // Yrl from, Ym to; used by ABOUNDL and ABOUNDW
   422  	{Yrl, Ynone, Ym, Zr_m, 1},
   423  }
   424  
   425  var ymb_rl = []ytab{ // Ymb from, Yrl to, with its own Zmb_r case (optab user not visible here)
   426  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   427  }
   428  
   429  var yml_rl = []ytab{ // Yml from, Yrl to; used by ABSF*, ABSR* and every ACMOV* row
   430  	{Yml, Ynone, Yrl, Zm_r, 1},
   431  }
   432  
   433  var yrl_ml = []ytab{ // Yrl from, Yml to; used by AARPL
   434  	{Yrl, Ynone, Yml, Zr_m, 1},
   435  }
   436  
   437  var yml_mb = []ytab{ // Yrb/Ymb in either direction (optab user not visible here)
   438  	{Yrb, Ynone, Ymb, Zr_m, 1},
   439  	{Ymb, Ynone, Yrb, Zm_r, 1},
   440  }
   441  
   442  var yrb_mb = []ytab{ // Yrb from, Ymb to only (optab user not visible here)
   443  	{Yrb, Ynone, Ymb, Zr_m, 1},
   444  }
   445  
   446  var yxchg = []ytab{ // the two forms involving Yax come first, then the general Yrl/Yml forms (optab user not visible here)
   447  	{Yax, Ynone, Yrl, Z_rp, 1},
   448  	{Yrl, Ynone, Yax, Zrp_, 1},
   449  	{Yrl, Ynone, Yml, Zr_m, 1},
   450  	{Yml, Ynone, Yrl, Zm_r, 1},
   451  }
   452  
   453  var ydivl = []ytab{ // a lone Yml from operand; used by ADIVL/Q/W and AIDIVL/Q/W
   454  	{Yml, Ynone, Ynone, Zm_o, 2},
   455  }
   456  
   457  var ydivb = []ytab{ // a lone Ymb from operand; used by ADIVB, AIDIVB and AIMULB
   458  	{Ymb, Ynone, Ynone, Zm_o, 2},
   459  }
   460  
   461  var yimul = []ytab{ // used by AIMULL/Q/W, whose op bytes split as (0xf7,05) (0x6b) (0x69) (Pm,0xaf) across the four forms
   462  	{Yml, Ynone, Ynone, Zm_o, 2},
   463  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   464  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   465  	{Yml, Ynone, Yrl, Zm_r, 2},
   466  }
   467  
   468  var yimul3 = []ytab{ // three-operand form: Yi8 from, Yml from3, Yrl to; used by AIMUL3Q
   469  	{Yi8, Yml, Yrl, Zibm_r, 2},
   470  }
   471  
   472  var ybyte = []ytab{ // a lone Yi64 from operand with the Zbyte case; used by ABYTE
   473  	{Yi64, Ynone, Ynone, Zbyte, 1},
   474  }
   475  
   476  var yin = []ytab{ // used by AINB, AINL and AINW: first op byte for the Yi32 form, second for the operand-less form
   477  	{Yi32, Ynone, Ynone, Zib_, 1},
   478  	{Ynone, Ynone, Ynone, Zlit, 1},
   479  }
   480  
   481  var yint = []ytab{ // a lone Yi32 from operand; used by AINT
   482  	{Yi32, Ynone, Ynone, Zib_, 1},
   483  }
   484  
   485  var ypushl = []ytab{ // a lone from operand: Yrl, Ym, Yi8 or Yi32 (optab user not visible here)
   486  	{Yrl, Ynone, Ynone, Zrp_, 1},
   487  	{Ym, Ynone, Ynone, Zm_o, 2},
   488  	{Yi8, Ynone, Ynone, Zib_, 1},
   489  	{Yi32, Ynone, Ynone, Zil_, 1},
   490  }
   491  
   492  var ypopl = []ytab{ // a lone to operand: Yrl or Ym (optab user not visible here)
   493  	{Ynone, Ynone, Yrl, Z_rp, 1},
   494  	{Ynone, Ynone, Ym, Zo_m, 2},
   495  }
   496  
   497  var ybswap = []ytab{ // a lone Yrl to operand; used by ABSWAPL and ABSWAPQ (op bytes 0x0f, 0xc8)
   498  	{Ynone, Ynone, Yrl, Z_rp, 2},
   499  }
   500  
   501  var yscond = []ytab{ // a lone Ymb to operand (optab user not visible here)
   502  	{Ynone, Ynone, Ymb, Zo_m, 2},
   503  }
   504  
   505  var yjcond = []ytab{ // used by AJCC, AJCS, AJEQ, AJGE; the first two rows have zoffset 0, so all three forms read the same op bytes
   506  	{Ynone, Ynone, Ybr, Zbr, 0},
   507  	{Yi0, Ynone, Ybr, Zbr, 0},
   508  	{Yi1, Ynone, Ybr, Zbr, 1},
   509  }
   510  
   511  var yloop = []ytab{ // a lone Ybr to operand with the Zloop case; used by AJCXZL, AJCXZW and AJCXZQ
   512  	{Ynone, Ynone, Ybr, Zloop, 1},
   513  }
   514  
   515  var ycall = []ytab{ // used by obj.ACALL; zero zoffsets make rows 1-2 share op bytes (0xff,02) and rows 4-5 share 0xe8, row 3 reads (0xff,0x15)
   516  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   517  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   518  	{Ynone, Ynone, Yindir, Zcallind, 2},
   519  	{Ynone, Ynone, Ybr, Zcall, 0},
   520  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   521  }
   522  
   523  var yduff = []ytab{ // a lone Yi32 to operand with the Zcallduff case (optab user not visible here)
   524  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   525  }
   526  
   527  var yjmp = []ytab{ // Yml, Ybr or Yi32 target; the Ybr row has zoffset 0 so it shares op bytes with the Yi32 row (optab user not visible here)
   528  	{Ynone, Ynone, Yml, Zo_m64, 2},
   529  	{Ynone, Ynone, Ybr, Zjmp, 0},
   530  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   531  }
   532  
   533  var yfmvd = []ytab{ // Ym or Yrf paired with Yf0, in both directions (optab user not visible here; Yf0/Yrf presumably x87 stack registers — confirm)
   534  	{Ym, Ynone, Yf0, Zm_o, 2},
   535  	{Yf0, Ynone, Ym, Zo_m, 2},
   536  	{Yrf, Ynone, Yf0, Zm_o, 2},
   537  	{Yf0, Ynone, Yrf, Zo_m, 2},
   538  }
   539  
   540  var yfmvdp = []ytab{ // Yf0 from, Ym or Yrf to (optab user not visible here)
   541  	{Yf0, Ynone, Ym, Zo_m, 2},
   542  	{Yf0, Ynone, Yrf, Zo_m, 2},
   543  }
   544  
   545  var yfmvf = []ytab{ // Ym and Yf0 in either direction, no Yrf forms (optab user not visible here)
   546  	{Ym, Ynone, Yf0, Zm_o, 2},
   547  	{Yf0, Ynone, Ym, Zo_m, 2},
   548  }
   549  
   550  var yfmvx = []ytab{ // Ym from, Yf0 to only (optab user not visible here)
   551  	{Ym, Ynone, Yf0, Zm_o, 2},
   552  }
   553  
   554  var yfmvp = []ytab{ // Yf0 from, Ym to only (optab user not visible here)
   555  	{Yf0, Ynone, Ym, Zo_m, 2},
   556  }
   557  
   558  var yfcmv = []ytab{ // Yrf from, Yf0 to only (optab user not visible here)
   559  	{Yrf, Ynone, Yf0, Zm_o, 2},
   560  }
   561  
   562  var yfadd = []ytab{ // Ym or Yrf into Yf0, or Yf0 into Yrf (optab user not visible here)
   563  	{Ym, Ynone, Yf0, Zm_o, 2},
   564  	{Yrf, Ynone, Yf0, Zm_o, 2},
   565  	{Yf0, Ynone, Yrf, Zo_m, 2},
   566  }
   567  
   568  var yfaddp = []ytab{ // Yf0 from, Yrf to only (optab user not visible here)
   569  	{Yf0, Ynone, Yrf, Zo_m, 2},
   570  }
   571  
   572  var yfxch = []ytab{ // Yf0 and Yrf in either order (optab user not visible here)
   573  	{Yf0, Ynone, Yrf, Zo_m, 2},
   574  	{Yrf, Ynone, Yf0, Zm_o, 2},
   575  }
   576  
   577  var ycompp = []ytab{ // same row as yfaddp; the inline note flags that the real operands are f0,f1 (optab user not visible here)
   578  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   579  }
   580  
   581  var ystsw = []ytab{ // to operand is Ym (Zo_m) or Yax (Zlit) (optab user not visible here)
   582  	{Ynone, Ynone, Ym, Zo_m, 2},
   583  	{Ynone, Ynone, Yax, Zlit, 1},
   584  }
   585  
   586  var ystcw = []ytab{ // a lone Ym operand on either side (optab user not visible here)
   587  	{Ynone, Ynone, Ym, Zo_m, 2},
   588  	{Ym, Ynone, Ynone, Zm_o, 2},
   589  }
   590  
   591  var ysvrs = []ytab{ // rows identical to ystcw; used by AFXRSTOR, AFXSAVE and their 64 variants
   592  	{Ynone, Ynone, Ym, Zo_m, 2},
   593  	{Ym, Ynone, Ynone, Zm_o, 2},
   594  }
   595  
   596  var ymm = []ytab{ // an Ymm->Ymr form (one op byte) and an Yxm->Yxr form (two op bytes) (optab user not visible here)
   597  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   598  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   599  }
   600  
   601  var yxm = []ytab{ // Yxm from, Yxr to, one op byte; used by AADDPD/PS/SD/SS, AANDPD, ADIVPD, ACVTPD2PS and similar rows
   602  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   603  }
   604  
   605  var yxcvm1 = []ytab{ // Yxm from, to is Yxr or Ymr, two op bytes each; used by ACVTPD2PL, ACVTPS2PL, ACVTTPD2PL, ACVTTPS2PL
   606  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   607  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   608  }
   609  
   610  var yxcvm2 = []ytab{ // from is Yxm or Ymm, Yxr to, two op bytes each; used by ACVTPL2PD and ACVTPL2PS
   611  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   612  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   613  }
   614  
   615  /*
   616  var yxmq = []ytab{
   617  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   618  }
   619  */
   620  
   621  var yxr = []ytab{ // Yxr on both sides (optab user not visible here)
   622  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   623  }
   624  
   625  var yxr_ml = []ytab{ // Yxr from, Yml to (optab user not visible here)
   626  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   627  }
   628  
   629  var yxr_ml_vex = []ytab{ // same operands as yxr_ml but with the Zr_m_xm_vex case (optab user not visible here)
   630  	{Yxr, Ynone, Yml, Zr_m_xm_vex, 1},
   631  }
   632  
   633  var ymr = []ytab{ // Ymr on both sides (optab user not visible here)
   634  	{Ymr, Ynone, Ymr, Zm_r, 1},
   635  }
   636  
   637  var ymr_ml = []ytab{ // Ymr from, Yml to (optab user not visible here)
   638  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   639  }
   640  
   641  var yxcmp = []ytab{ // same row as yxm; used by ACOMISD and ACOMISS
   642  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   643  }
   644  
   645  var yxcmpi = []ytab{ // three-operand form: Yxm from, Yxr from3, Yi8 to; used by ACMPPD, ACMPPS, ACMPSD, ACMPSS (two op bytes)
   646  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   647  }
   648  
   649  var yxmov_vex = []ytab{ // yxmov with the _vex variants of both cases (optab user not visible here)
   650  	{Yxm, Ynone, Yxr, Zm_r_xm_vex, 1},
   651  	{Yxr, Ynone, Yxm, Zr_m_xm_vex, 1},
   652  }
   653  
   654  var yxmov = []ytab{ // Yxm->Yxr and Yxr->Yxm, one op byte each (optab user not visible here)
   655  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   656  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   657  }
   658  
   659  var yxcvfl = []ytab{ // Yxm from, Yrl to, one op byte; used by ACVTSD2SL, ACVTSS2SL, ACVTTSD2SL, ACVTTSS2SL
   660  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   661  }
   662  
   663  var yxcvlf = []ytab{ // Yml from, Yxr to, one op byte; used by ACVTSL2SD and ACVTSL2SS
   664  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   665  }
   666  
   667  var yxcvfq = []ytab{ // like yxcvfl but two op bytes; used by the ACVT*2SQ rows, which put Pf2/Pf3 in the first op byte
   668  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   669  }
   670  
   671  var yxcvqf = []ytab{ // like yxcvlf but two op bytes; used by ACVTSQ2SD and ACVTSQ2SS
   672  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   673  }
   674  
   675  var yps = []ytab{ // Ymm->Ymr and Yi8->Ymr forms, then Yxm->Yxr and Yi8->Yxr forms; zoffsets total 8 op bytes (optab user not visible here)
   676  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   677  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   678  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   679  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   680  }
   681  
   682  var yxrrl = []ytab{ // Yxr from, Yrl to (optab user not visible here)
   683  	{Yxr, Ynone, Yrl, Zm_r, 1},
   684  }
   685  
   686  var ymfp = []ytab{ // Ymm from, Ymr to with the Zm_r_3d case; used by API2FW
   687  	{Ymm, Ynone, Ymr, Zm_r_3d, 1},
   688  }
   689  
   690  var ymrxr = []ytab{ // Ymr or Yxm from, Yxr to (optab user not visible here)
   691  	{Ymr, Ynone, Yxr, Zm_r, 1},
   692  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   693  }
   694  
   695  var ymshuf = []ytab{ // three-operand form: Yi8 from, Ymm from3, Ymr to (optab user not visible here)
   696  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   697  }
   698  
   699  var ymshufb = []ytab{ // Yxm from, Yxr to with the Zm2_r case (optab user not visible here)
   700  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   701  }
   702  
   703  var yxshuf = []ytab{ // three-operand form: Yu8 from, Yxm from3, Yxr to (optab user not visible here)
   704  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   705  }
   706  
   707  var yextrw = []ytab{ // three-operand form: Yu8 from, Yxr from3, Yrl to (optab user not visible here)
   708  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   709  }
   710  
   711  var yinsrw = []ytab{ // three-operand form: Yu8 from, Yml from3, Yxr to (optab user not visible here)
   712  	{Yu8, Yml, Yxr, Zibm_r, 2},
   713  }
   714  
   715  var yinsr = []ytab{ // three-operand form: Yu8 from, Ymm from3, Yxr to, three op bytes (optab user not visible here)
   716  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   717  }
   718  
   719  var ypsdq = []ytab{ // Yi8 from, Yxr to (optab user not visible here)
   720  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   721  }
   722  
   723  var ymskb = []ytab{ // Yxr or Ymr from, Yrl to; the Yxr form owns two op bytes, the Ymr form one (optab user not visible here)
   724  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   725  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   726  }
   727  
   728  var ycrc32l = []ytab{ // Yml from, Yrl to with the Zlitm_r case; zoffset 0 is harmless since it is the only row (optab user not visible here)
   729  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   730  }
   731  
   732  var yprefetch = []ytab{ // a lone Ym from operand (optab user not visible here)
   733  	{Ym, Ynone, Ynone, Zm_o, 2},
   734  }
   735  
   736  var yaes = []ytab{ // Yxm from, Yxr to with the Zlitm_r case (optab user not visible here)
   737  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   738  }
   739  
   740  var yaes2 = []ytab{ // same row as yxshuf: Yu8 from, Yxm from3, Yxr to (optab user not visible here)
   741  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   742  }
   743  
   744  /*
   745   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   746   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   747   * the entry with the given p->as and then looks through the ytable for that
   748   * instruction (the second field in the optab struct) for a line whose from,
   749   * from3 and to values match the Ytypes of the instruction's operands.  The function
   750   * oclass in span.c computes the specific Ytype of an operand and then the set
   751   * of more general Ytypes that it satisfies is implied by the ycover table, set
   752   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   753   * from the more general 8-bit constants, but instinit says
   754   *
   755   *        ycover[Yi0*Ymax + Ys32] = 1;
   756   *        ycover[Yi1*Ymax + Ys32] = 1;
   757   *        ycover[Yi8*Ymax + Ys32] = 1;
   758   *
   759   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   760   * if that's what an instruction can handle.
   761   *
   762   * In parallel with the scan through the ytable for the appropriate line, there
   763   * is a z pointer that starts out pointing at the strange magic byte list in
   764   * the Optab struct (its op field).  With each step past a non-matching ytable
   765   * line, z advances by that line's zoffset (the 4th entry in the C-style rows
   766   * quoted below).  When a matching line is found, that z pointer has the extra
   767   * data to use in laying down the instruction bytes.  The actual bytes laid down
   768   * are a function of the line's zcase (that is, the Ztype) and the z bytes.
   769   *
   770   * For example, let's look at AADDL.  The optab line says:
   771   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   772   *
   773   * and yaddl says
   774   *        uchar   yaddl[] =
   775   *        {
   776   *                Yi8,    Yml,    Zibo_m, 2,
   777   *                Yi32,   Yax,    Zil_,   1,
   778   *                Yi32,   Yml,    Zilo_m, 2,
   779   *                Yrl,    Yml,    Zr_m,   1,
   780   *                Yml,    Yrl,    Zm_r,   1,
   781   *                0
   782   *        };
   783   *
   784   * so there are 5 possible types of ADDL instruction that can be laid down, and
   785   * possible states used to lay them down (Ztype and z pointer, assuming z
   786   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   787   *
   788   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   789   *        Yi32, Yax -> Zil_, z+2 (0x05)
   790   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   791   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   792   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   793   *
   794   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   795   * relatively straightforward as this program goes.
   796   *
   797   * The switch on the Ztype (the ytab zcase field; t[2] in the C rows above) in
   798   * doasm implements the various Z cases.  Zibo_m, for example, is an opcode byte
   799   * (z[0]) then an asmando (which is some kind of encoded addressing mode for the
   800   * Yml arg), and then a single immediate byte.  Zilo_m is the same but a long (32-bit) immediate.
   801   */
   802  var optab =
   803  /*	as, ytab, andproto, opcode */
   804  []Optab{
   805  	{obj.AXXX, nil, 0, [23]uint8{}},
   806  	{AAAA, ynone, P32, [23]uint8{0x37}},
   807  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   808  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   809  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   810  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   811  	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   812  	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   813  	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   814  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   815  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   816  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   817  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   818  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   819  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   820  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   821  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   822  	{AADJSP, nil, 0, [23]uint8{}},
   823  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   824  	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   825  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   826  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   827  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   828  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
   829  	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   830  	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   831  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   832  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   833  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   834  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   835  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   836  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   837  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   838  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   839  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   840  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   841  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   842  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   843  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   844  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   845  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   846  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   847  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   848  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   849  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   850  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   851  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   852  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   853  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   854  	{ABYTE, ybyte, Px, [23]uint8{1}},
   855  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   856  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   857  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   858  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   859  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   860  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   861  	{ACMC, ynone, Px, [23]uint8{0xf5}},
   862  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
   863  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
   864  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
   865  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
   866  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
   867  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
   868  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
   869  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
   870  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
   871  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
   872  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
   873  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
   874  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
   875  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
   876  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
   877  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
   878  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
   879  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
   880  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
   881  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
   882  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
   883  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
   884  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
   885  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
   886  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
   887  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
   888  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
   889  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
   890  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
   891  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
   892  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
   893  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
   894  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
   895  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
   896  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
   897  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
   898  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
   899  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
   900  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
   901  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
   902  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
   903  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
   904  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
   905  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
   906  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
   907  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
   908  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
   909  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
   910  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
   911  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   912  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
   913  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
   914  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   915  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
   916  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
   917  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
   918  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
   919  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
   920  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
   921  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   922  	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
   923  	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
   924  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
   925  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
   926  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
   927  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
   928  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
   929  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
   930  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
   931  	{API2FW, ymfp, Px, [23]uint8{0x0c}},
   932  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
   933  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
   934  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
   935  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
   936  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
   937  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
   938  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
   939  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
   940  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
   941  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
   942  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
   943  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
   944  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
   945  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
   946  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
   947  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
   948  	{ACWD, ynone, Pe, [23]uint8{0x99}},
   949  	{ACQO, ynone, Pw, [23]uint8{0x99}},
   950  	{ADAA, ynone, P32, [23]uint8{0x27}},
   951  	{ADAS, ynone, P32, [23]uint8{0x2f}},
   952  	{obj.ADATA, nil, 0, [23]uint8{}},
   953  	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
   954  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
   955  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
   956  	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
   957  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
   958  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
   959  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
   960  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
   961  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
   962  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
   963  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
   964  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
   965  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
   966  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
   967  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
   968  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
   969  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
   970  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
   971  	{obj.AGLOBL, nil, 0, [23]uint8{}},
   972  	{AHLT, ynone, Px, [23]uint8{0xf4}},
   973  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
   974  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
   975  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
   976  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
   977  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
   978  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   979  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   980  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   981  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
   982  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
   983  	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
   984  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
   985  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
   986  	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
   987  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
   988  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
   989  	{AINSL, ynone, Px, [23]uint8{0x6d}},
   990  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
   991  	{AINT, yint, Px, [23]uint8{0xcd}},
   992  	{AINTO, ynone, P32, [23]uint8{0xce}},
   993  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
   994  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
   995  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
   996  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
   997  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
   998  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
   999  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1000  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1001  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1002  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1003  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1004  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1005  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1006  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1007  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1008  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1009  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1010  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1011  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1012  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1013  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1014  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1015  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1016  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1017  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1018  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1019  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1020  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1021  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1022  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1023  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1024  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1025  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1026  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1027  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1028  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1029  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1030  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1031  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1032  	{ALONG, ybyte, Px, [23]uint8{4}},
  1033  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1034  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1035  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1036  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1037  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1038  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1039  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1040  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1041  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1042  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1043  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1044  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1045  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1046  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1047  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1048  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1049  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1050  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1051  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1052  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1053  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1054  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1055  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1056  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1057  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1058  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1059  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1060  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1061  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1062  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1063  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1064  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1065  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1066  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1067  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1068  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1069  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1070  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1071  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1072  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1073  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1074  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1075  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1076  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1077  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1078  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1079  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1080  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1081  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1082  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1083  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1084  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1085  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1086  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1087  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1088  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1089  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1090  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1091  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1092  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1093  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1094  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1095  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1096  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1097  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1098  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1099  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1100  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1101  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1102  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1103  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1104  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1105  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1106  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1107  	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1108  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1109  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1110  	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1111  	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1112  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1113  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1114  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1115  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1116  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1117  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1118  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1119  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1120  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1121  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1122  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1123  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1124  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1125  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1126  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1127  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1128  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1129  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1130  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1131  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1132  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1133  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1134  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1135  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1136  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1137  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1138  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1139  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1140  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1141  	{APF2IL, ymfp, Px, [23]uint8{0x1d}},
  1142  	{APF2IW, ymfp, Px, [23]uint8{0x1c}},
  1143  	{API2FL, ymfp, Px, [23]uint8{0x0d}},
  1144  	{APFACC, ymfp, Px, [23]uint8{0xae}},
  1145  	{APFADD, ymfp, Px, [23]uint8{0x9e}},
  1146  	{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
  1147  	{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
  1148  	{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
  1149  	{APFMAX, ymfp, Px, [23]uint8{0xa4}},
  1150  	{APFMIN, ymfp, Px, [23]uint8{0x94}},
  1151  	{APFMUL, ymfp, Px, [23]uint8{0xb4}},
  1152  	{APFNACC, ymfp, Px, [23]uint8{0x8a}},
  1153  	{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
  1154  	{APFRCP, ymfp, Px, [23]uint8{0x96}},
  1155  	{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
  1156  	{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
  1157  	{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
  1158  	{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
  1159  	{APFSUB, ymfp, Px, [23]uint8{0x9a}},
  1160  	{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
  1161  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1162  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1163  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1164  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1165  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1166  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1167  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1168  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1169  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1170  	{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
  1171  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1172  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1173  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1174  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1175  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1176  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1177  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1178  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1179  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1180  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1181  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1182  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1183  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1184  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1185  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1186  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1187  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1188  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1189  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1190  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1191  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1192  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1193  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1194  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1195  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1196  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1197  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1198  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1199  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xe1, Pe, 0x71, 02}},
  1200  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1201  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1202  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1203  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1204  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1205  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1206  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1207  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1208  	{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
  1209  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1210  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1211  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1212  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1213  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1214  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1215  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1216  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1217  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1218  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1219  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1220  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1221  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1222  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1223  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1224  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1225  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1226  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1227  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1228  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1229  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1230  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1231  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1232  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1233  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1234  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1235  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1236  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1237  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1238  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1239  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1240  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1241  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1242  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1243  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1244  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1245  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1246  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1247  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1248  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1249  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1250  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1251  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1252  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1253  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1254  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1255  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1256  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1257  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1258  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1259  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1260  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1261  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1262  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1263  	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1264  	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1265  	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1266  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1267  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1268  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1269  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1270  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1271  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1272  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1273  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1274  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1275  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1276  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1277  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1278  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1279  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1280  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1281  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1282  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1283  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1284  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1285  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1286  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1287  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1288  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1289  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1290  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1291  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1292  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1293  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1294  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1295  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1296  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1297  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1298  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1299  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1300  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1301  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1302  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1303  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1304  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1305  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1306  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1307  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1308  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1309  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1310  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1311  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1312  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1313  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1314  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1315  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1316  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1317  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1318  	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1319  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1320  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1321  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1322  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1323  	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1324  	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1325  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1326  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1327  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1328  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1329  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1330  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1331  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1332  	{AWORD, ybyte, Px, [23]uint8{2}},
  1333  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1334  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1335  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1336  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1337  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1338  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1339  	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1340  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1341  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1342  	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1343  	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1344  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1345  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1346  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1347  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1348  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1349  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1350  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1351  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1352  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1353  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1354  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1355  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1356  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1357  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1358  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1359  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1360  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1361  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1362  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1363  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1364  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1365  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1366  	{AFCOMB, nil, 0, [23]uint8{}},
  1367  	{AFCOMBP, nil, 0, [23]uint8{}},
  1368  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1369  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1370  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1371  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1372  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1373  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1374  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1375  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1376  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1377  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1378  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1379  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1380  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1381  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1382  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1383  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1384  	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1385  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1386  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1387  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1388  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1389  	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1390  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1391  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1392  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1393  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1394  	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1395  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1396  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1397  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1398  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1399  	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1400  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1401  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1402  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1403  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1404  	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1405  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1406  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1407  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1408  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1409  	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1410  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1411  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1412  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1413  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1414  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1415  	{AFFREE, nil, 0, [23]uint8{}},
  1416  	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1417  	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1418  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1419  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1420  	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1421  	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1422  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1423  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1424  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1425  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1426  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1427  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1428  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1429  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1430  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1431  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1432  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1433  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1434  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1435  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1436  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1437  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1438  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1439  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1440  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1441  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1442  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1443  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1444  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1445  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1446  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1447  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1448  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1449  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1450  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1451  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1452  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1453  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1454  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1455  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1456  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1457  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1458  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1459  	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1460  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1461  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1462  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1463  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1464  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1465  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1466  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1467  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1468  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1469  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1470  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1471  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1472  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1473  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1474  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1475  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1476  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1477  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1478  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1479  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1480  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1481  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1482  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1483  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1484  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1485  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1486  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1487  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1488  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1489  	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1490  	{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
  1491  	{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
  1492  	{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1493  	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1494  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1495  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1496  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1497  	{AMOVHDU, yxmov_vex, Pvex2, [23]uint8{0x6f, 0x7f}},
  1498  	{AMOVNTHD, yxr_ml_vex, Pvex1, [23]uint8{0xe7}},
  1499  	{AMOVHDA, yxmov_vex, Pvex1, [23]uint8{0x6f, 0x7f}},
  1500  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1501  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1502  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1503  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1504  	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1505  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1506  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1507  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1508  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1509  	{obj.AEND, nil, 0, [23]uint8{}},
  1510  	{0, nil, 0, [23]uint8{}},
  1511  }
  1512  
// opindex is a fixed-size table of *Optab pointers with
// (ALAST+1)&obj.AMask slots; every slot starts out nil.
// NOTE(review): presumably filled from optab during instinit and indexed
// by an instruction's opcode masked with obj.AMask — confirm against
// instinit and the lookup sites, which are outside this excerpt.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1514  
  1515  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1516  // This happens on systems like Solaris that call .so functions instead of system calls.
  1517  // It does not seem to be necessary for any other systems. This is probably working
  1518  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1519  // what that bug is. And this does fix it.
  1520  func isextern(s *obj.LSym) bool {
  1521  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1522  	return strings.HasPrefix(s.Name, "libc_")
  1523  }
  1524  
  1525  // single-instruction no-ops of various lengths.
  1526  // constructed by hand and disassembled with gdb to verify.
  1527  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1528  var nop = [][16]uint8{
  1529  	{0x90},
  1530  	{0x66, 0x90},
  1531  	{0x0F, 0x1F, 0x00},
  1532  	{0x0F, 0x1F, 0x40, 0x00},
  1533  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1534  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1535  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1536  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1537  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1538  }
  1539  
  1540  // Native Client rejects the repeated 0x66 prefix.
  1541  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1542  func fillnop(p []byte, n int) {
  1543  	var m int
  1544  
  1545  	for n > 0 {
  1546  		m = n
  1547  		if m > len(nop) {
  1548  			m = len(nop)
  1549  		}
  1550  		copy(p[:m], nop[m-1][:m])
  1551  		p = p[m:]
  1552  		n -= m
  1553  	}
  1554  }
  1555  
  1556  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1557  	obj.Symgrow(ctxt, s, int64(c)+int64(pad))
  1558  	fillnop(s.P[c:], int(pad))
  1559  	return c + pad
  1560  }
  1561  
  1562  func spadjop(ctxt *obj.Link, p *obj.Prog, l int, q int) int {
  1563  	if p.Mode != 64 || ctxt.Arch.Ptrsize == 4 {
  1564  		return l
  1565  	}
  1566  	return q
  1567  }
  1568  
  1569  func span6(ctxt *obj.Link, s *obj.LSym) {
  1570  	ctxt.Cursym = s
  1571  
  1572  	if s.P != nil {
  1573  		return
  1574  	}
  1575  
  1576  	if ycover[0] == 0 {
  1577  		instinit()
  1578  	}
  1579  
  1580  	var v int32
  1581  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1582  		if p.To.Type == obj.TYPE_BRANCH {
  1583  			if p.Pcond == nil {
  1584  				p.Pcond = p
  1585  			}
  1586  		}
  1587  		if p.As == AADJSP {
  1588  			p.To.Type = obj.TYPE_REG
  1589  			p.To.Reg = REG_SP
  1590  			v = int32(-p.From.Offset)
  1591  			p.From.Offset = int64(v)
  1592  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1593  			if v < 0 {
  1594  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1595  				v = -v
  1596  				p.From.Offset = int64(v)
  1597  			}
  1598  
  1599  			if v == 0 {
  1600  				p.As = obj.ANOP
  1601  			}
  1602  		}
  1603  	}
  1604  
  1605  	var q *obj.Prog
  1606  	for p := s.Text; p != nil; p = p.Link {
  1607  		p.Back = 2 // use short branches first time through
  1608  		q = p.Pcond
  1609  		if q != nil && (q.Back&2 != 0) {
  1610  			p.Back |= 1 // backward jump
  1611  			q.Back |= 4 // loop head
  1612  		}
  1613  
  1614  		if p.As == AADJSP {
  1615  			p.To.Type = obj.TYPE_REG
  1616  			p.To.Reg = REG_SP
  1617  			v = int32(-p.From.Offset)
  1618  			p.From.Offset = int64(v)
  1619  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1620  			if v < 0 {
  1621  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1622  				v = -v
  1623  				p.From.Offset = int64(v)
  1624  			}
  1625  
  1626  			if v == 0 {
  1627  				p.As = obj.ANOP
  1628  			}
  1629  		}
  1630  	}
  1631  
  1632  	n := 0
  1633  	var bp []byte
  1634  	var c int32
  1635  	var i int
  1636  	var loop int32
  1637  	var m int
  1638  	var p *obj.Prog
  1639  	for {
  1640  		loop = 0
  1641  		for i = 0; i < len(s.R); i++ {
  1642  			s.R[i] = obj.Reloc{}
  1643  		}
  1644  		s.R = s.R[:0]
  1645  		s.P = s.P[:0]
  1646  		c = 0
  1647  		for p = s.Text; p != nil; p = p.Link {
  1648  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1649  				var deferreturn *obj.LSym
  1650  
  1651  				if deferreturn == nil {
  1652  					deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1653  				}
  1654  
  1655  				// pad everything to avoid crossing 32-byte boundary
  1656  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1657  					c = naclpad(ctxt, s, c, -c&31)
  1658  				}
  1659  
  1660  				// pad call deferreturn to start at 32-byte boundary
  1661  				// so that subtracting 5 in jmpdefer will jump back
  1662  				// to that boundary and rerun the call.
  1663  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1664  					c = naclpad(ctxt, s, c, -c&31)
  1665  				}
  1666  
  1667  				// pad call to end at 32-byte boundary
  1668  				if p.As == obj.ACALL {
  1669  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1670  				}
  1671  
  1672  				// the linker treats REP and STOSQ as different instructions
  1673  				// but in fact the REP is a prefix on the STOSQ.
  1674  				// make sure REP has room for 2 more bytes, so that
  1675  				// padding will not be inserted before the next instruction.
  1676  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1677  					c = naclpad(ctxt, s, c, -c&31)
  1678  				}
  1679  
  1680  				// same for LOCK.
  1681  				// various instructions follow; the longest is 4 bytes.
  1682  				// give ourselves 8 bytes so as to avoid surprises.
  1683  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1684  					c = naclpad(ctxt, s, c, -c&31)
  1685  				}
  1686  			}
  1687  
  1688  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1689  				// pad with NOPs
  1690  				v = -c & (LoopAlign - 1)
  1691  
  1692  				if v <= MaxLoopPad {
  1693  					obj.Symgrow(ctxt, s, int64(c)+int64(v))
  1694  					fillnop(s.P[c:], int(v))
  1695  					c += v
  1696  				}
  1697  			}
  1698  
  1699  			p.Pc = int64(c)
  1700  
  1701  			// process forward jumps to p
  1702  			for q = p.Rel; q != nil; q = q.Forwd {
  1703  				v = int32(p.Pc - (q.Pc + int64(q.Mark)))
  1704  				if q.Back&2 != 0 { // short
  1705  					if v > 127 {
  1706  						loop++
  1707  						q.Back ^= 2
  1708  					}
  1709  
  1710  					if q.As == AJCXZL {
  1711  						s.P[q.Pc+2] = byte(v)
  1712  					} else {
  1713  						s.P[q.Pc+1] = byte(v)
  1714  					}
  1715  				} else {
  1716  					bp = s.P[q.Pc+int64(q.Mark)-4:]
  1717  					bp[0] = byte(v)
  1718  					bp = bp[1:]
  1719  					bp[0] = byte(v >> 8)
  1720  					bp = bp[1:]
  1721  					bp[0] = byte(v >> 16)
  1722  					bp = bp[1:]
  1723  					bp[0] = byte(v >> 24)
  1724  				}
  1725  			}
  1726  
  1727  			p.Rel = nil
  1728  
  1729  			p.Pc = int64(c)
  1730  			asmins(ctxt, p)
  1731  			m = -cap(ctxt.Andptr) + cap(ctxt.And[:])
  1732  			if int(p.Isize) != m {
  1733  				p.Isize = uint8(m)
  1734  				loop++
  1735  			}
  1736  
  1737  			obj.Symgrow(ctxt, s, p.Pc+int64(m))
  1738  			copy(s.P[p.Pc:][:m], ctxt.And[:m])
  1739  			p.Mark = uint16(m)
  1740  			c += int32(m)
  1741  		}
  1742  
  1743  		n++
  1744  		if n > 20 {
  1745  			ctxt.Diag("span must be looping")
  1746  			log.Fatalf("loop")
  1747  		}
  1748  		if loop == 0 {
  1749  			break
  1750  		}
  1751  	}
  1752  
  1753  	if ctxt.Headtype == obj.Hnacl {
  1754  		c = naclpad(ctxt, s, c, -c&31)
  1755  	}
  1756  
  1757  	c += -c & (FuncAlign - 1)
  1758  	s.Size = int64(c)
  1759  
  1760  	if false { /* debug['a'] > 1 */
  1761  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1762  		var i int
  1763  		for i = 0; i < len(s.P); i++ {
  1764  			fmt.Printf(" %.2x", s.P[i])
  1765  			if i%16 == 15 {
  1766  				fmt.Printf("\n  %.6x", uint(i+1))
  1767  			}
  1768  		}
  1769  
  1770  		if i%16 != 0 {
  1771  			fmt.Printf("\n")
  1772  		}
  1773  
  1774  		for i := 0; i < len(s.R); i++ {
  1775  			r := &s.R[i]
  1776  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1777  		}
  1778  	}
  1779  }
  1780  
  1781  func instinit() {
  1782  	var c int
  1783  
  1784  	for i := 1; optab[i].as != 0; i++ {
  1785  		c = int(optab[i].as)
  1786  		if opindex[c&obj.AMask] != nil {
  1787  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  1788  		}
  1789  		opindex[c&obj.AMask] = &optab[i]
  1790  	}
  1791  
  1792  	for i := 0; i < Ymax; i++ {
  1793  		ycover[i*Ymax+i] = 1
  1794  	}
  1795  
  1796  	ycover[Yi0*Ymax+Yi8] = 1
  1797  	ycover[Yi1*Ymax+Yi8] = 1
  1798  	ycover[Yu7*Ymax+Yi8] = 1
  1799  
  1800  	ycover[Yi0*Ymax+Yu7] = 1
  1801  	ycover[Yi1*Ymax+Yu7] = 1
  1802  
  1803  	ycover[Yi0*Ymax+Yu8] = 1
  1804  	ycover[Yi1*Ymax+Yu8] = 1
  1805  	ycover[Yu7*Ymax+Yu8] = 1
  1806  
  1807  	ycover[Yi0*Ymax+Ys32] = 1
  1808  	ycover[Yi1*Ymax+Ys32] = 1
  1809  	ycover[Yu7*Ymax+Ys32] = 1
  1810  	ycover[Yu8*Ymax+Ys32] = 1
  1811  	ycover[Yi8*Ymax+Ys32] = 1
  1812  
  1813  	ycover[Yi0*Ymax+Yi32] = 1
  1814  	ycover[Yi1*Ymax+Yi32] = 1
  1815  	ycover[Yu7*Ymax+Yi32] = 1
  1816  	ycover[Yu8*Ymax+Yi32] = 1
  1817  	ycover[Yi8*Ymax+Yi32] = 1
  1818  	ycover[Ys32*Ymax+Yi32] = 1
  1819  
  1820  	ycover[Yi0*Ymax+Yi64] = 1
  1821  	ycover[Yi1*Ymax+Yi64] = 1
  1822  	ycover[Yu7*Ymax+Yi64] = 1
  1823  	ycover[Yu8*Ymax+Yi64] = 1
  1824  	ycover[Yi8*Ymax+Yi64] = 1
  1825  	ycover[Ys32*Ymax+Yi64] = 1
  1826  	ycover[Yi32*Ymax+Yi64] = 1
  1827  
  1828  	ycover[Yal*Ymax+Yrb] = 1
  1829  	ycover[Ycl*Ymax+Yrb] = 1
  1830  	ycover[Yax*Ymax+Yrb] = 1
  1831  	ycover[Ycx*Ymax+Yrb] = 1
  1832  	ycover[Yrx*Ymax+Yrb] = 1
  1833  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  1834  
  1835  	ycover[Ycl*Ymax+Ycx] = 1
  1836  
  1837  	ycover[Yax*Ymax+Yrx] = 1
  1838  	ycover[Ycx*Ymax+Yrx] = 1
  1839  
  1840  	ycover[Yax*Ymax+Yrl] = 1
  1841  	ycover[Ycx*Ymax+Yrl] = 1
  1842  	ycover[Yrx*Ymax+Yrl] = 1
  1843  	ycover[Yrl32*Ymax+Yrl] = 1
  1844  
  1845  	ycover[Yf0*Ymax+Yrf] = 1
  1846  
  1847  	ycover[Yal*Ymax+Ymb] = 1
  1848  	ycover[Ycl*Ymax+Ymb] = 1
  1849  	ycover[Yax*Ymax+Ymb] = 1
  1850  	ycover[Ycx*Ymax+Ymb] = 1
  1851  	ycover[Yrx*Ymax+Ymb] = 1
  1852  	ycover[Yrb*Ymax+Ymb] = 1
  1853  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  1854  	ycover[Ym*Ymax+Ymb] = 1
  1855  
  1856  	ycover[Yax*Ymax+Yml] = 1
  1857  	ycover[Ycx*Ymax+Yml] = 1
  1858  	ycover[Yrx*Ymax+Yml] = 1
  1859  	ycover[Yrl*Ymax+Yml] = 1
  1860  	ycover[Yrl32*Ymax+Yml] = 1
  1861  	ycover[Ym*Ymax+Yml] = 1
  1862  
  1863  	ycover[Yax*Ymax+Ymm] = 1
  1864  	ycover[Ycx*Ymax+Ymm] = 1
  1865  	ycover[Yrx*Ymax+Ymm] = 1
  1866  	ycover[Yrl*Ymax+Ymm] = 1
  1867  	ycover[Yrl32*Ymax+Ymm] = 1
  1868  	ycover[Ym*Ymax+Ymm] = 1
  1869  	ycover[Ymr*Ymax+Ymm] = 1
  1870  
  1871  	ycover[Ym*Ymax+Yxm] = 1
  1872  	ycover[Yxr*Ymax+Yxm] = 1
  1873  
  1874  	for i := 0; i < MAXREG; i++ {
  1875  		reg[i] = -1
  1876  		if i >= REG_AL && i <= REG_R15B {
  1877  			reg[i] = (i - REG_AL) & 7
  1878  			if i >= REG_SPB && i <= REG_DIB {
  1879  				regrex[i] = 0x40
  1880  			}
  1881  			if i >= REG_R8B && i <= REG_R15B {
  1882  				regrex[i] = Rxr | Rxx | Rxb
  1883  			}
  1884  		}
  1885  
  1886  		if i >= REG_AH && i <= REG_BH {
  1887  			reg[i] = 4 + ((i - REG_AH) & 7)
  1888  		}
  1889  		if i >= REG_AX && i <= REG_R15 {
  1890  			reg[i] = (i - REG_AX) & 7
  1891  			if i >= REG_R8 {
  1892  				regrex[i] = Rxr | Rxx | Rxb
  1893  			}
  1894  		}
  1895  
  1896  		if i >= REG_F0 && i <= REG_F0+7 {
  1897  			reg[i] = (i - REG_F0) & 7
  1898  		}
  1899  		if i >= REG_M0 && i <= REG_M0+7 {
  1900  			reg[i] = (i - REG_M0) & 7
  1901  		}
  1902  		if i >= REG_X0 && i <= REG_X0+15 {
  1903  			reg[i] = (i - REG_X0) & 7
  1904  			if i >= REG_X0+8 {
  1905  				regrex[i] = Rxr | Rxx | Rxb
  1906  			}
  1907  		}
  1908  
  1909  		if i >= REG_CR+8 && i <= REG_CR+15 {
  1910  			regrex[i] = Rxr
  1911  		}
  1912  	}
  1913  }
  1914  
  1915  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  1916  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  1917  		return 0
  1918  	}
  1919  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  1920  		switch a.Reg {
  1921  		case REG_CS:
  1922  			return 0x2e
  1923  
  1924  		case REG_DS:
  1925  			return 0x3e
  1926  
  1927  		case REG_ES:
  1928  			return 0x26
  1929  
  1930  		case REG_FS:
  1931  			return 0x64
  1932  
  1933  		case REG_GS:
  1934  			return 0x65
  1935  
  1936  		case REG_TLS:
  1937  			// NOTE: Systems listed here should be only systems that
  1938  			// support direct TLS references like 8(TLS) implemented as
  1939  			// direct references from FS or GS. Systems that require
  1940  			// the initial-exec model, where you load the TLS base into
  1941  			// a register and then index from that register, do not reach
  1942  			// this code and should not be listed.
  1943  			if p.Mode == 32 {
  1944  				switch ctxt.Headtype {
  1945  				default:
  1946  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1947  
  1948  				case obj.Hdarwin,
  1949  					obj.Hdragonfly,
  1950  					obj.Hfreebsd,
  1951  					obj.Hnetbsd,
  1952  					obj.Hopenbsd:
  1953  					return 0x65 // GS
  1954  				}
  1955  			}
  1956  
  1957  			switch ctxt.Headtype {
  1958  			default:
  1959  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1960  
  1961  			case obj.Hlinux:
  1962  				if ctxt.Flag_shared != 0 {
  1963  					log.Fatalf("unknown TLS base register for linux with -shared")
  1964  				} else {
  1965  					return 0x64 // FS
  1966  				}
  1967  
  1968  			case obj.Hdragonfly,
  1969  				obj.Hfreebsd,
  1970  				obj.Hnetbsd,
  1971  				obj.Hopenbsd,
  1972  				obj.Hsolaris:
  1973  				return 0x64 // FS
  1974  
  1975  			case obj.Hdarwin:
  1976  				return 0x65 // GS
  1977  			}
  1978  		}
  1979  	}
  1980  
  1981  	if p.Mode == 32 {
  1982  		return 0
  1983  	}
  1984  
  1985  	switch a.Index {
  1986  	case REG_CS:
  1987  		return 0x2e
  1988  
  1989  	case REG_DS:
  1990  		return 0x3e
  1991  
  1992  	case REG_ES:
  1993  		return 0x26
  1994  
  1995  	case REG_TLS:
  1996  		if ctxt.Flag_shared != 0 {
  1997  			// When building for inclusion into a shared library, an instruction of the form
  1998  			//     MOV 0(CX)(TLS*1), AX
  1999  			// becomes
  2000  			//     mov %fs:(%rcx), %rax
  2001  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2002  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2003  			// a shared library the instruction does not require a prefix.
  2004  			if a.Offset != 0 {
  2005  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2006  			}
  2007  			return 0x64
  2008  		}
  2009  
  2010  	case REG_FS:
  2011  		return 0x64
  2012  
  2013  	case REG_GS:
  2014  		return 0x65
  2015  	}
  2016  
  2017  	return 0
  2018  }
  2019  
  2020  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2021  	switch a.Type {
  2022  	case obj.TYPE_NONE:
  2023  		return Ynone
  2024  
  2025  	case obj.TYPE_BRANCH:
  2026  		return Ybr
  2027  
  2028  	case obj.TYPE_INDIR:
  2029  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2030  			return Yindir
  2031  		}
  2032  		return Yxxx
  2033  
  2034  	case obj.TYPE_MEM:
  2035  		return Ym
  2036  
  2037  	case obj.TYPE_ADDR:
  2038  		switch a.Name {
  2039  		case obj.NAME_EXTERN,
  2040  			obj.NAME_GOTREF,
  2041  			obj.NAME_STATIC:
  2042  			if a.Sym != nil && isextern(a.Sym) || p.Mode == 32 {
  2043  				return Yi32
  2044  			}
  2045  			return Yiauto // use pc-relative addressing
  2046  
  2047  		case obj.NAME_AUTO,
  2048  			obj.NAME_PARAM:
  2049  			return Yiauto
  2050  		}
  2051  
  2052  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2053  		// and got Yi32 in an earlier version of this code.
  2054  		// Keep doing that until we fix yduff etc.
  2055  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2056  			return Yi32
  2057  		}
  2058  
  2059  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2060  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2061  		}
  2062  		fallthrough
  2063  
  2064  		// fall through
  2065  
  2066  	case obj.TYPE_CONST:
  2067  		if a.Sym != nil {
  2068  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2069  		}
  2070  
  2071  		v := a.Offset
  2072  		if p.Mode == 32 {
  2073  			v = int64(int32(v))
  2074  		}
  2075  		if v == 0 {
  2076  			return Yi0
  2077  		}
  2078  		if v == 1 {
  2079  			return Yi1
  2080  		}
  2081  		if v >= 0 && v <= 127 {
  2082  			return Yu7
  2083  		}
  2084  		if v >= 0 && v <= 255 {
  2085  			return Yu8
  2086  		}
  2087  		if v >= -128 && v <= 127 {
  2088  			return Yi8
  2089  		}
  2090  		if p.Mode == 32 {
  2091  			return Yi32
  2092  		}
  2093  		l := int32(v)
  2094  		if int64(l) == v {
  2095  			return Ys32 /* can sign extend */
  2096  		}
  2097  		if v>>32 == 0 {
  2098  			return Yi32 /* unsigned */
  2099  		}
  2100  		return Yi64
  2101  
  2102  	case obj.TYPE_TEXTSIZE:
  2103  		return Ytextsize
  2104  	}
  2105  
  2106  	if a.Type != obj.TYPE_REG {
  2107  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2108  		return Yxxx
  2109  	}
  2110  
  2111  	switch a.Reg {
  2112  	case REG_AL:
  2113  		return Yal
  2114  
  2115  	case REG_AX:
  2116  		return Yax
  2117  
  2118  		/*
  2119  			case REG_SPB:
  2120  		*/
  2121  	case REG_BPB,
  2122  		REG_SIB,
  2123  		REG_DIB,
  2124  		REG_R8B,
  2125  		REG_R9B,
  2126  		REG_R10B,
  2127  		REG_R11B,
  2128  		REG_R12B,
  2129  		REG_R13B,
  2130  		REG_R14B,
  2131  		REG_R15B:
  2132  		if ctxt.Asmode != 64 {
  2133  			return Yxxx
  2134  		}
  2135  		fallthrough
  2136  
  2137  	case REG_DL,
  2138  		REG_BL,
  2139  		REG_AH,
  2140  		REG_CH,
  2141  		REG_DH,
  2142  		REG_BH:
  2143  		return Yrb
  2144  
  2145  	case REG_CL:
  2146  		return Ycl
  2147  
  2148  	case REG_CX:
  2149  		return Ycx
  2150  
  2151  	case REG_DX, REG_BX:
  2152  		return Yrx
  2153  
  2154  	case REG_R8, /* not really Yrl */
  2155  		REG_R9,
  2156  		REG_R10,
  2157  		REG_R11,
  2158  		REG_R12,
  2159  		REG_R13,
  2160  		REG_R14,
  2161  		REG_R15:
  2162  		if ctxt.Asmode != 64 {
  2163  			return Yxxx
  2164  		}
  2165  		fallthrough
  2166  
  2167  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2168  		if p.Mode == 32 {
  2169  			return Yrl32
  2170  		}
  2171  		return Yrl
  2172  
  2173  	case REG_F0 + 0:
  2174  		return Yf0
  2175  
  2176  	case REG_F0 + 1,
  2177  		REG_F0 + 2,
  2178  		REG_F0 + 3,
  2179  		REG_F0 + 4,
  2180  		REG_F0 + 5,
  2181  		REG_F0 + 6,
  2182  		REG_F0 + 7:
  2183  		return Yrf
  2184  
  2185  	case REG_M0 + 0,
  2186  		REG_M0 + 1,
  2187  		REG_M0 + 2,
  2188  		REG_M0 + 3,
  2189  		REG_M0 + 4,
  2190  		REG_M0 + 5,
  2191  		REG_M0 + 6,
  2192  		REG_M0 + 7:
  2193  		return Ymr
  2194  
  2195  	case REG_X0 + 0,
  2196  		REG_X0 + 1,
  2197  		REG_X0 + 2,
  2198  		REG_X0 + 3,
  2199  		REG_X0 + 4,
  2200  		REG_X0 + 5,
  2201  		REG_X0 + 6,
  2202  		REG_X0 + 7,
  2203  		REG_X0 + 8,
  2204  		REG_X0 + 9,
  2205  		REG_X0 + 10,
  2206  		REG_X0 + 11,
  2207  		REG_X0 + 12,
  2208  		REG_X0 + 13,
  2209  		REG_X0 + 14,
  2210  		REG_X0 + 15:
  2211  		return Yxr
  2212  
  2213  	case REG_CS:
  2214  		return Ycs
  2215  	case REG_SS:
  2216  		return Yss
  2217  	case REG_DS:
  2218  		return Yds
  2219  	case REG_ES:
  2220  		return Yes
  2221  	case REG_FS:
  2222  		return Yfs
  2223  	case REG_GS:
  2224  		return Ygs
  2225  	case REG_TLS:
  2226  		return Ytls
  2227  
  2228  	case REG_GDTR:
  2229  		return Ygdtr
  2230  	case REG_IDTR:
  2231  		return Yidtr
  2232  	case REG_LDTR:
  2233  		return Yldtr
  2234  	case REG_MSW:
  2235  		return Ymsw
  2236  	case REG_TASK:
  2237  		return Ytask
  2238  
  2239  	case REG_CR + 0:
  2240  		return Ycr0
  2241  	case REG_CR + 1:
  2242  		return Ycr1
  2243  	case REG_CR + 2:
  2244  		return Ycr2
  2245  	case REG_CR + 3:
  2246  		return Ycr3
  2247  	case REG_CR + 4:
  2248  		return Ycr4
  2249  	case REG_CR + 5:
  2250  		return Ycr5
  2251  	case REG_CR + 6:
  2252  		return Ycr6
  2253  	case REG_CR + 7:
  2254  		return Ycr7
  2255  	case REG_CR + 8:
  2256  		return Ycr8
  2257  
  2258  	case REG_DR + 0:
  2259  		return Ydr0
  2260  	case REG_DR + 1:
  2261  		return Ydr1
  2262  	case REG_DR + 2:
  2263  		return Ydr2
  2264  	case REG_DR + 3:
  2265  		return Ydr3
  2266  	case REG_DR + 4:
  2267  		return Ydr4
  2268  	case REG_DR + 5:
  2269  		return Ydr5
  2270  	case REG_DR + 6:
  2271  		return Ydr6
  2272  	case REG_DR + 7:
  2273  		return Ydr7
  2274  
  2275  	case REG_TR + 0:
  2276  		return Ytr0
  2277  	case REG_TR + 1:
  2278  		return Ytr1
  2279  	case REG_TR + 2:
  2280  		return Ytr2
  2281  	case REG_TR + 3:
  2282  		return Ytr3
  2283  	case REG_TR + 4:
  2284  		return Ytr4
  2285  	case REG_TR + 5:
  2286  		return Ytr5
  2287  	case REG_TR + 6:
  2288  		return Ytr6
  2289  	case REG_TR + 7:
  2290  		return Ytr7
  2291  	}
  2292  
  2293  	return Yxxx
  2294  }
  2295  
  2296  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2297  	var i int
  2298  
  2299  	switch index {
  2300  	default:
  2301  		goto bad
  2302  
  2303  	case REG_NONE:
  2304  		i = 4 << 3
  2305  		goto bas
  2306  
  2307  	case REG_R8,
  2308  		REG_R9,
  2309  		REG_R10,
  2310  		REG_R11,
  2311  		REG_R12,
  2312  		REG_R13,
  2313  		REG_R14,
  2314  		REG_R15:
  2315  		if ctxt.Asmode != 64 {
  2316  			goto bad
  2317  		}
  2318  		fallthrough
  2319  
  2320  	case REG_AX,
  2321  		REG_CX,
  2322  		REG_DX,
  2323  		REG_BX,
  2324  		REG_BP,
  2325  		REG_SI,
  2326  		REG_DI:
  2327  		i = reg[index] << 3
  2328  	}
  2329  
  2330  	switch scale {
  2331  	default:
  2332  		goto bad
  2333  
  2334  	case 1:
  2335  		break
  2336  
  2337  	case 2:
  2338  		i |= 1 << 6
  2339  
  2340  	case 4:
  2341  		i |= 2 << 6
  2342  
  2343  	case 8:
  2344  		i |= 3 << 6
  2345  	}
  2346  
  2347  bas:
  2348  	switch base {
  2349  	default:
  2350  		goto bad
  2351  
  2352  	case REG_NONE: /* must be mod=00 */
  2353  		i |= 5
  2354  
  2355  	case REG_R8,
  2356  		REG_R9,
  2357  		REG_R10,
  2358  		REG_R11,
  2359  		REG_R12,
  2360  		REG_R13,
  2361  		REG_R14,
  2362  		REG_R15:
  2363  		if ctxt.Asmode != 64 {
  2364  			goto bad
  2365  		}
  2366  		fallthrough
  2367  
  2368  	case REG_AX,
  2369  		REG_CX,
  2370  		REG_DX,
  2371  		REG_BX,
  2372  		REG_SP,
  2373  		REG_BP,
  2374  		REG_SI,
  2375  		REG_DI:
  2376  		i |= reg[base]
  2377  	}
  2378  
  2379  	ctxt.Andptr[0] = byte(i)
  2380  	ctxt.Andptr = ctxt.Andptr[1:]
  2381  	return
  2382  
  2383  bad:
  2384  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2385  	ctxt.Andptr[0] = 0
  2386  	ctxt.Andptr = ctxt.Andptr[1:]
  2387  	return
  2388  }
  2389  
  2390  func put4(ctxt *obj.Link, v int32) {
  2391  	ctxt.Andptr[0] = byte(v)
  2392  	ctxt.Andptr[1] = byte(v >> 8)
  2393  	ctxt.Andptr[2] = byte(v >> 16)
  2394  	ctxt.Andptr[3] = byte(v >> 24)
  2395  	ctxt.Andptr = ctxt.Andptr[4:]
  2396  }
  2397  
  2398  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2399  	var rel obj.Reloc
  2400  
  2401  	v := vaddr(ctxt, p, a, &rel)
  2402  	if rel.Siz != 0 {
  2403  		if rel.Siz != 4 {
  2404  			ctxt.Diag("bad reloc")
  2405  		}
  2406  		r := obj.Addrel(ctxt.Cursym)
  2407  		*r = rel
  2408  		r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2409  	}
  2410  
  2411  	put4(ctxt, int32(v))
  2412  }
  2413  
  2414  func put8(ctxt *obj.Link, v int64) {
  2415  	ctxt.Andptr[0] = byte(v)
  2416  	ctxt.Andptr[1] = byte(v >> 8)
  2417  	ctxt.Andptr[2] = byte(v >> 16)
  2418  	ctxt.Andptr[3] = byte(v >> 24)
  2419  	ctxt.Andptr[4] = byte(v >> 32)
  2420  	ctxt.Andptr[5] = byte(v >> 40)
  2421  	ctxt.Andptr[6] = byte(v >> 48)
  2422  	ctxt.Andptr[7] = byte(v >> 56)
  2423  	ctxt.Andptr = ctxt.Andptr[8:]
  2424  }
  2425  
  2426  /*
  2427  static void
  2428  relput8(Prog *p, Addr *a)
  2429  {
  2430  	vlong v;
  2431  	Reloc rel, *r;
  2432  
  2433  	v = vaddr(ctxt, p, a, &rel);
  2434  	if(rel.siz != 0) {
  2435  		r = addrel(ctxt->cursym);
  2436  		*r = rel;
  2437  		r->siz = 8;
  2438  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2439  	}
  2440  	put8(ctxt, v);
  2441  }
  2442  */
  2443  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2444  	if r != nil {
  2445  		*r = obj.Reloc{}
  2446  	}
  2447  
  2448  	switch a.Name {
  2449  	case obj.NAME_STATIC,
  2450  		obj.NAME_GOTREF,
  2451  		obj.NAME_EXTERN:
  2452  		s := a.Sym
  2453  		if r == nil {
  2454  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2455  			log.Fatalf("reloc")
  2456  		}
  2457  
  2458  		if a.Name == obj.NAME_GOTREF {
  2459  			r.Siz = 4
  2460  			r.Type = obj.R_GOTPCREL
  2461  		} else if isextern(s) || p.Mode != 64 {
  2462  			r.Siz = 4
  2463  			r.Type = obj.R_ADDR
  2464  		} else {
  2465  			r.Siz = 4
  2466  			r.Type = obj.R_PCREL
  2467  		}
  2468  
  2469  		r.Off = -1 // caller must fill in
  2470  		r.Sym = s
  2471  		r.Add = a.Offset
  2472  
  2473  		return 0
  2474  	}
  2475  
  2476  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2477  		if r == nil {
  2478  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2479  			log.Fatalf("reloc")
  2480  		}
  2481  
  2482  		r.Type = obj.R_TLS_LE
  2483  		r.Siz = 4
  2484  		r.Off = -1 // caller must fill in
  2485  		r.Add = a.Offset
  2486  		return 0
  2487  	}
  2488  
  2489  	return a.Offset
  2490  }
  2491  
  2492  func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  2493  	var base int
  2494  	var rel obj.Reloc
  2495  
  2496  	rex &= 0x40 | Rxr
  2497  	v := int32(a.Offset)
  2498  	rel.Siz = 0
  2499  
  2500  	switch a.Type {
  2501  	case obj.TYPE_ADDR:
  2502  		if a.Name == obj.NAME_NONE {
  2503  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  2504  		}
  2505  		if a.Index == REG_TLS {
  2506  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  2507  		}
  2508  		goto bad
  2509  
  2510  	case obj.TYPE_REG:
  2511  		if a.Reg < REG_AL || REG_X0+15 < a.Reg {
  2512  			goto bad
  2513  		}
  2514  		if v != 0 {
  2515  			goto bad
  2516  		}
  2517  		ctxt.Andptr[0] = byte(3<<6 | reg[a.Reg]<<0 | r<<3)
  2518  		ctxt.Andptr = ctxt.Andptr[1:]
  2519  		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  2520  		return
  2521  	}
  2522  
  2523  	if a.Type != obj.TYPE_MEM {
  2524  		goto bad
  2525  	}
  2526  
  2527  	if a.Index != REG_NONE && a.Index != REG_TLS {
  2528  		base := int(a.Reg)
  2529  		switch a.Name {
  2530  		case obj.NAME_EXTERN,
  2531  			obj.NAME_GOTREF,
  2532  			obj.NAME_STATIC:
  2533  			if !isextern(a.Sym) && p.Mode == 64 {
  2534  				goto bad
  2535  			}
  2536  			base = REG_NONE
  2537  			v = int32(vaddr(ctxt, p, a, &rel))
  2538  
  2539  		case obj.NAME_AUTO,
  2540  			obj.NAME_PARAM:
  2541  			base = REG_SP
  2542  		}
  2543  
  2544  		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  2545  		if base == REG_NONE {
  2546  			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
  2547  			ctxt.Andptr = ctxt.Andptr[1:]
  2548  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2549  			goto putrelv
  2550  		}
  2551  
  2552  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  2553  			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
  2554  			ctxt.Andptr = ctxt.Andptr[1:]
  2555  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2556  			return
  2557  		}
  2558  
  2559  		if v >= -128 && v < 128 && rel.Siz == 0 {
  2560  			ctxt.Andptr[0] = byte(1<<6 | 4<<0 | r<<3)
  2561  			ctxt.Andptr = ctxt.Andptr[1:]
  2562  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2563  			ctxt.Andptr[0] = byte(v)
  2564  			ctxt.Andptr = ctxt.Andptr[1:]
  2565  			return
  2566  		}
  2567  
  2568  		ctxt.Andptr[0] = byte(2<<6 | 4<<0 | r<<3)
  2569  		ctxt.Andptr = ctxt.Andptr[1:]
  2570  		asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2571  		goto putrelv
  2572  	}
  2573  
  2574  	base = int(a.Reg)
  2575  	switch a.Name {
  2576  	case obj.NAME_STATIC,
  2577  		obj.NAME_GOTREF,
  2578  		obj.NAME_EXTERN:
  2579  		if a.Sym == nil {
  2580  			ctxt.Diag("bad addr: %v", p)
  2581  		}
  2582  		base = REG_NONE
  2583  		v = int32(vaddr(ctxt, p, a, &rel))
  2584  
  2585  	case obj.NAME_AUTO,
  2586  		obj.NAME_PARAM:
  2587  		base = REG_SP
  2588  	}
  2589  
  2590  	if base == REG_TLS {
  2591  		v = int32(vaddr(ctxt, p, a, &rel))
  2592  	}
  2593  
  2594  	ctxt.Rexflag |= regrex[base]&Rxb | rex
  2595  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  2596  		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
  2597  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  2598  				ctxt.Diag("%v has offset against gotref", p)
  2599  			}
  2600  			ctxt.Andptr[0] = byte(0<<6 | 5<<0 | r<<3)
  2601  			ctxt.Andptr = ctxt.Andptr[1:]
  2602  			goto putrelv
  2603  		}
  2604  
  2605  		/* temporary */
  2606  		ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
  2607  		ctxt.Andptr = ctxt.Andptr[1:] /* sib present */
  2608  		ctxt.Andptr[0] = 0<<6 | 4<<3 | 5<<0
  2609  		ctxt.Andptr = ctxt.Andptr[1:] /* DS:d32 */
  2610  		goto putrelv
  2611  	}
  2612  
  2613  	if base == REG_SP || base == REG_R12 {
  2614  		if v == 0 {
  2615  			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
  2616  			ctxt.Andptr = ctxt.Andptr[1:]
  2617  			asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2618  			return
  2619  		}
  2620  
  2621  		if v >= -128 && v < 128 {
  2622  			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
  2623  			ctxt.Andptr = ctxt.Andptr[1:]
  2624  			asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2625  			ctxt.Andptr[0] = byte(v)
  2626  			ctxt.Andptr = ctxt.Andptr[1:]
  2627  			return
  2628  		}
  2629  
  2630  		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
  2631  		ctxt.Andptr = ctxt.Andptr[1:]
  2632  		asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2633  		goto putrelv
  2634  	}
  2635  
  2636  	if REG_AX <= base && base <= REG_R15 {
  2637  		if a.Index == REG_TLS && ctxt.Flag_shared == 0 {
  2638  			rel = obj.Reloc{}
  2639  			rel.Type = obj.R_TLS_LE
  2640  			rel.Siz = 4
  2641  			rel.Sym = nil
  2642  			rel.Add = int64(v)
  2643  			v = 0
  2644  		}
  2645  
  2646  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  2647  			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
  2648  			ctxt.Andptr = ctxt.Andptr[1:]
  2649  			return
  2650  		}
  2651  
  2652  		if v >= -128 && v < 128 && rel.Siz == 0 {
  2653  			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
  2654  			ctxt.Andptr[1] = byte(v)
  2655  			ctxt.Andptr = ctxt.Andptr[2:]
  2656  			return
  2657  		}
  2658  
  2659  		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
  2660  		ctxt.Andptr = ctxt.Andptr[1:]
  2661  		goto putrelv
  2662  	}
  2663  
  2664  	goto bad
  2665  
  2666  putrelv:
  2667  	if rel.Siz != 0 {
  2668  		if rel.Siz != 4 {
  2669  			ctxt.Diag("bad rel")
  2670  			goto bad
  2671  		}
  2672  
  2673  		r := obj.Addrel(ctxt.Cursym)
  2674  		*r = rel
  2675  		r.Off = int32(ctxt.Curp.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2676  	}
  2677  
  2678  	put4(ctxt, v)
  2679  	return
  2680  
  2681  bad:
  2682  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  2683  	return
  2684  }
  2685  
  2686  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2687  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2688  }
  2689  
  2690  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2691  	asmandsz(ctxt, p, a, o, 0, 0)
  2692  }
  2693  
  2694  func bytereg(a *obj.Addr, t *uint8) {
  2695  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2696  		a.Reg += REG_AL - REG_AX
  2697  		*t = 0
  2698  	}
  2699  }
  2700  
  2701  func unbytereg(a *obj.Addr, t *uint8) {
  2702  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2703  		a.Reg += REG_AX - REG_AL
  2704  		*t = 0
  2705  	}
  2706  }
  2707  
const (
	// E is the byte 0xff. In the ymovtab entries that follow it appears
	// directly after the opcode bytes of the push/pop rows.
	// NOTE(review): presumably an end-of-opcode marker; confirm against
	// the code that walks ymovtab.
	E = 0xff
)
  2711  
        // ymovtab is a table of Movtab rows for moves and stack operations that
        // involve special registers (segment, control, debug, test, descriptor-table,
        // machine-status and task registers), plus double shifts and TLS-base loads.
        //
        // NOTE(review): judging from the rows, each entry holds the instruction, the
        // operand classes for From / From3 / To, a small encoding-kind code, and up to
        // four bytes of encoding data. Confirm against the Movtab type and the code
        // that walks this table. The all-zero last row presumably ends the table.
  2712  var ymovtab = []Movtab{
  2713  	/* push: segment-register forms; opcode bytes are followed by E */
  2714  	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
  2715  	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
  2716  	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
  2717  	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
  2718  	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
  2719  	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
  2720  	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
  2721  	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
  2722  	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
  2723  	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
  2724  	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
  2725  	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
  2726  	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
  2727  	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},
  2728  
  2729  	/* pop: segment-register forms; the W variants lead with the Pe prefix */
  2730  	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
  2731  	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
  2732  	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
  2733  	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
  2734  	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
  2735  	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
  2736  	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
  2737  	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
  2738  	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
  2739  	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
  2740  	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
  2741  	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},
  2742  
  2743  	/* mov seg: 0x8c stores a segment register, 0x8e loads one; the second byte is the segment register number (ES=0 ... GS=5) */
  2744  	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
  2745  	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
  2746  	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
  2747  	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
  2748  	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
  2749  	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
  2750  	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
  2751  	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
  2752  	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
  2753  	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
  2754  	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
  2755  	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},
  2756  
  2757  	/* mov cr: 0x0f 0x20 reads a control register, 0x0f 0x22 writes one; the third byte is the control register number */
  2758  	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
  2759  	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
  2760  	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
  2761  	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
  2762  	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
  2763  	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
  2764  	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
  2765  	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
  2766  	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
  2767  	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
  2768  	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
  2769  	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
  2770  	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
  2771  	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
  2772  	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
  2773  	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
  2774  	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
  2775  	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
  2776  	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
  2777  	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
  2778  
  2779  	/* mov dr: 0x0f 0x21 reads a debug register, 0x0f 0x23 writes one; the third byte is the debug register number */
  2780  	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
  2781  	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
  2782  	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
  2783  	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
  2784  	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
  2785  	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
  2786  	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
  2787  	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
  2788  	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
  2789  	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
  2790  	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
  2791  	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
  2792  
  2793  	/* mov tr: 0x0f 0x24 reads a test register, 0x0f 0x26 writes one (only the write rows end in E) */
  2794  	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
  2795  	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
  2796  	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
  2797  	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},
  2798  
  2799  	/* lgdt, sgdt, lidt, sidt: 0x0f 0x01 with the opcode-extension digit in the third byte (2, 0, 3, 1 respectively) */
  2800  	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
  2801  	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
  2802  	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
  2803  	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
  2804  	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
  2805  	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
  2806  	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
  2807  	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
  2808  
  2809  	/* lldt, sldt: 0x0f 0x00 with extension digit 2 (load) or 0 (store) */
  2810  	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
  2811  	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},
  2812  
  2813  	/* lmsw, smsw: 0x0f 0x01 with extension digit 6 (load) or 4 (store) */
  2814  	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
  2815  	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},
  2816  
  2817  	/* ltr, str: 0x0f 0x00 with extension digit 3 (load) or 1 (store) */
  2818  	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
  2819  	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},
  2820  
  2821  	/* load full pointer - unsupported
  2822  	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
  2823  	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
  2824  	*/
  2825  
  2826  	/* double shift: each row carries two opcode bytes (0xa4/0xa5 for left, 0xac/0xad for right); the Q and W rows lead with Pw / Pe */
  2827  	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2828  	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2829  	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2830  	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2831  	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2832  	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2833  	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2834  	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2835  	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2836  	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2837  	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2838  	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2839  	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2840  	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2841  	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2842  	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2843  	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2844  	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2845  
  2846  	/* load TLS base: no encoding bytes are listed in the row itself */
  2847  	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
  2848  	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
  2849  	{0, 0, 0, 0, 0, [4]uint8{}},
  2850  }
  2851  
  2852  func isax(a *obj.Addr) bool {
  2853  	switch a.Reg {
  2854  	case REG_AX, REG_AL, REG_AH:
  2855  		return true
  2856  	}
  2857  
  2858  	if a.Index == REG_AX {
  2859  		return true
  2860  	}
  2861  	return false
  2862  }
  2863  
  2864  func subreg(p *obj.Prog, from int, to int) {
  2865  	if false { /* debug['Q'] */
  2866  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  2867  	}
  2868  
  2869  	if int(p.From.Reg) == from {
  2870  		p.From.Reg = int16(to)
  2871  		p.Ft = 0
  2872  	}
  2873  
  2874  	if int(p.To.Reg) == from {
  2875  		p.To.Reg = int16(to)
  2876  		p.Tt = 0
  2877  	}
  2878  
  2879  	if int(p.From.Index) == from {
  2880  		p.From.Index = int16(to)
  2881  		p.Ft = 0
  2882  	}
  2883  
  2884  	if int(p.To.Index) == from {
  2885  		p.To.Index = int16(to)
  2886  		p.Tt = 0
  2887  	}
  2888  
  2889  	if false { /* debug['Q'] */
  2890  		fmt.Printf("%v\n", p)
  2891  	}
  2892  }
  2893  
  2894  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  2895  	switch op {
  2896  	case Pm, Pe, Pf2, Pf3:
  2897  		if osize != 1 {
  2898  			if op != Pm {
  2899  				ctxt.Andptr[0] = byte(op)
  2900  				ctxt.Andptr = ctxt.Andptr[1:]
  2901  			}
  2902  			ctxt.Andptr[0] = Pm
  2903  			ctxt.Andptr = ctxt.Andptr[1:]
  2904  			z++
  2905  			op = int(o.op[z])
  2906  			break
  2907  		}
  2908  		fallthrough
  2909  
  2910  	default:
  2911  		if -cap(ctxt.Andptr) == -cap(ctxt.And) || ctxt.And[-cap(ctxt.Andptr)+cap(ctxt.And[:])-1] != Pm {
  2912  			ctxt.Andptr[0] = Pm
  2913  			ctxt.Andptr = ctxt.Andptr[1:]
  2914  		}
  2915  	}
  2916  
  2917  	ctxt.Andptr[0] = byte(op)
  2918  	ctxt.Andptr = ctxt.Andptr[1:]
  2919  	return z
  2920  }
  2921  
  2922  var bpduff1 = []byte{
  2923  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  2924  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  2925  }
  2926  
  2927  var bpduff2 = []byte{
  2928  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  2929  }
  2930  
  2931  func vexprefix(ctxt *obj.Link, to *obj.Addr, from *obj.Addr, pref uint8) {
  2932  	rexR := regrex[to.Reg]
  2933  	rexB := regrex[from.Reg]
  2934  	rexX := regrex[from.Index]
  2935  	var prefBit uint8
  2936  	if pref == Pvex1 {
  2937  		prefBit = 1
  2938  	} else if pref == Pvex2 {
  2939  		prefBit = 2
  2940  	} // TODO add Pvex0,Pvex3
  2941  
  2942  	if rexX == 0 && rexB == 0 { // 2-byte vex prefix
  2943  		ctxt.Andptr[0] = 0xc5
  2944  		ctxt.Andptr = ctxt.Andptr[1:]
  2945  
  2946  		if rexR != 0 {
  2947  			ctxt.Andptr[0] = 0x7c
  2948  		} else {
  2949  			ctxt.Andptr[0] = 0xfc
  2950  		}
  2951  		ctxt.Andptr[0] |= prefBit
  2952  		ctxt.Andptr = ctxt.Andptr[1:]
  2953  	} else {
  2954  		ctxt.Andptr[0] = 0xc4
  2955  		ctxt.Andptr = ctxt.Andptr[1:]
  2956  
  2957  		ctxt.Andptr[0] = 0x1 // TODO handle different prefix
  2958  		if rexR == 0 {
  2959  			ctxt.Andptr[0] |= 0x80
  2960  		}
  2961  		if rexX == 0 {
  2962  			ctxt.Andptr[0] |= 0x40
  2963  		}
  2964  		if rexB == 0 {
  2965  			ctxt.Andptr[0] |= 0x20
  2966  		}
  2967  		ctxt.Andptr = ctxt.Andptr[1:]
  2968  
  2969  		ctxt.Andptr[0] = 0x7c
  2970  		ctxt.Andptr[0] |= prefBit
  2971  		ctxt.Andptr = ctxt.Andptr[1:]
  2972  	}
  2973  }
  2974  
  2975  func doasm(ctxt *obj.Link, p *obj.Prog) {
  2976  	ctxt.Curp = p // TODO
  2977  
  2978  	o := opindex[p.As&obj.AMask]
  2979  
  2980  	if o == nil {
  2981  		ctxt.Diag("asmins: missing op %v", p)
  2982  		return
  2983  	}
  2984  
  2985  	pre := prefixof(ctxt, p, &p.From)
  2986  	if pre != 0 {
  2987  		ctxt.Andptr[0] = byte(pre)
  2988  		ctxt.Andptr = ctxt.Andptr[1:]
  2989  	}
  2990  	pre = prefixof(ctxt, p, &p.To)
  2991  	if pre != 0 {
  2992  		ctxt.Andptr[0] = byte(pre)
  2993  		ctxt.Andptr = ctxt.Andptr[1:]
  2994  	}
  2995  
  2996  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  2997  	// which encodes as SHRQ $32(DX*0), AX.
  2998  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  2999  	// Change encoding generated by assemblers and compilers and remove.
  3000  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3001  		p.From3 = new(obj.Addr)
  3002  		p.From3.Type = obj.TYPE_REG
  3003  		p.From3.Reg = p.From.Index
  3004  		p.From.Index = 0
  3005  	}
  3006  
  3007  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3008  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3009  	switch p.As {
  3010  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3011  		if p.From3Type() == obj.TYPE_NONE {
  3012  			p.From3 = new(obj.Addr)
  3013  			*p.From3 = p.From
  3014  			p.From = obj.Addr{}
  3015  			p.From.Type = obj.TYPE_CONST
  3016  			p.From.Offset = p.To.Offset
  3017  			p.To.Offset = 0
  3018  		}
  3019  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3020  		if p.From3Type() == obj.TYPE_NONE {
  3021  			p.From3 = new(obj.Addr)
  3022  			*p.From3 = p.To
  3023  			p.To = obj.Addr{}
  3024  			p.To.Type = obj.TYPE_CONST
  3025  			p.To.Offset = p.From3.Offset
  3026  			p.From3.Offset = 0
  3027  		}
  3028  	}
  3029  
  3030  	if p.Ft == 0 {
  3031  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3032  	}
  3033  	if p.Tt == 0 {
  3034  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3035  	}
  3036  
  3037  	ft := int(p.Ft) * Ymax
  3038  	f3t := Ynone * Ymax
  3039  	if p.From3 != nil {
  3040  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3041  	}
  3042  	tt := int(p.Tt) * Ymax
  3043  
  3044  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3045  	z := 0
  3046  	var a *obj.Addr
  3047  	var l int
  3048  	var op int
  3049  	var q *obj.Prog
  3050  	var r *obj.Reloc
  3051  	var rel obj.Reloc
  3052  	var v int64
  3053  	for i := range o.ytab {
  3054  		yt := &o.ytab[i]
  3055  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3056  			switch o.prefix {
  3057  			case Px1: /* first option valid only in 32-bit mode */
  3058  				if ctxt.Mode == 64 && z == 0 {
  3059  					z += int(yt.zoffset) + xo
  3060  					continue
  3061  				}
  3062  			case Pq: /* 16 bit escape and opcode escape */
  3063  				ctxt.Andptr[0] = Pe
  3064  				ctxt.Andptr = ctxt.Andptr[1:]
  3065  
  3066  				ctxt.Andptr[0] = Pm
  3067  				ctxt.Andptr = ctxt.Andptr[1:]
  3068  
  3069  			case Pq3: /* 16 bit escape, Rex.w, and opcode escape */
  3070  				ctxt.Andptr[0] = Pe
  3071  				ctxt.Andptr = ctxt.Andptr[1:]
  3072  
  3073  				ctxt.Andptr[0] = Pw
  3074  				ctxt.Andptr = ctxt.Andptr[1:]
  3075  				ctxt.Andptr[0] = Pm
  3076  				ctxt.Andptr = ctxt.Andptr[1:]
  3077  
  3078  			case Pf2, /* xmm opcode escape */
  3079  				Pf3:
  3080  				ctxt.Andptr[0] = byte(o.prefix)
  3081  				ctxt.Andptr = ctxt.Andptr[1:]
  3082  
  3083  				ctxt.Andptr[0] = Pm
  3084  				ctxt.Andptr = ctxt.Andptr[1:]
  3085  
  3086  			case Pm: /* opcode escape */
  3087  				ctxt.Andptr[0] = Pm
  3088  				ctxt.Andptr = ctxt.Andptr[1:]
  3089  
  3090  			case Pe: /* 16 bit escape */
  3091  				ctxt.Andptr[0] = Pe
  3092  				ctxt.Andptr = ctxt.Andptr[1:]
  3093  
  3094  			case Pw: /* 64-bit escape */
  3095  				if p.Mode != 64 {
  3096  					ctxt.Diag("asmins: illegal 64: %v", p)
  3097  				}
  3098  				ctxt.Rexflag |= Pw
  3099  
  3100  			case Pw8: /* 64-bit escape if z >= 8 */
  3101  				if z >= 8 {
  3102  					if p.Mode != 64 {
  3103  						ctxt.Diag("asmins: illegal 64: %v", p)
  3104  					}
  3105  					ctxt.Rexflag |= Pw
  3106  				}
  3107  
  3108  			case Pb: /* botch */
  3109  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3110  					goto bad
  3111  				}
  3112  				// NOTE(rsc): This is probably safe to do always,
  3113  				// but when enabled it chooses different encodings
  3114  				// than the old cmd/internal/obj/i386 code did,
  3115  				// which breaks our "same bits out" checks.
  3116  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3117  				// in the original obj/i386, and it would encode
  3118  				// (using a valid, shorter form) as 3c 00 if we enabled
  3119  				// the call to bytereg here.
  3120  				if p.Mode == 64 {
  3121  					bytereg(&p.From, &p.Ft)
  3122  					bytereg(&p.To, &p.Tt)
  3123  				}
  3124  
  3125  			case P32: /* 32 bit but illegal if 64-bit mode */
  3126  				if p.Mode == 64 {
  3127  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3128  				}
  3129  
  3130  			case Py: /* 64-bit only, no prefix */
  3131  				if p.Mode != 64 {
  3132  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3133  				}
  3134  
  3135  			case Py1: /* 64-bit only if z < 1, no prefix */
  3136  				if z < 1 && p.Mode != 64 {
  3137  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3138  				}
  3139  
  3140  			case Py3: /* 64-bit only if z < 3, no prefix */
  3141  				if z < 3 && p.Mode != 64 {
  3142  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3143  				}
  3144  			}
  3145  
  3146  			if z >= len(o.op) {
  3147  				log.Fatalf("asmins bad table %v", p)
  3148  			}
  3149  			op = int(o.op[z])
  3150  			if op == 0x0f {
  3151  				ctxt.Andptr[0] = byte(op)
  3152  				ctxt.Andptr = ctxt.Andptr[1:]
  3153  				z++
  3154  				op = int(o.op[z])
  3155  			}
  3156  
  3157  			switch yt.zcase {
  3158  			default:
  3159  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3160  				return
  3161  
  3162  			case Zpseudo:
  3163  				break
  3164  
  3165  			case Zlit:
  3166  				for ; ; z++ {
  3167  					op = int(o.op[z])
  3168  					if op == 0 {
  3169  						break
  3170  					}
  3171  					ctxt.Andptr[0] = byte(op)
  3172  					ctxt.Andptr = ctxt.Andptr[1:]
  3173  				}
  3174  
  3175  			case Zlitm_r:
  3176  				for ; ; z++ {
  3177  					op = int(o.op[z])
  3178  					if op == 0 {
  3179  						break
  3180  					}
  3181  					ctxt.Andptr[0] = byte(op)
  3182  					ctxt.Andptr = ctxt.Andptr[1:]
  3183  				}
  3184  				asmand(ctxt, p, &p.From, &p.To)
  3185  
  3186  			case Zmb_r:
  3187  				bytereg(&p.From, &p.Ft)
  3188  				fallthrough
  3189  
  3190  				/* fall through */
  3191  			case Zm_r:
  3192  				ctxt.Andptr[0] = byte(op)
  3193  				ctxt.Andptr = ctxt.Andptr[1:]
  3194  
  3195  				asmand(ctxt, p, &p.From, &p.To)
  3196  
  3197  			case Zm2_r:
  3198  				ctxt.Andptr[0] = byte(op)
  3199  				ctxt.Andptr = ctxt.Andptr[1:]
  3200  				ctxt.Andptr[0] = byte(o.op[z+1])
  3201  				ctxt.Andptr = ctxt.Andptr[1:]
  3202  				asmand(ctxt, p, &p.From, &p.To)
  3203  
  3204  			case Zm_r_xm:
  3205  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3206  				asmand(ctxt, p, &p.From, &p.To)
  3207  
  3208  			case Zm_r_xm_vex:
  3209  				ctxt.Vexflag = 1
  3210  				vexprefix(ctxt, &p.To, &p.From, o.prefix)
  3211  				ctxt.Andptr[0] = byte(op)
  3212  				ctxt.Andptr = ctxt.Andptr[1:]
  3213  				asmand(ctxt, p, &p.From, &p.To)
  3214  
  3215  			case Zm_r_xm_nr:
  3216  				ctxt.Rexflag = 0
  3217  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3218  				asmand(ctxt, p, &p.From, &p.To)
  3219  
  3220  			case Zm_r_i_xm:
  3221  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3222  				asmand(ctxt, p, &p.From, p.From3)
  3223  				ctxt.Andptr[0] = byte(p.To.Offset)
  3224  				ctxt.Andptr = ctxt.Andptr[1:]
  3225  
  3226  			case Zm_r_3d:
  3227  				ctxt.Andptr[0] = 0x0f
  3228  				ctxt.Andptr = ctxt.Andptr[1:]
  3229  				ctxt.Andptr[0] = 0x0f
  3230  				ctxt.Andptr = ctxt.Andptr[1:]
  3231  				asmand(ctxt, p, &p.From, &p.To)
  3232  				ctxt.Andptr[0] = byte(op)
  3233  				ctxt.Andptr = ctxt.Andptr[1:]
  3234  
  3235  			case Zibm_r:
  3236  				for {
  3237  					tmp1 := z
  3238  					z++
  3239  					op = int(o.op[tmp1])
  3240  					if op == 0 {
  3241  						break
  3242  					}
  3243  					ctxt.Andptr[0] = byte(op)
  3244  					ctxt.Andptr = ctxt.Andptr[1:]
  3245  				}
  3246  				asmand(ctxt, p, p.From3, &p.To)
  3247  				ctxt.Andptr[0] = byte(p.From.Offset)
  3248  				ctxt.Andptr = ctxt.Andptr[1:]
  3249  
  3250  			case Zaut_r:
  3251  				ctxt.Andptr[0] = 0x8d
  3252  				ctxt.Andptr = ctxt.Andptr[1:] /* leal */
  3253  				if p.From.Type != obj.TYPE_ADDR {
  3254  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3255  				}
  3256  				p.From.Type = obj.TYPE_MEM
  3257  				asmand(ctxt, p, &p.From, &p.To)
  3258  				p.From.Type = obj.TYPE_ADDR
  3259  
  3260  			case Zm_o:
  3261  				ctxt.Andptr[0] = byte(op)
  3262  				ctxt.Andptr = ctxt.Andptr[1:]
  3263  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3264  
  3265  			case Zr_m:
  3266  				ctxt.Andptr[0] = byte(op)
  3267  				ctxt.Andptr = ctxt.Andptr[1:]
  3268  				asmand(ctxt, p, &p.To, &p.From)
  3269  
  3270  			case Zr_m_xm_vex:
  3271  				ctxt.Vexflag = 1
  3272  				vexprefix(ctxt, &p.From, &p.To, o.prefix)
  3273  				ctxt.Andptr[0] = byte(op)
  3274  				ctxt.Andptr = ctxt.Andptr[1:]
  3275  				asmand(ctxt, p, &p.To, &p.From)
  3276  
  3277  			case Zr_m_xm:
  3278  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3279  				asmand(ctxt, p, &p.To, &p.From)
  3280  
  3281  			case Zr_m_xm_nr:
  3282  				ctxt.Rexflag = 0
  3283  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3284  				asmand(ctxt, p, &p.To, &p.From)
  3285  
  3286  			case Zo_m:
  3287  				ctxt.Andptr[0] = byte(op)
  3288  				ctxt.Andptr = ctxt.Andptr[1:]
  3289  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3290  
  3291  			case Zcallindreg:
  3292  				r = obj.Addrel(ctxt.Cursym)
  3293  				r.Off = int32(p.Pc)
  3294  				r.Type = obj.R_CALLIND
  3295  				r.Siz = 0
  3296  				fallthrough
  3297  
  3298  			case Zo_m64:
  3299  				ctxt.Andptr[0] = byte(op)
  3300  				ctxt.Andptr = ctxt.Andptr[1:]
  3301  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3302  
  3303  			case Zm_ibo:
  3304  				ctxt.Andptr[0] = byte(op)
  3305  				ctxt.Andptr = ctxt.Andptr[1:]
  3306  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3307  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.To, nil))
  3308  				ctxt.Andptr = ctxt.Andptr[1:]
  3309  
  3310  			case Zibo_m:
  3311  				ctxt.Andptr[0] = byte(op)
  3312  				ctxt.Andptr = ctxt.Andptr[1:]
  3313  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3314  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3315  				ctxt.Andptr = ctxt.Andptr[1:]
  3316  
  3317  			case Zibo_m_xm:
  3318  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3319  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3320  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3321  				ctxt.Andptr = ctxt.Andptr[1:]
  3322  
  3323  			case Z_ib, Zib_:
  3324  				if yt.zcase == Zib_ {
  3325  					a = &p.From
  3326  				} else {
  3327  					a = &p.To
  3328  				}
  3329  				ctxt.Andptr[0] = byte(op)
  3330  				ctxt.Andptr = ctxt.Andptr[1:]
  3331  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, a, nil))
  3332  				ctxt.Andptr = ctxt.Andptr[1:]
  3333  
  3334  			case Zib_rp:
  3335  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3336  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3337  				ctxt.Andptr = ctxt.Andptr[1:]
  3338  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3339  				ctxt.Andptr = ctxt.Andptr[1:]
  3340  
  3341  			case Zil_rp:
  3342  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3343  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3344  				ctxt.Andptr = ctxt.Andptr[1:]
  3345  				if o.prefix == Pe {
  3346  					v = vaddr(ctxt, p, &p.From, nil)
  3347  					ctxt.Andptr[0] = byte(v)
  3348  					ctxt.Andptr = ctxt.Andptr[1:]
  3349  					ctxt.Andptr[0] = byte(v >> 8)
  3350  					ctxt.Andptr = ctxt.Andptr[1:]
  3351  				} else {
  3352  					relput4(ctxt, p, &p.From)
  3353  				}
  3354  
  3355  			case Zo_iw:
  3356  				ctxt.Andptr[0] = byte(op)
  3357  				ctxt.Andptr = ctxt.Andptr[1:]
  3358  				if p.From.Type != obj.TYPE_NONE {
  3359  					v = vaddr(ctxt, p, &p.From, nil)
  3360  					ctxt.Andptr[0] = byte(v)
  3361  					ctxt.Andptr = ctxt.Andptr[1:]
  3362  					ctxt.Andptr[0] = byte(v >> 8)
  3363  					ctxt.Andptr = ctxt.Andptr[1:]
  3364  				}
  3365  
  3366  			case Ziq_rp:
  3367  				v = vaddr(ctxt, p, &p.From, &rel)
  3368  				l = int(v >> 32)
  3369  				if l == 0 && rel.Siz != 8 {
  3370  					//p->mark |= 0100;
  3371  					//print("zero: %llux %v\n", v, p);
  3372  					ctxt.Rexflag &^= (0x40 | Rxw)
  3373  
  3374  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3375  					ctxt.Andptr[0] = byte(0xb8 + reg[p.To.Reg])
  3376  					ctxt.Andptr = ctxt.Andptr[1:]
  3377  					if rel.Type != 0 {
  3378  						r = obj.Addrel(ctxt.Cursym)
  3379  						*r = rel
  3380  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3381  					}
  3382  
  3383  					put4(ctxt, int32(v))
  3384  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3385  
  3386  					//p->mark |= 0100;
  3387  					//print("sign: %llux %v\n", v, p);
  3388  					ctxt.Andptr[0] = 0xc7
  3389  					ctxt.Andptr = ctxt.Andptr[1:]
  3390  
  3391  					asmando(ctxt, p, &p.To, 0)
  3392  					put4(ctxt, int32(v)) /* need all 8 */
  3393  				} else {
  3394  					//print("all: %llux %v\n", v, p);
  3395  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3396  
  3397  					ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3398  					ctxt.Andptr = ctxt.Andptr[1:]
  3399  					if rel.Type != 0 {
  3400  						r = obj.Addrel(ctxt.Cursym)
  3401  						*r = rel
  3402  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3403  					}
  3404  
  3405  					put8(ctxt, v)
  3406  				}
  3407  
  3408  			case Zib_rr:
  3409  				ctxt.Andptr[0] = byte(op)
  3410  				ctxt.Andptr = ctxt.Andptr[1:]
  3411  				asmand(ctxt, p, &p.To, &p.To)
  3412  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3413  				ctxt.Andptr = ctxt.Andptr[1:]
  3414  
  3415  			case Z_il, Zil_:
  3416  				if yt.zcase == Zil_ {
  3417  					a = &p.From
  3418  				} else {
  3419  					a = &p.To
  3420  				}
  3421  				ctxt.Andptr[0] = byte(op)
  3422  				ctxt.Andptr = ctxt.Andptr[1:]
  3423  				if o.prefix == Pe {
  3424  					v = vaddr(ctxt, p, a, nil)
  3425  					ctxt.Andptr[0] = byte(v)
  3426  					ctxt.Andptr = ctxt.Andptr[1:]
  3427  					ctxt.Andptr[0] = byte(v >> 8)
  3428  					ctxt.Andptr = ctxt.Andptr[1:]
  3429  				} else {
  3430  					relput4(ctxt, p, a)
  3431  				}
  3432  
  3433  			case Zm_ilo, Zilo_m:
  3434  				ctxt.Andptr[0] = byte(op)
  3435  				ctxt.Andptr = ctxt.Andptr[1:]
  3436  				if yt.zcase == Zilo_m {
  3437  					a = &p.From
  3438  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3439  				} else {
  3440  					a = &p.To
  3441  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3442  				}
  3443  
  3444  				if o.prefix == Pe {
  3445  					v = vaddr(ctxt, p, a, nil)
  3446  					ctxt.Andptr[0] = byte(v)
  3447  					ctxt.Andptr = ctxt.Andptr[1:]
  3448  					ctxt.Andptr[0] = byte(v >> 8)
  3449  					ctxt.Andptr = ctxt.Andptr[1:]
  3450  				} else {
  3451  					relput4(ctxt, p, a)
  3452  				}
  3453  
  3454  			case Zil_rr:
  3455  				ctxt.Andptr[0] = byte(op)
  3456  				ctxt.Andptr = ctxt.Andptr[1:]
  3457  				asmand(ctxt, p, &p.To, &p.To)
  3458  				if o.prefix == Pe {
  3459  					v = vaddr(ctxt, p, &p.From, nil)
  3460  					ctxt.Andptr[0] = byte(v)
  3461  					ctxt.Andptr = ctxt.Andptr[1:]
  3462  					ctxt.Andptr[0] = byte(v >> 8)
  3463  					ctxt.Andptr = ctxt.Andptr[1:]
  3464  				} else {
  3465  					relput4(ctxt, p, &p.From)
  3466  				}
  3467  
  3468  			case Z_rp:
  3469  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3470  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3471  				ctxt.Andptr = ctxt.Andptr[1:]
  3472  
  3473  			case Zrp_:
  3474  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3475  				ctxt.Andptr[0] = byte(op + reg[p.From.Reg])
  3476  				ctxt.Andptr = ctxt.Andptr[1:]
  3477  
  3478  			case Zclr:
  3479  				ctxt.Rexflag &^= Pw
  3480  				ctxt.Andptr[0] = byte(op)
  3481  				ctxt.Andptr = ctxt.Andptr[1:]
  3482  				asmand(ctxt, p, &p.To, &p.To)
  3483  
  3484  			case Zcallcon, Zjmpcon:
  3485  				if yt.zcase == Zcallcon {
  3486  					ctxt.Andptr[0] = byte(op)
  3487  					ctxt.Andptr = ctxt.Andptr[1:]
  3488  				} else {
  3489  					ctxt.Andptr[0] = byte(o.op[z+1])
  3490  					ctxt.Andptr = ctxt.Andptr[1:]
  3491  				}
  3492  				r = obj.Addrel(ctxt.Cursym)
  3493  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3494  				r.Type = obj.R_PCREL
  3495  				r.Siz = 4
  3496  				r.Add = p.To.Offset
  3497  				put4(ctxt, 0)
  3498  
  3499  			case Zcallind:
  3500  				ctxt.Andptr[0] = byte(op)
  3501  				ctxt.Andptr = ctxt.Andptr[1:]
  3502  				ctxt.Andptr[0] = byte(o.op[z+1])
  3503  				ctxt.Andptr = ctxt.Andptr[1:]
  3504  				r = obj.Addrel(ctxt.Cursym)
  3505  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3506  				r.Type = obj.R_ADDR
  3507  				r.Siz = 4
  3508  				r.Add = p.To.Offset
  3509  				r.Sym = p.To.Sym
  3510  				put4(ctxt, 0)
  3511  
  3512  			case Zcall, Zcallduff:
  3513  				if p.To.Sym == nil {
  3514  					ctxt.Diag("call without target")
  3515  					log.Fatalf("bad code")
  3516  				}
  3517  
  3518  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3519  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3520  				}
  3521  
  3522  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3523  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3524  					// (the call jumps into the middle of the function).
  3525  					// This makes it possible to see call sites for duffcopy/duffzero in
  3526  					// BP-based profiling tools like Linux perf (which is the
  3527  					// whole point of obj.Framepointer_enabled).
  3528  					// MOVQ BP, -16(SP)
  3529  					// LEAQ -16(SP), BP
  3530  					copy(ctxt.Andptr, bpduff1)
  3531  					ctxt.Andptr = ctxt.Andptr[len(bpduff1):]
  3532  				}
  3533  				ctxt.Andptr[0] = byte(op)
  3534  				ctxt.Andptr = ctxt.Andptr[1:]
  3535  				r = obj.Addrel(ctxt.Cursym)
  3536  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3537  				r.Sym = p.To.Sym
  3538  				r.Add = p.To.Offset
  3539  				r.Type = obj.R_CALL
  3540  				r.Siz = 4
  3541  				put4(ctxt, 0)
  3542  
  3543  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3544  					// Pop BP pushed above.
  3545  					// MOVQ 0(BP), BP
  3546  					copy(ctxt.Andptr, bpduff2)
  3547  					ctxt.Andptr = ctxt.Andptr[len(bpduff2):]
  3548  				}
  3549  
  3550  			// TODO: jump across functions needs reloc
  3551  			case Zbr, Zjmp, Zloop:
  3552  				if p.To.Sym != nil {
  3553  					if yt.zcase != Zjmp {
  3554  						ctxt.Diag("branch to ATEXT")
  3555  						log.Fatalf("bad code")
  3556  					}
  3557  
  3558  					ctxt.Andptr[0] = byte(o.op[z+1])
  3559  					ctxt.Andptr = ctxt.Andptr[1:]
  3560  					r = obj.Addrel(ctxt.Cursym)
  3561  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3562  					r.Sym = p.To.Sym
  3563  					r.Type = obj.R_PCREL
  3564  					r.Siz = 4
  3565  					put4(ctxt, 0)
  3566  					break
  3567  				}
  3568  
  3569  				// Assumes q is in this function.
  3570  				// TODO: Check in input, preserve in brchain.
  3571  
  3572  				// Fill in backward jump now.
  3573  				q = p.Pcond
  3574  
  3575  				if q == nil {
  3576  					ctxt.Diag("jmp/branch/loop without target")
  3577  					log.Fatalf("bad code")
  3578  				}
  3579  
  3580  				if p.Back&1 != 0 {
  3581  					v = q.Pc - (p.Pc + 2)
  3582  					if v >= -128 {
  3583  						if p.As == AJCXZL {
  3584  							ctxt.Andptr[0] = 0x67
  3585  							ctxt.Andptr = ctxt.Andptr[1:]
  3586  						}
  3587  						ctxt.Andptr[0] = byte(op)
  3588  						ctxt.Andptr = ctxt.Andptr[1:]
  3589  						ctxt.Andptr[0] = byte(v)
  3590  						ctxt.Andptr = ctxt.Andptr[1:]
  3591  					} else if yt.zcase == Zloop {
  3592  						ctxt.Diag("loop too far: %v", p)
  3593  					} else {
  3594  						v -= 5 - 2
  3595  						if yt.zcase == Zbr {
  3596  							ctxt.Andptr[0] = 0x0f
  3597  							ctxt.Andptr = ctxt.Andptr[1:]
  3598  							v--
  3599  						}
  3600  
  3601  						ctxt.Andptr[0] = byte(o.op[z+1])
  3602  						ctxt.Andptr = ctxt.Andptr[1:]
  3603  						ctxt.Andptr[0] = byte(v)
  3604  						ctxt.Andptr = ctxt.Andptr[1:]
  3605  						ctxt.Andptr[0] = byte(v >> 8)
  3606  						ctxt.Andptr = ctxt.Andptr[1:]
  3607  						ctxt.Andptr[0] = byte(v >> 16)
  3608  						ctxt.Andptr = ctxt.Andptr[1:]
  3609  						ctxt.Andptr[0] = byte(v >> 24)
  3610  						ctxt.Andptr = ctxt.Andptr[1:]
  3611  					}
  3612  
  3613  					break
  3614  				}
  3615  
  3616  				// Annotate target; will fill in later.
  3617  				p.Forwd = q.Rel
  3618  
  3619  				q.Rel = p
  3620  				if p.Back&2 != 0 { // short
  3621  					if p.As == AJCXZL {
  3622  						ctxt.Andptr[0] = 0x67
  3623  						ctxt.Andptr = ctxt.Andptr[1:]
  3624  					}
  3625  					ctxt.Andptr[0] = byte(op)
  3626  					ctxt.Andptr = ctxt.Andptr[1:]
  3627  					ctxt.Andptr[0] = 0
  3628  					ctxt.Andptr = ctxt.Andptr[1:]
  3629  				} else if yt.zcase == Zloop {
  3630  					ctxt.Diag("loop too far: %v", p)
  3631  				} else {
  3632  					if yt.zcase == Zbr {
  3633  						ctxt.Andptr[0] = 0x0f
  3634  						ctxt.Andptr = ctxt.Andptr[1:]
  3635  					}
  3636  					ctxt.Andptr[0] = byte(o.op[z+1])
  3637  					ctxt.Andptr = ctxt.Andptr[1:]
  3638  					ctxt.Andptr[0] = 0
  3639  					ctxt.Andptr = ctxt.Andptr[1:]
  3640  					ctxt.Andptr[0] = 0
  3641  					ctxt.Andptr = ctxt.Andptr[1:]
  3642  					ctxt.Andptr[0] = 0
  3643  					ctxt.Andptr = ctxt.Andptr[1:]
  3644  					ctxt.Andptr[0] = 0
  3645  					ctxt.Andptr = ctxt.Andptr[1:]
  3646  				}
  3647  
  3648  				break
  3649  
  3650  			/*
  3651  				v = q->pc - p->pc - 2;
  3652  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3653  					*ctxt->andptr++ = op;
  3654  					*ctxt->andptr++ = v;
  3655  				} else {
  3656  					v -= 5-2;
  3657  					if(yt.zcase == Zbr) {
  3658  						*ctxt->andptr++ = 0x0f;
  3659  						v--;
  3660  					}
  3661  					*ctxt->andptr++ = o->op[z+1];
  3662  					*ctxt->andptr++ = v;
  3663  					*ctxt->andptr++ = v>>8;
  3664  					*ctxt->andptr++ = v>>16;
  3665  					*ctxt->andptr++ = v>>24;
  3666  				}
  3667  			*/
  3668  
  3669  			case Zbyte:
  3670  				v = vaddr(ctxt, p, &p.From, &rel)
  3671  				if rel.Siz != 0 {
  3672  					rel.Siz = uint8(op)
  3673  					r = obj.Addrel(ctxt.Cursym)
  3674  					*r = rel
  3675  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3676  				}
  3677  
  3678  				ctxt.Andptr[0] = byte(v)
  3679  				ctxt.Andptr = ctxt.Andptr[1:]
  3680  				if op > 1 {
  3681  					ctxt.Andptr[0] = byte(v >> 8)
  3682  					ctxt.Andptr = ctxt.Andptr[1:]
  3683  					if op > 2 {
  3684  						ctxt.Andptr[0] = byte(v >> 16)
  3685  						ctxt.Andptr = ctxt.Andptr[1:]
  3686  						ctxt.Andptr[0] = byte(v >> 24)
  3687  						ctxt.Andptr = ctxt.Andptr[1:]
  3688  						if op > 4 {
  3689  							ctxt.Andptr[0] = byte(v >> 32)
  3690  							ctxt.Andptr = ctxt.Andptr[1:]
  3691  							ctxt.Andptr[0] = byte(v >> 40)
  3692  							ctxt.Andptr = ctxt.Andptr[1:]
  3693  							ctxt.Andptr[0] = byte(v >> 48)
  3694  							ctxt.Andptr = ctxt.Andptr[1:]
  3695  							ctxt.Andptr[0] = byte(v >> 56)
  3696  							ctxt.Andptr = ctxt.Andptr[1:]
  3697  						}
  3698  					}
  3699  				}
  3700  			}
  3701  
  3702  			return
  3703  		}
  3704  		z += int(yt.zoffset) + xo
  3705  	}
  3706  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3707  		var pp obj.Prog
  3708  		var t []byte
  3709  		if p.As == mo[0].as {
  3710  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3711  				t = mo[0].op[:]
  3712  				switch mo[0].code {
  3713  				default:
  3714  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3715  
  3716  				case 0: /* lit */
  3717  					for z = 0; t[z] != E; z++ {
  3718  						ctxt.Andptr[0] = t[z]
  3719  						ctxt.Andptr = ctxt.Andptr[1:]
  3720  					}
  3721  
  3722  				case 1: /* r,m */
  3723  					ctxt.Andptr[0] = t[0]
  3724  					ctxt.Andptr = ctxt.Andptr[1:]
  3725  
  3726  					asmando(ctxt, p, &p.To, int(t[1]))
  3727  
  3728  				case 2: /* m,r */
  3729  					ctxt.Andptr[0] = t[0]
  3730  					ctxt.Andptr = ctxt.Andptr[1:]
  3731  
  3732  					asmando(ctxt, p, &p.From, int(t[1]))
  3733  
  3734  				case 3: /* r,m - 2op */
  3735  					ctxt.Andptr[0] = t[0]
  3736  					ctxt.Andptr = ctxt.Andptr[1:]
  3737  
  3738  					ctxt.Andptr[0] = t[1]
  3739  					ctxt.Andptr = ctxt.Andptr[1:]
  3740  					asmando(ctxt, p, &p.To, int(t[2]))
  3741  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3742  
  3743  				case 4: /* m,r - 2op */
  3744  					ctxt.Andptr[0] = t[0]
  3745  					ctxt.Andptr = ctxt.Andptr[1:]
  3746  
  3747  					ctxt.Andptr[0] = t[1]
  3748  					ctxt.Andptr = ctxt.Andptr[1:]
  3749  					asmando(ctxt, p, &p.From, int(t[2]))
  3750  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3751  
  3752  				case 5: /* load full pointer, trash heap */
  3753  					if t[0] != 0 {
  3754  						ctxt.Andptr[0] = t[0]
  3755  						ctxt.Andptr = ctxt.Andptr[1:]
  3756  					}
  3757  					switch p.To.Index {
  3758  					default:
  3759  						goto bad
  3760  
  3761  					case REG_DS:
  3762  						ctxt.Andptr[0] = 0xc5
  3763  						ctxt.Andptr = ctxt.Andptr[1:]
  3764  
  3765  					case REG_SS:
  3766  						ctxt.Andptr[0] = 0x0f
  3767  						ctxt.Andptr = ctxt.Andptr[1:]
  3768  						ctxt.Andptr[0] = 0xb2
  3769  						ctxt.Andptr = ctxt.Andptr[1:]
  3770  
  3771  					case REG_ES:
  3772  						ctxt.Andptr[0] = 0xc4
  3773  						ctxt.Andptr = ctxt.Andptr[1:]
  3774  
  3775  					case REG_FS:
  3776  						ctxt.Andptr[0] = 0x0f
  3777  						ctxt.Andptr = ctxt.Andptr[1:]
  3778  						ctxt.Andptr[0] = 0xb4
  3779  						ctxt.Andptr = ctxt.Andptr[1:]
  3780  
  3781  					case REG_GS:
  3782  						ctxt.Andptr[0] = 0x0f
  3783  						ctxt.Andptr = ctxt.Andptr[1:]
  3784  						ctxt.Andptr[0] = 0xb5
  3785  						ctxt.Andptr = ctxt.Andptr[1:]
  3786  					}
  3787  
  3788  					asmand(ctxt, p, &p.From, &p.To)
  3789  
  3790  				case 6: /* double shift */
  3791  					if t[0] == Pw {
  3792  						if p.Mode != 64 {
  3793  							ctxt.Diag("asmins: illegal 64: %v", p)
  3794  						}
  3795  						ctxt.Rexflag |= Pw
  3796  						t = t[1:]
  3797  					} else if t[0] == Pe {
  3798  						ctxt.Andptr[0] = Pe
  3799  						ctxt.Andptr = ctxt.Andptr[1:]
  3800  						t = t[1:]
  3801  					}
  3802  
  3803  					switch p.From.Type {
  3804  					default:
  3805  						goto bad
  3806  
  3807  					case obj.TYPE_CONST:
  3808  						ctxt.Andptr[0] = 0x0f
  3809  						ctxt.Andptr = ctxt.Andptr[1:]
  3810  						ctxt.Andptr[0] = t[0]
  3811  						ctxt.Andptr = ctxt.Andptr[1:]
  3812  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3813  						ctxt.Andptr[0] = byte(p.From.Offset)
  3814  						ctxt.Andptr = ctxt.Andptr[1:]
  3815  
  3816  					case obj.TYPE_REG:
  3817  						switch p.From.Reg {
  3818  						default:
  3819  							goto bad
  3820  
  3821  						case REG_CL, REG_CX:
  3822  							ctxt.Andptr[0] = 0x0f
  3823  							ctxt.Andptr = ctxt.Andptr[1:]
  3824  							ctxt.Andptr[0] = t[1]
  3825  							ctxt.Andptr = ctxt.Andptr[1:]
  3826  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3827  						}
  3828  					}
  3829  
  3830  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3831  				// where you load the TLS base register into a register and then index off that
  3832  				// register to access the actual TLS variables. Systems that allow direct TLS access
  3833  				// are handled in prefixof above and should not be listed here.
  3834  				case 7: /* mov tls, r */
  3835  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  3836  						ctxt.Diag("invalid load of TLS: %v", p)
  3837  					}
  3838  
  3839  					if p.Mode == 32 {
  3840  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3841  						// where you load the TLS base register into a register and then index off that
  3842  						// register to access the actual TLS variables. Systems that allow direct TLS access
  3843  						// are handled in prefixof above and should not be listed here.
  3844  						switch ctxt.Headtype {
  3845  						default:
  3846  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3847  
  3848  						case obj.Hlinux,
  3849  							obj.Hnacl:
  3850  							// ELF TLS base is 0(GS).
  3851  							pp.From = p.From
  3852  
  3853  							pp.From.Type = obj.TYPE_MEM
  3854  							pp.From.Reg = REG_GS
  3855  							pp.From.Offset = 0
  3856  							pp.From.Index = REG_NONE
  3857  							pp.From.Scale = 0
  3858  							ctxt.Andptr[0] = 0x65
  3859  							ctxt.Andptr = ctxt.Andptr[1:] // GS
  3860  							ctxt.Andptr[0] = 0x8B
  3861  							ctxt.Andptr = ctxt.Andptr[1:]
  3862  							asmand(ctxt, p, &pp.From, &p.To)
  3863  
  3864  						case obj.Hplan9:
  3865  							if ctxt.Plan9privates == nil {
  3866  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3867  							}
  3868  							pp.From = obj.Addr{}
  3869  							pp.From.Type = obj.TYPE_MEM
  3870  							pp.From.Name = obj.NAME_EXTERN
  3871  							pp.From.Sym = ctxt.Plan9privates
  3872  							pp.From.Offset = 0
  3873  							pp.From.Index = REG_NONE
  3874  							ctxt.Andptr[0] = 0x8B
  3875  							ctxt.Andptr = ctxt.Andptr[1:]
  3876  							asmand(ctxt, p, &pp.From, &p.To)
  3877  
  3878  						case obj.Hwindows:
  3879  							// Windows TLS base is always 0x14(FS).
  3880  							pp.From = p.From
  3881  
  3882  							pp.From.Type = obj.TYPE_MEM
  3883  							pp.From.Reg = REG_FS
  3884  							pp.From.Offset = 0x14
  3885  							pp.From.Index = REG_NONE
  3886  							pp.From.Scale = 0
  3887  							ctxt.Andptr[0] = 0x64
  3888  							ctxt.Andptr = ctxt.Andptr[1:] // FS
  3889  							ctxt.Andptr[0] = 0x8B
  3890  							ctxt.Andptr = ctxt.Andptr[1:]
  3891  							asmand(ctxt, p, &pp.From, &p.To)
  3892  						}
  3893  						break
  3894  					}
  3895  
  3896  					switch ctxt.Headtype {
  3897  					default:
  3898  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3899  
  3900  					case obj.Hlinux:
  3901  						if ctxt.Flag_shared == 0 {
  3902  							log.Fatalf("unknown TLS base location for linux without -shared")
  3903  						}
  3904  						// Note that this is not generating the same insn as the other cases.
  3905  						//     MOV TLS, R_to
  3906  						// becomes
  3907  						//     movq g@gottpoff(%rip), R_to
  3908  						// which is encoded as
  3909  						//     movq 0(%rip), R_to
  3910  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  3911  						// is g, which we can't check here, but will when we assemble the second
  3912  						// instruction.
  3913  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  3914  
  3915  						ctxt.Andptr[0] = 0x8B
  3916  						ctxt.Andptr = ctxt.Andptr[1:]
  3917  						ctxt.Andptr[0] = byte(0x05 | (reg[p.To.Reg] << 3))
  3918  						ctxt.Andptr = ctxt.Andptr[1:]
  3919  						r = obj.Addrel(ctxt.Cursym)
  3920  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3921  						r.Type = obj.R_TLS_IE
  3922  						r.Siz = 4
  3923  						r.Add = -4
  3924  						put4(ctxt, 0)
  3925  
  3926  					case obj.Hplan9:
  3927  						if ctxt.Plan9privates == nil {
  3928  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3929  						}
  3930  						pp.From = obj.Addr{}
  3931  						pp.From.Type = obj.TYPE_MEM
  3932  						pp.From.Name = obj.NAME_EXTERN
  3933  						pp.From.Sym = ctxt.Plan9privates
  3934  						pp.From.Offset = 0
  3935  						pp.From.Index = REG_NONE
  3936  						ctxt.Rexflag |= Pw
  3937  						ctxt.Andptr[0] = 0x8B
  3938  						ctxt.Andptr = ctxt.Andptr[1:]
  3939  						asmand(ctxt, p, &pp.From, &p.To)
  3940  
  3941  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  3942  						// TLS base is 0(FS).
  3943  						pp.From = p.From
  3944  
  3945  						pp.From.Type = obj.TYPE_MEM
  3946  						pp.From.Name = obj.NAME_NONE
  3947  						pp.From.Reg = REG_NONE
  3948  						pp.From.Offset = 0
  3949  						pp.From.Index = REG_NONE
  3950  						pp.From.Scale = 0
  3951  						ctxt.Rexflag |= Pw
  3952  						ctxt.Andptr[0] = 0x64
  3953  						ctxt.Andptr = ctxt.Andptr[1:] // FS
  3954  						ctxt.Andptr[0] = 0x8B
  3955  						ctxt.Andptr = ctxt.Andptr[1:]
  3956  						asmand(ctxt, p, &pp.From, &p.To)
  3957  
  3958  					case obj.Hwindows:
  3959  						// Windows TLS base is always 0x28(GS).
  3960  						pp.From = p.From
  3961  
  3962  						pp.From.Type = obj.TYPE_MEM
  3963  						pp.From.Name = obj.NAME_NONE
  3964  						pp.From.Reg = REG_GS
  3965  						pp.From.Offset = 0x28
  3966  						pp.From.Index = REG_NONE
  3967  						pp.From.Scale = 0
  3968  						ctxt.Rexflag |= Pw
  3969  						ctxt.Andptr[0] = 0x65
  3970  						ctxt.Andptr = ctxt.Andptr[1:] // GS
  3971  						ctxt.Andptr[0] = 0x8B
  3972  						ctxt.Andptr = ctxt.Andptr[1:]
  3973  						asmand(ctxt, p, &pp.From, &p.To)
  3974  					}
  3975  				}
  3976  				return
  3977  			}
  3978  		}
  3979  	}
  3980  	goto bad
  3981  
  3982  bad:
  3983  	if p.Mode != 64 {
  3984  		/*
  3985  		 * here, the assembly has failed.
  3986  		 * if its a byte instruction that has
  3987  		 * unaddressable registers, try to
  3988  		 * exchange registers and reissue the
  3989  		 * instruction with the operands renamed.
  3990  		 */
  3991  		pp := *p
  3992  
  3993  		unbytereg(&pp.From, &pp.Ft)
  3994  		unbytereg(&pp.To, &pp.Tt)
  3995  
  3996  		z := int(p.From.Reg)
  3997  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  3998  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  3999  			// For now, different to keep bit-for-bit compatibility.
  4000  			if p.Mode == 32 {
  4001  				breg := byteswapreg(ctxt, &p.To)
  4002  				if breg != REG_AX {
  4003  					ctxt.Andptr[0] = 0x87
  4004  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4005  					asmando(ctxt, p, &p.From, reg[breg])
  4006  					subreg(&pp, z, breg)
  4007  					doasm(ctxt, &pp)
  4008  					ctxt.Andptr[0] = 0x87
  4009  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4010  					asmando(ctxt, p, &p.From, reg[breg])
  4011  				} else {
  4012  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4013  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4014  					subreg(&pp, z, REG_AX)
  4015  					doasm(ctxt, &pp)
  4016  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4017  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4018  				}
  4019  				return
  4020  			}
  4021  
  4022  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4023  				// We certainly don't want to exchange
  4024  				// with AX if the op is MUL or DIV.
  4025  				ctxt.Andptr[0] = 0x87
  4026  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4027  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4028  				subreg(&pp, z, REG_BX)
  4029  				doasm(ctxt, &pp)
  4030  				ctxt.Andptr[0] = 0x87
  4031  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4032  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4033  			} else {
  4034  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4035  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4036  				subreg(&pp, z, REG_AX)
  4037  				doasm(ctxt, &pp)
  4038  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4039  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4040  			}
  4041  			return
  4042  		}
  4043  
  4044  		z = int(p.To.Reg)
  4045  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4046  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4047  			// For now, different to keep bit-for-bit compatibility.
  4048  			if p.Mode == 32 {
  4049  				breg := byteswapreg(ctxt, &p.From)
  4050  				if breg != REG_AX {
  4051  					ctxt.Andptr[0] = 0x87
  4052  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4053  					asmando(ctxt, p, &p.To, reg[breg])
  4054  					subreg(&pp, z, breg)
  4055  					doasm(ctxt, &pp)
  4056  					ctxt.Andptr[0] = 0x87
  4057  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4058  					asmando(ctxt, p, &p.To, reg[breg])
  4059  				} else {
  4060  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4061  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4062  					subreg(&pp, z, REG_AX)
  4063  					doasm(ctxt, &pp)
  4064  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4065  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4066  				}
  4067  				return
  4068  			}
  4069  
  4070  			if isax(&p.From) {
  4071  				ctxt.Andptr[0] = 0x87
  4072  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4073  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4074  				subreg(&pp, z, REG_BX)
  4075  				doasm(ctxt, &pp)
  4076  				ctxt.Andptr[0] = 0x87
  4077  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4078  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4079  			} else {
  4080  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4081  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4082  				subreg(&pp, z, REG_AX)
  4083  				doasm(ctxt, &pp)
  4084  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4085  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4086  			}
  4087  			return
  4088  		}
  4089  	}
  4090  
  4091  	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4092  	return
  4093  }
  4094  
  4095  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4096  // which is not referenced in a.
  4097  // If a is empty, it returns BX to account for MULB-like instructions
  4098  // that might use DX and AX.
  4099  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4100  	cand := 1
  4101  	canc := cand
  4102  	canb := canc
  4103  	cana := canb
  4104  
  4105  	if a.Type == obj.TYPE_NONE {
  4106  		cand = 0
  4107  		cana = cand
  4108  	}
  4109  
  4110  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4111  		switch a.Reg {
  4112  		case REG_NONE:
  4113  			cand = 0
  4114  			cana = cand
  4115  
  4116  		case REG_AX, REG_AL, REG_AH:
  4117  			cana = 0
  4118  
  4119  		case REG_BX, REG_BL, REG_BH:
  4120  			canb = 0
  4121  
  4122  		case REG_CX, REG_CL, REG_CH:
  4123  			canc = 0
  4124  
  4125  		case REG_DX, REG_DL, REG_DH:
  4126  			cand = 0
  4127  		}
  4128  	}
  4129  
  4130  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4131  		switch a.Index {
  4132  		case REG_AX:
  4133  			cana = 0
  4134  
  4135  		case REG_BX:
  4136  			canb = 0
  4137  
  4138  		case REG_CX:
  4139  			canc = 0
  4140  
  4141  		case REG_DX:
  4142  			cand = 0
  4143  		}
  4144  	}
  4145  
  4146  	if cana != 0 {
  4147  		return REG_AX
  4148  	}
  4149  	if canb != 0 {
  4150  		return REG_BX
  4151  	}
  4152  	if canc != 0 {
  4153  		return REG_CX
  4154  	}
  4155  	if cand != 0 {
  4156  		return REG_DX
  4157  	}
  4158  
  4159  	ctxt.Diag("impossible byte register")
  4160  	log.Fatalf("bad code")
  4161  	return 0
  4162  }
  4163  
  4164  func isbadbyte(a *obj.Addr) bool {
  4165  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4166  }
  4167  
  4168  var naclret = []uint8{
  4169  	0x5e, // POPL SI
  4170  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4171  	0x83,
  4172  	0xe6,
  4173  	0xe0, // ANDL $~31, SI
  4174  	0x4c,
  4175  	0x01,
  4176  	0xfe, // ADDQ R15, SI
  4177  	0xff,
  4178  	0xe6, // JMP SI
  4179  }
  4180  
  4181  var naclret8 = []uint8{
  4182  	0x5d, // POPL BP
  4183  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4184  	0x83,
  4185  	0xe5,
  4186  	0xe0, // ANDL $~31, BP
  4187  	0xff,
  4188  	0xe5, // JMP BP
  4189  }
  4190  
  4191  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4192  
  4193  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4194  
  4195  var naclmovs = []uint8{
  4196  	0x89,
  4197  	0xf6, // MOVL SI, SI
  4198  	0x49,
  4199  	0x8d,
  4200  	0x34,
  4201  	0x37, // LEAQ (R15)(SI*1), SI
  4202  	0x89,
  4203  	0xff, // MOVL DI, DI
  4204  	0x49,
  4205  	0x8d,
  4206  	0x3c,
  4207  	0x3f, // LEAQ (R15)(DI*1), DI
  4208  }
  4209  
  4210  var naclstos = []uint8{
  4211  	0x89,
  4212  	0xff, // MOVL DI, DI
  4213  	0x49,
  4214  	0x8d,
  4215  	0x3c,
  4216  	0x3f, // LEAQ (R15)(DI*1), DI
  4217  }
  4218  
  4219  func nacltrunc(ctxt *obj.Link, reg int) {
  4220  	if reg >= REG_R8 {
  4221  		ctxt.Andptr[0] = 0x45
  4222  		ctxt.Andptr = ctxt.Andptr[1:]
  4223  	}
  4224  	reg = (reg - REG_AX) & 7
  4225  	ctxt.Andptr[0] = 0x89
  4226  	ctxt.Andptr = ctxt.Andptr[1:]
  4227  	ctxt.Andptr[0] = byte(3<<6 | reg<<3 | reg)
  4228  	ctxt.Andptr = ctxt.Andptr[1:]
  4229  }
  4230  
  4231  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4232  	ctxt.Andptr = ctxt.And[:]
  4233  	ctxt.Asmode = int(p.Mode)
  4234  
  4235  	if p.As == obj.AUSEFIELD {
  4236  		r := obj.Addrel(ctxt.Cursym)
  4237  		r.Off = 0
  4238  		r.Siz = 0
  4239  		r.Sym = p.From.Sym
  4240  		r.Type = obj.R_USEFIELD
  4241  		return
  4242  	}
  4243  
  4244  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4245  		switch p.As {
  4246  		case obj.ARET:
  4247  			copy(ctxt.Andptr, naclret8)
  4248  			ctxt.Andptr = ctxt.Andptr[len(naclret8):]
  4249  			return
  4250  
  4251  		case obj.ACALL,
  4252  			obj.AJMP:
  4253  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4254  				ctxt.Andptr[0] = 0x83
  4255  				ctxt.Andptr = ctxt.Andptr[1:]
  4256  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4257  				ctxt.Andptr = ctxt.Andptr[1:]
  4258  				ctxt.Andptr[0] = 0xe0
  4259  				ctxt.Andptr = ctxt.Andptr[1:]
  4260  			}
  4261  
  4262  		case AINT:
  4263  			ctxt.Andptr[0] = 0xf4
  4264  			ctxt.Andptr = ctxt.Andptr[1:]
  4265  			return
  4266  		}
  4267  	}
  4268  
  4269  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4270  		if p.As == AREP {
  4271  			ctxt.Rep++
  4272  			return
  4273  		}
  4274  
  4275  		if p.As == AREPN {
  4276  			ctxt.Repn++
  4277  			return
  4278  		}
  4279  
  4280  		if p.As == ALOCK {
  4281  			ctxt.Lock++
  4282  			return
  4283  		}
  4284  
  4285  		if p.As != ALEAQ && p.As != ALEAL {
  4286  			if p.From.Index != obj.TYPE_NONE && p.From.Scale > 0 {
  4287  				nacltrunc(ctxt, int(p.From.Index))
  4288  			}
  4289  			if p.To.Index != obj.TYPE_NONE && p.To.Scale > 0 {
  4290  				nacltrunc(ctxt, int(p.To.Index))
  4291  			}
  4292  		}
  4293  
  4294  		switch p.As {
  4295  		case obj.ARET:
  4296  			copy(ctxt.Andptr, naclret)
  4297  			ctxt.Andptr = ctxt.Andptr[len(naclret):]
  4298  			return
  4299  
  4300  		case obj.ACALL,
  4301  			obj.AJMP:
  4302  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4303  				// ANDL $~31, reg
  4304  				ctxt.Andptr[0] = 0x83
  4305  				ctxt.Andptr = ctxt.Andptr[1:]
  4306  
  4307  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4308  				ctxt.Andptr = ctxt.Andptr[1:]
  4309  				ctxt.Andptr[0] = 0xe0
  4310  				ctxt.Andptr = ctxt.Andptr[1:]
  4311  
  4312  				// ADDQ R15, reg
  4313  				ctxt.Andptr[0] = 0x4c
  4314  				ctxt.Andptr = ctxt.Andptr[1:]
  4315  
  4316  				ctxt.Andptr[0] = 0x01
  4317  				ctxt.Andptr = ctxt.Andptr[1:]
  4318  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_AX))
  4319  				ctxt.Andptr = ctxt.Andptr[1:]
  4320  			}
  4321  
  4322  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4323  				// ANDL $~31, reg
  4324  				ctxt.Andptr[0] = 0x41
  4325  				ctxt.Andptr = ctxt.Andptr[1:]
  4326  
  4327  				ctxt.Andptr[0] = 0x83
  4328  				ctxt.Andptr = ctxt.Andptr[1:]
  4329  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_R8))
  4330  				ctxt.Andptr = ctxt.Andptr[1:]
  4331  				ctxt.Andptr[0] = 0xe0
  4332  				ctxt.Andptr = ctxt.Andptr[1:]
  4333  
  4334  				// ADDQ R15, reg
  4335  				ctxt.Andptr[0] = 0x4d
  4336  				ctxt.Andptr = ctxt.Andptr[1:]
  4337  
  4338  				ctxt.Andptr[0] = 0x01
  4339  				ctxt.Andptr = ctxt.Andptr[1:]
  4340  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_R8))
  4341  				ctxt.Andptr = ctxt.Andptr[1:]
  4342  			}
  4343  
  4344  		case AINT:
  4345  			ctxt.Andptr[0] = 0xf4
  4346  			ctxt.Andptr = ctxt.Andptr[1:]
  4347  			return
  4348  
  4349  		case ASCASB,
  4350  			ASCASW,
  4351  			ASCASL,
  4352  			ASCASQ,
  4353  			ASTOSB,
  4354  			ASTOSW,
  4355  			ASTOSL,
  4356  			ASTOSQ:
  4357  			copy(ctxt.Andptr, naclstos)
  4358  			ctxt.Andptr = ctxt.Andptr[len(naclstos):]
  4359  
  4360  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4361  			copy(ctxt.Andptr, naclmovs)
  4362  			ctxt.Andptr = ctxt.Andptr[len(naclmovs):]
  4363  		}
  4364  
  4365  		if ctxt.Rep != 0 {
  4366  			ctxt.Andptr[0] = 0xf3
  4367  			ctxt.Andptr = ctxt.Andptr[1:]
  4368  			ctxt.Rep = 0
  4369  		}
  4370  
  4371  		if ctxt.Repn != 0 {
  4372  			ctxt.Andptr[0] = 0xf2
  4373  			ctxt.Andptr = ctxt.Andptr[1:]
  4374  			ctxt.Repn = 0
  4375  		}
  4376  
  4377  		if ctxt.Lock != 0 {
  4378  			ctxt.Andptr[0] = 0xf0
  4379  			ctxt.Andptr = ctxt.Andptr[1:]
  4380  			ctxt.Lock = 0
  4381  		}
  4382  	}
  4383  
  4384  	ctxt.Rexflag = 0
  4385  	ctxt.Vexflag = 0
  4386  	and0 := ctxt.Andptr
  4387  	ctxt.Asmode = int(p.Mode)
  4388  	doasm(ctxt, p)
  4389  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4390  		/*
  4391  		 * as befits the whole approach of the architecture,
  4392  		 * the rex prefix must appear before the first opcode byte
  4393  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4394  		 * before the 0f opcode escape!), or it might be ignored.
  4395  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4396  		 */
  4397  		if p.Mode != 64 {
  4398  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4399  		}
  4400  		n := -cap(ctxt.Andptr) + cap(and0)
  4401  		var c int
  4402  		var np int
  4403  		for np = 0; np < n; np++ {
  4404  			c = int(and0[np])
  4405  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4406  				break
  4407  			}
  4408  		}
  4409  
  4410  		copy(and0[np+1:], and0[np:n])
  4411  		and0[np] = byte(0x40 | ctxt.Rexflag)
  4412  		ctxt.Andptr = ctxt.Andptr[1:]
  4413  	}
  4414  
  4415  	n := -cap(ctxt.Andptr) + cap(ctxt.And[:])
  4416  	var r *obj.Reloc
  4417  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4418  		r = &ctxt.Cursym.R[i:][0]
  4419  		if int64(r.Off) < p.Pc {
  4420  			break
  4421  		}
  4422  		if ctxt.Rexflag != 0 {
  4423  			r.Off++
  4424  		}
  4425  		if r.Type == obj.R_PCREL {
  4426  			// PC-relative addressing is relative to the end of the instruction,
  4427  			// but the relocations applied by the linker are relative to the end
  4428  			// of the relocation. Because immediate instruction
  4429  			// arguments can follow the PC-relative memory reference in the
  4430  			// instruction encoding, the two may not coincide. In this case,
  4431  			// adjust addend so that linker can keep relocating relative to the
  4432  			// end of the relocation.
  4433  			r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4434  		}
  4435  	}
  4436  
  4437  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4438  		switch p.To.Reg {
  4439  		case REG_SP:
  4440  			copy(ctxt.Andptr, naclspfix)
  4441  			ctxt.Andptr = ctxt.Andptr[len(naclspfix):]
  4442  
  4443  		case REG_BP:
  4444  			copy(ctxt.Andptr, naclbpfix)
  4445  			ctxt.Andptr = ctxt.Andptr[len(naclbpfix):]
  4446  		}
  4447  	}
  4448  }