github.com/mdempsky/go@v0.0.0-20151201204031-5dd372bd1e70/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"fmt"
    36  	"log"
    37  	"strings"
    38  )
    39  
    40  // Instruction layout.
    41  
const (
	// Loop alignment constants:
	// want to align loop entry to LoopAlign-byte boundary,
	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
	// We define a loop entry as the target of a backward jump.
	//
	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
	// and it aligns all jump targets, not just backward jump targets.
	//
	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
	// is very slight but negative, so the alignment is disabled by
	// setting MaxLoopPad = 0. The code is here for reference and
	// for future experiments.
	//
	LoopAlign  = 16 // boundary, in bytes, that a loop entry would be aligned to
	MaxLoopPad = 0  // most NOP bytes to insert for that alignment; 0 disables it
	FuncAlign  = 16 // NOTE(review): presumably the byte alignment of function entry points — confirm at the use site
)
    60  
// Optab is one row of the optab instruction table. It ties an instruction
// to the operand patterns it accepts and to the bytes used to encode it.
// Rows are written as positional literals, so the field order matters.
type Optab struct {
	as     int16     // instruction the row describes (an A... constant such as AADDL)
	ytab   []ytab    // operand patterns accepted by the instruction, scanned in order
	prefix uint8     // one of the P... constants; controls the prefix bytes to emit
	op     [23]uint8 // opcode bytes; the z pointer walks through them as ytab lines are skipped
}
    67  
// ytab is one line of an instruction's operand-pattern table.
// The from/to fields hold Ytypes that are matched against the operands of
// the instruction being assembled; zcase and zoffset say how to encode a
// match and how many Optab.op bytes belong to the line.
// Tables are written as positional literals, so the field order matters.
type ytab struct {
	from    uint8 // Ytype the p->from operand must satisfy
	from3   uint8 // Ytype of an additional source operand; Ynone in most tables (presumably matched against a third operand of the Prog — confirm in doasm)
	to      uint8 // Ytype the p->to operand must satisfy
	zcase   uint8 // Ztype: selects how the instruction bytes are laid down
	zoffset uint8 // amount the z pointer into Optab.op advances when this line does not match
}
    75  
// Movtab is a table row keyed by an instruction and operand classes.
// NOTE(review): the table built from Movtab and the code that reads it are
// outside this part of the file; the per-field notes below are inferred from
// the field names and from the parallel fields of ytab — confirm before
// relying on them.
type Movtab struct {
	as   int16    // instruction (same representation as Optab.as)
	ft   uint8    // presumably the class required of the from operand
	f3t  uint8    // presumably the class required of the third operand
	tt   uint8    // presumably the class required of the to operand
	code uint8    // presumably selects the encoding case
	op   [4]uint8 // up to four encoding bytes
}
    84  
// Ytypes: operand classes used in the from, from3 and to fields of ytab.
// oclass computes the most specific class of an operand, and the ycover
// table records which more general classes that class also satisfies.
// The values come from iota, so the order of declaration is significant;
// Ymax is the number of classes and the row stride of ycover.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32 // signed 32-bit constant
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Ytls
	Ytextsize
	Yindir
	Ymax // number of Ytypes; ycover has Ymax*Ymax entries
)
   156  
// Ztypes: encoding cases used in the zcase field of ytab.
// When a ytab line matches, doasm uses the line's Ztype together with the
// opcode bytes at the z pointer to lay down the instruction.
// The values come from iota, so the order of declaration is significant.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m // opcode byte (z[0]), then the encoded memory operand, then a single immediate byte
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m // like Zibo_m but with a long (32-bit) immediate
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_xm_vex
	Zm_r_i_xm
	Zm_r_3d
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zr_m_xm_vex
	Zr_r_r_vex
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zmax // one past the last Ztype
)
   210  
// Prefix selectors and REX bits.
//
// The P... constants are the values stored in Optab.prefix (and, for some
// instructions, inside Optab.op); they control which prefix bytes are
// emitted. Several are marked symbolic: only their identity matters, not
// their numeric value.
//
// The Rx... constants are the individual bits of a REX prefix.
const (
	Px    = 0
	Px1   = 1    // symbolic; exact value doesn't matter
	P32   = 0x32 /* 32-bit only */
	Pe    = 0x66 /* operand escape */
	Pm    = 0x0f /* 2byte opcode escape */
	Pq    = 0xff /* both escapes: 66 0f */
	Pb    = 0xfe /* byte operands */
	Pf2   = 0xf2 /* xmm escape 1: f2 0f */
	Pf3   = 0xf3 /* xmm escape 2: f3 0f */
	Pq3   = 0x67 /* xmm escape 3: 66 48 0f */
	Pvex1 = 0xc5 /* 66.0f escape, vex encoding */
	Pvex2 = 0xc6 /* f3.0f escape, vex encoding */
	Pvex3 = 0xc7 /* 66.0f38 escape, vex encoding */
	Pw    = 0x48 /* Rex.w */
	Pw8   = 0x90 // symbolic; exact value doesn't matter
	Py    = 0x80 /* defaults to 64-bit mode */
	Py1   = 0x81 // symbolic; exact value doesn't matter
	Py3   = 0x83 // symbolic; exact value doesn't matter

	Rxw = 1 << 3 /* =1, 64-bit operand size */
	Rxr = 1 << 2 /* extend modrm reg */
	Rxx = 1 << 1 /* extend sib index */
	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
)
   236  
   237  var ycover [Ymax * Ymax]uint8
   238  
   239  var reg [MAXREG]int
   240  
   241  var regrex [MAXREG + 1]int
   242  
   243  var ynone = []ytab{
   244  	{Ynone, Ynone, Ynone, Zlit, 1},
   245  }
   246  
   247  var ytext = []ytab{
   248  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   249  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   250  }
   251  
   252  var ynop = []ytab{
   253  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   254  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   255  	{Ynone, Ynone, Yml, Zpseudo, 0},
   256  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   257  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   258  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   259  	{Yml, Ynone, Ynone, Zpseudo, 0},
   260  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   261  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   262  }
   263  
   264  var yfuncdata = []ytab{
   265  	{Yi32, Ynone, Ym, Zpseudo, 0},
   266  }
   267  
   268  var ypcdata = []ytab{
   269  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   270  }
   271  
   272  var yxorb = []ytab{
   273  	{Yi32, Ynone, Yal, Zib_, 1},
   274  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   275  	{Yrb, Ynone, Ymb, Zr_m, 1},
   276  	{Ymb, Ynone, Yrb, Zm_r, 1},
   277  }
   278  
   279  var yxorl = []ytab{
   280  	{Yi8, Ynone, Yml, Zibo_m, 2},
   281  	{Yi32, Ynone, Yax, Zil_, 1},
   282  	{Yi32, Ynone, Yml, Zilo_m, 2},
   283  	{Yrl, Ynone, Yml, Zr_m, 1},
   284  	{Yml, Ynone, Yrl, Zm_r, 1},
   285  }
   286  
   287  var yaddl = []ytab{
   288  	{Yi8, Ynone, Yml, Zibo_m, 2},
   289  	{Yi32, Ynone, Yax, Zil_, 1},
   290  	{Yi32, Ynone, Yml, Zilo_m, 2},
   291  	{Yrl, Ynone, Yml, Zr_m, 1},
   292  	{Yml, Ynone, Yrl, Zm_r, 1},
   293  }
   294  
   295  var yincb = []ytab{
   296  	{Ynone, Ynone, Ymb, Zo_m, 2},
   297  }
   298  
   299  var yincw = []ytab{
   300  	{Ynone, Ynone, Yml, Zo_m, 2},
   301  }
   302  
   303  var yincl = []ytab{
   304  	{Ynone, Ynone, Yrl, Z_rp, 1},
   305  	{Ynone, Ynone, Yml, Zo_m, 2},
   306  }
   307  
   308  var yincq = []ytab{
   309  	{Ynone, Ynone, Yml, Zo_m, 2},
   310  }
   311  
   312  var ycmpb = []ytab{
   313  	{Yal, Ynone, Yi32, Z_ib, 1},
   314  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   315  	{Ymb, Ynone, Yrb, Zm_r, 1},
   316  	{Yrb, Ynone, Ymb, Zr_m, 1},
   317  }
   318  
   319  var ycmpl = []ytab{
   320  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   321  	{Yax, Ynone, Yi32, Z_il, 1},
   322  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   323  	{Yml, Ynone, Yrl, Zm_r, 1},
   324  	{Yrl, Ynone, Yml, Zr_m, 1},
   325  }
   326  
   327  var yshb = []ytab{
   328  	{Yi1, Ynone, Ymb, Zo_m, 2},
   329  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   330  	{Ycx, Ynone, Ymb, Zo_m, 2},
   331  }
   332  
   333  var yshl = []ytab{
   334  	{Yi1, Ynone, Yml, Zo_m, 2},
   335  	{Yi32, Ynone, Yml, Zibo_m, 2},
   336  	{Ycl, Ynone, Yml, Zo_m, 2},
   337  	{Ycx, Ynone, Yml, Zo_m, 2},
   338  }
   339  
   340  var ytestb = []ytab{
   341  	{Yi32, Ynone, Yal, Zib_, 1},
   342  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   343  	{Yrb, Ynone, Ymb, Zr_m, 1},
   344  	{Ymb, Ynone, Yrb, Zm_r, 1},
   345  }
   346  
   347  var ytestl = []ytab{
   348  	{Yi32, Ynone, Yax, Zil_, 1},
   349  	{Yi32, Ynone, Yml, Zilo_m, 2},
   350  	{Yrl, Ynone, Yml, Zr_m, 1},
   351  	{Yml, Ynone, Yrl, Zm_r, 1},
   352  }
   353  
   354  var ymovb = []ytab{
   355  	{Yrb, Ynone, Ymb, Zr_m, 1},
   356  	{Ymb, Ynone, Yrb, Zm_r, 1},
   357  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   358  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   359  }
   360  
   361  var ymbs = []ytab{
   362  	{Ymb, Ynone, Ynone, Zm_o, 2},
   363  }
   364  
   365  var ybtl = []ytab{
   366  	{Yi8, Ynone, Yml, Zibo_m, 2},
   367  	{Yrl, Ynone, Yml, Zr_m, 1},
   368  }
   369  
   370  var ymovw = []ytab{
   371  	{Yrl, Ynone, Yml, Zr_m, 1},
   372  	{Yml, Ynone, Yrl, Zm_r, 1},
   373  	{Yi0, Ynone, Yrl, Zclr, 1},
   374  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   375  	{Yi32, Ynone, Yml, Zilo_m, 2},
   376  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   377  }
   378  
   379  var ymovl = []ytab{
   380  	{Yrl, Ynone, Yml, Zr_m, 1},
   381  	{Yml, Ynone, Yrl, Zm_r, 1},
   382  	{Yi0, Ynone, Yrl, Zclr, 1},
   383  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   384  	{Yi32, Ynone, Yml, Zilo_m, 2},
   385  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   386  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   387  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   388  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   389  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   390  }
   391  
   392  var yret = []ytab{
   393  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   394  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   395  }
   396  
   397  var ymovq = []ytab{
   398  	// valid in 32-bit mode
   399  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   400  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   401  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   402  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   403  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   404  
   405  	// valid only in 64-bit mode, usually with 64-bit prefix
   406  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   407  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   408  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   409  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   410  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   411  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   412  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   413  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   414  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   415  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   416  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   417  }
   418  
   419  var ym_rl = []ytab{
   420  	{Ym, Ynone, Yrl, Zm_r, 1},
   421  }
   422  
   423  var yrl_m = []ytab{
   424  	{Yrl, Ynone, Ym, Zr_m, 1},
   425  }
   426  
   427  var ymb_rl = []ytab{
   428  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   429  }
   430  
   431  var yml_rl = []ytab{
   432  	{Yml, Ynone, Yrl, Zm_r, 1},
   433  }
   434  
   435  var yrl_ml = []ytab{
   436  	{Yrl, Ynone, Yml, Zr_m, 1},
   437  }
   438  
   439  var yml_mb = []ytab{
   440  	{Yrb, Ynone, Ymb, Zr_m, 1},
   441  	{Ymb, Ynone, Yrb, Zm_r, 1},
   442  }
   443  
   444  var yrb_mb = []ytab{
   445  	{Yrb, Ynone, Ymb, Zr_m, 1},
   446  }
   447  
   448  var yxchg = []ytab{
   449  	{Yax, Ynone, Yrl, Z_rp, 1},
   450  	{Yrl, Ynone, Yax, Zrp_, 1},
   451  	{Yrl, Ynone, Yml, Zr_m, 1},
   452  	{Yml, Ynone, Yrl, Zm_r, 1},
   453  }
   454  
   455  var ydivl = []ytab{
   456  	{Yml, Ynone, Ynone, Zm_o, 2},
   457  }
   458  
   459  var ydivb = []ytab{
   460  	{Ymb, Ynone, Ynone, Zm_o, 2},
   461  }
   462  
   463  var yimul = []ytab{
   464  	{Yml, Ynone, Ynone, Zm_o, 2},
   465  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   466  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   467  	{Yml, Ynone, Yrl, Zm_r, 2},
   468  }
   469  
   470  var yimul3 = []ytab{
   471  	{Yi8, Yml, Yrl, Zibm_r, 2},
   472  }
   473  
   474  var ybyte = []ytab{
   475  	{Yi64, Ynone, Ynone, Zbyte, 1},
   476  }
   477  
   478  var yin = []ytab{
   479  	{Yi32, Ynone, Ynone, Zib_, 1},
   480  	{Ynone, Ynone, Ynone, Zlit, 1},
   481  }
   482  
   483  var yint = []ytab{
   484  	{Yi32, Ynone, Ynone, Zib_, 1},
   485  }
   486  
   487  var ypushl = []ytab{
   488  	{Yrl, Ynone, Ynone, Zrp_, 1},
   489  	{Ym, Ynone, Ynone, Zm_o, 2},
   490  	{Yi8, Ynone, Ynone, Zib_, 1},
   491  	{Yi32, Ynone, Ynone, Zil_, 1},
   492  }
   493  
   494  var ypopl = []ytab{
   495  	{Ynone, Ynone, Yrl, Z_rp, 1},
   496  	{Ynone, Ynone, Ym, Zo_m, 2},
   497  }
   498  
   499  var ybswap = []ytab{
   500  	{Ynone, Ynone, Yrl, Z_rp, 2},
   501  }
   502  
   503  var yscond = []ytab{
   504  	{Ynone, Ynone, Ymb, Zo_m, 2},
   505  }
   506  
   507  var yjcond = []ytab{
   508  	{Ynone, Ynone, Ybr, Zbr, 0},
   509  	{Yi0, Ynone, Ybr, Zbr, 0},
   510  	{Yi1, Ynone, Ybr, Zbr, 1},
   511  }
   512  
   513  var yloop = []ytab{
   514  	{Ynone, Ynone, Ybr, Zloop, 1},
   515  }
   516  
   517  var ycall = []ytab{
   518  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   519  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   520  	{Ynone, Ynone, Yindir, Zcallind, 2},
   521  	{Ynone, Ynone, Ybr, Zcall, 0},
   522  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   523  }
   524  
   525  var yduff = []ytab{
   526  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   527  }
   528  
   529  var yjmp = []ytab{
   530  	{Ynone, Ynone, Yml, Zo_m64, 2},
   531  	{Ynone, Ynone, Ybr, Zjmp, 0},
   532  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   533  }
   534  
   535  var yfmvd = []ytab{
   536  	{Ym, Ynone, Yf0, Zm_o, 2},
   537  	{Yf0, Ynone, Ym, Zo_m, 2},
   538  	{Yrf, Ynone, Yf0, Zm_o, 2},
   539  	{Yf0, Ynone, Yrf, Zo_m, 2},
   540  }
   541  
   542  var yfmvdp = []ytab{
   543  	{Yf0, Ynone, Ym, Zo_m, 2},
   544  	{Yf0, Ynone, Yrf, Zo_m, 2},
   545  }
   546  
   547  var yfmvf = []ytab{
   548  	{Ym, Ynone, Yf0, Zm_o, 2},
   549  	{Yf0, Ynone, Ym, Zo_m, 2},
   550  }
   551  
   552  var yfmvx = []ytab{
   553  	{Ym, Ynone, Yf0, Zm_o, 2},
   554  }
   555  
   556  var yfmvp = []ytab{
   557  	{Yf0, Ynone, Ym, Zo_m, 2},
   558  }
   559  
   560  var yfcmv = []ytab{
   561  	{Yrf, Ynone, Yf0, Zm_o, 2},
   562  }
   563  
   564  var yfadd = []ytab{
   565  	{Ym, Ynone, Yf0, Zm_o, 2},
   566  	{Yrf, Ynone, Yf0, Zm_o, 2},
   567  	{Yf0, Ynone, Yrf, Zo_m, 2},
   568  }
   569  
   570  var yfaddp = []ytab{
   571  	{Yf0, Ynone, Yrf, Zo_m, 2},
   572  }
   573  
   574  var yfxch = []ytab{
   575  	{Yf0, Ynone, Yrf, Zo_m, 2},
   576  	{Yrf, Ynone, Yf0, Zm_o, 2},
   577  }
   578  
   579  var ycompp = []ytab{
   580  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   581  }
   582  
   583  var ystsw = []ytab{
   584  	{Ynone, Ynone, Ym, Zo_m, 2},
   585  	{Ynone, Ynone, Yax, Zlit, 1},
   586  }
   587  
   588  var ystcw = []ytab{
   589  	{Ynone, Ynone, Ym, Zo_m, 2},
   590  	{Ym, Ynone, Ynone, Zm_o, 2},
   591  }
   592  
   593  var ysvrs = []ytab{
   594  	{Ynone, Ynone, Ym, Zo_m, 2},
   595  	{Ym, Ynone, Ynone, Zm_o, 2},
   596  }
   597  
   598  var ymm = []ytab{
   599  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   600  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   601  }
   602  
   603  var yxm = []ytab{
   604  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   605  }
   606  
   607  var yxcvm1 = []ytab{
   608  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   609  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   610  }
   611  
   612  var yxcvm2 = []ytab{
   613  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   614  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   615  }
   616  
   617  /*
   618  var yxmq = []ytab{
   619  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   620  }
   621  */
   622  
   623  var yxr = []ytab{
   624  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   625  }
   626  
   627  var yxr_ml = []ytab{
   628  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   629  }
   630  
   631  var yxr_ml_vex = []ytab{
   632  	{Yxr, Ynone, Yml, Zr_m_xm_vex, 1},
   633  }
   634  
   635  var yml_xr_vex = []ytab{
   636  	{Yml, Ynone, Yxr, Zm_r_xm_vex, 1},
   637  	{Yxr, Ynone, Yxr, Zm_r_xm_vex, 1},
   638  }
   639  
   640  var yxm_xm_xm = []ytab{
   641  	{Yxr, Yxr, Yxr, Zr_r_r_vex, 1},
   642  	{Yxm, Yxr, Yxr, Zr_r_r_vex, 1},
   643  }
   644  
   645  var ymr = []ytab{
   646  	{Ymr, Ynone, Ymr, Zm_r, 1},
   647  }
   648  
   649  var ymr_ml = []ytab{
   650  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   651  }
   652  
   653  var yxcmp = []ytab{
   654  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   655  }
   656  
   657  var yxcmpi = []ytab{
   658  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   659  }
   660  
   661  var yxmov_vex = []ytab{
   662  	{Yxm, Ynone, Yxr, Zm_r_xm_vex, 1},
   663  	{Yxr, Ynone, Yxm, Zr_m_xm_vex, 1},
   664  }
   665  
   666  var yxmov = []ytab{
   667  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   668  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   669  }
   670  
   671  var yxcvfl = []ytab{
   672  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   673  }
   674  
   675  var yxcvlf = []ytab{
   676  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   677  }
   678  
   679  var yxcvfq = []ytab{
   680  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   681  }
   682  
   683  var yxcvqf = []ytab{
   684  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   685  }
   686  
   687  var yps = []ytab{
   688  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   689  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   690  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   691  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   692  }
   693  
   694  var yxrrl = []ytab{
   695  	{Yxr, Ynone, Yrl, Zm_r, 1},
   696  }
   697  
   698  var ymfp = []ytab{
   699  	{Ymm, Ynone, Ymr, Zm_r_3d, 1},
   700  }
   701  
   702  var ymrxr = []ytab{
   703  	{Ymr, Ynone, Yxr, Zm_r, 1},
   704  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   705  }
   706  
   707  var ymshuf = []ytab{
   708  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   709  }
   710  
   711  var ymshufb = []ytab{
   712  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   713  }
   714  
   715  var yxshuf = []ytab{
   716  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   717  }
   718  
   719  var yextrw = []ytab{
   720  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   721  }
   722  
   723  var yinsrw = []ytab{
   724  	{Yu8, Yml, Yxr, Zibm_r, 2},
   725  }
   726  
   727  var yinsr = []ytab{
   728  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   729  }
   730  
   731  var ypsdq = []ytab{
   732  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   733  }
   734  
   735  var ymskb = []ytab{
   736  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   737  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   738  }
   739  
   740  var ymskb_vex = []ytab{
   741  	{Yxr, Ynone, Yrl, Zm_r_xm_vex, 2},
   742  }
   743  
   744  var ycrc32l = []ytab{
   745  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   746  }
   747  
   748  var yprefetch = []ytab{
   749  	{Ym, Ynone, Ynone, Zm_o, 2},
   750  }
   751  
   752  var yaes = []ytab{
   753  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   754  }
   755  
   756  var yaes2 = []ytab{
   757  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   758  }
   759  
   760  var yxbegin = []ytab{
   761  	{Ynone, Ynone, Ybr, Zjmp, 1},
   762  }
   763  
   764  var yxabort = []ytab{
   765  	{Yu8, Ynone, Ynone, Zib_, 1},
   766  }
   767  
/*
 * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
 * and p->from and p->to as operands (Addr*).  The linker scans optab to find
 * the entry with the given p->as and then looks through the ytable for that
 * instruction (the second field in the optab struct) for a line whose from
 * and to values match the Ytypes of the p->from and p->to operands (the
 * from3 value that sits between them is Ynone for most instructions).  The
 * function oclass (span.c in the original C sources) computes the specific
 * Ytype of an operand and then the set of more general Ytypes that it
 * satisfies is implied by the ycover table, set up in instinit.  For example,
 * oclass distinguishes the constants 0 and 1 from the more general 8-bit
 * constants, but instinit says
 *
 *        ycover[Yi0*Ymax + Ys32] = 1;
 *        ycover[Yi1*Ymax + Ys32] = 1;
 *        ycover[Yi8*Ymax + Ys32] = 1;
 *
 * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
 * if that's what an instruction can handle.
 *
 * In parallel with the scan through the ytable for the appropriate line, there
 * is a z pointer that starts out pointing at the strange magic byte list in
 * the Optab struct.  With each step past a non-matching ytable line, z
 * advances by the last entry in the line (zoffset).  When a matching line is
 * found, that z pointer has the extra data to use in laying down the
 * instruction bytes.  The actual bytes laid down are a function of the 4th
 * entry in the line (zcase, that is, the Ztype) and the z bytes.
 *
 * For example, let's look at AADDL.  The optab line says:
 *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
 *
 * and yaddl says
 *        var yaddl = []ytab{
 *                {Yi8, Ynone, Yml, Zibo_m, 2},
 *                {Yi32, Ynone, Yax, Zil_, 1},
 *                {Yi32, Ynone, Yml, Zilo_m, 2},
 *                {Yrl, Ynone, Yml, Zr_m, 1},
 *                {Yml, Ynone, Yrl, Zm_r, 1},
 *        }
 *
 * so there are 5 possible types of ADDL instruction that can be laid down, and
 * possible states used to lay them down (Ztype and z pointer, assuming z
 * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
 *
 *        Yi8, Yml -> Zibo_m, z (0x83, 00)
 *        Yi32, Yax -> Zil_, z+2 (0x05)
 *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
 *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
 *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
 *
 * The Pconstant in the optab line controls the prefix bytes to emit.  That's
 * relatively straightforward as this program goes.
 *
 * The switch on the matching line's Ztype (its zcase entry) in doasm
 * implements the various Z cases.  Zibo_m, for example, is an opcode byte
 * (z[0]) then an asmando (which is some kind of encoded addressing mode for
 * the Yml arg), and then a single immediate byte.  Zilo_m is the same but a
 * long (32-bit) immediate.
 */
   826  var optab =
   827  /*	as, ytab, andproto, opcode */
   828  []Optab{
   829  	{obj.AXXX, nil, 0, [23]uint8{}},
   830  	{AAAA, ynone, P32, [23]uint8{0x37}},
   831  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   832  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   833  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   834  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   835  	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   836  	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   837  	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   838  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   839  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   840  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   841  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   842  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   843  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   844  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   845  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   846  	{AADJSP, nil, 0, [23]uint8{}},
   847  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   848  	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   849  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   850  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   851  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   852  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
   853  	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   854  	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   855  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   856  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   857  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   858  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   859  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   860  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   861  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   862  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   863  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   864  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   865  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   866  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   867  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   868  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   869  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   870  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   871  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   872  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   873  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   874  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   875  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   876  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   877  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   878  	{ABYTE, ybyte, Px, [23]uint8{1}},
   879  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   880  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   881  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   882  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   883  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   884  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   885  	{ACMC, ynone, Px, [23]uint8{0xf5}},
   886  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
   887  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
   888  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
   889  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
   890  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
   891  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
   892  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
   893  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
   894  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
   895  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
   896  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
   897  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
   898  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
   899  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
   900  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
   901  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
   902  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
   903  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
   904  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
   905  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
   906  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
   907  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
   908  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
   909  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
   910  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
   911  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
   912  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
   913  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
   914  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
   915  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
   916  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
   917  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
   918  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
   919  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
   920  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
   921  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
   922  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
   923  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
   924  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
   925  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
   926  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
   927  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
   928  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
   929  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
   930  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
   931  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
   932  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
   933  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
   934  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
   935  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   936  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
   937  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
   938  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   939  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
   940  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
   941  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
   942  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
   943  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
   944  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
   945  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   946  	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
   947  	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
   948  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
   949  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
   950  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
   951  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
   952  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
   953  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
   954  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
   955  	{API2FW, ymfp, Px, [23]uint8{0x0c}},
   956  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
   957  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
   958  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
   959  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
   960  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
   961  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
   962  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
   963  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
   964  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
   965  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
   966  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
   967  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
   968  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
   969  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
   970  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
   971  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
   972  	{ACWD, ynone, Pe, [23]uint8{0x99}},
   973  	{ACQO, ynone, Pw, [23]uint8{0x99}},
   974  	{ADAA, ynone, P32, [23]uint8{0x27}},
   975  	{ADAS, ynone, P32, [23]uint8{0x2f}},
   976  	{obj.ADATA, nil, 0, [23]uint8{}},
   977  	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
   978  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
   979  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
   980  	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
   981  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
   982  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
   983  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
   984  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
   985  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
   986  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
   987  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
   988  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
   989  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
   990  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
   991  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
   992  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
   993  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
   994  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
   995  	{obj.AGLOBL, nil, 0, [23]uint8{}},
   996  	{AHLT, ynone, Px, [23]uint8{0xf4}},
   997  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
   998  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
   999  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1000  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1001  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1002  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1003  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1004  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1005  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1006  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1007  	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
  1008  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1009  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1010  	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
  1011  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1012  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1013  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1014  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1015  	{AINT, yint, Px, [23]uint8{0xcd}},
  1016  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1017  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1018  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1019  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1020  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1021  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1022  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1023  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1024  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1025  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1026  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1027  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1028  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1029  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1030  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1031  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1032  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1033  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1034  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1035  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1036  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1037  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1038  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1039  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1040  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1041  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1042  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1043  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1044  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1045  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1046  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1047  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1048  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1049  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1050  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1051  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1052  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1053  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1054  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1055  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1056  	{ALONG, ybyte, Px, [23]uint8{4}},
  1057  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1058  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1059  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1060  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1061  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1062  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1063  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1064  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1065  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1066  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1067  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1068  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1069  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1070  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1071  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1072  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1073  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1074  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1075  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1076  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1077  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1078  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1079  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1080  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1081  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1082  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1083  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1084  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1085  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1086  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1087  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1088  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1089  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1090  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1091  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1092  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1093  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1094  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1095  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1096  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1097  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1098  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1099  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1100  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1101  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1102  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1103  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1104  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1105  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1106  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1107  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1108  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1109  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1110  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1111  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1112  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1113  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1114  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1115  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1116  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1117  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1118  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1119  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1120  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1121  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1122  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1123  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1124  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1125  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1126  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1127  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1128  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1129  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1130  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1131  	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1132  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1133  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1134  	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1135  	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1136  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1137  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1138  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1139  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1140  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1141  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1142  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1143  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1144  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1145  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1146  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1147  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1148  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1149  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1150  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1151  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1152  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1153  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1154  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1155  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1156  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1157  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1158  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1159  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1160  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1161  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1162  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1163  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1164  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1165  	{APF2IL, ymfp, Px, [23]uint8{0x1d}},
  1166  	{APF2IW, ymfp, Px, [23]uint8{0x1c}},
  1167  	{API2FL, ymfp, Px, [23]uint8{0x0d}},
  1168  	{APFACC, ymfp, Px, [23]uint8{0xae}},
  1169  	{APFADD, ymfp, Px, [23]uint8{0x9e}},
  1170  	{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
  1171  	{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
  1172  	{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
  1173  	{APFMAX, ymfp, Px, [23]uint8{0xa4}},
  1174  	{APFMIN, ymfp, Px, [23]uint8{0x94}},
  1175  	{APFMUL, ymfp, Px, [23]uint8{0xb4}},
  1176  	{APFNACC, ymfp, Px, [23]uint8{0x8a}},
  1177  	{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
  1178  	{APFRCP, ymfp, Px, [23]uint8{0x96}},
  1179  	{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
  1180  	{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
  1181  	{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
  1182  	{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
  1183  	{APFSUB, ymfp, Px, [23]uint8{0x9a}},
  1184  	{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
  1185  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1186  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1187  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1188  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1189  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1190  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1191  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1192  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1193  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1194  	{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
  1195  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1196  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1197  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1198  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1199  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1200  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1201  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1202  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1203  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1204  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1205  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1206  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1207  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1208  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1209  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1210  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1211  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1212  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1213  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1214  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1215  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1216  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1217  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1218  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1219  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1220  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1221  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1222  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1223  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1224  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1225  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1226  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1227  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1228  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1229  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1230  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1231  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1232  	{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
  1233  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1234  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1235  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1236  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1237  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1238  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1239  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1240  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1241  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1242  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1243  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1244  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1245  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1246  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1247  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1248  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1249  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1250  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1251  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1252  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1253  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1254  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1255  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1256  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1257  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1258  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1259  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1260  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1261  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1262  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1263  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1264  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1265  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1266  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1267  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1268  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1269  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1270  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1271  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1272  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1273  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1274  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1275  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1276  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1277  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1278  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1279  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1280  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1281  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1282  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1283  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1284  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1285  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1286  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1287  	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1288  	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1289  	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1290  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1291  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1292  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1293  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1294  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1295  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1296  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1297  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1298  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1299  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1300  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1301  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1302  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1303  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1304  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1305  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1306  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1307  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1308  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1309  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1310  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1311  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1312  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1313  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1314  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1315  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1316  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1317  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1318  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1319  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1320  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1321  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1322  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1323  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1324  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1325  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1326  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1327  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1328  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1329  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1330  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1331  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1332  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1333  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1334  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1335  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1336  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1337  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1338  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1339  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1340  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1341  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1342  	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1343  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1344  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1345  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1346  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1347  	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1348  	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1349  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1350  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1351  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1352  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1353  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1354  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1355  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1356  	{AWORD, ybyte, Px, [23]uint8{2}},
  1357  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1358  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1359  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1360  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1361  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1362  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1363  	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1364  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1365  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1366  	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1367  	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1368  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1369  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1370  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1371  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1372  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1373  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1374  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1375  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1376  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1377  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1378  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1379  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1380  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1381  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1382  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1383  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1384  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1385  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1386  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1387  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1388  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1389  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1390  	{AFCOMB, nil, 0, [23]uint8{}},
  1391  	{AFCOMBP, nil, 0, [23]uint8{}},
  1392  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1393  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1394  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1395  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1396  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1397  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1398  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1399  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1400  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1401  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1402  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1403  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1404  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1405  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1406  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1407  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1408  	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1409  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1410  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1411  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1412  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1413  	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1414  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1415  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1416  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1417  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1418  	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1419  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1420  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1421  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1422  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1423  	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1424  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1425  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1426  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1427  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1428  	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1429  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1430  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1431  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1432  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1433  	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1434  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1435  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1436  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1437  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1438  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1439  	{AFFREE, nil, 0, [23]uint8{}},
  1440  	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1441  	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1442  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1443  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1444  	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1445  	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1446  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1447  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1448  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1449  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1450  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1451  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1452  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1453  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1454  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1455  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1456  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1457  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1458  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1459  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1460  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1461  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1462  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1463  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1464  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1465  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1466  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1467  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1468  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1469  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1470  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1471  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1472  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1473  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1474  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1475  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1476  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1477  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1478  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1479  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1480  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1481  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1482  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1483  	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1484  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1485  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1486  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1487  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1488  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1489  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1490  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1491  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1492  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1493  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1494  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1495  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1496  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1497  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1498  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1499  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1500  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1501  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1502  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1503  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1504  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1505  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1506  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1507  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1508  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1509  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1510  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1511  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1512  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1513  	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1514  	{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
  1515  	{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
  1516  	{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1517  	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1518  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1519  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1520  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1521  	{AMOVHDU, yxmov_vex, Pvex2, [23]uint8{0x6f, 0x7f}},
  1522  	{AMOVNTHD, yxr_ml_vex, Pvex1, [23]uint8{0xe7}},
  1523  	{AMOVHDA, yxmov_vex, Pvex1, [23]uint8{0x6f, 0x7f}},
  1524  	{AVPCMPEQB, yxm_xm_xm, Pvex1, [23]uint8{0x74, 0x74}},
  1525  	{AVPXOR, yxm_xm_xm, Pvex1, [23]uint8{0xef, 0xef}},
  1526  	{AVPMOVMSKB, ymskb_vex, Pvex1, [23]uint8{0xd7}},
  1527  	{AVPAND, yxm_xm_xm, Pvex1, [23]uint8{0xdb, 0xdb}},
  1528  	{AVPBROADCASTB, yml_xr_vex, Pvex3, [23]uint8{0x78, 0x78}},
  1529  	{AVPTEST, yml_xr_vex, Pvex3, [23]uint8{0x17, 0x17}},
  1530  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1531  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1532  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1533  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1534  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1535  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1536  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1537  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1538  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1539  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1540  	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1541  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1542  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1543  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1544  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1545  	{obj.AEND, nil, 0, [23]uint8{}},
  1546  	{0, nil, 0, [23]uint8{}},
  1547  }
  1548  
// opindex is a fixed-size table of *Optab pointers with
// (ALAST+1)&obj.AMask slots.
// NOTE(review): presumably indexed by the masked opcode and filled in from
// optab during instinit — confirm against instinit before relying on this.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1550  
  1551  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1552  // This happens on systems like Solaris that call .so functions instead of system calls.
  1553  // It does not seem to be necessary for any other systems. This is probably working
  1554  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1555  // what that bug is. And this does fix it.
  1556  func isextern(s *obj.LSym) bool {
  1557  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1558  	return strings.HasPrefix(s.Name, "libc_")
  1559  }
  1560  
  1561  // single-instruction no-ops of various lengths.
  1562  // constructed by hand and disassembled with gdb to verify.
  1563  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1564  var nop = [][16]uint8{
  1565  	{0x90},
  1566  	{0x66, 0x90},
  1567  	{0x0F, 0x1F, 0x00},
  1568  	{0x0F, 0x1F, 0x40, 0x00},
  1569  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1570  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1571  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1572  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1573  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1574  }
  1575  
  1576  // Native Client rejects the repeated 0x66 prefix.
  1577  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1578  func fillnop(p []byte, n int) {
  1579  	var m int
  1580  
  1581  	for n > 0 {
  1582  		m = n
  1583  		if m > len(nop) {
  1584  			m = len(nop)
  1585  		}
  1586  		copy(p[:m], nop[m-1][:m])
  1587  		p = p[m:]
  1588  		n -= m
  1589  	}
  1590  }
  1591  
  1592  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1593  	obj.Symgrow(ctxt, s, int64(c)+int64(pad))
  1594  	fillnop(s.P[c:], int(pad))
  1595  	return c + pad
  1596  }
  1597  
  1598  func spadjop(ctxt *obj.Link, p *obj.Prog, l int, q int) int {
  1599  	if p.Mode != 64 || ctxt.Arch.Ptrsize == 4 {
  1600  		return l
  1601  	}
  1602  	return q
  1603  }
  1604  
  1605  func span6(ctxt *obj.Link, s *obj.LSym) {
  1606  	ctxt.Cursym = s
  1607  
  1608  	if s.P != nil {
  1609  		return
  1610  	}
  1611  
  1612  	if ycover[0] == 0 {
  1613  		instinit()
  1614  	}
  1615  
  1616  	var v int32
  1617  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1618  		if p.To.Type == obj.TYPE_BRANCH {
  1619  			if p.Pcond == nil {
  1620  				p.Pcond = p
  1621  			}
  1622  		}
  1623  		if p.As == AADJSP {
  1624  			p.To.Type = obj.TYPE_REG
  1625  			p.To.Reg = REG_SP
  1626  			v = int32(-p.From.Offset)
  1627  			p.From.Offset = int64(v)
  1628  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1629  			if v < 0 {
  1630  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1631  				v = -v
  1632  				p.From.Offset = int64(v)
  1633  			}
  1634  
  1635  			if v == 0 {
  1636  				p.As = obj.ANOP
  1637  			}
  1638  		}
  1639  	}
  1640  
  1641  	var q *obj.Prog
  1642  	for p := s.Text; p != nil; p = p.Link {
  1643  		p.Back = 2 // use short branches first time through
  1644  		q = p.Pcond
  1645  		if q != nil && (q.Back&2 != 0) {
  1646  			p.Back |= 1 // backward jump
  1647  			q.Back |= 4 // loop head
  1648  		}
  1649  
  1650  		if p.As == AADJSP {
  1651  			p.To.Type = obj.TYPE_REG
  1652  			p.To.Reg = REG_SP
  1653  			v = int32(-p.From.Offset)
  1654  			p.From.Offset = int64(v)
  1655  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1656  			if v < 0 {
  1657  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1658  				v = -v
  1659  				p.From.Offset = int64(v)
  1660  			}
  1661  
  1662  			if v == 0 {
  1663  				p.As = obj.ANOP
  1664  			}
  1665  		}
  1666  	}
  1667  
  1668  	n := 0
  1669  	var bp []byte
  1670  	var c int32
  1671  	var i int
  1672  	var loop int32
  1673  	var m int
  1674  	var p *obj.Prog
  1675  	for {
  1676  		loop = 0
  1677  		for i = 0; i < len(s.R); i++ {
  1678  			s.R[i] = obj.Reloc{}
  1679  		}
  1680  		s.R = s.R[:0]
  1681  		s.P = s.P[:0]
  1682  		c = 0
  1683  		for p = s.Text; p != nil; p = p.Link {
  1684  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1685  				var deferreturn *obj.LSym
  1686  
  1687  				if deferreturn == nil {
  1688  					deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1689  				}
  1690  
  1691  				// pad everything to avoid crossing 32-byte boundary
  1692  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1693  					c = naclpad(ctxt, s, c, -c&31)
  1694  				}
  1695  
  1696  				// pad call deferreturn to start at 32-byte boundary
  1697  				// so that subtracting 5 in jmpdefer will jump back
  1698  				// to that boundary and rerun the call.
  1699  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1700  					c = naclpad(ctxt, s, c, -c&31)
  1701  				}
  1702  
  1703  				// pad call to end at 32-byte boundary
  1704  				if p.As == obj.ACALL {
  1705  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1706  				}
  1707  
  1708  				// the linker treats REP and STOSQ as different instructions
  1709  				// but in fact the REP is a prefix on the STOSQ.
  1710  				// make sure REP has room for 2 more bytes, so that
  1711  				// padding will not be inserted before the next instruction.
  1712  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1713  					c = naclpad(ctxt, s, c, -c&31)
  1714  				}
  1715  
  1716  				// same for LOCK.
  1717  				// various instructions follow; the longest is 4 bytes.
  1718  				// give ourselves 8 bytes so as to avoid surprises.
  1719  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1720  					c = naclpad(ctxt, s, c, -c&31)
  1721  				}
  1722  			}
  1723  
  1724  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1725  				// pad with NOPs
  1726  				v = -c & (LoopAlign - 1)
  1727  
  1728  				if v <= MaxLoopPad {
  1729  					obj.Symgrow(ctxt, s, int64(c)+int64(v))
  1730  					fillnop(s.P[c:], int(v))
  1731  					c += v
  1732  				}
  1733  			}
  1734  
  1735  			p.Pc = int64(c)
  1736  
  1737  			// process forward jumps to p
  1738  			for q = p.Rel; q != nil; q = q.Forwd {
  1739  				v = int32(p.Pc - (q.Pc + int64(q.Mark)))
  1740  				if q.Back&2 != 0 { // short
  1741  					if v > 127 {
  1742  						loop++
  1743  						q.Back ^= 2
  1744  					}
  1745  
  1746  					if q.As == AJCXZL || q.As == AXBEGIN {
  1747  						s.P[q.Pc+2] = byte(v)
  1748  					} else {
  1749  						s.P[q.Pc+1] = byte(v)
  1750  					}
  1751  				} else {
  1752  					bp = s.P[q.Pc+int64(q.Mark)-4:]
  1753  					bp[0] = byte(v)
  1754  					bp = bp[1:]
  1755  					bp[0] = byte(v >> 8)
  1756  					bp = bp[1:]
  1757  					bp[0] = byte(v >> 16)
  1758  					bp = bp[1:]
  1759  					bp[0] = byte(v >> 24)
  1760  				}
  1761  			}
  1762  
  1763  			p.Rel = nil
  1764  
  1765  			p.Pc = int64(c)
  1766  			asmins(ctxt, p)
  1767  			m = -cap(ctxt.Andptr) + cap(ctxt.And[:])
  1768  			if int(p.Isize) != m {
  1769  				p.Isize = uint8(m)
  1770  				loop++
  1771  			}
  1772  
  1773  			obj.Symgrow(ctxt, s, p.Pc+int64(m))
  1774  			copy(s.P[p.Pc:][:m], ctxt.And[:m])
  1775  			p.Mark = uint16(m)
  1776  			c += int32(m)
  1777  		}
  1778  
  1779  		n++
  1780  		if n > 20 {
  1781  			ctxt.Diag("span must be looping")
  1782  			log.Fatalf("loop")
  1783  		}
  1784  		if loop == 0 {
  1785  			break
  1786  		}
  1787  	}
  1788  
  1789  	if ctxt.Headtype == obj.Hnacl {
  1790  		c = naclpad(ctxt, s, c, -c&31)
  1791  	}
  1792  
  1793  	// Pad functions with trap instruction, to catch invalid jumps
  1794  	if c&(FuncAlign-1) != 0 {
  1795  		v = -c & (FuncAlign - 1)
  1796  		obj.Symgrow(ctxt, s, int64(c)+int64(v))
  1797  		for i := c; i < c+v; i++ {
  1798  			// 0xCC is INT $3 - breakpoint instruction
  1799  			s.P[i] = uint8(0xCC)
  1800  		}
  1801  		c += v
  1802  	}
  1803  	s.Size = int64(c)
  1804  
  1805  	if false { /* debug['a'] > 1 */
  1806  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1807  		var i int
  1808  		for i = 0; i < len(s.P); i++ {
  1809  			fmt.Printf(" %.2x", s.P[i])
  1810  			if i%16 == 15 {
  1811  				fmt.Printf("\n  %.6x", uint(i+1))
  1812  			}
  1813  		}
  1814  
  1815  		if i%16 != 0 {
  1816  			fmt.Printf("\n")
  1817  		}
  1818  
  1819  		for i := 0; i < len(s.R); i++ {
  1820  			r := &s.R[i]
  1821  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1822  		}
  1823  	}
  1824  }
  1825  
  1826  func instinit() {
  1827  	var c int
  1828  
  1829  	for i := 1; optab[i].as != 0; i++ {
  1830  		c = int(optab[i].as)
  1831  		if opindex[c&obj.AMask] != nil {
  1832  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  1833  		}
  1834  		opindex[c&obj.AMask] = &optab[i]
  1835  	}
  1836  
  1837  	for i := 0; i < Ymax; i++ {
  1838  		ycover[i*Ymax+i] = 1
  1839  	}
  1840  
  1841  	ycover[Yi0*Ymax+Yi8] = 1
  1842  	ycover[Yi1*Ymax+Yi8] = 1
  1843  	ycover[Yu7*Ymax+Yi8] = 1
  1844  
  1845  	ycover[Yi0*Ymax+Yu7] = 1
  1846  	ycover[Yi1*Ymax+Yu7] = 1
  1847  
  1848  	ycover[Yi0*Ymax+Yu8] = 1
  1849  	ycover[Yi1*Ymax+Yu8] = 1
  1850  	ycover[Yu7*Ymax+Yu8] = 1
  1851  
  1852  	ycover[Yi0*Ymax+Ys32] = 1
  1853  	ycover[Yi1*Ymax+Ys32] = 1
  1854  	ycover[Yu7*Ymax+Ys32] = 1
  1855  	ycover[Yu8*Ymax+Ys32] = 1
  1856  	ycover[Yi8*Ymax+Ys32] = 1
  1857  
  1858  	ycover[Yi0*Ymax+Yi32] = 1
  1859  	ycover[Yi1*Ymax+Yi32] = 1
  1860  	ycover[Yu7*Ymax+Yi32] = 1
  1861  	ycover[Yu8*Ymax+Yi32] = 1
  1862  	ycover[Yi8*Ymax+Yi32] = 1
  1863  	ycover[Ys32*Ymax+Yi32] = 1
  1864  
  1865  	ycover[Yi0*Ymax+Yi64] = 1
  1866  	ycover[Yi1*Ymax+Yi64] = 1
  1867  	ycover[Yu7*Ymax+Yi64] = 1
  1868  	ycover[Yu8*Ymax+Yi64] = 1
  1869  	ycover[Yi8*Ymax+Yi64] = 1
  1870  	ycover[Ys32*Ymax+Yi64] = 1
  1871  	ycover[Yi32*Ymax+Yi64] = 1
  1872  
  1873  	ycover[Yal*Ymax+Yrb] = 1
  1874  	ycover[Ycl*Ymax+Yrb] = 1
  1875  	ycover[Yax*Ymax+Yrb] = 1
  1876  	ycover[Ycx*Ymax+Yrb] = 1
  1877  	ycover[Yrx*Ymax+Yrb] = 1
  1878  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  1879  
  1880  	ycover[Ycl*Ymax+Ycx] = 1
  1881  
  1882  	ycover[Yax*Ymax+Yrx] = 1
  1883  	ycover[Ycx*Ymax+Yrx] = 1
  1884  
  1885  	ycover[Yax*Ymax+Yrl] = 1
  1886  	ycover[Ycx*Ymax+Yrl] = 1
  1887  	ycover[Yrx*Ymax+Yrl] = 1
  1888  	ycover[Yrl32*Ymax+Yrl] = 1
  1889  
  1890  	ycover[Yf0*Ymax+Yrf] = 1
  1891  
  1892  	ycover[Yal*Ymax+Ymb] = 1
  1893  	ycover[Ycl*Ymax+Ymb] = 1
  1894  	ycover[Yax*Ymax+Ymb] = 1
  1895  	ycover[Ycx*Ymax+Ymb] = 1
  1896  	ycover[Yrx*Ymax+Ymb] = 1
  1897  	ycover[Yrb*Ymax+Ymb] = 1
  1898  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  1899  	ycover[Ym*Ymax+Ymb] = 1
  1900  
  1901  	ycover[Yax*Ymax+Yml] = 1
  1902  	ycover[Ycx*Ymax+Yml] = 1
  1903  	ycover[Yrx*Ymax+Yml] = 1
  1904  	ycover[Yrl*Ymax+Yml] = 1
  1905  	ycover[Yrl32*Ymax+Yml] = 1
  1906  	ycover[Ym*Ymax+Yml] = 1
  1907  
  1908  	ycover[Yax*Ymax+Ymm] = 1
  1909  	ycover[Ycx*Ymax+Ymm] = 1
  1910  	ycover[Yrx*Ymax+Ymm] = 1
  1911  	ycover[Yrl*Ymax+Ymm] = 1
  1912  	ycover[Yrl32*Ymax+Ymm] = 1
  1913  	ycover[Ym*Ymax+Ymm] = 1
  1914  	ycover[Ymr*Ymax+Ymm] = 1
  1915  
  1916  	ycover[Ym*Ymax+Yxm] = 1
  1917  	ycover[Yxr*Ymax+Yxm] = 1
  1918  
  1919  	for i := 0; i < MAXREG; i++ {
  1920  		reg[i] = -1
  1921  		if i >= REG_AL && i <= REG_R15B {
  1922  			reg[i] = (i - REG_AL) & 7
  1923  			if i >= REG_SPB && i <= REG_DIB {
  1924  				regrex[i] = 0x40
  1925  			}
  1926  			if i >= REG_R8B && i <= REG_R15B {
  1927  				regrex[i] = Rxr | Rxx | Rxb
  1928  			}
  1929  		}
  1930  
  1931  		if i >= REG_AH && i <= REG_BH {
  1932  			reg[i] = 4 + ((i - REG_AH) & 7)
  1933  		}
  1934  		if i >= REG_AX && i <= REG_R15 {
  1935  			reg[i] = (i - REG_AX) & 7
  1936  			if i >= REG_R8 {
  1937  				regrex[i] = Rxr | Rxx | Rxb
  1938  			}
  1939  		}
  1940  
  1941  		if i >= REG_F0 && i <= REG_F0+7 {
  1942  			reg[i] = (i - REG_F0) & 7
  1943  		}
  1944  		if i >= REG_M0 && i <= REG_M0+7 {
  1945  			reg[i] = (i - REG_M0) & 7
  1946  		}
  1947  		if i >= REG_X0 && i <= REG_X0+15 {
  1948  			reg[i] = (i - REG_X0) & 7
  1949  			if i >= REG_X0+8 {
  1950  				regrex[i] = Rxr | Rxx | Rxb
  1951  			}
  1952  		}
  1953  
  1954  		if i >= REG_CR+8 && i <= REG_CR+15 {
  1955  			regrex[i] = Rxr
  1956  		}
  1957  	}
  1958  }
  1959  
// isAndroid records, once at package initialization, whether obj.Getgoos()
// reports "android". It selects the TLS segment prefix and relocation
// behavior used for Android builds.
var isAndroid = (obj.Getgoos() == "android")
  1961  
  1962  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  1963  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  1964  		return 0
  1965  	}
  1966  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  1967  		switch a.Reg {
  1968  		case REG_CS:
  1969  			return 0x2e
  1970  
  1971  		case REG_DS:
  1972  			return 0x3e
  1973  
  1974  		case REG_ES:
  1975  			return 0x26
  1976  
  1977  		case REG_FS:
  1978  			return 0x64
  1979  
  1980  		case REG_GS:
  1981  			return 0x65
  1982  
  1983  		case REG_TLS:
  1984  			// NOTE: Systems listed here should be only systems that
  1985  			// support direct TLS references like 8(TLS) implemented as
  1986  			// direct references from FS or GS. Systems that require
  1987  			// the initial-exec model, where you load the TLS base into
  1988  			// a register and then index from that register, do not reach
  1989  			// this code and should not be listed.
  1990  			if p.Mode == 32 {
  1991  				switch ctxt.Headtype {
  1992  				default:
  1993  					if isAndroid {
  1994  						return 0x65 // GS
  1995  					}
  1996  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1997  
  1998  				case obj.Hdarwin,
  1999  					obj.Hdragonfly,
  2000  					obj.Hfreebsd,
  2001  					obj.Hnetbsd,
  2002  					obj.Hopenbsd:
  2003  					return 0x65 // GS
  2004  				}
  2005  			}
  2006  
  2007  			switch ctxt.Headtype {
  2008  			default:
  2009  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2010  
  2011  			case obj.Hlinux:
  2012  				if isAndroid {
  2013  					return 0x64 // FS
  2014  				}
  2015  
  2016  				if ctxt.Flag_shared != 0 {
  2017  					log.Fatalf("unknown TLS base register for linux with -shared")
  2018  				} else {
  2019  					return 0x64 // FS
  2020  				}
  2021  
  2022  			case obj.Hdragonfly,
  2023  				obj.Hfreebsd,
  2024  				obj.Hnetbsd,
  2025  				obj.Hopenbsd,
  2026  				obj.Hsolaris:
  2027  				return 0x64 // FS
  2028  
  2029  			case obj.Hdarwin:
  2030  				return 0x65 // GS
  2031  			}
  2032  		}
  2033  	}
  2034  
  2035  	if p.Mode == 32 {
  2036  		if a.Index == REG_TLS && ctxt.Flag_shared != 0 {
  2037  			// When building for inclusion into a shared library, an instruction of the form
  2038  			//     MOVL 0(CX)(TLS*1), AX
  2039  			// becomes
  2040  			//     mov %gs:(%ecx), %eax
  2041  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2042  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2043  			// a shared library the instruction it becomes
  2044  			//     mov 0x0(%ecx), $eax
  2045  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2046  			if a.Offset != 0 {
  2047  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2048  			}
  2049  			return 0x65 // GS
  2050  		}
  2051  		return 0
  2052  	}
  2053  
  2054  	switch a.Index {
  2055  	case REG_CS:
  2056  		return 0x2e
  2057  
  2058  	case REG_DS:
  2059  		return 0x3e
  2060  
  2061  	case REG_ES:
  2062  		return 0x26
  2063  
  2064  	case REG_TLS:
  2065  		if ctxt.Flag_shared != 0 {
  2066  			// When building for inclusion into a shared library, an instruction of the form
  2067  			//     MOV 0(CX)(TLS*1), AX
  2068  			// becomes
  2069  			//     mov %fs:(%rcx), %rax
  2070  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2071  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2072  			// a shared library the instruction does not require a prefix.
  2073  			if a.Offset != 0 {
  2074  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2075  			}
  2076  			return 0x64
  2077  		}
  2078  
  2079  	case REG_FS:
  2080  		return 0x64
  2081  
  2082  	case REG_GS:
  2083  		return 0x65
  2084  	}
  2085  
  2086  	return 0
  2087  }
  2088  
  2089  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2090  	switch a.Type {
  2091  	case obj.TYPE_NONE:
  2092  		return Ynone
  2093  
  2094  	case obj.TYPE_BRANCH:
  2095  		return Ybr
  2096  
  2097  	case obj.TYPE_INDIR:
  2098  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2099  			return Yindir
  2100  		}
  2101  		return Yxxx
  2102  
  2103  	case obj.TYPE_MEM:
  2104  		return Ym
  2105  
  2106  	case obj.TYPE_ADDR:
  2107  		switch a.Name {
  2108  		case obj.NAME_GOTREF:
  2109  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2110  			return Yxxx
  2111  
  2112  		case obj.NAME_EXTERN,
  2113  			obj.NAME_STATIC:
  2114  			if a.Sym != nil && isextern(a.Sym) || (p.Mode == 32 && ctxt.Flag_shared == 0) {
  2115  				return Yi32
  2116  			}
  2117  			return Yiauto // use pc-relative addressing
  2118  
  2119  		case obj.NAME_AUTO,
  2120  			obj.NAME_PARAM:
  2121  			return Yiauto
  2122  		}
  2123  
  2124  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2125  		// and got Yi32 in an earlier version of this code.
  2126  		// Keep doing that until we fix yduff etc.
  2127  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2128  			return Yi32
  2129  		}
  2130  
  2131  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2132  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2133  		}
  2134  		fallthrough
  2135  
  2136  		// fall through
  2137  
  2138  	case obj.TYPE_CONST:
  2139  		if a.Sym != nil {
  2140  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2141  		}
  2142  
  2143  		v := a.Offset
  2144  		if p.Mode == 32 {
  2145  			v = int64(int32(v))
  2146  		}
  2147  		if v == 0 {
  2148  			return Yi0
  2149  		}
  2150  		if v == 1 {
  2151  			return Yi1
  2152  		}
  2153  		if v >= 0 && v <= 127 {
  2154  			return Yu7
  2155  		}
  2156  		if v >= 0 && v <= 255 {
  2157  			return Yu8
  2158  		}
  2159  		if v >= -128 && v <= 127 {
  2160  			return Yi8
  2161  		}
  2162  		if p.Mode == 32 {
  2163  			return Yi32
  2164  		}
  2165  		l := int32(v)
  2166  		if int64(l) == v {
  2167  			return Ys32 /* can sign extend */
  2168  		}
  2169  		if v>>32 == 0 {
  2170  			return Yi32 /* unsigned */
  2171  		}
  2172  		return Yi64
  2173  
  2174  	case obj.TYPE_TEXTSIZE:
  2175  		return Ytextsize
  2176  	}
  2177  
  2178  	if a.Type != obj.TYPE_REG {
  2179  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2180  		return Yxxx
  2181  	}
  2182  
  2183  	switch a.Reg {
  2184  	case REG_AL:
  2185  		return Yal
  2186  
  2187  	case REG_AX:
  2188  		return Yax
  2189  
  2190  		/*
  2191  			case REG_SPB:
  2192  		*/
  2193  	case REG_BPB,
  2194  		REG_SIB,
  2195  		REG_DIB,
  2196  		REG_R8B,
  2197  		REG_R9B,
  2198  		REG_R10B,
  2199  		REG_R11B,
  2200  		REG_R12B,
  2201  		REG_R13B,
  2202  		REG_R14B,
  2203  		REG_R15B:
  2204  		if ctxt.Asmode != 64 {
  2205  			return Yxxx
  2206  		}
  2207  		fallthrough
  2208  
  2209  	case REG_DL,
  2210  		REG_BL,
  2211  		REG_AH,
  2212  		REG_CH,
  2213  		REG_DH,
  2214  		REG_BH:
  2215  		return Yrb
  2216  
  2217  	case REG_CL:
  2218  		return Ycl
  2219  
  2220  	case REG_CX:
  2221  		return Ycx
  2222  
  2223  	case REG_DX, REG_BX:
  2224  		return Yrx
  2225  
  2226  	case REG_R8, /* not really Yrl */
  2227  		REG_R9,
  2228  		REG_R10,
  2229  		REG_R11,
  2230  		REG_R12,
  2231  		REG_R13,
  2232  		REG_R14,
  2233  		REG_R15:
  2234  		if ctxt.Asmode != 64 {
  2235  			return Yxxx
  2236  		}
  2237  		fallthrough
  2238  
  2239  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2240  		if p.Mode == 32 {
  2241  			return Yrl32
  2242  		}
  2243  		return Yrl
  2244  
  2245  	case REG_F0 + 0:
  2246  		return Yf0
  2247  
  2248  	case REG_F0 + 1,
  2249  		REG_F0 + 2,
  2250  		REG_F0 + 3,
  2251  		REG_F0 + 4,
  2252  		REG_F0 + 5,
  2253  		REG_F0 + 6,
  2254  		REG_F0 + 7:
  2255  		return Yrf
  2256  
  2257  	case REG_M0 + 0,
  2258  		REG_M0 + 1,
  2259  		REG_M0 + 2,
  2260  		REG_M0 + 3,
  2261  		REG_M0 + 4,
  2262  		REG_M0 + 5,
  2263  		REG_M0 + 6,
  2264  		REG_M0 + 7:
  2265  		return Ymr
  2266  
  2267  	case REG_X0 + 0,
  2268  		REG_X0 + 1,
  2269  		REG_X0 + 2,
  2270  		REG_X0 + 3,
  2271  		REG_X0 + 4,
  2272  		REG_X0 + 5,
  2273  		REG_X0 + 6,
  2274  		REG_X0 + 7,
  2275  		REG_X0 + 8,
  2276  		REG_X0 + 9,
  2277  		REG_X0 + 10,
  2278  		REG_X0 + 11,
  2279  		REG_X0 + 12,
  2280  		REG_X0 + 13,
  2281  		REG_X0 + 14,
  2282  		REG_X0 + 15:
  2283  		return Yxr
  2284  
  2285  	case REG_CS:
  2286  		return Ycs
  2287  	case REG_SS:
  2288  		return Yss
  2289  	case REG_DS:
  2290  		return Yds
  2291  	case REG_ES:
  2292  		return Yes
  2293  	case REG_FS:
  2294  		return Yfs
  2295  	case REG_GS:
  2296  		return Ygs
  2297  	case REG_TLS:
  2298  		return Ytls
  2299  
  2300  	case REG_GDTR:
  2301  		return Ygdtr
  2302  	case REG_IDTR:
  2303  		return Yidtr
  2304  	case REG_LDTR:
  2305  		return Yldtr
  2306  	case REG_MSW:
  2307  		return Ymsw
  2308  	case REG_TASK:
  2309  		return Ytask
  2310  
  2311  	case REG_CR + 0:
  2312  		return Ycr0
  2313  	case REG_CR + 1:
  2314  		return Ycr1
  2315  	case REG_CR + 2:
  2316  		return Ycr2
  2317  	case REG_CR + 3:
  2318  		return Ycr3
  2319  	case REG_CR + 4:
  2320  		return Ycr4
  2321  	case REG_CR + 5:
  2322  		return Ycr5
  2323  	case REG_CR + 6:
  2324  		return Ycr6
  2325  	case REG_CR + 7:
  2326  		return Ycr7
  2327  	case REG_CR + 8:
  2328  		return Ycr8
  2329  
  2330  	case REG_DR + 0:
  2331  		return Ydr0
  2332  	case REG_DR + 1:
  2333  		return Ydr1
  2334  	case REG_DR + 2:
  2335  		return Ydr2
  2336  	case REG_DR + 3:
  2337  		return Ydr3
  2338  	case REG_DR + 4:
  2339  		return Ydr4
  2340  	case REG_DR + 5:
  2341  		return Ydr5
  2342  	case REG_DR + 6:
  2343  		return Ydr6
  2344  	case REG_DR + 7:
  2345  		return Ydr7
  2346  
  2347  	case REG_TR + 0:
  2348  		return Ytr0
  2349  	case REG_TR + 1:
  2350  		return Ytr1
  2351  	case REG_TR + 2:
  2352  		return Ytr2
  2353  	case REG_TR + 3:
  2354  		return Ytr3
  2355  	case REG_TR + 4:
  2356  		return Ytr4
  2357  	case REG_TR + 5:
  2358  		return Ytr5
  2359  	case REG_TR + 6:
  2360  		return Ytr6
  2361  	case REG_TR + 7:
  2362  		return Ytr7
  2363  	}
  2364  
  2365  	return Yxxx
  2366  }
  2367  
  2368  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2369  	var i int
  2370  
  2371  	switch index {
  2372  	default:
  2373  		goto bad
  2374  
  2375  	case REG_NONE:
  2376  		i = 4 << 3
  2377  		goto bas
  2378  
  2379  	case REG_R8,
  2380  		REG_R9,
  2381  		REG_R10,
  2382  		REG_R11,
  2383  		REG_R12,
  2384  		REG_R13,
  2385  		REG_R14,
  2386  		REG_R15:
  2387  		if ctxt.Asmode != 64 {
  2388  			goto bad
  2389  		}
  2390  		fallthrough
  2391  
  2392  	case REG_AX,
  2393  		REG_CX,
  2394  		REG_DX,
  2395  		REG_BX,
  2396  		REG_BP,
  2397  		REG_SI,
  2398  		REG_DI:
  2399  		i = reg[index] << 3
  2400  	}
  2401  
  2402  	switch scale {
  2403  	default:
  2404  		goto bad
  2405  
  2406  	case 1:
  2407  		break
  2408  
  2409  	case 2:
  2410  		i |= 1 << 6
  2411  
  2412  	case 4:
  2413  		i |= 2 << 6
  2414  
  2415  	case 8:
  2416  		i |= 3 << 6
  2417  	}
  2418  
  2419  bas:
  2420  	switch base {
  2421  	default:
  2422  		goto bad
  2423  
  2424  	case REG_NONE: /* must be mod=00 */
  2425  		i |= 5
  2426  
  2427  	case REG_R8,
  2428  		REG_R9,
  2429  		REG_R10,
  2430  		REG_R11,
  2431  		REG_R12,
  2432  		REG_R13,
  2433  		REG_R14,
  2434  		REG_R15:
  2435  		if ctxt.Asmode != 64 {
  2436  			goto bad
  2437  		}
  2438  		fallthrough
  2439  
  2440  	case REG_AX,
  2441  		REG_CX,
  2442  		REG_DX,
  2443  		REG_BX,
  2444  		REG_SP,
  2445  		REG_BP,
  2446  		REG_SI,
  2447  		REG_DI:
  2448  		i |= reg[base]
  2449  	}
  2450  
  2451  	ctxt.Andptr[0] = byte(i)
  2452  	ctxt.Andptr = ctxt.Andptr[1:]
  2453  	return
  2454  
  2455  bad:
  2456  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2457  	ctxt.Andptr[0] = 0
  2458  	ctxt.Andptr = ctxt.Andptr[1:]
  2459  	return
  2460  }
  2461  
  2462  func put4(ctxt *obj.Link, v int32) {
  2463  	ctxt.Andptr[0] = byte(v)
  2464  	ctxt.Andptr[1] = byte(v >> 8)
  2465  	ctxt.Andptr[2] = byte(v >> 16)
  2466  	ctxt.Andptr[3] = byte(v >> 24)
  2467  	ctxt.Andptr = ctxt.Andptr[4:]
  2468  }
  2469  
  2470  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2471  	var rel obj.Reloc
  2472  
  2473  	v := vaddr(ctxt, p, a, &rel)
  2474  	if rel.Siz != 0 {
  2475  		if rel.Siz != 4 {
  2476  			ctxt.Diag("bad reloc")
  2477  		}
  2478  		r := obj.Addrel(ctxt.Cursym)
  2479  		*r = rel
  2480  		r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2481  	}
  2482  
  2483  	put4(ctxt, int32(v))
  2484  }
  2485  
  2486  func put8(ctxt *obj.Link, v int64) {
  2487  	ctxt.Andptr[0] = byte(v)
  2488  	ctxt.Andptr[1] = byte(v >> 8)
  2489  	ctxt.Andptr[2] = byte(v >> 16)
  2490  	ctxt.Andptr[3] = byte(v >> 24)
  2491  	ctxt.Andptr[4] = byte(v >> 32)
  2492  	ctxt.Andptr[5] = byte(v >> 40)
  2493  	ctxt.Andptr[6] = byte(v >> 48)
  2494  	ctxt.Andptr[7] = byte(v >> 56)
  2495  	ctxt.Andptr = ctxt.Andptr[8:]
  2496  }
  2497  
  2498  /*
  2499  static void
  2500  relput8(Prog *p, Addr *a)
  2501  {
  2502  	vlong v;
  2503  	Reloc rel, *r;
  2504  
  2505  	v = vaddr(ctxt, p, a, &rel);
  2506  	if(rel.siz != 0) {
  2507  		r = addrel(ctxt->cursym);
  2508  		*r = rel;
  2509  		r->siz = 8;
  2510  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2511  	}
  2512  	put8(ctxt, v);
  2513  }
  2514  */
  2515  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2516  	if r != nil {
  2517  		*r = obj.Reloc{}
  2518  	}
  2519  
  2520  	switch a.Name {
  2521  	case obj.NAME_STATIC,
  2522  		obj.NAME_GOTREF,
  2523  		obj.NAME_EXTERN:
  2524  		s := a.Sym
  2525  		if r == nil {
  2526  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2527  			log.Fatalf("reloc")
  2528  		}
  2529  
  2530  		if a.Name == obj.NAME_GOTREF {
  2531  			r.Siz = 4
  2532  			r.Type = obj.R_GOTPCREL
  2533  		} else if isextern(s) || (p.Mode != 64 && ctxt.Flag_shared == 0) {
  2534  			r.Siz = 4
  2535  			r.Type = obj.R_ADDR
  2536  		} else {
  2537  			r.Siz = 4
  2538  			r.Type = obj.R_PCREL
  2539  		}
  2540  
  2541  		r.Off = -1 // caller must fill in
  2542  		r.Sym = s
  2543  		r.Add = a.Offset
  2544  
  2545  		return 0
  2546  	}
  2547  
  2548  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2549  		if r == nil {
  2550  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2551  			log.Fatalf("reloc")
  2552  		}
  2553  
  2554  		if ctxt.Flag_shared == 0 || isAndroid {
  2555  			r.Type = obj.R_TLS_LE
  2556  			r.Siz = 4
  2557  			r.Off = -1 // caller must fill in
  2558  			r.Add = a.Offset
  2559  		}
  2560  		return 0
  2561  	}
  2562  
  2563  	return a.Offset
  2564  }
  2565  
// asmandsz emits the ModRM byte, plus any SIB byte and displacement, that
// encode operand a at ctxt.Andptr, and ORs the REX bits the operand needs
// into ctxt.Rexflag.
//
// r is the value placed in the ModRM reg field (bits 3-5): either a
// register number or an opcode extension. rex carries REX bits for r, of
// which only 0x40 and Rxr are kept. m64 is not used by this function.
//
// Register operands are encoded with mod=3. Memory operands use the
// shortest displacement form that is valid: none, 8-bit, or 32-bit. When
// the operand refers to a symbol or to TLS, a relocation is appended to
// ctxt.Cursym for the 32-bit displacement. Operands that cannot be
// encoded are reported with ctxt.Diag and nothing is emitted for them.
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// Address operands are never valid here; the extra diagnostics
		// only narrow down the cause before the generic error.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		if a.Reg < REG_AL || REG_X0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		// mod=3: register-direct operand.
		ctxt.Andptr[0] = byte(3<<6 | reg[a.Reg]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always needs a SIB byte
	// (rm=4). Every path through this block ends in return or goto.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			if p.Mode == 32 && ctxt.Flag_shared != 0 {
				base = REG_CX
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 with a 32-bit displacement.
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// BP and R13 as base cannot use the no-displacement form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// 8-bit displacement (mod=1).
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		// 32-bit displacement (mod=2).
		ctxt.Andptr[0] = byte(2<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with the TLS
	// pseudo-index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if p.Mode == 32 && ctxt.Flag_shared != 0 {
			base = REG_CX
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	// No usable base register: absolute / pc-relative / segment-relative
	// addressing with a 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// mod=0, rm=5: 32-bit displacement with no SIB byte.
			ctxt.Andptr[0] = byte(0<<6 | 5<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			goto putrelv
		}

		/* temporary */
		ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:] /* sib present */
		ctxt.Andptr[0] = 0<<6 | 4<<3 | 5<<0
		ctxt.Andptr = ctxt.Andptr[1:] /* DS:d32 */
		goto putrelv
	}

	// SP and R12 as base always need a SIB byte (asmidx with no index).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general register as base.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && ctxt.Flag_shared == 0 {
			// Non-shared TLS access through an index of REG_TLS: the
			// offset moves into an R_TLS_LE relocation.
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// BP and R13 as base cannot use the no-displacement form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr[1] = byte(v)
			ctxt.Andptr = ctxt.Andptr[2:]
			return
		}

		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		goto putrelv
	}

	goto bad

	// putrelv emits the 32-bit displacement v, first recording the pending
	// relocation (if any) at the displacement's offset.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// Note: this r shadows the integer parameter r.
		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		r.Off = int32(ctxt.Curp.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
	}

	put4(ctxt, v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  2767  
  2768  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2769  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2770  }
  2771  
  2772  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2773  	asmandsz(ctxt, p, a, o, 0, 0)
  2774  }
  2775  
  2776  func bytereg(a *obj.Addr, t *uint8) {
  2777  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2778  		a.Reg += REG_AL - REG_AX
  2779  		*t = 0
  2780  	}
  2781  }
  2782  
  2783  func unbytereg(a *obj.Addr, t *uint8) {
  2784  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2785  		a.Reg += REG_AX - REG_AL
  2786  		*t = 0
  2787  	}
  2788  }
  2789  
  2790  const (
  2791  	E = 0xff
  2792  )
  2793  
// ymovtab lists special-case encodings: the segment-register forms of PUSH
// and POP, moves to and from segment, control, debug and test registers,
// the descriptor-table / task / machine-status-word moves, the double
// shifts, and the TLS base load.
//
// NOTE(review): the Movtab type is declared elsewhere. Judging from the
// entries, the positional fields are presumably the instruction, the
// from / from3 / to operand classes, a small code selecting how the entry
// is encoded, and up to four opcode bytes — confirm against the Movtab
// definition and the code that scans this table.
//
// Within this table the code value is 0 for push/pop, 1 and 2 for the
// segment-register moves, 3 and 4 for moves from and to the other special
// registers, 6 for double shifts and 7 for the TLS base load.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}}, // all-zero entry; presumably the end-of-table sentinel
}
  2933  
  2934  func isax(a *obj.Addr) bool {
  2935  	switch a.Reg {
  2936  	case REG_AX, REG_AL, REG_AH:
  2937  		return true
  2938  	}
  2939  
  2940  	if a.Index == REG_AX {
  2941  		return true
  2942  	}
  2943  	return false
  2944  }
  2945  
  2946  func subreg(p *obj.Prog, from int, to int) {
  2947  	if false { /* debug['Q'] */
  2948  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  2949  	}
  2950  
  2951  	if int(p.From.Reg) == from {
  2952  		p.From.Reg = int16(to)
  2953  		p.Ft = 0
  2954  	}
  2955  
  2956  	if int(p.To.Reg) == from {
  2957  		p.To.Reg = int16(to)
  2958  		p.Tt = 0
  2959  	}
  2960  
  2961  	if int(p.From.Index) == from {
  2962  		p.From.Index = int16(to)
  2963  		p.Ft = 0
  2964  	}
  2965  
  2966  	if int(p.To.Index) == from {
  2967  		p.To.Index = int16(to)
  2968  		p.Tt = 0
  2969  	}
  2970  
  2971  	if false { /* debug['Q'] */
  2972  		fmt.Printf("%v\n", p)
  2973  	}
  2974  }
  2975  
  2976  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  2977  	switch op {
  2978  	case Pm, Pe, Pf2, Pf3:
  2979  		if osize != 1 {
  2980  			if op != Pm {
  2981  				ctxt.Andptr[0] = byte(op)
  2982  				ctxt.Andptr = ctxt.Andptr[1:]
  2983  			}
  2984  			ctxt.Andptr[0] = Pm
  2985  			ctxt.Andptr = ctxt.Andptr[1:]
  2986  			z++
  2987  			op = int(o.op[z])
  2988  			break
  2989  		}
  2990  		fallthrough
  2991  
  2992  	default:
  2993  		if -cap(ctxt.Andptr) == -cap(ctxt.And) || ctxt.And[-cap(ctxt.Andptr)+cap(ctxt.And[:])-1] != Pm {
  2994  			ctxt.Andptr[0] = Pm
  2995  			ctxt.Andptr = ctxt.Andptr[1:]
  2996  		}
  2997  	}
  2998  
  2999  	ctxt.Andptr[0] = byte(op)
  3000  	ctxt.Andptr = ctxt.Andptr[1:]
  3001  	return z
  3002  }
  3003  
  3004  var bpduff1 = []byte{
  3005  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3006  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3007  }
  3008  
  3009  var bpduff2 = []byte{
  3010  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3011  }
  3012  
  3013  // Assemble vex prefix, from 3 operands and prefix.
  3014  // For details about vex prefix see:
  3015  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3016  func vexprefix(ctxt *obj.Link, to *obj.Addr, from *obj.Addr, from3 *obj.Addr, pref uint8) {
  3017  	rexR := regrex[to.Reg]
  3018  	rexB := regrex[from.Reg]
  3019  	rexX := regrex[from.Index]
  3020  	var prefBit uint8
  3021  	// This will go into VEX.PP field.
  3022  	if pref == Pvex1 || pref == Pvex3 {
  3023  		prefBit = 1
  3024  	} else if pref == Pvex2 {
  3025  		prefBit = 2
  3026  	} // TODO add Pvex0
  3027  
  3028  	if rexX == 0 && rexB == 0 && pref != Pvex3 { // 2-byte vex prefix
  3029  		// In 2-byte case, first byte is always C5
  3030  		ctxt.Andptr[0] = 0xc5
  3031  		ctxt.Andptr = ctxt.Andptr[1:]
  3032  
  3033  		if from3 == nil {
  3034  			// If this is a 2-operand instruction fill VEX.VVVV with 1111
  3035  			// We are also interested only in 256-bit version, so VEX.L=1
  3036  			ctxt.Andptr[0] = 0x7c
  3037  		} else {
  3038  			// VEX.L=1
  3039  			ctxt.Andptr[0] = 0x4
  3040  			// VEX.VVVV (bits 3:6) is a inversed register number
  3041  			ctxt.Andptr[0] |= byte((^(from3.Reg - REG_X0))<<3) & 0x78
  3042  		}
  3043  
  3044  		// VEX encodes REX.R as inversed upper bit
  3045  		if rexR == 0 {
  3046  			ctxt.Andptr[0] |= 0x80
  3047  		}
  3048  		ctxt.Andptr[0] |= prefBit
  3049  		ctxt.Andptr = ctxt.Andptr[1:]
  3050  	} else { // 3-byte case
  3051  		// First byte is always C$
  3052  		ctxt.Andptr[0] = 0xc4
  3053  		ctxt.Andptr = ctxt.Andptr[1:]
  3054  
  3055  		// Encode VEX.mmmmm with prefix value, assume 0F,
  3056  		// which encodes as 1, unless 0F38 was specified with pvex3.
  3057  		ctxt.Andptr[0] = 0x1 // TODO handle 0F3A
  3058  		if pref == Pvex3 {
  3059  			ctxt.Andptr[0] = 0x2
  3060  		}
  3061  
  3062  		// REX.[RXB] are inverted and encoded in 3 upper bits
  3063  		if rexR == 0 {
  3064  			ctxt.Andptr[0] |= 0x80
  3065  		}
  3066  		if rexX == 0 {
  3067  			ctxt.Andptr[0] |= 0x40
  3068  		}
  3069  		if rexB == 0 {
  3070  			ctxt.Andptr[0] |= 0x20
  3071  		}
  3072  		ctxt.Andptr = ctxt.Andptr[1:]
  3073  
  3074  		// Fill VEX.VVVV, same as 2-operand VEX instruction.
  3075  		if from3 == nil {
  3076  			ctxt.Andptr[0] = 0x7c
  3077  		} else {
  3078  			ctxt.Andptr[0] = 0x4
  3079  			ctxt.Andptr[0] |= byte((^(from3.Reg - REG_X0))<<3) & 0x78
  3080  		}
  3081  		ctxt.Andptr[0] |= prefBit
  3082  		ctxt.Andptr = ctxt.Andptr[1:]
  3083  	}
  3084  }
  3085  
  3086  func doasm(ctxt *obj.Link, p *obj.Prog) {
  3087  	ctxt.Curp = p // TODO
  3088  
  3089  	o := opindex[p.As&obj.AMask]
  3090  
  3091  	if o == nil {
  3092  		ctxt.Diag("asmins: missing op %v", p)
  3093  		return
  3094  	}
  3095  
  3096  	pre := prefixof(ctxt, p, &p.From)
  3097  	if pre != 0 {
  3098  		ctxt.Andptr[0] = byte(pre)
  3099  		ctxt.Andptr = ctxt.Andptr[1:]
  3100  	}
  3101  	pre = prefixof(ctxt, p, &p.To)
  3102  	if pre != 0 {
  3103  		ctxt.Andptr[0] = byte(pre)
  3104  		ctxt.Andptr = ctxt.Andptr[1:]
  3105  	}
  3106  
  3107  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3108  	// which encodes as SHRQ $32(DX*0), AX.
  3109  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3110  	// Change encoding generated by assemblers and compilers and remove.
  3111  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3112  		p.From3 = new(obj.Addr)
  3113  		p.From3.Type = obj.TYPE_REG
  3114  		p.From3.Reg = p.From.Index
  3115  		p.From.Index = 0
  3116  	}
  3117  
  3118  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3119  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3120  	switch p.As {
  3121  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3122  		if p.From3Type() == obj.TYPE_NONE {
  3123  			p.From3 = new(obj.Addr)
  3124  			*p.From3 = p.From
  3125  			p.From = obj.Addr{}
  3126  			p.From.Type = obj.TYPE_CONST
  3127  			p.From.Offset = p.To.Offset
  3128  			p.To.Offset = 0
  3129  		}
  3130  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3131  		if p.From3Type() == obj.TYPE_NONE {
  3132  			p.From3 = new(obj.Addr)
  3133  			*p.From3 = p.To
  3134  			p.To = obj.Addr{}
  3135  			p.To.Type = obj.TYPE_CONST
  3136  			p.To.Offset = p.From3.Offset
  3137  			p.From3.Offset = 0
  3138  		}
  3139  	}
  3140  
  3141  	if p.Ft == 0 {
  3142  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3143  	}
  3144  	if p.Tt == 0 {
  3145  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3146  	}
  3147  
  3148  	ft := int(p.Ft) * Ymax
  3149  	f3t := Ynone * Ymax
  3150  	if p.From3 != nil {
  3151  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3152  	}
  3153  	tt := int(p.Tt) * Ymax
  3154  
  3155  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3156  	z := 0
  3157  	var a *obj.Addr
  3158  	var l int
  3159  	var op int
  3160  	var q *obj.Prog
  3161  	var r *obj.Reloc
  3162  	var rel obj.Reloc
  3163  	var v int64
  3164  	for i := range o.ytab {
  3165  		yt := &o.ytab[i]
  3166  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3167  			switch o.prefix {
  3168  			case Px1: /* first option valid only in 32-bit mode */
  3169  				if ctxt.Mode == 64 && z == 0 {
  3170  					z += int(yt.zoffset) + xo
  3171  					continue
  3172  				}
  3173  			case Pq: /* 16 bit escape and opcode escape */
  3174  				ctxt.Andptr[0] = Pe
  3175  				ctxt.Andptr = ctxt.Andptr[1:]
  3176  
  3177  				ctxt.Andptr[0] = Pm
  3178  				ctxt.Andptr = ctxt.Andptr[1:]
  3179  
  3180  			case Pq3: /* 16 bit escape, Rex.w, and opcode escape */
  3181  				ctxt.Andptr[0] = Pe
  3182  				ctxt.Andptr = ctxt.Andptr[1:]
  3183  
  3184  				ctxt.Andptr[0] = Pw
  3185  				ctxt.Andptr = ctxt.Andptr[1:]
  3186  				ctxt.Andptr[0] = Pm
  3187  				ctxt.Andptr = ctxt.Andptr[1:]
  3188  
  3189  			case Pf2, /* xmm opcode escape */
  3190  				Pf3:
  3191  				ctxt.Andptr[0] = byte(o.prefix)
  3192  				ctxt.Andptr = ctxt.Andptr[1:]
  3193  
  3194  				ctxt.Andptr[0] = Pm
  3195  				ctxt.Andptr = ctxt.Andptr[1:]
  3196  
  3197  			case Pm: /* opcode escape */
  3198  				ctxt.Andptr[0] = Pm
  3199  				ctxt.Andptr = ctxt.Andptr[1:]
  3200  
  3201  			case Pe: /* 16 bit escape */
  3202  				ctxt.Andptr[0] = Pe
  3203  				ctxt.Andptr = ctxt.Andptr[1:]
  3204  
  3205  			case Pw: /* 64-bit escape */
  3206  				if p.Mode != 64 {
  3207  					ctxt.Diag("asmins: illegal 64: %v", p)
  3208  				}
  3209  				ctxt.Rexflag |= Pw
  3210  
  3211  			case Pw8: /* 64-bit escape if z >= 8 */
  3212  				if z >= 8 {
  3213  					if p.Mode != 64 {
  3214  						ctxt.Diag("asmins: illegal 64: %v", p)
  3215  					}
  3216  					ctxt.Rexflag |= Pw
  3217  				}
  3218  
  3219  			case Pb: /* botch */
  3220  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3221  					goto bad
  3222  				}
  3223  				// NOTE(rsc): This is probably safe to do always,
  3224  				// but when enabled it chooses different encodings
  3225  				// than the old cmd/internal/obj/i386 code did,
  3226  				// which breaks our "same bits out" checks.
  3227  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3228  				// in the original obj/i386, and it would encode
  3229  				// (using a valid, shorter form) as 3c 00 if we enabled
  3230  				// the call to bytereg here.
  3231  				if p.Mode == 64 {
  3232  					bytereg(&p.From, &p.Ft)
  3233  					bytereg(&p.To, &p.Tt)
  3234  				}
  3235  
  3236  			case P32: /* 32 bit but illegal if 64-bit mode */
  3237  				if p.Mode == 64 {
  3238  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3239  				}
  3240  
  3241  			case Py: /* 64-bit only, no prefix */
  3242  				if p.Mode != 64 {
  3243  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3244  				}
  3245  
  3246  			case Py1: /* 64-bit only if z < 1, no prefix */
  3247  				if z < 1 && p.Mode != 64 {
  3248  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3249  				}
  3250  
  3251  			case Py3: /* 64-bit only if z < 3, no prefix */
  3252  				if z < 3 && p.Mode != 64 {
  3253  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3254  				}
  3255  			}
  3256  
  3257  			if z >= len(o.op) {
  3258  				log.Fatalf("asmins bad table %v", p)
  3259  			}
  3260  			op = int(o.op[z])
  3261  			if op == 0x0f {
  3262  				ctxt.Andptr[0] = byte(op)
  3263  				ctxt.Andptr = ctxt.Andptr[1:]
  3264  				z++
  3265  				op = int(o.op[z])
  3266  			}
  3267  
  3268  			switch yt.zcase {
  3269  			default:
  3270  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3271  				return
  3272  
  3273  			case Zpseudo:
  3274  				break
  3275  
  3276  			case Zlit:
  3277  				for ; ; z++ {
  3278  					op = int(o.op[z])
  3279  					if op == 0 {
  3280  						break
  3281  					}
  3282  					ctxt.Andptr[0] = byte(op)
  3283  					ctxt.Andptr = ctxt.Andptr[1:]
  3284  				}
  3285  
  3286  			case Zlitm_r:
  3287  				for ; ; z++ {
  3288  					op = int(o.op[z])
  3289  					if op == 0 {
  3290  						break
  3291  					}
  3292  					ctxt.Andptr[0] = byte(op)
  3293  					ctxt.Andptr = ctxt.Andptr[1:]
  3294  				}
  3295  				asmand(ctxt, p, &p.From, &p.To)
  3296  
  3297  			case Zmb_r:
  3298  				bytereg(&p.From, &p.Ft)
  3299  				fallthrough
  3300  
  3301  				/* fall through */
  3302  			case Zm_r:
  3303  				ctxt.Andptr[0] = byte(op)
  3304  				ctxt.Andptr = ctxt.Andptr[1:]
  3305  
  3306  				asmand(ctxt, p, &p.From, &p.To)
  3307  
  3308  			case Zm2_r:
  3309  				ctxt.Andptr[0] = byte(op)
  3310  				ctxt.Andptr = ctxt.Andptr[1:]
  3311  				ctxt.Andptr[0] = byte(o.op[z+1])
  3312  				ctxt.Andptr = ctxt.Andptr[1:]
  3313  				asmand(ctxt, p, &p.From, &p.To)
  3314  
  3315  			case Zm_r_xm:
  3316  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3317  				asmand(ctxt, p, &p.From, &p.To)
  3318  
  3319  			case Zm_r_xm_vex:
  3320  				ctxt.Vexflag = 1
  3321  				vexprefix(ctxt, &p.To, &p.From, nil, o.prefix)
  3322  				ctxt.Andptr[0] = byte(op)
  3323  				ctxt.Andptr = ctxt.Andptr[1:]
  3324  				asmand(ctxt, p, &p.From, &p.To)
  3325  
  3326  			case Zm_r_xm_nr:
  3327  				ctxt.Rexflag = 0
  3328  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3329  				asmand(ctxt, p, &p.From, &p.To)
  3330  
  3331  			case Zm_r_i_xm:
  3332  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3333  				asmand(ctxt, p, &p.From, p.From3)
  3334  				ctxt.Andptr[0] = byte(p.To.Offset)
  3335  				ctxt.Andptr = ctxt.Andptr[1:]
  3336  
  3337  			case Zm_r_3d:
  3338  				ctxt.Andptr[0] = 0x0f
  3339  				ctxt.Andptr = ctxt.Andptr[1:]
  3340  				ctxt.Andptr[0] = 0x0f
  3341  				ctxt.Andptr = ctxt.Andptr[1:]
  3342  				asmand(ctxt, p, &p.From, &p.To)
  3343  				ctxt.Andptr[0] = byte(op)
  3344  				ctxt.Andptr = ctxt.Andptr[1:]
  3345  
  3346  			case Zibm_r:
  3347  				for {
  3348  					tmp1 := z
  3349  					z++
  3350  					op = int(o.op[tmp1])
  3351  					if op == 0 {
  3352  						break
  3353  					}
  3354  					ctxt.Andptr[0] = byte(op)
  3355  					ctxt.Andptr = ctxt.Andptr[1:]
  3356  				}
  3357  				asmand(ctxt, p, p.From3, &p.To)
  3358  				ctxt.Andptr[0] = byte(p.From.Offset)
  3359  				ctxt.Andptr = ctxt.Andptr[1:]
  3360  
  3361  			case Zaut_r:
  3362  				ctxt.Andptr[0] = 0x8d
  3363  				ctxt.Andptr = ctxt.Andptr[1:] /* leal */
  3364  				if p.From.Type != obj.TYPE_ADDR {
  3365  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3366  				}
  3367  				p.From.Type = obj.TYPE_MEM
  3368  				asmand(ctxt, p, &p.From, &p.To)
  3369  				p.From.Type = obj.TYPE_ADDR
  3370  
  3371  			case Zm_o:
  3372  				ctxt.Andptr[0] = byte(op)
  3373  				ctxt.Andptr = ctxt.Andptr[1:]
  3374  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3375  
  3376  			case Zr_m:
  3377  				ctxt.Andptr[0] = byte(op)
  3378  				ctxt.Andptr = ctxt.Andptr[1:]
  3379  				asmand(ctxt, p, &p.To, &p.From)
  3380  
  3381  			case Zr_m_xm_vex:
  3382  				ctxt.Vexflag = 1
  3383  				vexprefix(ctxt, &p.From, &p.To, nil, o.prefix)
  3384  				ctxt.Andptr[0] = byte(op)
  3385  				ctxt.Andptr = ctxt.Andptr[1:]
  3386  				asmand(ctxt, p, &p.To, &p.From)
  3387  
  3388  			case Zr_r_r_vex:
  3389  				ctxt.Vexflag = 1
  3390  				vexprefix(ctxt, &p.To, &p.From, p.From3, o.prefix)
  3391  				ctxt.Andptr[0] = byte(op)
  3392  				ctxt.Andptr = ctxt.Andptr[1:]
  3393  				asmand(ctxt, p, &p.From, &p.To)
  3394  
  3395  			case Zr_m_xm:
  3396  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3397  				asmand(ctxt, p, &p.To, &p.From)
  3398  
  3399  			case Zr_m_xm_nr:
  3400  				ctxt.Rexflag = 0
  3401  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3402  				asmand(ctxt, p, &p.To, &p.From)
  3403  
  3404  			case Zo_m:
  3405  				ctxt.Andptr[0] = byte(op)
  3406  				ctxt.Andptr = ctxt.Andptr[1:]
  3407  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3408  
  3409  			case Zcallindreg:
  3410  				r = obj.Addrel(ctxt.Cursym)
  3411  				r.Off = int32(p.Pc)
  3412  				r.Type = obj.R_CALLIND
  3413  				r.Siz = 0
  3414  				fallthrough
  3415  
  3416  			case Zo_m64:
  3417  				ctxt.Andptr[0] = byte(op)
  3418  				ctxt.Andptr = ctxt.Andptr[1:]
  3419  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3420  
  3421  			case Zm_ibo:
  3422  				ctxt.Andptr[0] = byte(op)
  3423  				ctxt.Andptr = ctxt.Andptr[1:]
  3424  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3425  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.To, nil))
  3426  				ctxt.Andptr = ctxt.Andptr[1:]
  3427  
  3428  			case Zibo_m:
  3429  				ctxt.Andptr[0] = byte(op)
  3430  				ctxt.Andptr = ctxt.Andptr[1:]
  3431  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3432  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3433  				ctxt.Andptr = ctxt.Andptr[1:]
  3434  
  3435  			case Zibo_m_xm:
  3436  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3437  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3438  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3439  				ctxt.Andptr = ctxt.Andptr[1:]
  3440  
  3441  			case Z_ib, Zib_:
  3442  				if yt.zcase == Zib_ {
  3443  					a = &p.From
  3444  				} else {
  3445  					a = &p.To
  3446  				}
  3447  				ctxt.Andptr[0] = byte(op)
  3448  				ctxt.Andptr = ctxt.Andptr[1:]
  3449  				if p.As == AXABORT {
  3450  					ctxt.Andptr[0] = byte(o.op[z+1])
  3451  					ctxt.Andptr = ctxt.Andptr[1:]
  3452  				}
  3453  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, a, nil))
  3454  				ctxt.Andptr = ctxt.Andptr[1:]
  3455  
  3456  			case Zib_rp:
  3457  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3458  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3459  				ctxt.Andptr = ctxt.Andptr[1:]
  3460  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3461  				ctxt.Andptr = ctxt.Andptr[1:]
  3462  
  3463  			case Zil_rp:
  3464  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3465  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3466  				ctxt.Andptr = ctxt.Andptr[1:]
  3467  				if o.prefix == Pe {
  3468  					v = vaddr(ctxt, p, &p.From, nil)
  3469  					ctxt.Andptr[0] = byte(v)
  3470  					ctxt.Andptr = ctxt.Andptr[1:]
  3471  					ctxt.Andptr[0] = byte(v >> 8)
  3472  					ctxt.Andptr = ctxt.Andptr[1:]
  3473  				} else {
  3474  					relput4(ctxt, p, &p.From)
  3475  				}
  3476  
  3477  			case Zo_iw:
  3478  				ctxt.Andptr[0] = byte(op)
  3479  				ctxt.Andptr = ctxt.Andptr[1:]
  3480  				if p.From.Type != obj.TYPE_NONE {
  3481  					v = vaddr(ctxt, p, &p.From, nil)
  3482  					ctxt.Andptr[0] = byte(v)
  3483  					ctxt.Andptr = ctxt.Andptr[1:]
  3484  					ctxt.Andptr[0] = byte(v >> 8)
  3485  					ctxt.Andptr = ctxt.Andptr[1:]
  3486  				}
  3487  
  3488  			case Ziq_rp:
  3489  				v = vaddr(ctxt, p, &p.From, &rel)
  3490  				l = int(v >> 32)
  3491  				if l == 0 && rel.Siz != 8 {
  3492  					//p->mark |= 0100;
  3493  					//print("zero: %llux %v\n", v, p);
  3494  					ctxt.Rexflag &^= (0x40 | Rxw)
  3495  
  3496  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3497  					ctxt.Andptr[0] = byte(0xb8 + reg[p.To.Reg])
  3498  					ctxt.Andptr = ctxt.Andptr[1:]
  3499  					if rel.Type != 0 {
  3500  						r = obj.Addrel(ctxt.Cursym)
  3501  						*r = rel
  3502  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3503  					}
  3504  
  3505  					put4(ctxt, int32(v))
  3506  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3507  
  3508  					//p->mark |= 0100;
  3509  					//print("sign: %llux %v\n", v, p);
  3510  					ctxt.Andptr[0] = 0xc7
  3511  					ctxt.Andptr = ctxt.Andptr[1:]
  3512  
  3513  					asmando(ctxt, p, &p.To, 0)
  3514  					put4(ctxt, int32(v)) /* need all 8 */
  3515  				} else {
  3516  					//print("all: %llux %v\n", v, p);
  3517  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3518  
  3519  					ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3520  					ctxt.Andptr = ctxt.Andptr[1:]
  3521  					if rel.Type != 0 {
  3522  						r = obj.Addrel(ctxt.Cursym)
  3523  						*r = rel
  3524  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3525  					}
  3526  
  3527  					put8(ctxt, v)
  3528  				}
  3529  
  3530  			case Zib_rr:
  3531  				ctxt.Andptr[0] = byte(op)
  3532  				ctxt.Andptr = ctxt.Andptr[1:]
  3533  				asmand(ctxt, p, &p.To, &p.To)
  3534  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3535  				ctxt.Andptr = ctxt.Andptr[1:]
  3536  
  3537  			case Z_il, Zil_:
  3538  				if yt.zcase == Zil_ {
  3539  					a = &p.From
  3540  				} else {
  3541  					a = &p.To
  3542  				}
  3543  				ctxt.Andptr[0] = byte(op)
  3544  				ctxt.Andptr = ctxt.Andptr[1:]
  3545  				if o.prefix == Pe {
  3546  					v = vaddr(ctxt, p, a, nil)
  3547  					ctxt.Andptr[0] = byte(v)
  3548  					ctxt.Andptr = ctxt.Andptr[1:]
  3549  					ctxt.Andptr[0] = byte(v >> 8)
  3550  					ctxt.Andptr = ctxt.Andptr[1:]
  3551  				} else {
  3552  					relput4(ctxt, p, a)
  3553  				}
  3554  
  3555  			case Zm_ilo, Zilo_m:
  3556  				ctxt.Andptr[0] = byte(op)
  3557  				ctxt.Andptr = ctxt.Andptr[1:]
  3558  				if yt.zcase == Zilo_m {
  3559  					a = &p.From
  3560  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3561  				} else {
  3562  					a = &p.To
  3563  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3564  				}
  3565  
  3566  				if o.prefix == Pe {
  3567  					v = vaddr(ctxt, p, a, nil)
  3568  					ctxt.Andptr[0] = byte(v)
  3569  					ctxt.Andptr = ctxt.Andptr[1:]
  3570  					ctxt.Andptr[0] = byte(v >> 8)
  3571  					ctxt.Andptr = ctxt.Andptr[1:]
  3572  				} else {
  3573  					relput4(ctxt, p, a)
  3574  				}
  3575  
  3576  			case Zil_rr:
  3577  				ctxt.Andptr[0] = byte(op)
  3578  				ctxt.Andptr = ctxt.Andptr[1:]
  3579  				asmand(ctxt, p, &p.To, &p.To)
  3580  				if o.prefix == Pe {
  3581  					v = vaddr(ctxt, p, &p.From, nil)
  3582  					ctxt.Andptr[0] = byte(v)
  3583  					ctxt.Andptr = ctxt.Andptr[1:]
  3584  					ctxt.Andptr[0] = byte(v >> 8)
  3585  					ctxt.Andptr = ctxt.Andptr[1:]
  3586  				} else {
  3587  					relput4(ctxt, p, &p.From)
  3588  				}
  3589  
  3590  			case Z_rp:
  3591  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3592  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3593  				ctxt.Andptr = ctxt.Andptr[1:]
  3594  
  3595  			case Zrp_:
  3596  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3597  				ctxt.Andptr[0] = byte(op + reg[p.From.Reg])
  3598  				ctxt.Andptr = ctxt.Andptr[1:]
  3599  
  3600  			case Zclr:
  3601  				ctxt.Rexflag &^= Pw
  3602  				ctxt.Andptr[0] = byte(op)
  3603  				ctxt.Andptr = ctxt.Andptr[1:]
  3604  				asmand(ctxt, p, &p.To, &p.To)
  3605  
  3606  			case Zcallcon, Zjmpcon:
  3607  				if yt.zcase == Zcallcon {
  3608  					ctxt.Andptr[0] = byte(op)
  3609  					ctxt.Andptr = ctxt.Andptr[1:]
  3610  				} else {
  3611  					ctxt.Andptr[0] = byte(o.op[z+1])
  3612  					ctxt.Andptr = ctxt.Andptr[1:]
  3613  				}
  3614  				r = obj.Addrel(ctxt.Cursym)
  3615  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3616  				r.Type = obj.R_PCREL
  3617  				r.Siz = 4
  3618  				r.Add = p.To.Offset
  3619  				put4(ctxt, 0)
  3620  
  3621  			case Zcallind:
  3622  				ctxt.Andptr[0] = byte(op)
  3623  				ctxt.Andptr = ctxt.Andptr[1:]
  3624  				ctxt.Andptr[0] = byte(o.op[z+1])
  3625  				ctxt.Andptr = ctxt.Andptr[1:]
  3626  				r = obj.Addrel(ctxt.Cursym)
  3627  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3628  				r.Type = obj.R_ADDR
  3629  				r.Siz = 4
  3630  				r.Add = p.To.Offset
  3631  				r.Sym = p.To.Sym
  3632  				put4(ctxt, 0)
  3633  
  3634  			case Zcall, Zcallduff:
  3635  				if p.To.Sym == nil {
  3636  					ctxt.Diag("call without target")
  3637  					log.Fatalf("bad code")
  3638  				}
  3639  
  3640  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3641  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3642  				}
  3643  
  3644  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3645  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3646  					// (the call jumps into the middle of the function).
  3647  					// This makes it possible to see call sites for duffcopy/duffzero in
  3648  					// BP-based profiling tools like Linux perf (which is the
  3649  					// whole point of obj.Framepointer_enabled).
  3650  					// MOVQ BP, -16(SP)
  3651  					// LEAQ -16(SP), BP
  3652  					copy(ctxt.Andptr, bpduff1)
  3653  					ctxt.Andptr = ctxt.Andptr[len(bpduff1):]
  3654  				}
  3655  				ctxt.Andptr[0] = byte(op)
  3656  				ctxt.Andptr = ctxt.Andptr[1:]
  3657  				r = obj.Addrel(ctxt.Cursym)
  3658  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3659  				r.Sym = p.To.Sym
  3660  				r.Add = p.To.Offset
  3661  				r.Type = obj.R_CALL
  3662  				r.Siz = 4
  3663  				put4(ctxt, 0)
  3664  
  3665  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3666  					// Pop BP pushed above.
  3667  					// MOVQ 0(BP), BP
  3668  					copy(ctxt.Andptr, bpduff2)
  3669  					ctxt.Andptr = ctxt.Andptr[len(bpduff2):]
  3670  				}
  3671  
  3672  			// TODO: jump across functions needs reloc
  3673  			case Zbr, Zjmp, Zloop:
  3674  				if p.As == AXBEGIN {
  3675  					ctxt.Andptr[0] = byte(op)
  3676  					ctxt.Andptr = ctxt.Andptr[1:]
  3677  				}
  3678  				if p.To.Sym != nil {
  3679  					if yt.zcase != Zjmp {
  3680  						ctxt.Diag("branch to ATEXT")
  3681  						log.Fatalf("bad code")
  3682  					}
  3683  
  3684  					ctxt.Andptr[0] = byte(o.op[z+1])
  3685  					ctxt.Andptr = ctxt.Andptr[1:]
  3686  					r = obj.Addrel(ctxt.Cursym)
  3687  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3688  					r.Sym = p.To.Sym
  3689  					r.Type = obj.R_PCREL
  3690  					r.Siz = 4
  3691  					put4(ctxt, 0)
  3692  					break
  3693  				}
  3694  
  3695  				// Assumes q is in this function.
  3696  				// TODO: Check in input, preserve in brchain.
  3697  
  3698  				// Fill in backward jump now.
  3699  				q = p.Pcond
  3700  
  3701  				if q == nil {
  3702  					ctxt.Diag("jmp/branch/loop without target")
  3703  					log.Fatalf("bad code")
  3704  				}
  3705  
  3706  				if p.Back&1 != 0 {
  3707  					v = q.Pc - (p.Pc + 2)
  3708  					if v >= -128 && p.As != AXBEGIN {
  3709  						if p.As == AJCXZL {
  3710  							ctxt.Andptr[0] = 0x67
  3711  							ctxt.Andptr = ctxt.Andptr[1:]
  3712  						}
  3713  						ctxt.Andptr[0] = byte(op)
  3714  						ctxt.Andptr = ctxt.Andptr[1:]
  3715  						ctxt.Andptr[0] = byte(v)
  3716  						ctxt.Andptr = ctxt.Andptr[1:]
  3717  					} else if yt.zcase == Zloop {
  3718  						ctxt.Diag("loop too far: %v", p)
  3719  					} else {
  3720  						v -= 5 - 2
  3721  						if p.As == AXBEGIN {
  3722  							v--
  3723  						}
  3724  						if yt.zcase == Zbr {
  3725  							ctxt.Andptr[0] = 0x0f
  3726  							ctxt.Andptr = ctxt.Andptr[1:]
  3727  							v--
  3728  						}
  3729  
  3730  						ctxt.Andptr[0] = byte(o.op[z+1])
  3731  						ctxt.Andptr = ctxt.Andptr[1:]
  3732  						ctxt.Andptr[0] = byte(v)
  3733  						ctxt.Andptr = ctxt.Andptr[1:]
  3734  						ctxt.Andptr[0] = byte(v >> 8)
  3735  						ctxt.Andptr = ctxt.Andptr[1:]
  3736  						ctxt.Andptr[0] = byte(v >> 16)
  3737  						ctxt.Andptr = ctxt.Andptr[1:]
  3738  						ctxt.Andptr[0] = byte(v >> 24)
  3739  						ctxt.Andptr = ctxt.Andptr[1:]
  3740  					}
  3741  
  3742  					break
  3743  				}
  3744  
  3745  				// Annotate target; will fill in later.
  3746  				p.Forwd = q.Rel
  3747  
  3748  				q.Rel = p
  3749  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3750  					if p.As == AJCXZL {
  3751  						ctxt.Andptr[0] = 0x67
  3752  						ctxt.Andptr = ctxt.Andptr[1:]
  3753  					}
  3754  					ctxt.Andptr[0] = byte(op)
  3755  					ctxt.Andptr = ctxt.Andptr[1:]
  3756  					ctxt.Andptr[0] = 0
  3757  					ctxt.Andptr = ctxt.Andptr[1:]
  3758  				} else if yt.zcase == Zloop {
  3759  					ctxt.Diag("loop too far: %v", p)
  3760  				} else {
  3761  					if yt.zcase == Zbr {
  3762  						ctxt.Andptr[0] = 0x0f
  3763  						ctxt.Andptr = ctxt.Andptr[1:]
  3764  					}
  3765  					ctxt.Andptr[0] = byte(o.op[z+1])
  3766  					ctxt.Andptr = ctxt.Andptr[1:]
  3767  					ctxt.Andptr[0] = 0
  3768  					ctxt.Andptr = ctxt.Andptr[1:]
  3769  					ctxt.Andptr[0] = 0
  3770  					ctxt.Andptr = ctxt.Andptr[1:]
  3771  					ctxt.Andptr[0] = 0
  3772  					ctxt.Andptr = ctxt.Andptr[1:]
  3773  					ctxt.Andptr[0] = 0
  3774  					ctxt.Andptr = ctxt.Andptr[1:]
  3775  				}
  3776  
  3777  				break
  3778  
  3779  			/*
  3780  				v = q->pc - p->pc - 2;
  3781  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3782  					*ctxt->andptr++ = op;
  3783  					*ctxt->andptr++ = v;
  3784  				} else {
  3785  					v -= 5-2;
  3786  					if(yt.zcase == Zbr) {
  3787  						*ctxt->andptr++ = 0x0f;
  3788  						v--;
  3789  					}
  3790  					*ctxt->andptr++ = o->op[z+1];
  3791  					*ctxt->andptr++ = v;
  3792  					*ctxt->andptr++ = v>>8;
  3793  					*ctxt->andptr++ = v>>16;
  3794  					*ctxt->andptr++ = v>>24;
  3795  				}
  3796  			*/
  3797  
  3798  			case Zbyte:
  3799  				v = vaddr(ctxt, p, &p.From, &rel)
  3800  				if rel.Siz != 0 {
  3801  					rel.Siz = uint8(op)
  3802  					r = obj.Addrel(ctxt.Cursym)
  3803  					*r = rel
  3804  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3805  				}
  3806  
  3807  				ctxt.Andptr[0] = byte(v)
  3808  				ctxt.Andptr = ctxt.Andptr[1:]
  3809  				if op > 1 {
  3810  					ctxt.Andptr[0] = byte(v >> 8)
  3811  					ctxt.Andptr = ctxt.Andptr[1:]
  3812  					if op > 2 {
  3813  						ctxt.Andptr[0] = byte(v >> 16)
  3814  						ctxt.Andptr = ctxt.Andptr[1:]
  3815  						ctxt.Andptr[0] = byte(v >> 24)
  3816  						ctxt.Andptr = ctxt.Andptr[1:]
  3817  						if op > 4 {
  3818  							ctxt.Andptr[0] = byte(v >> 32)
  3819  							ctxt.Andptr = ctxt.Andptr[1:]
  3820  							ctxt.Andptr[0] = byte(v >> 40)
  3821  							ctxt.Andptr = ctxt.Andptr[1:]
  3822  							ctxt.Andptr[0] = byte(v >> 48)
  3823  							ctxt.Andptr = ctxt.Andptr[1:]
  3824  							ctxt.Andptr[0] = byte(v >> 56)
  3825  							ctxt.Andptr = ctxt.Andptr[1:]
  3826  						}
  3827  					}
  3828  				}
  3829  			}
  3830  
  3831  			return
  3832  		}
  3833  		z += int(yt.zoffset) + xo
  3834  	}
  3835  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3836  		var pp obj.Prog
  3837  		var t []byte
  3838  		if p.As == mo[0].as {
  3839  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3840  				t = mo[0].op[:]
  3841  				switch mo[0].code {
  3842  				default:
  3843  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3844  
  3845  				case 0: /* lit */
  3846  					for z = 0; t[z] != E; z++ {
  3847  						ctxt.Andptr[0] = t[z]
  3848  						ctxt.Andptr = ctxt.Andptr[1:]
  3849  					}
  3850  
  3851  				case 1: /* r,m */
  3852  					ctxt.Andptr[0] = t[0]
  3853  					ctxt.Andptr = ctxt.Andptr[1:]
  3854  
  3855  					asmando(ctxt, p, &p.To, int(t[1]))
  3856  
  3857  				case 2: /* m,r */
  3858  					ctxt.Andptr[0] = t[0]
  3859  					ctxt.Andptr = ctxt.Andptr[1:]
  3860  
  3861  					asmando(ctxt, p, &p.From, int(t[1]))
  3862  
  3863  				case 3: /* r,m - 2op */
  3864  					ctxt.Andptr[0] = t[0]
  3865  					ctxt.Andptr = ctxt.Andptr[1:]
  3866  
  3867  					ctxt.Andptr[0] = t[1]
  3868  					ctxt.Andptr = ctxt.Andptr[1:]
  3869  					asmando(ctxt, p, &p.To, int(t[2]))
  3870  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3871  
  3872  				case 4: /* m,r - 2op */
  3873  					ctxt.Andptr[0] = t[0]
  3874  					ctxt.Andptr = ctxt.Andptr[1:]
  3875  
  3876  					ctxt.Andptr[0] = t[1]
  3877  					ctxt.Andptr = ctxt.Andptr[1:]
  3878  					asmando(ctxt, p, &p.From, int(t[2]))
  3879  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3880  
  3881  				case 5: /* load full pointer, trash heap */
  3882  					if t[0] != 0 {
  3883  						ctxt.Andptr[0] = t[0]
  3884  						ctxt.Andptr = ctxt.Andptr[1:]
  3885  					}
  3886  					switch p.To.Index {
  3887  					default:
  3888  						goto bad
  3889  
  3890  					case REG_DS:
  3891  						ctxt.Andptr[0] = 0xc5
  3892  						ctxt.Andptr = ctxt.Andptr[1:]
  3893  
  3894  					case REG_SS:
  3895  						ctxt.Andptr[0] = 0x0f
  3896  						ctxt.Andptr = ctxt.Andptr[1:]
  3897  						ctxt.Andptr[0] = 0xb2
  3898  						ctxt.Andptr = ctxt.Andptr[1:]
  3899  
  3900  					case REG_ES:
  3901  						ctxt.Andptr[0] = 0xc4
  3902  						ctxt.Andptr = ctxt.Andptr[1:]
  3903  
  3904  					case REG_FS:
  3905  						ctxt.Andptr[0] = 0x0f
  3906  						ctxt.Andptr = ctxt.Andptr[1:]
  3907  						ctxt.Andptr[0] = 0xb4
  3908  						ctxt.Andptr = ctxt.Andptr[1:]
  3909  
  3910  					case REG_GS:
  3911  						ctxt.Andptr[0] = 0x0f
  3912  						ctxt.Andptr = ctxt.Andptr[1:]
  3913  						ctxt.Andptr[0] = 0xb5
  3914  						ctxt.Andptr = ctxt.Andptr[1:]
  3915  					}
  3916  
  3917  					asmand(ctxt, p, &p.From, &p.To)
  3918  
  3919  				case 6: /* double shift */
  3920  					if t[0] == Pw {
  3921  						if p.Mode != 64 {
  3922  							ctxt.Diag("asmins: illegal 64: %v", p)
  3923  						}
  3924  						ctxt.Rexflag |= Pw
  3925  						t = t[1:]
  3926  					} else if t[0] == Pe {
  3927  						ctxt.Andptr[0] = Pe
  3928  						ctxt.Andptr = ctxt.Andptr[1:]
  3929  						t = t[1:]
  3930  					}
  3931  
  3932  					switch p.From.Type {
  3933  					default:
  3934  						goto bad
  3935  
  3936  					case obj.TYPE_CONST:
  3937  						ctxt.Andptr[0] = 0x0f
  3938  						ctxt.Andptr = ctxt.Andptr[1:]
  3939  						ctxt.Andptr[0] = t[0]
  3940  						ctxt.Andptr = ctxt.Andptr[1:]
  3941  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3942  						ctxt.Andptr[0] = byte(p.From.Offset)
  3943  						ctxt.Andptr = ctxt.Andptr[1:]
  3944  
  3945  					case obj.TYPE_REG:
  3946  						switch p.From.Reg {
  3947  						default:
  3948  							goto bad
  3949  
  3950  						case REG_CL, REG_CX:
  3951  							ctxt.Andptr[0] = 0x0f
  3952  							ctxt.Andptr = ctxt.Andptr[1:]
  3953  							ctxt.Andptr[0] = t[1]
  3954  							ctxt.Andptr = ctxt.Andptr[1:]
  3955  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3956  						}
  3957  					}
  3958  
  3959  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3960  				// where you load the TLS base register into a register and then index off that
  3961  				// register to access the actual TLS variables. Systems that allow direct TLS access
  3962  				// are handled in prefixof above and should not be listed here.
  3963  				case 7: /* mov tls, r */
  3964  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  3965  						ctxt.Diag("invalid load of TLS: %v", p)
  3966  					}
  3967  
  3968  					if p.Mode == 32 {
  3969  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3970  						// where you load the TLS base register into a register and then index off that
  3971  						// register to access the actual TLS variables. Systems that allow direct TLS access
  3972  						// are handled in prefixof above and should not be listed here.
  3973  						switch ctxt.Headtype {
  3974  						default:
  3975  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3976  
  3977  						case obj.Hlinux,
  3978  							obj.Hnacl:
  3979  							if ctxt.Flag_shared != 0 {
  3980  								// Note that this is not generating the same insns as the other cases.
  3981  								//     MOV TLS, R_to
  3982  								// becomes
  3983  								//     call __x86.get_pc_thunk.cx
  3984  								//     movl (gotpc + g@gotntpoff)(%ecx),$R_To
  3985  								// which is encoded as
  3986  								//     call __x86.get_pc_thunk.cx
  3987  								//     movq 0(%ecx), R_to
  3988  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  3989  								// is g, which we can't check here, but will when we assemble the second
  3990  								// instruction.
  3991  								ctxt.Andptr[0] = 0xe8
  3992  								ctxt.Andptr = ctxt.Andptr[1:]
  3993  								r = obj.Addrel(ctxt.Cursym)
  3994  								r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3995  								r.Type = obj.R_CALL
  3996  								r.Siz = 4
  3997  								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk.cx", 0)
  3998  								put4(ctxt, 0)
  3999  
  4000  								ctxt.Andptr[0] = 0x8B
  4001  								ctxt.Andptr = ctxt.Andptr[1:]
  4002  								ctxt.Andptr[0] = byte(2<<6 | reg[REG_CX] | (reg[p.To.Reg] << 3))
  4003  								ctxt.Andptr = ctxt.Andptr[1:]
  4004  								r = obj.Addrel(ctxt.Cursym)
  4005  								r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  4006  								r.Type = obj.R_TLS_IE
  4007  								r.Siz = 4
  4008  								r.Add = 2
  4009  								put4(ctxt, 0)
  4010  							} else {
  4011  								// ELF TLS base is 0(GS).
  4012  								pp.From = p.From
  4013  
  4014  								pp.From.Type = obj.TYPE_MEM
  4015  								pp.From.Reg = REG_GS
  4016  								pp.From.Offset = 0
  4017  								pp.From.Index = REG_NONE
  4018  								pp.From.Scale = 0
  4019  								ctxt.Andptr[0] = 0x65
  4020  								ctxt.Andptr = ctxt.Andptr[1:] // GS
  4021  								ctxt.Andptr[0] = 0x8B
  4022  								ctxt.Andptr = ctxt.Andptr[1:]
  4023  								asmand(ctxt, p, &pp.From, &p.To)
  4024  							}
  4025  						case obj.Hplan9:
  4026  							if ctxt.Plan9privates == nil {
  4027  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4028  							}
  4029  							pp.From = obj.Addr{}
  4030  							pp.From.Type = obj.TYPE_MEM
  4031  							pp.From.Name = obj.NAME_EXTERN
  4032  							pp.From.Sym = ctxt.Plan9privates
  4033  							pp.From.Offset = 0
  4034  							pp.From.Index = REG_NONE
  4035  							ctxt.Andptr[0] = 0x8B
  4036  							ctxt.Andptr = ctxt.Andptr[1:]
  4037  							asmand(ctxt, p, &pp.From, &p.To)
  4038  
  4039  						case obj.Hwindows:
  4040  							// Windows TLS base is always 0x14(FS).
  4041  							pp.From = p.From
  4042  
  4043  							pp.From.Type = obj.TYPE_MEM
  4044  							pp.From.Reg = REG_FS
  4045  							pp.From.Offset = 0x14
  4046  							pp.From.Index = REG_NONE
  4047  							pp.From.Scale = 0
  4048  							ctxt.Andptr[0] = 0x64
  4049  							ctxt.Andptr = ctxt.Andptr[1:] // FS
  4050  							ctxt.Andptr[0] = 0x8B
  4051  							ctxt.Andptr = ctxt.Andptr[1:]
  4052  							asmand(ctxt, p, &pp.From, &p.To)
  4053  						}
  4054  						break
  4055  					}
  4056  
  4057  					switch ctxt.Headtype {
  4058  					default:
  4059  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4060  
  4061  					case obj.Hlinux:
  4062  						if ctxt.Flag_shared == 0 {
  4063  							log.Fatalf("unknown TLS base location for linux without -shared")
  4064  						}
  4065  						// Note that this is not generating the same insn as the other cases.
  4066  						//     MOV TLS, R_to
  4067  						// becomes
  4068  						//     movq g@gottpoff(%rip), R_to
  4069  						// which is encoded as
  4070  						//     movq 0(%rip), R_to
  4071  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4072  						// is g, which we can't check here, but will when we assemble the second
  4073  						// instruction.
  4074  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4075  
  4076  						ctxt.Andptr[0] = 0x8B
  4077  						ctxt.Andptr = ctxt.Andptr[1:]
  4078  						ctxt.Andptr[0] = byte(0x05 | (reg[p.To.Reg] << 3))
  4079  						ctxt.Andptr = ctxt.Andptr[1:]
  4080  						r = obj.Addrel(ctxt.Cursym)
  4081  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  4082  						r.Type = obj.R_TLS_IE
  4083  						r.Siz = 4
  4084  						r.Add = -4
  4085  						put4(ctxt, 0)
  4086  
  4087  					case obj.Hplan9:
  4088  						if ctxt.Plan9privates == nil {
  4089  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4090  						}
  4091  						pp.From = obj.Addr{}
  4092  						pp.From.Type = obj.TYPE_MEM
  4093  						pp.From.Name = obj.NAME_EXTERN
  4094  						pp.From.Sym = ctxt.Plan9privates
  4095  						pp.From.Offset = 0
  4096  						pp.From.Index = REG_NONE
  4097  						ctxt.Rexflag |= Pw
  4098  						ctxt.Andptr[0] = 0x8B
  4099  						ctxt.Andptr = ctxt.Andptr[1:]
  4100  						asmand(ctxt, p, &pp.From, &p.To)
  4101  
  4102  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4103  						// TLS base is 0(FS).
  4104  						pp.From = p.From
  4105  
  4106  						pp.From.Type = obj.TYPE_MEM
  4107  						pp.From.Name = obj.NAME_NONE
  4108  						pp.From.Reg = REG_NONE
  4109  						pp.From.Offset = 0
  4110  						pp.From.Index = REG_NONE
  4111  						pp.From.Scale = 0
  4112  						ctxt.Rexflag |= Pw
  4113  						ctxt.Andptr[0] = 0x64
  4114  						ctxt.Andptr = ctxt.Andptr[1:] // FS
  4115  						ctxt.Andptr[0] = 0x8B
  4116  						ctxt.Andptr = ctxt.Andptr[1:]
  4117  						asmand(ctxt, p, &pp.From, &p.To)
  4118  
  4119  					case obj.Hwindows:
  4120  						// Windows TLS base is always 0x28(GS).
  4121  						pp.From = p.From
  4122  
  4123  						pp.From.Type = obj.TYPE_MEM
  4124  						pp.From.Name = obj.NAME_NONE
  4125  						pp.From.Reg = REG_GS
  4126  						pp.From.Offset = 0x28
  4127  						pp.From.Index = REG_NONE
  4128  						pp.From.Scale = 0
  4129  						ctxt.Rexflag |= Pw
  4130  						ctxt.Andptr[0] = 0x65
  4131  						ctxt.Andptr = ctxt.Andptr[1:] // GS
  4132  						ctxt.Andptr[0] = 0x8B
  4133  						ctxt.Andptr = ctxt.Andptr[1:]
  4134  						asmand(ctxt, p, &pp.From, &p.To)
  4135  					}
  4136  				}
  4137  				return
  4138  			}
  4139  		}
  4140  	}
  4141  	goto bad
  4142  
  4143  bad:
  4144  	if p.Mode != 64 {
  4145  		/*
  4146  		 * here, the assembly has failed.
  4147  		 * if its a byte instruction that has
  4148  		 * unaddressable registers, try to
  4149  		 * exchange registers and reissue the
  4150  		 * instruction with the operands renamed.
  4151  		 */
  4152  		pp := *p
  4153  
  4154  		unbytereg(&pp.From, &pp.Ft)
  4155  		unbytereg(&pp.To, &pp.Tt)
  4156  
  4157  		z := int(p.From.Reg)
  4158  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4159  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4160  			// For now, different to keep bit-for-bit compatibility.
  4161  			if p.Mode == 32 {
  4162  				breg := byteswapreg(ctxt, &p.To)
  4163  				if breg != REG_AX {
  4164  					ctxt.Andptr[0] = 0x87
  4165  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4166  					asmando(ctxt, p, &p.From, reg[breg])
  4167  					subreg(&pp, z, breg)
  4168  					doasm(ctxt, &pp)
  4169  					ctxt.Andptr[0] = 0x87
  4170  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4171  					asmando(ctxt, p, &p.From, reg[breg])
  4172  				} else {
  4173  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4174  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4175  					subreg(&pp, z, REG_AX)
  4176  					doasm(ctxt, &pp)
  4177  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4178  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4179  				}
  4180  				return
  4181  			}
  4182  
  4183  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4184  				// We certainly don't want to exchange
  4185  				// with AX if the op is MUL or DIV.
  4186  				ctxt.Andptr[0] = 0x87
  4187  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4188  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4189  				subreg(&pp, z, REG_BX)
  4190  				doasm(ctxt, &pp)
  4191  				ctxt.Andptr[0] = 0x87
  4192  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4193  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4194  			} else {
  4195  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4196  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4197  				subreg(&pp, z, REG_AX)
  4198  				doasm(ctxt, &pp)
  4199  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4200  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4201  			}
  4202  			return
  4203  		}
  4204  
  4205  		z = int(p.To.Reg)
  4206  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4207  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4208  			// For now, different to keep bit-for-bit compatibility.
  4209  			if p.Mode == 32 {
  4210  				breg := byteswapreg(ctxt, &p.From)
  4211  				if breg != REG_AX {
  4212  					ctxt.Andptr[0] = 0x87
  4213  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4214  					asmando(ctxt, p, &p.To, reg[breg])
  4215  					subreg(&pp, z, breg)
  4216  					doasm(ctxt, &pp)
  4217  					ctxt.Andptr[0] = 0x87
  4218  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4219  					asmando(ctxt, p, &p.To, reg[breg])
  4220  				} else {
  4221  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4222  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4223  					subreg(&pp, z, REG_AX)
  4224  					doasm(ctxt, &pp)
  4225  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4226  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4227  				}
  4228  				return
  4229  			}
  4230  
  4231  			if isax(&p.From) {
  4232  				ctxt.Andptr[0] = 0x87
  4233  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4234  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4235  				subreg(&pp, z, REG_BX)
  4236  				doasm(ctxt, &pp)
  4237  				ctxt.Andptr[0] = 0x87
  4238  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4239  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4240  			} else {
  4241  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4242  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4243  				subreg(&pp, z, REG_AX)
  4244  				doasm(ctxt, &pp)
  4245  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4246  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4247  			}
  4248  			return
  4249  		}
  4250  	}
  4251  
  4252  	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4253  	return
  4254  }
  4255  
  4256  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4257  // which is not referenced in a.
  4258  // If a is empty, it returns BX to account for MULB-like instructions
  4259  // that might use DX and AX.
  4260  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4261  	cand := 1
  4262  	canc := cand
  4263  	canb := canc
  4264  	cana := canb
  4265  
  4266  	if a.Type == obj.TYPE_NONE {
  4267  		cand = 0
  4268  		cana = cand
  4269  	}
  4270  
  4271  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4272  		switch a.Reg {
  4273  		case REG_NONE:
  4274  			cand = 0
  4275  			cana = cand
  4276  
  4277  		case REG_AX, REG_AL, REG_AH:
  4278  			cana = 0
  4279  
  4280  		case REG_BX, REG_BL, REG_BH:
  4281  			canb = 0
  4282  
  4283  		case REG_CX, REG_CL, REG_CH:
  4284  			canc = 0
  4285  
  4286  		case REG_DX, REG_DL, REG_DH:
  4287  			cand = 0
  4288  		}
  4289  	}
  4290  
  4291  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4292  		switch a.Index {
  4293  		case REG_AX:
  4294  			cana = 0
  4295  
  4296  		case REG_BX:
  4297  			canb = 0
  4298  
  4299  		case REG_CX:
  4300  			canc = 0
  4301  
  4302  		case REG_DX:
  4303  			cand = 0
  4304  		}
  4305  	}
  4306  
  4307  	if cana != 0 {
  4308  		return REG_AX
  4309  	}
  4310  	if canb != 0 {
  4311  		return REG_BX
  4312  	}
  4313  	if canc != 0 {
  4314  		return REG_CX
  4315  	}
  4316  	if cand != 0 {
  4317  		return REG_DX
  4318  	}
  4319  
  4320  	ctxt.Diag("impossible byte register")
  4321  	log.Fatalf("bad code")
  4322  	return 0
  4323  }
  4324  
  4325  func isbadbyte(a *obj.Addr) bool {
  4326  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4327  }
  4328  
  4329  var naclret = []uint8{
  4330  	0x5e, // POPL SI
  4331  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4332  	0x83,
  4333  	0xe6,
  4334  	0xe0, // ANDL $~31, SI
  4335  	0x4c,
  4336  	0x01,
  4337  	0xfe, // ADDQ R15, SI
  4338  	0xff,
  4339  	0xe6, // JMP SI
  4340  }
  4341  
  4342  var naclret8 = []uint8{
  4343  	0x5d, // POPL BP
  4344  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4345  	0x83,
  4346  	0xe5,
  4347  	0xe0, // ANDL $~31, BP
  4348  	0xff,
  4349  	0xe5, // JMP BP
  4350  }
  4351  
  4352  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4353  
  4354  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4355  
  4356  var naclmovs = []uint8{
  4357  	0x89,
  4358  	0xf6, // MOVL SI, SI
  4359  	0x49,
  4360  	0x8d,
  4361  	0x34,
  4362  	0x37, // LEAQ (R15)(SI*1), SI
  4363  	0x89,
  4364  	0xff, // MOVL DI, DI
  4365  	0x49,
  4366  	0x8d,
  4367  	0x3c,
  4368  	0x3f, // LEAQ (R15)(DI*1), DI
  4369  }
  4370  
  4371  var naclstos = []uint8{
  4372  	0x89,
  4373  	0xff, // MOVL DI, DI
  4374  	0x49,
  4375  	0x8d,
  4376  	0x3c,
  4377  	0x3f, // LEAQ (R15)(DI*1), DI
  4378  }
  4379  
  4380  func nacltrunc(ctxt *obj.Link, reg int) {
  4381  	if reg >= REG_R8 {
  4382  		ctxt.Andptr[0] = 0x45
  4383  		ctxt.Andptr = ctxt.Andptr[1:]
  4384  	}
  4385  	reg = (reg - REG_AX) & 7
  4386  	ctxt.Andptr[0] = 0x89
  4387  	ctxt.Andptr = ctxt.Andptr[1:]
  4388  	ctxt.Andptr[0] = byte(3<<6 | reg<<3 | reg)
  4389  	ctxt.Andptr = ctxt.Andptr[1:]
  4390  }
  4391  
  4392  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4393  	ctxt.Andptr = ctxt.And[:]
  4394  	ctxt.Asmode = int(p.Mode)
  4395  
  4396  	if p.As == obj.AUSEFIELD {
  4397  		r := obj.Addrel(ctxt.Cursym)
  4398  		r.Off = 0
  4399  		r.Siz = 0
  4400  		r.Sym = p.From.Sym
  4401  		r.Type = obj.R_USEFIELD
  4402  		return
  4403  	}
  4404  
  4405  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4406  		switch p.As {
  4407  		case obj.ARET:
  4408  			copy(ctxt.Andptr, naclret8)
  4409  			ctxt.Andptr = ctxt.Andptr[len(naclret8):]
  4410  			return
  4411  
  4412  		case obj.ACALL,
  4413  			obj.AJMP:
  4414  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4415  				ctxt.Andptr[0] = 0x83
  4416  				ctxt.Andptr = ctxt.Andptr[1:]
  4417  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4418  				ctxt.Andptr = ctxt.Andptr[1:]
  4419  				ctxt.Andptr[0] = 0xe0
  4420  				ctxt.Andptr = ctxt.Andptr[1:]
  4421  			}
  4422  
  4423  		case AINT:
  4424  			ctxt.Andptr[0] = 0xf4
  4425  			ctxt.Andptr = ctxt.Andptr[1:]
  4426  			return
  4427  		}
  4428  	}
  4429  
  4430  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4431  		if p.As == AREP {
  4432  			ctxt.Rep++
  4433  			return
  4434  		}
  4435  
  4436  		if p.As == AREPN {
  4437  			ctxt.Repn++
  4438  			return
  4439  		}
  4440  
  4441  		if p.As == ALOCK {
  4442  			ctxt.Lock++
  4443  			return
  4444  		}
  4445  
  4446  		if p.As != ALEAQ && p.As != ALEAL {
  4447  			if p.From.Index != obj.TYPE_NONE && p.From.Scale > 0 {
  4448  				nacltrunc(ctxt, int(p.From.Index))
  4449  			}
  4450  			if p.To.Index != obj.TYPE_NONE && p.To.Scale > 0 {
  4451  				nacltrunc(ctxt, int(p.To.Index))
  4452  			}
  4453  		}
  4454  
  4455  		switch p.As {
  4456  		case obj.ARET:
  4457  			copy(ctxt.Andptr, naclret)
  4458  			ctxt.Andptr = ctxt.Andptr[len(naclret):]
  4459  			return
  4460  
  4461  		case obj.ACALL,
  4462  			obj.AJMP:
  4463  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4464  				// ANDL $~31, reg
  4465  				ctxt.Andptr[0] = 0x83
  4466  				ctxt.Andptr = ctxt.Andptr[1:]
  4467  
  4468  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4469  				ctxt.Andptr = ctxt.Andptr[1:]
  4470  				ctxt.Andptr[0] = 0xe0
  4471  				ctxt.Andptr = ctxt.Andptr[1:]
  4472  
  4473  				// ADDQ R15, reg
  4474  				ctxt.Andptr[0] = 0x4c
  4475  				ctxt.Andptr = ctxt.Andptr[1:]
  4476  
  4477  				ctxt.Andptr[0] = 0x01
  4478  				ctxt.Andptr = ctxt.Andptr[1:]
  4479  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_AX))
  4480  				ctxt.Andptr = ctxt.Andptr[1:]
  4481  			}
  4482  
  4483  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4484  				// ANDL $~31, reg
  4485  				ctxt.Andptr[0] = 0x41
  4486  				ctxt.Andptr = ctxt.Andptr[1:]
  4487  
  4488  				ctxt.Andptr[0] = 0x83
  4489  				ctxt.Andptr = ctxt.Andptr[1:]
  4490  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_R8))
  4491  				ctxt.Andptr = ctxt.Andptr[1:]
  4492  				ctxt.Andptr[0] = 0xe0
  4493  				ctxt.Andptr = ctxt.Andptr[1:]
  4494  
  4495  				// ADDQ R15, reg
  4496  				ctxt.Andptr[0] = 0x4d
  4497  				ctxt.Andptr = ctxt.Andptr[1:]
  4498  
  4499  				ctxt.Andptr[0] = 0x01
  4500  				ctxt.Andptr = ctxt.Andptr[1:]
  4501  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_R8))
  4502  				ctxt.Andptr = ctxt.Andptr[1:]
  4503  			}
  4504  
  4505  		case AINT:
  4506  			ctxt.Andptr[0] = 0xf4
  4507  			ctxt.Andptr = ctxt.Andptr[1:]
  4508  			return
  4509  
  4510  		case ASCASB,
  4511  			ASCASW,
  4512  			ASCASL,
  4513  			ASCASQ,
  4514  			ASTOSB,
  4515  			ASTOSW,
  4516  			ASTOSL,
  4517  			ASTOSQ:
  4518  			copy(ctxt.Andptr, naclstos)
  4519  			ctxt.Andptr = ctxt.Andptr[len(naclstos):]
  4520  
  4521  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4522  			copy(ctxt.Andptr, naclmovs)
  4523  			ctxt.Andptr = ctxt.Andptr[len(naclmovs):]
  4524  		}
  4525  
  4526  		if ctxt.Rep != 0 {
  4527  			ctxt.Andptr[0] = 0xf3
  4528  			ctxt.Andptr = ctxt.Andptr[1:]
  4529  			ctxt.Rep = 0
  4530  		}
  4531  
  4532  		if ctxt.Repn != 0 {
  4533  			ctxt.Andptr[0] = 0xf2
  4534  			ctxt.Andptr = ctxt.Andptr[1:]
  4535  			ctxt.Repn = 0
  4536  		}
  4537  
  4538  		if ctxt.Lock != 0 {
  4539  			ctxt.Andptr[0] = 0xf0
  4540  			ctxt.Andptr = ctxt.Andptr[1:]
  4541  			ctxt.Lock = 0
  4542  		}
  4543  	}
  4544  
  4545  	ctxt.Rexflag = 0
  4546  	ctxt.Vexflag = 0
  4547  	and0 := ctxt.Andptr
  4548  	ctxt.Asmode = int(p.Mode)
  4549  	doasm(ctxt, p)
  4550  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4551  		/*
  4552  		 * as befits the whole approach of the architecture,
  4553  		 * the rex prefix must appear before the first opcode byte
  4554  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4555  		 * before the 0f opcode escape!), or it might be ignored.
  4556  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4557  		 */
  4558  		if p.Mode != 64 {
  4559  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4560  		}
  4561  		n := -cap(ctxt.Andptr) + cap(and0)
  4562  		var c int
  4563  		var np int
  4564  		for np = 0; np < n; np++ {
  4565  			c = int(and0[np])
  4566  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4567  				break
  4568  			}
  4569  		}
  4570  
  4571  		copy(and0[np+1:], and0[np:n])
  4572  		and0[np] = byte(0x40 | ctxt.Rexflag)
  4573  		ctxt.Andptr = ctxt.Andptr[1:]
  4574  	}
  4575  
  4576  	n := -cap(ctxt.Andptr) + cap(ctxt.And[:])
  4577  	var r *obj.Reloc
  4578  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4579  		r = &ctxt.Cursym.R[i:][0]
  4580  		if int64(r.Off) < p.Pc {
  4581  			break
  4582  		}
  4583  		if ctxt.Rexflag != 0 {
  4584  			r.Off++
  4585  		}
  4586  		if r.Type == obj.R_PCREL {
  4587  			if p.Mode == 64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4588  				// PC-relative addressing is relative to the end of the instruction,
  4589  				// but the relocations applied by the linker are relative to the end
  4590  				// of the relocation. Because immediate instruction
  4591  				// arguments can follow the PC-relative memory reference in the
  4592  				// instruction encoding, the two may not coincide. In this case,
  4593  				// adjust addend so that linker can keep relocating relative to the
  4594  				// end of the relocation.
  4595  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4596  			} else if p.Mode == 32 {
  4597  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4598  				// assumes that the previous instruction loaded the PC of the end
  4599  				// of that instruction into CX, so the adjustment is relative to
  4600  				// that.
  4601  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4602  			}
  4603  		}
  4604  		if r.Type == obj.R_GOTPCREL && p.Mode == 32 {
  4605  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4606  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4607  		}
  4608  
  4609  	}
  4610  
  4611  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4612  		switch p.To.Reg {
  4613  		case REG_SP:
  4614  			copy(ctxt.Andptr, naclspfix)
  4615  			ctxt.Andptr = ctxt.Andptr[len(naclspfix):]
  4616  
  4617  		case REG_BP:
  4618  			copy(ctxt.Andptr, naclbpfix)
  4619  			ctxt.Andptr = ctxt.Andptr[len(naclbpfix):]
  4620  		}
  4621  	}
  4622  }