github.com/jonasi/go@v0.0.0-20150930005915-e78e654c1de0/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"fmt"
    36  	"log"
    37  	"strings"
    38  )
    39  
    40  // Instruction layout.
    41  
    42  const (
    43  	// Loop alignment constants:
    44  	// want to align loop entry to LoopAlign-byte boundary,
    45  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    46  	// We define a loop entry as the target of a backward jump.
    47  	//
    48  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    49  	// and it aligns all jump targets, not just backward jump targets.
    50  	//
    51  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    52  	// is very slight but negative, so the alignment is disabled by
    53  	// setting MaxLoopPad = 0. The code is here for reference and
    54  	// for future experiments.
    55  	//
    56  	LoopAlign  = 16
    57  	MaxLoopPad = 0
    58  	FuncAlign  = 16
    59  )
    60  
// Optab is one entry of the optab instruction table. It ties an instruction
// to the operand patterns it accepts and to the bytes used to encode it.
type Optab struct {
	// as is the instruction this entry describes (matched against the
	// instruction's opcode when optab is searched).
	as int16
	// ytab lists the accepted operand-class patterns, one ytab row each.
	// It is nil for entries that have no encoding of their own here.
	ytab []ytab
	// prefix is one of the P* constants and selects the prefix bytes.
	prefix uint8
	// op holds the opcode bytes, laid out in the same order as the ytab
	// rows; each row accounts for zoffset of them. Unused tail bytes are 0.
	op [23]uint8
}
    67  
    68  type ytab struct {
    69  	from    uint8
    70  	from3   uint8
    71  	to      uint8
    72  	zcase   uint8
    73  	zoffset uint8
    74  }
    75  
    76  type Movtab struct {
    77  	as   int16
    78  	ft   uint8
    79  	f3t  uint8
    80  	tt   uint8
    81  	code uint8
    82  	op   [4]uint8
    83  }
    84  
// Operand classes ("Ytypes"). The from, from3 and to fields of a ytab row
// hold these values, and the ycover table records which classes also count
// as a more general class. The values are consecutive (iota), so the order
// of this list must not change independently of code that depends on it.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	// Ys32 is described in the optab commentary as "signed 32".
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Ytls
	Ytextsize
	Yindir
	// Ymax is the number of operand classes; ycover is sized Ymax*Ymax.
	Ymax
)
   156  
// Encoding cases ("Ztypes"). The zcase field of a ytab row holds one of
// these; together with the row's bytes from Optab.op it determines which
// instruction bytes are laid down. The values are consecutive (iota).
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	// Zibo_m: an opcode byte, then the encoded addressing mode of the
	// memory/register operand, then a single immediate byte.
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	// Zilo_m: like Zibo_m but with a long (32-bit) immediate.
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_3d
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	// Zmax marks the end of the list.
	Zmax
)
   207  
// Prefix selectors and bit masks.
//
// The P* constants are the values stored in Optab.prefix; they control the
// prefix bytes emitted for an instruction. Some of them (for example Pe, Pm,
// Pf2 and Pf3) also appear as bytes inside an entry's op array. Several are
// purely symbolic tags, as noted on the individual lines.
const (
	Px  = 0
	Px1 = 1    // symbolic; exact value doesn't matter
	P32 = 0x32 /* 32-bit only */
	Pe  = 0x66 /* operand escape */
	Pm  = 0x0f /* 2byte opcode escape */
	Pq  = 0xff /* both escapes: 66 0f */
	Pb  = 0xfe /* byte operands */
	Pf2 = 0xf2 /* xmm escape 1: f2 0f */
	Pf3 = 0xf3 /* xmm escape 2: f3 0f */
	Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
	Pw  = 0x48 /* Rex.w */
	Pw8 = 0x90 // symbolic; exact value doesn't matter
	Py  = 0x80 /* defaults to 64-bit mode */
	Py1 = 0x81 // symbolic; exact value doesn't matter
	Py3 = 0x83 // symbolic; exact value doesn't matter

	// Single-bit masks occupying bits 3..0 (presumably the W, R, X and B
	// bits of a REX prefix - TODO confirm against the code that uses them).
	// The "=1" on Rxw means the bit is set for a 64-bit operand size.
	Rxw = 1 << 3 /* =1, 64-bit operand size */
	Rxr = 1 << 2 /* extend modrm reg */
	Rxx = 1 << 1 /* extend sib index */
	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
)
   230  
   231  var ycover [Ymax * Ymax]uint8
   232  
   233  var reg [MAXREG]int
   234  
   235  var regrex [MAXREG + 1]int
   236  
// Operand-pattern tables for operand-less and pseudo instructions.
// Every row is a ytab{from, from3, to, zcase, zoffset}; zoffset is the number
// of bytes of the owning optab entry's op array that the row accounts for.

// ynone has a single row in which all three operand slots are Ynone; it is
// encoded with Zlit and takes one op byte. optab uses it for e.g. AAAA, ACDQ
// and AHLT.
var ynone = []ytab{
	{Ynone, Ynone, Ynone, Zlit, 1},
}

// ytext has two Zpseudo rows with a Ymb source and a Ytextsize destination;
// the second row additionally accepts a Yi32 from3 operand.
var ytext = []ytab{
	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
}

// ynop accepts no operand at all, or exactly one operand of class Yiauto,
// Yml, Yrf or Yxr in either the from or the to slot. All rows are Zpseudo.
var ynop = []ytab{
	{Ynone, Ynone, Ynone, Zpseudo, 0},
	{Ynone, Ynone, Yiauto, Zpseudo, 0},
	{Ynone, Ynone, Yml, Zpseudo, 0},
	{Ynone, Ynone, Yrf, Zpseudo, 0},
	{Ynone, Ynone, Yxr, Zpseudo, 0},
	{Yiauto, Ynone, Ynone, Zpseudo, 0},
	{Yml, Ynone, Ynone, Zpseudo, 0},
	{Yrf, Ynone, Ynone, Zpseudo, 0},
	{Yxr, Ynone, Ynone, Zpseudo, 1},
}

// yfuncdata accepts a Yi32 source and a Ym destination (Zpseudo).
var yfuncdata = []ytab{
	{Yi32, Ynone, Ym, Zpseudo, 0},
}

// ypcdata accepts Yi32 for both source and destination (Zpseudo).
var ypcdata = []ytab{
	{Yi32, Ynone, Yi32, Zpseudo, 0},
}
   265  
// Operand-pattern tables for two-operand arithmetic/logic forms and for the
// increment/decrement forms. Every row is a
// ytab{from, from3, to, zcase, zoffset}; the zoffset values of a table add up
// to the number of op bytes an optab entry using it must supply.

// yxorb: byte-sized forms, used in optab by e.g. AADCB, AADDB and AANDB.
// The four rows account for 1+2+1+1 = 5 op bytes.
var yxorb = []ytab{
	{Yi32, Ynone, Yal, Zib_, 1},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
}

// yxorl: used in optab by e.g. AADCL/AADCQ/AADCW and AANDL/AANDQ/AANDW.
// The Yi8 row comes first so a short immediate is tried before Yi32.
var yxorl = []ytab{
	{Yi8, Ynone, Yml, Zibo_m, 2},
	{Yi32, Ynone, Yax, Zil_, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}

// yaddl: used in optab by AADDL, AADDQ and AADDW. Its rows are identical to
// those of yxorl.
var yaddl = []ytab{
	{Yi8, Ynone, Yml, Zibo_m, 2},
	{Yi32, Ynone, Yax, Zil_, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}

// yincb: single Ymb destination; used in optab by ADECB and AINCB.
var yincb = []ytab{
	{Ynone, Ynone, Ymb, Zo_m, 2},
}

// yincw: single Yml destination; used in optab by ADECW and AINCW.
var yincw = []ytab{
	{Ynone, Ynone, Yml, Zo_m, 2},
}

// yincl: used in optab by ADECL and AINCL. A Yrl destination is tried first
// (Z_rp, one op byte) before the general Yml form (Zo_m, two op bytes).
var yincl = []ytab{
	{Ynone, Ynone, Yrl, Z_rp, 1},
	{Ynone, Ynone, Yml, Zo_m, 2},
}

// yincq: used in optab by ADECQ and AINCQ. Same single row as yincw.
var yincq = []ytab{
	{Ynone, Ynone, Yml, Zo_m, 2},
}
   305  
// Operand-pattern tables for compare, shift and test forms. Every row is a
// ytab{from, from3, to, zcase, zoffset}.

// ycmpb: used in optab by ACMPB. Compared with yxorb the operand slots are
// mirrored: the immediate appears in the to slot.
var ycmpb = []ytab{
	{Yal, Ynone, Yi32, Z_ib, 1},
	{Ymb, Ynone, Yi32, Zm_ibo, 2},
	{Ymb, Ynone, Yrb, Zm_r, 1},
	{Yrb, Ynone, Ymb, Zr_m, 1},
}

// ycmpl: used in optab by ACMPL, ACMPQ and ACMPW. It mirrors yxorl in the
// same way ycmpb mirrors yxorb.
var ycmpl = []ytab{
	{Yml, Ynone, Yi8, Zm_ibo, 2},
	{Yax, Ynone, Yi32, Z_il, 1},
	{Yml, Ynone, Yi32, Zm_ilo, 2},
	{Yml, Ynone, Yrl, Zm_r, 1},
	{Yrl, Ynone, Yml, Zr_m, 1},
}

// yshb: Ymb destination with a source of class Yi1, Yi32 or Ycx. Each row
// takes two op bytes.
var yshb = []ytab{
	{Yi1, Ynone, Ymb, Zo_m, 2},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
	{Ycx, Ynone, Ymb, Zo_m, 2},
}

// yshl: Yml destination with a source of class Yi1, Yi32, Ycl or Ycx. Each
// row takes two op bytes.
var yshl = []ytab{
	{Yi1, Ynone, Yml, Zo_m, 2},
	{Yi32, Ynone, Yml, Zibo_m, 2},
	{Ycl, Ynone, Yml, Zo_m, 2},
	{Ycx, Ynone, Yml, Zo_m, 2},
}

// ytestb: rows identical to yxorb.
var ytestb = []ytab{
	{Yi32, Ynone, Yal, Zib_, 1},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
}

// ytestl: the rows of yxorl without the leading Yi8 row.
var ytestl = []ytab{
	{Yi32, Ynone, Yax, Zil_, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}
   347  
// Operand-pattern tables for move-style forms, bit tests and return. Every
// row is a ytab{from, from3, to, zcase, zoffset}; rows are tried in order, so
// more specific rows are listed before more general ones.

// ymovb: byte-sized register/memory moves plus immediate sources.
var ymovb = []ytab{
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
	{Yi32, Ynone, Yrb, Zib_rp, 1},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
}

// ymbs: a single Ymb source and no destination.
var ymbs = []ytab{
	{Ymb, Ynone, Ynone, Zm_o, 2},
}

// ybtl: used in optab by the ABT*/ABTC*/ABTR*/ABTS* entries. The source is
// either a Yi8 immediate or a Yrl register; the destination is Yml.
var ybtl = []ytab{
	{Yi8, Ynone, Yml, Zibo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
}

// ymovw: register/memory moves, a dedicated Yi0 row (Zclr), immediate
// sources, and a Yiauto source (Zaut_r).
var ymovw = []ytab{
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
	{Yi0, Ynone, Yrl, Zclr, 1},
	{Yi32, Ynone, Yrl, Zil_rp, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yiauto, Ynone, Yrl, Zaut_r, 2},
}

// ymovl: the rows of ymovw with four MOVD rows (MMX and XMM, each in both
// directions) inserted before the final Yiauto row.
var ymovl = []ytab{
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
	{Yi0, Ynone, Yrl, Zclr, 1},
	{Yi32, Ynone, Yrl, Zil_rp, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
	{Yiauto, Ynone, Yrl, Zaut_r, 2},
}

// yret: no operand, or a single Yi32 source; both rows use Zo_iw.
var yret = []ytab{
	{Ynone, Ynone, Ynone, Zo_iw, 1},
	{Yi32, Ynone, Ynone, Zo_iw, 1},
}

// ymovq: the first group of rows is valid in 32-bit mode, the second only in
// 64-bit mode. The trailing comment on each row names the prefix and opcode
// bytes the row corresponds to.
var ymovq = []ytab{
	// valid in 32-bit mode
	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
}
   412  
// Small operand-pattern tables for register/memory pairs. Every row is a
// ytab{from, from3, to, zcase, zoffset}. In most of these the table name
// spells out the source and destination classes (e.g. ym_rl: Ym -> Yrl).

// ym_rl: Ym source, Yrl destination.
var ym_rl = []ytab{
	{Ym, Ynone, Yrl, Zm_r, 1},
}

// yrl_m: Yrl source, Ym destination; used in optab by ABOUNDL and ABOUNDW.
var yrl_m = []ytab{
	{Yrl, Ynone, Ym, Zr_m, 1},
}

// ymb_rl: Ymb source, Yrl destination, encoded with Zmb_r.
var ymb_rl = []ytab{
	{Ymb, Ynone, Yrl, Zmb_r, 1},
}

// yml_rl: Yml source, Yrl destination; used in optab by e.g. ABSFL, ABSRL
// and the ACMOV* entries.
var yml_rl = []ytab{
	{Yml, Ynone, Yrl, Zm_r, 1},
}

// yrl_ml: Yrl source, Yml destination; used in optab by AARPL.
var yrl_ml = []ytab{
	{Yrl, Ynone, Yml, Zr_m, 1},
}

// yml_mb: despite its name, both rows pair Yrb with Ymb, one row for each
// direction.
var yml_mb = []ytab{
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
}

// yrb_mb: Yrb source, Ymb destination.
var yrb_mb = []ytab{
	{Yrb, Ynone, Ymb, Zr_m, 1},
}

// yxchg: the two Yax rows (Z_rp / Zrp_) come first, followed by the general
// Yrl/Yml rows in both directions.
var yxchg = []ytab{
	{Yax, Ynone, Yrl, Z_rp, 1},
	{Yrl, Ynone, Yax, Zrp_, 1},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}
   448  
// Operand-pattern tables for divide/multiply, port input, interrupt, stack
// and miscellaneous single-operand forms. Every row is a
// ytab{from, from3, to, zcase, zoffset}.

// ydivl: a single Yml source; used in optab by e.g. ADIVL/ADIVQ/ADIVW and
// AIDIVL/AIDIVQ/AIDIVW.
var ydivl = []ytab{
	{Yml, Ynone, Ynone, Zm_o, 2},
}

// ydivb: a single Ymb source; used in optab by ADIVB, AIDIVB and AIMULB.
var ydivb = []ytab{
	{Ymb, Ynone, Ynone, Zm_o, 2},
}

// yimul: used in optab by AIMULL, AIMULQ and AIMULW. It covers the
// one-operand form, the immediate forms (Yi8 before Yi32) and the
// two-operand Yml -> Yrl form; the rows account for 2+1+1+2 = 6 op bytes.
var yimul = []ytab{
	{Yml, Ynone, Ynone, Zm_o, 2},
	{Yi8, Ynone, Yrl, Zib_rr, 1},
	{Yi32, Ynone, Yrl, Zil_rr, 1},
	{Yml, Ynone, Yrl, Zm_r, 2},
}

// yimul3: used in optab by AIMUL3Q. This is a three-operand form: Yi8
// source, Yml from3 and Yrl destination.
var yimul3 = []ytab{
	{Yi8, Yml, Yrl, Zibm_r, 2},
}

// ybyte: a single Yi64 source encoded with Zbyte; used in optab by ABYTE.
var ybyte = []ytab{
	{Yi64, Ynone, Ynone, Zbyte, 1},
}

// yin: used in optab by AINB, AINL and AINW; either a Yi32 source or no
// operand at all, one op byte each.
var yin = []ytab{
	{Yi32, Ynone, Ynone, Zib_, 1},
	{Ynone, Ynone, Ynone, Zlit, 1},
}

// yint: a single Yi32 source; used in optab by AINT.
var yint = []ytab{
	{Yi32, Ynone, Ynone, Zib_, 1},
}

// ypushl: a single source operand of class Yrl, Ym, Yi8 or Yi32, tried in
// that order.
var ypushl = []ytab{
	{Yrl, Ynone, Ynone, Zrp_, 1},
	{Ym, Ynone, Ynone, Zm_o, 2},
	{Yi8, Ynone, Ynone, Zib_, 1},
	{Yi32, Ynone, Ynone, Zil_, 1},
}

// ypopl: a single destination operand of class Yrl or Ym.
var ypopl = []ytab{
	{Ynone, Ynone, Yrl, Z_rp, 1},
	{Ynone, Ynone, Ym, Zo_m, 2},
}

// ybswap: a single Yrl destination taking two op bytes; used in optab by
// ABSWAPL and ABSWAPQ.
var ybswap = []ytab{
	{Ynone, Ynone, Yrl, Z_rp, 2},
}

// yscond: a single Ymb destination.
var yscond = []ytab{
	{Ynone, Ynone, Ymb, Zo_m, 2},
}
   500  
// Operand-pattern tables for control transfer. Every row is a
// ytab{from, from3, to, zcase, zoffset}.

// yjcond: used in optab by the conditional-jump entries (AJCC, AJCS, AJEQ,
// ...). The destination is always Ybr; the source may be absent, Yi0 or Yi1.
// All rows use Zbr and only the last has a non-zero zoffset, so every row
// starts at the same op byte.
var yjcond = []ytab{
	{Ynone, Ynone, Ybr, Zbr, 0},
	{Yi0, Ynone, Ybr, Zbr, 0},
	{Yi1, Ynone, Ybr, Zbr, 1},
}

// yloop: a Ybr destination encoded with Zloop; used in optab by AJCXZL,
// AJCXZW and AJCXZQ.
var yloop = []ytab{
	{Ynone, Ynone, Ybr, Zloop, 1},
}

// ycall: used in optab by obj.ACALL. Destination classes are tried in the
// order Yml, Yrx (with a Yrx source), Yindir, Ybr, Yi32.
var ycall = []ytab{
	{Ynone, Ynone, Yml, Zcallindreg, 0},
	{Yrx, Ynone, Yrx, Zcallindreg, 2},
	{Ynone, Ynone, Yindir, Zcallind, 2},
	{Ynone, Ynone, Ybr, Zcall, 0},
	{Ynone, Ynone, Yi32, Zcallcon, 1},
}

// yduff: a Yi32 destination encoded with Zcallduff.
var yduff = []ytab{
	{Ynone, Ynone, Yi32, Zcallduff, 1},
}

// yjmp: used in optab by obj.AJMP. Destination classes are tried in the
// order Yml, Ybr, Yi32.
var yjmp = []ytab{
	{Ynone, Ynone, Yml, Zo_m64, 2},
	{Ynone, Ynone, Ybr, Zjmp, 0},
	{Ynone, Ynone, Yi32, Zjmpcon, 1},
}
   528  
// Operand-pattern tables whose operands are Yf0, Yrf and Ym (presumably the
// x87 floating-point stack forms - TODO confirm), followed by the
// status/control-word and state save/restore tables. Every row is a
// ytab{from, from3, to, zcase, zoffset}; every row here except the second row
// of ystsw takes two op bytes.

// yfmvd: Ym or Yrf into Yf0, and Yf0 into Ym or Yrf.
var yfmvd = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Ym, Zo_m, 2},
	{Yrf, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfmvdp: Yf0 into Ym or Yrf only.
var yfmvdp = []ytab{
	{Yf0, Ynone, Ym, Zo_m, 2},
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfmvf: Ym into Yf0, and Yf0 into Ym.
var yfmvf = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Ym, Zo_m, 2},
}

// yfmvx: Ym into Yf0 only.
var yfmvx = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
}

// yfmvp: Yf0 into Ym only.
var yfmvp = []ytab{
	{Yf0, Ynone, Ym, Zo_m, 2},
}

// yfcmv: Yrf into Yf0 only.
var yfcmv = []ytab{
	{Yrf, Ynone, Yf0, Zm_o, 2},
}

// yfadd: Ym or Yrf into Yf0, and Yf0 into Yrf.
var yfadd = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
	{Yrf, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfaddp: Yf0 into Yrf only.
var yfaddp = []ytab{
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfxch: Yf0 and Yrf in either order.
var yfxch = []ytab{
	{Yf0, Ynone, Yrf, Zo_m, 2},
	{Yrf, Ynone, Yf0, Zm_o, 2},
}

// ycompp: written as Yf0 -> Yrf; the original note below records that the
// operands are really f0,f1.
var ycompp = []ytab{
	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
}

// ystsw: destination is either Ym (Zo_m, two op bytes) or Yax (Zlit, one op
// byte).
var ystsw = []ytab{
	{Ynone, Ynone, Ym, Zo_m, 2},
	{Ynone, Ynone, Yax, Zlit, 1},
}

// ystcw: a Ym operand, accepted as destination or as source.
var ystcw = []ytab{
	{Ynone, Ynone, Ym, Zo_m, 2},
	{Ym, Ynone, Ynone, Zm_o, 2},
}

// ysvrs: same rows as ystcw; used in optab by AFXRSTOR, AFXSAVE and their
// 64-bit variants.
var ysvrs = []ytab{
	{Ynone, Ynone, Ym, Zo_m, 2},
	{Ym, Ynone, Ynone, Zm_o, 2},
}
   591  
// Operand-pattern tables pairing the Ymm/Ymr and Yxm/Yxr classes. Every row
// is a ytab{from, from3, to, zcase, zoffset}, and every row here uses Zm_r_xm.

// ymm: Ymm -> Ymr (one op byte) or Yxm -> Yxr (two op bytes).
var ymm = []ytab{
	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
}

// yxm: Yxm -> Yxr; used in optab by many entries, e.g. AADDPD, AADDPS,
// AANDPD and ADIVSD.
var yxm = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
}

// yxcvm1: a Yxm source into either Yxr or Ymr; used in optab by ACVTPD2PL,
// ACVTPS2PL, ACVTTPD2PL and ACVTTPS2PL. Each row takes two op bytes.
var yxcvm1 = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
}

// yxcvm2: either a Yxm or a Ymm source into Yxr; used in optab by ACVTPL2PD
// and ACVTPL2PS. Each row takes two op bytes.
var yxcvm2 = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
}
   610  
   611  /*
   612  var yxmq = []ytab{
   613  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   614  }
   615  */
   616  
// Further operand-pattern tables involving the Yxr/Yxm and Ymr/Ymm classes:
// register-only forms, compares, moves, conversions and shifts. Every row is
// a ytab{from, from3, to, zcase, zoffset}.

// yxr: Yxr -> Yxr only.
var yxr = []ytab{
	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
}

// yxr_ml: Yxr source, Yml destination.
var yxr_ml = []ytab{
	{Yxr, Ynone, Yml, Zr_m_xm, 1},
}

// ymr: Ymr -> Ymr only, encoded with Zm_r.
var ymr = []ytab{
	{Ymr, Ynone, Ymr, Zm_r, 1},
}

// ymr_ml: Ymr source, Yml destination.
var ymr_ml = []ytab{
	{Ymr, Ynone, Yml, Zr_m_xm, 1},
}

// yxcmp: Yxm -> Yxr; used in optab by ACOMISD and ACOMISS. Same row as yxm.
var yxcmp = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
}

// yxcmpi: three-operand form with a Yxm source, Yxr from3 and a Yi8 in the
// to slot; used in optab by ACMPPD, ACMPPS, ACMPSD and ACMPSS.
var yxcmpi = []ytab{
	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
}

// yxmov: Yxm -> Yxr and Yxr -> Yxm.
var yxmov = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
}

// yxcvfl: Yxm -> Yrl, one op byte; used in optab by e.g. ACVTSD2SL and
// ACVTTSS2SL.
var yxcvfl = []ytab{
	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
}

// yxcvlf: Yml -> Yxr, one op byte; used in optab by ACVTSL2SD and ACVTSL2SS.
var yxcvlf = []ytab{
	{Yml, Ynone, Yxr, Zm_r_xm, 1},
}

// yxcvfq: like yxcvfl but taking two op bytes; used in optab by e.g.
// ACVTSD2SQ and ACVTTSS2SQ.
var yxcvfq = []ytab{
	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
}

// yxcvqf: like yxcvlf but taking two op bytes; used in optab by ACVTSQ2SD
// and ACVTSQ2SS.
var yxcvqf = []ytab{
	{Yml, Ynone, Yxr, Zm_r_xm, 2},
}

// yps: for each of the Ymr and Yxr destinations there is a register/memory
// source row (Zm_r_xm) followed by a Yi8 immediate row (Zibo_m_xm). The rows
// take 1, 2, 2 and 3 op bytes respectively.
var yps = []ytab{
	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
}

// yxrrl: Yxr source, Yrl destination, encoded with Zm_r.
var yxrrl = []ytab{
	{Yxr, Ynone, Yrl, Zm_r, 1},
}

// ymfp: Ymm -> Ymr encoded with Zm_r_3d; used in optab by API2FW.
var ymfp = []ytab{
	{Ymm, Ynone, Ymr, Zm_r_3d, 1},
}

// ymrxr: a Yxr destination with either a Ymr source (Zm_r) or a Yxm source
// (Zm_r_xm).
var ymrxr = []ytab{
	{Ymr, Ynone, Yxr, Zm_r, 1},
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
}
   681  
// Operand-pattern tables for shuffle/extract/insert forms and a few
// special-purpose instructions. Every row is a
// ytab{from, from3, to, zcase, zoffset}. The Zibm_r rows are three-operand
// forms: an immediate source, a register/memory from3 and a register
// destination.

// ymshuf: Yi8 immediate, Ymm from3, Ymr destination.
var ymshuf = []ytab{
	{Yi8, Ymm, Ymr, Zibm_r, 2},
}

// ymshufb: Yxm -> Yxr encoded with Zm2_r.
var ymshufb = []ytab{
	{Yxm, Ynone, Yxr, Zm2_r, 2},
}

// yxshuf: Yu8 immediate, Yxm from3, Yxr destination.
var yxshuf = []ytab{
	{Yu8, Yxm, Yxr, Zibm_r, 2},
}

// yextrw: Yu8 immediate, Yxr from3, Yrl destination.
var yextrw = []ytab{
	{Yu8, Yxr, Yrl, Zibm_r, 2},
}

// yinsrw: Yu8 immediate, Yml from3, Yxr destination.
var yinsrw = []ytab{
	{Yu8, Yml, Yxr, Zibm_r, 2},
}

// yinsr: Yu8 immediate, Ymm from3, Yxr destination; takes three op bytes.
var yinsr = []ytab{
	{Yu8, Ymm, Yxr, Zibm_r, 3},
}

// ypsdq: Yi8 immediate source, Yxr destination, encoded with Zibo_m.
var ypsdq = []ytab{
	{Yi8, Ynone, Yxr, Zibo_m, 2},
}

// ymskb: Yrl destination from either a Yxr source (two op bytes) or a Ymr
// source (one op byte).
var ymskb = []ytab{
	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
}

// ycrc32l: Yml -> Yrl encoded with Zlitm_r; the row's zoffset is 0.
var ycrc32l = []ytab{
	{Yml, Ynone, Yrl, Zlitm_r, 0},
}

// yprefetch: a single Ym source.
var yprefetch = []ytab{
	{Ym, Ynone, Ynone, Zm_o, 2},
}

// yaes: Yxm -> Yxr encoded with Zlitm_r.
var yaes = []ytab{
	{Yxm, Ynone, Yxr, Zlitm_r, 2},
}

// yaes2: Yu8 immediate, Yxm from3, Yxr destination. Same row as yxshuf.
var yaes2 = []ytab{
	{Yu8, Yxm, Yxr, Zibm_r, 2},
}
   730  
   731  /*
   732   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   733   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   734   * the entry with the given p->as and then looks through the ytable for that
   735   * instruction (the second field in the optab struct) for a line whose from,
   736   * from3 and to values match the Ytypes of the operands.  The function
   737   * oclass in span.c computes the specific Ytype of an operand and then the set
   738   * of more general Ytypes that it satisfies is implied by the ycover table, set
   739   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   740   * from the more general 8-bit constants, but instinit says
   741   *
   742   *        ycover[Yi0*Ymax + Ys32] = 1;
   743   *        ycover[Yi1*Ymax + Ys32] = 1;
   744   *        ycover[Yi8*Ymax + Ys32] = 1;
   745   *
   746   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   747   * if that's what an instruction can handle.
   748   *
   749   * In parallel with the scan through the ytable for the appropriate line, there
   750   * is a z pointer that starts out pointing at the strange magic byte list in
   751   * the Optab struct.  With each step past a non-matching ytable line, z
   752   * advances by that line's zoffset (its last field).  When a matching line is
   753   * found, the z pointer has the extra data to use in laying down the
   754   * instruction bytes.  The actual bytes laid down are a function of the line's
   755   * zcase (that is, the Ztype) and the z bytes.
   756   *
   757   * For example, let's look at AADDL.  The optab line says:
   758   *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   759   *
   760   * and yaddl says
   761   *        var yaddl = []ytab{
   762   *                {Yi8, Ynone, Yml, Zibo_m, 2},
   763   *                {Yi32, Ynone, Yax, Zil_, 1},
   764   *                {Yi32, Ynone, Yml, Zilo_m, 2},
   765   *                {Yrl, Ynone, Yml, Zr_m, 1},
   766   *                {Yml, Ynone, Yrl, Zm_r, 1},
   767   *        }
   768   *
   769   * (from3 is Ynone in every line, so only from and to are shown below),
   770   *
   771   * so there are 5 possible types of ADDL instruction that can be laid down, and
   772   * possible states used to lay them down (Ztype and z pointer, assuming z
   773   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   774   *
   775   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   776   *        Yi32, Yax -> Zil_, z+2 (0x05)
   777   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   778   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   779   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   780   *
   781   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   782   * relatively straightforward as this program goes.
   783   *
   784   * The switch on the matching line's zcase in doasm implements the various Z
   785   * cases.  Zibo_m, for example, is an opcode byte (z[0]) then an asmando (which
   786   * is some kind of encoded addressing mode for the Yml arg), and then a single
   787   * immediate byte.  Zilo_m is the same but a long (32-bit) immediate.
   788   */
   789  var optab =
   790  /*	as, ytab, andproto, opcode */
   791  []Optab{
   792  	{obj.AXXX, nil, 0, [23]uint8{}},
   793  	{AAAA, ynone, P32, [23]uint8{0x37}},
   794  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   795  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   796  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   797  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   798  	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   799  	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   800  	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   801  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   802  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   803  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   804  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   805  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   806  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   807  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   808  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   809  	{AADJSP, nil, 0, [23]uint8{}},
   810  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   811  	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   812  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   813  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   814  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   815  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
   816  	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   817  	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   818  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   819  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   820  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   821  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   822  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   823  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   824  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   825  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   826  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   827  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   828  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   829  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   830  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   831  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   832  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   833  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   834  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   835  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   836  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   837  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   838  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   839  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   840  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   841  	{ABYTE, ybyte, Px, [23]uint8{1}},
   842  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   843  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   844  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   845  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   846  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   847  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   848  	{ACMC, ynone, Px, [23]uint8{0xf5}},
   849  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
   850  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
   851  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
   852  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
   853  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
   854  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
   855  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
   856  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
   857  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
   858  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
   859  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
   860  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
   861  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
   862  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
   863  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
   864  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
   865  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
   866  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
   867  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
   868  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
   869  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
   870  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
   871  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
   872  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
   873  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
   874  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
   875  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
   876  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
   877  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
   878  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
   879  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
   880  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
   881  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
   882  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
   883  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
   884  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
   885  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
   886  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
   887  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
   888  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
   889  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
   890  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
   891  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
   892  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
   893  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
   894  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
   895  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
   896  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
   897  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
   898  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   899  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
   900  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
   901  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   902  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
   903  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
   904  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
   905  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
   906  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
   907  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
   908  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   909  	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
   910  	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
   911  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
   912  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
   913  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
   914  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
   915  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
   916  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
   917  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
   918  	{API2FW, ymfp, Px, [23]uint8{0x0c}},
   919  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
   920  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
   921  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
   922  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
   923  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
   924  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
   925  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
   926  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
   927  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
   928  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
   929  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
   930  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
   931  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
   932  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
   933  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
   934  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
   935  	{ACWD, ynone, Pe, [23]uint8{0x99}},
   936  	{ACQO, ynone, Pw, [23]uint8{0x99}},
   937  	{ADAA, ynone, P32, [23]uint8{0x27}},
   938  	{ADAS, ynone, P32, [23]uint8{0x2f}},
   939  	{obj.ADATA, nil, 0, [23]uint8{}},
   940  	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
   941  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
   942  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
   943  	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
   944  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
   945  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
   946  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
   947  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
   948  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
   949  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
   950  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
   951  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
   952  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
   953  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
   954  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
   955  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
   956  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
   957  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
   958  	{obj.AGLOBL, nil, 0, [23]uint8{}},
   959  	{AHLT, ynone, Px, [23]uint8{0xf4}},
   960  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
   961  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
   962  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
   963  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
   964  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
   965  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   966  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   967  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   968  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
   969  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
   970  	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
   971  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
   972  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
   973  	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
   974  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
   975  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
   976  	{AINSL, ynone, Px, [23]uint8{0x6d}},
   977  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
   978  	{AINT, yint, Px, [23]uint8{0xcd}},
   979  	{AINTO, ynone, P32, [23]uint8{0xce}},
   980  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
   981  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
   982  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
   983  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
   984  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
   985  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
   986  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
   987  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
   988  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
   989  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
   990  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
   991  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
   992  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
   993  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
   994  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
   995  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
   996  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
   997  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
   998  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
   999  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1000  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1001  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1002  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1003  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1004  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1005  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1006  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1007  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1008  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1009  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1010  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1011  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1012  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1013  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1014  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1015  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1016  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1017  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1018  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1019  	{ALONG, ybyte, Px, [23]uint8{4}},
  1020  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1021  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1022  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1023  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1024  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1025  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1026  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1027  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1028  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1029  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1030  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1031  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1032  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1033  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1034  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1035  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1036  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1037  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1038  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1039  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1040  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1041  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1042  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1043  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1044  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1045  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1046  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1047  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1048  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1049  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1050  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1051  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1052  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1053  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1054  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1055  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1056  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1057  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1058  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1059  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1060  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1061  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1062  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1063  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1064  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1065  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1066  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1067  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1068  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1069  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1070  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1071  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1072  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1073  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1074  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1075  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1076  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1077  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1078  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1079  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1080  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1081  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1082  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1083  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1084  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1085  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1086  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1087  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1088  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1089  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1090  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1091  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1092  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1093  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1094  	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1095  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1096  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1097  	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1098  	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1099  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1100  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1101  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1102  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1103  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1104  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1105  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1106  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1107  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1108  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1109  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1110  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1111  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1112  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1113  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1114  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1115  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1116  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1117  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1118  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1119  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1120  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1121  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1122  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1123  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1124  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1125  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1126  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1127  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1128  	{APF2IL, ymfp, Px, [23]uint8{0x1d}},
  1129  	{APF2IW, ymfp, Px, [23]uint8{0x1c}},
  1130  	{API2FL, ymfp, Px, [23]uint8{0x0d}},
  1131  	{APFACC, ymfp, Px, [23]uint8{0xae}},
  1132  	{APFADD, ymfp, Px, [23]uint8{0x9e}},
  1133  	{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
  1134  	{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
  1135  	{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
  1136  	{APFMAX, ymfp, Px, [23]uint8{0xa4}},
  1137  	{APFMIN, ymfp, Px, [23]uint8{0x94}},
  1138  	{APFMUL, ymfp, Px, [23]uint8{0xb4}},
  1139  	{APFNACC, ymfp, Px, [23]uint8{0x8a}},
  1140  	{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
  1141  	{APFRCP, ymfp, Px, [23]uint8{0x96}},
  1142  	{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
  1143  	{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
  1144  	{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
  1145  	{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
  1146  	{APFSUB, ymfp, Px, [23]uint8{0x9a}},
  1147  	{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
  1148  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1149  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1150  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1151  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1152  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1153  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1154  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1155  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1156  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1157  	{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
  1158  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1159  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1160  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1161  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1162  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1163  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1164  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1165  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1166  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1167  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1168  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1169  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1170  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1171  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1172  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1173  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1174  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1175  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1176  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1177  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1178  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1179  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1180  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1181  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1182  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1183  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1184  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1185  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1186  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xe1, Pe, 0x71, 02}},
  1187  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1188  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1189  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1190  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1191  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1192  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1193  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1194  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1195  	{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
  1196  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1197  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1198  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1199  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1200  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1201  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1202  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1203  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1204  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1205  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1206  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1207  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1208  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1209  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1210  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1211  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1212  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1213  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1214  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1215  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1216  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1217  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1218  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1219  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1220  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1221  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1222  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1223  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1224  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1225  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1226  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1227  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1228  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1229  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1230  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1231  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1232  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1233  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1234  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1235  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1236  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1237  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1238  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1239  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1240  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1241  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1242  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1243  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1244  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1245  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1246  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1247  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1248  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1249  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1250  	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1251  	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1252  	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1253  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1254  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1255  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1256  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1257  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1258  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1259  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1260  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1261  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1262  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1263  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1264  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1265  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1266  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1267  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1268  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1269  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1270  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1271  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1272  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1273  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1274  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1275  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1276  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1277  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1278  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1279  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1280  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1281  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1282  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1283  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1284  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1285  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1286  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1287  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1288  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1289  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1290  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1291  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1292  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1293  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1294  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1295  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1296  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1297  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1298  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1299  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1300  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1301  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1302  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1303  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1304  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1305  	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1306  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1307  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1308  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1309  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1310  	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1311  	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1312  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1313  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1314  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1315  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1316  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1317  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1318  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1319  	{AWORD, ybyte, Px, [23]uint8{2}},
  1320  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1321  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1322  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1323  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1324  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1325  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1326  	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1327  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1328  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1329  	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1330  	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1331  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1332  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1333  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1334  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1335  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1336  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1337  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1338  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1339  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1340  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1341  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1342  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1343  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1344  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1345  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1346  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1347  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1348  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1349  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1350  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1351  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1352  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1353  	{AFCOMB, nil, 0, [23]uint8{}},
  1354  	{AFCOMBP, nil, 0, [23]uint8{}},
  1355  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1356  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1357  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1358  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1359  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1360  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1361  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1362  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1363  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1364  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1365  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1366  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1367  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1368  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1369  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1370  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1371  	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1372  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1373  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1374  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1375  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1376  	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1377  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1378  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1379  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1380  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1381  	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1382  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1383  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1384  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1385  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1386  	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1387  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1388  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1389  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1390  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1391  	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1392  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1393  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1394  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1395  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1396  	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1397  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1398  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1399  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1400  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1401  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1402  	{AFFREE, nil, 0, [23]uint8{}},
  1403  	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1404  	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1405  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1406  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1407  	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1408  	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1409  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1410  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1411  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1412  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1413  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1414  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1415  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1416  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1417  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1418  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1419  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1420  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1421  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1422  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1423  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1424  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1425  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1426  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1427  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1428  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1429  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1430  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1431  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1432  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1433  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1434  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1435  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1436  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1437  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1438  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1439  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1440  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1441  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1442  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1443  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1444  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1445  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1446  	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1447  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1448  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1449  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1450  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1451  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1452  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1453  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1454  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1455  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1456  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1457  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1458  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1459  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1460  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1461  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1462  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1463  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1464  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1465  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1466  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1467  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1468  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1469  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1470  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1471  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1472  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1473  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1474  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1475  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1476  	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1477  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1478  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1479  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1480  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1481  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1482  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1483  	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1484  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1485  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1486  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1487  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1488  	{obj.AEND, nil, 0, [23]uint8{}},
  1489  	{0, nil, 0, [23]uint8{}},
  1490  }
  1491  
  1492  var opindex [(ALAST + 1) & obj.AMask]*Optab // array of *Optab with (ALAST + 1) & obj.AMask slots; presumably indexed by masked opcode and populated by instinit — TODO confirm
  1493  
  1494  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1495  // This happens on systems like Solaris that call .so functions instead of system calls.
  1496  // It does not seem to be necessary for any other systems. This is probably working
  1497  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1498  // what that bug is. And this does fix it.
  1499  func isextern(s *obj.LSym) bool {
  1500  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1501  	return strings.HasPrefix(s.Name, "libc_")
  1502  }
  1503  
  1504  // single-instruction no-ops of various lengths.
  1505  // constructed by hand and disassembled with gdb to verify.
  1506  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1507  var nop = [][16]uint8{
  1508  	{0x90},
  1509  	{0x66, 0x90},
  1510  	{0x0F, 0x1F, 0x00},
  1511  	{0x0F, 0x1F, 0x40, 0x00},
  1512  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1513  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1514  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1515  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1516  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1517  }
  1518  
  1519  // Native Client rejects the repeated 0x66 prefix.
  1520  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1521  func fillnop(p []byte, n int) {
  1522  	var m int
  1523  
  1524  	for n > 0 {
  1525  		m = n
  1526  		if m > len(nop) {
  1527  			m = len(nop)
  1528  		}
  1529  		copy(p[:m], nop[m-1][:m])
  1530  		p = p[m:]
  1531  		n -= m
  1532  	}
  1533  }
  1534  
  1535  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1536  	obj.Symgrow(ctxt, s, int64(c)+int64(pad))
  1537  	fillnop(s.P[c:], int(pad))
  1538  	return c + pad
  1539  }
  1540  
  1541  func spadjop(ctxt *obj.Link, p *obj.Prog, l int, q int) int {
  1542  	if p.Mode != 64 || ctxt.Arch.Ptrsize == 4 {
  1543  		return l
  1544  	}
  1545  	return q
  1546  }
  1547  
  1548  func span6(ctxt *obj.Link, s *obj.LSym) {
  1549  	ctxt.Cursym = s
  1550  
  1551  	if s.P != nil {
  1552  		return
  1553  	}
  1554  
  1555  	if ycover[0] == 0 {
  1556  		instinit()
  1557  	}
  1558  
  1559  	var v int32
  1560  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1561  		if p.To.Type == obj.TYPE_BRANCH {
  1562  			if p.Pcond == nil {
  1563  				p.Pcond = p
  1564  			}
  1565  		}
  1566  		if p.As == AADJSP {
  1567  			p.To.Type = obj.TYPE_REG
  1568  			p.To.Reg = REG_SP
  1569  			v = int32(-p.From.Offset)
  1570  			p.From.Offset = int64(v)
  1571  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1572  			if v < 0 {
  1573  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1574  				v = -v
  1575  				p.From.Offset = int64(v)
  1576  			}
  1577  
  1578  			if v == 0 {
  1579  				p.As = obj.ANOP
  1580  			}
  1581  		}
  1582  	}
  1583  
  1584  	var q *obj.Prog
  1585  	for p := s.Text; p != nil; p = p.Link {
  1586  		p.Back = 2 // use short branches first time through
  1587  		q = p.Pcond
  1588  		if q != nil && (q.Back&2 != 0) {
  1589  			p.Back |= 1 // backward jump
  1590  			q.Back |= 4 // loop head
  1591  		}
  1592  
  1593  		if p.As == AADJSP {
  1594  			p.To.Type = obj.TYPE_REG
  1595  			p.To.Reg = REG_SP
  1596  			v = int32(-p.From.Offset)
  1597  			p.From.Offset = int64(v)
  1598  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1599  			if v < 0 {
  1600  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1601  				v = -v
  1602  				p.From.Offset = int64(v)
  1603  			}
  1604  
  1605  			if v == 0 {
  1606  				p.As = obj.ANOP
  1607  			}
  1608  		}
  1609  	}
  1610  
  1611  	n := 0
  1612  	var bp []byte
  1613  	var c int32
  1614  	var i int
  1615  	var loop int32
  1616  	var m int
  1617  	var p *obj.Prog
  1618  	for {
  1619  		loop = 0
  1620  		for i = 0; i < len(s.R); i++ {
  1621  			s.R[i] = obj.Reloc{}
  1622  		}
  1623  		s.R = s.R[:0]
  1624  		s.P = s.P[:0]
  1625  		c = 0
  1626  		for p = s.Text; p != nil; p = p.Link {
  1627  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1628  				var deferreturn *obj.LSym
  1629  
  1630  				if deferreturn == nil {
  1631  					deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1632  				}
  1633  
  1634  				// pad everything to avoid crossing 32-byte boundary
  1635  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1636  					c = naclpad(ctxt, s, c, -c&31)
  1637  				}
  1638  
  1639  				// pad call deferreturn to start at 32-byte boundary
  1640  				// so that subtracting 5 in jmpdefer will jump back
  1641  				// to that boundary and rerun the call.
  1642  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1643  					c = naclpad(ctxt, s, c, -c&31)
  1644  				}
  1645  
  1646  				// pad call to end at 32-byte boundary
  1647  				if p.As == obj.ACALL {
  1648  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1649  				}
  1650  
  1651  				// the linker treats REP and STOSQ as different instructions
  1652  				// but in fact the REP is a prefix on the STOSQ.
  1653  				// make sure REP has room for 2 more bytes, so that
  1654  				// padding will not be inserted before the next instruction.
  1655  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1656  					c = naclpad(ctxt, s, c, -c&31)
  1657  				}
  1658  
  1659  				// same for LOCK.
  1660  				// various instructions follow; the longest is 4 bytes.
  1661  				// give ourselves 8 bytes so as to avoid surprises.
  1662  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1663  					c = naclpad(ctxt, s, c, -c&31)
  1664  				}
  1665  			}
  1666  
  1667  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1668  				// pad with NOPs
  1669  				v = -c & (LoopAlign - 1)
  1670  
  1671  				if v <= MaxLoopPad {
  1672  					obj.Symgrow(ctxt, s, int64(c)+int64(v))
  1673  					fillnop(s.P[c:], int(v))
  1674  					c += v
  1675  				}
  1676  			}
  1677  
  1678  			p.Pc = int64(c)
  1679  
  1680  			// process forward jumps to p
  1681  			for q = p.Rel; q != nil; q = q.Forwd {
  1682  				v = int32(p.Pc - (q.Pc + int64(q.Mark)))
  1683  				if q.Back&2 != 0 { // short
  1684  					if v > 127 {
  1685  						loop++
  1686  						q.Back ^= 2
  1687  					}
  1688  
  1689  					if q.As == AJCXZL {
  1690  						s.P[q.Pc+2] = byte(v)
  1691  					} else {
  1692  						s.P[q.Pc+1] = byte(v)
  1693  					}
  1694  				} else {
  1695  					bp = s.P[q.Pc+int64(q.Mark)-4:]
  1696  					bp[0] = byte(v)
  1697  					bp = bp[1:]
  1698  					bp[0] = byte(v >> 8)
  1699  					bp = bp[1:]
  1700  					bp[0] = byte(v >> 16)
  1701  					bp = bp[1:]
  1702  					bp[0] = byte(v >> 24)
  1703  				}
  1704  			}
  1705  
  1706  			p.Rel = nil
  1707  
  1708  			p.Pc = int64(c)
  1709  			asmins(ctxt, p)
  1710  			m = -cap(ctxt.Andptr) + cap(ctxt.And[:])
  1711  			if int(p.Isize) != m {
  1712  				p.Isize = uint8(m)
  1713  				loop++
  1714  			}
  1715  
  1716  			obj.Symgrow(ctxt, s, p.Pc+int64(m))
  1717  			copy(s.P[p.Pc:][:m], ctxt.And[:m])
  1718  			p.Mark = uint16(m)
  1719  			c += int32(m)
  1720  		}
  1721  
  1722  		n++
  1723  		if n > 20 {
  1724  			ctxt.Diag("span must be looping")
  1725  			log.Fatalf("loop")
  1726  		}
  1727  		if loop == 0 {
  1728  			break
  1729  		}
  1730  	}
  1731  
  1732  	if ctxt.Headtype == obj.Hnacl {
  1733  		c = naclpad(ctxt, s, c, -c&31)
  1734  	}
  1735  
  1736  	c += -c & (FuncAlign - 1)
  1737  	s.Size = int64(c)
  1738  
  1739  	if false { /* debug['a'] > 1 */
  1740  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1741  		var i int
  1742  		for i = 0; i < len(s.P); i++ {
  1743  			fmt.Printf(" %.2x", s.P[i])
  1744  			if i%16 == 15 {
  1745  				fmt.Printf("\n  %.6x", uint(i+1))
  1746  			}
  1747  		}
  1748  
  1749  		if i%16 != 0 {
  1750  			fmt.Printf("\n")
  1751  		}
  1752  
  1753  		for i := 0; i < len(s.R); i++ {
  1754  			r := &s.R[i]
  1755  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1756  		}
  1757  	}
  1758  }
  1759  
  1760  func instinit() {
  1761  	var c int
  1762  
  1763  	for i := 1; optab[i].as != 0; i++ {
  1764  		c = int(optab[i].as)
  1765  		if opindex[c&obj.AMask] != nil {
  1766  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  1767  		}
  1768  		opindex[c&obj.AMask] = &optab[i]
  1769  	}
  1770  
  1771  	for i := 0; i < Ymax; i++ {
  1772  		ycover[i*Ymax+i] = 1
  1773  	}
  1774  
  1775  	ycover[Yi0*Ymax+Yi8] = 1
  1776  	ycover[Yi1*Ymax+Yi8] = 1
  1777  	ycover[Yu7*Ymax+Yi8] = 1
  1778  
  1779  	ycover[Yi0*Ymax+Yu7] = 1
  1780  	ycover[Yi1*Ymax+Yu7] = 1
  1781  
  1782  	ycover[Yi0*Ymax+Yu8] = 1
  1783  	ycover[Yi1*Ymax+Yu8] = 1
  1784  	ycover[Yu7*Ymax+Yu8] = 1
  1785  
  1786  	ycover[Yi0*Ymax+Ys32] = 1
  1787  	ycover[Yi1*Ymax+Ys32] = 1
  1788  	ycover[Yu7*Ymax+Ys32] = 1
  1789  	ycover[Yu8*Ymax+Ys32] = 1
  1790  	ycover[Yi8*Ymax+Ys32] = 1
  1791  
  1792  	ycover[Yi0*Ymax+Yi32] = 1
  1793  	ycover[Yi1*Ymax+Yi32] = 1
  1794  	ycover[Yu7*Ymax+Yi32] = 1
  1795  	ycover[Yu8*Ymax+Yi32] = 1
  1796  	ycover[Yi8*Ymax+Yi32] = 1
  1797  	ycover[Ys32*Ymax+Yi32] = 1
  1798  
  1799  	ycover[Yi0*Ymax+Yi64] = 1
  1800  	ycover[Yi1*Ymax+Yi64] = 1
  1801  	ycover[Yu7*Ymax+Yi64] = 1
  1802  	ycover[Yu8*Ymax+Yi64] = 1
  1803  	ycover[Yi8*Ymax+Yi64] = 1
  1804  	ycover[Ys32*Ymax+Yi64] = 1
  1805  	ycover[Yi32*Ymax+Yi64] = 1
  1806  
  1807  	ycover[Yal*Ymax+Yrb] = 1
  1808  	ycover[Ycl*Ymax+Yrb] = 1
  1809  	ycover[Yax*Ymax+Yrb] = 1
  1810  	ycover[Ycx*Ymax+Yrb] = 1
  1811  	ycover[Yrx*Ymax+Yrb] = 1
  1812  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  1813  
  1814  	ycover[Ycl*Ymax+Ycx] = 1
  1815  
  1816  	ycover[Yax*Ymax+Yrx] = 1
  1817  	ycover[Ycx*Ymax+Yrx] = 1
  1818  
  1819  	ycover[Yax*Ymax+Yrl] = 1
  1820  	ycover[Ycx*Ymax+Yrl] = 1
  1821  	ycover[Yrx*Ymax+Yrl] = 1
  1822  	ycover[Yrl32*Ymax+Yrl] = 1
  1823  
  1824  	ycover[Yf0*Ymax+Yrf] = 1
  1825  
  1826  	ycover[Yal*Ymax+Ymb] = 1
  1827  	ycover[Ycl*Ymax+Ymb] = 1
  1828  	ycover[Yax*Ymax+Ymb] = 1
  1829  	ycover[Ycx*Ymax+Ymb] = 1
  1830  	ycover[Yrx*Ymax+Ymb] = 1
  1831  	ycover[Yrb*Ymax+Ymb] = 1
  1832  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  1833  	ycover[Ym*Ymax+Ymb] = 1
  1834  
  1835  	ycover[Yax*Ymax+Yml] = 1
  1836  	ycover[Ycx*Ymax+Yml] = 1
  1837  	ycover[Yrx*Ymax+Yml] = 1
  1838  	ycover[Yrl*Ymax+Yml] = 1
  1839  	ycover[Yrl32*Ymax+Yml] = 1
  1840  	ycover[Ym*Ymax+Yml] = 1
  1841  
  1842  	ycover[Yax*Ymax+Ymm] = 1
  1843  	ycover[Ycx*Ymax+Ymm] = 1
  1844  	ycover[Yrx*Ymax+Ymm] = 1
  1845  	ycover[Yrl*Ymax+Ymm] = 1
  1846  	ycover[Yrl32*Ymax+Ymm] = 1
  1847  	ycover[Ym*Ymax+Ymm] = 1
  1848  	ycover[Ymr*Ymax+Ymm] = 1
  1849  
  1850  	ycover[Ym*Ymax+Yxm] = 1
  1851  	ycover[Yxr*Ymax+Yxm] = 1
  1852  
  1853  	for i := 0; i < MAXREG; i++ {
  1854  		reg[i] = -1
  1855  		if i >= REG_AL && i <= REG_R15B {
  1856  			reg[i] = (i - REG_AL) & 7
  1857  			if i >= REG_SPB && i <= REG_DIB {
  1858  				regrex[i] = 0x40
  1859  			}
  1860  			if i >= REG_R8B && i <= REG_R15B {
  1861  				regrex[i] = Rxr | Rxx | Rxb
  1862  			}
  1863  		}
  1864  
  1865  		if i >= REG_AH && i <= REG_BH {
  1866  			reg[i] = 4 + ((i - REG_AH) & 7)
  1867  		}
  1868  		if i >= REG_AX && i <= REG_R15 {
  1869  			reg[i] = (i - REG_AX) & 7
  1870  			if i >= REG_R8 {
  1871  				regrex[i] = Rxr | Rxx | Rxb
  1872  			}
  1873  		}
  1874  
  1875  		if i >= REG_F0 && i <= REG_F0+7 {
  1876  			reg[i] = (i - REG_F0) & 7
  1877  		}
  1878  		if i >= REG_M0 && i <= REG_M0+7 {
  1879  			reg[i] = (i - REG_M0) & 7
  1880  		}
  1881  		if i >= REG_X0 && i <= REG_X0+15 {
  1882  			reg[i] = (i - REG_X0) & 7
  1883  			if i >= REG_X0+8 {
  1884  				regrex[i] = Rxr | Rxx | Rxb
  1885  			}
  1886  		}
  1887  
  1888  		if i >= REG_CR+8 && i <= REG_CR+15 {
  1889  			regrex[i] = Rxr
  1890  		}
  1891  	}
  1892  }
  1893  
  1894  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  1895  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  1896  		return 0
  1897  	}
  1898  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  1899  		switch a.Reg {
  1900  		case REG_CS:
  1901  			return 0x2e
  1902  
  1903  		case REG_DS:
  1904  			return 0x3e
  1905  
  1906  		case REG_ES:
  1907  			return 0x26
  1908  
  1909  		case REG_FS:
  1910  			return 0x64
  1911  
  1912  		case REG_GS:
  1913  			return 0x65
  1914  
  1915  		case REG_TLS:
  1916  			// NOTE: Systems listed here should be only systems that
  1917  			// support direct TLS references like 8(TLS) implemented as
  1918  			// direct references from FS or GS. Systems that require
  1919  			// the initial-exec model, where you load the TLS base into
  1920  			// a register and then index from that register, do not reach
  1921  			// this code and should not be listed.
  1922  			if p.Mode == 32 {
  1923  				switch ctxt.Headtype {
  1924  				default:
  1925  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1926  
  1927  				case obj.Hdarwin,
  1928  					obj.Hdragonfly,
  1929  					obj.Hfreebsd,
  1930  					obj.Hnetbsd,
  1931  					obj.Hopenbsd:
  1932  					return 0x65 // GS
  1933  				}
  1934  			}
  1935  
  1936  			switch ctxt.Headtype {
  1937  			default:
  1938  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1939  
  1940  			case obj.Hlinux:
  1941  				if ctxt.Flag_shared != 0 {
  1942  					log.Fatalf("unknown TLS base register for linux with -shared")
  1943  				} else {
  1944  					return 0x64 // FS
  1945  				}
  1946  
  1947  			case obj.Hdragonfly,
  1948  				obj.Hfreebsd,
  1949  				obj.Hnetbsd,
  1950  				obj.Hopenbsd,
  1951  				obj.Hsolaris:
  1952  				return 0x64 // FS
  1953  
  1954  			case obj.Hdarwin:
  1955  				return 0x65 // GS
  1956  			}
  1957  		}
  1958  	}
  1959  
  1960  	if p.Mode == 32 {
  1961  		return 0
  1962  	}
  1963  
  1964  	switch a.Index {
  1965  	case REG_CS:
  1966  		return 0x2e
  1967  
  1968  	case REG_DS:
  1969  		return 0x3e
  1970  
  1971  	case REG_ES:
  1972  		return 0x26
  1973  
  1974  	case REG_TLS:
  1975  		if ctxt.Flag_shared != 0 {
  1976  			// When building for inclusion into a shared library, an instruction of the form
  1977  			//     MOV 0(CX)(TLS*1), AX
  1978  			// becomes
  1979  			//     mov %fs:(%rcx), %rax
  1980  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  1981  			// there is only one TLS variable -- g -- so this is OK). When not building for
  1982  			// a shared library the instruction does not require a prefix.
  1983  			if a.Offset != 0 {
  1984  				log.Fatalf("cannot handle non-0 offsets to TLS")
  1985  			}
  1986  			return 0x64
  1987  		}
  1988  
  1989  	case REG_FS:
  1990  		return 0x64
  1991  
  1992  	case REG_GS:
  1993  		return 0x65
  1994  	}
  1995  
  1996  	return 0
  1997  }
  1998  
  1999  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2000  	switch a.Type {
  2001  	case obj.TYPE_NONE:
  2002  		return Ynone
  2003  
  2004  	case obj.TYPE_BRANCH:
  2005  		return Ybr
  2006  
  2007  	case obj.TYPE_INDIR:
  2008  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2009  			return Yindir
  2010  		}
  2011  		return Yxxx
  2012  
  2013  	case obj.TYPE_MEM:
  2014  		return Ym
  2015  
  2016  	case obj.TYPE_ADDR:
  2017  		switch a.Name {
  2018  		case obj.NAME_EXTERN,
  2019  			obj.NAME_GOTREF,
  2020  			obj.NAME_STATIC:
  2021  			if a.Sym != nil && isextern(a.Sym) || p.Mode == 32 {
  2022  				return Yi32
  2023  			}
  2024  			return Yiauto // use pc-relative addressing
  2025  
  2026  		case obj.NAME_AUTO,
  2027  			obj.NAME_PARAM:
  2028  			return Yiauto
  2029  		}
  2030  
  2031  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2032  		// and got Yi32 in an earlier version of this code.
  2033  		// Keep doing that until we fix yduff etc.
  2034  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2035  			return Yi32
  2036  		}
  2037  
  2038  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2039  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2040  		}
  2041  		fallthrough
  2042  
  2043  		// fall through
  2044  
  2045  	case obj.TYPE_CONST:
  2046  		if a.Sym != nil {
  2047  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2048  		}
  2049  
  2050  		v := a.Offset
  2051  		if p.Mode == 32 {
  2052  			v = int64(int32(v))
  2053  		}
  2054  		if v == 0 {
  2055  			return Yi0
  2056  		}
  2057  		if v == 1 {
  2058  			return Yi1
  2059  		}
  2060  		if v >= 0 && v <= 127 {
  2061  			return Yu7
  2062  		}
  2063  		if v >= 0 && v <= 255 {
  2064  			return Yu8
  2065  		}
  2066  		if v >= -128 && v <= 127 {
  2067  			return Yi8
  2068  		}
  2069  		if p.Mode == 32 {
  2070  			return Yi32
  2071  		}
  2072  		l := int32(v)
  2073  		if int64(l) == v {
  2074  			return Ys32 /* can sign extend */
  2075  		}
  2076  		if v>>32 == 0 {
  2077  			return Yi32 /* unsigned */
  2078  		}
  2079  		return Yi64
  2080  
  2081  	case obj.TYPE_TEXTSIZE:
  2082  		return Ytextsize
  2083  	}
  2084  
  2085  	if a.Type != obj.TYPE_REG {
  2086  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2087  		return Yxxx
  2088  	}
  2089  
  2090  	switch a.Reg {
  2091  	case REG_AL:
  2092  		return Yal
  2093  
  2094  	case REG_AX:
  2095  		return Yax
  2096  
  2097  		/*
  2098  			case REG_SPB:
  2099  		*/
  2100  	case REG_BPB,
  2101  		REG_SIB,
  2102  		REG_DIB,
  2103  		REG_R8B,
  2104  		REG_R9B,
  2105  		REG_R10B,
  2106  		REG_R11B,
  2107  		REG_R12B,
  2108  		REG_R13B,
  2109  		REG_R14B,
  2110  		REG_R15B:
  2111  		if ctxt.Asmode != 64 {
  2112  			return Yxxx
  2113  		}
  2114  		fallthrough
  2115  
  2116  	case REG_DL,
  2117  		REG_BL,
  2118  		REG_AH,
  2119  		REG_CH,
  2120  		REG_DH,
  2121  		REG_BH:
  2122  		return Yrb
  2123  
  2124  	case REG_CL:
  2125  		return Ycl
  2126  
  2127  	case REG_CX:
  2128  		return Ycx
  2129  
  2130  	case REG_DX, REG_BX:
  2131  		return Yrx
  2132  
  2133  	case REG_R8, /* not really Yrl */
  2134  		REG_R9,
  2135  		REG_R10,
  2136  		REG_R11,
  2137  		REG_R12,
  2138  		REG_R13,
  2139  		REG_R14,
  2140  		REG_R15:
  2141  		if ctxt.Asmode != 64 {
  2142  			return Yxxx
  2143  		}
  2144  		fallthrough
  2145  
  2146  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2147  		if p.Mode == 32 {
  2148  			return Yrl32
  2149  		}
  2150  		return Yrl
  2151  
  2152  	case REG_F0 + 0:
  2153  		return Yf0
  2154  
  2155  	case REG_F0 + 1,
  2156  		REG_F0 + 2,
  2157  		REG_F0 + 3,
  2158  		REG_F0 + 4,
  2159  		REG_F0 + 5,
  2160  		REG_F0 + 6,
  2161  		REG_F0 + 7:
  2162  		return Yrf
  2163  
  2164  	case REG_M0 + 0,
  2165  		REG_M0 + 1,
  2166  		REG_M0 + 2,
  2167  		REG_M0 + 3,
  2168  		REG_M0 + 4,
  2169  		REG_M0 + 5,
  2170  		REG_M0 + 6,
  2171  		REG_M0 + 7:
  2172  		return Ymr
  2173  
  2174  	case REG_X0 + 0,
  2175  		REG_X0 + 1,
  2176  		REG_X0 + 2,
  2177  		REG_X0 + 3,
  2178  		REG_X0 + 4,
  2179  		REG_X0 + 5,
  2180  		REG_X0 + 6,
  2181  		REG_X0 + 7,
  2182  		REG_X0 + 8,
  2183  		REG_X0 + 9,
  2184  		REG_X0 + 10,
  2185  		REG_X0 + 11,
  2186  		REG_X0 + 12,
  2187  		REG_X0 + 13,
  2188  		REG_X0 + 14,
  2189  		REG_X0 + 15:
  2190  		return Yxr
  2191  
  2192  	case REG_CS:
  2193  		return Ycs
  2194  	case REG_SS:
  2195  		return Yss
  2196  	case REG_DS:
  2197  		return Yds
  2198  	case REG_ES:
  2199  		return Yes
  2200  	case REG_FS:
  2201  		return Yfs
  2202  	case REG_GS:
  2203  		return Ygs
  2204  	case REG_TLS:
  2205  		return Ytls
  2206  
  2207  	case REG_GDTR:
  2208  		return Ygdtr
  2209  	case REG_IDTR:
  2210  		return Yidtr
  2211  	case REG_LDTR:
  2212  		return Yldtr
  2213  	case REG_MSW:
  2214  		return Ymsw
  2215  	case REG_TASK:
  2216  		return Ytask
  2217  
  2218  	case REG_CR + 0:
  2219  		return Ycr0
  2220  	case REG_CR + 1:
  2221  		return Ycr1
  2222  	case REG_CR + 2:
  2223  		return Ycr2
  2224  	case REG_CR + 3:
  2225  		return Ycr3
  2226  	case REG_CR + 4:
  2227  		return Ycr4
  2228  	case REG_CR + 5:
  2229  		return Ycr5
  2230  	case REG_CR + 6:
  2231  		return Ycr6
  2232  	case REG_CR + 7:
  2233  		return Ycr7
  2234  	case REG_CR + 8:
  2235  		return Ycr8
  2236  
  2237  	case REG_DR + 0:
  2238  		return Ydr0
  2239  	case REG_DR + 1:
  2240  		return Ydr1
  2241  	case REG_DR + 2:
  2242  		return Ydr2
  2243  	case REG_DR + 3:
  2244  		return Ydr3
  2245  	case REG_DR + 4:
  2246  		return Ydr4
  2247  	case REG_DR + 5:
  2248  		return Ydr5
  2249  	case REG_DR + 6:
  2250  		return Ydr6
  2251  	case REG_DR + 7:
  2252  		return Ydr7
  2253  
  2254  	case REG_TR + 0:
  2255  		return Ytr0
  2256  	case REG_TR + 1:
  2257  		return Ytr1
  2258  	case REG_TR + 2:
  2259  		return Ytr2
  2260  	case REG_TR + 3:
  2261  		return Ytr3
  2262  	case REG_TR + 4:
  2263  		return Ytr4
  2264  	case REG_TR + 5:
  2265  		return Ytr5
  2266  	case REG_TR + 6:
  2267  		return Ytr6
  2268  	case REG_TR + 7:
  2269  		return Ytr7
  2270  	}
  2271  
  2272  	return Yxxx
  2273  }
  2274  
  2275  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2276  	var i int
  2277  
  2278  	switch index {
  2279  	default:
  2280  		goto bad
  2281  
  2282  	case REG_NONE:
  2283  		i = 4 << 3
  2284  		goto bas
  2285  
  2286  	case REG_R8,
  2287  		REG_R9,
  2288  		REG_R10,
  2289  		REG_R11,
  2290  		REG_R12,
  2291  		REG_R13,
  2292  		REG_R14,
  2293  		REG_R15:
  2294  		if ctxt.Asmode != 64 {
  2295  			goto bad
  2296  		}
  2297  		fallthrough
  2298  
  2299  	case REG_AX,
  2300  		REG_CX,
  2301  		REG_DX,
  2302  		REG_BX,
  2303  		REG_BP,
  2304  		REG_SI,
  2305  		REG_DI:
  2306  		i = reg[index] << 3
  2307  	}
  2308  
  2309  	switch scale {
  2310  	default:
  2311  		goto bad
  2312  
  2313  	case 1:
  2314  		break
  2315  
  2316  	case 2:
  2317  		i |= 1 << 6
  2318  
  2319  	case 4:
  2320  		i |= 2 << 6
  2321  
  2322  	case 8:
  2323  		i |= 3 << 6
  2324  	}
  2325  
  2326  bas:
  2327  	switch base {
  2328  	default:
  2329  		goto bad
  2330  
  2331  	case REG_NONE: /* must be mod=00 */
  2332  		i |= 5
  2333  
  2334  	case REG_R8,
  2335  		REG_R9,
  2336  		REG_R10,
  2337  		REG_R11,
  2338  		REG_R12,
  2339  		REG_R13,
  2340  		REG_R14,
  2341  		REG_R15:
  2342  		if ctxt.Asmode != 64 {
  2343  			goto bad
  2344  		}
  2345  		fallthrough
  2346  
  2347  	case REG_AX,
  2348  		REG_CX,
  2349  		REG_DX,
  2350  		REG_BX,
  2351  		REG_SP,
  2352  		REG_BP,
  2353  		REG_SI,
  2354  		REG_DI:
  2355  		i |= reg[base]
  2356  	}
  2357  
  2358  	ctxt.Andptr[0] = byte(i)
  2359  	ctxt.Andptr = ctxt.Andptr[1:]
  2360  	return
  2361  
  2362  bad:
  2363  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2364  	ctxt.Andptr[0] = 0
  2365  	ctxt.Andptr = ctxt.Andptr[1:]
  2366  	return
  2367  }
  2368  
  2369  func put4(ctxt *obj.Link, v int32) {
  2370  	ctxt.Andptr[0] = byte(v)
  2371  	ctxt.Andptr[1] = byte(v >> 8)
  2372  	ctxt.Andptr[2] = byte(v >> 16)
  2373  	ctxt.Andptr[3] = byte(v >> 24)
  2374  	ctxt.Andptr = ctxt.Andptr[4:]
  2375  }
  2376  
  2377  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2378  	var rel obj.Reloc
  2379  
  2380  	v := vaddr(ctxt, p, a, &rel)
  2381  	if rel.Siz != 0 {
  2382  		if rel.Siz != 4 {
  2383  			ctxt.Diag("bad reloc")
  2384  		}
  2385  		r := obj.Addrel(ctxt.Cursym)
  2386  		*r = rel
  2387  		r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2388  	}
  2389  
  2390  	put4(ctxt, int32(v))
  2391  }
  2392  
  2393  func put8(ctxt *obj.Link, v int64) {
  2394  	ctxt.Andptr[0] = byte(v)
  2395  	ctxt.Andptr[1] = byte(v >> 8)
  2396  	ctxt.Andptr[2] = byte(v >> 16)
  2397  	ctxt.Andptr[3] = byte(v >> 24)
  2398  	ctxt.Andptr[4] = byte(v >> 32)
  2399  	ctxt.Andptr[5] = byte(v >> 40)
  2400  	ctxt.Andptr[6] = byte(v >> 48)
  2401  	ctxt.Andptr[7] = byte(v >> 56)
  2402  	ctxt.Andptr = ctxt.Andptr[8:]
  2403  }
  2404  
  2405  /*
  2406  static void
  2407  relput8(Prog *p, Addr *a)
  2408  {
  2409  	vlong v;
  2410  	Reloc rel, *r;
  2411  
  2412  	v = vaddr(ctxt, p, a, &rel);
  2413  	if(rel.siz != 0) {
  2414  		r = addrel(ctxt->cursym);
  2415  		*r = rel;
  2416  		r->siz = 8;
  2417  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2418  	}
  2419  	put8(ctxt, v);
  2420  }
  2421  */
  2422  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2423  	if r != nil {
  2424  		*r = obj.Reloc{}
  2425  	}
  2426  
  2427  	switch a.Name {
  2428  	case obj.NAME_STATIC,
  2429  		obj.NAME_GOTREF,
  2430  		obj.NAME_EXTERN:
  2431  		s := a.Sym
  2432  		if r == nil {
  2433  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2434  			log.Fatalf("reloc")
  2435  		}
  2436  
  2437  		if a.Name == obj.NAME_GOTREF {
  2438  			r.Siz = 4
  2439  			r.Type = obj.R_GOTPCREL
  2440  		} else if isextern(s) || p.Mode != 64 {
  2441  			r.Siz = 4
  2442  			r.Type = obj.R_ADDR
  2443  		} else {
  2444  			r.Siz = 4
  2445  			r.Type = obj.R_PCREL
  2446  		}
  2447  
  2448  		r.Off = -1 // caller must fill in
  2449  		r.Sym = s
  2450  		r.Add = a.Offset
  2451  
  2452  		return 0
  2453  	}
  2454  
  2455  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2456  		if r == nil {
  2457  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2458  			log.Fatalf("reloc")
  2459  		}
  2460  
  2461  		r.Type = obj.R_TLS_LE
  2462  		r.Siz = 4
  2463  		r.Off = -1 // caller must fill in
  2464  		r.Add = a.Offset
  2465  		return 0
  2466  	}
  2467  
  2468  	return a.Offset
  2469  }
  2470  
// asmandsz appends the addressing bytes for operand a of instruction p to
// ctxt.Andptr. The first byte carries a mode in bits 6-7, r in bits 3-5 and
// a register or escape code in bits 0-2; depending on the operand it is
// followed by an index byte (see asmidx) and by an 8-bit or 32-bit
// displacement. REX bits required by the registers involved are OR-ed into
// ctxt.Rexflag, and a relocation produced for the displacement is added to
// ctxt.Cursym.
//
// r is the value placed in bits 3-5. rex supplies extra REX bits, of which
// only 0x40 and Rxr are kept. m64 is not used by this function.
// Operands that cannot be encoded are reported through ctxt.Diag.
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	v := int32(a.Offset) // displacement; may be replaced by vaddr's result below
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// Address operands are never encodable here; the two checks only
		// add more specific diagnostics before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mode 3, no displacement allowed.
		if a.Reg < REG_AL || REG_X0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ctxt.Andptr[0] = byte(3<<6 | reg[a.Reg]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always uses the escape
	// code 4 in bits 0-2, followed by an index byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// NOTE: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			// Indexing off a symbol is rejected in 64-bit mode unless the
			// symbol is one isextern recognizes.
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			base = REG_NONE
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mode 0 followed by a 32-bit displacement.
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Mode 0 (no displacement) is not used with BP or R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Mode 1: displacement fits in a signed byte and needs no relocation.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		// Mode 2: 32-bit displacement.
		ctxt.Andptr[0] = byte(2<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// From here on there is no index register, or the index is REG_TLS.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		base = REG_NONE
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	// No base, or a segment/TLS register as base: the operand is encoded
	// as a 32-bit displacement without a base register.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Short form (code 5, no index byte): used for symbol references
		// not recognized by isextern, and for everything outside 64-bit mode.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ctxt.Andptr[0] = byte(0<<6 | 5<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			goto putrelv
		}

		/* temporary */
		ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:] /* sib present */
		ctxt.Andptr[0] = 0<<6 | 4<<3 | 5<<0
		ctxt.Andptr = ctxt.Andptr[1:] /* DS:d32 */
		goto putrelv
	}

	// SP and R12 as base are always followed by an index byte (built by
	// asmidx with no index register).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general register as base.
	if REG_AX <= base && base <= REG_R15 {
		// With TLS as the index and a non-shared build, the displacement
		// becomes the addend of an R_TLS_LE relocation; rel.Siz != 0 then
		// forces the 32-bit displacement form below.
		if a.Index == REG_TLS && ctxt.Flag_shared == 0 {
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Mode 0 (no displacement) is not used with BP or R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		// Mode 1: signed 8-bit displacement.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr[1] = byte(v)
			ctxt.Andptr = ctxt.Andptr[2:]
			return
		}

		// Mode 2: 32-bit displacement.
		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		goto putrelv
	}

	goto bad

	// putrelv emits the 32-bit displacement v, first recording the pending
	// relocation (if any) at the offset where those four bytes will land.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// NOTE: this r is a *obj.Reloc that shadows the int parameter r.
		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		r.Off = int32(ctxt.Curp.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
	}

	put4(ctxt, v)
	return

	// bad reports the operand; bytes already appended stay in the buffer.
bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  2664  
  2665  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2666  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2667  }
  2668  
  2669  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2670  	asmandsz(ctxt, p, a, o, 0, 0)
  2671  }
  2672  
  2673  func bytereg(a *obj.Addr, t *uint8) {
  2674  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2675  		a.Reg += REG_AL - REG_AX
  2676  		*t = 0
  2677  	}
  2678  }
  2679  
  2680  func unbytereg(a *obj.Addr, t *uint8) {
  2681  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2682  		a.Reg += REG_AX - REG_AL
  2683  		*t = 0
  2684  	}
  2685  }
  2686  
  2687  const (
  2688  	E = 0xff
  2689  )
  2690  
        // ymovtab is the table of special-case encodings that doasm scans after
        // none of the instruction's regular ytab entries matched.
        //
        // Each row is a Movtab literal; doasm reads its fields as as, ft, f3t, tt,
        // code and op (the three operand-class columns are presumably from, from3
        // and to, in that positional order — confirm against the Movtab type).
        // doasm takes the first row whose as equals p.As and whose three operand
        // classes cover the instruction's operands (via ycover), then interprets
        // op according to code:
        //
        //	0: emit op bytes literally until the E terminator
        //	1: emit op[0], then asmando on p.To with op[1]
        //	2: emit op[0], then asmando on p.From with op[1]
        //	3: emit op[0], op[1], then asmando on p.To with op[2]
        //	4: emit op[0], op[1], then asmando on p.From with op[2]
        //	5, 6, 7: dedicated doasm cases (per the section labels below:
        //	   load full pointer, double shift, load TLS base)
        //
        // Row order matters because the first match wins, and the final all-zero
        // row is the sentinel that stops doasm's scan (it loops until as == 0).
  2691  var ymovtab = []Movtab{
  2692  	/* push segment register (code 0: literal bytes up to E) */
  2693  	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
  2694  	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
  2695  	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
  2696  	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
  2697  	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
  2698  	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
  2699  	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
  2700  	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
  2701  	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
  2702  	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
  2703  	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
  2704  	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
  2705  	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
  2706  	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},
  2707  
  2708  	/* pop segment register (code 0: literal bytes up to E) */
  2709  	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
  2710  	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
  2711  	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
  2712  	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
  2713  	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
  2714  	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
  2715  	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
  2716  	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
  2717  	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
  2718  	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
  2719  	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
  2720  	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},
  2721  
  2722  	/* mov seg (code 1: segment register source; code 2: segment register destination) */
  2723  	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
  2724  	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
  2725  	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
  2726  	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
  2727  	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
  2728  	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
  2729  	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
  2730  	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
  2731  	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
  2732  	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
  2733  	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
  2734  	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},
  2735  
  2736  	/* mov cr (code 3: control register source; code 4: control register destination) */
  2737  	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
  2738  	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
  2739  	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
  2740  	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
  2741  	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
  2742  	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
  2743  	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
  2744  	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
  2745  	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
  2746  	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
  2747  	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
  2748  	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
  2749  	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
  2750  	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
  2751  	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
  2752  	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
  2753  	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
  2754  	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
  2755  	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
  2756  	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
  2757  
  2758  	/* mov dr (code 3: debug register source; code 4: debug register destination) */
  2759  	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
  2760  	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
  2761  	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
  2762  	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
  2763  	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
  2764  	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
  2765  	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
  2766  	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
  2767  	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
  2768  	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
  2769  	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
  2770  	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
  2771  
  2772  	/* mov tr (the trailing E in the code 4 rows is never read: code 4 uses only op[0..2]) */
  2773  	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
  2774  	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
  2775  	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
  2776  	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},
  2777  
  2778  	/* lgdt, sgdt, lidt, sidt */
  2779  	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
  2780  	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
  2781  	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
  2782  	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
  2783  	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
  2784  	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
  2785  	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
  2786  	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
  2787  
  2788  	/* lldt, sldt */
  2789  	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
  2790  	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},
  2791  
  2792  	/* lmsw, smsw */
  2793  	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
  2794  	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},
  2795  
  2796  	/* ltr, str */
  2797  	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
  2798  	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},
  2799  
  2800  	/* load full pointer - unsupported
  2801  	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
  2802  	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
  2803  	*/
  2804  
  2805  	/* double shift (code 6; three operands: count, register, r/m) */
  2806  	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2807  	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2808  	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
  2809  	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2810  	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2811  	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
  2812  	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2813  	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2814  	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  2815  	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2816  	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2817  	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
  2818  	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2819  	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2820  	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  2821  	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2822  	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2823  	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
  2824  
  2825  	/* load TLS base (code 7; the op bytes are all zero in these rows) */
  2826  	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
  2827  	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
        	// Sentinel: as == 0 ends the scan in doasm.
  2828  	{0, 0, 0, 0, 0, [4]uint8{}},
  2829  }
  2830  
  2831  func isax(a *obj.Addr) bool {
  2832  	switch a.Reg {
  2833  	case REG_AX, REG_AL, REG_AH:
  2834  		return true
  2835  	}
  2836  
  2837  	if a.Index == REG_AX {
  2838  		return true
  2839  	}
  2840  	return false
  2841  }
  2842  
  2843  func subreg(p *obj.Prog, from int, to int) {
  2844  	if false { /* debug['Q'] */
  2845  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  2846  	}
  2847  
  2848  	if int(p.From.Reg) == from {
  2849  		p.From.Reg = int16(to)
  2850  		p.Ft = 0
  2851  	}
  2852  
  2853  	if int(p.To.Reg) == from {
  2854  		p.To.Reg = int16(to)
  2855  		p.Tt = 0
  2856  	}
  2857  
  2858  	if int(p.From.Index) == from {
  2859  		p.From.Index = int16(to)
  2860  		p.Ft = 0
  2861  	}
  2862  
  2863  	if int(p.To.Index) == from {
  2864  		p.To.Index = int16(to)
  2865  		p.Tt = 0
  2866  	}
  2867  
  2868  	if false { /* debug['Q'] */
  2869  		fmt.Printf("%v\n", p)
  2870  	}
  2871  }
  2872  
  2873  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  2874  	switch op {
  2875  	case Pm, Pe, Pf2, Pf3:
  2876  		if osize != 1 {
  2877  			if op != Pm {
  2878  				ctxt.Andptr[0] = byte(op)
  2879  				ctxt.Andptr = ctxt.Andptr[1:]
  2880  			}
  2881  			ctxt.Andptr[0] = Pm
  2882  			ctxt.Andptr = ctxt.Andptr[1:]
  2883  			z++
  2884  			op = int(o.op[z])
  2885  			break
  2886  		}
  2887  		fallthrough
  2888  
  2889  	default:
  2890  		if -cap(ctxt.Andptr) == -cap(ctxt.And) || ctxt.And[-cap(ctxt.Andptr)+cap(ctxt.And[:])-1] != Pm {
  2891  			ctxt.Andptr[0] = Pm
  2892  			ctxt.Andptr = ctxt.Andptr[1:]
  2893  		}
  2894  	}
  2895  
  2896  	ctxt.Andptr[0] = byte(op)
  2897  	ctxt.Andptr = ctxt.Andptr[1:]
  2898  	return z
  2899  }
  2900  
  2901  var bpduff1 = []byte{
  2902  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  2903  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  2904  }
  2905  
  2906  var bpduff2 = []byte{
  2907  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  2908  }
  2909  
  2910  func doasm(ctxt *obj.Link, p *obj.Prog) {
  2911  	ctxt.Curp = p // TODO
  2912  
  2913  	o := opindex[p.As&obj.AMask]
  2914  
  2915  	if o == nil {
  2916  		ctxt.Diag("asmins: missing op %v", p)
  2917  		return
  2918  	}
  2919  
  2920  	pre := prefixof(ctxt, p, &p.From)
  2921  	if pre != 0 {
  2922  		ctxt.Andptr[0] = byte(pre)
  2923  		ctxt.Andptr = ctxt.Andptr[1:]
  2924  	}
  2925  	pre = prefixof(ctxt, p, &p.To)
  2926  	if pre != 0 {
  2927  		ctxt.Andptr[0] = byte(pre)
  2928  		ctxt.Andptr = ctxt.Andptr[1:]
  2929  	}
  2930  
  2931  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  2932  	// which encodes as SHRQ $32(DX*0), AX.
  2933  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  2934  	// Change encoding generated by assemblers and compilers and remove.
  2935  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  2936  		p.From3 = new(obj.Addr)
  2937  		p.From3.Type = obj.TYPE_REG
  2938  		p.From3.Reg = p.From.Index
  2939  		p.From.Index = 0
  2940  	}
  2941  
  2942  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  2943  	// Change encoding generated by assemblers and compilers (if any) and remove.
  2944  	switch p.As {
  2945  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  2946  		if p.From3Type() == obj.TYPE_NONE {
  2947  			p.From3 = new(obj.Addr)
  2948  			*p.From3 = p.From
  2949  			p.From = obj.Addr{}
  2950  			p.From.Type = obj.TYPE_CONST
  2951  			p.From.Offset = p.To.Offset
  2952  			p.To.Offset = 0
  2953  		}
  2954  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  2955  		if p.From3Type() == obj.TYPE_NONE {
  2956  			p.From3 = new(obj.Addr)
  2957  			*p.From3 = p.To
  2958  			p.To = obj.Addr{}
  2959  			p.To.Type = obj.TYPE_CONST
  2960  			p.To.Offset = p.From3.Offset
  2961  			p.From3.Offset = 0
  2962  		}
  2963  	}
  2964  
  2965  	if p.Ft == 0 {
  2966  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  2967  	}
  2968  	if p.Tt == 0 {
  2969  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  2970  	}
  2971  
  2972  	ft := int(p.Ft) * Ymax
  2973  	f3t := Ynone * Ymax
  2974  	if p.From3 != nil {
  2975  		f3t = oclass(ctxt, p, p.From3) * Ymax
  2976  	}
  2977  	tt := int(p.Tt) * Ymax
  2978  
  2979  	xo := obj.Bool2int(o.op[0] == 0x0f)
  2980  	z := 0
  2981  	var a *obj.Addr
  2982  	var l int
  2983  	var op int
  2984  	var q *obj.Prog
  2985  	var r *obj.Reloc
  2986  	var rel obj.Reloc
  2987  	var v int64
  2988  	for i := range o.ytab {
  2989  		yt := &o.ytab[i]
  2990  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  2991  			switch o.prefix {
  2992  			case Px1: /* first option valid only in 32-bit mode */
  2993  				if ctxt.Mode == 64 && z == 0 {
  2994  					z += int(yt.zoffset) + xo
  2995  					continue
  2996  				}
  2997  			case Pq: /* 16 bit escape and opcode escape */
  2998  				ctxt.Andptr[0] = Pe
  2999  				ctxt.Andptr = ctxt.Andptr[1:]
  3000  
  3001  				ctxt.Andptr[0] = Pm
  3002  				ctxt.Andptr = ctxt.Andptr[1:]
  3003  
  3004  			case Pq3: /* 16 bit escape, Rex.w, and opcode escape */
  3005  				ctxt.Andptr[0] = Pe
  3006  				ctxt.Andptr = ctxt.Andptr[1:]
  3007  
  3008  				ctxt.Andptr[0] = Pw
  3009  				ctxt.Andptr = ctxt.Andptr[1:]
  3010  				ctxt.Andptr[0] = Pm
  3011  				ctxt.Andptr = ctxt.Andptr[1:]
  3012  
  3013  			case Pf2, /* xmm opcode escape */
  3014  				Pf3:
  3015  				ctxt.Andptr[0] = byte(o.prefix)
  3016  				ctxt.Andptr = ctxt.Andptr[1:]
  3017  
  3018  				ctxt.Andptr[0] = Pm
  3019  				ctxt.Andptr = ctxt.Andptr[1:]
  3020  
  3021  			case Pm: /* opcode escape */
  3022  				ctxt.Andptr[0] = Pm
  3023  				ctxt.Andptr = ctxt.Andptr[1:]
  3024  
  3025  			case Pe: /* 16 bit escape */
  3026  				ctxt.Andptr[0] = Pe
  3027  				ctxt.Andptr = ctxt.Andptr[1:]
  3028  
  3029  			case Pw: /* 64-bit escape */
  3030  				if p.Mode != 64 {
  3031  					ctxt.Diag("asmins: illegal 64: %v", p)
  3032  				}
  3033  				ctxt.Rexflag |= Pw
  3034  
  3035  			case Pw8: /* 64-bit escape if z >= 8 */
  3036  				if z >= 8 {
  3037  					if p.Mode != 64 {
  3038  						ctxt.Diag("asmins: illegal 64: %v", p)
  3039  					}
  3040  					ctxt.Rexflag |= Pw
  3041  				}
  3042  
  3043  			case Pb: /* botch */
  3044  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3045  					goto bad
  3046  				}
  3047  				// NOTE(rsc): This is probably safe to do always,
  3048  				// but when enabled it chooses different encodings
  3049  				// than the old cmd/internal/obj/i386 code did,
  3050  				// which breaks our "same bits out" checks.
  3051  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3052  				// in the original obj/i386, and it would encode
  3053  				// (using a valid, shorter form) as 3c 00 if we enabled
  3054  				// the call to bytereg here.
  3055  				if p.Mode == 64 {
  3056  					bytereg(&p.From, &p.Ft)
  3057  					bytereg(&p.To, &p.Tt)
  3058  				}
  3059  
  3060  			case P32: /* 32 bit but illegal if 64-bit mode */
  3061  				if p.Mode == 64 {
  3062  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3063  				}
  3064  
  3065  			case Py: /* 64-bit only, no prefix */
  3066  				if p.Mode != 64 {
  3067  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3068  				}
  3069  
  3070  			case Py1: /* 64-bit only if z < 1, no prefix */
  3071  				if z < 1 && p.Mode != 64 {
  3072  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3073  				}
  3074  
  3075  			case Py3: /* 64-bit only if z < 3, no prefix */
  3076  				if z < 3 && p.Mode != 64 {
  3077  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3078  				}
  3079  			}
  3080  
  3081  			if z >= len(o.op) {
  3082  				log.Fatalf("asmins bad table %v", p)
  3083  			}
  3084  			op = int(o.op[z])
  3085  			if op == 0x0f {
  3086  				ctxt.Andptr[0] = byte(op)
  3087  				ctxt.Andptr = ctxt.Andptr[1:]
  3088  				z++
  3089  				op = int(o.op[z])
  3090  			}
  3091  
  3092  			switch yt.zcase {
  3093  			default:
  3094  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3095  				return
  3096  
  3097  			case Zpseudo:
  3098  				break
  3099  
  3100  			case Zlit:
  3101  				for ; ; z++ {
  3102  					op = int(o.op[z])
  3103  					if op == 0 {
  3104  						break
  3105  					}
  3106  					ctxt.Andptr[0] = byte(op)
  3107  					ctxt.Andptr = ctxt.Andptr[1:]
  3108  				}
  3109  
  3110  			case Zlitm_r:
  3111  				for ; ; z++ {
  3112  					op = int(o.op[z])
  3113  					if op == 0 {
  3114  						break
  3115  					}
  3116  					ctxt.Andptr[0] = byte(op)
  3117  					ctxt.Andptr = ctxt.Andptr[1:]
  3118  				}
  3119  				asmand(ctxt, p, &p.From, &p.To)
  3120  
  3121  			case Zmb_r:
  3122  				bytereg(&p.From, &p.Ft)
  3123  				fallthrough
  3124  
  3125  				/* fall through */
  3126  			case Zm_r:
  3127  				ctxt.Andptr[0] = byte(op)
  3128  				ctxt.Andptr = ctxt.Andptr[1:]
  3129  
  3130  				asmand(ctxt, p, &p.From, &p.To)
  3131  
  3132  			case Zm2_r:
  3133  				ctxt.Andptr[0] = byte(op)
  3134  				ctxt.Andptr = ctxt.Andptr[1:]
  3135  				ctxt.Andptr[0] = byte(o.op[z+1])
  3136  				ctxt.Andptr = ctxt.Andptr[1:]
  3137  				asmand(ctxt, p, &p.From, &p.To)
  3138  
  3139  			case Zm_r_xm:
  3140  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3141  				asmand(ctxt, p, &p.From, &p.To)
  3142  
  3143  			case Zm_r_xm_nr:
  3144  				ctxt.Rexflag = 0
  3145  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3146  				asmand(ctxt, p, &p.From, &p.To)
  3147  
  3148  			case Zm_r_i_xm:
  3149  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3150  				asmand(ctxt, p, &p.From, p.From3)
  3151  				ctxt.Andptr[0] = byte(p.To.Offset)
  3152  				ctxt.Andptr = ctxt.Andptr[1:]
  3153  
  3154  			case Zm_r_3d:
  3155  				ctxt.Andptr[0] = 0x0f
  3156  				ctxt.Andptr = ctxt.Andptr[1:]
  3157  				ctxt.Andptr[0] = 0x0f
  3158  				ctxt.Andptr = ctxt.Andptr[1:]
  3159  				asmand(ctxt, p, &p.From, &p.To)
  3160  				ctxt.Andptr[0] = byte(op)
  3161  				ctxt.Andptr = ctxt.Andptr[1:]
  3162  
  3163  			case Zibm_r:
  3164  				for {
  3165  					tmp1 := z
  3166  					z++
  3167  					op = int(o.op[tmp1])
  3168  					if op == 0 {
  3169  						break
  3170  					}
  3171  					ctxt.Andptr[0] = byte(op)
  3172  					ctxt.Andptr = ctxt.Andptr[1:]
  3173  				}
  3174  				asmand(ctxt, p, p.From3, &p.To)
  3175  				ctxt.Andptr[0] = byte(p.From.Offset)
  3176  				ctxt.Andptr = ctxt.Andptr[1:]
  3177  
  3178  			case Zaut_r:
  3179  				ctxt.Andptr[0] = 0x8d
  3180  				ctxt.Andptr = ctxt.Andptr[1:] /* leal */
  3181  				if p.From.Type != obj.TYPE_ADDR {
  3182  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3183  				}
  3184  				p.From.Type = obj.TYPE_MEM
  3185  				asmand(ctxt, p, &p.From, &p.To)
  3186  				p.From.Type = obj.TYPE_ADDR
  3187  
  3188  			case Zm_o:
  3189  				ctxt.Andptr[0] = byte(op)
  3190  				ctxt.Andptr = ctxt.Andptr[1:]
  3191  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3192  
  3193  			case Zr_m:
  3194  				ctxt.Andptr[0] = byte(op)
  3195  				ctxt.Andptr = ctxt.Andptr[1:]
  3196  				asmand(ctxt, p, &p.To, &p.From)
  3197  
  3198  			case Zr_m_xm:
  3199  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3200  				asmand(ctxt, p, &p.To, &p.From)
  3201  
  3202  			case Zr_m_xm_nr:
  3203  				ctxt.Rexflag = 0
  3204  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3205  				asmand(ctxt, p, &p.To, &p.From)
  3206  
  3207  			case Zo_m:
  3208  				ctxt.Andptr[0] = byte(op)
  3209  				ctxt.Andptr = ctxt.Andptr[1:]
  3210  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3211  
  3212  			case Zcallindreg:
  3213  				r = obj.Addrel(ctxt.Cursym)
  3214  				r.Off = int32(p.Pc)
  3215  				r.Type = obj.R_CALLIND
  3216  				r.Siz = 0
  3217  				fallthrough
  3218  
  3219  			case Zo_m64:
  3220  				ctxt.Andptr[0] = byte(op)
  3221  				ctxt.Andptr = ctxt.Andptr[1:]
  3222  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3223  
  3224  			case Zm_ibo:
  3225  				ctxt.Andptr[0] = byte(op)
  3226  				ctxt.Andptr = ctxt.Andptr[1:]
  3227  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3228  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.To, nil))
  3229  				ctxt.Andptr = ctxt.Andptr[1:]
  3230  
  3231  			case Zibo_m:
  3232  				ctxt.Andptr[0] = byte(op)
  3233  				ctxt.Andptr = ctxt.Andptr[1:]
  3234  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3235  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3236  				ctxt.Andptr = ctxt.Andptr[1:]
  3237  
  3238  			case Zibo_m_xm:
  3239  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3240  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3241  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3242  				ctxt.Andptr = ctxt.Andptr[1:]
  3243  
  3244  			case Z_ib, Zib_:
  3245  				if yt.zcase == Zib_ {
  3246  					a = &p.From
  3247  				} else {
  3248  					a = &p.To
  3249  				}
  3250  				ctxt.Andptr[0] = byte(op)
  3251  				ctxt.Andptr = ctxt.Andptr[1:]
  3252  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, a, nil))
  3253  				ctxt.Andptr = ctxt.Andptr[1:]
  3254  
  3255  			case Zib_rp:
  3256  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3257  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3258  				ctxt.Andptr = ctxt.Andptr[1:]
  3259  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3260  				ctxt.Andptr = ctxt.Andptr[1:]
  3261  
  3262  			case Zil_rp:
  3263  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3264  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3265  				ctxt.Andptr = ctxt.Andptr[1:]
  3266  				if o.prefix == Pe {
  3267  					v = vaddr(ctxt, p, &p.From, nil)
  3268  					ctxt.Andptr[0] = byte(v)
  3269  					ctxt.Andptr = ctxt.Andptr[1:]
  3270  					ctxt.Andptr[0] = byte(v >> 8)
  3271  					ctxt.Andptr = ctxt.Andptr[1:]
  3272  				} else {
  3273  					relput4(ctxt, p, &p.From)
  3274  				}
  3275  
  3276  			case Zo_iw:
  3277  				ctxt.Andptr[0] = byte(op)
  3278  				ctxt.Andptr = ctxt.Andptr[1:]
  3279  				if p.From.Type != obj.TYPE_NONE {
  3280  					v = vaddr(ctxt, p, &p.From, nil)
  3281  					ctxt.Andptr[0] = byte(v)
  3282  					ctxt.Andptr = ctxt.Andptr[1:]
  3283  					ctxt.Andptr[0] = byte(v >> 8)
  3284  					ctxt.Andptr = ctxt.Andptr[1:]
  3285  				}
  3286  
  3287  			case Ziq_rp:
  3288  				v = vaddr(ctxt, p, &p.From, &rel)
  3289  				l = int(v >> 32)
  3290  				if l == 0 && rel.Siz != 8 {
  3291  					//p->mark |= 0100;
  3292  					//print("zero: %llux %v\n", v, p);
  3293  					ctxt.Rexflag &^= (0x40 | Rxw)
  3294  
  3295  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3296  					ctxt.Andptr[0] = byte(0xb8 + reg[p.To.Reg])
  3297  					ctxt.Andptr = ctxt.Andptr[1:]
  3298  					if rel.Type != 0 {
  3299  						r = obj.Addrel(ctxt.Cursym)
  3300  						*r = rel
  3301  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3302  					}
  3303  
  3304  					put4(ctxt, int32(v))
  3305  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3306  
  3307  					//p->mark |= 0100;
  3308  					//print("sign: %llux %v\n", v, p);
  3309  					ctxt.Andptr[0] = 0xc7
  3310  					ctxt.Andptr = ctxt.Andptr[1:]
  3311  
  3312  					asmando(ctxt, p, &p.To, 0)
  3313  					put4(ctxt, int32(v)) /* need all 8 */
  3314  				} else {
  3315  					//print("all: %llux %v\n", v, p);
  3316  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3317  
  3318  					ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3319  					ctxt.Andptr = ctxt.Andptr[1:]
  3320  					if rel.Type != 0 {
  3321  						r = obj.Addrel(ctxt.Cursym)
  3322  						*r = rel
  3323  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3324  					}
  3325  
  3326  					put8(ctxt, v)
  3327  				}
  3328  
  3329  			case Zib_rr:
  3330  				ctxt.Andptr[0] = byte(op)
  3331  				ctxt.Andptr = ctxt.Andptr[1:]
  3332  				asmand(ctxt, p, &p.To, &p.To)
  3333  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3334  				ctxt.Andptr = ctxt.Andptr[1:]
  3335  
  3336  			case Z_il, Zil_:
  3337  				if yt.zcase == Zil_ {
  3338  					a = &p.From
  3339  				} else {
  3340  					a = &p.To
  3341  				}
  3342  				ctxt.Andptr[0] = byte(op)
  3343  				ctxt.Andptr = ctxt.Andptr[1:]
  3344  				if o.prefix == Pe {
  3345  					v = vaddr(ctxt, p, a, nil)
  3346  					ctxt.Andptr[0] = byte(v)
  3347  					ctxt.Andptr = ctxt.Andptr[1:]
  3348  					ctxt.Andptr[0] = byte(v >> 8)
  3349  					ctxt.Andptr = ctxt.Andptr[1:]
  3350  				} else {
  3351  					relput4(ctxt, p, a)
  3352  				}
  3353  
  3354  			case Zm_ilo, Zilo_m:
  3355  				ctxt.Andptr[0] = byte(op)
  3356  				ctxt.Andptr = ctxt.Andptr[1:]
  3357  				if yt.zcase == Zilo_m {
  3358  					a = &p.From
  3359  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3360  				} else {
  3361  					a = &p.To
  3362  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3363  				}
  3364  
  3365  				if o.prefix == Pe {
  3366  					v = vaddr(ctxt, p, a, nil)
  3367  					ctxt.Andptr[0] = byte(v)
  3368  					ctxt.Andptr = ctxt.Andptr[1:]
  3369  					ctxt.Andptr[0] = byte(v >> 8)
  3370  					ctxt.Andptr = ctxt.Andptr[1:]
  3371  				} else {
  3372  					relput4(ctxt, p, a)
  3373  				}
  3374  
  3375  			case Zil_rr:
  3376  				ctxt.Andptr[0] = byte(op)
  3377  				ctxt.Andptr = ctxt.Andptr[1:]
  3378  				asmand(ctxt, p, &p.To, &p.To)
  3379  				if o.prefix == Pe {
  3380  					v = vaddr(ctxt, p, &p.From, nil)
  3381  					ctxt.Andptr[0] = byte(v)
  3382  					ctxt.Andptr = ctxt.Andptr[1:]
  3383  					ctxt.Andptr[0] = byte(v >> 8)
  3384  					ctxt.Andptr = ctxt.Andptr[1:]
  3385  				} else {
  3386  					relput4(ctxt, p, &p.From)
  3387  				}
  3388  
  3389  			case Z_rp:
  3390  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3391  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3392  				ctxt.Andptr = ctxt.Andptr[1:]
  3393  
  3394  			case Zrp_:
  3395  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3396  				ctxt.Andptr[0] = byte(op + reg[p.From.Reg])
  3397  				ctxt.Andptr = ctxt.Andptr[1:]
  3398  
  3399  			case Zclr:
  3400  				ctxt.Rexflag &^= Pw
  3401  				ctxt.Andptr[0] = byte(op)
  3402  				ctxt.Andptr = ctxt.Andptr[1:]
  3403  				asmand(ctxt, p, &p.To, &p.To)
  3404  
  3405  			case Zcallcon, Zjmpcon:
  3406  				if yt.zcase == Zcallcon {
  3407  					ctxt.Andptr[0] = byte(op)
  3408  					ctxt.Andptr = ctxt.Andptr[1:]
  3409  				} else {
  3410  					ctxt.Andptr[0] = byte(o.op[z+1])
  3411  					ctxt.Andptr = ctxt.Andptr[1:]
  3412  				}
  3413  				r = obj.Addrel(ctxt.Cursym)
  3414  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3415  				r.Type = obj.R_PCREL
  3416  				r.Siz = 4
  3417  				r.Add = p.To.Offset
  3418  				put4(ctxt, 0)
  3419  
  3420  			case Zcallind:
  3421  				ctxt.Andptr[0] = byte(op)
  3422  				ctxt.Andptr = ctxt.Andptr[1:]
  3423  				ctxt.Andptr[0] = byte(o.op[z+1])
  3424  				ctxt.Andptr = ctxt.Andptr[1:]
  3425  				r = obj.Addrel(ctxt.Cursym)
  3426  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3427  				r.Type = obj.R_ADDR
  3428  				r.Siz = 4
  3429  				r.Add = p.To.Offset
  3430  				r.Sym = p.To.Sym
  3431  				put4(ctxt, 0)
  3432  
  3433  			case Zcall, Zcallduff:
  3434  				if p.To.Sym == nil {
  3435  					ctxt.Diag("call without target")
  3436  					log.Fatalf("bad code")
  3437  				}
  3438  
  3439  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3440  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3441  				}
  3442  
  3443  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3444  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3445  					// (the call jumps into the middle of the function).
  3446  					// This makes it possible to see call sites for duffcopy/duffzero in
  3447  					// BP-based profiling tools like Linux perf (which is the
  3448  					// whole point of obj.Framepointer_enabled).
  3449  					// MOVQ BP, -16(SP)
  3450  					// LEAQ -16(SP), BP
  3451  					copy(ctxt.Andptr, bpduff1)
  3452  					ctxt.Andptr = ctxt.Andptr[len(bpduff1):]
  3453  				}
  3454  				ctxt.Andptr[0] = byte(op)
  3455  				ctxt.Andptr = ctxt.Andptr[1:]
  3456  				r = obj.Addrel(ctxt.Cursym)
  3457  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3458  				r.Sym = p.To.Sym
  3459  				r.Add = p.To.Offset
  3460  				r.Type = obj.R_CALL
  3461  				r.Siz = 4
  3462  				put4(ctxt, 0)
  3463  
  3464  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3465  					// Pop BP pushed above.
  3466  					// MOVQ 0(BP), BP
  3467  					copy(ctxt.Andptr, bpduff2)
  3468  					ctxt.Andptr = ctxt.Andptr[len(bpduff2):]
  3469  				}
  3470  
  3471  			// TODO: jump across functions needs reloc
  3472  			case Zbr, Zjmp, Zloop:
  3473  				if p.To.Sym != nil {
  3474  					if yt.zcase != Zjmp {
  3475  						ctxt.Diag("branch to ATEXT")
  3476  						log.Fatalf("bad code")
  3477  					}
  3478  
  3479  					ctxt.Andptr[0] = byte(o.op[z+1])
  3480  					ctxt.Andptr = ctxt.Andptr[1:]
  3481  					r = obj.Addrel(ctxt.Cursym)
  3482  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3483  					r.Sym = p.To.Sym
  3484  					r.Type = obj.R_PCREL
  3485  					r.Siz = 4
  3486  					put4(ctxt, 0)
  3487  					break
  3488  				}
  3489  
  3490  				// Assumes q is in this function.
  3491  				// TODO: Check in input, preserve in brchain.
  3492  
  3493  				// Fill in backward jump now.
  3494  				q = p.Pcond
  3495  
  3496  				if q == nil {
  3497  					ctxt.Diag("jmp/branch/loop without target")
  3498  					log.Fatalf("bad code")
  3499  				}
  3500  
  3501  				if p.Back&1 != 0 {
  3502  					v = q.Pc - (p.Pc + 2)
  3503  					if v >= -128 {
  3504  						if p.As == AJCXZL {
  3505  							ctxt.Andptr[0] = 0x67
  3506  							ctxt.Andptr = ctxt.Andptr[1:]
  3507  						}
  3508  						ctxt.Andptr[0] = byte(op)
  3509  						ctxt.Andptr = ctxt.Andptr[1:]
  3510  						ctxt.Andptr[0] = byte(v)
  3511  						ctxt.Andptr = ctxt.Andptr[1:]
  3512  					} else if yt.zcase == Zloop {
  3513  						ctxt.Diag("loop too far: %v", p)
  3514  					} else {
  3515  						v -= 5 - 2
  3516  						if yt.zcase == Zbr {
  3517  							ctxt.Andptr[0] = 0x0f
  3518  							ctxt.Andptr = ctxt.Andptr[1:]
  3519  							v--
  3520  						}
  3521  
  3522  						ctxt.Andptr[0] = byte(o.op[z+1])
  3523  						ctxt.Andptr = ctxt.Andptr[1:]
  3524  						ctxt.Andptr[0] = byte(v)
  3525  						ctxt.Andptr = ctxt.Andptr[1:]
  3526  						ctxt.Andptr[0] = byte(v >> 8)
  3527  						ctxt.Andptr = ctxt.Andptr[1:]
  3528  						ctxt.Andptr[0] = byte(v >> 16)
  3529  						ctxt.Andptr = ctxt.Andptr[1:]
  3530  						ctxt.Andptr[0] = byte(v >> 24)
  3531  						ctxt.Andptr = ctxt.Andptr[1:]
  3532  					}
  3533  
  3534  					break
  3535  				}
  3536  
  3537  				// Annotate target; will fill in later.
  3538  				p.Forwd = q.Rel
  3539  
  3540  				q.Rel = p
  3541  				if p.Back&2 != 0 { // short
  3542  					if p.As == AJCXZL {
  3543  						ctxt.Andptr[0] = 0x67
  3544  						ctxt.Andptr = ctxt.Andptr[1:]
  3545  					}
  3546  					ctxt.Andptr[0] = byte(op)
  3547  					ctxt.Andptr = ctxt.Andptr[1:]
  3548  					ctxt.Andptr[0] = 0
  3549  					ctxt.Andptr = ctxt.Andptr[1:]
  3550  				} else if yt.zcase == Zloop {
  3551  					ctxt.Diag("loop too far: %v", p)
  3552  				} else {
  3553  					if yt.zcase == Zbr {
  3554  						ctxt.Andptr[0] = 0x0f
  3555  						ctxt.Andptr = ctxt.Andptr[1:]
  3556  					}
  3557  					ctxt.Andptr[0] = byte(o.op[z+1])
  3558  					ctxt.Andptr = ctxt.Andptr[1:]
  3559  					ctxt.Andptr[0] = 0
  3560  					ctxt.Andptr = ctxt.Andptr[1:]
  3561  					ctxt.Andptr[0] = 0
  3562  					ctxt.Andptr = ctxt.Andptr[1:]
  3563  					ctxt.Andptr[0] = 0
  3564  					ctxt.Andptr = ctxt.Andptr[1:]
  3565  					ctxt.Andptr[0] = 0
  3566  					ctxt.Andptr = ctxt.Andptr[1:]
  3567  				}
  3568  
  3569  				break
  3570  
  3571  			/*
  3572  				v = q->pc - p->pc - 2;
  3573  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3574  					*ctxt->andptr++ = op;
  3575  					*ctxt->andptr++ = v;
  3576  				} else {
  3577  					v -= 5-2;
  3578  					if(yt.zcase == Zbr) {
  3579  						*ctxt->andptr++ = 0x0f;
  3580  						v--;
  3581  					}
  3582  					*ctxt->andptr++ = o->op[z+1];
  3583  					*ctxt->andptr++ = v;
  3584  					*ctxt->andptr++ = v>>8;
  3585  					*ctxt->andptr++ = v>>16;
  3586  					*ctxt->andptr++ = v>>24;
  3587  				}
  3588  			*/
  3589  
  3590  			case Zbyte:
  3591  				v = vaddr(ctxt, p, &p.From, &rel)
  3592  				if rel.Siz != 0 {
  3593  					rel.Siz = uint8(op)
  3594  					r = obj.Addrel(ctxt.Cursym)
  3595  					*r = rel
  3596  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3597  				}
  3598  
  3599  				ctxt.Andptr[0] = byte(v)
  3600  				ctxt.Andptr = ctxt.Andptr[1:]
  3601  				if op > 1 {
  3602  					ctxt.Andptr[0] = byte(v >> 8)
  3603  					ctxt.Andptr = ctxt.Andptr[1:]
  3604  					if op > 2 {
  3605  						ctxt.Andptr[0] = byte(v >> 16)
  3606  						ctxt.Andptr = ctxt.Andptr[1:]
  3607  						ctxt.Andptr[0] = byte(v >> 24)
  3608  						ctxt.Andptr = ctxt.Andptr[1:]
  3609  						if op > 4 {
  3610  							ctxt.Andptr[0] = byte(v >> 32)
  3611  							ctxt.Andptr = ctxt.Andptr[1:]
  3612  							ctxt.Andptr[0] = byte(v >> 40)
  3613  							ctxt.Andptr = ctxt.Andptr[1:]
  3614  							ctxt.Andptr[0] = byte(v >> 48)
  3615  							ctxt.Andptr = ctxt.Andptr[1:]
  3616  							ctxt.Andptr[0] = byte(v >> 56)
  3617  							ctxt.Andptr = ctxt.Andptr[1:]
  3618  						}
  3619  					}
  3620  				}
  3621  			}
  3622  
  3623  			return
  3624  		}
  3625  		z += int(yt.zoffset) + xo
  3626  	}
  3627  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3628  		var pp obj.Prog
  3629  		var t []byte
  3630  		if p.As == mo[0].as {
  3631  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3632  				t = mo[0].op[:]
  3633  				switch mo[0].code {
  3634  				default:
  3635  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3636  
  3637  				case 0: /* lit */
  3638  					for z = 0; t[z] != E; z++ {
  3639  						ctxt.Andptr[0] = t[z]
  3640  						ctxt.Andptr = ctxt.Andptr[1:]
  3641  					}
  3642  
  3643  				case 1: /* r,m */
  3644  					ctxt.Andptr[0] = t[0]
  3645  					ctxt.Andptr = ctxt.Andptr[1:]
  3646  
  3647  					asmando(ctxt, p, &p.To, int(t[1]))
  3648  
  3649  				case 2: /* m,r */
  3650  					ctxt.Andptr[0] = t[0]
  3651  					ctxt.Andptr = ctxt.Andptr[1:]
  3652  
  3653  					asmando(ctxt, p, &p.From, int(t[1]))
  3654  
  3655  				case 3: /* r,m - 2op */
  3656  					ctxt.Andptr[0] = t[0]
  3657  					ctxt.Andptr = ctxt.Andptr[1:]
  3658  
  3659  					ctxt.Andptr[0] = t[1]
  3660  					ctxt.Andptr = ctxt.Andptr[1:]
  3661  					asmando(ctxt, p, &p.To, int(t[2]))
  3662  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3663  
  3664  				case 4: /* m,r - 2op */
  3665  					ctxt.Andptr[0] = t[0]
  3666  					ctxt.Andptr = ctxt.Andptr[1:]
  3667  
  3668  					ctxt.Andptr[0] = t[1]
  3669  					ctxt.Andptr = ctxt.Andptr[1:]
  3670  					asmando(ctxt, p, &p.From, int(t[2]))
  3671  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3672  
  3673  				case 5: /* load full pointer, trash heap */
  3674  					if t[0] != 0 {
  3675  						ctxt.Andptr[0] = t[0]
  3676  						ctxt.Andptr = ctxt.Andptr[1:]
  3677  					}
  3678  					switch p.To.Index {
  3679  					default:
  3680  						goto bad
  3681  
  3682  					case REG_DS:
  3683  						ctxt.Andptr[0] = 0xc5
  3684  						ctxt.Andptr = ctxt.Andptr[1:]
  3685  
  3686  					case REG_SS:
  3687  						ctxt.Andptr[0] = 0x0f
  3688  						ctxt.Andptr = ctxt.Andptr[1:]
  3689  						ctxt.Andptr[0] = 0xb2
  3690  						ctxt.Andptr = ctxt.Andptr[1:]
  3691  
  3692  					case REG_ES:
  3693  						ctxt.Andptr[0] = 0xc4
  3694  						ctxt.Andptr = ctxt.Andptr[1:]
  3695  
  3696  					case REG_FS:
  3697  						ctxt.Andptr[0] = 0x0f
  3698  						ctxt.Andptr = ctxt.Andptr[1:]
  3699  						ctxt.Andptr[0] = 0xb4
  3700  						ctxt.Andptr = ctxt.Andptr[1:]
  3701  
  3702  					case REG_GS:
  3703  						ctxt.Andptr[0] = 0x0f
  3704  						ctxt.Andptr = ctxt.Andptr[1:]
  3705  						ctxt.Andptr[0] = 0xb5
  3706  						ctxt.Andptr = ctxt.Andptr[1:]
  3707  					}
  3708  
  3709  					asmand(ctxt, p, &p.From, &p.To)
  3710  
  3711  				case 6: /* double shift */
  3712  					if t[0] == Pw {
  3713  						if p.Mode != 64 {
  3714  							ctxt.Diag("asmins: illegal 64: %v", p)
  3715  						}
  3716  						ctxt.Rexflag |= Pw
  3717  						t = t[1:]
  3718  					} else if t[0] == Pe {
  3719  						ctxt.Andptr[0] = Pe
  3720  						ctxt.Andptr = ctxt.Andptr[1:]
  3721  						t = t[1:]
  3722  					}
  3723  
  3724  					switch p.From.Type {
  3725  					default:
  3726  						goto bad
  3727  
  3728  					case obj.TYPE_CONST:
  3729  						ctxt.Andptr[0] = 0x0f
  3730  						ctxt.Andptr = ctxt.Andptr[1:]
  3731  						ctxt.Andptr[0] = t[0]
  3732  						ctxt.Andptr = ctxt.Andptr[1:]
  3733  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3734  						ctxt.Andptr[0] = byte(p.From.Offset)
  3735  						ctxt.Andptr = ctxt.Andptr[1:]
  3736  
  3737  					case obj.TYPE_REG:
  3738  						switch p.From.Reg {
  3739  						default:
  3740  							goto bad
  3741  
  3742  						case REG_CL, REG_CX:
  3743  							ctxt.Andptr[0] = 0x0f
  3744  							ctxt.Andptr = ctxt.Andptr[1:]
  3745  							ctxt.Andptr[0] = t[1]
  3746  							ctxt.Andptr = ctxt.Andptr[1:]
  3747  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3748  						}
  3749  					}
  3750  
  3751  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3752  				// where you load the TLS base register into a register and then index off that
  3753  				// register to access the actual TLS variables. Systems that allow direct TLS access
  3754  				// are handled in prefixof above and should not be listed here.
  3755  				case 7: /* mov tls, r */
  3756  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  3757  						ctxt.Diag("invalid load of TLS: %v", p)
  3758  					}
  3759  
  3760  					if p.Mode == 32 {
  3761  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3762  						// where you load the TLS base register into a register and then index off that
  3763  						// register to access the actual TLS variables. Systems that allow direct TLS access
  3764  						// are handled in prefixof above and should not be listed here.
  3765  						switch ctxt.Headtype {
  3766  						default:
  3767  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3768  
  3769  						case obj.Hlinux,
  3770  							obj.Hnacl:
  3771  							// ELF TLS base is 0(GS).
  3772  							pp.From = p.From
  3773  
  3774  							pp.From.Type = obj.TYPE_MEM
  3775  							pp.From.Reg = REG_GS
  3776  							pp.From.Offset = 0
  3777  							pp.From.Index = REG_NONE
  3778  							pp.From.Scale = 0
  3779  							ctxt.Andptr[0] = 0x65
  3780  							ctxt.Andptr = ctxt.Andptr[1:] // GS
  3781  							ctxt.Andptr[0] = 0x8B
  3782  							ctxt.Andptr = ctxt.Andptr[1:]
  3783  							asmand(ctxt, p, &pp.From, &p.To)
  3784  
  3785  						case obj.Hplan9:
  3786  							if ctxt.Plan9privates == nil {
  3787  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3788  							}
  3789  							pp.From = obj.Addr{}
  3790  							pp.From.Type = obj.TYPE_MEM
  3791  							pp.From.Name = obj.NAME_EXTERN
  3792  							pp.From.Sym = ctxt.Plan9privates
  3793  							pp.From.Offset = 0
  3794  							pp.From.Index = REG_NONE
  3795  							ctxt.Andptr[0] = 0x8B
  3796  							ctxt.Andptr = ctxt.Andptr[1:]
  3797  							asmand(ctxt, p, &pp.From, &p.To)
  3798  
  3799  						case obj.Hwindows:
  3800  							// Windows TLS base is always 0x14(FS).
  3801  							pp.From = p.From
  3802  
  3803  							pp.From.Type = obj.TYPE_MEM
  3804  							pp.From.Reg = REG_FS
  3805  							pp.From.Offset = 0x14
  3806  							pp.From.Index = REG_NONE
  3807  							pp.From.Scale = 0
  3808  							ctxt.Andptr[0] = 0x64
  3809  							ctxt.Andptr = ctxt.Andptr[1:] // FS
  3810  							ctxt.Andptr[0] = 0x8B
  3811  							ctxt.Andptr = ctxt.Andptr[1:]
  3812  							asmand(ctxt, p, &pp.From, &p.To)
  3813  						}
  3814  						break
  3815  					}
  3816  
  3817  					switch ctxt.Headtype {
  3818  					default:
  3819  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3820  
  3821  					case obj.Hlinux:
  3822  						if ctxt.Flag_shared == 0 {
  3823  							log.Fatalf("unknown TLS base location for linux without -shared")
  3824  						}
  3825  						// Note that this is not generating the same insn as the other cases.
  3826  						//     MOV TLS, R_to
  3827  						// becomes
  3828  						//     movq g@gottpoff(%rip), R_to
  3829  						// which is encoded as
  3830  						//     movq 0(%rip), R_to
  3831  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  3832  						// is g, which we can't check here, but will when we assemble the second
  3833  						// instruction.
  3834  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  3835  
  3836  						ctxt.Andptr[0] = 0x8B
  3837  						ctxt.Andptr = ctxt.Andptr[1:]
  3838  						ctxt.Andptr[0] = byte(0x05 | (reg[p.To.Reg] << 3))
  3839  						ctxt.Andptr = ctxt.Andptr[1:]
  3840  						r = obj.Addrel(ctxt.Cursym)
  3841  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3842  						r.Type = obj.R_TLS_IE
  3843  						r.Siz = 4
  3844  						r.Add = -4
  3845  						put4(ctxt, 0)
  3846  
  3847  					case obj.Hplan9:
  3848  						if ctxt.Plan9privates == nil {
  3849  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3850  						}
  3851  						pp.From = obj.Addr{}
  3852  						pp.From.Type = obj.TYPE_MEM
  3853  						pp.From.Name = obj.NAME_EXTERN
  3854  						pp.From.Sym = ctxt.Plan9privates
  3855  						pp.From.Offset = 0
  3856  						pp.From.Index = REG_NONE
  3857  						ctxt.Rexflag |= Pw
  3858  						ctxt.Andptr[0] = 0x8B
  3859  						ctxt.Andptr = ctxt.Andptr[1:]
  3860  						asmand(ctxt, p, &pp.From, &p.To)
  3861  
  3862  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  3863  						// TLS base is 0(FS).
  3864  						pp.From = p.From
  3865  
  3866  						pp.From.Type = obj.TYPE_MEM
  3867  						pp.From.Name = obj.NAME_NONE
  3868  						pp.From.Reg = REG_NONE
  3869  						pp.From.Offset = 0
  3870  						pp.From.Index = REG_NONE
  3871  						pp.From.Scale = 0
  3872  						ctxt.Rexflag |= Pw
  3873  						ctxt.Andptr[0] = 0x64
  3874  						ctxt.Andptr = ctxt.Andptr[1:] // FS
  3875  						ctxt.Andptr[0] = 0x8B
  3876  						ctxt.Andptr = ctxt.Andptr[1:]
  3877  						asmand(ctxt, p, &pp.From, &p.To)
  3878  
  3879  					case obj.Hwindows:
  3880  						// Windows TLS base is always 0x28(GS).
  3881  						pp.From = p.From
  3882  
  3883  						pp.From.Type = obj.TYPE_MEM
  3884  						pp.From.Name = obj.NAME_NONE
  3885  						pp.From.Reg = REG_GS
  3886  						pp.From.Offset = 0x28
  3887  						pp.From.Index = REG_NONE
  3888  						pp.From.Scale = 0
  3889  						ctxt.Rexflag |= Pw
  3890  						ctxt.Andptr[0] = 0x65
  3891  						ctxt.Andptr = ctxt.Andptr[1:] // GS
  3892  						ctxt.Andptr[0] = 0x8B
  3893  						ctxt.Andptr = ctxt.Andptr[1:]
  3894  						asmand(ctxt, p, &pp.From, &p.To)
  3895  					}
  3896  				}
  3897  				return
  3898  			}
  3899  		}
  3900  	}
  3901  	goto bad
  3902  
  3903  bad:
  3904  	if p.Mode != 64 {
  3905  		/*
  3906  		 * here, the assembly has failed.
  3907  		 * if its a byte instruction that has
  3908  		 * unaddressable registers, try to
  3909  		 * exchange registers and reissue the
  3910  		 * instruction with the operands renamed.
  3911  		 */
  3912  		pp := *p
  3913  
  3914  		unbytereg(&pp.From, &pp.Ft)
  3915  		unbytereg(&pp.To, &pp.Tt)
  3916  
  3917  		z := int(p.From.Reg)
  3918  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  3919  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  3920  			// For now, different to keep bit-for-bit compatibility.
  3921  			if p.Mode == 32 {
  3922  				breg := byteswapreg(ctxt, &p.To)
  3923  				if breg != REG_AX {
  3924  					ctxt.Andptr[0] = 0x87
  3925  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3926  					asmando(ctxt, p, &p.From, reg[breg])
  3927  					subreg(&pp, z, breg)
  3928  					doasm(ctxt, &pp)
  3929  					ctxt.Andptr[0] = 0x87
  3930  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3931  					asmando(ctxt, p, &p.From, reg[breg])
  3932  				} else {
  3933  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3934  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3935  					subreg(&pp, z, REG_AX)
  3936  					doasm(ctxt, &pp)
  3937  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3938  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3939  				}
  3940  				return
  3941  			}
  3942  
  3943  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  3944  				// We certainly don't want to exchange
  3945  				// with AX if the op is MUL or DIV.
  3946  				ctxt.Andptr[0] = 0x87
  3947  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3948  				asmando(ctxt, p, &p.From, reg[REG_BX])
  3949  				subreg(&pp, z, REG_BX)
  3950  				doasm(ctxt, &pp)
  3951  				ctxt.Andptr[0] = 0x87
  3952  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  3953  				asmando(ctxt, p, &p.From, reg[REG_BX])
  3954  			} else {
  3955  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  3956  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3957  				subreg(&pp, z, REG_AX)
  3958  				doasm(ctxt, &pp)
  3959  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  3960  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  3961  			}
  3962  			return
  3963  		}
  3964  
  3965  		z = int(p.To.Reg)
  3966  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  3967  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  3968  			// For now, different to keep bit-for-bit compatibility.
  3969  			if p.Mode == 32 {
  3970  				breg := byteswapreg(ctxt, &p.From)
  3971  				if breg != REG_AX {
  3972  					ctxt.Andptr[0] = 0x87
  3973  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  3974  					asmando(ctxt, p, &p.To, reg[breg])
  3975  					subreg(&pp, z, breg)
  3976  					doasm(ctxt, &pp)
  3977  					ctxt.Andptr[0] = 0x87
  3978  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  3979  					asmando(ctxt, p, &p.To, reg[breg])
  3980  				} else {
  3981  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3982  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  3983  					subreg(&pp, z, REG_AX)
  3984  					doasm(ctxt, &pp)
  3985  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  3986  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  3987  				}
  3988  				return
  3989  			}
  3990  
  3991  			if isax(&p.From) {
  3992  				ctxt.Andptr[0] = 0x87
  3993  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  3994  				asmando(ctxt, p, &p.To, reg[REG_BX])
  3995  				subreg(&pp, z, REG_BX)
  3996  				doasm(ctxt, &pp)
  3997  				ctxt.Andptr[0] = 0x87
  3998  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  3999  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4000  			} else {
  4001  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4002  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4003  				subreg(&pp, z, REG_AX)
  4004  				doasm(ctxt, &pp)
  4005  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4006  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4007  			}
  4008  			return
  4009  		}
  4010  	}
  4011  
  4012  	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4013  	return
  4014  }
  4015  
  4016  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4017  // which is not referenced in a.
  4018  // If a is empty, it returns BX to account for MULB-like instructions
  4019  // that might use DX and AX.
  4020  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4021  	cand := 1
  4022  	canc := cand
  4023  	canb := canc
  4024  	cana := canb
  4025  
  4026  	if a.Type == obj.TYPE_NONE {
  4027  		cand = 0
  4028  		cana = cand
  4029  	}
  4030  
  4031  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4032  		switch a.Reg {
  4033  		case REG_NONE:
  4034  			cand = 0
  4035  			cana = cand
  4036  
  4037  		case REG_AX, REG_AL, REG_AH:
  4038  			cana = 0
  4039  
  4040  		case REG_BX, REG_BL, REG_BH:
  4041  			canb = 0
  4042  
  4043  		case REG_CX, REG_CL, REG_CH:
  4044  			canc = 0
  4045  
  4046  		case REG_DX, REG_DL, REG_DH:
  4047  			cand = 0
  4048  		}
  4049  	}
  4050  
  4051  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4052  		switch a.Index {
  4053  		case REG_AX:
  4054  			cana = 0
  4055  
  4056  		case REG_BX:
  4057  			canb = 0
  4058  
  4059  		case REG_CX:
  4060  			canc = 0
  4061  
  4062  		case REG_DX:
  4063  			cand = 0
  4064  		}
  4065  	}
  4066  
  4067  	if cana != 0 {
  4068  		return REG_AX
  4069  	}
  4070  	if canb != 0 {
  4071  		return REG_BX
  4072  	}
  4073  	if canc != 0 {
  4074  		return REG_CX
  4075  	}
  4076  	if cand != 0 {
  4077  		return REG_DX
  4078  	}
  4079  
  4080  	ctxt.Diag("impossible byte register")
  4081  	log.Fatalf("bad code")
  4082  	return 0
  4083  }
  4084  
  4085  func isbadbyte(a *obj.Addr) bool {
  4086  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4087  }
  4088  
  4089  var naclret = []uint8{
  4090  	0x5e, // POPL SI
  4091  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4092  	0x83,
  4093  	0xe6,
  4094  	0xe0, // ANDL $~31, SI
  4095  	0x4c,
  4096  	0x01,
  4097  	0xfe, // ADDQ R15, SI
  4098  	0xff,
  4099  	0xe6, // JMP SI
  4100  }
  4101  
  4102  var naclret8 = []uint8{
  4103  	0x5d, // POPL BP
  4104  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4105  	0x83,
  4106  	0xe5,
  4107  	0xe0, // ANDL $~31, BP
  4108  	0xff,
  4109  	0xe5, // JMP BP
  4110  }
  4111  
  4112  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4113  
  4114  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4115  
  4116  var naclmovs = []uint8{
  4117  	0x89,
  4118  	0xf6, // MOVL SI, SI
  4119  	0x49,
  4120  	0x8d,
  4121  	0x34,
  4122  	0x37, // LEAQ (R15)(SI*1), SI
  4123  	0x89,
  4124  	0xff, // MOVL DI, DI
  4125  	0x49,
  4126  	0x8d,
  4127  	0x3c,
  4128  	0x3f, // LEAQ (R15)(DI*1), DI
  4129  }
  4130  
  4131  var naclstos = []uint8{
  4132  	0x89,
  4133  	0xff, // MOVL DI, DI
  4134  	0x49,
  4135  	0x8d,
  4136  	0x3c,
  4137  	0x3f, // LEAQ (R15)(DI*1), DI
  4138  }
  4139  
  4140  func nacltrunc(ctxt *obj.Link, reg int) {
  4141  	if reg >= REG_R8 {
  4142  		ctxt.Andptr[0] = 0x45
  4143  		ctxt.Andptr = ctxt.Andptr[1:]
  4144  	}
  4145  	reg = (reg - REG_AX) & 7
  4146  	ctxt.Andptr[0] = 0x89
  4147  	ctxt.Andptr = ctxt.Andptr[1:]
  4148  	ctxt.Andptr[0] = byte(3<<6 | reg<<3 | reg)
  4149  	ctxt.Andptr = ctxt.Andptr[1:]
  4150  }
  4151  
  4152  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4153  	ctxt.Andptr = ctxt.And[:]
  4154  	ctxt.Asmode = int(p.Mode)
  4155  
  4156  	if p.As == obj.AUSEFIELD {
  4157  		r := obj.Addrel(ctxt.Cursym)
  4158  		r.Off = 0
  4159  		r.Siz = 0
  4160  		r.Sym = p.From.Sym
  4161  		r.Type = obj.R_USEFIELD
  4162  		return
  4163  	}
  4164  
  4165  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4166  		switch p.As {
  4167  		case obj.ARET:
  4168  			copy(ctxt.Andptr, naclret8)
  4169  			ctxt.Andptr = ctxt.Andptr[len(naclret8):]
  4170  			return
  4171  
  4172  		case obj.ACALL,
  4173  			obj.AJMP:
  4174  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4175  				ctxt.Andptr[0] = 0x83
  4176  				ctxt.Andptr = ctxt.Andptr[1:]
  4177  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4178  				ctxt.Andptr = ctxt.Andptr[1:]
  4179  				ctxt.Andptr[0] = 0xe0
  4180  				ctxt.Andptr = ctxt.Andptr[1:]
  4181  			}
  4182  
  4183  		case AINT:
  4184  			ctxt.Andptr[0] = 0xf4
  4185  			ctxt.Andptr = ctxt.Andptr[1:]
  4186  			return
  4187  		}
  4188  	}
  4189  
  4190  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4191  		if p.As == AREP {
  4192  			ctxt.Rep++
  4193  			return
  4194  		}
  4195  
  4196  		if p.As == AREPN {
  4197  			ctxt.Repn++
  4198  			return
  4199  		}
  4200  
  4201  		if p.As == ALOCK {
  4202  			ctxt.Lock++
  4203  			return
  4204  		}
  4205  
  4206  		if p.As != ALEAQ && p.As != ALEAL {
  4207  			if p.From.Index != obj.TYPE_NONE && p.From.Scale > 0 {
  4208  				nacltrunc(ctxt, int(p.From.Index))
  4209  			}
  4210  			if p.To.Index != obj.TYPE_NONE && p.To.Scale > 0 {
  4211  				nacltrunc(ctxt, int(p.To.Index))
  4212  			}
  4213  		}
  4214  
  4215  		switch p.As {
  4216  		case obj.ARET:
  4217  			copy(ctxt.Andptr, naclret)
  4218  			ctxt.Andptr = ctxt.Andptr[len(naclret):]
  4219  			return
  4220  
  4221  		case obj.ACALL,
  4222  			obj.AJMP:
  4223  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4224  				// ANDL $~31, reg
  4225  				ctxt.Andptr[0] = 0x83
  4226  				ctxt.Andptr = ctxt.Andptr[1:]
  4227  
  4228  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4229  				ctxt.Andptr = ctxt.Andptr[1:]
  4230  				ctxt.Andptr[0] = 0xe0
  4231  				ctxt.Andptr = ctxt.Andptr[1:]
  4232  
  4233  				// ADDQ R15, reg
  4234  				ctxt.Andptr[0] = 0x4c
  4235  				ctxt.Andptr = ctxt.Andptr[1:]
  4236  
  4237  				ctxt.Andptr[0] = 0x01
  4238  				ctxt.Andptr = ctxt.Andptr[1:]
  4239  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_AX))
  4240  				ctxt.Andptr = ctxt.Andptr[1:]
  4241  			}
  4242  
  4243  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4244  				// ANDL $~31, reg
  4245  				ctxt.Andptr[0] = 0x41
  4246  				ctxt.Andptr = ctxt.Andptr[1:]
  4247  
  4248  				ctxt.Andptr[0] = 0x83
  4249  				ctxt.Andptr = ctxt.Andptr[1:]
  4250  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_R8))
  4251  				ctxt.Andptr = ctxt.Andptr[1:]
  4252  				ctxt.Andptr[0] = 0xe0
  4253  				ctxt.Andptr = ctxt.Andptr[1:]
  4254  
  4255  				// ADDQ R15, reg
  4256  				ctxt.Andptr[0] = 0x4d
  4257  				ctxt.Andptr = ctxt.Andptr[1:]
  4258  
  4259  				ctxt.Andptr[0] = 0x01
  4260  				ctxt.Andptr = ctxt.Andptr[1:]
  4261  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_R8))
  4262  				ctxt.Andptr = ctxt.Andptr[1:]
  4263  			}
  4264  
  4265  		case AINT:
  4266  			ctxt.Andptr[0] = 0xf4
  4267  			ctxt.Andptr = ctxt.Andptr[1:]
  4268  			return
  4269  
  4270  		case ASCASB,
  4271  			ASCASW,
  4272  			ASCASL,
  4273  			ASCASQ,
  4274  			ASTOSB,
  4275  			ASTOSW,
  4276  			ASTOSL,
  4277  			ASTOSQ:
  4278  			copy(ctxt.Andptr, naclstos)
  4279  			ctxt.Andptr = ctxt.Andptr[len(naclstos):]
  4280  
  4281  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4282  			copy(ctxt.Andptr, naclmovs)
  4283  			ctxt.Andptr = ctxt.Andptr[len(naclmovs):]
  4284  		}
  4285  
  4286  		if ctxt.Rep != 0 {
  4287  			ctxt.Andptr[0] = 0xf3
  4288  			ctxt.Andptr = ctxt.Andptr[1:]
  4289  			ctxt.Rep = 0
  4290  		}
  4291  
  4292  		if ctxt.Repn != 0 {
  4293  			ctxt.Andptr[0] = 0xf2
  4294  			ctxt.Andptr = ctxt.Andptr[1:]
  4295  			ctxt.Repn = 0
  4296  		}
  4297  
  4298  		if ctxt.Lock != 0 {
  4299  			ctxt.Andptr[0] = 0xf0
  4300  			ctxt.Andptr = ctxt.Andptr[1:]
  4301  			ctxt.Lock = 0
  4302  		}
  4303  	}
  4304  
  4305  	ctxt.Rexflag = 0
  4306  	and0 := ctxt.Andptr
  4307  	ctxt.Asmode = int(p.Mode)
  4308  	doasm(ctxt, p)
  4309  	if ctxt.Rexflag != 0 {
  4310  		/*
  4311  		 * as befits the whole approach of the architecture,
  4312  		 * the rex prefix must appear before the first opcode byte
  4313  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4314  		 * before the 0f opcode escape!), or it might be ignored.
  4315  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4316  		 */
  4317  		if p.Mode != 64 {
  4318  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4319  		}
  4320  		n := -cap(ctxt.Andptr) + cap(and0)
  4321  		var c int
  4322  		var np int
  4323  		for np = 0; np < n; np++ {
  4324  			c = int(and0[np])
  4325  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4326  				break
  4327  			}
  4328  		}
  4329  
  4330  		copy(and0[np+1:], and0[np:n])
  4331  		and0[np] = byte(0x40 | ctxt.Rexflag)
  4332  		ctxt.Andptr = ctxt.Andptr[1:]
  4333  	}
  4334  
  4335  	n := -cap(ctxt.Andptr) + cap(ctxt.And[:])
  4336  	var r *obj.Reloc
  4337  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4338  		r = &ctxt.Cursym.R[i:][0]
  4339  		if int64(r.Off) < p.Pc {
  4340  			break
  4341  		}
  4342  		if ctxt.Rexflag != 0 {
  4343  			r.Off++
  4344  		}
  4345  		if r.Type == obj.R_PCREL {
  4346  			// PC-relative addressing is relative to the end of the instruction,
  4347  			// but the relocations applied by the linker are relative to the end
  4348  			// of the relocation. Because immediate instruction
  4349  			// arguments can follow the PC-relative memory reference in the
  4350  			// instruction encoding, the two may not coincide. In this case,
  4351  			// adjust addend so that linker can keep relocating relative to the
  4352  			// end of the relocation.
  4353  			r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4354  		}
  4355  	}
  4356  
  4357  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4358  		switch p.To.Reg {
  4359  		case REG_SP:
  4360  			copy(ctxt.Andptr, naclspfix)
  4361  			ctxt.Andptr = ctxt.Andptr[len(naclspfix):]
  4362  
  4363  		case REG_BP:
  4364  			copy(ctxt.Andptr, naclbpfix)
  4365  			ctxt.Andptr = ctxt.Andptr[len(naclbpfix):]
  4366  		}
  4367  	}
  4368  }