github.com/zxy12/go_duplicate_1_12@v0.0.0-20200217043740-b1636fc0368b/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
    43  var ( // package-level symbol handles; nothing in this part of the file assigns them
    44  	plan9privates *obj.LSym // NOTE(review): presumably a Plan 9 specific data symbol — confirm at the use site
    45  	deferreturn   *obj.LSym // NOTE(review): presumably the symbol of the runtime deferreturn function — confirm at the use site
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // want to align loop entry to loopAlign-byte boundary,
    52  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  //
    63  const (
    64  	loopAlign  = 16 // boundary, in bytes, that loop entries would be aligned to
    65  	maxLoopPad = 0  // upper bound on NOP padding bytes per loop entry; 0 disables the alignment
    66  )
    67  
    68  // Bit flags that are used to express jump target properties.
    69  const (
    70  	// branchBackwards (value 1) marks targets that are located behind.
    71  	// Used to express jumps to loop headers.
    72  	branchBackwards = (1 << iota)
    73  	// branchShort (value 2) marks branches whose target is close,
    74  	// with an offset in the -128..127 range.
    75  	branchShort
    76  	// branchLoopHead (value 4) marks loop entry.
    77  	// Used to insert padding for misaligned loops.
    78  	branchLoopHead
    79  )
    80  
    81  // opBytes holds the encoding bytes of one optab entry.
    82  // Each ytab line reserves a fixed number of bytes in this array.
    83  //
    84  // The size should be the minimal number of bytes that
    85  // is enough to hold the biggest optab op lines.
    86  type opBytes [31]uint8
    87  
    88  type Optab struct { // one optab row: everything needed to encode a single instruction
    89  	as     obj.As  // instruction the row describes; optab is scanned for the row matching p.As
    90  	ytab   []ytab  // operand-form lines that are matched against the Ytypes of the operands
    91  	prefix uint8   // one of the P* constants; controls the prefix bytes to emit
    92  	op     opBytes // encoding bytes; each ytab line owns a consecutive slice of them (its zoffset)
    93  }
    94  
    95  type Movtab struct { // NOTE(review): users of this type are outside this chunk; field notes below are unconfirmed
    96  	as   obj.As   // instruction the row applies to
    97  	ft   uint8    // NOTE(review): presumably the class of the "from" operand — confirm
    98  	f3t  uint8    // NOTE(review): presumably the class of a third operand — confirm
    99  	tt   uint8    // NOTE(review): presumably the class of the "to" operand — confirm
   100  	code uint8    // NOTE(review): presumably selects how op is interpreted — confirm
   101  	op   [4]uint8 // up to four bytes of encoding data
   102  }
   103  
   104  const ( // Ytypes: operand classes used in ytab argLists; oclass computes the specific Ytype of an operand
   105  	Yxxx = iota // 0, the first Ytype
   106  	Ynone
   107  	Yi0 // $0
   108  	Yi1 // $1
   109  	Yu2 // $x, x fits in uint2
   110  	Yi8 // $x, x fits in int8
   111  	Yu8 // $x, x fits in uint8
   112  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   113  	Ys32 // signed 32; Yi0, Yi1 and Yi8 are covered by it via ycover
   114  	Yi32
   115  	Yi64
   116  	Yiauto
   117  	Yal
   118  	Ycl
   119  	Yax
   120  	Ycx
   121  	Yrb
   122  	Yrl
   123  	Yrl32 // Yrl on 32-bit system
   124  	Yrf
   125  	Yf0
   126  	Yrx
   127  	Ymb
   128  	Yml
   129  	Ym
   130  	Ybr
   131  	Ycs
   132  	Yss
   133  	Yds
   134  	Yes
   135  	Yfs
   136  	Ygs
   137  	Ygdtr
   138  	Yidtr
   139  	Yldtr
   140  	Ymsw
   141  	Ytask
   142  	Ycr0
   143  	Ycr1
   144  	Ycr2
   145  	Ycr3
   146  	Ycr4
   147  	Ycr5
   148  	Ycr6
   149  	Ycr7
   150  	Ycr8
   151  	Ydr0
   152  	Ydr1
   153  	Ydr2
   154  	Ydr3
   155  	Ydr4
   156  	Ydr5
   157  	Ydr6
   158  	Ydr7
   159  	Ytr0
   160  	Ytr1
   161  	Ytr2
   162  	Ytr3
   163  	Ytr4
   164  	Ytr5
   165  	Ytr6
   166  	Ytr7
   167  	Ymr
   168  	Ymm
   169  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   170  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   171  	Yxr           // X0..X15
   172  	YxrEvex       // X0..X31
   173  	Yxm
   174  	YxmEvex       // YxrEvex+Ym
   175  	Yxvm          // VSIB vector array; vm32x/vm64x
   176  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   177  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   178  	Yyr           // Y0..Y15
   179  	YyrEvex       // Y0..Y31
   180  	Yym
   181  	YymEvex   // YyrEvex+Ym
   182  	Yyvm      // VSIB vector array; vm32y/vm64y
   183  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   184  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   185  	Yzr       // Z0..Z31
   186  	Yzm       // Yzr+Ym
   187  	Yzvm      // VSIB vector array; vm32z/vm64z
   188  	Yk0       // K0
   189  	Yknot0    // K1..K7; write mask
   190  	Yk        // K0..K7; used for KOP
   191  	Ykm       // Yk+Ym; used for KOP
   192  	Ytls
   193  	Ytextsize
   194  	Yindir
   195  	Ymax // number of Ytypes; ycover is sized Ymax*Ymax
   196  )
   197  
   198  const ( // Ztypes: the first value of every ytab line; doasm switches on it to decide how bytes are laid down
   199  	Zxxx = iota // 0, the first Ztype
   200  	Zlit
   201  	Zlitm_r
   202  	Zlitr_m
   203  	Zlit_m_r
   204  	Z_rp
   205  	Zbr
   206  	Zcall
   207  	Zcallcon
   208  	Zcallduff
   209  	Zcallind
   210  	Zcallindreg
   211  	Zib_
   212  	Zib_rp
   213  	Zibo_m
   214  	Zibo_m_xm
   215  	Zil_
   216  	Zil_rp
   217  	Ziq_rp
   218  	Zilo_m
   219  	Zjmp
   220  	Zjmpcon
   221  	Zloop
   222  	Zo_iw
   223  	Zm_o
   224  	Zm_r
   225  	Z_m_r
   226  	Zm2_r
   227  	Zm_r_xm
   228  	Zm_r_i_xm
   229  	Zm_r_xm_nr
   230  	Zr_m_xm_nr
   231  	Zibm_r // mmx1,mmx2/mem64,imm8
   232  	Zibr_m
   233  	Zmb_r
   234  	Zaut_r
   235  	Zo_m
   236  	Zo_m64
   237  	Zpseudo
   238  	Zr_m
   239  	Zr_m_xm
   240  	Zrp_
   241  	Z_ib
   242  	Z_il
   243  	Zm_ibo
   244  	Zm_ilo
   245  	Zib_rr
   246  	Zil_rr
   247  	Zbyte
   248  
   249  	Zvex_rm_v_r
   250  	Zvex_rm_v_ro
   251  	Zvex_r_v_rm
   252  	Zvex_i_rm_vo
   253  	Zvex_v_rm_r
   254  	Zvex_i_rm_r
   255  	Zvex_i_r_v
   256  	Zvex_i_rm_v_r
   257  	Zvex
   258  	Zvex_rm_r_vo
   259  	Zvex_i_r_rm
   260  	Zvex_hr_rm_v_r
   261  
   262  	Zevex_first // marker placed before the first Zevex_* case
   263  	Zevex_i_r_k_rm
   264  	Zevex_i_r_rm
   265  	Zevex_i_rm_k_r
   266  	Zevex_i_rm_k_vo
   267  	Zevex_i_rm_r
   268  	Zevex_i_rm_v_k_r
   269  	Zevex_i_rm_v_r
   270  	Zevex_i_rm_vo
   271  	Zevex_k_rmo
   272  	Zevex_r_k_rm
   273  	Zevex_r_v_k_rm
   274  	Zevex_r_v_rm
   275  	Zevex_rm_k_r
   276  	Zevex_rm_v_k_r
   277  	Zevex_rm_v_r
   278  	Zevex_last // marker placed after the last Zevex_* case
   279  
   280  	Zmax // number of Ztypes, markers included
   281  )
   282  
   283  const ( // P* values fill the prefix column of optab rows; Rx* values are single-bit register-extension flags
   284  	Px   = 0    // NOTE(review): rows such as AADDL use it with plain opcode bytes, so presumably "no prefix" — confirm
   285  	Px1  = 1    // symbolic; exact value doesn't matter
   286  	P32  = 0x32 // 32-bit only
   287  	Pe   = 0x66 // operand escape
   288  	Pm   = 0x0f // 2byte opcode escape
   289  	Pq   = 0xff // both escapes: 66 0f
   290  	Pb   = 0xfe // byte operands
   291  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   292  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   293  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   294  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   295  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   296  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   297  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   298  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   299  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   300  	Pw   = 0x48 // Rex.w
   301  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   302  	Py   = 0x80 // defaults to 64-bit mode
   303  	Py1  = 0x81 // symbolic; exact value doesn't matter
   304  	Py3  = 0x83 // symbolic; exact value doesn't matter
   305  	Pavx = 0x84 // symbolic: exact value doesn't matter
   306  
   307  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   308  	Rxw     = 1 << 3 // =1, 64-bit operand size
   309  	Rxr     = 1 << 2 // extend modrm reg
   310  	Rxx     = 1 << 1 // extend sib index
   311  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   312  )
   313  
   314  const (
   315  	// Encoding for VEX prefix in tables.
   316  	// The P, L, and W fields are chosen to match
   317  	// their eventual locations in the VEX prefix bytes.
   318  
   319  	// Bit layout of the table byte, as defined below:
   320  	// bits 0-1 hold P, bit 2 holds L, bits 3-4 hold M,
   321  	// bit 6 is avxEscape and bit 7 holds W.
   322  
   323  	// Using spare bit to make leading [E]VEX encoding byte different from
   324  	// 0x0f even if all other VEX fields are 0.
   325  	avxEscape = 1 << 6
   326  
   327  	// P field - 2 bits
   328  	vex66 = 1 << 0
   329  	vexF3 = 2 << 0
   330  	vexF2 = 3 << 0
   331  	// L field - 1 bit (vexLZ, vexLIG and vex128 all have the value 0)
   332  	vexLZ  = 0 << 2
   333  	vexLIG = 0 << 2
   334  	vex128 = 0 << 2
   335  	vex256 = 1 << 2
   336  	// W field - 1 bit (vexWIG and vexW0 both have the value 0)
   337  	vexWIG = 0 << 7
   338  	vexW0  = 0 << 7
   339  	vexW1  = 1 << 7
   340  	// M field - 5 bits, but mostly reserved; we can store up to 3
   341  	vex0F   = 1 << 3
   342  	vex0F38 = 2 << 3
   343  	vex0F3A = 3 << 3
   344  )
   345  
   346  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means Ytype a also counts as Ytype b; set up in instinit
   347  
   348  var reg [MAXREG]int // sized by MAXREG; NOTE(review): presumably a per-register encoding value — confirm where it is filled
   349  
   350  var regrex [MAXREG + 1]int // one slot more than reg; NOTE(review): presumably per-register REX bits — confirm where it is filled
   351  
   352  var ynone = []ytab{ // ytab lines read {Ztype, zoffset, argList{operand Ytypes}}; ynone is the no-operand form (AAAA, ACLC, ACMPSB, ... in optab)
   353  	{Zlit, 1, argList{}},
   354  }
   355  
   356  var ytext = []ytab{ // both forms are Zpseudo; they differ in the optional Yi32 middle operand
   357  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   358  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   359  }
   360  
   361  var ynop = []ytab{ // NOTE(review): the Yiauto/Yml/Yrf/Yxr lines appear twice; the second set looks unreachable if lines are tried in order — confirm before pruning
   362  	{Zpseudo, 0, argList{}},
   363  	{Zpseudo, 0, argList{Yiauto}},
   364  	{Zpseudo, 0, argList{Yml}},
   365  	{Zpseudo, 0, argList{Yrf}},
   366  	{Zpseudo, 0, argList{Yxr}},
   367  	{Zpseudo, 0, argList{Yiauto}},
   368  	{Zpseudo, 0, argList{Yml}},
   369  	{Zpseudo, 0, argList{Yrf}},
   370  	{Zpseudo, 1, argList{Yxr}},
   371  }
   372  
   373  var yfuncdata = []ytab{ // single Zpseudo form: Yi32, Ym
   374  	{Zpseudo, 0, argList{Yi32, Ym}},
   375  }
   376  
   377  var ypcdata = []ytab{ // single Zpseudo form: Yi32, Yi32
   378  	{Zpseudo, 0, argList{Yi32, Yi32}},
   379  }
   380  
   381  var yxorb = []ytab{ // ytab lines read {Ztype, zoffset, argList{operand Ytypes}}; yxorb is used by AADCB, AADDB, AANDB in optab
   382  	{Zib_, 1, argList{Yi32, Yal}},
   383  	{Zibo_m, 2, argList{Yi32, Ymb}},
   384  	{Zr_m, 1, argList{Yrb, Ymb}},
   385  	{Zm_r, 1, argList{Ymb, Yrb}},
   386  }
   387  
   388  var yaddl = []ytab{ // used by the L/Q/W variants of AADC, AADD and AAND in optab; zoffsets total the 7 opcode bytes of those rows
   389  	{Zibo_m, 2, argList{Yi8, Yml}},
   390  	{Zil_, 1, argList{Yi32, Yax}},
   391  	{Zilo_m, 2, argList{Yi32, Yml}},
   392  	{Zr_m, 1, argList{Yrl, Yml}},
   393  	{Zm_r, 1, argList{Yml, Yrl}},
   394  }
   395  
   396  var yincl = []ytab{
   397  	{Z_rp, 1, argList{Yrl}},
   398  	{Zo_m, 2, argList{Yml}},
   399  }
   400  
   401  var yincq = []ytab{ // same as yincl without the Z_rp register form
   402  	{Zo_m, 2, argList{Yml}},
   403  }
   404  
   405  var ycmpb = []ytab{ // used by ACMPB in optab; operand order is the reverse of yxorb
   406  	{Z_ib, 1, argList{Yal, Yi32}},
   407  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   408  	{Zm_r, 1, argList{Ymb, Yrb}},
   409  	{Zr_m, 1, argList{Yrb, Ymb}},
   410  }
   411  
   412  var ycmpl = []ytab{ // used by ACMPL and ACMPQ in optab; operand order is the reverse of yaddl
   413  	{Zm_ibo, 2, argList{Yml, Yi8}},
   414  	{Z_il, 1, argList{Yax, Yi32}},
   415  	{Zm_ilo, 2, argList{Yml, Yi32}},
   416  	{Zm_r, 1, argList{Yml, Yrl}},
   417  	{Zr_m, 1, argList{Yrl, Yml}},
   418  }
   419  
   420  var yshb = []ytab{
   421  	{Zo_m, 2, argList{Yi1, Ymb}},
   422  	{Zibo_m, 2, argList{Yu8, Ymb}},
   423  	{Zo_m, 2, argList{Ycx, Ymb}},
   424  }
   425  
   426  var yshl = []ytab{ // like yshb on Yml, with an additional Ycl count form
   427  	{Zo_m, 2, argList{Yi1, Yml}},
   428  	{Zibo_m, 2, argList{Yu8, Yml}},
   429  	{Zo_m, 2, argList{Ycl, Yml}},
   430  	{Zo_m, 2, argList{Ycx, Yml}},
   431  }
   432  
   433  var ytestl = []ytab{ // yaddl without the leading Yi8 form
   434  	{Zil_, 1, argList{Yi32, Yax}},
   435  	{Zilo_m, 2, argList{Yi32, Yml}},
   436  	{Zr_m, 1, argList{Yrl, Yml}},
   437  	{Zm_r, 1, argList{Yml, Yrl}},
   438  }
   439  
   440  var ymovb = []ytab{ // ytab lines read {Ztype, zoffset, argList{operand Ytypes}}
   441  	{Zr_m, 1, argList{Yrb, Ymb}},
   442  	{Zm_r, 1, argList{Ymb, Yrb}},
   443  	{Zib_rp, 1, argList{Yi32, Yrb}},
   444  	{Zibo_m, 2, argList{Yi32, Ymb}},
   445  }
   446  
   447  var ybtl = []ytab{ // used by the ABT, ABTC, ABTR and ABTS rows in optab
   448  	{Zibo_m, 2, argList{Yi8, Yml}},
   449  	{Zr_m, 1, argList{Yrl, Yml}},
   450  }
   451  
   452  var ymovw = []ytab{
   453  	{Zr_m, 1, argList{Yrl, Yml}},
   454  	{Zm_r, 1, argList{Yml, Yrl}},
   455  	{Zil_rp, 1, argList{Yi32, Yrl}},
   456  	{Zilo_m, 2, argList{Yi32, Yml}},
   457  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   458  }
   459  
   460  var ymovl = []ytab{ // ymovw plus the MMX/XMM MOVD forms inserted before the Yiauto form
   461  	{Zr_m, 1, argList{Yrl, Yml}},
   462  	{Zm_r, 1, argList{Yml, Yrl}},
   463  	{Zil_rp, 1, argList{Yi32, Yrl}},
   464  	{Zilo_m, 2, argList{Yi32, Yml}},
   465  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   466  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   467  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   468  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   469  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   470  }
   471  
   472  var yret = []ytab{ // Zo_iw with no operand or with a Yi32 operand
   473  	{Zo_iw, 1, argList{}},
   474  	{Zo_iw, 1, argList{Yi32}},
   475  }
   476  
   477  var ymovq = []ytab{ // ytab lines read {Ztype, zoffset, argList{operand Ytypes}}; line comments name the opcode bytes each form selects
   478  	// valid in 32-bit mode
   479  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   480  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   481  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   482  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   483  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   484  
   485  	// valid only in 64-bit mode, usually with 64-bit prefix
   486  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   487  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   488  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   489  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   490  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   491  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   492  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   493  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   494  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   495  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   496  }
   497  
   498  var ymovbe = []ytab{ // ytab lines read {Ztype, zoffset, argList{operand Ytypes}}
   499  	{Zlitm_r, 3, argList{Ym, Yrl}},
   500  	{Zlitr_m, 3, argList{Yrl, Ym}},
   501  }
   502  
   503  var ym_rl = []ytab{
   504  	{Zm_r, 1, argList{Ym, Yrl}},
   505  }
   506  
   507  var yrl_m = []ytab{ // used by ABOUNDL and ABOUNDW in optab
   508  	{Zr_m, 1, argList{Yrl, Ym}},
   509  }
   510  
   511  var ymb_rl = []ytab{
   512  	{Zmb_r, 1, argList{Ymb, Yrl}},
   513  }
   514  
   515  var yml_rl = []ytab{ // used by the AADCX, AADOX, ABSF, ABSR and ACMOV* rows in optab
   516  	{Zm_r, 1, argList{Yml, Yrl}},
   517  }
   518  
   519  var yrl_ml = []ytab{ // used by AARPL in optab
   520  	{Zr_m, 1, argList{Yrl, Yml}},
   521  }
   522  
   523  var yml_mb = []ytab{
   524  	{Zr_m, 1, argList{Yrb, Ymb}},
   525  	{Zm_r, 1, argList{Ymb, Yrb}},
   526  }
   527  
   528  var yrb_mb = []ytab{
   529  	{Zr_m, 1, argList{Yrb, Ymb}},
   530  }
   531  
   532  var yxchg = []ytab{
   533  	{Z_rp, 1, argList{Yax, Yrl}},
   534  	{Zrp_, 1, argList{Yrl, Yax}},
   535  	{Zr_m, 1, argList{Yrl, Yml}},
   536  	{Zm_r, 1, argList{Yml, Yrl}},
   537  }
   538  
   539  var ydivl = []ytab{
   540  	{Zm_o, 2, argList{Yml}},
   541  }
   542  
   543  var ydivb = []ytab{
   544  	{Zm_o, 2, argList{Ymb}},
   545  }
   546  
   547  var yimul = []ytab{
   548  	{Zm_o, 2, argList{Yml}},
   549  	{Zib_rr, 1, argList{Yi8, Yrl}},
   550  	{Zil_rr, 1, argList{Yi32, Yrl}},
   551  	{Zm_r, 2, argList{Yml, Yrl}},
   552  }
   553  
   554  var yimul3 = []ytab{ // three-operand forms; the Yi8 line comes before the wider Yi32 line
   555  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   556  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   557  }
   558  
   559  var ybyte = []ytab{ // used by ABYTE in optab
   560  	{Zbyte, 1, argList{Yi64}},
   561  }
   562  
   563  var yin = []ytab{
   564  	{Zib_, 1, argList{Yi32}},
   565  	{Zlit, 1, argList{}},
   566  }
   567  
   568  var yint = []ytab{
   569  	{Zib_, 1, argList{Yi32}},
   570  }
   571  
   572  var ypushl = []ytab{
   573  	{Zrp_, 1, argList{Yrl}},
   574  	{Zm_o, 2, argList{Ym}},
   575  	{Zib_, 1, argList{Yi8}},
   576  	{Zil_, 1, argList{Yi32}},
   577  }
   578  
   579  var ypopl = []ytab{
   580  	{Z_rp, 1, argList{Yrl}},
   581  	{Zo_m, 2, argList{Ym}},
   582  }
   583  
   584  var ywrfsbase = []ytab{
   585  	{Zm_o, 2, argList{Yrl}},
   586  }
   587  
   588  var yrdrand = []ytab{
   589  	{Zo_m, 2, argList{Yrl}},
   590  }
   591  
   592  var yclflush = []ytab{ // used by ACLFLUSH and ACLFLUSHOPT in optab
   593  	{Zo_m, 2, argList{Ym}},
   594  }
   595  
   596  var ybswap = []ytab{ // used by ABSWAPW, ABSWAPL and ABSWAPQ in optab
   597  	{Z_rp, 2, argList{Yrl}},
   598  }
   599  
   600  var yscond = []ytab{
   601  	{Zo_m, 2, argList{Ymb}},
   602  }
   603  
   604  var yjcond = []ytab{ // ytab lines read {Ztype, zoffset, argList{operand Ytypes}}; a Ybr target, optionally preceded by a Yi0 or Yi1 constant
   605  	{Zbr, 0, argList{Ybr}},
   606  	{Zbr, 0, argList{Yi0, Ybr}},
   607  	{Zbr, 1, argList{Yi1, Ybr}},
   608  }
   609  
   610  var yloop = []ytab{
   611  	{Zloop, 1, argList{Ybr}},
   612  }
   613  
   614  var ycall = []ytab{ // used by obj.ACALL in optab
   615  	{Zcallindreg, 0, argList{Yml}},
   616  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   617  	{Zcallind, 2, argList{Yindir}},
   618  	{Zcall, 0, argList{Ybr}},
   619  	{Zcallcon, 1, argList{Yi32}},
   620  }
   621  
   622  var yduff = []ytab{
   623  	{Zcallduff, 1, argList{Yi32}},
   624  }
   625  
   626  var yjmp = []ytab{
   627  	{Zo_m64, 2, argList{Yml}},
   628  	{Zjmp, 0, argList{Ybr}},
   629  	{Zjmpcon, 1, argList{Yi32}},
   630  }
   631  
   632  var yfmvd = []ytab{ // ytab lines read {Ztype, zoffset, argList{operand Ytypes}}; NOTE(review): Yf0/Yrf suggest x87 register forms — confirm
   633  	{Zm_o, 2, argList{Ym, Yf0}},
   634  	{Zo_m, 2, argList{Yf0, Ym}},
   635  	{Zm_o, 2, argList{Yrf, Yf0}},
   636  	{Zo_m, 2, argList{Yf0, Yrf}},
   637  }
   638  
   639  var yfmvdp = []ytab{ // only the forms of yfmvd whose first operand is Yf0
   640  	{Zo_m, 2, argList{Yf0, Ym}},
   641  	{Zo_m, 2, argList{Yf0, Yrf}},
   642  }
   643  
   644  var yfmvf = []ytab{ // only the forms of yfmvd that involve Ym
   645  	{Zm_o, 2, argList{Ym, Yf0}},
   646  	{Zo_m, 2, argList{Yf0, Ym}},
   647  }
   648  
   649  var yfmvx = []ytab{
   650  	{Zm_o, 2, argList{Ym, Yf0}},
   651  }
   652  
   653  var yfmvp = []ytab{
   654  	{Zo_m, 2, argList{Yf0, Ym}},
   655  }
   656  
   657  var yfcmv = []ytab{
   658  	{Zm_o, 2, argList{Yrf, Yf0}},
   659  }
   660  
   661  var yfadd = []ytab{
   662  	{Zm_o, 2, argList{Ym, Yf0}},
   663  	{Zm_o, 2, argList{Yrf, Yf0}},
   664  	{Zo_m, 2, argList{Yf0, Yrf}},
   665  }
   666  
   667  var yfxch = []ytab{
   668  	{Zo_m, 2, argList{Yf0, Yrf}},
   669  	{Zm_o, 2, argList{Yrf, Yf0}},
   670  }
   671  
   672  var ycompp = []ytab{
   673  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   674  }
   675  
   676  var ystsw = []ytab{
   677  	{Zo_m, 2, argList{Ym}},
   678  	{Zlit, 1, argList{Yax}},
   679  }
   680  
   681  var ysvrs_mo = []ytab{ // single Ym operand laid down as Zm_o
   682  	{Zm_o, 2, argList{Ym}},
   683  }
   684  
   685  // unaryDst version of "ysvrs_mo": same Ym operand, laid down as Zo_m.
   686  var ysvrs_om = []ytab{
   687  	{Zo_m, 2, argList{Ym}},
   688  }
   689  
   690  var ymm = []ytab{ // ytab lines read {Ztype, zoffset, argList{operand Ytypes}}; one Ymm/Ymr form followed by one Yxm/Yxr form
   691  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   692  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   693  }
   694  
   695  var yxm = []ytab{ // used by AADDPD/PS/SD/SS, AADDSUBPD/PS, AANDNPD/PS and AANDPD/PS in optab
   696  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   697  }
   698  
   699  var yxm_q4 = []ytab{ // same operands as yxm, but Zm_r instead of Zm_r_xm
   700  	{Zm_r, 1, argList{Yxm, Yxr}},
   701  }
   702  
   703  var yxcvm1 = []ytab{
   704  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   705  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   706  }
   707  
   708  var yxcvm2 = []ytab{
   709  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   710  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   711  }
   712  
   713  var yxr = []ytab{
   714  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   715  }
   716  
   717  var yxr_ml = []ytab{
   718  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   719  }
   720  
   721  var ymr = []ytab{
   722  	{Zm_r, 1, argList{Ymr, Ymr}},
   723  }
   724  
   725  var ymr_ml = []ytab{
   726  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   727  }
   728  
   729  var yxcmpi = []ytab{ // used by ACMPPD, ACMPPS and ACMPSD in optab; the Yi8 operand comes last
   730  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   731  }
   732  
   733  var yxmov = []ytab{
   734  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   735  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   736  }
   737  
   738  var yxcvfl = []ytab{
   739  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   740  }
   741  
   742  var yxcvlf = []ytab{
   743  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   744  }
   745  
   746  var yxcvfq = []ytab{ // same operands as yxcvfl with zoffset 2
   747  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   748  }
   749  
   750  var yxcvqf = []ytab{ // same operands as yxcvlf with zoffset 2
   751  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   752  }
   753  
   754  var yps = []ytab{
   755  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   756  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   757  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   758  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   759  }
   760  
   761  var yxrrl = []ytab{
   762  	{Zm_r, 1, argList{Yxr, Yrl}},
   763  }
   764  
   765  var ymrxr = []ytab{
   766  	{Zm_r, 1, argList{Ymr, Yxr}},
   767  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   768  }
   769  
   770  var ymshuf = []ytab{ // ytab lines read {Ztype, zoffset, argList{operand Ytypes}}
   771  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   772  }
   773  
   774  var ymshufb = []ytab{
   775  	{Zm2_r, 2, argList{Yxm, Yxr}},
   776  }
   777  
   778  // It should never have more than 1 entry,
   779  // because some optab entries use opcode sequences that
   780  // are longer than 2 bytes (zoffset=2 here),
   781  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   782  // to name a few.
   783  var yxshuf = []ytab{
   784  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   785  }
   786  
   787  var yextrw = []ytab{
   788  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   789  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   790  }
   791  
   792  var yextr = []ytab{
   793  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   794  }
   795  
   796  var yinsrw = []ytab{
   797  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   798  }
   799  
   800  var yinsr = []ytab{
   801  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   802  }
   803  
   804  var ypsdq = []ytab{
   805  	{Zibo_m, 2, argList{Yi8, Yxr}},
   806  }
   807  
   808  var ymskb = []ytab{
   809  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   810  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   811  }
   812  
   813  var ycrc32l = []ytab{
   814  	{Zlitm_r, 0, argList{Yml, Yrl}},
   815  }
   816  
   817  var ycrc32b = []ytab{ // same as ycrc32l with a Ymb source
   818  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   819  }
   820  
   821  var yprefetch = []ytab{
   822  	{Zm_o, 2, argList{Ym}},
   823  }
   824  
   825  var yaes = []ytab{
   826  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   827  }
   828  
   829  var yxbegin = []ytab{
   830  	{Zjmp, 1, argList{Ybr}},
   831  }
   832  
   833  var yxabort = []ytab{
   834  	{Zib_, 1, argList{Yu8}},
   835  }
   836  
   837  var ylddqu = []ytab{
   838  	{Zm_r, 1, argList{Ym, Yxr}},
   839  }
   840  
   841  var ypalignr = []ytab{ // identical contents to yxshuf
   842  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   843  }
   844  
   845  var ysha256rnds2 = []ytab{ // first operand must be Yxr0 (X0 only)
   846  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   847  }
   848  
   849  var yblendvpd = []ytab{ // first operand must be Yxr0 (X0 only)
   850  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   851  }
   852  
   853  var ymmxmm0f38 = []ytab{
   854  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   855  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   856  }
   857  
   858  var yextractps = []ytab{ // immediate is restricted to Yu2 (fits in uint2)
   859  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   860  }
   861  
   862  var ysha1rnds4 = []ytab{ // immediate is restricted to Yu2 (fits in uint2)
   863  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   864  }
   865  
   866  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   867  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   868  // to find the entry with the given p.As and then looks through the ytable for
   869  // that instruction (the second field in the optab struct) for a line whose
   870  // argList values match the Ytypes of the p.From and p.To operands.  The
   871  // function oclass computes the specific Ytype of an operand and then the set
   872  // of more general Ytypes that it satisfies is implied by the ycover table, set
   873  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   874  // from the more general 8-bit constants, but instinit says
   875  //
   876  //        ycover[Yi0*Ymax+Ys32] = 1
   877  //        ycover[Yi1*Ymax+Ys32] = 1
   878  //        ycover[Yi8*Ymax+Ys32] = 1
   879  //
   880  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   881  // if that's what an instruction can handle.
   882  //
   883  // In parallel with the scan through the ytable for the appropriate line, there
   884  // is a z pointer that starts out pointing at the strange magic byte list in
   885  // the Optab struct.  With each step past a non-matching ytable line, z
   886  // advances by the 2nd entry in the line (zoffset).  When a matching line is found,
   887  // that z pointer has the extra data to use in laying down the instruction bytes.
   888  // The actual bytes laid down are a function of the 1st entry in the line (that
   889  // is, the Ztype) and the z bytes.
   890  //
   891  // For example, let's look at AADDL.  The optab line says:
   892  //        {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //        var yaddl = []ytab{
   896  //                {Zibo_m, 2, argList{Yi8, Yml}},
   897  //                {Zil_, 1, argList{Yi32, Yax}},
   898  //                {Zilo_m, 2, argList{Yi32, Yml}},
   899  //                {Zr_m, 1, argList{Yrl, Yml}},
   900  //                {Zm_r, 1, argList{Yml, Yrl}},
   901  //        }
   902  //
   903  // so there are 5 possible types of ADDL instruction that can be laid down, and
   904  // possible states used to lay them down (Ztype and z pointer, assuming z
   905  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   906  //
   907  //        Yi8, Yml -> Zibo_m, z (0x83, 00)
   908  //        Yi32, Yax -> Zil_, z+2 (0x05)
   909  //        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   910  //        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   911  //        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   912  //
   913  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   914  // relatively straightforward as this program goes.
   915  //
   916  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   917  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   918  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   919  // Zilo_m is the same but a long (32-bit) immediate.
   920  var optab =
   921  //	as, ytab, andproto, opcode
   922  [...]Optab{
   923  	{obj.AXXX, nil, 0, opBytes{}},
   924  	{AAAA, ynone, P32, opBytes{0x37}},
   925  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   926  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   927  	{AAAS, ynone, P32, opBytes{0x3f}},
   928  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   929  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   930  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   933  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   934  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   935  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   936  	{AADDPD, yxm, Pq, opBytes{0x58}},
   937  	{AADDPS, yxm, Pm, opBytes{0x58}},
   938  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   939  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   940  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   941  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   942  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   943  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   944  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   945  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   946  	{AADJSP, nil, 0, opBytes{}},
   947  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   948  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   949  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   950  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   951  	{AANDPD, yxm, Pq, opBytes{0x54}},
   952  	{AANDPS, yxm, Pm, opBytes{0x54}},
   953  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   954  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   956  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   957  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   958  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   959  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   960  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   961  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   962  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   963  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   964  	{ABSWAPW, ybswap, Pe, opBytes{0x0f, 0xc8}},
   965  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   966  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   967  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   968  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   969  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   970  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   971  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   972  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   973  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   974  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   975  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   976  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   977  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   978  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   979  	{ABYTE, ybyte, Px, opBytes{1}},
   980  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   981  	{ACBW, ynone, Pe, opBytes{0x98}},
   982  	{ACDQ, ynone, Px, opBytes{0x99}},
   983  	{ACDQE, ynone, Pw, opBytes{0x98}},
   984  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   985  	{ACLC, ynone, Px, opBytes{0xf8}},
   986  	{ACLD, ynone, Px, opBytes{0xfc}},
   987  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   988  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   989  	{ACLI, ynone, Px, opBytes{0xfa}},
   990  	{ACLTS, ynone, Pm, opBytes{0x06}},
   991  	{ACMC, ynone, Px, opBytes{0xf5}},
   992  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   993  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   994  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   995  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   996  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   997  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
   998  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
   999  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1000  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1001  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1002  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1003  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1004  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1005  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1006  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1007  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1008  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1009  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1010  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1011  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1012  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1013  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1014  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1015  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1016  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1017  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1018  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1019  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1020  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1021  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1022  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1023  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1024  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1025  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1026  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1027  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1028  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1029  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1030  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1031  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1032  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1033  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1034  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1035  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1036  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1037  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1038  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1039  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1040  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1041  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1042  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1043  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1044  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1045  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1046  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1047  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1048  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1049  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1050  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1051  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1052  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1053  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1054  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1055  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1056  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1057  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1058  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1059  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1060  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1061  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1062  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1063  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1064  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1065  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1066  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1067  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1068  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1069  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1070  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1071  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1072  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1073  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1074  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1075  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1076  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1077  	{ACWD, ynone, Pe, opBytes{0x99}},
  1078  	{ACWDE, ynone, Px, opBytes{0x98}},
  1079  	{ACQO, ynone, Pw, opBytes{0x99}},
  1080  	{ADAA, ynone, P32, opBytes{0x27}},
  1081  	{ADAS, ynone, P32, opBytes{0x2f}},
  1082  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1083  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1084  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1085  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1086  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1087  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1088  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1089  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1090  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1091  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1092  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1093  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1094  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1095  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1096  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1097  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1098  	{AENTER, nil, 0, opBytes{}}, // botch
  1099  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1100  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1101  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1102  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1103  	{AHLT, ynone, Px, opBytes{0xf4}},
  1104  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1105  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1106  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1107  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1108  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1109  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1110  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1111  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1113  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1114  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1115  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1116  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1117  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1118  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1119  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1120  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1121  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1122  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1123  	{AINSL, ynone, Px, opBytes{0x6d}},
  1124  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1125  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1126  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1127  	{AINT, yint, Px, opBytes{0xcd}},
  1128  	{AINTO, ynone, P32, opBytes{0xce}},
  1129  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1130  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1131  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1132  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1133  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1134  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1135  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1136  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1137  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1138  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1139  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1140  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1141  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1142  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1143  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1144  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1145  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1146  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1147  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1148  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1149  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1150  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1151  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1152  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1153  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1154  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1155  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1156  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1157  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1158  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1159  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1160  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1161  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1162  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1163  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1164  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1165  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1166  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1167  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1168  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1169  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1170  	{ALODSL, ynone, Px, opBytes{0xad}},
  1171  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1172  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1173  	{ALONG, ybyte, Px, opBytes{4}},
  1174  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1175  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1176  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1177  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1178  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1179  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1180  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1181  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1182  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1183  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1184  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1185  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1186  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1187  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1188  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1189  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1190  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1191  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1192  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1193  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1194  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1195  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1196  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1197  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1198  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1199  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1200  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1201  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1202  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1203  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1204  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1205  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1206  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1207  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1208  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1209  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1210  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1211  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1212  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1213  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1214  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1215  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1216  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1217  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1218  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1219  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1220  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1221  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1222  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1223  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1224  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1225  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1226  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1227  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1228  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1229  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1230  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1231  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1232  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1233  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1234  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1235  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1236  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1237  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1238  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1239  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1240  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1241  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1242  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1243  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1244  	{AMULPS, yxm, Pm, opBytes{0x59}},
  1245  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1246  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1247  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1248  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1249  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1250  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1251  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1252  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1253  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1254  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1255  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1256  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1257  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1258  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1259  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1260  	{AORPD, yxm, Pq, opBytes{0x56}},
  1261  	{AORPS, yxm, Pm, opBytes{0x56}},
  1262  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1263  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1264  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1265  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1266  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1267  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1268  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1269  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1270  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1271  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1272  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1273  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1274  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1275  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1276  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1277  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1278  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1279  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1280  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1281  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1282  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1283  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1284  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1285  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1286  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1287  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1288  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1289  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1290  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1291  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1292  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1293  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1294  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1295  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1296  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1297  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1298  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1299  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1300  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1301  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1302  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1303  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1304  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1305  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1306  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1307  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1308  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1309  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1310  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1311  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1312  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1313  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1314  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1315  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1316  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1317  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1318  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1319  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1320  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1321  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1322  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1323  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1324  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1325  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1326  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1327  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1328  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1329  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1330  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1331  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1332  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1333  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1334  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1335  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1336  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1337  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1338  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1339  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1340  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1341  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1342  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1343  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1344  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1345  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1346  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1347  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1348  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1349  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1350  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1351  	{APOPAL, ynone, P32, opBytes{0x61}},
  1352  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1353  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1354  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1355  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1356  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1357  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1358  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1359  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1360  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1361  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1362  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1363  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1364  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1365  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1366  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1367  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1368  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1369  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1370  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1371  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1372  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1373  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1374  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1375  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1376  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1377  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1378  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1379  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1380  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1381  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1382  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1383  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1384  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1385  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1386  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1387  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1388  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1389  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1390  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1391  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1392  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1393  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1394  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1395  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1396  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1397  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1398  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1399  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1400  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1401  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1402  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1403  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1404  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1405  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1406  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1407  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1408  	{AQUAD, ybyte, Px, opBytes{8}},
  1409  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1410  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1411  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1412  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1413  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1414  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1415  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1416  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1417  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1418  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1419  	{AREP, ynone, Px, opBytes{0xf3}},
  1420  	{AREPN, ynone, Px, opBytes{0xf2}},
  1421  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1422  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1423  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1424  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1425  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1426  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1427  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1428  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1429  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1430  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1431  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1432  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1433  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1434  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1435  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1436  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1437  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1438  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1439  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1440  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1441  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1442  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1443  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1444  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1445  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1446  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1447  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1448  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1449  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1450  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1451  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1452  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1453  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1454  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1455  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1456  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1457  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1458  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1459  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1460  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1461  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1462  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1463  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1464  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1465  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1466  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1467  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1468  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1469  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1470  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1471  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1472  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1473  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1474  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1475  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1476  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1477  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1478  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1479  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1480  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1481  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1482  	{ASTC, ynone, Px, opBytes{0xf9}},
  1483  	{ASTD, ynone, Px, opBytes{0xfd}},
  1484  	{ASTI, ynone, Px, opBytes{0xfb}},
  1485  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1486  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1487  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1488  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1489  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1490  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1491  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1492  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1493  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1494  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1495  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1496  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1497  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1498  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1499  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1500  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1501  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1502  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1503  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1504  	{obj.ATEXT, ytext, Px, opBytes{}},
  1505  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1506  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1507  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1508  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1509  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1510  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1511  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1512  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1513  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1514  	{AWORD, ybyte, Px, opBytes{2}},
  1515  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1516  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1517  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1518  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1519  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1520  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1521  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1522  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1523  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1524  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1525  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1526  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1527  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1528  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1529  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1530  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1531  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1532  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1533  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1534  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1535  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1536  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1537  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1538  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1539  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1540  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1541  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1542  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1543  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1544  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1545  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1546  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1547  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1548  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1549  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1550  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1551  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1552  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1553  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1554  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1555  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1556  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1557  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1558  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1559  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1560  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1561  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1562  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1563  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1564  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1565  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1566  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1567  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1568  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1569  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1570  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1571  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1572  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1573  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1574  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1575  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1576  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1577  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1578  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1579  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1580  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1581  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1582  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1583  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1584  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1585  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1586  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1587  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1588  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1589  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1590  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1591  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1592  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1593  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1594  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1595  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1596  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1597  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1598  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1599  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1600  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1601  	{AFFREE, nil, 0, opBytes{}},
  1602  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1603  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1604  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1605  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1606  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1607  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1608  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1609  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1610  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1611  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1612  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1613  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1614  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1615  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1616  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1617  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1618  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1619  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1620  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1621  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1622  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1623  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1624  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1625  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1626  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1627  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1628  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1629  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1630  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1631  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1632  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1633  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1634  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1635  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1636  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1637  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1638  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1639  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1640  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1641  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1642  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1643  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1644  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1645  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1646  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1647  	{AINVD, ynone, Pm, opBytes{0x08}},
  1648  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1649  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1650  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1651  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1652  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1653  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1654  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1655  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1656  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1657  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1658  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1659  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1660  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1661  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1662  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1663  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1664  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1665  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1666  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1667  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1668  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1669  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1670  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1671  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1672  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1673  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1674  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1675  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1676  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1677  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1678  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1679  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1680  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1681  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1682  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1683  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1684  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1685  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1686  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1687  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1688  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1689  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1690  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1691  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1692  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1693  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1694  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1695  
  1696  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1697  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1698  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1699  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1700  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1701  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1702  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1703  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1704  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1705  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1706  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1707  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1708  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1709  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1710  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1711  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1712  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1713  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1714  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1715  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1716  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1717  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1718  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1719  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1720  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1721  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1722  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1723  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1724  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1725  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1726  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1727  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1728  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1729  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1730  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1731  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1732  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1733  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1734  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1735  	{AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1736  	{AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1737  	{AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1738  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1739  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1740  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1741  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1742  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1743  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1744  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1745  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1746  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1747  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1748  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1749  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1750  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1751  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1752  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1753  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1754  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1755  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1756  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1757  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1758  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1759  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1760  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1761  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1762  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1763  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1764  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1765  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1766  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1767  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1768  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1769  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1770  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1771  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1772  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1773  
  1774  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1775  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1776  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1777  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1778  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1779  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1780  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1781  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1782  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1783  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1784  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1785  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1786  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1787  
  1788  	{obj.AEND, nil, 0, opBytes{}},
  1789  	{0, nil, 0, opBytes{}},
  1790  }
  1791  
// opindex maps an instruction opcode, masked with obj.AMask, to its Optab
// entry. It is populated by instinit from avxOptab and optab.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1793  
  1794  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1795  // This happens on systems like Solaris that call .so functions instead of system calls.
  1796  // It does not seem to be necessary for any other systems. This is probably working
  1797  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1798  // what that bug is. And this does fix it.
  1799  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1800  	if ctxt.Headtype == objabi.Hsolaris {
  1801  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1802  		return strings.HasPrefix(s.Name, "libc_")
  1803  	}
  1804  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1805  }
  1806  
  1807  // single-instruction no-ops of various lengths.
  1808  // constructed by hand and disassembled with gdb to verify.
  1809  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1810  var nop = [][16]uint8{
  1811  	{0x90},
  1812  	{0x66, 0x90},
  1813  	{0x0F, 0x1F, 0x00},
  1814  	{0x0F, 0x1F, 0x40, 0x00},
  1815  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1816  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1817  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1818  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1819  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1820  }
  1821  
  1822  // Native Client rejects the repeated 0x66 prefix.
  1823  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1824  func fillnop(p []byte, n int) {
  1825  	var m int
  1826  
  1827  	for n > 0 {
  1828  		m = n
  1829  		if m > len(nop) {
  1830  			m = len(nop)
  1831  		}
  1832  		copy(p[:m], nop[m-1][:m])
  1833  		p = p[m:]
  1834  		n -= m
  1835  	}
  1836  }
  1837  
  1838  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1839  	s.Grow(int64(c) + int64(pad))
  1840  	fillnop(s.P[c:], int(pad))
  1841  	return c + pad
  1842  }
  1843  
  1844  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1845  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1846  		return l
  1847  	}
  1848  	return q
  1849  }
  1850  
  1851  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  1852  	if s.P != nil {
  1853  		return
  1854  	}
  1855  
  1856  	if ycover[0] == 0 {
  1857  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  1858  	}
  1859  
  1860  	var ab AsmBuf
  1861  
  1862  	for p := s.Func.Text; p != nil; p = p.Link {
  1863  		if p.To.Type == obj.TYPE_BRANCH {
  1864  			if p.Pcond == nil {
  1865  				p.Pcond = p
  1866  			}
  1867  		}
  1868  		if p.As == AADJSP {
  1869  			p.To.Type = obj.TYPE_REG
  1870  			p.To.Reg = REG_SP
  1871  			v := int32(-p.From.Offset)
  1872  			p.From.Offset = int64(v)
  1873  			p.As = spadjop(ctxt, AADDL, AADDQ)
  1874  			if v < 0 {
  1875  				p.As = spadjop(ctxt, ASUBL, ASUBQ)
  1876  				v = -v
  1877  				p.From.Offset = int64(v)
  1878  			}
  1879  
  1880  			if v == 0 {
  1881  				p.As = obj.ANOP
  1882  			}
  1883  		}
  1884  	}
  1885  
  1886  	var q *obj.Prog
  1887  	var count int64 // rough count of number of instructions
  1888  	for p := s.Func.Text; p != nil; p = p.Link {
  1889  		count++
  1890  		p.Back = branchShort // use short branches first time through
  1891  		q = p.Pcond
  1892  		if q != nil && (q.Back&branchShort != 0) {
  1893  			p.Back |= branchBackwards
  1894  			q.Back |= branchLoopHead
  1895  		}
  1896  
  1897  		if p.As == AADJSP {
  1898  			p.To.Type = obj.TYPE_REG
  1899  			p.To.Reg = REG_SP
  1900  			v := int32(-p.From.Offset)
  1901  			p.From.Offset = int64(v)
  1902  			p.As = spadjop(ctxt, AADDL, AADDQ)
  1903  			if v < 0 {
  1904  				p.As = spadjop(ctxt, ASUBL, ASUBQ)
  1905  				v = -v
  1906  				p.From.Offset = int64(v)
  1907  			}
  1908  
  1909  			if v == 0 {
  1910  				p.As = obj.ANOP
  1911  			}
  1912  		}
  1913  	}
  1914  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1915  
  1916  	n := 0
  1917  	var c int32
  1918  	errors := ctxt.Errors
  1919  	for {
  1920  		// This loop continues while there are reasons to re-assemble
  1921  		// whole block, like the presence of long forward jumps.
  1922  		reAssemble := false
  1923  		for i := range s.R {
  1924  			s.R[i] = obj.Reloc{}
  1925  		}
  1926  		s.R = s.R[:0]
  1927  		s.P = s.P[:0]
  1928  		c = 0
  1929  		for p := s.Func.Text; p != nil; p = p.Link {
  1930  			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
  1931  				// pad everything to avoid crossing 32-byte boundary
  1932  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1933  					c = naclpad(ctxt, s, c, -c&31)
  1934  				}
  1935  
  1936  				// pad call deferreturn to start at 32-byte boundary
  1937  				// so that subtracting 5 in jmpdefer will jump back
  1938  				// to that boundary and rerun the call.
  1939  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1940  					c = naclpad(ctxt, s, c, -c&31)
  1941  				}
  1942  
  1943  				// pad call to end at 32-byte boundary
  1944  				if p.As == obj.ACALL {
  1945  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1946  				}
  1947  
  1948  				// the linker treats REP and STOSQ as different instructions
  1949  				// but in fact the REP is a prefix on the STOSQ.
  1950  				// make sure REP has room for 2 more bytes, so that
  1951  				// padding will not be inserted before the next instruction.
  1952  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1953  					c = naclpad(ctxt, s, c, -c&31)
  1954  				}
  1955  
  1956  				// same for LOCK.
  1957  				// various instructions follow; the longest is 4 bytes.
  1958  				// give ourselves 8 bytes so as to avoid surprises.
  1959  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1960  					c = naclpad(ctxt, s, c, -c&31)
  1961  				}
  1962  			}
  1963  
  1964  			if (p.Back&branchLoopHead != 0) && c&(loopAlign-1) != 0 {
  1965  				// pad with NOPs
  1966  				v := -c & (loopAlign - 1)
  1967  
  1968  				if v <= maxLoopPad {
  1969  					s.Grow(int64(c) + int64(v))
  1970  					fillnop(s.P[c:], int(v))
  1971  					c += v
  1972  				}
  1973  			}
  1974  
  1975  			p.Pc = int64(c)
  1976  
  1977  			// process forward jumps to p
  1978  			for q = p.Rel; q != nil; q = q.Forwd {
  1979  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1980  				if q.Back&branchShort != 0 {
  1981  					if v > 127 {
  1982  						reAssemble = true
  1983  						q.Back ^= branchShort
  1984  					}
  1985  
  1986  					if q.As == AJCXZL || q.As == AXBEGIN {
  1987  						s.P[q.Pc+2] = byte(v)
  1988  					} else {
  1989  						s.P[q.Pc+1] = byte(v)
  1990  					}
  1991  				} else {
  1992  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1993  				}
  1994  			}
  1995  
  1996  			p.Rel = nil
  1997  
  1998  			p.Pc = int64(c)
  1999  			ab.asmins(ctxt, s, p)
  2000  			m := ab.Len()
  2001  			if int(p.Isize) != m {
  2002  				p.Isize = uint8(m)
  2003  				// When building for NaCl, we currently need
  2004  				// at least 2 rounds to ensure proper 32-byte alignment.
  2005  				if ctxt.Headtype == objabi.Hnacl {
  2006  					reAssemble = true
  2007  				}
  2008  			}
  2009  
  2010  			s.Grow(p.Pc + int64(m))
  2011  			copy(s.P[p.Pc:], ab.Bytes())
  2012  			c += int32(m)
  2013  		}
  2014  
  2015  		n++
  2016  		if n > 20 {
  2017  			ctxt.Diag("span must be looping")
  2018  			log.Fatalf("loop")
  2019  		}
  2020  		if !reAssemble {
  2021  			break
  2022  		}
  2023  		if ctxt.Errors > errors {
  2024  			return
  2025  		}
  2026  	}
  2027  
  2028  	if ctxt.Headtype == objabi.Hnacl {
  2029  		c = naclpad(ctxt, s, c, -c&31)
  2030  	}
  2031  
  2032  	s.Size = int64(c)
  2033  
  2034  	if false { /* debug['a'] > 1 */
  2035  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  2036  		var i int
  2037  		for i = 0; i < len(s.P); i++ {
  2038  			fmt.Printf(" %.2x", s.P[i])
  2039  			if i%16 == 15 {
  2040  				fmt.Printf("\n  %.6x", uint(i+1))
  2041  			}
  2042  		}
  2043  
  2044  		if i%16 != 0 {
  2045  			fmt.Printf("\n")
  2046  		}
  2047  
  2048  		for i := 0; i < len(s.R); i++ {
  2049  			r := &s.R[i]
  2050  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  2051  		}
  2052  	}
  2053  }
  2054  
  2055  func instinit(ctxt *obj.Link) {
  2056  	if ycover[0] != 0 {
  2057  		// Already initialized; stop now.
  2058  		// This happens in the cmd/asm tests,
  2059  		// each of which re-initializes the arch.
  2060  		return
  2061  	}
  2062  
  2063  	switch ctxt.Headtype {
  2064  	case objabi.Hplan9:
  2065  		plan9privates = ctxt.Lookup("_privates")
  2066  	case objabi.Hnacl:
  2067  		deferreturn = ctxt.LookupABI("runtime.deferreturn", obj.ABIInternal)
  2068  	}
  2069  
  2070  	for i := range avxOptab {
  2071  		c := avxOptab[i].as
  2072  		if opindex[c&obj.AMask] != nil {
  2073  			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
  2074  		}
  2075  		opindex[c&obj.AMask] = &avxOptab[i]
  2076  	}
  2077  	for i := 1; optab[i].as != 0; i++ {
  2078  		c := optab[i].as
  2079  		if opindex[c&obj.AMask] != nil {
  2080  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2081  		}
  2082  		opindex[c&obj.AMask] = &optab[i]
  2083  	}
  2084  
  2085  	for i := 0; i < Ymax; i++ {
  2086  		ycover[i*Ymax+i] = 1
  2087  	}
  2088  
  2089  	ycover[Yi0*Ymax+Yu2] = 1
  2090  	ycover[Yi1*Ymax+Yu2] = 1
  2091  
  2092  	ycover[Yi0*Ymax+Yi8] = 1
  2093  	ycover[Yi1*Ymax+Yi8] = 1
  2094  	ycover[Yu2*Ymax+Yi8] = 1
  2095  	ycover[Yu7*Ymax+Yi8] = 1
  2096  
  2097  	ycover[Yi0*Ymax+Yu7] = 1
  2098  	ycover[Yi1*Ymax+Yu7] = 1
  2099  	ycover[Yu2*Ymax+Yu7] = 1
  2100  
  2101  	ycover[Yi0*Ymax+Yu8] = 1
  2102  	ycover[Yi1*Ymax+Yu8] = 1
  2103  	ycover[Yu2*Ymax+Yu8] = 1
  2104  	ycover[Yu7*Ymax+Yu8] = 1
  2105  
  2106  	ycover[Yi0*Ymax+Ys32] = 1
  2107  	ycover[Yi1*Ymax+Ys32] = 1
  2108  	ycover[Yu2*Ymax+Ys32] = 1
  2109  	ycover[Yu7*Ymax+Ys32] = 1
  2110  	ycover[Yu8*Ymax+Ys32] = 1
  2111  	ycover[Yi8*Ymax+Ys32] = 1
  2112  
  2113  	ycover[Yi0*Ymax+Yi32] = 1
  2114  	ycover[Yi1*Ymax+Yi32] = 1
  2115  	ycover[Yu2*Ymax+Yi32] = 1
  2116  	ycover[Yu7*Ymax+Yi32] = 1
  2117  	ycover[Yu8*Ymax+Yi32] = 1
  2118  	ycover[Yi8*Ymax+Yi32] = 1
  2119  	ycover[Ys32*Ymax+Yi32] = 1
  2120  
  2121  	ycover[Yi0*Ymax+Yi64] = 1
  2122  	ycover[Yi1*Ymax+Yi64] = 1
  2123  	ycover[Yu7*Ymax+Yi64] = 1
  2124  	ycover[Yu2*Ymax+Yi64] = 1
  2125  	ycover[Yu8*Ymax+Yi64] = 1
  2126  	ycover[Yi8*Ymax+Yi64] = 1
  2127  	ycover[Ys32*Ymax+Yi64] = 1
  2128  	ycover[Yi32*Ymax+Yi64] = 1
  2129  
  2130  	ycover[Yal*Ymax+Yrb] = 1
  2131  	ycover[Ycl*Ymax+Yrb] = 1
  2132  	ycover[Yax*Ymax+Yrb] = 1
  2133  	ycover[Ycx*Ymax+Yrb] = 1
  2134  	ycover[Yrx*Ymax+Yrb] = 1
  2135  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2136  
  2137  	ycover[Ycl*Ymax+Ycx] = 1
  2138  
  2139  	ycover[Yax*Ymax+Yrx] = 1
  2140  	ycover[Ycx*Ymax+Yrx] = 1
  2141  
  2142  	ycover[Yax*Ymax+Yrl] = 1
  2143  	ycover[Ycx*Ymax+Yrl] = 1
  2144  	ycover[Yrx*Ymax+Yrl] = 1
  2145  	ycover[Yrl32*Ymax+Yrl] = 1
  2146  
  2147  	ycover[Yf0*Ymax+Yrf] = 1
  2148  
  2149  	ycover[Yal*Ymax+Ymb] = 1
  2150  	ycover[Ycl*Ymax+Ymb] = 1
  2151  	ycover[Yax*Ymax+Ymb] = 1
  2152  	ycover[Ycx*Ymax+Ymb] = 1
  2153  	ycover[Yrx*Ymax+Ymb] = 1
  2154  	ycover[Yrb*Ymax+Ymb] = 1
  2155  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2156  	ycover[Ym*Ymax+Ymb] = 1
  2157  
  2158  	ycover[Yax*Ymax+Yml] = 1
  2159  	ycover[Ycx*Ymax+Yml] = 1
  2160  	ycover[Yrx*Ymax+Yml] = 1
  2161  	ycover[Yrl*Ymax+Yml] = 1
  2162  	ycover[Yrl32*Ymax+Yml] = 1
  2163  	ycover[Ym*Ymax+Yml] = 1
  2164  
  2165  	ycover[Yax*Ymax+Ymm] = 1
  2166  	ycover[Ycx*Ymax+Ymm] = 1
  2167  	ycover[Yrx*Ymax+Ymm] = 1
  2168  	ycover[Yrl*Ymax+Ymm] = 1
  2169  	ycover[Yrl32*Ymax+Ymm] = 1
  2170  	ycover[Ym*Ymax+Ymm] = 1
  2171  	ycover[Ymr*Ymax+Ymm] = 1
  2172  
  2173  	ycover[Yxr0*Ymax+Yxr] = 1
  2174  
  2175  	ycover[Ym*Ymax+Yxm] = 1
  2176  	ycover[Yxr0*Ymax+Yxm] = 1
  2177  	ycover[Yxr*Ymax+Yxm] = 1
  2178  
  2179  	ycover[Ym*Ymax+Yym] = 1
  2180  	ycover[Yyr*Ymax+Yym] = 1
  2181  
  2182  	ycover[Yxr0*Ymax+YxrEvex] = 1
  2183  	ycover[Yxr*Ymax+YxrEvex] = 1
  2184  
  2185  	ycover[Ym*Ymax+YxmEvex] = 1
  2186  	ycover[Yxr0*Ymax+YxmEvex] = 1
  2187  	ycover[Yxr*Ymax+YxmEvex] = 1
  2188  	ycover[YxrEvex*Ymax+YxmEvex] = 1
  2189  
  2190  	ycover[Yyr*Ymax+YyrEvex] = 1
  2191  
  2192  	ycover[Ym*Ymax+YymEvex] = 1
  2193  	ycover[Yyr*Ymax+YymEvex] = 1
  2194  	ycover[YyrEvex*Ymax+YymEvex] = 1
  2195  
  2196  	ycover[Ym*Ymax+Yzm] = 1
  2197  	ycover[Yzr*Ymax+Yzm] = 1
  2198  
  2199  	ycover[Yk0*Ymax+Yk] = 1
  2200  	ycover[Yknot0*Ymax+Yk] = 1
  2201  
  2202  	ycover[Yk0*Ymax+Ykm] = 1
  2203  	ycover[Yknot0*Ymax+Ykm] = 1
  2204  	ycover[Yk*Ymax+Ykm] = 1
  2205  	ycover[Ym*Ymax+Ykm] = 1
  2206  
  2207  	ycover[Yxvm*Ymax+YxvmEvex] = 1
  2208  
  2209  	ycover[Yyvm*Ymax+YyvmEvex] = 1
  2210  
  2211  	for i := 0; i < MAXREG; i++ {
  2212  		reg[i] = -1
  2213  		if i >= REG_AL && i <= REG_R15B {
  2214  			reg[i] = (i - REG_AL) & 7
  2215  			if i >= REG_SPB && i <= REG_DIB {
  2216  				regrex[i] = 0x40
  2217  			}
  2218  			if i >= REG_R8B && i <= REG_R15B {
  2219  				regrex[i] = Rxr | Rxx | Rxb
  2220  			}
  2221  		}
  2222  
  2223  		if i >= REG_AH && i <= REG_BH {
  2224  			reg[i] = 4 + ((i - REG_AH) & 7)
  2225  		}
  2226  		if i >= REG_AX && i <= REG_R15 {
  2227  			reg[i] = (i - REG_AX) & 7
  2228  			if i >= REG_R8 {
  2229  				regrex[i] = Rxr | Rxx | Rxb
  2230  			}
  2231  		}
  2232  
  2233  		if i >= REG_F0 && i <= REG_F0+7 {
  2234  			reg[i] = (i - REG_F0) & 7
  2235  		}
  2236  		if i >= REG_M0 && i <= REG_M0+7 {
  2237  			reg[i] = (i - REG_M0) & 7
  2238  		}
  2239  		if i >= REG_K0 && i <= REG_K0+7 {
  2240  			reg[i] = (i - REG_K0) & 7
  2241  		}
  2242  		if i >= REG_X0 && i <= REG_X0+15 {
  2243  			reg[i] = (i - REG_X0) & 7
  2244  			if i >= REG_X0+8 {
  2245  				regrex[i] = Rxr | Rxx | Rxb
  2246  			}
  2247  		}
  2248  		if i >= REG_X16 && i <= REG_X16+15 {
  2249  			reg[i] = (i - REG_X16) & 7
  2250  			if i >= REG_X16+8 {
  2251  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2252  			} else {
  2253  				regrex[i] = RxrEvex
  2254  			}
  2255  		}
  2256  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2257  			reg[i] = (i - REG_Y0) & 7
  2258  			if i >= REG_Y0+8 {
  2259  				regrex[i] = Rxr | Rxx | Rxb
  2260  			}
  2261  		}
  2262  		if i >= REG_Y16 && i <= REG_Y16+15 {
  2263  			reg[i] = (i - REG_Y16) & 7
  2264  			if i >= REG_Y16+8 {
  2265  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2266  			} else {
  2267  				regrex[i] = RxrEvex
  2268  			}
  2269  		}
  2270  		if i >= REG_Z0 && i <= REG_Z0+15 {
  2271  			reg[i] = (i - REG_Z0) & 7
  2272  			if i > REG_Z0+7 {
  2273  				regrex[i] = Rxr | Rxx | Rxb
  2274  			}
  2275  		}
  2276  		if i >= REG_Z16 && i <= REG_Z16+15 {
  2277  			reg[i] = (i - REG_Z16) & 7
  2278  			if i >= REG_Z16+8 {
  2279  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2280  			} else {
  2281  				regrex[i] = RxrEvex
  2282  			}
  2283  		}
  2284  
  2285  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2286  			regrex[i] = Rxr
  2287  		}
  2288  	}
  2289  }
  2290  
  2291  var isAndroid = objabi.GOOS == "android"
  2292  
  2293  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2294  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2295  		return 0
  2296  	}
  2297  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2298  		switch a.Reg {
  2299  		case REG_CS:
  2300  			return 0x2e
  2301  
  2302  		case REG_DS:
  2303  			return 0x3e
  2304  
  2305  		case REG_ES:
  2306  			return 0x26
  2307  
  2308  		case REG_FS:
  2309  			return 0x64
  2310  
  2311  		case REG_GS:
  2312  			return 0x65
  2313  
  2314  		case REG_TLS:
  2315  			// NOTE: Systems listed here should be only systems that
  2316  			// support direct TLS references like 8(TLS) implemented as
  2317  			// direct references from FS or GS. Systems that require
  2318  			// the initial-exec model, where you load the TLS base into
  2319  			// a register and then index from that register, do not reach
  2320  			// this code and should not be listed.
  2321  			if ctxt.Arch.Family == sys.I386 {
  2322  				switch ctxt.Headtype {
  2323  				default:
  2324  					if isAndroid {
  2325  						return 0x65 // GS
  2326  					}
  2327  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2328  
  2329  				case objabi.Hdarwin,
  2330  					objabi.Hdragonfly,
  2331  					objabi.Hfreebsd,
  2332  					objabi.Hnetbsd,
  2333  					objabi.Hopenbsd:
  2334  					return 0x65 // GS
  2335  				}
  2336  			}
  2337  
  2338  			switch ctxt.Headtype {
  2339  			default:
  2340  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2341  
  2342  			case objabi.Hlinux:
  2343  				if isAndroid {
  2344  					return 0x64 // FS
  2345  				}
  2346  
  2347  				if ctxt.Flag_shared {
  2348  					log.Fatalf("unknown TLS base register for linux with -shared")
  2349  				} else {
  2350  					return 0x64 // FS
  2351  				}
  2352  
  2353  			case objabi.Hdragonfly,
  2354  				objabi.Hfreebsd,
  2355  				objabi.Hnetbsd,
  2356  				objabi.Hopenbsd,
  2357  				objabi.Hsolaris:
  2358  				return 0x64 // FS
  2359  
  2360  			case objabi.Hdarwin:
  2361  				return 0x65 // GS
  2362  			}
  2363  		}
  2364  	}
  2365  
  2366  	if ctxt.Arch.Family == sys.I386 {
  2367  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2368  			// When building for inclusion into a shared library, an instruction of the form
  2369  			//     MOVL 0(CX)(TLS*1), AX
  2370  			// becomes
  2371  			//     mov %gs:(%ecx), %eax
  2372  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2373  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2374  			// a shared library the instruction it becomes
  2375  			//     mov 0x0(%ecx), $eax
  2376  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2377  			if a.Offset != 0 {
  2378  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2379  			}
  2380  			return 0x65 // GS
  2381  		}
  2382  		return 0
  2383  	}
  2384  
  2385  	switch a.Index {
  2386  	case REG_CS:
  2387  		return 0x2e
  2388  
  2389  	case REG_DS:
  2390  		return 0x3e
  2391  
  2392  	case REG_ES:
  2393  		return 0x26
  2394  
  2395  	case REG_TLS:
  2396  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2397  			// When building for inclusion into a shared library, an instruction of the form
  2398  			//     MOV 0(CX)(TLS*1), AX
  2399  			// becomes
  2400  			//     mov %fs:(%rcx), %rax
  2401  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2402  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2403  			// a shared library the instruction does not require a prefix.
  2404  			if a.Offset != 0 {
  2405  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2406  			}
  2407  			return 0x64
  2408  		}
  2409  
  2410  	case REG_FS:
  2411  		return 0x64
  2412  
  2413  	case REG_GS:
  2414  		return 0x65
  2415  	}
  2416  
  2417  	return 0
  2418  }
  2419  
  2420  // oclassRegList returns multisource operand class for addr.
  2421  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2422  	// TODO(quasilyte): when oclass register case is refactored into
  2423  	// lookup table, use it here to get register kind more easily.
  2424  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2425  
  2426  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2427  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2428  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2429  
  2430  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2431  	low := regIndex(int16(reg0))
  2432  	high := regIndex(int16(reg1))
  2433  
  2434  	if ctxt.Arch.Family == sys.I386 {
  2435  		if low >= 8 || high >= 8 {
  2436  			return Yxxx
  2437  		}
  2438  	}
  2439  
  2440  	switch high - low {
  2441  	case 3:
  2442  		switch {
  2443  		case regIsXmm(reg0) && regIsXmm(reg1):
  2444  			return YxrEvexMulti4
  2445  		case regIsYmm(reg0) && regIsYmm(reg1):
  2446  			return YyrEvexMulti4
  2447  		case regIsZmm(reg0) && regIsZmm(reg1):
  2448  			return YzrMulti4
  2449  		default:
  2450  			return Yxxx
  2451  		}
  2452  	default:
  2453  		return Yxxx
  2454  	}
  2455  }
  2456  
  2457  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2458  // For addr that is not V-mem returns (Yxxx, false).
  2459  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2460  	switch addr.Index {
  2461  	case REG_X0 + 0,
  2462  		REG_X0 + 1,
  2463  		REG_X0 + 2,
  2464  		REG_X0 + 3,
  2465  		REG_X0 + 4,
  2466  		REG_X0 + 5,
  2467  		REG_X0 + 6,
  2468  		REG_X0 + 7:
  2469  		return Yxvm, true
  2470  	case REG_X8 + 0,
  2471  		REG_X8 + 1,
  2472  		REG_X8 + 2,
  2473  		REG_X8 + 3,
  2474  		REG_X8 + 4,
  2475  		REG_X8 + 5,
  2476  		REG_X8 + 6,
  2477  		REG_X8 + 7:
  2478  		if ctxt.Arch.Family == sys.I386 {
  2479  			return Yxxx, true
  2480  		}
  2481  		return Yxvm, true
  2482  	case REG_X16 + 0,
  2483  		REG_X16 + 1,
  2484  		REG_X16 + 2,
  2485  		REG_X16 + 3,
  2486  		REG_X16 + 4,
  2487  		REG_X16 + 5,
  2488  		REG_X16 + 6,
  2489  		REG_X16 + 7,
  2490  		REG_X16 + 8,
  2491  		REG_X16 + 9,
  2492  		REG_X16 + 10,
  2493  		REG_X16 + 11,
  2494  		REG_X16 + 12,
  2495  		REG_X16 + 13,
  2496  		REG_X16 + 14,
  2497  		REG_X16 + 15:
  2498  		if ctxt.Arch.Family == sys.I386 {
  2499  			return Yxxx, true
  2500  		}
  2501  		return YxvmEvex, true
  2502  
  2503  	case REG_Y0 + 0,
  2504  		REG_Y0 + 1,
  2505  		REG_Y0 + 2,
  2506  		REG_Y0 + 3,
  2507  		REG_Y0 + 4,
  2508  		REG_Y0 + 5,
  2509  		REG_Y0 + 6,
  2510  		REG_Y0 + 7:
  2511  		return Yyvm, true
  2512  	case REG_Y8 + 0,
  2513  		REG_Y8 + 1,
  2514  		REG_Y8 + 2,
  2515  		REG_Y8 + 3,
  2516  		REG_Y8 + 4,
  2517  		REG_Y8 + 5,
  2518  		REG_Y8 + 6,
  2519  		REG_Y8 + 7:
  2520  		if ctxt.Arch.Family == sys.I386 {
  2521  			return Yxxx, true
  2522  		}
  2523  		return Yyvm, true
  2524  	case REG_Y16 + 0,
  2525  		REG_Y16 + 1,
  2526  		REG_Y16 + 2,
  2527  		REG_Y16 + 3,
  2528  		REG_Y16 + 4,
  2529  		REG_Y16 + 5,
  2530  		REG_Y16 + 6,
  2531  		REG_Y16 + 7,
  2532  		REG_Y16 + 8,
  2533  		REG_Y16 + 9,
  2534  		REG_Y16 + 10,
  2535  		REG_Y16 + 11,
  2536  		REG_Y16 + 12,
  2537  		REG_Y16 + 13,
  2538  		REG_Y16 + 14,
  2539  		REG_Y16 + 15:
  2540  		if ctxt.Arch.Family == sys.I386 {
  2541  			return Yxxx, true
  2542  		}
  2543  		return YyvmEvex, true
  2544  
  2545  	case REG_Z0 + 0,
  2546  		REG_Z0 + 1,
  2547  		REG_Z0 + 2,
  2548  		REG_Z0 + 3,
  2549  		REG_Z0 + 4,
  2550  		REG_Z0 + 5,
  2551  		REG_Z0 + 6,
  2552  		REG_Z0 + 7:
  2553  		return Yzvm, true
  2554  	case REG_Z8 + 0,
  2555  		REG_Z8 + 1,
  2556  		REG_Z8 + 2,
  2557  		REG_Z8 + 3,
  2558  		REG_Z8 + 4,
  2559  		REG_Z8 + 5,
  2560  		REG_Z8 + 6,
  2561  		REG_Z8 + 7,
  2562  		REG_Z8 + 8,
  2563  		REG_Z8 + 9,
  2564  		REG_Z8 + 10,
  2565  		REG_Z8 + 11,
  2566  		REG_Z8 + 12,
  2567  		REG_Z8 + 13,
  2568  		REG_Z8 + 14,
  2569  		REG_Z8 + 15,
  2570  		REG_Z8 + 16,
  2571  		REG_Z8 + 17,
  2572  		REG_Z8 + 18,
  2573  		REG_Z8 + 19,
  2574  		REG_Z8 + 20,
  2575  		REG_Z8 + 21,
  2576  		REG_Z8 + 22,
  2577  		REG_Z8 + 23:
  2578  		if ctxt.Arch.Family == sys.I386 {
  2579  			return Yxxx, true
  2580  		}
  2581  		return Yzvm, true
  2582  	}
  2583  
  2584  	return Yxxx, false
  2585  }
  2586  
// oclass returns the operand class (one of the Y* constants) of operand a
// of instruction p.
//
// The class is derived from a.Type first; register operands (TYPE_REG) are
// then classified by a.Reg. Yxxx is returned for operands that are malformed
// or not accepted on the current architecture (ctxt.Arch.Family). Some
// unexpected operands are additionally reported through ctxt.Diag; p is only
// used to format those diagnostics.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_REGLIST:
		return oclassRegList(ctxt, a)

	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		// Only a named operand with no base, index or scale is accepted.
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		// Pseudo registers have negative index, but SP is
		// not pseudo on x86, hence REG_SP check is not redundant.
		if a.Index == REG_SP || a.Index < 0 {
			// Can't use FP/SB/PC/SP as the index register.
			return Yxxx
		}

		// A vector index register makes this a V-mem operand; oclassVMem
		// has the final say on its class (which may still be Yxxx).
		if vmem, ok := oclassVMem(ctxt, a); ok {
			return vmem
		}

		// The name-specific checks below are applied on AMD64 only.
		if ctxt.Arch.Family == sys.AMD64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			if a.Sym != nil && useAbs(ctxt, a.Sym) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		// Anything left is expected to be a plain constant address: report
		// a stray symbol or name, then classify the offset like a constant.
		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		fallthrough

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		// On 386 only the low 32 bits of the constant are considered
		// (sign-extended back to 64 bits for the comparisons below).
		v := a.Offset
		if ctxt.Arch.Family == sys.I386 {
			v = int64(int32(v))
		}
		// The cases are ordered from the narrowest range to the widest,
		// so the first match is the tightest class.
		switch {
		case v == 0:
			return Yi0
		case v == 1:
			return Yi1
		case v >= 0 && v <= 3:
			return Yu2
		case v >= 0 && v <= 127:
			return Yu7
		case v >= 0 && v <= 255:
			return Yu8
		case v >= -128 && v <= 127:
			return Yi8
		}
		if ctxt.Arch.Family == sys.I386 {
			return Yi32
		}
		// AMD64: distinguish values that survive a round trip through
		// int32, values whose upper 32 bits are zero, and everything else.
		l := int32(v)
		if int64(l) == v {
			return Ys32 // can sign extend
		}
		if v>>32 == 0 {
			return Yi32 // unsigned
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every other operand type was handled above; only registers remain.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	// These byte registers are rejected on 386; elsewhere they are
	// classified like the other byte registers below.
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	// R8-R15 are rejected on 386; elsewhere they share the class of
	// SP/BP/SI/DI below.
	case REG_R8, // not really Yrl
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		if ctxt.Arch.Family == sys.I386 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	// X0 has a class of its own.
	case REG_X0:
		return Yxr0

	// NOTE(review): unlike Z8-Z31 further down, X8+ and Y8+ are not
	// rejected on 386 in this switch - confirm that is intended.
	case REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_X0 + 16,
		REG_X0 + 17,
		REG_X0 + 18,
		REG_X0 + 19,
		REG_X0 + 20,
		REG_X0 + 21,
		REG_X0 + 22,
		REG_X0 + 23,
		REG_X0 + 24,
		REG_X0 + 25,
		REG_X0 + 26,
		REG_X0 + 27,
		REG_X0 + 28,
		REG_X0 + 29,
		REG_X0 + 30,
		REG_X0 + 31:
		return YxrEvex

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_Y0 + 16,
		REG_Y0 + 17,
		REG_Y0 + 18,
		REG_Y0 + 19,
		REG_Y0 + 20,
		REG_Y0 + 21,
		REG_Y0 + 22,
		REG_Y0 + 23,
		REG_Y0 + 24,
		REG_Y0 + 25,
		REG_Y0 + 26,
		REG_Y0 + 27,
		REG_Y0 + 28,
		REG_Y0 + 29,
		REG_Y0 + 30,
		REG_Y0 + 31:
		return YyrEvex

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzr

	// Z8-Z31 get the same class as Z0-Z7, except that they are
	// rejected on 386.
	case REG_Z0 + 8,
		REG_Z0 + 9,
		REG_Z0 + 10,
		REG_Z0 + 11,
		REG_Z0 + 12,
		REG_Z0 + 13,
		REG_Z0 + 14,
		REG_Z0 + 15,
		REG_Z0 + 16,
		REG_Z0 + 17,
		REG_Z0 + 18,
		REG_Z0 + 19,
		REG_Z0 + 20,
		REG_Z0 + 21,
		REG_Z0 + 22,
		REG_Z0 + 23,
		REG_Z0 + 24,
		REG_Z0 + 25,
		REG_Z0 + 26,
		REG_Z0 + 27,
		REG_Z0 + 28,
		REG_Z0 + 29,
		REG_Z0 + 30,
		REG_Z0 + 31:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		return Yzr

	// K0 is classified separately from K1-K7.
	case REG_K0:
		return Yk0

	case REG_K0 + 1,
		REG_K0 + 2,
		REG_K0 + 3,
		REG_K0 + 4,
		REG_K0 + 5,
		REG_K0 + 6,
		REG_K0 + 7:
		return Yknot0

	// The remaining registers each map to a dedicated class.
	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	// Register not listed above.
	return Yxxx
}
  3004  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// Bytes are stored in the fixed-size array buf; the methods below append at
// offset off and never grow the storage.
type AsmBuf struct {
	// buf is the backing storage for the bytes emitted so far.
	buf [100]byte
	// off is the number of bytes written; it is also the next write position.
	off int
	// rexflag accumulates REX-related bits while operands are encoded
	// (asmandsz ORs into it).
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	// NOTE(review): rep, repn and lock are presumably instruction-prefix
	// flags; they are set and read outside this part of the file - confirm.
	rep  bool
	repn bool
	lock bool

	evex evexBits // Initialized when evexflag is true
}
  3019  
  3020  // Put1 appends one byte to the end of the buffer.
  3021  func (ab *AsmBuf) Put1(x byte) {
  3022  	ab.buf[ab.off] = x
  3023  	ab.off++
  3024  }
  3025  
  3026  // Put2 appends two bytes to the end of the buffer.
  3027  func (ab *AsmBuf) Put2(x, y byte) {
  3028  	ab.buf[ab.off+0] = x
  3029  	ab.buf[ab.off+1] = y
  3030  	ab.off += 2
  3031  }
  3032  
  3033  // Put3 appends three bytes to the end of the buffer.
  3034  func (ab *AsmBuf) Put3(x, y, z byte) {
  3035  	ab.buf[ab.off+0] = x
  3036  	ab.buf[ab.off+1] = y
  3037  	ab.buf[ab.off+2] = z
  3038  	ab.off += 3
  3039  }
  3040  
  3041  // Put4 appends four bytes to the end of the buffer.
  3042  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3043  	ab.buf[ab.off+0] = x
  3044  	ab.buf[ab.off+1] = y
  3045  	ab.buf[ab.off+2] = z
  3046  	ab.buf[ab.off+3] = w
  3047  	ab.off += 4
  3048  }
  3049  
  3050  // PutInt16 writes v into the buffer using little-endian encoding.
  3051  func (ab *AsmBuf) PutInt16(v int16) {
  3052  	ab.buf[ab.off+0] = byte(v)
  3053  	ab.buf[ab.off+1] = byte(v >> 8)
  3054  	ab.off += 2
  3055  }
  3056  
  3057  // PutInt32 writes v into the buffer using little-endian encoding.
  3058  func (ab *AsmBuf) PutInt32(v int32) {
  3059  	ab.buf[ab.off+0] = byte(v)
  3060  	ab.buf[ab.off+1] = byte(v >> 8)
  3061  	ab.buf[ab.off+2] = byte(v >> 16)
  3062  	ab.buf[ab.off+3] = byte(v >> 24)
  3063  	ab.off += 4
  3064  }
  3065  
  3066  // PutInt64 writes v into the buffer using little-endian encoding.
  3067  func (ab *AsmBuf) PutInt64(v int64) {
  3068  	ab.buf[ab.off+0] = byte(v)
  3069  	ab.buf[ab.off+1] = byte(v >> 8)
  3070  	ab.buf[ab.off+2] = byte(v >> 16)
  3071  	ab.buf[ab.off+3] = byte(v >> 24)
  3072  	ab.buf[ab.off+4] = byte(v >> 32)
  3073  	ab.buf[ab.off+5] = byte(v >> 40)
  3074  	ab.buf[ab.off+6] = byte(v >> 48)
  3075  	ab.buf[ab.off+7] = byte(v >> 56)
  3076  	ab.off += 8
  3077  }
  3078  
  3079  // Put copies b into the buffer.
  3080  func (ab *AsmBuf) Put(b []byte) {
  3081  	copy(ab.buf[ab.off:], b)
  3082  	ab.off += len(b)
  3083  }
  3084  
  3085  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3086  // starting at specified offsed (e.g. z counter value).
  3087  // Trailing 0 is not written.
  3088  //
  3089  // Intended to be used for literal Z cases.
  3090  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3091  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3092  	for int(op[offset]) != 0 {
  3093  		ab.Put1(byte(op[offset]))
  3094  		offset++
  3095  	}
  3096  }
  3097  
  3098  // Insert inserts b at offset i.
  3099  func (ab *AsmBuf) Insert(i int, b byte) {
  3100  	ab.off++
  3101  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3102  	ab.buf[i] = b
  3103  }
  3104  
  3105  // Last returns the byte at the end of the buffer.
  3106  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3107  
  3108  // Len returns the length of the buffer.
  3109  func (ab *AsmBuf) Len() int { return ab.off }
  3110  
  3111  // Bytes returns the contents of the buffer.
  3112  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3113  
  3114  // Reset empties the buffer.
  3115  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3116  
  3117  // At returns the byte at offset i.
  3118  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3119  
// asmidx emits SIB byte.
//
// The byte is assembled in i from three fields: the index register number
// in bits 3-5, the scale selector in bits 6-7 and the base register number
// in bits 0-2. scale must be 1, 2, 4 or 8 unless index is REG_NONE, in which
// case scale is not inspected at all. index may be a general-purpose or a
// vector (X/Y/Z) register; REG_SP is not accepted as an index.
//
// On any unsupported scale/index/base combination a diagnostic is reported
// through ctxt.Diag and a zero byte is emitted in place of the SIB byte.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index: use index field value 4 and skip the scale handling.
		i = 4 << 3
		goto bas

	// Registers numbered 8 and above are rejected on 386; elsewhere they
	// are encoded like the low registers below.
	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		// Index field: the register's encoding from the reg table.
		i = reg[index] << 3
	}

	// Scale field (bits 6-7): 1, 2, 4, 8 map to 0, 1, 2, 3.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	// Base field (bits 0-2).
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	// Report the problem but still emit one byte in the SIB position.
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}
  3308  
  3309  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3310  	var rel obj.Reloc
  3311  
  3312  	v := vaddr(ctxt, p, a, &rel)
  3313  	if rel.Siz != 0 {
  3314  		if rel.Siz != 4 {
  3315  			ctxt.Diag("bad reloc")
  3316  		}
  3317  		r := obj.Addrel(cursym)
  3318  		*r = rel
  3319  		r.Off = int32(p.Pc + int64(ab.Len()))
  3320  	}
  3321  
  3322  	ab.PutInt32(int32(v))
  3323  }
  3324  
  3325  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3326  	if r != nil {
  3327  		*r = obj.Reloc{}
  3328  	}
  3329  
  3330  	switch a.Name {
  3331  	case obj.NAME_STATIC,
  3332  		obj.NAME_GOTREF,
  3333  		obj.NAME_EXTERN:
  3334  		s := a.Sym
  3335  		if r == nil {
  3336  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3337  			log.Fatalf("reloc")
  3338  		}
  3339  
  3340  		if a.Name == obj.NAME_GOTREF {
  3341  			r.Siz = 4
  3342  			r.Type = objabi.R_GOTPCREL
  3343  		} else if useAbs(ctxt, s) {
  3344  			r.Siz = 4
  3345  			r.Type = objabi.R_ADDR
  3346  		} else {
  3347  			r.Siz = 4
  3348  			r.Type = objabi.R_PCREL
  3349  		}
  3350  
  3351  		r.Off = -1 // caller must fill in
  3352  		r.Sym = s
  3353  		r.Add = a.Offset
  3354  
  3355  		return 0
  3356  	}
  3357  
  3358  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3359  		if r == nil {
  3360  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3361  			log.Fatalf("reloc")
  3362  		}
  3363  
  3364  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3365  			r.Type = objabi.R_TLS_LE
  3366  			r.Siz = 4
  3367  			r.Off = -1 // caller must fill in
  3368  			r.Add = a.Offset
  3369  		}
  3370  		return 0
  3371  	}
  3372  
  3373  	return a.Offset
  3374  }
  3375  
// asmandsz emits the ModR/M byte for operand a, followed where required by
// a SIB byte (via asmidx) and an 8- or 32-bit displacement.
//
// r is placed in bits 3-5 of the ModR/M byte. rex is masked to 0x40|Rxr and
// merged into ab.rexflag together with the REX bits required by the
// registers that a uses. Relocations needed by the displacement are added to
// cursym at the displacement's position. m64 is not referenced in this
// function.
//
// Operands that cannot be encoded are reported through ctxt.Diag
// ("asmand: bad address ..."); nothing further is emitted for them.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// v is the displacement; it is truncated to 32 bits even when the
	// overflow above was diagnosed.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here; the checks only add more
		// specific diagnostics before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, no SIB, no displacement.
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: a SIB byte is always
	// emitted (rm field 4).
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base for the rest
		// of this block.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 and a 32-bit displacement follows.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Zero displacement without relocation: mod=0, nothing after SIB.
		// BP and R13 are excluded - presumably because their base encoding
		// with mod=0 is the one asmidx uses for "no base" (review note).
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement representable as a single byte: mod=1.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// Otherwise: mod=2 with a 32-bit displacement.
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with the REG_TLS
	// pseudo-index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// No usable base register (none, a segment register, or TLS): the
	// address is a bare 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Short form (rm field 5, no SIB): used for non-absolute symbol
		// references without a base, and always outside AMD64.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base: a SIB byte without index is always emitted.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Remaining base registers in the REG_AX..REG_R15 range: no SIB byte.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// The displacement becomes the addend of a TLS relocation.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Same mod=0 / mod=1 / mod=2 selection as in the indexed case.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, first recording the pending
	// relocation (if any) at the displacement's position.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// Note: this r shadows the integer parameter r.
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}
  3588  
  3589  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3590  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3591  }
  3592  
  3593  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3594  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3595  }
  3596  
  3597  func bytereg(a *obj.Addr, t *uint8) {
  3598  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3599  		a.Reg += REG_AL - REG_AX
  3600  		*t = 0
  3601  	}
  3602  }
  3603  
  3604  func unbytereg(a *obj.Addr, t *uint8) {
  3605  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3606  		a.Reg += REG_AX - REG_AL
  3607  		*t = 0
  3608  	}
  3609  }
  3610  
// Encoding kinds for ymovtab entries: the fifth element of each Movtab
// literal in ymovtab is one of these values.
//
// NOTE(review): how each kind is turned into machine code is decided by the
// consumer of ymovtab, which is outside this part of the file. The per-value
// notes below only describe which ymovtab entries use them - confirm the
// exact semantics there.
const (
	movLit uint8 = iota // Like Zlit
	// movRegMem is used by the "mov seg" entries whose source is a
	// segment register (opcode 0x8c).
	movRegMem
	// movMemReg is used by the "mov seg" entries whose destination is a
	// segment register (opcode 0x8e).
	movMemReg
	// movRegMem2op is used by the "mov cr"/"mov dr" entries that read a
	// control or debug register (opcodes 0x0f 0x20 / 0x0f 0x21).
	movRegMem2op
	// movMemReg2op is used by the "mov cr" entries that write a control
	// register (opcode 0x0f 0x22).
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)
  3621  
  3622  var ymovtab = []Movtab{
  3623  	// push
  3624  	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
  3625  	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
  3626  	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
  3627  	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
  3628  	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
  3629  	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
  3630  	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
  3631  	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
  3632  	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
  3633  	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
  3634  	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
  3635  	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
  3636  	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
  3637  	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},
  3638  
  3639  	// pop
  3640  	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
  3641  	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
  3642  	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
  3643  	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
  3644  	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
  3645  	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
  3646  	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
  3647  	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
  3648  	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
  3649  	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
  3650  	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
  3651  	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},
  3652  
  3653  	// mov seg
  3654  	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
  3655  	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
  3656  	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
  3657  	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
  3658  	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
  3659  	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
  3660  	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
  3661  	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
  3662  	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
  3663  	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
  3664  	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
  3665  	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},
  3666  
  3667  	// mov cr
  3668  	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
  3669  	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
  3670  	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
  3671  	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
  3672  	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
  3673  	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
  3674  	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
  3675  	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
  3676  	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
  3677  	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
  3678  	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
  3679  	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
  3680  	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
  3681  	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
  3682  	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
  3683  	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
  3684  	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
  3685  	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
  3686  	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
  3687  	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
  3688  
  3689  	// mov dr
  3690  	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
  3691  	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
  3692  	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
  3693  	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
  3694  	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
  3695  	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
  3696  	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
  3697  	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
  3698  	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
  3699  	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
  3700  	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
  3701  	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
  3702  	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
  3703  	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
  3704  	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
  3705  	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
  3706  
  3707  	// mov tr
  3708  	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
  3709  	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
  3710  	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
  3711  	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},
  3712  
  3713  	// lgdt, sgdt, lidt, sidt
  3714  	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
  3715  	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
  3716  	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
  3717  	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
  3718  	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
  3719  	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
  3720  	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
  3721  	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
  3722  
  3723  	// lldt, sldt
  3724  	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
  3725  	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},
  3726  
  3727  	// lmsw, smsw
  3728  	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
  3729  	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},
  3730  
  3731  	// ltr, str
  3732  	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
  3733  	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},
  3734  
  3735  	/* load full pointer - unsupported
  3736  	Movtab{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
  3737  	Movtab{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
  3738  	*/
  3739  
  3740  	// double shift
  3741  	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3742  	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3743  	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3744  	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3745  	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3746  	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3747  	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3748  	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3749  	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3750  	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3751  	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3752  	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3753  	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3754  	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3755  	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3756  	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3757  	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3758  	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3759  
  3760  	// load TLS base
  3761  	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
  3762  	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
  3763  	{0, 0, 0, 0, 0, [4]uint8{}},
  3764  }
  3765  
  3766  func isax(a *obj.Addr) bool {
  3767  	switch a.Reg {
  3768  	case REG_AX, REG_AL, REG_AH:
  3769  		return true
  3770  	}
  3771  
  3772  	if a.Index == REG_AX {
  3773  		return true
  3774  	}
  3775  	return false
  3776  }
  3777  
  3778  func subreg(p *obj.Prog, from int, to int) {
  3779  	if false { /* debug['Q'] */
  3780  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3781  	}
  3782  
  3783  	if int(p.From.Reg) == from {
  3784  		p.From.Reg = int16(to)
  3785  		p.Ft = 0
  3786  	}
  3787  
  3788  	if int(p.To.Reg) == from {
  3789  		p.To.Reg = int16(to)
  3790  		p.Tt = 0
  3791  	}
  3792  
  3793  	if int(p.From.Index) == from {
  3794  		p.From.Index = int16(to)
  3795  		p.Ft = 0
  3796  	}
  3797  
  3798  	if int(p.To.Index) == from {
  3799  		p.To.Index = int16(to)
  3800  		p.Tt = 0
  3801  	}
  3802  
  3803  	if false { /* debug['Q'] */
  3804  		fmt.Printf("%v\n", p)
  3805  	}
  3806  }
  3807  
  3808  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3809  	switch op {
  3810  	case Pm, Pe, Pf2, Pf3:
  3811  		if osize != 1 {
  3812  			if op != Pm {
  3813  				ab.Put1(byte(op))
  3814  			}
  3815  			ab.Put1(Pm)
  3816  			z++
  3817  			op = int(o.op[z])
  3818  			break
  3819  		}
  3820  		fallthrough
  3821  
  3822  	default:
  3823  		if ab.Len() == 0 || ab.Last() != Pm {
  3824  			ab.Put1(Pm)
  3825  		}
  3826  	}
  3827  
  3828  	ab.Put1(byte(op))
  3829  	return z
  3830  }
  3831  
  3832  var bpduff1 = []byte{
  3833  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3834  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3835  }
  3836  
  3837  var bpduff2 = []byte{
  3838  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3839  }
  3840  
  3841  // asmevex emits EVEX pregis and opcode byte.
  3842  // In addition to asmvex r/m, vvvv and reg fields also requires optional
  3843  // K-masking register.
  3844  //
  3845  // Expects asmbuf.evex to be properly initialized.
  3846  func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
  3847  	ab.evexflag = true
  3848  	evex := ab.evex
  3849  
  3850  	rexR := byte(1)
  3851  	evexR := byte(1)
  3852  	rexX := byte(1)
  3853  	rexB := byte(1)
  3854  	if r != nil {
  3855  		if regrex[r.Reg]&Rxr != 0 {
  3856  			rexR = 0 // "ModR/M.reg" selector 4th bit.
  3857  		}
  3858  		if regrex[r.Reg]&RxrEvex != 0 {
  3859  			evexR = 0 // "ModR/M.reg" selector 5th bit.
  3860  		}
  3861  	}
  3862  	if rm != nil {
  3863  		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
  3864  			rexX = 0
  3865  		} else if regrex[rm.Index]&Rxx != 0 {
  3866  			rexX = 0
  3867  		}
  3868  		if regrex[rm.Reg]&Rxb != 0 {
  3869  			rexB = 0
  3870  		}
  3871  	}
  3872  	// P0 = [R][X][B][R'][00][mm]
  3873  	p0 := (rexR << 7) |
  3874  		(rexX << 6) |
  3875  		(rexB << 5) |
  3876  		(evexR << 4) |
  3877  		(0 << 2) |
  3878  		(evex.M() << 0)
  3879  
  3880  	vexV := byte(0)
  3881  	if v != nil {
  3882  		// 4bit-wide reg index.
  3883  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3884  	}
  3885  	vexV ^= 0x0F
  3886  	// P1 = [W][vvvv][1][pp]
  3887  	p1 := (evex.W() << 7) |
  3888  		(vexV << 3) |
  3889  		(1 << 2) |
  3890  		(evex.P() << 0)
  3891  
  3892  	suffix := evexSuffixMap[p.Scond]
  3893  	evexZ := byte(0)
  3894  	evexLL := evex.L()
  3895  	evexB := byte(0)
  3896  	evexV := byte(1)
  3897  	evexA := byte(0)
  3898  	if suffix.zeroing {
  3899  		if !evex.ZeroingEnabled() {
  3900  			ctxt.Diag("unsupported zeroing: %v", p)
  3901  		}
  3902  		evexZ = 1
  3903  	}
  3904  	switch {
  3905  	case suffix.rounding != rcUnset:
  3906  		if rm != nil && rm.Type == obj.TYPE_MEM {
  3907  			ctxt.Diag("illegal rounding with memory argument: %v", p)
  3908  		} else if !evex.RoundingEnabled() {
  3909  			ctxt.Diag("unsupported rounding: %v", p)
  3910  		}
  3911  		evexB = 1
  3912  		evexLL = suffix.rounding
  3913  	case suffix.broadcast:
  3914  		if rm == nil || rm.Type != obj.TYPE_MEM {
  3915  			ctxt.Diag("illegal broadcast without memory argument: %v", p)
  3916  		} else if !evex.BroadcastEnabled() {
  3917  			ctxt.Diag("unsupported broadcast: %v", p)
  3918  		}
  3919  		evexB = 1
  3920  	case suffix.sae:
  3921  		if rm != nil && rm.Type == obj.TYPE_MEM {
  3922  			ctxt.Diag("illegal SAE with memory argument: %v", p)
  3923  		} else if !evex.SaeEnabled() {
  3924  			ctxt.Diag("unsupported SAE: %v", p)
  3925  		}
  3926  		evexB = 1
  3927  	}
  3928  	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
  3929  		evexV = 0
  3930  	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
  3931  		evexV = 0 // VSR selector 5th bit.
  3932  	}
  3933  	if k != nil {
  3934  		evexA = byte(reg[k.Reg])
  3935  	}
  3936  	// P2 = [z][L'L][b][V'][aaa]
  3937  	p2 := (evexZ << 7) |
  3938  		(evexLL << 5) |
  3939  		(evexB << 4) |
  3940  		(evexV << 3) |
  3941  		(evexA << 0)
  3942  
  3943  	const evexEscapeByte = 0x62
  3944  	ab.Put4(evexEscapeByte, p0, p1, p2)
  3945  	ab.Put1(evex.opcode)
  3946  }
  3947  
  3948  // Emit VEX prefix and opcode byte.
  3949  // The three addresses are the r/m, vvvv, and reg fields.
  3950  // The reg and rm arguments appear in the same order as the
  3951  // arguments to asmand, which typically follows the call to asmvex.
  3952  // The final two arguments are the VEX prefix (see encoding above)
  3953  // and the opcode byte.
  3954  // For details about vex prefix see:
  3955  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3956  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3957  	ab.vexflag = true
  3958  	rexR := 0
  3959  	if r != nil {
  3960  		rexR = regrex[r.Reg] & Rxr
  3961  	}
  3962  	rexB := 0
  3963  	rexX := 0
  3964  	if rm != nil {
  3965  		rexB = regrex[rm.Reg] & Rxb
  3966  		rexX = regrex[rm.Index] & Rxx
  3967  	}
  3968  	vexM := (vex >> 3) & 0x7
  3969  	vexWLP := vex & 0x87
  3970  	vexV := byte(0)
  3971  	if v != nil {
  3972  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3973  	}
  3974  	vexV ^= 0xF
  3975  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3976  		// Can use 2-byte encoding.
  3977  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3978  	} else {
  3979  		// Must use 3-byte encoding.
  3980  		ab.Put3(0xc4,
  3981  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3982  			vexV<<3|vexWLP,
  3983  		)
  3984  	}
  3985  	ab.Put1(opcode)
  3986  }
  3987  
  3988  // regIndex returns register index that fits in 5 bits.
  3989  //
  3990  //	R         : 3 bit | legacy instructions     | N/A
  3991  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  3992  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  3993  //
  3994  // Examples:
  3995  //	REG_Z30 => 30
  3996  //	REG_X15 => 15
  3997  //	REG_R9  => 9
  3998  //	REG_AX  => 0
  3999  //
  4000  func regIndex(r int16) int {
  4001  	lower3bits := reg[r]
  4002  	high4bit := regrex[r] & Rxr << 1
  4003  	high5bit := regrex[r] & RxrEvex << 0
  4004  	return lower3bits | high4bit | high5bit
  4005  }
  4006  
  4007  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4008  // Reports errors via ctxt.
  4009  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4010  	// If any pair of the index, mask, or destination registers
  4011  	// are the same, illegal instruction trap (#UD) is triggered.
  4012  	index := regIndex(p.GetFrom3().Index)
  4013  	mask := regIndex(p.From.Reg)
  4014  	dest := regIndex(p.To.Reg)
  4015  	if dest == mask || dest == index || mask == index {
  4016  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4017  		return false
  4018  	}
  4019  
  4020  	return true
  4021  }
  4022  
  4023  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4024  // Reports errors via ctxt.
  4025  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4026  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4027  	// register is the same as index vector in VSIB.
  4028  	index := regIndex(p.From.Index)
  4029  	dest := regIndex(p.To.Reg)
  4030  	if dest == index {
  4031  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4032  		return false
  4033  	}
  4034  
  4035  	return true
  4036  }
  4037  
  4038  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4039  	o := opindex[p.As&obj.AMask]
  4040  
  4041  	if o == nil {
  4042  		ctxt.Diag("asmins: missing op %v", p)
  4043  		return
  4044  	}
  4045  
  4046  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4047  		ab.Put1(byte(pre))
  4048  	}
  4049  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4050  		ab.Put1(byte(pre))
  4051  	}
  4052  
  4053  	// Checks to warn about instruction/arguments combinations that
  4054  	// will unconditionally trigger illegal instruction trap (#UD).
  4055  	switch p.As {
  4056  	case AVGATHERDPD,
  4057  		AVGATHERQPD,
  4058  		AVGATHERDPS,
  4059  		AVGATHERQPS,
  4060  		AVPGATHERDD,
  4061  		AVPGATHERQD,
  4062  		AVPGATHERDQ,
  4063  		AVPGATHERQQ:
  4064  		// AVX512 gather requires explicit K mask.
  4065  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4066  			if !avx512gatherValid(ctxt, p) {
  4067  				return
  4068  			}
  4069  		} else {
  4070  			if !avx2gatherValid(ctxt, p) {
  4071  				return
  4072  			}
  4073  		}
  4074  	}
  4075  
  4076  	if p.Ft == 0 {
  4077  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4078  	}
  4079  	if p.Tt == 0 {
  4080  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4081  	}
  4082  
  4083  	ft := int(p.Ft) * Ymax
  4084  	var f3t int
  4085  	tt := int(p.Tt) * Ymax
  4086  
  4087  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4088  	z := 0
  4089  	var a *obj.Addr
  4090  	var l int
  4091  	var op int
  4092  	var q *obj.Prog
  4093  	var r *obj.Reloc
  4094  	var rel obj.Reloc
  4095  	var v int64
  4096  
  4097  	args := make([]int, 0, argListMax)
  4098  	if ft != Ynone*Ymax {
  4099  		args = append(args, ft)
  4100  	}
  4101  	for i := range p.RestArgs {
  4102  		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
  4103  	}
  4104  	if tt != Ynone*Ymax {
  4105  		args = append(args, tt)
  4106  	}
  4107  
  4108  	for _, yt := range o.ytab {
  4109  		// ytab matching is purely args-based,
  4110  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4111  		// add EVEX-only filter that will reject non-EVEX matches.
  4112  		//
  4113  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4114  		// Without this rule, operands will lead to VEX-encoded form
  4115  		// and produce "c5b15813" encoding.
  4116  		if !yt.match(args) {
  4117  			// "xo" is always zero for VEX/EVEX encoded insts.
  4118  			z += int(yt.zoffset) + xo
  4119  		} else {
  4120  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4121  				// Do not signal error and continue to search
  4122  				// for matching EVEX-encoded form.
  4123  				z += int(yt.zoffset)
  4124  				continue
  4125  			}
  4126  
  4127  			switch o.prefix {
  4128  			case Px1: // first option valid only in 32-bit mode
  4129  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4130  					z += int(yt.zoffset) + xo
  4131  					continue
  4132  				}
  4133  			case Pq: // 16 bit escape and opcode escape
  4134  				ab.Put2(Pe, Pm)
  4135  
  4136  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4137  				ab.rexflag |= Pw
  4138  				ab.Put2(Pe, Pm)
  4139  
  4140  			case Pq4: // 66 0F 38
  4141  				ab.Put3(0x66, 0x0F, 0x38)
  4142  
  4143  			case Pq4w: // 66 0F 38 + REX.W
  4144  				ab.rexflag |= Pw
  4145  				ab.Put3(0x66, 0x0F, 0x38)
  4146  
  4147  			case Pq5: // F3 0F 38
  4148  				ab.Put3(0xF3, 0x0F, 0x38)
  4149  
  4150  			case Pq5w: //  F3 0F 38 + REX.W
  4151  				ab.rexflag |= Pw
  4152  				ab.Put3(0xF3, 0x0F, 0x38)
  4153  
  4154  			case Pf2, // xmm opcode escape
  4155  				Pf3:
  4156  				ab.Put2(o.prefix, Pm)
  4157  
  4158  			case Pef3:
  4159  				ab.Put3(Pe, Pf3, Pm)
  4160  
  4161  			case Pfw: // xmm opcode escape + REX.W
  4162  				ab.rexflag |= Pw
  4163  				ab.Put2(Pf3, Pm)
  4164  
  4165  			case Pm: // opcode escape
  4166  				ab.Put1(Pm)
  4167  
  4168  			case Pe: // 16 bit escape
  4169  				ab.Put1(Pe)
  4170  
  4171  			case Pw: // 64-bit escape
  4172  				if ctxt.Arch.Family != sys.AMD64 {
  4173  					ctxt.Diag("asmins: illegal 64: %v", p)
  4174  				}
  4175  				ab.rexflag |= Pw
  4176  
  4177  			case Pw8: // 64-bit escape if z >= 8
  4178  				if z >= 8 {
  4179  					if ctxt.Arch.Family != sys.AMD64 {
  4180  						ctxt.Diag("asmins: illegal 64: %v", p)
  4181  					}
  4182  					ab.rexflag |= Pw
  4183  				}
  4184  
  4185  			case Pb: // botch
  4186  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4187  					goto bad
  4188  				}
  4189  				// NOTE(rsc): This is probably safe to do always,
  4190  				// but when enabled it chooses different encodings
  4191  				// than the old cmd/internal/obj/i386 code did,
  4192  				// which breaks our "same bits out" checks.
  4193  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4194  				// in the original obj/i386, and it would encode
  4195  				// (using a valid, shorter form) as 3c 00 if we enabled
  4196  				// the call to bytereg here.
  4197  				if ctxt.Arch.Family == sys.AMD64 {
  4198  					bytereg(&p.From, &p.Ft)
  4199  					bytereg(&p.To, &p.Tt)
  4200  				}
  4201  
  4202  			case P32: // 32 bit but illegal if 64-bit mode
  4203  				if ctxt.Arch.Family == sys.AMD64 {
  4204  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4205  				}
  4206  
  4207  			case Py: // 64-bit only, no prefix
  4208  				if ctxt.Arch.Family != sys.AMD64 {
  4209  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4210  				}
  4211  
  4212  			case Py1: // 64-bit only if z < 1, no prefix
  4213  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4214  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4215  				}
  4216  
  4217  			case Py3: // 64-bit only if z < 3, no prefix
  4218  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4219  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4220  				}
  4221  			}
  4222  
  4223  			if z >= len(o.op) {
  4224  				log.Fatalf("asmins bad table %v", p)
  4225  			}
  4226  			op = int(o.op[z])
  4227  			if op == 0x0f {
  4228  				ab.Put1(byte(op))
  4229  				z++
  4230  				op = int(o.op[z])
  4231  			}
  4232  
  4233  			switch yt.zcase {
  4234  			default:
  4235  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4236  				return
  4237  
  4238  			case Zpseudo:
  4239  				break
  4240  
  4241  			case Zlit:
  4242  				ab.PutOpBytesLit(z, &o.op)
  4243  
  4244  			case Zlitr_m:
  4245  				ab.PutOpBytesLit(z, &o.op)
  4246  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4247  
  4248  			case Zlitm_r:
  4249  				ab.PutOpBytesLit(z, &o.op)
  4250  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4251  
  4252  			case Zlit_m_r:
  4253  				ab.PutOpBytesLit(z, &o.op)
  4254  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4255  
  4256  			case Zmb_r:
  4257  				bytereg(&p.From, &p.Ft)
  4258  				fallthrough
  4259  
  4260  			case Zm_r:
  4261  				ab.Put1(byte(op))
  4262  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4263  
  4264  			case Z_m_r:
  4265  				ab.Put1(byte(op))
  4266  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4267  
  4268  			case Zm2_r:
  4269  				ab.Put2(byte(op), o.op[z+1])
  4270  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4271  
  4272  			case Zm_r_xm:
  4273  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4274  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4275  
  4276  			case Zm_r_xm_nr:
  4277  				ab.rexflag = 0
  4278  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4279  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4280  
  4281  			case Zm_r_i_xm:
  4282  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4283  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4284  				ab.Put1(byte(p.To.Offset))
  4285  
  4286  			case Zibm_r, Zibr_m:
  4287  				ab.PutOpBytesLit(z, &o.op)
  4288  				if yt.zcase == Zibr_m {
  4289  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4290  				} else {
  4291  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4292  				}
  4293  				switch {
  4294  				default:
  4295  					ab.Put1(byte(p.From.Offset))
  4296  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4297  					ab.PutInt16(int16(p.From.Offset))
  4298  				case yt.args[0] == Yi32:
  4299  					ab.PutInt32(int32(p.From.Offset))
  4300  				}
  4301  
  4302  			case Zaut_r:
  4303  				ab.Put1(0x8d) // leal
  4304  				if p.From.Type != obj.TYPE_ADDR {
  4305  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4306  				}
  4307  				p.From.Type = obj.TYPE_MEM
  4308  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4309  				p.From.Type = obj.TYPE_ADDR
  4310  
  4311  			case Zm_o:
  4312  				ab.Put1(byte(op))
  4313  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4314  
  4315  			case Zr_m:
  4316  				ab.Put1(byte(op))
  4317  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4318  
  4319  			case Zvex:
  4320  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4321  
  4322  			case Zvex_rm_v_r:
  4323  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4324  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4325  
  4326  			case Zvex_rm_v_ro:
  4327  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4328  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4329  
  4330  			case Zvex_i_rm_vo:
  4331  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4332  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4333  				ab.Put1(byte(p.From.Offset))
  4334  
  4335  			case Zvex_i_r_v:
  4336  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4337  				regnum := byte(0x7)
  4338  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4339  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4340  				} else {
  4341  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4342  				}
  4343  				ab.Put1(o.op[z+2] | regnum)
  4344  				ab.Put1(byte(p.From.Offset))
  4345  
  4346  			case Zvex_i_rm_v_r:
  4347  				imm, from, from3, to := unpackOps4(p)
  4348  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4349  				ab.asmand(ctxt, cursym, p, from, to)
  4350  				ab.Put1(byte(imm.Offset))
  4351  
  4352  			case Zvex_i_rm_r:
  4353  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4354  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4355  				ab.Put1(byte(p.From.Offset))
  4356  
  4357  			case Zvex_v_rm_r:
  4358  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4359  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4360  
  4361  			case Zvex_r_v_rm:
  4362  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4363  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4364  
  4365  			case Zvex_rm_r_vo:
  4366  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4367  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4368  
  4369  			case Zvex_i_r_rm:
  4370  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4371  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4372  				ab.Put1(byte(p.From.Offset))
  4373  
  4374  			case Zvex_hr_rm_v_r:
  4375  				hr, from, from3, to := unpackOps4(p)
  4376  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4377  				ab.asmand(ctxt, cursym, p, from, to)
  4378  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4379  
  4380  			case Zevex_k_rmo:
  4381  				ab.evex = newEVEXBits(z, &o.op)
  4382  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4383  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4384  
  4385  			case Zevex_i_rm_vo:
  4386  				ab.evex = newEVEXBits(z, &o.op)
  4387  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4388  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4389  				ab.Put1(byte(p.From.Offset))
  4390  
  4391  			case Zevex_i_rm_k_vo:
  4392  				imm, from, kmask, to := unpackOps4(p)
  4393  				ab.evex = newEVEXBits(z, &o.op)
  4394  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4395  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4396  				ab.Put1(byte(imm.Offset))
  4397  
  4398  			case Zevex_i_r_rm:
  4399  				ab.evex = newEVEXBits(z, &o.op)
  4400  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4401  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4402  				ab.Put1(byte(p.From.Offset))
  4403  
  4404  			case Zevex_i_r_k_rm:
  4405  				imm, from, kmask, to := unpackOps4(p)
  4406  				ab.evex = newEVEXBits(z, &o.op)
  4407  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4408  				ab.asmand(ctxt, cursym, p, to, from)
  4409  				ab.Put1(byte(imm.Offset))
  4410  
  4411  			case Zevex_i_rm_r:
  4412  				ab.evex = newEVEXBits(z, &o.op)
  4413  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4414  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4415  				ab.Put1(byte(p.From.Offset))
  4416  
  4417  			case Zevex_i_rm_k_r:
  4418  				imm, from, kmask, to := unpackOps4(p)
  4419  				ab.evex = newEVEXBits(z, &o.op)
  4420  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4421  				ab.asmand(ctxt, cursym, p, from, to)
  4422  				ab.Put1(byte(imm.Offset))
  4423  
  4424  			case Zevex_i_rm_v_r:
  4425  				imm, from, from3, to := unpackOps4(p)
  4426  				ab.evex = newEVEXBits(z, &o.op)
  4427  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4428  				ab.asmand(ctxt, cursym, p, from, to)
  4429  				ab.Put1(byte(imm.Offset))
  4430  
  4431  			case Zevex_i_rm_v_k_r:
  4432  				imm, from, from3, kmask, to := unpackOps5(p)
  4433  				ab.evex = newEVEXBits(z, &o.op)
  4434  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4435  				ab.asmand(ctxt, cursym, p, from, to)
  4436  				ab.Put1(byte(imm.Offset))
  4437  
  4438  			case Zevex_r_v_rm:
  4439  				ab.evex = newEVEXBits(z, &o.op)
  4440  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4441  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4442  
  4443  			case Zevex_rm_v_r:
  4444  				ab.evex = newEVEXBits(z, &o.op)
  4445  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4446  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4447  
  4448  			case Zevex_rm_k_r:
  4449  				ab.evex = newEVEXBits(z, &o.op)
  4450  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4451  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4452  
  4453  			case Zevex_r_k_rm:
  4454  				ab.evex = newEVEXBits(z, &o.op)
  4455  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4456  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4457  
  4458  			case Zevex_rm_v_k_r:
  4459  				from, from3, kmask, to := unpackOps4(p)
  4460  				ab.evex = newEVEXBits(z, &o.op)
  4461  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4462  				ab.asmand(ctxt, cursym, p, from, to)
  4463  
  4464  			case Zevex_r_v_k_rm:
  4465  				from, from3, kmask, to := unpackOps4(p)
  4466  				ab.evex = newEVEXBits(z, &o.op)
  4467  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4468  				ab.asmand(ctxt, cursym, p, to, from)
  4469  
  4470  			case Zr_m_xm:
  4471  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4472  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4473  
  4474  			case Zr_m_xm_nr:
  4475  				ab.rexflag = 0
  4476  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4477  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4478  
  4479  			case Zo_m:
  4480  				ab.Put1(byte(op))
  4481  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4482  
  4483  			case Zcallindreg:
  4484  				r = obj.Addrel(cursym)
  4485  				r.Off = int32(p.Pc)
  4486  				r.Type = objabi.R_CALLIND
  4487  				r.Siz = 0
  4488  				fallthrough
  4489  
  4490  			case Zo_m64:
  4491  				ab.Put1(byte(op))
  4492  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4493  
  4494  			case Zm_ibo:
  4495  				ab.Put1(byte(op))
  4496  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4497  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4498  
  4499  			case Zibo_m:
  4500  				ab.Put1(byte(op))
  4501  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4502  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4503  
  4504  			case Zibo_m_xm:
  4505  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4506  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4507  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4508  
  4509  			case Z_ib, Zib_:
  4510  				if yt.zcase == Zib_ {
  4511  					a = &p.From
  4512  				} else {
  4513  					a = &p.To
  4514  				}
  4515  				ab.Put1(byte(op))
  4516  				if p.As == AXABORT {
  4517  					ab.Put1(o.op[z+1])
  4518  				}
  4519  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4520  
  4521  			case Zib_rp:
  4522  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4523  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4524  
  4525  			case Zil_rp:
  4526  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4527  				ab.Put1(byte(op + reg[p.To.Reg]))
  4528  				if o.prefix == Pe {
  4529  					v = vaddr(ctxt, p, &p.From, nil)
  4530  					ab.PutInt16(int16(v))
  4531  				} else {
  4532  					ab.relput4(ctxt, cursym, p, &p.From)
  4533  				}
  4534  
  4535  			case Zo_iw:
  4536  				ab.Put1(byte(op))
  4537  				if p.From.Type != obj.TYPE_NONE {
  4538  					v = vaddr(ctxt, p, &p.From, nil)
  4539  					ab.PutInt16(int16(v))
  4540  				}
  4541  
  4542  			case Ziq_rp:
  4543  				v = vaddr(ctxt, p, &p.From, &rel)
  4544  				l = int(v >> 32)
  4545  				if l == 0 && rel.Siz != 8 {
  4546  					ab.rexflag &^= (0x40 | Rxw)
  4547  
  4548  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4549  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4550  					if rel.Type != 0 {
  4551  						r = obj.Addrel(cursym)
  4552  						*r = rel
  4553  						r.Off = int32(p.Pc + int64(ab.Len()))
  4554  					}
  4555  
  4556  					ab.PutInt32(int32(v))
  4557  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4558  					ab.Put1(0xc7)
  4559  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4560  
  4561  					ab.PutInt32(int32(v)) // need all 8
  4562  				} else {
  4563  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4564  					ab.Put1(byte(op + reg[p.To.Reg]))
  4565  					if rel.Type != 0 {
  4566  						r = obj.Addrel(cursym)
  4567  						*r = rel
  4568  						r.Off = int32(p.Pc + int64(ab.Len()))
  4569  					}
  4570  
  4571  					ab.PutInt64(v)
  4572  				}
  4573  
  4574  			case Zib_rr:
  4575  				ab.Put1(byte(op))
  4576  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4577  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4578  
  4579  			case Z_il, Zil_:
  4580  				if yt.zcase == Zil_ {
  4581  					a = &p.From
  4582  				} else {
  4583  					a = &p.To
  4584  				}
  4585  				ab.Put1(byte(op))
  4586  				if o.prefix == Pe {
  4587  					v = vaddr(ctxt, p, a, nil)
  4588  					ab.PutInt16(int16(v))
  4589  				} else {
  4590  					ab.relput4(ctxt, cursym, p, a)
  4591  				}
  4592  
  4593  			case Zm_ilo, Zilo_m:
  4594  				ab.Put1(byte(op))
  4595  				if yt.zcase == Zilo_m {
  4596  					a = &p.From
  4597  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4598  				} else {
  4599  					a = &p.To
  4600  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4601  				}
  4602  
  4603  				if o.prefix == Pe {
  4604  					v = vaddr(ctxt, p, a, nil)
  4605  					ab.PutInt16(int16(v))
  4606  				} else {
  4607  					ab.relput4(ctxt, cursym, p, a)
  4608  				}
  4609  
  4610  			case Zil_rr:
  4611  				ab.Put1(byte(op))
  4612  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4613  				if o.prefix == Pe {
  4614  					v = vaddr(ctxt, p, &p.From, nil)
  4615  					ab.PutInt16(int16(v))
  4616  				} else {
  4617  					ab.relput4(ctxt, cursym, p, &p.From)
  4618  				}
  4619  
  4620  			case Z_rp:
  4621  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4622  				ab.Put1(byte(op + reg[p.To.Reg]))
  4623  
  4624  			case Zrp_:
  4625  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4626  				ab.Put1(byte(op + reg[p.From.Reg]))
  4627  
  4628  			case Zcallcon, Zjmpcon:
  4629  				if yt.zcase == Zcallcon {
  4630  					ab.Put1(byte(op))
  4631  				} else {
  4632  					ab.Put1(o.op[z+1])
  4633  				}
  4634  				r = obj.Addrel(cursym)
  4635  				r.Off = int32(p.Pc + int64(ab.Len()))
  4636  				r.Type = objabi.R_PCREL
  4637  				r.Siz = 4
  4638  				r.Add = p.To.Offset
  4639  				ab.PutInt32(0)
  4640  
  4641  			case Zcallind:
  4642  				ab.Put2(byte(op), o.op[z+1])
  4643  				r = obj.Addrel(cursym)
  4644  				r.Off = int32(p.Pc + int64(ab.Len()))
  4645  				if ctxt.Arch.Family == sys.AMD64 {
  4646  					r.Type = objabi.R_PCREL
  4647  				} else {
  4648  					r.Type = objabi.R_ADDR
  4649  				}
  4650  				r.Siz = 4
  4651  				r.Add = p.To.Offset
  4652  				r.Sym = p.To.Sym
  4653  				ab.PutInt32(0)
  4654  
  4655  			case Zcall, Zcallduff:
  4656  				if p.To.Sym == nil {
  4657  					ctxt.Diag("call without target")
  4658  					ctxt.DiagFlush()
  4659  					log.Fatalf("bad code")
  4660  				}
  4661  
  4662  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4663  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4664  				}
  4665  
  4666  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4667  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4668  					// (the call jumps into the middle of the function).
  4669  					// This makes it possible to see call sites for duffcopy/duffzero in
  4670  					// BP-based profiling tools like Linux perf (which is the
  4671  					// whole point of obj.Framepointer_enabled).
  4672  					// MOVQ BP, -16(SP)
  4673  					// LEAQ -16(SP), BP
  4674  					ab.Put(bpduff1)
  4675  				}
  4676  				ab.Put1(byte(op))
  4677  				r = obj.Addrel(cursym)
  4678  				r.Off = int32(p.Pc + int64(ab.Len()))
  4679  				r.Sym = p.To.Sym
  4680  				r.Add = p.To.Offset
  4681  				r.Type = objabi.R_CALL
  4682  				r.Siz = 4
  4683  				ab.PutInt32(0)
  4684  
  4685  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4686  					// Pop BP pushed above.
  4687  					// MOVQ 0(BP), BP
  4688  					ab.Put(bpduff2)
  4689  				}
  4690  
  4691  			// TODO: jump across functions needs reloc
  4692  			case Zbr, Zjmp, Zloop:
  4693  				if p.As == AXBEGIN {
  4694  					ab.Put1(byte(op))
  4695  				}
  4696  				if p.To.Sym != nil {
  4697  					if yt.zcase != Zjmp {
  4698  						ctxt.Diag("branch to ATEXT")
  4699  						ctxt.DiagFlush()
  4700  						log.Fatalf("bad code")
  4701  					}
  4702  
  4703  					ab.Put1(o.op[z+1])
  4704  					r = obj.Addrel(cursym)
  4705  					r.Off = int32(p.Pc + int64(ab.Len()))
  4706  					r.Sym = p.To.Sym
  4707  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4708  					// it can point to a trampoline instead of the destination itself.
  4709  					r.Type = objabi.R_CALL
  4710  					r.Siz = 4
  4711  					ab.PutInt32(0)
  4712  					break
  4713  				}
  4714  
  4715  				// Assumes q is in this function.
  4716  				// TODO: Check in input, preserve in brchain.
  4717  
  4718  				// Fill in backward jump now.
  4719  				q = p.Pcond
  4720  
  4721  				if q == nil {
  4722  					ctxt.Diag("jmp/branch/loop without target")
  4723  					ctxt.DiagFlush()
  4724  					log.Fatalf("bad code")
  4725  				}
  4726  
  4727  				if p.Back&branchBackwards != 0 {
  4728  					v = q.Pc - (p.Pc + 2)
  4729  					if v >= -128 && p.As != AXBEGIN {
  4730  						if p.As == AJCXZL {
  4731  							ab.Put1(0x67)
  4732  						}
  4733  						ab.Put2(byte(op), byte(v))
  4734  					} else if yt.zcase == Zloop {
  4735  						ctxt.Diag("loop too far: %v", p)
  4736  					} else {
  4737  						v -= 5 - 2
  4738  						if p.As == AXBEGIN {
  4739  							v--
  4740  						}
  4741  						if yt.zcase == Zbr {
  4742  							ab.Put1(0x0f)
  4743  							v--
  4744  						}
  4745  
  4746  						ab.Put1(o.op[z+1])
  4747  						ab.PutInt32(int32(v))
  4748  					}
  4749  
  4750  					break
  4751  				}
  4752  
  4753  				// Annotate target; will fill in later.
  4754  				p.Forwd = q.Rel
  4755  
  4756  				q.Rel = p
  4757  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4758  					if p.As == AJCXZL {
  4759  						ab.Put1(0x67)
  4760  					}
  4761  					ab.Put2(byte(op), 0)
  4762  				} else if yt.zcase == Zloop {
  4763  					ctxt.Diag("loop too far: %v", p)
  4764  				} else {
  4765  					if yt.zcase == Zbr {
  4766  						ab.Put1(0x0f)
  4767  					}
  4768  					ab.Put1(o.op[z+1])
  4769  					ab.PutInt32(0)
  4770  				}
  4771  
  4772  			case Zbyte:
  4773  				v = vaddr(ctxt, p, &p.From, &rel)
  4774  				if rel.Siz != 0 {
  4775  					rel.Siz = uint8(op)
  4776  					r = obj.Addrel(cursym)
  4777  					*r = rel
  4778  					r.Off = int32(p.Pc + int64(ab.Len()))
  4779  				}
  4780  
  4781  				ab.Put1(byte(v))
  4782  				if op > 1 {
  4783  					ab.Put1(byte(v >> 8))
  4784  					if op > 2 {
  4785  						ab.PutInt16(int16(v >> 16))
  4786  						if op > 4 {
  4787  							ab.PutInt32(int32(v >> 32))
  4788  						}
  4789  					}
  4790  				}
  4791  			}
  4792  
  4793  			return
  4794  		}
  4795  	}
  4796  	f3t = Ynone * Ymax
  4797  	if p.GetFrom3() != nil {
  4798  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4799  	}
  4800  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4801  		var pp obj.Prog
  4802  		var t []byte
  4803  		if p.As == mo[0].as {
  4804  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4805  				t = mo[0].op[:]
  4806  				switch mo[0].code {
  4807  				default:
  4808  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4809  
  4810  				case movLit:
  4811  					for z = 0; t[z] != 0; z++ {
  4812  						ab.Put1(t[z])
  4813  					}
  4814  
  4815  				case movRegMem:
  4816  					ab.Put1(t[0])
  4817  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4818  
  4819  				case movMemReg:
  4820  					ab.Put1(t[0])
  4821  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4822  
  4823  				case movRegMem2op: // r,m - 2op
  4824  					ab.Put2(t[0], t[1])
  4825  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4826  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4827  
  4828  				case movMemReg2op:
  4829  					ab.Put2(t[0], t[1])
  4830  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4831  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4832  
  4833  				case movFullPtr:
  4834  					if t[0] != 0 {
  4835  						ab.Put1(t[0])
  4836  					}
  4837  					switch p.To.Index {
  4838  					default:
  4839  						goto bad
  4840  
  4841  					case REG_DS:
  4842  						ab.Put1(0xc5)
  4843  
  4844  					case REG_SS:
  4845  						ab.Put2(0x0f, 0xb2)
  4846  
  4847  					case REG_ES:
  4848  						ab.Put1(0xc4)
  4849  
  4850  					case REG_FS:
  4851  						ab.Put2(0x0f, 0xb4)
  4852  
  4853  					case REG_GS:
  4854  						ab.Put2(0x0f, 0xb5)
  4855  					}
  4856  
  4857  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4858  
  4859  				case movDoubleShift:
  4860  					if t[0] == Pw {
  4861  						if ctxt.Arch.Family != sys.AMD64 {
  4862  							ctxt.Diag("asmins: illegal 64: %v", p)
  4863  						}
  4864  						ab.rexflag |= Pw
  4865  						t = t[1:]
  4866  					} else if t[0] == Pe {
  4867  						ab.Put1(Pe)
  4868  						t = t[1:]
  4869  					}
  4870  
  4871  					switch p.From.Type {
  4872  					default:
  4873  						goto bad
  4874  
  4875  					case obj.TYPE_CONST:
  4876  						ab.Put2(0x0f, t[0])
  4877  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4878  						ab.Put1(byte(p.From.Offset))
  4879  
  4880  					case obj.TYPE_REG:
  4881  						switch p.From.Reg {
  4882  						default:
  4883  							goto bad
  4884  
  4885  						case REG_CL, REG_CX:
  4886  							ab.Put2(0x0f, t[1])
  4887  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4888  						}
  4889  					}
  4890  
  4891  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4892  				// where you load the TLS base register into a register and then index off that
  4893  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4894  				// are handled in prefixof above and should not be listed here.
  4895  				case movTLSReg:
  4896  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4897  						ctxt.Diag("invalid load of TLS: %v", p)
  4898  					}
  4899  
  4900  					if ctxt.Arch.Family == sys.I386 {
  4901  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4902  						// where you load the TLS base register into a register and then index off that
  4903  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4904  						// are handled in prefixof above and should not be listed here.
  4905  						switch ctxt.Headtype {
  4906  						default:
  4907  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4908  
  4909  						case objabi.Hlinux,
  4910  							objabi.Hnacl, objabi.Hfreebsd:
  4911  							if ctxt.Flag_shared {
  4912  								// Note that this is not generating the same insns as the other cases.
  4913  								//     MOV TLS, dst
  4914  								// becomes
  4915  								//     call __x86.get_pc_thunk.dst
  4916  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4917  								// which is encoded as
  4918  								//     call __x86.get_pc_thunk.dst
  4919  								//     movq 0(dst), dst
  4920  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4921  								// is g, which we can't check here, but will when we assemble the second
  4922  								// instruction.
  4923  								dst := p.To.Reg
  4924  								ab.Put1(0xe8)
  4925  								r = obj.Addrel(cursym)
  4926  								r.Off = int32(p.Pc + int64(ab.Len()))
  4927  								r.Type = objabi.R_CALL
  4928  								r.Siz = 4
  4929  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4930  								ab.PutInt32(0)
  4931  
  4932  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4933  								r = obj.Addrel(cursym)
  4934  								r.Off = int32(p.Pc + int64(ab.Len()))
  4935  								r.Type = objabi.R_TLS_IE
  4936  								r.Siz = 4
  4937  								r.Add = 2
  4938  								ab.PutInt32(0)
  4939  							} else {
  4940  								// ELF TLS base is 0(GS).
  4941  								pp.From = p.From
  4942  
  4943  								pp.From.Type = obj.TYPE_MEM
  4944  								pp.From.Reg = REG_GS
  4945  								pp.From.Offset = 0
  4946  								pp.From.Index = REG_NONE
  4947  								pp.From.Scale = 0
  4948  								ab.Put2(0x65, // GS
  4949  									0x8B)
  4950  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4951  							}
  4952  						case objabi.Hplan9:
  4953  							pp.From = obj.Addr{}
  4954  							pp.From.Type = obj.TYPE_MEM
  4955  							pp.From.Name = obj.NAME_EXTERN
  4956  							pp.From.Sym = plan9privates
  4957  							pp.From.Offset = 0
  4958  							pp.From.Index = REG_NONE
  4959  							ab.Put1(0x8B)
  4960  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4961  
  4962  						case objabi.Hwindows:
  4963  							// Windows TLS base is always 0x14(FS).
  4964  							pp.From = p.From
  4965  
  4966  							pp.From.Type = obj.TYPE_MEM
  4967  							pp.From.Reg = REG_FS
  4968  							pp.From.Offset = 0x14
  4969  							pp.From.Index = REG_NONE
  4970  							pp.From.Scale = 0
  4971  							ab.Put2(0x64, // FS
  4972  								0x8B)
  4973  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4974  						}
  4975  						break
  4976  					}
  4977  
  4978  					switch ctxt.Headtype {
  4979  					default:
  4980  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4981  
  4982  					case objabi.Hlinux, objabi.Hfreebsd:
  4983  						if !ctxt.Flag_shared {
  4984  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  4985  						}
  4986  						// Note that this is not generating the same insn as the other cases.
  4987  						//     MOV TLS, R_to
  4988  						// becomes
  4989  						//     movq g@gottpoff(%rip), R_to
  4990  						// which is encoded as
  4991  						//     movq 0(%rip), R_to
  4992  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4993  						// is g, which we can't check here, but will when we assemble the second
  4994  						// instruction.
  4995  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4996  
  4997  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4998  						r = obj.Addrel(cursym)
  4999  						r.Off = int32(p.Pc + int64(ab.Len()))
  5000  						r.Type = objabi.R_TLS_IE
  5001  						r.Siz = 4
  5002  						r.Add = -4
  5003  						ab.PutInt32(0)
  5004  
  5005  					case objabi.Hplan9:
  5006  						pp.From = obj.Addr{}
  5007  						pp.From.Type = obj.TYPE_MEM
  5008  						pp.From.Name = obj.NAME_EXTERN
  5009  						pp.From.Sym = plan9privates
  5010  						pp.From.Offset = 0
  5011  						pp.From.Index = REG_NONE
  5012  						ab.rexflag |= Pw
  5013  						ab.Put1(0x8B)
  5014  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5015  
  5016  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5017  						// TLS base is 0(FS).
  5018  						pp.From = p.From
  5019  
  5020  						pp.From.Type = obj.TYPE_MEM
  5021  						pp.From.Name = obj.NAME_NONE
  5022  						pp.From.Reg = REG_NONE
  5023  						pp.From.Offset = 0
  5024  						pp.From.Index = REG_NONE
  5025  						pp.From.Scale = 0
  5026  						ab.rexflag |= Pw
  5027  						ab.Put2(0x64, // FS
  5028  							0x8B)
  5029  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5030  
  5031  					case objabi.Hwindows:
  5032  						// Windows TLS base is always 0x28(GS).
  5033  						pp.From = p.From
  5034  
  5035  						pp.From.Type = obj.TYPE_MEM
  5036  						pp.From.Name = obj.NAME_NONE
  5037  						pp.From.Reg = REG_GS
  5038  						pp.From.Offset = 0x28
  5039  						pp.From.Index = REG_NONE
  5040  						pp.From.Scale = 0
  5041  						ab.rexflag |= Pw
  5042  						ab.Put2(0x65, // GS
  5043  							0x8B)
  5044  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5045  					}
  5046  				}
  5047  				return
  5048  			}
  5049  		}
  5050  	}
  5051  	goto bad
  5052  
  5053  bad:
  5054  	if ctxt.Arch.Family != sys.AMD64 {
  5055  		// here, the assembly has failed.
  5056  		// if it's a byte instruction that has
  5057  		// unaddressable registers, try to
  5058  		// exchange registers and reissue the
  5059  		// instruction with the operands renamed.
  5060  		pp := *p
  5061  
  5062  		unbytereg(&pp.From, &pp.Ft)
  5063  		unbytereg(&pp.To, &pp.Tt)
  5064  
  5065  		z := int(p.From.Reg)
  5066  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5067  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5068  			// For now, different to keep bit-for-bit compatibility.
  5069  			if ctxt.Arch.Family == sys.I386 {
  5070  				breg := byteswapreg(ctxt, &p.To)
  5071  				if breg != REG_AX {
  5072  					ab.Put1(0x87) // xchg lhs,bx
  5073  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5074  					subreg(&pp, z, breg)
  5075  					ab.doasm(ctxt, cursym, &pp)
  5076  					ab.Put1(0x87) // xchg lhs,bx
  5077  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5078  				} else {
  5079  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5080  					subreg(&pp, z, REG_AX)
  5081  					ab.doasm(ctxt, cursym, &pp)
  5082  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5083  				}
  5084  				return
  5085  			}
  5086  
  5087  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5088  				// We certainly don't want to exchange
  5089  				// with AX if the op is MUL or DIV.
  5090  				ab.Put1(0x87) // xchg lhs,bx
  5091  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5092  				subreg(&pp, z, REG_BX)
  5093  				ab.doasm(ctxt, cursym, &pp)
  5094  				ab.Put1(0x87) // xchg lhs,bx
  5095  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5096  			} else {
  5097  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5098  				subreg(&pp, z, REG_AX)
  5099  				ab.doasm(ctxt, cursym, &pp)
  5100  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5101  			}
  5102  			return
  5103  		}
  5104  
  5105  		z = int(p.To.Reg)
  5106  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5107  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5108  			// For now, different to keep bit-for-bit compatibility.
  5109  			if ctxt.Arch.Family == sys.I386 {
  5110  				breg := byteswapreg(ctxt, &p.From)
  5111  				if breg != REG_AX {
  5112  					ab.Put1(0x87) //xchg rhs,bx
  5113  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5114  					subreg(&pp, z, breg)
  5115  					ab.doasm(ctxt, cursym, &pp)
  5116  					ab.Put1(0x87) // xchg rhs,bx
  5117  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5118  				} else {
  5119  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5120  					subreg(&pp, z, REG_AX)
  5121  					ab.doasm(ctxt, cursym, &pp)
  5122  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5123  				}
  5124  				return
  5125  			}
  5126  
  5127  			if isax(&p.From) {
  5128  				ab.Put1(0x87) // xchg rhs,bx
  5129  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5130  				subreg(&pp, z, REG_BX)
  5131  				ab.doasm(ctxt, cursym, &pp)
  5132  				ab.Put1(0x87) // xchg rhs,bx
  5133  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5134  			} else {
  5135  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5136  				subreg(&pp, z, REG_AX)
  5137  				ab.doasm(ctxt, cursym, &pp)
  5138  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5139  			}
  5140  			return
  5141  		}
  5142  	}
  5143  
  5144  	ctxt.Diag("invalid instruction: %v", p)
  5145  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  5146  }
  5147  
  5148  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5149  // which is not referenced in a.
  5150  // If a is empty, it returns BX to account for MULB-like instructions
  5151  // that might use DX and AX.
  5152  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5153  	cana, canb, canc, cand := true, true, true, true
  5154  	if a.Type == obj.TYPE_NONE {
  5155  		cana, cand = false, false
  5156  	}
  5157  
  5158  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5159  		switch a.Reg {
  5160  		case REG_NONE:
  5161  			cana, cand = false, false
  5162  		case REG_AX, REG_AL, REG_AH:
  5163  			cana = false
  5164  		case REG_BX, REG_BL, REG_BH:
  5165  			canb = false
  5166  		case REG_CX, REG_CL, REG_CH:
  5167  			canc = false
  5168  		case REG_DX, REG_DL, REG_DH:
  5169  			cand = false
  5170  		}
  5171  	}
  5172  
  5173  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5174  		switch a.Index {
  5175  		case REG_AX:
  5176  			cana = false
  5177  		case REG_BX:
  5178  			canb = false
  5179  		case REG_CX:
  5180  			canc = false
  5181  		case REG_DX:
  5182  			cand = false
  5183  		}
  5184  	}
  5185  
  5186  	switch {
  5187  	case cana:
  5188  		return REG_AX
  5189  	case canb:
  5190  		return REG_BX
  5191  	case canc:
  5192  		return REG_CX
  5193  	case cand:
  5194  		return REG_DX
  5195  	default:
  5196  		ctxt.Diag("impossible byte register")
  5197  		ctxt.DiagFlush()
  5198  		log.Fatalf("bad code")
  5199  		return 0
  5200  	}
  5201  }
  5202  
  5203  func isbadbyte(a *obj.Addr) bool {
  5204  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5205  }
  5206  
  5207  var naclret = []uint8{
  5208  	0x5e, // POPL SI
  5209  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  5210  	0x83,
  5211  	0xe6,
  5212  	0xe0, // ANDL $~31, SI
  5213  	0x4c,
  5214  	0x01,
  5215  	0xfe, // ADDQ R15, SI
  5216  	0xff,
  5217  	0xe6, // JMP SI
  5218  }
  5219  
  5220  var naclret8 = []uint8{
  5221  	0x5d, // POPL BP
  5222  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  5223  	0x83,
  5224  	0xe5,
  5225  	0xe0, // ANDL $~31, BP
  5226  	0xff,
  5227  	0xe5, // JMP BP
  5228  }
  5229  
  5230  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  5231  
  5232  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  5233  
  5234  var naclmovs = []uint8{
  5235  	0x89,
  5236  	0xf6, // MOVL SI, SI
  5237  	0x49,
  5238  	0x8d,
  5239  	0x34,
  5240  	0x37, // LEAQ (R15)(SI*1), SI
  5241  	0x89,
  5242  	0xff, // MOVL DI, DI
  5243  	0x49,
  5244  	0x8d,
  5245  	0x3c,
  5246  	0x3f, // LEAQ (R15)(DI*1), DI
  5247  }
  5248  
  5249  var naclstos = []uint8{
  5250  	0x89,
  5251  	0xff, // MOVL DI, DI
  5252  	0x49,
  5253  	0x8d,
  5254  	0x3c,
  5255  	0x3f, // LEAQ (R15)(DI*1), DI
  5256  }
  5257  
  5258  func (ab *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  5259  	if reg >= REG_R8 {
  5260  		ab.Put1(0x45)
  5261  	}
  5262  	reg = (reg - REG_AX) & 7
  5263  	ab.Put2(0x89, byte(3<<6|reg<<3|reg))
  5264  }
  5265  
  5266  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5267  	ab.Reset()
  5268  
  5269  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
  5270  		switch p.As {
  5271  		case obj.ARET:
  5272  			ab.Put(naclret8)
  5273  			return
  5274  
  5275  		case obj.ACALL,
  5276  			obj.AJMP:
  5277  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  5278  				ab.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  5279  			}
  5280  
  5281  		case AINT:
  5282  			ab.Put1(0xf4)
  5283  			return
  5284  		}
  5285  	}
  5286  
  5287  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
  5288  		if p.As == AREP {
  5289  			ab.rep = true
  5290  			return
  5291  		}
  5292  
  5293  		if p.As == AREPN {
  5294  			ab.repn = true
  5295  			return
  5296  		}
  5297  
  5298  		if p.As == ALOCK {
  5299  			ab.lock = true
  5300  			return
  5301  		}
  5302  
  5303  		if p.As != ALEAQ && p.As != ALEAL {
  5304  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  5305  				ab.nacltrunc(ctxt, int(p.From.Index))
  5306  			}
  5307  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  5308  				ab.nacltrunc(ctxt, int(p.To.Index))
  5309  			}
  5310  		}
  5311  
  5312  		switch p.As {
  5313  		case obj.ARET:
  5314  			ab.Put(naclret)
  5315  			return
  5316  
  5317  		case obj.ACALL,
  5318  			obj.AJMP:
  5319  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  5320  				// ANDL $~31, reg
  5321  				ab.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  5322  				// ADDQ R15, reg
  5323  				ab.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  5324  			}
  5325  
  5326  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  5327  				// ANDL $~31, reg
  5328  				ab.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  5329  				// ADDQ R15, reg
  5330  				ab.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  5331  			}
  5332  
  5333  		case AINT:
  5334  			ab.Put1(0xf4)
  5335  			return
  5336  
  5337  		case ASCASB,
  5338  			ASCASW,
  5339  			ASCASL,
  5340  			ASCASQ,
  5341  			ASTOSB,
  5342  			ASTOSW,
  5343  			ASTOSL,
  5344  			ASTOSQ:
  5345  			ab.Put(naclstos)
  5346  
  5347  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  5348  			ab.Put(naclmovs)
  5349  		}
  5350  
  5351  		if ab.rep {
  5352  			ab.Put1(0xf3)
  5353  			ab.rep = false
  5354  		}
  5355  
  5356  		if ab.repn {
  5357  			ab.Put1(0xf2)
  5358  			ab.repn = false
  5359  		}
  5360  
  5361  		if ab.lock {
  5362  			ab.Put1(0xf0)
  5363  			ab.lock = false
  5364  		}
  5365  	}
  5366  
  5367  	ab.rexflag = 0
  5368  	ab.vexflag = false
  5369  	ab.evexflag = false
  5370  	mark := ab.Len()
  5371  	ab.doasm(ctxt, cursym, p)
  5372  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5373  		// as befits the whole approach of the architecture,
  5374  		// the rex prefix must appear before the first opcode byte
  5375  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5376  		// before the 0f opcode escape!), or it might be ignored.
  5377  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5378  		if ctxt.Arch.Family != sys.AMD64 {
  5379  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5380  		}
  5381  		n := ab.Len()
  5382  		var np int
  5383  		for np = mark; np < n; np++ {
  5384  			c := ab.At(np)
  5385  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5386  				break
  5387  			}
  5388  		}
  5389  		ab.Insert(np, byte(0x40|ab.rexflag))
  5390  	}
  5391  
  5392  	n := ab.Len()
  5393  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5394  		r := &cursym.R[i]
  5395  		if int64(r.Off) < p.Pc {
  5396  			break
  5397  		}
  5398  		if ab.rexflag != 0 && !ab.vexflag {
  5399  			r.Off++
  5400  		}
  5401  		if r.Type == objabi.R_PCREL {
  5402  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5403  				// PC-relative addressing is relative to the end of the instruction,
  5404  				// but the relocations applied by the linker are relative to the end
  5405  				// of the relocation. Because immediate instruction
  5406  				// arguments can follow the PC-relative memory reference in the
  5407  				// instruction encoding, the two may not coincide. In this case,
  5408  				// adjust addend so that linker can keep relocating relative to the
  5409  				// end of the relocation.
  5410  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5411  			} else if ctxt.Arch.Family == sys.I386 {
  5412  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5413  				// assumes that the previous instruction loaded the PC of the end
  5414  				// of that instruction into CX, so the adjustment is relative to
  5415  				// that.
  5416  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5417  			}
  5418  		}
  5419  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5420  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5421  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5422  		}
  5423  
  5424  	}
  5425  
  5426  	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  5427  		switch p.To.Reg {
  5428  		case REG_SP:
  5429  			ab.Put(naclspfix)
  5430  		case REG_BP:
  5431  			ab.Put(naclbpfix)
  5432  		}
  5433  	}
  5434  }
  5435  
  5436  // unpackOps4 extracts 4 operands from p.
  5437  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5438  	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
  5439  }
  5440  
  5441  // unpackOps5 extracts 5 operands from p.
  5442  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5443  	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To
  5444  }