github.com/bir3/gocompiler@v0.3.205/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"github.com/bir3/gocompiler/src/cmd/internal/obj"
    35  	"github.com/bir3/gocompiler/src/cmd/internal/objabi"
    36  	"github.com/bir3/gocompiler/src/cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"github.com/bir3/gocompiler/src/internal/buildcfg"
    40  	"log"
    41  	"strings"
    42  )
    43  
    44  var (
    45  	plan9privates *obj.LSym // NOTE(review): not referenced in this part of the file; presumably a Plan 9-specific symbol — confirm against its users.
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // want to align loop entry to loopAlign-byte boundary,
    52  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  const (
    63  	loopAlign  = 16 // alignment boundary, in bytes, for loop entries
    64  	maxLoopPad = 0  // maximum NOP padding in bytes; 0 disables loop alignment
    65  )
    66  
    67  // Bit flags that are used to express jump target properties.
    68  const (
    69  	// branchBackwards marks targets that are located behind.
    70  	// Used to express jumps to loop headers.
    71  	branchBackwards = (1 << iota) // bit 0 (value 1)
    72  	// branchShort marks branches whose target is close,
    73  	// with an offset in the -128..127 range.
    74  	branchShort // bit 1 (value 2)
    75  	// branchLoopHead marks loop entry.
    76  	// Used to insert padding for misaligned loops.
    77  	branchLoopHead // bit 2 (value 4)
    78  )
    79  
    80  // opBytes holds optab encoding bytes.
    81  // Each ytab reserves a fixed amount of bytes in this array.
    82  //
    83  // The size should be the minimal number of bytes that
    84  // is enough to hold the biggest optab op lines.
    85  type opBytes [31]uint8
    86  
    87  type Optab struct { // Optab is one row of the optab instruction table: an opcode plus everything needed to encode it.
    88  	as     obj.As // instruction this row describes; rows are looked up by p.As
    89  	ytab   []ytab // accepted operand forms, scanned for a line matching the operands' Ytypes
    90  	prefix uint8  // one of the P* constants; controls the prefix bytes to emit
    91  	op     opBytes // opcode bytes; each ytab line owns a slice of them, reached by summing zoffsets
    92  }
    93  
    94  type movtab struct { // NOTE(review): the table built from movtab is outside this chunk; the field notes below are inferred from names only — confirm against its users.
    95  	as   obj.As // instruction the row applies to
    96  	ft   uint8 // presumably the Ytype of the "from" operand — TODO confirm
    97  	f3t  uint8 // presumably the Ytype of a third operand — TODO confirm
    98  	tt   uint8 // presumably the Ytype of the "to" operand — TODO confirm
    99  	code uint8 // encoding selector; meaning not visible here
   100  	op   [4]uint8 // up to four encoding bytes
   101  }
   102  
   103  const ( // Operand classes (Ytypes). oclass computes the specific class of an operand; ycover records which more general classes it also satisfies.
   104  	Yxxx = iota
   105  	Ynone
   106  	Yi0 // $0
   107  	Yi1 // $1
   108  	Yu2 // $x, x fits in uint2
   109  	Yi8 // $x, x fits in int8
   110  	Yu8 // $x, x fits in uint8
   111  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   112  	Ys32 // signed 32-bit immediate; Yi0, Yi1 and Yi8 also count as Ys32 via ycover
   113  	Yi32
   114  	Yi64
   115  	Yiauto
   116  	Yal
   117  	Ycl
   118  	Yax
   119  	Ycx
   120  	Yrb
   121  	Yrl
   122  	Yrl32 // Yrl on 32-bit system
   123  	Yrf
   124  	Yf0
   125  	Yrx
   126  	Ymb
   127  	Yml
   128  	Ym
   129  	Ybr
   130  	Ycs
   131  	Yss
   132  	Yds
   133  	Yes
   134  	Yfs
   135  	Ygs
   136  	Ygdtr
   137  	Yidtr
   138  	Yldtr
   139  	Ymsw
   140  	Ytask
   141  	Ycr0
   142  	Ycr1
   143  	Ycr2
   144  	Ycr3
   145  	Ycr4
   146  	Ycr5
   147  	Ycr6
   148  	Ycr7
   149  	Ycr8
   150  	Ydr0
   151  	Ydr1
   152  	Ydr2
   153  	Ydr3
   154  	Ydr4
   155  	Ydr5
   156  	Ydr6
   157  	Ydr7
   158  	Ytr0
   159  	Ytr1
   160  	Ytr2
   161  	Ytr3
   162  	Ytr4
   163  	Ytr5
   164  	Ytr6
   165  	Ytr7
   166  	Ymr
   167  	Ymm
   168  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   169  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   170  	Yxr           // X0..X15
   171  	YxrEvex       // X0..X31
   172  	Yxm
   173  	YxmEvex       // YxrEvex+Ym
   174  	Yxvm          // VSIB vector array; vm32x/vm64x
   175  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   176  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   177  	Yyr           // Y0..Y15
   178  	YyrEvex       // Y0..Y31
   179  	Yym
   180  	YymEvex   // YyrEvex+Ym
   181  	Yyvm      // VSIB vector array; vm32y/vm64y
   182  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   183  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   184  	Yzr       // Z0..Z31
   185  	Yzm       // Yzr+Ym
   186  	Yzvm      // VSIB vector array; vm32z/vm64z
   187  	Yk0       // K0
   188  	Yknot0    // K1..K7; write mask
   189  	Yk        // K0..K7; used for KOP
   190  	Ykm       // Yk+Ym; used for KOP
   191  	Ytls
   192  	Ytextsize
   193  	Yindir
   194  	Ymax // number of operand classes; ycover is indexed as from*Ymax+to
   195  )
   196  
   197  const ( // Encoding cases (Ztypes). Each ytab line names one; the switch on yt.zcase in doasm lays down bytes accordingly.
   198  	Zxxx = iota
   199  	Zlit
   200  	Zlitm_r
   201  	Zlitr_m
   202  	Zlit_m_r
   203  	Z_rp
   204  	Zbr
   205  	Zcall
   206  	Zcallcon
   207  	Zcallduff
   208  	Zcallind
   209  	Zcallindreg
   210  	Zib_
   211  	Zib_rp
   212  	Zibo_m // opcode byte (z[0]), then an asmando for the memory/register arg, then a single immediate byte
   213  	Zibo_m_xm
   214  	Zil_
   215  	Zil_rp
   216  	Ziq_rp
   217  	Zilo_m // same as Zibo_m but with a long (32-bit) immediate
   218  	Zjmp
   219  	Zjmpcon
   220  	Zloop
   221  	Zo_iw
   222  	Zm_o
   223  	Zm_r
   224  	Z_m_r
   225  	Zm2_r
   226  	Zm_r_xm
   227  	Zm_r_i_xm
   228  	Zm_r_xm_nr
   229  	Zr_m_xm_nr
   230  	Zibm_r // mmx1,mmx2/mem64,imm8
   231  	Zibr_m
   232  	Zmb_r
   233  	Zaut_r
   234  	Zo_m
   235  	Zo_m64
   236  	Zpseudo
   237  	Zr_m
   238  	Zr_m_xm
   239  	Zrp_
   240  	Z_ib
   241  	Z_il
   242  	Zm_ibo
   243  	Zm_ilo
   244  	Zib_rr
   245  	Zil_rr
   246  	Zbyte
   247  
   248  	Zvex_rm_v_r
   249  	Zvex_rm_v_ro
   250  	Zvex_r_v_rm
   251  	Zvex_i_rm_vo
   252  	Zvex_v_rm_r
   253  	Zvex_i_rm_r
   254  	Zvex_i_r_v
   255  	Zvex_i_rm_v_r
   256  	Zvex
   257  	Zvex_rm_r_vo
   258  	Zvex_i_r_rm
   259  	Zvex_hr_rm_v_r
   260  
   261  	Zevex_first // sits immediately before the first Zevex_* case
   262  	Zevex_i_r_k_rm
   263  	Zevex_i_r_rm
   264  	Zevex_i_rm_k_r
   265  	Zevex_i_rm_k_vo
   266  	Zevex_i_rm_r
   267  	Zevex_i_rm_v_k_r
   268  	Zevex_i_rm_v_r
   269  	Zevex_i_rm_vo
   270  	Zevex_k_rmo
   271  	Zevex_r_k_rm
   272  	Zevex_r_v_k_rm
   273  	Zevex_r_v_rm
   274  	Zevex_rm_k_r
   275  	Zevex_rm_v_k_r
   276  	Zevex_rm_v_r
   277  	Zevex_last // sits immediately after the last Zevex_* case
   278  
   279  	Zmax // one past the last encoding case
   280  )
   281  
   282  const ( // Prefix selectors stored in Optab.prefix (they control the prefix bytes to emit), followed by the Rx* REX bit masks.
   283  	Px   = 0
   284  	Px1  = 1    // symbolic; exact value doesn't matter
   285  	P32  = 0x32 // 32-bit only
   286  	Pe   = 0x66 // operand escape
   287  	Pm   = 0x0f // 2byte opcode escape
   288  	Pq   = 0xff // both escapes: 66 0f
   289  	Pb   = 0xfe // byte operands
   290  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   291  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   292  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   293  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   294  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   295  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   296  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   297  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   298  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   299  	Pw   = 0x48 // Rex.w
   300  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   301  	Py   = 0x80 // defaults to 64-bit mode
   302  	Py1  = 0x81 // symbolic; exact value doesn't matter
   303  	Py3  = 0x83 // symbolic; exact value doesn't matter
   304  	Pavx = 0x84 // symbolic: exact value doesn't matter
   305  
   306  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   307  	Rxw     = 1 << 3 // =1, 64-bit operand size
   308  	Rxr     = 1 << 2 // extend modrm reg
   309  	Rxx     = 1 << 1 // extend sib index
   310  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   311  )
   312  
   313  const (
   314  	// Encoding for VEX prefix in tables.
   315  	// The P, L, and W fields are chosen to match
   316  	// their eventual locations in the VEX prefix bytes.
   317  
   322  	// Using spare bit to make leading [E]VEX encoding byte different from
   323  	// 0x0f even if all other VEX fields are 0.
   324  	avxEscape = 1 << 6
   325  
   326  	// P field - 2 bits
   327  	vex66 = 1 << 0
   328  	vexF3 = 2 << 0
   329  	vexF2 = 3 << 0
   330  	// L field - 1 bit
   331  	vexLZ  = 0 << 2 // vexLZ, vexLIG and vex128 all encode L=0; the names differ only for readability in tables
   332  	vexLIG = 0 << 2
   333  	vex128 = 0 << 2
   334  	vex256 = 1 << 2
   335  	// W field - 1 bit
   336  	vexWIG = 0 << 7 // vexWIG and vexW0 both encode W=0
   337  	vexW0  = 0 << 7
   338  	vexW1  = 1 << 7
   339  	// M field - 5 bits, but mostly reserved; we can store up to 3
   340  	vex0F   = 1 << 3
   341  	vex0F38 = 2 << 3
   342  	vex0F3A = 3 << 3
   343  )
   344  
   345  var ycover [Ymax * Ymax]uint8 // ycover[from*Ymax+to] == 1 means an operand of class "from" also counts as class "to"; set up in instinit
   346  
   347  var reg [MAXREG]int // NOTE(review): filled in outside this chunk; presumably maps a register number to its encoding bits — confirm
   348  
   349  var regrex [MAXREG + 1]int // NOTE(review): filled in outside this chunk; presumably the REX bits each register requires — confirm
   350  
   351  var ynone = []ytab{ // operand-less instructions (AAAA, ACBW, ACLC, ACMC, ...): one Zlit form
   352  	{Zlit, 1, argList{}},
   353  }
   354  
   355  var ytext = []ytab{ // Zpseudo forms ending in Ytextsize, with an optional Yi32 middle operand
   356  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   357  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   358  }
   359  
   360  var ynop = []ytab{ // Zpseudo forms taking no operand or a single Yiauto/Yml/Yrf/Yxr operand
   361  	{Zpseudo, 0, argList{}},
   362  	{Zpseudo, 0, argList{Yiauto}},
   363  	{Zpseudo, 0, argList{Yml}},
   364  	{Zpseudo, 0, argList{Yrf}},
   365  	{Zpseudo, 0, argList{Yxr}},
   366  	{Zpseudo, 0, argList{Yiauto}}, // NOTE(review): this and the next three rows repeat the argLists of the four rows above;
   367  	{Zpseudo, 0, argList{Yml}},    // if lines are tried in order and the first match is used, they can never be selected.
   368  	{Zpseudo, 0, argList{Yrf}},    // Confirm nothing depends on them before removing.
   369  	{Zpseudo, 1, argList{Yxr}},
   370  }
   371  
   372  var yfuncdata = []ytab{ // single Zpseudo form: Yi32 then Ym
   373  	{Zpseudo, 0, argList{Yi32, Ym}},
   374  }
   375  
   376  var ypcdata = []ytab{ // single Zpseudo form: two Yi32 operands
   377  	{Zpseudo, 0, argList{Yi32, Yi32}},
   378  }
   379  
   380  var yxorb = []ytab{ // byte ALU forms (AADCB, AADDB, AANDB); zoffsets sum to the 5 op bytes those optab rows supply
   381  	{Zib_, 1, argList{Yi32, Yal}},
   382  	{Zibo_m, 2, argList{Yi32, Ymb}},
   383  	{Zr_m, 1, argList{Yrb, Ymb}},
   384  	{Zm_r, 1, argList{Ymb, Yrb}},
   385  }
   386  
   387  var yaddl = []ytab{ // ALU forms for ADC/ADD/AND (L, Q, W); zoffsets sum to the 7 op bytes those optab rows supply
   388  	{Zibo_m, 2, argList{Yi8, Yml}}, // listed before the Yi32 rows, so an int8 immediate matches this single-immediate-byte form first
   389  	{Zil_, 1, argList{Yi32, Yax}},
   390  	{Zilo_m, 2, argList{Yi32, Yml}},
   391  	{Zr_m, 1, argList{Yrl, Yml}},
   392  	{Zm_r, 1, argList{Yml, Yrl}},
   393  }
   394  
   395  var yincl = []ytab{ // single-operand forms: Z_rp for Yrl, Zo_m for Yml
   396  	{Z_rp, 1, argList{Yrl}},
   397  	{Zo_m, 2, argList{Yml}},
   398  }
   399  
   400  var yincq = []ytab{ // like yincl but without the Z_rp register form
   401  	{Zo_m, 2, argList{Yml}},
   402  }
   403  
   404  var ycmpb = []ytab{ // byte compare forms (ACMPB); the immediate is the second operand here, unlike yxorb
   405  	{Z_ib, 1, argList{Yal, Yi32}},
   406  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   407  	{Zm_r, 1, argList{Ymb, Yrb}},
   408  	{Zr_m, 1, argList{Yrb, Ymb}},
   409  }
   410  
   411  var ycmpl = []ytab{ // compare forms (ACMPL, ACMPQ); the immediate is the second operand here, unlike yaddl
   412  	{Zm_ibo, 2, argList{Yml, Yi8}}, // int8 immediate form is listed before the Yi32 forms
   413  	{Z_il, 1, argList{Yax, Yi32}},
   414  	{Zm_ilo, 2, argList{Yml, Yi32}},
   415  	{Zm_r, 1, argList{Yml, Yrl}},
   416  	{Zr_m, 1, argList{Yrl, Yml}},
   417  }
   418  
   419  var yshb = []ytab{ // byte-destination forms whose first operand is $1, a uint8 immediate, or Ycx
   420  	{Zo_m, 2, argList{Yi1, Ymb}},
   421  	{Zibo_m, 2, argList{Yu8, Ymb}},
   422  	{Zo_m, 2, argList{Ycx, Ymb}},
   423  }
   424  
   425  var yshl = []ytab{ // like yshb for Yml destinations, with an extra Ycl form
   426  	{Zo_m, 2, argList{Yi1, Yml}},
   427  	{Zibo_m, 2, argList{Yu8, Yml}},
   428  	{Zo_m, 2, argList{Ycl, Yml}},
   429  	{Zo_m, 2, argList{Ycx, Yml}},
   430  }
   431  
   432  var ytestl = []ytab{ // same rows as yaddl minus the leading Yi8 form
   433  	{Zil_, 1, argList{Yi32, Yax}},
   434  	{Zilo_m, 2, argList{Yi32, Yml}},
   435  	{Zr_m, 1, argList{Yrl, Yml}},
   436  	{Zm_r, 1, argList{Yml, Yrl}},
   437  }
   438  
   439  var ymovb = []ytab{ // byte moves: register/memory both ways, then immediate to register and to memory
   440  	{Zr_m, 1, argList{Yrb, Ymb}},
   441  	{Zm_r, 1, argList{Ymb, Yrb}},
   442  	{Zib_rp, 1, argList{Yi32, Yrb}},
   443  	{Zibo_m, 2, argList{Yi32, Ymb}},
   444  }
   445  
   446  var ybtl = []ytab{ // bit-test forms (BT, BTC, BTR, BTS): int8 immediate or register bit index against Yml
   447  	{Zibo_m, 2, argList{Yi8, Yml}},
   448  	{Zr_m, 1, argList{Yrl, Yml}},
   449  }
   450  
   451  var ymovw = []ytab{ // register/memory both ways, immediate to register and to memory, plus a Yiauto form
   452  	{Zr_m, 1, argList{Yrl, Yml}},
   453  	{Zm_r, 1, argList{Yml, Yrl}},
   454  	{Zil_rp, 1, argList{Yi32, Yrl}},
   455  	{Zilo_m, 2, argList{Yi32, Yml}},
   456  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   457  }
   458  
   459  var ymovl = []ytab{ // same leading rows as ymovw, with MMX/XMM MOVD forms before the final Yiauto form
   460  	{Zr_m, 1, argList{Yrl, Yml}},
   461  	{Zm_r, 1, argList{Yml, Yrl}},
   462  	{Zil_rp, 1, argList{Yi32, Yrl}},
   463  	{Zilo_m, 2, argList{Yi32, Yml}},
   464  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   465  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   466  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   467  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   468  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   469  }
   470  
   471  var yret = []ytab{ // Zo_iw forms: no operand, or a single Yi32 immediate
   472  	{Zo_iw, 1, argList{}},
   473  	{Zo_iw, 1, argList{Yi32}},
   474  }
   475  
   476  var ymovq = []ytab{ // row comments give the prefix/opcode that the matching optab bytes are expected to hold
   477  	// valid in 32-bit mode
   478  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   479  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   480  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   481  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   482  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   483  
   484  	// valid only in 64-bit mode, usually with 64-bit prefix
   485  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   486  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   487  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   488  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   489  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   490  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   491  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   492  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   493  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   494  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   495  }
   496  
   497  var ymovbe = []ytab{ // load and store forms, each reserving 3 op bytes
   498  	{Zlitm_r, 3, argList{Ym, Yrl}},
   499  	{Zlitr_m, 3, argList{Yrl, Ym}},
   500  }
   501  
   502  var ym_rl = []ytab{ // Ym source, Yrl destination
   503  	{Zm_r, 1, argList{Ym, Yrl}},
   504  }
   505  
   506  var yrl_m = []ytab{ // Yrl source, Ym destination (ABOUNDL, ABOUNDW)
   507  	{Zr_m, 1, argList{Yrl, Ym}},
   508  }
   509  
   510  var ymb_rl = []ytab{ // Ymb source, Yrl destination
   511  	{Zmb_r, 1, argList{Ymb, Yrl}},
   512  }
   513  
   514  var yml_rl = []ytab{ // Yml source, Yrl destination (ADCX/ADOX, BSF/BSR, CMOVcc)
   515  	{Zm_r, 1, argList{Yml, Yrl}},
   516  }
   517  
   518  var yrl_ml = []ytab{ // Yrl source, Yml destination (AARPL)
   519  	{Zr_m, 1, argList{Yrl, Yml}},
   520  }
   521  
   522  var yml_mb = []ytab{ // byte register/memory in both directions
   523  	{Zr_m, 1, argList{Yrb, Ymb}},
   524  	{Zm_r, 1, argList{Ymb, Yrb}},
   525  }
   526  
   527  var yrb_mb = []ytab{ // Yrb source, Ymb destination only
   528  	{Zr_m, 1, argList{Yrb, Ymb}},
   529  }
   530  
   531  var yxchg = []ytab{ // the two Yax forms come first, then the general register/memory forms
   532  	{Z_rp, 1, argList{Yax, Yrl}},
   533  	{Zrp_, 1, argList{Yrl, Yax}},
   534  	{Zr_m, 1, argList{Yrl, Yml}},
   535  	{Zm_r, 1, argList{Yml, Yrl}},
   536  }
   537  
   538  var ydivl = []ytab{ // single Yml operand, Zm_o
   539  	{Zm_o, 2, argList{Yml}},
   540  }
   541  
   542  var ydivb = []ytab{ // single Ymb operand, Zm_o
   543  	{Zm_o, 2, argList{Ymb}},
   544  }
   545  
   546  var yimul = []ytab{ // one-operand form, two immediate forms (Yi8 before Yi32), and a Yml,Yrl form
   547  	{Zm_o, 2, argList{Yml}},
   548  	{Zib_rr, 1, argList{Yi8, Yrl}},
   549  	{Zil_rr, 1, argList{Yi32, Yrl}},
   550  	{Zm_r, 2, argList{Yml, Yrl}},
   551  }
   552  
   553  var yimul3 = []ytab{ // three-operand forms: immediate, Yml, Yrl (Yi8 before Yi32)
   554  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   555  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   556  }
   557  
   558  var ybyte = []ytab{ // single Yi64 operand, Zbyte (ABYTE)
   559  	{Zbyte, 1, argList{Yi64}},
   560  }
   561  
   562  var yin = []ytab{ // Yi32 immediate form, or no operand
   563  	{Zib_, 1, argList{Yi32}},
   564  	{Zlit, 1, argList{}},
   565  }
   566  
   567  var yint = []ytab{ // Yi32 immediate form only
   568  	{Zib_, 1, argList{Yi32}},
   569  }
   570  
   571  var ypushl = []ytab{ // register, memory, then immediate forms (Yi8 before Yi32)
   572  	{Zrp_, 1, argList{Yrl}},
   573  	{Zm_o, 2, argList{Ym}},
   574  	{Zib_, 1, argList{Yi8}},
   575  	{Zil_, 1, argList{Yi32}},
   576  }
   577  
   578  var ypopl = []ytab{ // register and memory forms
   579  	{Z_rp, 1, argList{Yrl}},
   580  	{Zo_m, 2, argList{Ym}},
   581  }
   582  
   583  var ywrfsbase = []ytab{ // single Yrl operand, Zm_o
   584  	{Zm_o, 2, argList{Yrl}},
   585  }
   586  
   587  var yrdrand = []ytab{ // single Yrl operand, Zo_m
   588  	{Zo_m, 2, argList{Yrl}},
   589  }
   590  
   591  var yclflush = []ytab{ // single Ym operand (ACLDEMOTE, ACLFLUSH, ACLFLUSHOPT, ACLWB); 2 op bytes
   592  	{Zo_m, 2, argList{Ym}},
   593  }
   594  
   595  var ybswap = []ytab{ // single Yrl operand (ABSWAPL, ABSWAPQ); 2 op bytes
   596  	{Z_rp, 2, argList{Yrl}},
   597  }
   598  
   599  var yscond = []ytab{ // single Ymb operand, Zo_m
   600  	{Zo_m, 2, argList{Ymb}},
   601  }
   602  
   603  var yjcond = []ytab{ // branch target alone or preceded by $0/$1; the first two rows have zoffset 0, so all three rows start at the same op byte
   604  	{Zbr, 0, argList{Ybr}},
   605  	{Zbr, 0, argList{Yi0, Ybr}},
   606  	{Zbr, 1, argList{Yi1, Ybr}},
   607  }
   608  
   609  var yloop = []ytab{ // single branch-target form, Zloop
   610  	{Zloop, 1, argList{Ybr}},
   611  }
   612  
   613  var ycall = []ytab{ // forms for obj.ACALL; a zoffset of 0 makes a row share its op bytes with the row after it
   614  	{Zcallindreg, 0, argList{Yml}},
   615  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   616  	{Zcallind, 2, argList{Yindir}},
   617  	{Zcall, 0, argList{Ybr}},
   618  	{Zcallcon, 1, argList{Yi32}},
   619  }
   620  
   621  var yduff = []ytab{ // single Yi32 operand, Zcallduff
   622  	{Zcallduff, 1, argList{Yi32}},
   623  }
   624  
   625  var yjmp = []ytab{ // indirect (Yml), branch-target and constant forms; Zjmp and Zjmpcon start at the same op byte
   626  	{Zo_m64, 2, argList{Yml}},
   627  	{Zjmp, 0, argList{Ybr}},
   628  	{Zjmpcon, 1, argList{Yi32}},
   629  }
   630  
   631  var yfmvd = []ytab{ // NOTE(review): the tables down to ysvrs_om use Yf0/Yrf, presumably the x87 F0 register and any F register — confirm in oclass
   632  	{Zm_o, 2, argList{Ym, Yf0}},
   633  	{Zo_m, 2, argList{Yf0, Ym}},
   634  	{Zm_o, 2, argList{Yrf, Yf0}},
   635  	{Zo_m, 2, argList{Yf0, Yrf}},
   636  }
   637  
   638  var yfmvdp = []ytab{ // Yf0 source only: to Ym or to Yrf
   639  	{Zo_m, 2, argList{Yf0, Ym}},
   640  	{Zo_m, 2, argList{Yf0, Yrf}},
   641  }
   642  
   643  var yfmvf = []ytab{ // Ym <-> Yf0 in both directions
   644  	{Zm_o, 2, argList{Ym, Yf0}},
   645  	{Zo_m, 2, argList{Yf0, Ym}},
   646  }
   647  
   648  var yfmvx = []ytab{ // Ym to Yf0 only
   649  	{Zm_o, 2, argList{Ym, Yf0}},
   650  }
   651  
   652  var yfmvp = []ytab{ // Yf0 to Ym only
   653  	{Zo_m, 2, argList{Yf0, Ym}},
   654  }
   655  
   656  var yfcmv = []ytab{ // Yrf to Yf0 only
   657  	{Zm_o, 2, argList{Yrf, Yf0}},
   658  }
   659  
   660  var yfadd = []ytab{ // Ym or Yrf into Yf0, or Yf0 into Yrf
   661  	{Zm_o, 2, argList{Ym, Yf0}},
   662  	{Zm_o, 2, argList{Yrf, Yf0}},
   663  	{Zo_m, 2, argList{Yf0, Yrf}},
   664  }
   665  
   666  var yfxch = []ytab{ // Yf0 and Yrf in either operand order
   667  	{Zo_m, 2, argList{Yf0, Yrf}},
   668  	{Zm_o, 2, argList{Yrf, Yf0}},
   669  }
   670  
   671  var ycompp = []ytab{
   672  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   673  }
   674  
   675  var ystsw = []ytab{ // memory destination, or the Yax register form
   676  	{Zo_m, 2, argList{Ym}},
   677  	{Zlit, 1, argList{Yax}},
   678  }
   679  
   680  var ysvrs_mo = []ytab{ // single Ym operand, Zm_o
   681  	{Zm_o, 2, argList{Ym}},
   682  }
   683  
   684  // unaryDst version of "ysvrs_mo".
   685  var ysvrs_om = []ytab{ // same operand as ysvrs_mo but encoded with Zo_m
   686  	{Zo_m, 2, argList{Ym}},
   687  }
   688  
   689  var ymm = []ytab{ // MMX form (Ymm, Ymr) followed by the XMM form (Yxm, Yxr)
   690  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   691  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   692  }
   693  
   694  var yxm = []ytab{ // Yxm source, Yxr destination (ADDPD/PS/SD/SS, ADDSUBPD/PS, ANDPD/PS, ANDNPD/PS)
   695  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   696  }
   697  
   698  var yxm_q4 = []ytab{ // same operands as yxm but encoded with Zm_r instead of Zm_r_xm
   699  	{Zm_r, 1, argList{Yxm, Yxr}},
   700  }
   701  
   702  var yxcvm1 = []ytab{ // Yxm source; destination Yxr or Ymr
   703  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   704  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   705  }
   706  
   707  var yxcvm2 = []ytab{ // Yxr destination; source Yxm or Ymm
   708  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   709  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   710  }
   711  
   712  var yxr = []ytab{ // register-only: Yxr to Yxr
   713  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   714  }
   715  
   716  var yxr_ml = []ytab{ // Yxr source, Yml destination
   717  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   718  }
   719  
   720  var ymr = []ytab{ // register-only: Ymr to Ymr
   721  	{Zm_r, 1, argList{Ymr, Ymr}},
   722  }
   723  
   724  var ymr_ml = []ytab{ // Ymr source, Yml destination
   725  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   726  }
   727  
   728  var yxcmpi = []ytab{ // Yxm, Yxr plus an int8 immediate (ACMPPD, ACMPPS, ACMPSD); reserves 2 op bytes, e.g. {Pe, 0xc2} for ACMPPD
   729  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   730  }
   731  
   732  var yxmov = []ytab{ // load (Yxm to Yxr) and store (Yxr to Yxm) forms
   733  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   734  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   735  }
   736  
   737  var yxcvfl = []ytab{ // Yxm source, Yrl destination; 1 op byte
   738  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   739  }
   740  
   741  var yxcvlf = []ytab{ // Yml source, Yxr destination; 1 op byte
   742  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   743  }
   744  
   745  var yxcvfq = []ytab{ // same operands as yxcvfl but reserving 2 op bytes
   746  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   747  }
   748  
   749  var yxcvqf = []ytab{ // same operands as yxcvlf but reserving 2 op bytes
   750  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   751  }
   752  
   753  var yps = []ytab{ // MMX then XMM forms, each as register/memory source and as int8 immediate
   754  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   755  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   756  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   757  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   758  }
   759  
   760  var yxrrl = []ytab{ // Yxr source, Yrl destination
   761  	{Zm_r, 1, argList{Yxr, Yrl}},
   762  }
   763  
   764  var ymrxr = []ytab{ // Ymr to Yxr, or Yxm to Yxr
   765  	{Zm_r, 1, argList{Ymr, Yxr}},
   766  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   767  }
   768  
   769  var ymshuf = []ytab{ // int8 immediate, Ymm, Ymr
   770  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   771  }
   772  
   773  var ymshufb = []ytab{ // Yxm source, Yxr destination, Zm2_r
   774  	{Zm2_r, 2, argList{Yxm, Yxr}},
   775  }
   776  
   777  // yxshuf should never have more than 1 entry,
   778  // because some optab entries use opcode sequences that
   779  // are longer than 2 bytes (zoffset=2 here):
   780  // ROUNDPD, ROUNDPS and the recently added BLENDPD,
   781  // to name a few.
   782  var yxshuf = []ytab{
   783  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   784  }
   785  
   786  var yextrw = []ytab{ // uint8 immediate and Yxr source; destination Yrl (Zibm_r) or Yml (Zibr_m)
   787  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   788  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   789  }
   790  
   791  var yextr = []ytab{ // uint8 immediate, Yxr source, Ymm destination; 3 op bytes
   792  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   793  }
   794  
   795  var yinsrw = []ytab{ // uint8 immediate, Yml source, Yxr destination; 2 op bytes
   796  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   797  }
   798  
   799  var yinsr = []ytab{ // uint8 immediate, Ymm source, Yxr destination; 3 op bytes
   800  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   801  }
   802  
   803  var ypsdq = []ytab{ // int8 immediate applied to a Yxr register
   804  	{Zibo_m, 2, argList{Yi8, Yxr}},
   805  }
   806  
   807  var ymskb = []ytab{ // Yxr or Ymr source, Yrl destination
   808  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   809  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   810  }
   811  
   812  var ycrc32l = []ytab{ // Yml source, Yrl destination
   813  	{Zlitm_r, 0, argList{Yml, Yrl}},
   814  }
   815  
   816  var ycrc32b = []ytab{ // byte-source variant of ycrc32l
   817  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   818  }
   819  
   820  var yprefetch = []ytab{ // single Ym operand, Zm_o
   821  	{Zm_o, 2, argList{Ym}},
   822  }
   823  
   824  var yaes = []ytab{ // Yxm source, Yxr destination, Zlitm_r; 2 op bytes
   825  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   826  }
   827  
   828  var yxbegin = []ytab{ // single branch-target operand, Zjmp
   829  	{Zjmp, 1, argList{Ybr}},
   830  }
   831  
   832  var yxabort = []ytab{ // single uint8 immediate
   833  	{Zib_, 1, argList{Yu8}},
   834  }
   835  
   836  var ylddqu = []ytab{ // memory-only source (Ym), Yxr destination
   837  	{Zm_r, 1, argList{Ym, Yxr}},
   838  }
   839  
   840  var ypalignr = []ytab{ // uint8 immediate, Yxm, Yxr
   841  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   842  }
   843  
   844  var ysha256rnds2 = []ytab{ // first operand must be X0 (Yxr0), then Yxm, Yxr
   845  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   846  }
   847  
   848  var yblendvpd = []ytab{ // first operand must be X0 (Yxr0), then Yxm, Yxr
   849  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   850  }
   851  
   852  var ymmxmm0f38 = []ytab{ // MMX form reserves 3 op bytes, XMM form reserves 5
   853  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   854  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   855  }
   856  
   857  var yextractps = []ytab{ // uint2 immediate, Yxr source, Yml destination
   858  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   859  }
   860  
   861  var ysha1rnds4 = []ytab{ // uint2 immediate, Yxm, Yxr
   862  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   863  }
   864  
   865  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   866  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   867  // to find the entry with the given p.As and then looks through the ytable for
   868  // that instruction (the second field in the optab struct) for a line whose
   869  // argList matches the Ytypes of the p.From and p.To operands.  The
   870  // function oclass computes the specific Ytype of an operand and then the set
   871  // of more general Ytypes that it satisfies is implied by the ycover table, set
   872  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   873  // from the more general 8-bit constants, but instinit says
   874  //
   875  //	ycover[Yi0*Ymax+Ys32] = 1
   876  //	ycover[Yi1*Ymax+Ys32] = 1
   877  //	ycover[Yi8*Ymax+Ys32] = 1
   878  //
   879  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   880  // if that's what an instruction can handle.
   881  //
   882  // In parallel with the scan through the ytable for the appropriate line, there
   883  // is a z pointer that starts out pointing at the strange magic byte list in
   884  // the Optab struct.  With each step past a non-matching ytable line, z
   885  // advances by the zoffset (2nd entry) of the line.  When a matching line is found, that
   886  // z pointer has the extra data to use in laying down the instruction bytes.
   887  // The actual bytes laid down are a function of the 1st entry in the line (that
   888  // is, the Ztype) and the z bytes.
   889  //
   890  // For example, let's look at AADDL.  The optab line says:
   891  //
   892  //	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //
   896  //	var yaddl = []ytab{
   897  //	        {Zibo_m, 2, argList{Yi8, Yml}},
   898  //	        {Zil_, 1, argList{Yi32, Yax}},
   899  //	        {Zilo_m, 2, argList{Yi32, Yml}},
   900  //	        {Zr_m, 1, argList{Yrl, Yml}},
   901  //	        {Zm_r, 1, argList{Yml, Yrl}},
   902  //	}
   903  //
   904  // so there are 5 possible types of ADDL instruction that can be laid down, and
   905  // possible states used to lay them down (Ztype and z pointer, assuming z
   906  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   907  //
   908  //	Yi8, Yml -> Zibo_m, z (0x83, 00)
   909  //	Yi32, Yax -> Zil_, z+2 (0x05)
   910  //	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   911  //	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   912  //	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   913  //
   914  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   915  // relatively straightforward as this program goes.
   916  //
   917  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   918  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   919  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   920  // Zilo_m is the same but a long (32-bit) immediate.
   921  var optab =
   922  // as, ytab, andproto, opcode
   923  [...]Optab{
   924  	{obj.AXXX, nil, 0, opBytes{}},
   925  	{AAAA, ynone, P32, opBytes{0x37}},
   926  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   927  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   928  	{AAAS, ynone, P32, opBytes{0x3f}},
   929  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   930  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   933  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   934  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   935  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   936  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   937  	{AADDPD, yxm, Pq, opBytes{0x58}},
   938  	{AADDPS, yxm, Pm, opBytes{0x58}},
   939  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   940  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   941  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   942  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   943  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   944  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   945  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   946  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   947  	{AADJSP, nil, 0, opBytes{}},
   948  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   949  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   950  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   951  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   952  	{AANDPD, yxm, Pq, opBytes{0x54}},
   953  	{AANDPS, yxm, Pm, opBytes{0x54}},
   954  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   956  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   957  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   958  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   959  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   960  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   961  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   962  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   963  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   964  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   965  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   966  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   967  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   968  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   969  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   970  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   971  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   972  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   973  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   974  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   975  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   976  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   977  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   978  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   979  	{ABYTE, ybyte, Px, opBytes{1}},
   980  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   981  	{ACBW, ynone, Pe, opBytes{0x98}},
   982  	{ACDQ, ynone, Px, opBytes{0x99}},
   983  	{ACDQE, ynone, Pw, opBytes{0x98}},
   984  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   985  	{ACLC, ynone, Px, opBytes{0xf8}},
   986  	{ACLD, ynone, Px, opBytes{0xfc}},
   987  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   988  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   989  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   990  	{ACLI, ynone, Px, opBytes{0xfa}},
   991  	{ACLTS, ynone, Pm, opBytes{0x06}},
   992  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   993  	{ACMC, ynone, Px, opBytes{0xf5}},
   994  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   995  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   996  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   997  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   998  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   999  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
  1000  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1001  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1002  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1003  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1004  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1005  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1006  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1007  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1008  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1009  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1010  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1011  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1012  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1013  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1014  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1015  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1016  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1017  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1018  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1019  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1020  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1021  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1022  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1023  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1024  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1025  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1026  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1027  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1028  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1029  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1030  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1031  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1032  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1033  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1034  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1035  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1036  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1037  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1038  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1039  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1040  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1041  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1042  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1043  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1044  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1045  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1046  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1047  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1048  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1049  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1050  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1051  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1052  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1053  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1054  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1055  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1056  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1057  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1058  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1059  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1060  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1061  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1062  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1063  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1064  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1065  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1066  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1067  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1068  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1069  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1070  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1071  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1072  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1073  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1074  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1075  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1076  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1077  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1078  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1079  	{ACWD, ynone, Pe, opBytes{0x99}},
  1080  	{ACWDE, ynone, Px, opBytes{0x98}},
  1081  	{ACQO, ynone, Pw, opBytes{0x99}},
  1082  	{ADAA, ynone, P32, opBytes{0x27}},
  1083  	{ADAS, ynone, P32, opBytes{0x2f}},
  1084  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1085  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1086  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1087  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1088  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1089  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1090  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1091  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1092  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1093  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1094  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1095  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1096  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1097  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1098  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1099  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1100  	{AENTER, nil, 0, opBytes{}}, // botch
  1101  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1102  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1103  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1104  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1105  	{AHLT, ynone, Px, opBytes{0xf4}},
  1106  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1107  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1108  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1109  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1110  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1111  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1115  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1117  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1118  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1119  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1120  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1121  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1122  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1123  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1124  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1125  	{AINSL, ynone, Px, opBytes{0x6d}},
  1126  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1127  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1128  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1129  	{AINT, yint, Px, opBytes{0xcd}},
  1130  	{AINTO, ynone, P32, opBytes{0xce}},
  1131  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1132  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1133  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1134  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1135  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1136  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1137  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1138  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1139  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1140  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1141  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1142  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1143  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1144  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1145  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1146  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1147  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1148  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1149  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1150  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1151  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1152  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1153  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1154  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1155  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1156  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1157  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1158  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1159  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1160  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1161  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1162  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1163  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1164  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1165  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1166  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1167  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1168  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1169  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1170  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1171  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1172  	{ALODSL, ynone, Px, opBytes{0xad}},
  1173  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1174  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1175  	{ALONG, ybyte, Px, opBytes{4}},
  1176  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1177  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1178  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1179  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1180  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1181  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1182  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1183  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1184  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1185  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1186  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1187  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1188  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1189  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1190  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1191  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1192  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1193  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1194  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1195  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1196  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1197  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1198  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1199  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1200  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1201  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1202  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1203  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1204  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1205  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1206  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1207  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1208  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1209  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1210  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1211  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1212  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1213  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1214  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1215  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1216  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1217  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1218  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1219  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1220  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1221  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1222  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1223  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1224  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1225  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1226  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1227  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1228  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1229  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1230  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1231  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1232  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1233  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1234  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1235  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1236  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1237  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1238  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1239  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1240  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1241  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1242  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1243  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1244  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1245  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1246  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1247  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1248  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1249  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1250  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1251  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1252  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1253  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1254  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1255  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1256  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1257  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1258  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1259  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1260  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1261  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1262  	{AORPD, yxm, Pq, opBytes{0x56}},
  1263  	{AORPS, yxm, Pm, opBytes{0x56}},
  1264  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1265  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1266  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1267  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1268  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1269  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1270  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1271  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1272  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1273  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1274  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1275  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1276  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1277  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1278  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1279  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1280  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1281  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1282  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1283  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1284  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1285  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1286  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1287  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1288  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1289  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1290  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1291  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1292  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1293  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1294  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1295  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1296  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1297  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1298  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1299  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1300  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1301  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1302  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1303  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1304  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1305  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1306  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1307  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1308  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1309  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1310  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1311  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1312  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1313  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1314  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1315  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1316  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1317  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1318  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1319  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1320  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1321  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1322  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1323  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1324  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1325  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1326  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1327  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1328  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1329  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1330  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1331  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1332  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1333  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1334  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1335  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1336  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1337  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1338  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1339  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1340  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1341  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1342  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1343  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1344  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1345  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1346  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1347  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1348  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1349  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1350  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1351  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1352  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1353  	{APOPAL, ynone, P32, opBytes{0x61}},
  1354  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1355  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1356  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1357  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1358  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1359  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1360  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1361  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1362  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1363  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1364  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1365  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1366  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1367  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1368  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1369  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1370  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1371  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1372  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1373  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1374  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1375  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1376  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1377  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1378  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1379  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1380  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1381  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1382  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1383  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1384  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1385  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1386  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1387  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1388  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1389  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1390  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1391  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1392  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1393  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1394  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1395  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1396  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1397  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1398  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1399  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1400  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1401  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1402  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1403  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1404  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1405  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1406  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1407  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1409  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1410  	{AQUAD, ybyte, Px, opBytes{8}},
  1411  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1412  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1413  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1415  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1416  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1417  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1418  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1419  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1421  	{AREP, ynone, Px, opBytes{0xf3}},
  1422  	{AREPN, ynone, Px, opBytes{0xf2}},
  1423  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1424  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1425  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1426  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1427  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1428  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1429  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1431  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1432  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1433  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1435  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1436  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1437  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1438  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1439  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1440  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1442  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1443  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1444  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1446  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1447  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1448  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1450  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1451  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1452  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1453  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1454  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1455  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1456  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1457  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1458  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1459  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1460  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1461  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1462  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1463  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1464  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1465  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1466  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1467  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1468  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1469  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1470  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1471  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1472  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1474  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1475  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1476  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1478  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1479  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1480  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1481  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1482  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1483  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1484  	{ASTC, ynone, Px, opBytes{0xf9}},
  1485  	{ASTD, ynone, Px, opBytes{0xfd}},
  1486  	{ASTI, ynone, Px, opBytes{0xfb}},
  1487  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1488  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1489  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1490  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1491  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1492  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1493  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1494  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1495  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1496  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1497  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1498  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1499  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1500  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1501  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1502  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1503  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1504  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1506  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1507  	{obj.ATEXT, ytext, Px, opBytes{}},
  1508  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1509  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1510  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1511  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1512  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1513  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1514  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1515  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1516  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1517  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1518  	{AWORD, ybyte, Px, opBytes{2}},
  1519  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1520  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1521  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1523  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1524  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1525  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1526  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1527  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1528  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1531  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1532  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1533  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1534  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1535  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1536  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1537  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1538  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1539  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1540  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1541  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1542  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1543  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1544  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1545  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1546  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1547  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1548  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1549  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1550  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1551  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1552  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1553  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1554  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1555  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1556  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1557  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1558  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1559  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1560  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1561  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1562  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1563  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1564  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1565  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1566  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1567  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1568  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1569  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1570  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1571  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1572  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1573  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1574  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1575  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1576  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1577  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1578  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1579  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1580  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1581  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1582  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1583  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1584  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1585  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1586  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1587  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1588  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1589  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1590  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1591  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1592  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1593  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1594  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1595  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1596  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1597  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1598  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1599  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1600  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1601  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1602  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1603  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1604  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1605  	{AFFREE, nil, 0, opBytes{}},
  1606  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1607  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1608  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1609  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1610  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1611  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1612  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1613  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1614  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1615  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1616  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1617  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1618  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1619  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1620  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1621  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1622  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1623  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1624  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1625  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1626  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1627  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1628  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1629  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1630  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1631  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1632  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1633  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1634  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1635  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1636  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1637  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1638  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1639  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1640  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1641  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1642  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1643  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1644  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1645  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1646  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1647  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1649  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1650  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1651  	{AINVD, ynone, Pm, opBytes{0x08}},
  1652  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1653  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1654  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1655  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1656  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1657  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1658  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1659  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1660  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1661  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1662  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1663  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1664  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1665  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1666  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1667  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1668  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1669  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1670  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1671  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1672  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1673  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1674  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1676  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1677  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1678  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1679  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1680  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1681  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1682  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1683  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1684  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1685  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1686  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1687  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1688  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1689  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1690  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1691  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1692  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1693  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1694  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1695  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1696  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1697  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1698  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1699  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1700  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1701  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1702  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1703  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1704  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1705  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1706  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1707  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1708  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1709  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1710  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1711  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1712  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1713  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1715  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1716  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1717  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1718  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1719  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1720  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1721  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1722  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1723  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1724  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1725  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1726  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1727  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1728  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1729  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1730  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1732  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1733  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1734  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1735  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1736  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1737  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1738  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1739  	{AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1740  	{AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1741  	{AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1742  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1743  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1744  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1745  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1746  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1747  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1748  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1749  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1750  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1751  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1752  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1753  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1754  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1755  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1756  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1757  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1758  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1759  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1760  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1761  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1762  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1763  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1764  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1765  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1766  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1767  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1768  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1769  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1770  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1771  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1772  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1773  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1774  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1775  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1776  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1777  
  1778  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1779  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1780  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1781  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1782  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1783  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1784  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1785  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1786  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1787  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1788  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1789  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1790  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1791  
  1792  	{obj.AEND, nil, 0, opBytes{}},
  1793  	{0, nil, 0, opBytes{}},
  1794  }
  1795  
// opindex maps an opcode, masked with obj.AMask, to the table entry that
// describes how to encode it. instinit fills it in from avxOptab and optab.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1797  
  1798  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1799  // This happens on systems like Solaris that call .so functions instead of system calls.
  1800  // It does not seem to be necessary for any other systems. This is probably working
  1801  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1802  // what that bug is. And this does fix it.
  1803  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1804  	if ctxt.Headtype == objabi.Hsolaris {
  1805  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1806  		return strings.HasPrefix(s.Name, "libc_")
  1807  	}
  1808  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1809  }
  1810  
  1811  // single-instruction no-ops of various lengths.
  1812  // constructed by hand and disassembled with gdb to verify.
  1813  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1814  var nop = [][16]uint8{
  1815  	{0x90},
  1816  	{0x66, 0x90},
  1817  	{0x0F, 0x1F, 0x00},
  1818  	{0x0F, 0x1F, 0x40, 0x00},
  1819  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1820  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1821  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1822  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1823  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1824  }
  1825  
  1826  // Native Client rejects the repeated 0x66 prefix.
  1827  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1828  func fillnop(p []byte, n int) {
  1829  	var m int
  1830  
  1831  	for n > 0 {
  1832  		m = n
  1833  		if m > len(nop) {
  1834  			m = len(nop)
  1835  		}
  1836  		copy(p[:m], nop[m-1][:m])
  1837  		p = p[m:]
  1838  		n -= m
  1839  	}
  1840  }
  1841  
  1842  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1843  	s.Grow(int64(c) + int64(pad))
  1844  	fillnop(s.P[c:], int(pad))
  1845  	return c + pad
  1846  }
  1847  
  1848  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1849  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1850  		return l
  1851  	}
  1852  	return q
  1853  }
  1854  
  1855  // isJump returns whether p is a jump instruction.
  1856  // It is used to ensure that no standalone or macro-fused jump will straddle
  1857  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1858  func isJump(p *obj.Prog) bool {
  1859  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1860  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1861  }
  1862  
  1863  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1864  // jump. Otherwise, nil is returned.
  1865  func lookForJCC(p *obj.Prog) *obj.Prog {
  1866  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1867  	var q *obj.Prog
  1868  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1869  	}
  1870  
  1871  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1872  		return nil
  1873  	}
  1874  
  1875  	switch q.As {
  1876  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1877  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1878  	default:
  1879  		return nil
  1880  	}
  1881  
  1882  	return q
  1883  }
  1884  
  1885  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1886  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1887  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1888  func fusedJump(p *obj.Prog) (bool, uint8) {
  1889  	var fusedSize uint8
  1890  
  1891  	// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
  1892  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1893  	// need to be careful to insert any padding before the locks rather than directly after them.
  1894  
  1895  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1896  		fusedSize += p.Isize
  1897  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1898  		}
  1899  		if p == nil {
  1900  			return false, 0
  1901  		}
  1902  	}
  1903  	if p.As == ALOCK {
  1904  		fusedSize += p.Isize
  1905  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1906  		}
  1907  		if p == nil {
  1908  			return false, 0
  1909  		}
  1910  	}
  1911  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1912  
  1913  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1914  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1915  
  1916  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1917  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1918  
  1919  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1920  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1921  
  1922  	if !cmpAddSub && !testAnd && !incDec {
  1923  		return false, 0
  1924  	}
  1925  
  1926  	if !incDec {
  1927  		var argOne obj.AddrType
  1928  		var argTwo obj.AddrType
  1929  		if cmp {
  1930  			argOne = p.From.Type
  1931  			argTwo = p.To.Type
  1932  		} else {
  1933  			argOne = p.To.Type
  1934  			argTwo = p.From.Type
  1935  		}
  1936  		if argOne == obj.TYPE_REG {
  1937  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1938  				return false, 0
  1939  			}
  1940  		} else if argOne == obj.TYPE_MEM {
  1941  			if argTwo != obj.TYPE_REG {
  1942  				return false, 0
  1943  			}
  1944  		} else {
  1945  			return false, 0
  1946  		}
  1947  	}
  1948  
  1949  	fusedSize += p.Isize
  1950  	jmp := lookForJCC(p)
  1951  	if jmp == nil {
  1952  		return false, 0
  1953  	}
  1954  
  1955  	fusedSize += jmp.Isize
  1956  
  1957  	if testAnd {
  1958  		return true, fusedSize
  1959  	}
  1960  
  1961  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1962  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1963  		return false, 0
  1964  	}
  1965  
  1966  	if cmpAddSub {
  1967  		return true, fusedSize
  1968  	}
  1969  
  1970  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1971  		return false, 0
  1972  	}
  1973  
  1974  	return true, fusedSize
  1975  }
  1976  
  1977  type padJumpsCtx int32
  1978  
  1979  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1980  	// Disable jump padding on 32 bit builds by settting
  1981  	// padJumps to 0.
  1982  	if ctxt.Arch.Family == sys.I386 {
  1983  		return padJumpsCtx(0)
  1984  	}
  1985  
  1986  	// Disable jump padding for hand written assembly code.
  1987  	if ctxt.IsAsm {
  1988  		return padJumpsCtx(0)
  1989  	}
  1990  
  1991  	return padJumpsCtx(32)
  1992  }
  1993  
  1994  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1995  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1996  // not cross or end on a 32 byte boundary.
  1997  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  1998  	if pjc == 0 {
  1999  		return c
  2000  	}
  2001  
  2002  	var toPad int32
  2003  	fj, fjSize := fusedJump(p)
  2004  	mask := int32(pjc - 1)
  2005  	if fj {
  2006  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2007  			toPad = int32(pjc) - (c & mask)
  2008  		}
  2009  	} else if isJump(p) {
  2010  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2011  			toPad = int32(pjc) - (c & mask)
  2012  		}
  2013  	}
  2014  	if toPad <= 0 {
  2015  		return c
  2016  	}
  2017  
  2018  	return noppad(ctxt, s, c, toPad)
  2019  }
  2020  
  2021  // reAssemble is called if an instruction's size changes during assembly. If
  2022  // it does and the instruction is a standalone or a macro-fused jump we need to
  2023  // reassemble.
  2024  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2025  	if pjc == 0 {
  2026  		return false
  2027  	}
  2028  
  2029  	fj, _ := fusedJump(p)
  2030  	return fj || isJump(p)
  2031  }
  2032  
// nopPad records one run of padding inserted while assembling a function.
// span6 collects these during its final pass and afterwards splices a NOP
// Prog of size n into the instruction list right after p.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
  2037  
// span6 assembles the instructions of the function symbol s into machine code.
//
// It first normalizes a few pseudo-instructions (ADJSP, retpoline calls), then
// repeatedly encodes the whole instruction list into s.P, assigning each Prog
// its Pc, until a pass completes without requesting re-assembly. Afterwards it
// splices padding NOPs into the Prog list, sets s.Size, marks the
// non-preemptible TLS access sequence when needed, and writes jump table
// entries. It returns immediately if s.P is already non-nil.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
		ctxt.Diag("-spectre=ret not supported on 386")
		ctxt.Retpoline = false // don't keep printing
	}

	pjc := makePjcCtx(ctxt)

	// Nothing to do if the symbol already has code bytes.
	if s.P != nil {
		return
	}

	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pre-pass: rewrite pseudo-instructions before any encoding happens.
	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch without a resolved target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// With retpolines enabled, an indirect CALL/JMP through a register is
		// redirected to the symbol "runtime.retpoline<reg>"; a memory operand
		// cannot be converted and is reported as an error.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already carries branchShort is treated as a backward
		// branch target and therefore a loop head.
		// NOTE(review): this presumably relies on Back being zero for Progs
		// not yet visited by this loop — confirm.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int  // number of assembly passes made so far
	var c int32 // current byte offset within the function
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard the relocations produced by the previous pass.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			// c0 is the offset before any padding is inserted ahead of p.
			c0 := c
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// v is the displacement from the end of q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					if v > 127 {
						// Too far for a one-byte displacement: clear the short
						// flag on q and request another pass.
						reAssemble = true
						q.Back ^= branchShort
					}

					// Patch the one-byte displacement; for JCXZL and XBEGIN
					// it is at offset 2 of the instruction instead of 1.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement occupies the last
					// four bytes of q.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			// Append the encoded bytes of p to the symbol.
			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Guard against a pass count that never converges.
		n++
		if n > 1000 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Stop retrying once assembly has reported new errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging aid: hex dump of the generated code and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}

	// Now that we know byte offsets, we can generate jump table entries.
	// TODO: could this live in obj instead of obj/$ARCH?
	for _, jt := range s.Func().JumpTables {
		for i, p := range jt.Targets {
			// The ith jumptable entry points to the p.Pc'th
			// byte in the function symbol s.
			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
		}
	}
}
  2244  
  2245  func instinit(ctxt *obj.Link) {
  2246  	if ycover[0] != 0 {
  2247  		// Already initialized; stop now.
  2248  		// This happens in the cmd/asm tests,
  2249  		// each of which re-initializes the arch.
  2250  		return
  2251  	}
  2252  
  2253  	switch ctxt.Headtype {
  2254  	case objabi.Hplan9:
  2255  		plan9privates = ctxt.Lookup("_privates")
  2256  	}
  2257  
  2258  	for i := range avxOptab {
  2259  		c := avxOptab[i].as
  2260  		if opindex[c&obj.AMask] != nil {
  2261  			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
  2262  		}
  2263  		opindex[c&obj.AMask] = &avxOptab[i]
  2264  	}
  2265  	for i := 1; optab[i].as != 0; i++ {
  2266  		c := optab[i].as
  2267  		if opindex[c&obj.AMask] != nil {
  2268  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2269  		}
  2270  		opindex[c&obj.AMask] = &optab[i]
  2271  	}
  2272  
  2273  	for i := 0; i < Ymax; i++ {
  2274  		ycover[i*Ymax+i] = 1
  2275  	}
  2276  
  2277  	ycover[Yi0*Ymax+Yu2] = 1
  2278  	ycover[Yi1*Ymax+Yu2] = 1
  2279  
  2280  	ycover[Yi0*Ymax+Yi8] = 1
  2281  	ycover[Yi1*Ymax+Yi8] = 1
  2282  	ycover[Yu2*Ymax+Yi8] = 1
  2283  	ycover[Yu7*Ymax+Yi8] = 1
  2284  
  2285  	ycover[Yi0*Ymax+Yu7] = 1
  2286  	ycover[Yi1*Ymax+Yu7] = 1
  2287  	ycover[Yu2*Ymax+Yu7] = 1
  2288  
  2289  	ycover[Yi0*Ymax+Yu8] = 1
  2290  	ycover[Yi1*Ymax+Yu8] = 1
  2291  	ycover[Yu2*Ymax+Yu8] = 1
  2292  	ycover[Yu7*Ymax+Yu8] = 1
  2293  
  2294  	ycover[Yi0*Ymax+Ys32] = 1
  2295  	ycover[Yi1*Ymax+Ys32] = 1
  2296  	ycover[Yu2*Ymax+Ys32] = 1
  2297  	ycover[Yu7*Ymax+Ys32] = 1
  2298  	ycover[Yu8*Ymax+Ys32] = 1
  2299  	ycover[Yi8*Ymax+Ys32] = 1
  2300  
  2301  	ycover[Yi0*Ymax+Yi32] = 1
  2302  	ycover[Yi1*Ymax+Yi32] = 1
  2303  	ycover[Yu2*Ymax+Yi32] = 1
  2304  	ycover[Yu7*Ymax+Yi32] = 1
  2305  	ycover[Yu8*Ymax+Yi32] = 1
  2306  	ycover[Yi8*Ymax+Yi32] = 1
  2307  	ycover[Ys32*Ymax+Yi32] = 1
  2308  
  2309  	ycover[Yi0*Ymax+Yi64] = 1
  2310  	ycover[Yi1*Ymax+Yi64] = 1
  2311  	ycover[Yu7*Ymax+Yi64] = 1
  2312  	ycover[Yu2*Ymax+Yi64] = 1
  2313  	ycover[Yu8*Ymax+Yi64] = 1
  2314  	ycover[Yi8*Ymax+Yi64] = 1
  2315  	ycover[Ys32*Ymax+Yi64] = 1
  2316  	ycover[Yi32*Ymax+Yi64] = 1
  2317  
  2318  	ycover[Yal*Ymax+Yrb] = 1
  2319  	ycover[Ycl*Ymax+Yrb] = 1
  2320  	ycover[Yax*Ymax+Yrb] = 1
  2321  	ycover[Ycx*Ymax+Yrb] = 1
  2322  	ycover[Yrx*Ymax+Yrb] = 1
  2323  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2324  
  2325  	ycover[Ycl*Ymax+Ycx] = 1
  2326  
  2327  	ycover[Yax*Ymax+Yrx] = 1
  2328  	ycover[Ycx*Ymax+Yrx] = 1
  2329  
  2330  	ycover[Yax*Ymax+Yrl] = 1
  2331  	ycover[Ycx*Ymax+Yrl] = 1
  2332  	ycover[Yrx*Ymax+Yrl] = 1
  2333  	ycover[Yrl32*Ymax+Yrl] = 1
  2334  
  2335  	ycover[Yf0*Ymax+Yrf] = 1
  2336  
  2337  	ycover[Yal*Ymax+Ymb] = 1
  2338  	ycover[Ycl*Ymax+Ymb] = 1
  2339  	ycover[Yax*Ymax+Ymb] = 1
  2340  	ycover[Ycx*Ymax+Ymb] = 1
  2341  	ycover[Yrx*Ymax+Ymb] = 1
  2342  	ycover[Yrb*Ymax+Ymb] = 1
  2343  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2344  	ycover[Ym*Ymax+Ymb] = 1
  2345  
  2346  	ycover[Yax*Ymax+Yml] = 1
  2347  	ycover[Ycx*Ymax+Yml] = 1
  2348  	ycover[Yrx*Ymax+Yml] = 1
  2349  	ycover[Yrl*Ymax+Yml] = 1
  2350  	ycover[Yrl32*Ymax+Yml] = 1
  2351  	ycover[Ym*Ymax+Yml] = 1
  2352  
  2353  	ycover[Yax*Ymax+Ymm] = 1
  2354  	ycover[Ycx*Ymax+Ymm] = 1
  2355  	ycover[Yrx*Ymax+Ymm] = 1
  2356  	ycover[Yrl*Ymax+Ymm] = 1
  2357  	ycover[Yrl32*Ymax+Ymm] = 1
  2358  	ycover[Ym*Ymax+Ymm] = 1
  2359  	ycover[Ymr*Ymax+Ymm] = 1
  2360  
  2361  	ycover[Yxr0*Ymax+Yxr] = 1
  2362  
  2363  	ycover[Ym*Ymax+Yxm] = 1
  2364  	ycover[Yxr0*Ymax+Yxm] = 1
  2365  	ycover[Yxr*Ymax+Yxm] = 1
  2366  
  2367  	ycover[Ym*Ymax+Yym] = 1
  2368  	ycover[Yyr*Ymax+Yym] = 1
  2369  
  2370  	ycover[Yxr0*Ymax+YxrEvex] = 1
  2371  	ycover[Yxr*Ymax+YxrEvex] = 1
  2372  
  2373  	ycover[Ym*Ymax+YxmEvex] = 1
  2374  	ycover[Yxr0*Ymax+YxmEvex] = 1
  2375  	ycover[Yxr*Ymax+YxmEvex] = 1
  2376  	ycover[YxrEvex*Ymax+YxmEvex] = 1
  2377  
  2378  	ycover[Yyr*Ymax+YyrEvex] = 1
  2379  
  2380  	ycover[Ym*Ymax+YymEvex] = 1
  2381  	ycover[Yyr*Ymax+YymEvex] = 1
  2382  	ycover[YyrEvex*Ymax+YymEvex] = 1
  2383  
  2384  	ycover[Ym*Ymax+Yzm] = 1
  2385  	ycover[Yzr*Ymax+Yzm] = 1
  2386  
  2387  	ycover[Yk0*Ymax+Yk] = 1
  2388  	ycover[Yknot0*Ymax+Yk] = 1
  2389  
  2390  	ycover[Yk0*Ymax+Ykm] = 1
  2391  	ycover[Yknot0*Ymax+Ykm] = 1
  2392  	ycover[Yk*Ymax+Ykm] = 1
  2393  	ycover[Ym*Ymax+Ykm] = 1
  2394  
  2395  	ycover[Yxvm*Ymax+YxvmEvex] = 1
  2396  
  2397  	ycover[Yyvm*Ymax+YyvmEvex] = 1
  2398  
  2399  	for i := 0; i < MAXREG; i++ {
  2400  		reg[i] = -1
  2401  		if i >= REG_AL && i <= REG_R15B {
  2402  			reg[i] = (i - REG_AL) & 7
  2403  			if i >= REG_SPB && i <= REG_DIB {
  2404  				regrex[i] = 0x40
  2405  			}
  2406  			if i >= REG_R8B && i <= REG_R15B {
  2407  				regrex[i] = Rxr | Rxx | Rxb
  2408  			}
  2409  		}
  2410  
  2411  		if i >= REG_AH && i <= REG_BH {
  2412  			reg[i] = 4 + ((i - REG_AH) & 7)
  2413  		}
  2414  		if i >= REG_AX && i <= REG_R15 {
  2415  			reg[i] = (i - REG_AX) & 7
  2416  			if i >= REG_R8 {
  2417  				regrex[i] = Rxr | Rxx | Rxb
  2418  			}
  2419  		}
  2420  
  2421  		if i >= REG_F0 && i <= REG_F0+7 {
  2422  			reg[i] = (i - REG_F0) & 7
  2423  		}
  2424  		if i >= REG_M0 && i <= REG_M0+7 {
  2425  			reg[i] = (i - REG_M0) & 7
  2426  		}
  2427  		if i >= REG_K0 && i <= REG_K0+7 {
  2428  			reg[i] = (i - REG_K0) & 7
  2429  		}
  2430  		if i >= REG_X0 && i <= REG_X0+15 {
  2431  			reg[i] = (i - REG_X0) & 7
  2432  			if i >= REG_X0+8 {
  2433  				regrex[i] = Rxr | Rxx | Rxb
  2434  			}
  2435  		}
  2436  		if i >= REG_X16 && i <= REG_X16+15 {
  2437  			reg[i] = (i - REG_X16) & 7
  2438  			if i >= REG_X16+8 {
  2439  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2440  			} else {
  2441  				regrex[i] = RxrEvex
  2442  			}
  2443  		}
  2444  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2445  			reg[i] = (i - REG_Y0) & 7
  2446  			if i >= REG_Y0+8 {
  2447  				regrex[i] = Rxr | Rxx | Rxb
  2448  			}
  2449  		}
  2450  		if i >= REG_Y16 && i <= REG_Y16+15 {
  2451  			reg[i] = (i - REG_Y16) & 7
  2452  			if i >= REG_Y16+8 {
  2453  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2454  			} else {
  2455  				regrex[i] = RxrEvex
  2456  			}
  2457  		}
  2458  		if i >= REG_Z0 && i <= REG_Z0+15 {
  2459  			reg[i] = (i - REG_Z0) & 7
  2460  			if i > REG_Z0+7 {
  2461  				regrex[i] = Rxr | Rxx | Rxb
  2462  			}
  2463  		}
  2464  		if i >= REG_Z16 && i <= REG_Z16+15 {
  2465  			reg[i] = (i - REG_Z16) & 7
  2466  			if i >= REG_Z16+8 {
  2467  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2468  			} else {
  2469  				regrex[i] = RxrEvex
  2470  			}
  2471  		}
  2472  
  2473  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2474  			regrex[i] = Rxr
  2475  		}
  2476  	}
  2477  }
  2478  
// isAndroid reports whether buildcfg.GOOS is "android". It is consulted
// when selecting the TLS segment prefix (prefixof) and when deciding
// whether to emit an R_TLS_LE relocation (vaddr).
var isAndroid = buildcfg.GOOS == "android"
  2480  
  2481  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2482  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2483  		return 0
  2484  	}
  2485  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2486  		switch a.Reg {
  2487  		case REG_CS:
  2488  			return 0x2e
  2489  
  2490  		case REG_DS:
  2491  			return 0x3e
  2492  
  2493  		case REG_ES:
  2494  			return 0x26
  2495  
  2496  		case REG_FS:
  2497  			return 0x64
  2498  
  2499  		case REG_GS:
  2500  			return 0x65
  2501  
  2502  		case REG_TLS:
  2503  			// NOTE: Systems listed here should be only systems that
  2504  			// support direct TLS references like 8(TLS) implemented as
  2505  			// direct references from FS or GS. Systems that require
  2506  			// the initial-exec model, where you load the TLS base into
  2507  			// a register and then index from that register, do not reach
  2508  			// this code and should not be listed.
  2509  			if ctxt.Arch.Family == sys.I386 {
  2510  				switch ctxt.Headtype {
  2511  				default:
  2512  					if isAndroid {
  2513  						return 0x65 // GS
  2514  					}
  2515  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2516  
  2517  				case objabi.Hdarwin,
  2518  					objabi.Hdragonfly,
  2519  					objabi.Hfreebsd,
  2520  					objabi.Hnetbsd,
  2521  					objabi.Hopenbsd:
  2522  					return 0x65 // GS
  2523  				}
  2524  			}
  2525  
  2526  			switch ctxt.Headtype {
  2527  			default:
  2528  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2529  
  2530  			case objabi.Hlinux:
  2531  				if isAndroid {
  2532  					return 0x64 // FS
  2533  				}
  2534  
  2535  				if ctxt.Flag_shared {
  2536  					log.Fatalf("unknown TLS base register for linux with -shared")
  2537  				} else {
  2538  					return 0x64 // FS
  2539  				}
  2540  
  2541  			case objabi.Hdragonfly,
  2542  				objabi.Hfreebsd,
  2543  				objabi.Hnetbsd,
  2544  				objabi.Hopenbsd,
  2545  				objabi.Hsolaris:
  2546  				return 0x64 // FS
  2547  
  2548  			case objabi.Hdarwin:
  2549  				return 0x65 // GS
  2550  			}
  2551  		}
  2552  	}
  2553  
  2554  	if ctxt.Arch.Family == sys.I386 {
  2555  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2556  			// When building for inclusion into a shared library, an instruction of the form
  2557  			//     MOVL off(CX)(TLS*1), AX
  2558  			// becomes
  2559  			//     mov %gs:off(%ecx), %eax
  2560  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2561  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2562  			// a shared library the instruction it becomes
  2563  			//     mov 0x0(%ecx), %eax
  2564  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2565  			return 0x65 // GS
  2566  		}
  2567  		return 0
  2568  	}
  2569  
  2570  	switch a.Index {
  2571  	case REG_CS:
  2572  		return 0x2e
  2573  
  2574  	case REG_DS:
  2575  		return 0x3e
  2576  
  2577  	case REG_ES:
  2578  		return 0x26
  2579  
  2580  	case REG_TLS:
  2581  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2582  			// When building for inclusion into a shared library, an instruction of the form
  2583  			//     MOV off(CX)(TLS*1), AX
  2584  			// becomes
  2585  			//     mov %fs:off(%rcx), %rax
  2586  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2587  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2588  			// a shared library the instruction does not require a prefix.
  2589  			return 0x64
  2590  		}
  2591  
  2592  	case REG_FS:
  2593  		return 0x64
  2594  
  2595  	case REG_GS:
  2596  		return 0x65
  2597  	}
  2598  
  2599  	return 0
  2600  }
  2601  
  2602  // oclassRegList returns multisource operand class for addr.
  2603  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2604  	// TODO(quasilyte): when oclass register case is refactored into
  2605  	// lookup table, use it here to get register kind more easily.
  2606  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2607  
  2608  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2609  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2610  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2611  
  2612  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2613  	low := regIndex(int16(reg0))
  2614  	high := regIndex(int16(reg1))
  2615  
  2616  	if ctxt.Arch.Family == sys.I386 {
  2617  		if low >= 8 || high >= 8 {
  2618  			return Yxxx
  2619  		}
  2620  	}
  2621  
  2622  	switch high - low {
  2623  	case 3:
  2624  		switch {
  2625  		case regIsXmm(reg0) && regIsXmm(reg1):
  2626  			return YxrEvexMulti4
  2627  		case regIsYmm(reg0) && regIsYmm(reg1):
  2628  			return YyrEvexMulti4
  2629  		case regIsZmm(reg0) && regIsZmm(reg1):
  2630  			return YzrMulti4
  2631  		default:
  2632  			return Yxxx
  2633  		}
  2634  	default:
  2635  		return Yxxx
  2636  	}
  2637  }
  2638  
  2639  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2640  // For addr that is not V-mem returns (Yxxx, false).
  2641  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2642  	switch addr.Index {
  2643  	case REG_X0 + 0,
  2644  		REG_X0 + 1,
  2645  		REG_X0 + 2,
  2646  		REG_X0 + 3,
  2647  		REG_X0 + 4,
  2648  		REG_X0 + 5,
  2649  		REG_X0 + 6,
  2650  		REG_X0 + 7:
  2651  		return Yxvm, true
  2652  	case REG_X8 + 0,
  2653  		REG_X8 + 1,
  2654  		REG_X8 + 2,
  2655  		REG_X8 + 3,
  2656  		REG_X8 + 4,
  2657  		REG_X8 + 5,
  2658  		REG_X8 + 6,
  2659  		REG_X8 + 7:
  2660  		if ctxt.Arch.Family == sys.I386 {
  2661  			return Yxxx, true
  2662  		}
  2663  		return Yxvm, true
  2664  	case REG_X16 + 0,
  2665  		REG_X16 + 1,
  2666  		REG_X16 + 2,
  2667  		REG_X16 + 3,
  2668  		REG_X16 + 4,
  2669  		REG_X16 + 5,
  2670  		REG_X16 + 6,
  2671  		REG_X16 + 7,
  2672  		REG_X16 + 8,
  2673  		REG_X16 + 9,
  2674  		REG_X16 + 10,
  2675  		REG_X16 + 11,
  2676  		REG_X16 + 12,
  2677  		REG_X16 + 13,
  2678  		REG_X16 + 14,
  2679  		REG_X16 + 15:
  2680  		if ctxt.Arch.Family == sys.I386 {
  2681  			return Yxxx, true
  2682  		}
  2683  		return YxvmEvex, true
  2684  
  2685  	case REG_Y0 + 0,
  2686  		REG_Y0 + 1,
  2687  		REG_Y0 + 2,
  2688  		REG_Y0 + 3,
  2689  		REG_Y0 + 4,
  2690  		REG_Y0 + 5,
  2691  		REG_Y0 + 6,
  2692  		REG_Y0 + 7:
  2693  		return Yyvm, true
  2694  	case REG_Y8 + 0,
  2695  		REG_Y8 + 1,
  2696  		REG_Y8 + 2,
  2697  		REG_Y8 + 3,
  2698  		REG_Y8 + 4,
  2699  		REG_Y8 + 5,
  2700  		REG_Y8 + 6,
  2701  		REG_Y8 + 7:
  2702  		if ctxt.Arch.Family == sys.I386 {
  2703  			return Yxxx, true
  2704  		}
  2705  		return Yyvm, true
  2706  	case REG_Y16 + 0,
  2707  		REG_Y16 + 1,
  2708  		REG_Y16 + 2,
  2709  		REG_Y16 + 3,
  2710  		REG_Y16 + 4,
  2711  		REG_Y16 + 5,
  2712  		REG_Y16 + 6,
  2713  		REG_Y16 + 7,
  2714  		REG_Y16 + 8,
  2715  		REG_Y16 + 9,
  2716  		REG_Y16 + 10,
  2717  		REG_Y16 + 11,
  2718  		REG_Y16 + 12,
  2719  		REG_Y16 + 13,
  2720  		REG_Y16 + 14,
  2721  		REG_Y16 + 15:
  2722  		if ctxt.Arch.Family == sys.I386 {
  2723  			return Yxxx, true
  2724  		}
  2725  		return YyvmEvex, true
  2726  
  2727  	case REG_Z0 + 0,
  2728  		REG_Z0 + 1,
  2729  		REG_Z0 + 2,
  2730  		REG_Z0 + 3,
  2731  		REG_Z0 + 4,
  2732  		REG_Z0 + 5,
  2733  		REG_Z0 + 6,
  2734  		REG_Z0 + 7:
  2735  		return Yzvm, true
  2736  	case REG_Z8 + 0,
  2737  		REG_Z8 + 1,
  2738  		REG_Z8 + 2,
  2739  		REG_Z8 + 3,
  2740  		REG_Z8 + 4,
  2741  		REG_Z8 + 5,
  2742  		REG_Z8 + 6,
  2743  		REG_Z8 + 7,
  2744  		REG_Z8 + 8,
  2745  		REG_Z8 + 9,
  2746  		REG_Z8 + 10,
  2747  		REG_Z8 + 11,
  2748  		REG_Z8 + 12,
  2749  		REG_Z8 + 13,
  2750  		REG_Z8 + 14,
  2751  		REG_Z8 + 15,
  2752  		REG_Z8 + 16,
  2753  		REG_Z8 + 17,
  2754  		REG_Z8 + 18,
  2755  		REG_Z8 + 19,
  2756  		REG_Z8 + 20,
  2757  		REG_Z8 + 21,
  2758  		REG_Z8 + 22,
  2759  		REG_Z8 + 23:
  2760  		if ctxt.Arch.Family == sys.I386 {
  2761  			return Yxxx, true
  2762  		}
  2763  		return Yzvm, true
  2764  	}
  2765  
  2766  	return Yxxx, false
  2767  }
  2768  
// oclass classifies operand a of instruction p into one of the Y* operand
// classes.
//
// Non-register operands (register lists, branches, indirect, memory,
// address, constant and text-size operands) are handled by the first
// switch; register operands are then classified by register number.
// Operands that cannot be classified yield Yxxx, and some of those cases
// also report a diagnostic through ctxt.Diag.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_REGLIST:
		return oclassRegList(ctxt, a)

	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		// An indirect operand must name a symbol and carry no base
		// register, index register or scale.
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		// Pseudo registers have negative index, but SP is
		// not pseudo on x86, hence REG_SP check is not redundant.
		if a.Index == REG_SP || a.Index < 0 {
			// Can't use FP/SB/PC/SP as the index register.
			return Yxxx
		}

		// A vector index register makes this a VSIB operand; oclassVMem
		// decides its class (possibly Yxxx).
		if vmem, ok := oclassVMem(ctxt, a); ok {
			return vmem
		}

		// The name-based validation below is applied on AMD64 only.
		if ctxt.Arch.Family == sys.AMD64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			if a.Sym != nil && useAbs(ctxt, a.Sym) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		// Whatever remains is classified like a plain constant.
		fallthrough

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		v := a.Offset
		if ctxt.Arch.Family == sys.I386 {
			// On 386 only the low 32 bits matter; sign-extend them.
			v = int64(int32(v))
		}
		// Pick the narrowest class that holds v; the cases are ordered
		// from most to least specific.
		switch {
		case v == 0:
			return Yi0
		case v == 1:
			return Yi1
		case v >= 0 && v <= 3:
			return Yu2
		case v >= 0 && v <= 127:
			return Yu7
		case v >= 0 && v <= 255:
			return Yu8
		case v >= -128 && v <= 127:
			return Yi8
		}
		if ctxt.Arch.Family == sys.I386 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32 // can sign extend
		}
		if v>>32 == 0 {
			return Yi32 // unsigned
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every remaining operand type other than a register is unexpected.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	// Byte registers that are rejected on 386.
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	// R8-R15 are rejected on 386; elsewhere they share the class of
	// SP/BP/SI/DI.
	case REG_R8, // not really Yrl
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		if ctxt.Arch.Family == sys.I386 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	// X0 has its own class; X1-X15 are Yxr and X16-X31 are YxrEvex.
	case REG_X0:
		return Yxr0

	case REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_X0 + 16,
		REG_X0 + 17,
		REG_X0 + 18,
		REG_X0 + 19,
		REG_X0 + 20,
		REG_X0 + 21,
		REG_X0 + 22,
		REG_X0 + 23,
		REG_X0 + 24,
		REG_X0 + 25,
		REG_X0 + 26,
		REG_X0 + 27,
		REG_X0 + 28,
		REG_X0 + 29,
		REG_X0 + 30,
		REG_X0 + 31:
		return YxrEvex

	// Y0-Y15 are Yyr; Y16-Y31 are YyrEvex.
	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_Y0 + 16,
		REG_Y0 + 17,
		REG_Y0 + 18,
		REG_Y0 + 19,
		REG_Y0 + 20,
		REG_Y0 + 21,
		REG_Y0 + 22,
		REG_Y0 + 23,
		REG_Y0 + 24,
		REG_Y0 + 25,
		REG_Y0 + 26,
		REG_Y0 + 27,
		REG_Y0 + 28,
		REG_Y0 + 29,
		REG_Y0 + 30,
		REG_Y0 + 31:
		return YyrEvex

	// All Z registers are Yzr, but Z8-Z31 are rejected on 386.
	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzr

	case REG_Z0 + 8,
		REG_Z0 + 9,
		REG_Z0 + 10,
		REG_Z0 + 11,
		REG_Z0 + 12,
		REG_Z0 + 13,
		REG_Z0 + 14,
		REG_Z0 + 15,
		REG_Z0 + 16,
		REG_Z0 + 17,
		REG_Z0 + 18,
		REG_Z0 + 19,
		REG_Z0 + 20,
		REG_Z0 + 21,
		REG_Z0 + 22,
		REG_Z0 + 23,
		REG_Z0 + 24,
		REG_Z0 + 25,
		REG_Z0 + 26,
		REG_Z0 + 27,
		REG_Z0 + 28,
		REG_Z0 + 29,
		REG_Z0 + 30,
		REG_Z0 + 31:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		return Yzr

	// K0 is distinguished from K1-K7.
	case REG_K0:
		return Yk0

	case REG_K0 + 1,
		REG_K0 + 2,
		REG_K0 + 3,
		REG_K0 + 4,
		REG_K0 + 5,
		REG_K0 + 6,
		REG_K0 + 7:
		return Yknot0

	// Each of the remaining registers has a dedicated class.
	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	// Any register not listed above has no operand class.
	return Yxxx
}
  3186  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// The backing store is a fixed 100-byte array that is never grown; off is
// the number of bytes written so far and also the position of the next
// write.
type AsmBuf struct {
	buf      [100]byte // encoded bytes; only buf[:off] is meaningful
	off      int       // count of bytes written, i.e. next write position
	rexflag  int       // REX-related bits accumulated while encoding operands (see asmandsz)
	vexflag  bool      // Per inst: true for VEX-encoded
	evexflag bool      // Per inst: true for EVEX-encoded
	rep      bool      // NOTE(review): presumably tracks a REP prefix — confirm at use sites
	repn     bool      // NOTE(review): presumably tracks a REPN prefix — confirm at use sites
	lock     bool      // NOTE(review): presumably tracks a LOCK prefix — confirm at use sites

	evex evexBits // Initialized when evexflag is true
}
  3201  
  3202  // Put1 appends one byte to the end of the buffer.
  3203  func (ab *AsmBuf) Put1(x byte) {
  3204  	ab.buf[ab.off] = x
  3205  	ab.off++
  3206  }
  3207  
  3208  // Put2 appends two bytes to the end of the buffer.
  3209  func (ab *AsmBuf) Put2(x, y byte) {
  3210  	ab.buf[ab.off+0] = x
  3211  	ab.buf[ab.off+1] = y
  3212  	ab.off += 2
  3213  }
  3214  
  3215  // Put3 appends three bytes to the end of the buffer.
  3216  func (ab *AsmBuf) Put3(x, y, z byte) {
  3217  	ab.buf[ab.off+0] = x
  3218  	ab.buf[ab.off+1] = y
  3219  	ab.buf[ab.off+2] = z
  3220  	ab.off += 3
  3221  }
  3222  
  3223  // Put4 appends four bytes to the end of the buffer.
  3224  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3225  	ab.buf[ab.off+0] = x
  3226  	ab.buf[ab.off+1] = y
  3227  	ab.buf[ab.off+2] = z
  3228  	ab.buf[ab.off+3] = w
  3229  	ab.off += 4
  3230  }
  3231  
  3232  // PutInt16 writes v into the buffer using little-endian encoding.
  3233  func (ab *AsmBuf) PutInt16(v int16) {
  3234  	ab.buf[ab.off+0] = byte(v)
  3235  	ab.buf[ab.off+1] = byte(v >> 8)
  3236  	ab.off += 2
  3237  }
  3238  
  3239  // PutInt32 writes v into the buffer using little-endian encoding.
  3240  func (ab *AsmBuf) PutInt32(v int32) {
  3241  	ab.buf[ab.off+0] = byte(v)
  3242  	ab.buf[ab.off+1] = byte(v >> 8)
  3243  	ab.buf[ab.off+2] = byte(v >> 16)
  3244  	ab.buf[ab.off+3] = byte(v >> 24)
  3245  	ab.off += 4
  3246  }
  3247  
  3248  // PutInt64 writes v into the buffer using little-endian encoding.
  3249  func (ab *AsmBuf) PutInt64(v int64) {
  3250  	ab.buf[ab.off+0] = byte(v)
  3251  	ab.buf[ab.off+1] = byte(v >> 8)
  3252  	ab.buf[ab.off+2] = byte(v >> 16)
  3253  	ab.buf[ab.off+3] = byte(v >> 24)
  3254  	ab.buf[ab.off+4] = byte(v >> 32)
  3255  	ab.buf[ab.off+5] = byte(v >> 40)
  3256  	ab.buf[ab.off+6] = byte(v >> 48)
  3257  	ab.buf[ab.off+7] = byte(v >> 56)
  3258  	ab.off += 8
  3259  }
  3260  
  3261  // Put copies b into the buffer.
  3262  func (ab *AsmBuf) Put(b []byte) {
  3263  	copy(ab.buf[ab.off:], b)
  3264  	ab.off += len(b)
  3265  }
  3266  
  3267  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3268  // starting at specified offset (e.g. z counter value).
  3269  // Trailing 0 is not written.
  3270  //
  3271  // Intended to be used for literal Z cases.
  3272  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3273  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3274  	for int(op[offset]) != 0 {
  3275  		ab.Put1(byte(op[offset]))
  3276  		offset++
  3277  	}
  3278  }
  3279  
  3280  // Insert inserts b at offset i.
  3281  func (ab *AsmBuf) Insert(i int, b byte) {
  3282  	ab.off++
  3283  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3284  	ab.buf[i] = b
  3285  }
  3286  
  3287  // Last returns the byte at the end of the buffer.
  3288  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3289  
  3290  // Len returns the length of the buffer.
  3291  func (ab *AsmBuf) Len() int { return ab.off }
  3292  
  3293  // Bytes returns the contents of the buffer.
  3294  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3295  
  3296  // Reset empties the buffer.
  3297  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3298  
  3299  // At returns the byte at offset i.
  3300  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3301  
// asmidx emits the SIB byte for the given scale, index and base registers.
//
// The byte is built as log2(scale)<<6 | reg[index]<<3 | reg[base]. An
// index of REG_NONE is encoded as 4 and skips the scale check entirely; a
// base of REG_NONE is encoded as 5. R8-R15 and the vector registers
// numbered 8 and above are rejected on 386. For any unsupported index,
// scale or base a diagnostic is reported through ctxt.Diag and a zero byte
// is emitted instead.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	// i accumulates the SIB byte.
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index: field value 4; the scale is not examined.
		i = 4 << 3
		goto bas

	// Index registers that are not accepted on 386.
	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	// Note that REG_SP is absent: it is not accepted as an index.
	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		i = reg[index] << 3
	}

	// Scale field (bits 6-7): 1, 2, 4, 8 encode as 0, 1, 2, 3.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	// Base field (bits 0-2).
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	// Report the problem and still emit one byte in place of the SIB byte.
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}
  3490  
  3491  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3492  	var rel obj.Reloc
  3493  
  3494  	v := vaddr(ctxt, p, a, &rel)
  3495  	if rel.Siz != 0 {
  3496  		if rel.Siz != 4 {
  3497  			ctxt.Diag("bad reloc")
  3498  		}
  3499  		r := obj.Addrel(cursym)
  3500  		*r = rel
  3501  		r.Off = int32(p.Pc + int64(ab.Len()))
  3502  	}
  3503  
  3504  	ab.PutInt32(int32(v))
  3505  }
  3506  
  3507  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3508  	if r != nil {
  3509  		*r = obj.Reloc{}
  3510  	}
  3511  
  3512  	switch a.Name {
  3513  	case obj.NAME_STATIC,
  3514  		obj.NAME_GOTREF,
  3515  		obj.NAME_EXTERN:
  3516  		s := a.Sym
  3517  		if r == nil {
  3518  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3519  			log.Fatalf("reloc")
  3520  		}
  3521  
  3522  		if a.Name == obj.NAME_GOTREF {
  3523  			r.Siz = 4
  3524  			r.Type = objabi.R_GOTPCREL
  3525  		} else if useAbs(ctxt, s) {
  3526  			r.Siz = 4
  3527  			r.Type = objabi.R_ADDR
  3528  		} else {
  3529  			r.Siz = 4
  3530  			r.Type = objabi.R_PCREL
  3531  		}
  3532  
  3533  		r.Off = -1 // caller must fill in
  3534  		r.Sym = s
  3535  		r.Add = a.Offset
  3536  
  3537  		return 0
  3538  	}
  3539  
  3540  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3541  		if r == nil {
  3542  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3543  			log.Fatalf("reloc")
  3544  		}
  3545  
  3546  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3547  			r.Type = objabi.R_TLS_LE
  3548  			r.Siz = 4
  3549  			r.Off = -1 // caller must fill in
  3550  			r.Add = a.Offset
  3551  		}
  3552  		return 0
  3553  	}
  3554  
  3555  	return a.Offset
  3556  }
  3557  
  3558  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3559  	var base int
  3560  	var rel obj.Reloc
  3561  
  3562  	rex &= 0x40 | Rxr
  3563  	if a.Offset != int64(int32(a.Offset)) {
  3564  		// The rules are slightly different for 386 and AMD64,
  3565  		// mostly for historical reasons. We may unify them later,
  3566  		// but it must be discussed beforehand.
  3567  		//
  3568  		// For 64bit mode only LEAL is allowed to overflow.
  3569  		// It's how https://golang.org/cl/59630 made it.
  3570  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3571  		//
  3572  		// For 32bit mode rules are more permissive.
  3573  		// If offset fits uint32, it's permitted.
  3574  		// This is allowed for assembly that wants to use 32-bit hex
  3575  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3576  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3577  			(ctxt.Arch.Family != sys.AMD64 &&
  3578  				int64(uint32(a.Offset)) == a.Offset &&
  3579  				ab.rexflag&Rxw == 0)
  3580  		if !overflowOK {
  3581  			ctxt.Diag("offset too large in %s", p)
  3582  		}
  3583  	}
  3584  	v := int32(a.Offset)
  3585  	rel.Siz = 0
  3586  
  3587  	switch a.Type {
  3588  	case obj.TYPE_ADDR:
  3589  		if a.Name == obj.NAME_NONE {
  3590  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3591  		}
  3592  		if a.Index == REG_TLS {
  3593  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3594  		}
  3595  		goto bad
  3596  
  3597  	case obj.TYPE_REG:
  3598  		const regFirst = REG_AL
  3599  		const regLast = REG_Z31
  3600  		if a.Reg < regFirst || regLast < a.Reg {
  3601  			goto bad
  3602  		}
  3603  		if v != 0 {
  3604  			goto bad
  3605  		}
  3606  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3607  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3608  		return
  3609  	}
  3610  
  3611  	if a.Type != obj.TYPE_MEM {
  3612  		goto bad
  3613  	}
  3614  
  3615  	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
  3616  		base := int(a.Reg)
  3617  		switch a.Name {
  3618  		case obj.NAME_EXTERN,
  3619  			obj.NAME_GOTREF,
  3620  			obj.NAME_STATIC:
  3621  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3622  				goto bad
  3623  			}
  3624  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3625  				// The base register has already been set. It holds the PC
  3626  				// of this instruction returned by a PC-reading thunk.
  3627  				// See obj6.go:rewriteToPcrel.
  3628  			} else {
  3629  				base = REG_NONE
  3630  			}
  3631  			v = int32(vaddr(ctxt, p, a, &rel))
  3632  
  3633  		case obj.NAME_AUTO,
  3634  			obj.NAME_PARAM:
  3635  			base = REG_SP
  3636  		}
  3637  
  3638  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3639  		if base == REG_NONE {
  3640  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3641  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3642  			goto putrelv
  3643  		}
  3644  
  3645  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3646  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3647  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3648  			return
  3649  		}
  3650  
  3651  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3652  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3653  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3654  			ab.Put1(disp8)
  3655  			return
  3656  		}
  3657  
  3658  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3659  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3660  		goto putrelv
  3661  	}
  3662  
  3663  	base = int(a.Reg)
  3664  	switch a.Name {
  3665  	case obj.NAME_STATIC,
  3666  		obj.NAME_GOTREF,
  3667  		obj.NAME_EXTERN:
  3668  		if a.Sym == nil {
  3669  			ctxt.Diag("bad addr: %v", p)
  3670  		}
  3671  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3672  			// The base register has already been set. It holds the PC
  3673  			// of this instruction returned by a PC-reading thunk.
  3674  			// See obj6.go:rewriteToPcrel.
  3675  		} else {
  3676  			base = REG_NONE
  3677  		}
  3678  		v = int32(vaddr(ctxt, p, a, &rel))
  3679  
  3680  	case obj.NAME_AUTO,
  3681  		obj.NAME_PARAM:
  3682  		base = REG_SP
  3683  	}
  3684  
  3685  	if base == REG_TLS {
  3686  		v = int32(vaddr(ctxt, p, a, &rel))
  3687  	}
  3688  
  3689  	ab.rexflag |= regrex[base]&Rxb | rex
  3690  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3691  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3692  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3693  				ctxt.Diag("%v has offset against gotref", p)
  3694  			}
  3695  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3696  			goto putrelv
  3697  		}
  3698  
  3699  		// temporary
  3700  		ab.Put2(
  3701  			byte(0<<6|4<<0|r<<3), // sib present
  3702  			0<<6|4<<3|5<<0,       // DS:d32
  3703  		)
  3704  		goto putrelv
  3705  	}
  3706  
  3707  	if base == REG_SP || base == REG_R12 {
  3708  		if v == 0 {
  3709  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3710  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3711  			return
  3712  		}
  3713  
  3714  		if disp8, ok := toDisp8(v, p, ab); ok {
  3715  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3716  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3717  			ab.Put1(disp8)
  3718  			return
  3719  		}
  3720  
  3721  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3722  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3723  		goto putrelv
  3724  	}
  3725  
  3726  	if REG_AX <= base && base <= REG_R15 {
  3727  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
  3728  			!(ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64) {
  3729  			rel = obj.Reloc{}
  3730  			rel.Type = objabi.R_TLS_LE
  3731  			rel.Siz = 4
  3732  			rel.Sym = nil
  3733  			rel.Add = int64(v)
  3734  			v = 0
  3735  		}
  3736  
  3737  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3738  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3739  			return
  3740  		}
  3741  
  3742  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3743  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3744  			return
  3745  		}
  3746  
  3747  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3748  		goto putrelv
  3749  	}
  3750  
  3751  	goto bad
  3752  
  3753  putrelv:
  3754  	if rel.Siz != 0 {
  3755  		if rel.Siz != 4 {
  3756  			ctxt.Diag("bad rel")
  3757  			goto bad
  3758  		}
  3759  
  3760  		r := obj.Addrel(cursym)
  3761  		*r = rel
  3762  		r.Off = int32(p.Pc + int64(ab.Len()))
  3763  	}
  3764  
  3765  	ab.PutInt32(v)
  3766  	return
  3767  
  3768  bad:
  3769  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3770  }
  3771  
  3772  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3773  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3774  }
  3775  
  3776  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3777  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3778  }
  3779  
  3780  func bytereg(a *obj.Addr, t *uint8) {
  3781  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3782  		a.Reg += REG_AL - REG_AX
  3783  		*t = 0
  3784  	}
  3785  }
  3786  
  3787  func unbytereg(a *obj.Addr, t *uint8) {
  3788  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3789  		a.Reg += REG_AX - REG_AL
  3790  		*t = 0
  3791  	}
  3792  }
  3793  
// Encoding kinds for ymovtab entries. Each ymovtab row carries one of these
// values in its fifth field; the names below describe how the rows that use
// them look in that table.
const (
	movLit       uint8 = iota // Like Zlit
	movRegMem                 // used by rows with a special-register source and one opcode byte
	movMemReg                 // used by rows with a special-register destination and one opcode byte
	movRegMem2op              // as movRegMem, for rows listing two opcode bytes
	movMemReg2op              // as movMemReg, for rows listing two opcode bytes
	movFullPtr                // Load full pointer, trash heap (unsupported)
	movDoubleShift            // used by the double-shift (SHLx/SHRx) rows
	movTLSReg                 // used by the "load TLS base" rows
)
  3804  
// ymovtab lists special-case encodings that are keyed on the instruction and
// the classes of its operands.
//
// Each row holds, in order: the instruction, three operand classes
// (presumably from, middle and to — confirm against the movtab definition),
// an encoding kind (one of the mov* constants), and up to four opcode bytes
// whose interpretation depends on the encoding kind.
//
// The table is terminated by an all-zero row.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},

	// All-zero terminator row.
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3948  
  3949  func isax(a *obj.Addr) bool {
  3950  	switch a.Reg {
  3951  	case REG_AX, REG_AL, REG_AH:
  3952  		return true
  3953  	}
  3954  
  3955  	return a.Index == REG_AX
  3956  }
  3957  
  3958  func subreg(p *obj.Prog, from int, to int) {
  3959  	if false { /* debug['Q'] */
  3960  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3961  	}
  3962  
  3963  	if int(p.From.Reg) == from {
  3964  		p.From.Reg = int16(to)
  3965  		p.Ft = 0
  3966  	}
  3967  
  3968  	if int(p.To.Reg) == from {
  3969  		p.To.Reg = int16(to)
  3970  		p.Tt = 0
  3971  	}
  3972  
  3973  	if int(p.From.Index) == from {
  3974  		p.From.Index = int16(to)
  3975  		p.Ft = 0
  3976  	}
  3977  
  3978  	if int(p.To.Index) == from {
  3979  		p.To.Index = int16(to)
  3980  		p.Tt = 0
  3981  	}
  3982  
  3983  	if false { /* debug['Q'] */
  3984  		fmt.Printf("%v\n", p)
  3985  	}
  3986  }
  3987  
  3988  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3989  	switch op {
  3990  	case Pm, Pe, Pf2, Pf3:
  3991  		if osize != 1 {
  3992  			if op != Pm {
  3993  				ab.Put1(byte(op))
  3994  			}
  3995  			ab.Put1(Pm)
  3996  			z++
  3997  			op = int(o.op[z])
  3998  			break
  3999  		}
  4000  		fallthrough
  4001  
  4002  	default:
  4003  		if ab.Len() == 0 || ab.Last() != Pm {
  4004  			ab.Put1(Pm)
  4005  		}
  4006  	}
  4007  
  4008  	ab.Put1(byte(op))
  4009  	return z
  4010  }
  4011  
  4012  var bpduff1 = []byte{
  4013  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  4014  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  4015  }
  4016  
  4017  var bpduff2 = []byte{
  4018  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  4019  }
  4020  
// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Any of rm, v, r and k may be nil:
//   - r contributes the R and R' bits of P0;
//   - rm contributes the X and B bits of P0 (and V' of P2 via its index);
//   - v contributes the vvvv field of P1 and the V' bit of P2;
//   - k contributes the aaa field of P2.
//
// The opcode suffix stored in p.Scond selects zeroing, rounding, broadcast
// or SAE. A suffix that the instruction form does not enable is reported
// through ctxt.Diag, but the prefix bytes are still written.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The R, R', X and B bits start out as 1 and are cleared to 0 when the
	// corresponding register extension bit is needed.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, X carries the RxrEvex bit of rm.Reg;
		// otherwise it carries the Rxx bit of the index register.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// vvvv is written with all four bits flipped; a nil v yields 1111.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE all set the b bit; the first matching
	// case wins. Rounding additionally replaces the L'L field.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is cleared when either the index register of rm or the v register
	// has its RxrEvex bit set; the index register is checked first.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
  4127  
  4128  // Emit VEX prefix and opcode byte.
  4129  // The three addresses are the r/m, vvvv, and reg fields.
  4130  // The reg and rm arguments appear in the same order as the
  4131  // arguments to asmand, which typically follows the call to asmvex.
  4132  // The final two arguments are the VEX prefix (see encoding above)
  4133  // and the opcode byte.
  4134  // For details about vex prefix see:
  4135  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4136  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4137  	ab.vexflag = true
  4138  	rexR := 0
  4139  	if r != nil {
  4140  		rexR = regrex[r.Reg] & Rxr
  4141  	}
  4142  	rexB := 0
  4143  	rexX := 0
  4144  	if rm != nil {
  4145  		rexB = regrex[rm.Reg] & Rxb
  4146  		rexX = regrex[rm.Index] & Rxx
  4147  	}
  4148  	vexM := (vex >> 3) & 0x7
  4149  	vexWLP := vex & 0x87
  4150  	vexV := byte(0)
  4151  	if v != nil {
  4152  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4153  	}
  4154  	vexV ^= 0xF
  4155  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4156  		// Can use 2-byte encoding.
  4157  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4158  	} else {
  4159  		// Must use 3-byte encoding.
  4160  		ab.Put3(0xc4,
  4161  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4162  			vexV<<3|vexWLP,
  4163  		)
  4164  	}
  4165  	ab.Put1(opcode)
  4166  }
  4167  
  4168  // regIndex returns register index that fits in 5 bits.
  4169  //
  4170  //	R         : 3 bit | legacy instructions     | N/A
  4171  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4172  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4173  //
  4174  // Examples:
  4175  //
  4176  //	REG_Z30 => 30
  4177  //	REG_X15 => 15
  4178  //	REG_R9  => 9
  4179  //	REG_AX  => 0
  4180  func regIndex(r int16) int {
  4181  	lower3bits := reg[r]
  4182  	high4bit := regrex[r] & Rxr << 1
  4183  	high5bit := regrex[r] & RxrEvex << 0
  4184  	return lower3bits | high4bit | high5bit
  4185  }
  4186  
  4187  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4188  // Reports errors via ctxt.
  4189  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4190  	// If any pair of the index, mask, or destination registers
  4191  	// are the same, illegal instruction trap (#UD) is triggered.
  4192  	index := regIndex(p.GetFrom3().Index)
  4193  	mask := regIndex(p.From.Reg)
  4194  	dest := regIndex(p.To.Reg)
  4195  	if dest == mask || dest == index || mask == index {
  4196  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4197  		return false
  4198  	}
  4199  
  4200  	return true
  4201  }
  4202  
  4203  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4204  // Reports errors via ctxt.
  4205  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4206  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4207  	// register is the same as index vector in VSIB.
  4208  	index := regIndex(p.From.Index)
  4209  	dest := regIndex(p.To.Reg)
  4210  	if dest == index {
  4211  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4212  		return false
  4213  	}
  4214  
  4215  	return true
  4216  }
  4217  
  4218  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4219  	o := opindex[p.As&obj.AMask]
  4220  
  4221  	if o == nil {
  4222  		ctxt.Diag("asmins: missing op %v", p)
  4223  		return
  4224  	}
  4225  
  4226  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4227  		ab.Put1(byte(pre))
  4228  	}
  4229  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4230  		ab.Put1(byte(pre))
  4231  	}
  4232  
  4233  	// Checks to warn about instruction/arguments combinations that
  4234  	// will unconditionally trigger illegal instruction trap (#UD).
  4235  	switch p.As {
  4236  	case AVGATHERDPD,
  4237  		AVGATHERQPD,
  4238  		AVGATHERDPS,
  4239  		AVGATHERQPS,
  4240  		AVPGATHERDD,
  4241  		AVPGATHERQD,
  4242  		AVPGATHERDQ,
  4243  		AVPGATHERQQ:
  4244  		// AVX512 gather requires explicit K mask.
  4245  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4246  			if !avx512gatherValid(ctxt, p) {
  4247  				return
  4248  			}
  4249  		} else {
  4250  			if !avx2gatherValid(ctxt, p) {
  4251  				return
  4252  			}
  4253  		}
  4254  	}
  4255  
  4256  	if p.Ft == 0 {
  4257  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4258  	}
  4259  	if p.Tt == 0 {
  4260  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4261  	}
  4262  
  4263  	ft := int(p.Ft) * Ymax
  4264  	var f3t int
  4265  	tt := int(p.Tt) * Ymax
  4266  
  4267  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4268  	z := 0
  4269  	var a *obj.Addr
  4270  	var l int
  4271  	var op int
  4272  	var q *obj.Prog
  4273  	var r *obj.Reloc
  4274  	var rel obj.Reloc
  4275  	var v int64
  4276  
  4277  	args := make([]int, 0, argListMax)
  4278  	if ft != Ynone*Ymax {
  4279  		args = append(args, ft)
  4280  	}
  4281  	for i := range p.RestArgs {
  4282  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4283  	}
  4284  	if tt != Ynone*Ymax {
  4285  		args = append(args, tt)
  4286  	}
  4287  
  4288  	for _, yt := range o.ytab {
  4289  		// ytab matching is purely args-based,
  4290  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4291  		// add EVEX-only filter that will reject non-EVEX matches.
  4292  		//
  4293  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4294  		// Without this rule, operands will lead to VEX-encoded form
  4295  		// and produce "c5b15813" encoding.
  4296  		if !yt.match(args) {
  4297  			// "xo" is always zero for VEX/EVEX encoded insts.
  4298  			z += int(yt.zoffset) + xo
  4299  		} else {
  4300  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4301  				// Do not signal error and continue to search
  4302  				// for matching EVEX-encoded form.
  4303  				z += int(yt.zoffset)
  4304  				continue
  4305  			}
  4306  
  4307  			switch o.prefix {
  4308  			case Px1: // first option valid only in 32-bit mode
  4309  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4310  					z += int(yt.zoffset) + xo
  4311  					continue
  4312  				}
  4313  			case Pq: // 16 bit escape and opcode escape
  4314  				ab.Put2(Pe, Pm)
  4315  
  4316  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4317  				ab.rexflag |= Pw
  4318  				ab.Put2(Pe, Pm)
  4319  
  4320  			case Pq4: // 66 0F 38
  4321  				ab.Put3(0x66, 0x0F, 0x38)
  4322  
  4323  			case Pq4w: // 66 0F 38 + REX.W
  4324  				ab.rexflag |= Pw
  4325  				ab.Put3(0x66, 0x0F, 0x38)
  4326  
  4327  			case Pq5: // F3 0F 38
  4328  				ab.Put3(0xF3, 0x0F, 0x38)
  4329  
  4330  			case Pq5w: //  F3 0F 38 + REX.W
  4331  				ab.rexflag |= Pw
  4332  				ab.Put3(0xF3, 0x0F, 0x38)
  4333  
  4334  			case Pf2, // xmm opcode escape
  4335  				Pf3:
  4336  				ab.Put2(o.prefix, Pm)
  4337  
  4338  			case Pef3:
  4339  				ab.Put3(Pe, Pf3, Pm)
  4340  
  4341  			case Pfw: // xmm opcode escape + REX.W
  4342  				ab.rexflag |= Pw
  4343  				ab.Put2(Pf3, Pm)
  4344  
  4345  			case Pm: // opcode escape
  4346  				ab.Put1(Pm)
  4347  
  4348  			case Pe: // 16 bit escape
  4349  				ab.Put1(Pe)
  4350  
  4351  			case Pw: // 64-bit escape
  4352  				if ctxt.Arch.Family != sys.AMD64 {
  4353  					ctxt.Diag("asmins: illegal 64: %v", p)
  4354  				}
  4355  				ab.rexflag |= Pw
  4356  
  4357  			case Pw8: // 64-bit escape if z >= 8
  4358  				if z >= 8 {
  4359  					if ctxt.Arch.Family != sys.AMD64 {
  4360  						ctxt.Diag("asmins: illegal 64: %v", p)
  4361  					}
  4362  					ab.rexflag |= Pw
  4363  				}
  4364  
  4365  			case Pb: // botch
  4366  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4367  					goto bad
  4368  				}
  4369  				// NOTE(rsc): This is probably safe to do always,
  4370  				// but when enabled it chooses different encodings
  4371  				// than the old cmd/internal/obj/i386 code did,
  4372  				// which breaks our "same bits out" checks.
  4373  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4374  				// in the original obj/i386, and it would encode
  4375  				// (using a valid, shorter form) as 3c 00 if we enabled
  4376  				// the call to bytereg here.
  4377  				if ctxt.Arch.Family == sys.AMD64 {
  4378  					bytereg(&p.From, &p.Ft)
  4379  					bytereg(&p.To, &p.Tt)
  4380  				}
  4381  
  4382  			case P32: // 32 bit but illegal if 64-bit mode
  4383  				if ctxt.Arch.Family == sys.AMD64 {
  4384  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4385  				}
  4386  
  4387  			case Py: // 64-bit only, no prefix
  4388  				if ctxt.Arch.Family != sys.AMD64 {
  4389  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4390  				}
  4391  
  4392  			case Py1: // 64-bit only if z < 1, no prefix
  4393  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4394  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4395  				}
  4396  
  4397  			case Py3: // 64-bit only if z < 3, no prefix
  4398  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4399  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4400  				}
  4401  			}
  4402  
  4403  			if z >= len(o.op) {
  4404  				log.Fatalf("asmins bad table %v", p)
  4405  			}
  4406  			op = int(o.op[z])
  4407  			if op == 0x0f {
  4408  				ab.Put1(byte(op))
  4409  				z++
  4410  				op = int(o.op[z])
  4411  			}
  4412  
  4413  			switch yt.zcase {
  4414  			default:
  4415  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4416  				return
  4417  
  4418  			case Zpseudo:
  4419  				break
  4420  
  4421  			case Zlit:
  4422  				ab.PutOpBytesLit(z, &o.op)
  4423  
  4424  			case Zlitr_m:
  4425  				ab.PutOpBytesLit(z, &o.op)
  4426  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4427  
  4428  			case Zlitm_r:
  4429  				ab.PutOpBytesLit(z, &o.op)
  4430  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4431  
  4432  			case Zlit_m_r:
  4433  				ab.PutOpBytesLit(z, &o.op)
  4434  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4435  
  4436  			case Zmb_r:
  4437  				bytereg(&p.From, &p.Ft)
  4438  				fallthrough
  4439  
  4440  			case Zm_r:
  4441  				ab.Put1(byte(op))
  4442  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4443  
  4444  			case Z_m_r:
  4445  				ab.Put1(byte(op))
  4446  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4447  
  4448  			case Zm2_r:
  4449  				ab.Put2(byte(op), o.op[z+1])
  4450  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4451  
  4452  			case Zm_r_xm:
  4453  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4454  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4455  
  4456  			case Zm_r_xm_nr:
  4457  				ab.rexflag = 0
  4458  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4459  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4460  
  4461  			case Zm_r_i_xm:
  4462  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4463  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4464  				ab.Put1(byte(p.To.Offset))
  4465  
  4466  			case Zibm_r, Zibr_m:
  4467  				ab.PutOpBytesLit(z, &o.op)
  4468  				if yt.zcase == Zibr_m {
  4469  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4470  				} else {
  4471  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4472  				}
  4473  				switch {
  4474  				default:
  4475  					ab.Put1(byte(p.From.Offset))
  4476  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4477  					ab.PutInt16(int16(p.From.Offset))
  4478  				case yt.args[0] == Yi32:
  4479  					ab.PutInt32(int32(p.From.Offset))
  4480  				}
  4481  
  4482  			case Zaut_r:
  4483  				ab.Put1(0x8d) // leal
  4484  				if p.From.Type != obj.TYPE_ADDR {
  4485  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4486  				}
  4487  				p.From.Type = obj.TYPE_MEM
  4488  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4489  				p.From.Type = obj.TYPE_ADDR
  4490  
  4491  			case Zm_o:
  4492  				ab.Put1(byte(op))
  4493  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4494  
  4495  			case Zr_m:
  4496  				ab.Put1(byte(op))
  4497  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4498  
  4499  			case Zvex:
  4500  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4501  
  4502  			case Zvex_rm_v_r:
  4503  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4504  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4505  
  4506  			case Zvex_rm_v_ro:
  4507  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4508  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4509  
  4510  			case Zvex_i_rm_vo:
  4511  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4512  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4513  				ab.Put1(byte(p.From.Offset))
  4514  
  4515  			case Zvex_i_r_v:
  4516  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4517  				regnum := byte(0x7)
  4518  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4519  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4520  				} else {
  4521  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4522  				}
  4523  				ab.Put1(o.op[z+2] | regnum)
  4524  				ab.Put1(byte(p.From.Offset))
  4525  
  4526  			case Zvex_i_rm_v_r:
  4527  				imm, from, from3, to := unpackOps4(p)
  4528  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4529  				ab.asmand(ctxt, cursym, p, from, to)
  4530  				ab.Put1(byte(imm.Offset))
  4531  
  4532  			case Zvex_i_rm_r:
  4533  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4534  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4535  				ab.Put1(byte(p.From.Offset))
  4536  
  4537  			case Zvex_v_rm_r:
  4538  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4539  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4540  
  4541  			case Zvex_r_v_rm:
  4542  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4543  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4544  
  4545  			case Zvex_rm_r_vo:
  4546  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4547  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4548  
  4549  			case Zvex_i_r_rm:
  4550  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4551  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4552  				ab.Put1(byte(p.From.Offset))
  4553  
  4554  			case Zvex_hr_rm_v_r:
  4555  				hr, from, from3, to := unpackOps4(p)
  4556  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4557  				ab.asmand(ctxt, cursym, p, from, to)
  4558  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4559  
  4560  			case Zevex_k_rmo:
  4561  				ab.evex = newEVEXBits(z, &o.op)
  4562  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4563  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4564  
  4565  			case Zevex_i_rm_vo:
  4566  				ab.evex = newEVEXBits(z, &o.op)
  4567  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4568  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4569  				ab.Put1(byte(p.From.Offset))
  4570  
  4571  			case Zevex_i_rm_k_vo:
  4572  				imm, from, kmask, to := unpackOps4(p)
  4573  				ab.evex = newEVEXBits(z, &o.op)
  4574  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4575  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4576  				ab.Put1(byte(imm.Offset))
  4577  
  4578  			case Zevex_i_r_rm:
  4579  				ab.evex = newEVEXBits(z, &o.op)
  4580  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4581  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4582  				ab.Put1(byte(p.From.Offset))
  4583  
  4584  			case Zevex_i_r_k_rm:
  4585  				imm, from, kmask, to := unpackOps4(p)
  4586  				ab.evex = newEVEXBits(z, &o.op)
  4587  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4588  				ab.asmand(ctxt, cursym, p, to, from)
  4589  				ab.Put1(byte(imm.Offset))
  4590  
  4591  			case Zevex_i_rm_r:
  4592  				ab.evex = newEVEXBits(z, &o.op)
  4593  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4594  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4595  				ab.Put1(byte(p.From.Offset))
  4596  
  4597  			case Zevex_i_rm_k_r:
  4598  				imm, from, kmask, to := unpackOps4(p)
  4599  				ab.evex = newEVEXBits(z, &o.op)
  4600  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4601  				ab.asmand(ctxt, cursym, p, from, to)
  4602  				ab.Put1(byte(imm.Offset))
  4603  
  4604  			case Zevex_i_rm_v_r:
  4605  				imm, from, from3, to := unpackOps4(p)
  4606  				ab.evex = newEVEXBits(z, &o.op)
  4607  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4608  				ab.asmand(ctxt, cursym, p, from, to)
  4609  				ab.Put1(byte(imm.Offset))
  4610  
  4611  			case Zevex_i_rm_v_k_r:
  4612  				imm, from, from3, kmask, to := unpackOps5(p)
  4613  				ab.evex = newEVEXBits(z, &o.op)
  4614  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4615  				ab.asmand(ctxt, cursym, p, from, to)
  4616  				ab.Put1(byte(imm.Offset))
  4617  
  4618  			case Zevex_r_v_rm:
  4619  				ab.evex = newEVEXBits(z, &o.op)
  4620  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4621  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4622  
  4623  			case Zevex_rm_v_r:
  4624  				ab.evex = newEVEXBits(z, &o.op)
  4625  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4626  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4627  
  4628  			case Zevex_rm_k_r:
  4629  				ab.evex = newEVEXBits(z, &o.op)
  4630  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4631  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4632  
  4633  			case Zevex_r_k_rm:
  4634  				ab.evex = newEVEXBits(z, &o.op)
  4635  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4636  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4637  
  4638  			case Zevex_rm_v_k_r:
  4639  				from, from3, kmask, to := unpackOps4(p)
  4640  				ab.evex = newEVEXBits(z, &o.op)
  4641  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4642  				ab.asmand(ctxt, cursym, p, from, to)
  4643  
  4644  			case Zevex_r_v_k_rm:
  4645  				from, from3, kmask, to := unpackOps4(p)
  4646  				ab.evex = newEVEXBits(z, &o.op)
  4647  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4648  				ab.asmand(ctxt, cursym, p, to, from)
  4649  
  4650  			case Zr_m_xm:
  4651  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4652  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4653  
  4654  			case Zr_m_xm_nr:
  4655  				ab.rexflag = 0
  4656  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4657  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4658  
  4659  			case Zo_m:
  4660  				ab.Put1(byte(op))
  4661  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4662  
  4663  			case Zcallindreg:
  4664  				r = obj.Addrel(cursym)
  4665  				r.Off = int32(p.Pc)
  4666  				r.Type = objabi.R_CALLIND
  4667  				r.Siz = 0
  4668  				fallthrough
  4669  
  4670  			case Zo_m64:
  4671  				ab.Put1(byte(op))
  4672  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4673  
  4674  			case Zm_ibo:
  4675  				ab.Put1(byte(op))
  4676  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4677  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4678  
  4679  			case Zibo_m:
  4680  				ab.Put1(byte(op))
  4681  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4682  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4683  
  4684  			case Zibo_m_xm:
  4685  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4686  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4687  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4688  
  4689  			case Z_ib, Zib_:
  4690  				if yt.zcase == Zib_ {
  4691  					a = &p.From
  4692  				} else {
  4693  					a = &p.To
  4694  				}
  4695  				ab.Put1(byte(op))
  4696  				if p.As == AXABORT {
  4697  					ab.Put1(o.op[z+1])
  4698  				}
  4699  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4700  
  4701  			case Zib_rp:
  4702  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4703  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4704  
  4705  			case Zil_rp:
  4706  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4707  				ab.Put1(byte(op + reg[p.To.Reg]))
  4708  				if o.prefix == Pe {
  4709  					v = vaddr(ctxt, p, &p.From, nil)
  4710  					ab.PutInt16(int16(v))
  4711  				} else {
  4712  					ab.relput4(ctxt, cursym, p, &p.From)
  4713  				}
  4714  
  4715  			case Zo_iw:
  4716  				ab.Put1(byte(op))
  4717  				if p.From.Type != obj.TYPE_NONE {
  4718  					v = vaddr(ctxt, p, &p.From, nil)
  4719  					ab.PutInt16(int16(v))
  4720  				}
  4721  
  4722  			case Ziq_rp:
  4723  				v = vaddr(ctxt, p, &p.From, &rel)
  4724  				l = int(v >> 32)
  4725  				if l == 0 && rel.Siz != 8 {
  4726  					ab.rexflag &^= (0x40 | Rxw)
  4727  
  4728  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4729  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4730  					if rel.Type != 0 {
  4731  						r = obj.Addrel(cursym)
  4732  						*r = rel
  4733  						r.Off = int32(p.Pc + int64(ab.Len()))
  4734  					}
  4735  
  4736  					ab.PutInt32(int32(v))
  4737  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4738  					ab.Put1(0xc7)
  4739  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4740  
  4741  					ab.PutInt32(int32(v)) // need all 8
  4742  				} else {
  4743  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4744  					ab.Put1(byte(op + reg[p.To.Reg]))
  4745  					if rel.Type != 0 {
  4746  						r = obj.Addrel(cursym)
  4747  						*r = rel
  4748  						r.Off = int32(p.Pc + int64(ab.Len()))
  4749  					}
  4750  
  4751  					ab.PutInt64(v)
  4752  				}
  4753  
  4754  			case Zib_rr:
  4755  				ab.Put1(byte(op))
  4756  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4757  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4758  
  4759  			case Z_il, Zil_:
  4760  				if yt.zcase == Zil_ {
  4761  					a = &p.From
  4762  				} else {
  4763  					a = &p.To
  4764  				}
  4765  				ab.Put1(byte(op))
  4766  				if o.prefix == Pe {
  4767  					v = vaddr(ctxt, p, a, nil)
  4768  					ab.PutInt16(int16(v))
  4769  				} else {
  4770  					ab.relput4(ctxt, cursym, p, a)
  4771  				}
  4772  
  4773  			case Zm_ilo, Zilo_m:
  4774  				ab.Put1(byte(op))
  4775  				if yt.zcase == Zilo_m {
  4776  					a = &p.From
  4777  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4778  				} else {
  4779  					a = &p.To
  4780  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4781  				}
  4782  
  4783  				if o.prefix == Pe {
  4784  					v = vaddr(ctxt, p, a, nil)
  4785  					ab.PutInt16(int16(v))
  4786  				} else {
  4787  					ab.relput4(ctxt, cursym, p, a)
  4788  				}
  4789  
  4790  			case Zil_rr:
  4791  				ab.Put1(byte(op))
  4792  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4793  				if o.prefix == Pe {
  4794  					v = vaddr(ctxt, p, &p.From, nil)
  4795  					ab.PutInt16(int16(v))
  4796  				} else {
  4797  					ab.relput4(ctxt, cursym, p, &p.From)
  4798  				}
  4799  
  4800  			case Z_rp:
  4801  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4802  				ab.Put1(byte(op + reg[p.To.Reg]))
  4803  
  4804  			case Zrp_:
  4805  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4806  				ab.Put1(byte(op + reg[p.From.Reg]))
  4807  
  4808  			case Zcallcon, Zjmpcon:
  4809  				if yt.zcase == Zcallcon {
  4810  					ab.Put1(byte(op))
  4811  				} else {
  4812  					ab.Put1(o.op[z+1])
  4813  				}
  4814  				r = obj.Addrel(cursym)
  4815  				r.Off = int32(p.Pc + int64(ab.Len()))
  4816  				r.Type = objabi.R_PCREL
  4817  				r.Siz = 4
  4818  				r.Add = p.To.Offset
  4819  				ab.PutInt32(0)
  4820  
  4821  			case Zcallind:
  4822  				ab.Put2(byte(op), o.op[z+1])
  4823  				r = obj.Addrel(cursym)
  4824  				r.Off = int32(p.Pc + int64(ab.Len()))
  4825  				if ctxt.Arch.Family == sys.AMD64 {
  4826  					r.Type = objabi.R_PCREL
  4827  				} else {
  4828  					r.Type = objabi.R_ADDR
  4829  				}
  4830  				r.Siz = 4
  4831  				r.Add = p.To.Offset
  4832  				r.Sym = p.To.Sym
  4833  				ab.PutInt32(0)
  4834  
  4835  			case Zcall, Zcallduff:
  4836  				if p.To.Sym == nil {
  4837  					ctxt.Diag("call without target")
  4838  					ctxt.DiagFlush()
  4839  					log.Fatalf("bad code")
  4840  				}
  4841  
  4842  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4843  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4844  				}
  4845  
  4846  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4847  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4848  					// (the call jumps into the middle of the function).
  4849  					// This makes it possible to see call sites for duffcopy/duffzero in
  4850  					// BP-based profiling tools like Linux perf (which is the
  4851  					// whole point of maintaining frame pointers in Go).
  4852  					// MOVQ BP, -16(SP)
  4853  					// LEAQ -16(SP), BP
  4854  					ab.Put(bpduff1)
  4855  				}
  4856  				ab.Put1(byte(op))
  4857  				r = obj.Addrel(cursym)
  4858  				r.Off = int32(p.Pc + int64(ab.Len()))
  4859  				r.Sym = p.To.Sym
  4860  				r.Add = p.To.Offset
  4861  				r.Type = objabi.R_CALL
  4862  				r.Siz = 4
  4863  				ab.PutInt32(0)
  4864  
  4865  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4866  					// Pop BP pushed above.
  4867  					// MOVQ 0(BP), BP
  4868  					ab.Put(bpduff2)
  4869  				}
  4870  
  4871  			// TODO: jump across functions needs reloc
  4872  			case Zbr, Zjmp, Zloop:
  4873  				if p.As == AXBEGIN {
  4874  					ab.Put1(byte(op))
  4875  				}
  4876  				if p.To.Sym != nil {
  4877  					if yt.zcase != Zjmp {
  4878  						ctxt.Diag("branch to ATEXT")
  4879  						ctxt.DiagFlush()
  4880  						log.Fatalf("bad code")
  4881  					}
  4882  
  4883  					ab.Put1(o.op[z+1])
  4884  					r = obj.Addrel(cursym)
  4885  					r.Off = int32(p.Pc + int64(ab.Len()))
  4886  					r.Sym = p.To.Sym
  4887  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4888  					// it can point to a trampoline instead of the destination itself.
  4889  					r.Type = objabi.R_CALL
  4890  					r.Siz = 4
  4891  					ab.PutInt32(0)
  4892  					break
  4893  				}
  4894  
  4895  				// Assumes q is in this function.
  4896  				// TODO: Check in input, preserve in brchain.
  4897  
  4898  				// Fill in backward jump now.
  4899  				q = p.To.Target()
  4900  
  4901  				if q == nil {
  4902  					ctxt.Diag("jmp/branch/loop without target")
  4903  					ctxt.DiagFlush()
  4904  					log.Fatalf("bad code")
  4905  				}
  4906  
  4907  				if p.Back&branchBackwards != 0 {
  4908  					v = q.Pc - (p.Pc + 2)
  4909  					if v >= -128 && p.As != AXBEGIN {
  4910  						if p.As == AJCXZL {
  4911  							ab.Put1(0x67)
  4912  						}
  4913  						ab.Put2(byte(op), byte(v))
  4914  					} else if yt.zcase == Zloop {
  4915  						ctxt.Diag("loop too far: %v", p)
  4916  					} else {
  4917  						v -= 5 - 2
  4918  						if p.As == AXBEGIN {
  4919  							v--
  4920  						}
  4921  						if yt.zcase == Zbr {
  4922  							ab.Put1(0x0f)
  4923  							v--
  4924  						}
  4925  
  4926  						ab.Put1(o.op[z+1])
  4927  						ab.PutInt32(int32(v))
  4928  					}
  4929  
  4930  					break
  4931  				}
  4932  
  4933  				// Annotate target; will fill in later.
  4934  				p.Forwd = q.Rel
  4935  
  4936  				q.Rel = p
  4937  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4938  					if p.As == AJCXZL {
  4939  						ab.Put1(0x67)
  4940  					}
  4941  					ab.Put2(byte(op), 0)
  4942  				} else if yt.zcase == Zloop {
  4943  					ctxt.Diag("loop too far: %v", p)
  4944  				} else {
  4945  					if yt.zcase == Zbr {
  4946  						ab.Put1(0x0f)
  4947  					}
  4948  					ab.Put1(o.op[z+1])
  4949  					ab.PutInt32(0)
  4950  				}
  4951  
  4952  			case Zbyte:
  4953  				v = vaddr(ctxt, p, &p.From, &rel)
  4954  				if rel.Siz != 0 {
  4955  					rel.Siz = uint8(op)
  4956  					r = obj.Addrel(cursym)
  4957  					*r = rel
  4958  					r.Off = int32(p.Pc + int64(ab.Len()))
  4959  				}
  4960  
  4961  				ab.Put1(byte(v))
  4962  				if op > 1 {
  4963  					ab.Put1(byte(v >> 8))
  4964  					if op > 2 {
  4965  						ab.PutInt16(int16(v >> 16))
  4966  						if op > 4 {
  4967  							ab.PutInt32(int32(v >> 32))
  4968  						}
  4969  					}
  4970  				}
  4971  			}
  4972  
  4973  			return
  4974  		}
  4975  	}
  4976  	f3t = Ynone * Ymax
  4977  	if p.GetFrom3() != nil {
  4978  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4979  	}
  4980  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4981  		var pp obj.Prog
  4982  		var t []byte
  4983  		if p.As == mo[0].as {
  4984  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4985  				t = mo[0].op[:]
  4986  				switch mo[0].code {
  4987  				default:
  4988  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4989  
  4990  				case movLit:
  4991  					for z = 0; t[z] != 0; z++ {
  4992  						ab.Put1(t[z])
  4993  					}
  4994  
  4995  				case movRegMem:
  4996  					ab.Put1(t[0])
  4997  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4998  
  4999  				case movMemReg:
  5000  					ab.Put1(t[0])
  5001  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  5002  
  5003  				case movRegMem2op: // r,m - 2op
  5004  					ab.Put2(t[0], t[1])
  5005  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  5006  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  5007  
  5008  				case movMemReg2op:
  5009  					ab.Put2(t[0], t[1])
  5010  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  5011  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  5012  
  5013  				case movFullPtr:
  5014  					if t[0] != 0 {
  5015  						ab.Put1(t[0])
  5016  					}
  5017  					switch p.To.Index {
  5018  					default:
  5019  						goto bad
  5020  
  5021  					case REG_DS:
  5022  						ab.Put1(0xc5)
  5023  
  5024  					case REG_SS:
  5025  						ab.Put2(0x0f, 0xb2)
  5026  
  5027  					case REG_ES:
  5028  						ab.Put1(0xc4)
  5029  
  5030  					case REG_FS:
  5031  						ab.Put2(0x0f, 0xb4)
  5032  
  5033  					case REG_GS:
  5034  						ab.Put2(0x0f, 0xb5)
  5035  					}
  5036  
  5037  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5038  
  5039  				case movDoubleShift:
  5040  					if t[0] == Pw {
  5041  						if ctxt.Arch.Family != sys.AMD64 {
  5042  							ctxt.Diag("asmins: illegal 64: %v", p)
  5043  						}
  5044  						ab.rexflag |= Pw
  5045  						t = t[1:]
  5046  					} else if t[0] == Pe {
  5047  						ab.Put1(Pe)
  5048  						t = t[1:]
  5049  					}
  5050  
  5051  					switch p.From.Type {
  5052  					default:
  5053  						goto bad
  5054  
  5055  					case obj.TYPE_CONST:
  5056  						ab.Put2(0x0f, t[0])
  5057  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5058  						ab.Put1(byte(p.From.Offset))
  5059  
  5060  					case obj.TYPE_REG:
  5061  						switch p.From.Reg {
  5062  						default:
  5063  							goto bad
  5064  
  5065  						case REG_CL, REG_CX:
  5066  							ab.Put2(0x0f, t[1])
  5067  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5068  						}
  5069  					}
  5070  
  5071  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5072  				// where you load the TLS base register into a register and then index off that
  5073  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5074  				// are handled in prefixof above and should not be listed here.
  5075  				case movTLSReg:
  5076  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5077  						ctxt.Diag("invalid load of TLS: %v", p)
  5078  					}
  5079  
  5080  					if ctxt.Arch.Family == sys.I386 {
  5081  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5082  						// where you load the TLS base register into a register and then index off that
  5083  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5084  						// are handled in prefixof above and should not be listed here.
  5085  						switch ctxt.Headtype {
  5086  						default:
  5087  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5088  
  5089  						case objabi.Hlinux, objabi.Hfreebsd:
  5090  							if ctxt.Flag_shared {
  5091  								// Note that this is not generating the same insns as the other cases.
  5092  								//     MOV TLS, dst
  5093  								// becomes
  5094  								//     call __x86.get_pc_thunk.dst
  5095  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5096  								// which is encoded as
  5097  								//     call __x86.get_pc_thunk.dst
  5098  								//     movq 0(dst), dst
  5099  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5100  								// is g, which we can't check here, but will when we assemble the second
  5101  								// instruction.
  5102  								dst := p.To.Reg
  5103  								ab.Put1(0xe8)
  5104  								r = obj.Addrel(cursym)
  5105  								r.Off = int32(p.Pc + int64(ab.Len()))
  5106  								r.Type = objabi.R_CALL
  5107  								r.Siz = 4
  5108  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  5109  								ab.PutInt32(0)
  5110  
  5111  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5112  								r = obj.Addrel(cursym)
  5113  								r.Off = int32(p.Pc + int64(ab.Len()))
  5114  								r.Type = objabi.R_TLS_IE
  5115  								r.Siz = 4
  5116  								r.Add = 2
  5117  								ab.PutInt32(0)
  5118  							} else {
  5119  								// ELF TLS base is 0(GS).
  5120  								pp.From = p.From
  5121  
  5122  								pp.From.Type = obj.TYPE_MEM
  5123  								pp.From.Reg = REG_GS
  5124  								pp.From.Offset = 0
  5125  								pp.From.Index = REG_NONE
  5126  								pp.From.Scale = 0
  5127  								ab.Put2(0x65, // GS
  5128  									0x8B)
  5129  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5130  							}
  5131  						case objabi.Hplan9:
  5132  							pp.From = obj.Addr{}
  5133  							pp.From.Type = obj.TYPE_MEM
  5134  							pp.From.Name = obj.NAME_EXTERN
  5135  							pp.From.Sym = plan9privates
  5136  							pp.From.Offset = 0
  5137  							pp.From.Index = REG_NONE
  5138  							ab.Put1(0x8B)
  5139  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5140  
  5141  						case objabi.Hwindows:
  5142  							// Windows TLS base is always 0x14(FS).
  5143  							pp.From = p.From
  5144  
  5145  							pp.From.Type = obj.TYPE_MEM
  5146  							pp.From.Reg = REG_FS
  5147  							pp.From.Offset = 0x14
  5148  							pp.From.Index = REG_NONE
  5149  							pp.From.Scale = 0
  5150  							ab.Put2(0x64, // FS
  5151  								0x8B)
  5152  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5153  						}
  5154  						break
  5155  					}
  5156  
  5157  					switch ctxt.Headtype {
  5158  					default:
  5159  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5160  
  5161  					case objabi.Hlinux, objabi.Hfreebsd:
  5162  						if !ctxt.Flag_shared {
  5163  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5164  						}
  5165  						// Note that this is not generating the same insn as the other cases.
  5166  						//     MOV TLS, R_to
  5167  						// becomes
  5168  						//     movq g@gottpoff(%rip), R_to
  5169  						// which is encoded as
  5170  						//     movq 0(%rip), R_to
  5171  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5172  						// is g, which we can't check here, but will when we assemble the second
  5173  						// instruction.
  5174  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5175  
  5176  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5177  						r = obj.Addrel(cursym)
  5178  						r.Off = int32(p.Pc + int64(ab.Len()))
  5179  						r.Type = objabi.R_TLS_IE
  5180  						r.Siz = 4
  5181  						r.Add = -4
  5182  						ab.PutInt32(0)
  5183  
  5184  					case objabi.Hplan9:
  5185  						pp.From = obj.Addr{}
  5186  						pp.From.Type = obj.TYPE_MEM
  5187  						pp.From.Name = obj.NAME_EXTERN
  5188  						pp.From.Sym = plan9privates
  5189  						pp.From.Offset = 0
  5190  						pp.From.Index = REG_NONE
  5191  						ab.rexflag |= Pw
  5192  						ab.Put1(0x8B)
  5193  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5194  
  5195  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5196  						// TLS base is 0(FS).
  5197  						pp.From = p.From
  5198  
  5199  						pp.From.Type = obj.TYPE_MEM
  5200  						pp.From.Name = obj.NAME_NONE
  5201  						pp.From.Reg = REG_NONE
  5202  						pp.From.Offset = 0
  5203  						pp.From.Index = REG_NONE
  5204  						pp.From.Scale = 0
  5205  						ab.rexflag |= Pw
  5206  						ab.Put2(0x64, // FS
  5207  							0x8B)
  5208  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5209  					}
  5210  				}
  5211  				return
  5212  			}
  5213  		}
  5214  	}
  5215  	goto bad
  5216  
  5217  bad:
  5218  	if ctxt.Arch.Family != sys.AMD64 {
  5219  		// here, the assembly has failed.
  5220  		// if it's a byte instruction that has
  5221  		// unaddressable registers, try to
  5222  		// exchange registers and reissue the
  5223  		// instruction with the operands renamed.
  5224  		pp := *p
  5225  
  5226  		unbytereg(&pp.From, &pp.Ft)
  5227  		unbytereg(&pp.To, &pp.Tt)
  5228  
  5229  		z := int(p.From.Reg)
  5230  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5231  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5232  			// For now, different to keep bit-for-bit compatibility.
  5233  			if ctxt.Arch.Family == sys.I386 {
  5234  				breg := byteswapreg(ctxt, &p.To)
  5235  				if breg != REG_AX {
  5236  					ab.Put1(0x87) // xchg lhs,bx
  5237  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5238  					subreg(&pp, z, breg)
  5239  					ab.doasm(ctxt, cursym, &pp)
  5240  					ab.Put1(0x87) // xchg lhs,bx
  5241  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5242  				} else {
  5243  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5244  					subreg(&pp, z, REG_AX)
  5245  					ab.doasm(ctxt, cursym, &pp)
  5246  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5247  				}
  5248  				return
  5249  			}
  5250  
  5251  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5252  				// We certainly don't want to exchange
  5253  				// with AX if the op is MUL or DIV.
  5254  				ab.Put1(0x87) // xchg lhs,bx
  5255  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5256  				subreg(&pp, z, REG_BX)
  5257  				ab.doasm(ctxt, cursym, &pp)
  5258  				ab.Put1(0x87) // xchg lhs,bx
  5259  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5260  			} else {
  5261  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5262  				subreg(&pp, z, REG_AX)
  5263  				ab.doasm(ctxt, cursym, &pp)
  5264  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5265  			}
  5266  			return
  5267  		}
  5268  
  5269  		z = int(p.To.Reg)
  5270  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5271  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5272  			// For now, different to keep bit-for-bit compatibility.
  5273  			if ctxt.Arch.Family == sys.I386 {
  5274  				breg := byteswapreg(ctxt, &p.From)
  5275  				if breg != REG_AX {
  5276  					ab.Put1(0x87) //xchg rhs,bx
  5277  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5278  					subreg(&pp, z, breg)
  5279  					ab.doasm(ctxt, cursym, &pp)
  5280  					ab.Put1(0x87) // xchg rhs,bx
  5281  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5282  				} else {
  5283  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5284  					subreg(&pp, z, REG_AX)
  5285  					ab.doasm(ctxt, cursym, &pp)
  5286  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5287  				}
  5288  				return
  5289  			}
  5290  
  5291  			if isax(&p.From) {
  5292  				ab.Put1(0x87) // xchg rhs,bx
  5293  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5294  				subreg(&pp, z, REG_BX)
  5295  				ab.doasm(ctxt, cursym, &pp)
  5296  				ab.Put1(0x87) // xchg rhs,bx
  5297  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5298  			} else {
  5299  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5300  				subreg(&pp, z, REG_AX)
  5301  				ab.doasm(ctxt, cursym, &pp)
  5302  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5303  			}
  5304  			return
  5305  		}
  5306  	}
  5307  
  5308  	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
  5309  }
  5310  
  5311  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5312  // which is not referenced in a.
  5313  // If a is empty, it returns BX to account for MULB-like instructions
  5314  // that might use DX and AX.
  5315  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5316  	cana, canb, canc, cand := true, true, true, true
  5317  	if a.Type == obj.TYPE_NONE {
  5318  		cana, cand = false, false
  5319  	}
  5320  
  5321  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5322  		switch a.Reg {
  5323  		case REG_NONE:
  5324  			cana, cand = false, false
  5325  		case REG_AX, REG_AL, REG_AH:
  5326  			cana = false
  5327  		case REG_BX, REG_BL, REG_BH:
  5328  			canb = false
  5329  		case REG_CX, REG_CL, REG_CH:
  5330  			canc = false
  5331  		case REG_DX, REG_DL, REG_DH:
  5332  			cand = false
  5333  		}
  5334  	}
  5335  
  5336  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5337  		switch a.Index {
  5338  		case REG_AX:
  5339  			cana = false
  5340  		case REG_BX:
  5341  			canb = false
  5342  		case REG_CX:
  5343  			canc = false
  5344  		case REG_DX:
  5345  			cand = false
  5346  		}
  5347  	}
  5348  
  5349  	switch {
  5350  	case cana:
  5351  		return REG_AX
  5352  	case canb:
  5353  		return REG_BX
  5354  	case canc:
  5355  		return REG_CX
  5356  	case cand:
  5357  		return REG_DX
  5358  	default:
  5359  		ctxt.Diag("impossible byte register")
  5360  		ctxt.DiagFlush()
  5361  		log.Fatalf("bad code")
  5362  		return 0
  5363  	}
  5364  }
  5365  
  5366  func isbadbyte(a *obj.Addr) bool {
  5367  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5368  }
  5369  
  5370  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5371  	ab.Reset()
  5372  
  5373  	ab.rexflag = 0
  5374  	ab.vexflag = false
  5375  	ab.evexflag = false
  5376  	mark := ab.Len()
  5377  	ab.doasm(ctxt, cursym, p)
  5378  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5379  		// as befits the whole approach of the architecture,
  5380  		// the rex prefix must appear before the first opcode byte
  5381  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5382  		// before the 0f opcode escape!), or it might be ignored.
  5383  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5384  		if ctxt.Arch.Family != sys.AMD64 {
  5385  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5386  		}
  5387  		n := ab.Len()
  5388  		var np int
  5389  		for np = mark; np < n; np++ {
  5390  			c := ab.At(np)
  5391  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5392  				break
  5393  			}
  5394  		}
  5395  		ab.Insert(np, byte(0x40|ab.rexflag))
  5396  	}
  5397  
  5398  	n := ab.Len()
  5399  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5400  		r := &cursym.R[i]
  5401  		if int64(r.Off) < p.Pc {
  5402  			break
  5403  		}
  5404  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5405  			r.Off++
  5406  		}
  5407  		if r.Type == objabi.R_PCREL {
  5408  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5409  				// PC-relative addressing is relative to the end of the instruction,
  5410  				// but the relocations applied by the linker are relative to the end
  5411  				// of the relocation. Because immediate instruction
  5412  				// arguments can follow the PC-relative memory reference in the
  5413  				// instruction encoding, the two may not coincide. In this case,
  5414  				// adjust addend so that linker can keep relocating relative to the
  5415  				// end of the relocation.
  5416  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5417  			} else if ctxt.Arch.Family == sys.I386 {
  5418  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5419  				// assumes that the previous instruction loaded the PC of the end
  5420  				// of that instruction into CX, so the adjustment is relative to
  5421  				// that.
  5422  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5423  			}
  5424  		}
  5425  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5426  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5427  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5428  		}
  5429  
  5430  	}
  5431  }
  5432  
  5433  // unpackOps4 extracts 4 operands from p.
  5434  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5435  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5436  }
  5437  
  5438  // unpackOps5 extracts 5 operands from p.
  5439  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5440  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5441  }