github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"encoding/binary"
    35  	"fmt"
    36  	"log"
    37  	"strings"
    38  
    39  	"github.com/go-asm/go/buildcfg"
    40  	"github.com/go-asm/go/cmd/obj"
    41  	"github.com/go-asm/go/cmd/objabi"
    42  	"github.com/go-asm/go/cmd/sys"
    43  )
    44  
        // plan9privates is a package-level symbol pointer. Nothing in this part of
        // the file assigns or reads it.
        // NOTE(review): presumably only relevant to Plan 9 targets — confirm at the
        // use sites.
    45  var (
    46  	plan9privates *obj.LSym
    47  )
    48  
    49  // Instruction layout.
    50  
    51  // Loop alignment constants:
    52  // want to align loop entry to loopAlign-byte boundary,
    53  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    54  // We define a loop entry as the target of a backward jump.
    55  //
    56  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    57  // and it aligns all jump targets, not just backward jump targets.
    58  //
    59  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    60  // is very slight but negative, so the alignment is disabled by
    61  // setting maxLoopPad = 0. The code is here for reference and
    62  // for future experiments.
    63  const (
    64  	loopAlign  = 16 // alignment boundary, in bytes, for loop entries
    65  	maxLoopPad = 0  // maximum NOP padding in bytes; 0 disables loop alignment
    66  )
    67  
    68  // Bit flags that are used to express jump target properties.
    69  const (
    70  	// branchBackwards marks targets that are located behind.
    71  	// Used to express jumps to loop headers.
    72  	branchBackwards = (1 << iota)
    73  	// branchShort marks branches whose target is close,
    74  	// with an offset in the -128..127 range.
    75  	branchShort
    76  	// branchLoopHead marks loop entry.
    77  	// Used to insert padding for misaligned loops.
    78  	branchLoopHead
    79  )
    80  
    81  // opBytes holds optab encoding bytes.
    82  // Each ytab reserves a fixed number of bytes in this array.
    83  //
    84  // The size should be the minimal number of bytes that
    85  // is enough to hold the biggest optab op lines.
    86  type opBytes [31]uint8
    87  
        // Optab is one row of the optab instruction table. It ties an instruction
        // to the operand patterns it accepts and to the bytes used to encode them.
    88  type Optab struct {
    89  	as     obj.As  // instruction this row describes; matched against p.As
    90  	ytab   []ytab  // accepted operand patterns, e.g. yaddl
    91  	prefix uint8   // P* constant controlling the prefix bytes to emit
    92  	op     opBytes // encoding bytes; each ytab line owns zoffset of them
    93  }
    94  
        // movtab is a table row keyed by an instruction and up to three uint8
        // operand-type codes.
        // NOTE(review): the table built from this type and the code that reads it
        // are outside this chunk, so the field notes below are assumptions to
        // confirm.
    95  type movtab struct {
    96  	as   obj.As   // instruction the row applies to
    97  	ft   uint8    // presumably the class of the "from" operand — TODO confirm
    98  	f3t  uint8    // presumably the class of a third operand — TODO confirm
    99  	tt   uint8    // presumably the class of the "to" operand — TODO confirm
   100  	code uint8    // encoding selector; its meaning is not visible here
   101  	op   [4]uint8 // up to four encoding bytes
   102  }
   103  
        // Operand classes ("Ytypes"). oclass computes the specific class of an
        // operand; the ycover table, set up in instinit, records which more
        // general classes each specific class also satisfies. The argList of
        // every ytab line is written in terms of these constants.
        //
        // The values are assigned by iota and are used as ycover indexes, so the
        // order of the declarations determines the numeric values.
   104  const (
   105  	Yxxx = iota
   106  	Ynone
   107  	Yi0 // $0
   108  	Yi1 // $1
   109  	Yu2 // $x, x fits in uint2
   110  	Yi8 // $x, x fits in int8
   111  	Yu8 // $x, x fits in uint8
   112  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   113  	Ys32 // $x, x fits in a signed 32-bit value; Yi0, Yi1 and Yi8 also count as Ys32
   114  	Yi32
   115  	Yi64
   116  	Yiauto
   117  	Yal
   118  	Ycl
   119  	Yax
   120  	Ycx
   121  	Yrb
   122  	Yrl
   123  	Yrl32 // Yrl on 32-bit system
   124  	Yrf
   125  	Yf0
   126  	Yrx
   127  	Ymb
   128  	Yml
   129  	Ym
   130  	Ybr
   131  	Ycs
   132  	Yss
   133  	Yds
   134  	Yes
   135  	Yfs
   136  	Ygs
   137  	Ygdtr
   138  	Yidtr
   139  	Yldtr
   140  	Ymsw
   141  	Ytask
   142  	Ycr0
   143  	Ycr1
   144  	Ycr2
   145  	Ycr3
   146  	Ycr4
   147  	Ycr5
   148  	Ycr6
   149  	Ycr7
   150  	Ycr8
   151  	Ydr0
   152  	Ydr1
   153  	Ydr2
   154  	Ydr3
   155  	Ydr4
   156  	Ydr5
   157  	Ydr6
   158  	Ydr7
   159  	Ytr0
   160  	Ytr1
   161  	Ytr2
   162  	Ytr3
   163  	Ytr4
   164  	Ytr5
   165  	Ytr6
   166  	Ytr7
   167  	Ymr
   168  	Ymm
   169  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   170  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   171  	Yxr           // X0..X15
   172  	YxrEvex       // X0..X31
   173  	Yxm
   174  	YxmEvex       // YxrEvex+Ym
   175  	Yxvm          // VSIB vector array; vm32x/vm64x
   176  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   177  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   178  	Yyr           // Y0..Y15
   179  	YyrEvex       // Y0..Y31
   180  	Yym
   181  	YymEvex   // YyrEvex+Ym
   182  	Yyvm      // VSIB vector array; vm32y/vm64y
   183  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   184  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   185  	Yzr       // Z0..Z31
   186  	Yzm       // Yzr+Ym
   187  	Yzvm      // VSIB vector array; vm32z/vm64z
   188  	Yk0       // K0
   189  	Yknot0    // K1..K7; write mask
   190  	Yk        // K0..K7; used for KOP
   191  	Ykm       // Yk+Ym; used for KOP
   192  	Ytls
   193  	Ytextsize
   194  	Yindir
   195  	Ymax // number of operand classes; ycover has Ymax*Ymax entries
   196  )
   197  
        // Encoding forms ("Ztypes"). The first entry of every ytab line is one of
        // these constants; the switch on yt.zcase in doasm implements each case,
        // deciding how the optab opcode bytes and the operands are laid down.
        //
        // The values are assigned by iota.
        // NOTE(review): Zevex_first and Zevex_last look like range markers that
        // bracket the EVEX cases — confirm against their uses before inserting
        // new entries near them.
   198  const (
   199  	Zxxx = iota
   200  	Zlit
   201  	Zlitm_r
   202  	Zlitr_m
   203  	Zlit_m_r
   204  	Z_rp
   205  	Zbr
   206  	Zcall
   207  	Zcallcon
   208  	Zcallduff
   209  	Zcallind
   210  	Zcallindreg
   211  	Zib_
   212  	Zib_rp
   213  	Zibo_m
   214  	Zibo_m_xm
   215  	Zil_
   216  	Zil_rp
   217  	Ziq_rp
   218  	Zilo_m
   219  	Zjmp
   220  	Zjmpcon
   221  	Zloop
   222  	Zo_iw
   223  	Zm_o
   224  	Zm_r
   225  	Z_m_r
   226  	Zm2_r
   227  	Zm_r_xm
   228  	Zm_r_i_xm
   229  	Zm_r_xm_nr
   230  	Zr_m_xm_nr
   231  	Zibm_r // mmx1,mmx2/mem64,imm8
   232  	Zibr_m
   233  	Zmb_r
   234  	Zaut_r
   235  	Zo_m
   236  	Zo_m64
   237  	Zpseudo
   238  	Zr_m
   239  	Zr_m_xm
   240  	Zrp_
   241  	Z_ib
   242  	Z_il
   243  	Zm_ibo
   244  	Zm_ilo
   245  	Zib_rr
   246  	Zil_rr
   247  	Zbyte
   248  
   249  	Zvex_rm_v_r
   250  	Zvex_rm_v_ro
   251  	Zvex_r_v_rm
   252  	Zvex_i_rm_vo
   253  	Zvex_v_rm_r
   254  	Zvex_i_rm_r
   255  	Zvex_i_r_v
   256  	Zvex_i_rm_v_r
   257  	Zvex
   258  	Zvex_rm_r_vo
   259  	Zvex_i_r_rm
   260  	Zvex_hr_rm_v_r
   261  
   262  	Zevex_first
   263  	Zevex_i_r_k_rm
   264  	Zevex_i_r_rm
   265  	Zevex_i_rm_k_r
   266  	Zevex_i_rm_k_vo
   267  	Zevex_i_rm_r
   268  	Zevex_i_rm_v_k_r
   269  	Zevex_i_rm_v_r
   270  	Zevex_i_rm_vo
   271  	Zevex_k_rmo
   272  	Zevex_r_k_rm
   273  	Zevex_r_v_k_rm
   274  	Zevex_r_v_rm
   275  	Zevex_rm_k_r
   276  	Zevex_rm_v_k_r
   277  	Zevex_rm_v_r
   278  	Zevex_last
   279  
   280  	Zmax
   281  )
   282  
        // Prefix selectors and REX bit masks.
        //
        // The P* constants are the values stored in the prefix column of optab
        // rows (for example Px, Pb, Pw, Pe, Pm and Pq); they control which prefix
        // bytes are emitted for the instruction. Some of them also appear inside
        // an opBytes list, e.g. opBytes{Pe, 0xc2} for ACMPPD. Entries marked
        // "symbolic" only need to be distinct from the others.
        //
        // The Rx* constants are single-bit masks, documented individually below.
   283  const (
   284  	Px   = 0
   285  	Px1  = 1    // symbolic; exact value doesn't matter
   286  	P32  = 0x32 // 32-bit only
   287  	Pe   = 0x66 // operand escape
   288  	Pm   = 0x0f // 2byte opcode escape
   289  	Pq   = 0xff // both escapes: 66 0f
   290  	Pb   = 0xfe // byte operands
   291  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   292  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   293  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   294  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   295  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   296  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   297  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   298  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   299  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   300  	Pw   = 0x48 // Rex.w
   301  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   302  	Py   = 0x80 // defaults to 64-bit mode
   303  	Py1  = 0x81 // symbolic; exact value doesn't matter
   304  	Py3  = 0x83 // symbolic; exact value doesn't matter
   305  	Pavx = 0x84 // symbolic; exact value doesn't matter
   306  
   307  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   308  	Rxw     = 1 << 3 // =1, 64-bit operand size
   309  	Rxr     = 1 << 2 // extend modrm reg
   310  	Rxx     = 1 << 1 // extend sib index
   311  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   312  )
   313  
   314  const (
   315  	// Encoding for VEX prefix in tables.
   316  	// The P, L, and W fields are chosen to match
   317  	// their eventual locations in the VEX prefix bytes.
   318  
   323  	// Using spare bit to make leading [E]VEX encoding byte different from
   324  	// 0x0f even if all other VEX fields are 0.
   325  	avxEscape = 1 << 6
   326  
   327  	// P field - 2 bits
   328  	vex66 = 1 << 0
   329  	vexF3 = 2 << 0
   330  	vexF2 = 3 << 0
   331  	// L field - 1 bit
        	// vexLZ, vexLIG and vex128 all encode as 0; the separate names only
        	// record which notation applies to a given table entry.
   332  	vexLZ  = 0 << 2
   333  	vexLIG = 0 << 2
   334  	vex128 = 0 << 2
   335  	vex256 = 1 << 2
   336  	// W field - 1 bit
        	// vexWIG and vexW0 both encode as 0.
   337  	vexWIG = 0 << 7
   338  	vexW0  = 0 << 7
   339  	vexW1  = 1 << 7
   340  	// M field - 5 bits, but mostly reserved; we can store up to 3
   341  	vex0F   = 1 << 3
   342  	vex0F38 = 2 << 3
   343  	vex0F3A = 3 << 3
   344  )
   345  
        // ycover is a flattened Ymax-by-Ymax table, set up in instinit. A non-zero
        // ycover[from*Ymax+to] means that an operand whose specific class is from
        // also counts as class to (for example ycover[Yi8*Ymax+Ys32] = 1).
   346  var ycover [Ymax * Ymax]uint8
   347  
        // reg holds one int per register, MAXREG entries in total.
        // NOTE(review): it is filled in and consumed outside this chunk;
        // presumably it maps a register to its encoding number — confirm.
   348  var reg [MAXREG]int
   349  
        // regrex holds one int per register plus one extra trailing entry
        // (MAXREG + 1 in total).
        // NOTE(review): it is filled in and consumed outside this chunk;
        // presumably it holds the Rx* REX bits a register requires — confirm.
   350  var regrex [MAXREG + 1]int
   351  
        // Operand-pattern tables for operand-less instructions and pseudo-ops.
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns (the
        // opcode cursor advances by zoffset for every non-matching line).

        // ynone matches an instruction without operands. Used in optab by
        // operand-less instructions such as AAAA, ACBW and ACLC.
   352  var ynone = []ytab{
   353  	{Zlit, 1, argList{}},
   354  }
   355  
        // ytext accepts Ymb, Ytextsize with an optional Yi32 in between.
   356  var ytext = []ytab{
   357  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   358  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   359  }
   360  
        // ynop accepts no operand or a single Yiauto, Yml, Yrf or Yxr operand.
        // The single-operand lines are listed twice; only the last line has a
        // non-zero zoffset.
   361  var ynop = []ytab{
   362  	{Zpseudo, 0, argList{}},
   363  	{Zpseudo, 0, argList{Yiauto}},
   364  	{Zpseudo, 0, argList{Yml}},
   365  	{Zpseudo, 0, argList{Yrf}},
   366  	{Zpseudo, 0, argList{Yxr}},
   367  	{Zpseudo, 0, argList{Yiauto}},
   368  	{Zpseudo, 0, argList{Yml}},
   369  	{Zpseudo, 0, argList{Yrf}},
   370  	{Zpseudo, 1, argList{Yxr}},
   371  }
   372  
   373  var yfuncdata = []ytab{
   374  	{Zpseudo, 0, argList{Yi32, Ym}},
   375  }
   376  
   377  var ypcdata = []ytab{
   378  	{Zpseudo, 0, argList{Yi32, Yi32}},
   379  }
   380  
        // Operand-pattern tables for the basic integer arithmetic and compare
        // instructions.
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns. Lines
        // are scanned in order, so the sum of the zoffsets of a table must agree
        // with the opBytes list of every optab row that uses it.

        // yxorb is used in optab by the byte forms AADCB, AADDB and AANDB.
   381  var yxorb = []ytab{
   382  	{Zib_, 1, argList{Yi32, Yal}},
   383  	{Zibo_m, 2, argList{Yi32, Ymb}},
   384  	{Zr_m, 1, argList{Yrb, Ymb}},
   385  	{Zm_r, 1, argList{Ymb, Yrb}},
   386  }
   387  
        // yaddl is used in optab by AADCL/AADCQ/AADCW, AADDL/AADDQ/AADDW and
        // AANDL/AANDQ/AANDW. The Yi8 line comes first, so it is tried before the
        // Yi32 lines.
   388  var yaddl = []ytab{
   389  	{Zibo_m, 2, argList{Yi8, Yml}},
   390  	{Zil_, 1, argList{Yi32, Yax}},
   391  	{Zilo_m, 2, argList{Yi32, Yml}},
   392  	{Zr_m, 1, argList{Yrl, Yml}},
   393  	{Zm_r, 1, argList{Yml, Yrl}},
   394  }
   395  
   396  var yincl = []ytab{
   397  	{Z_rp, 1, argList{Yrl}},
   398  	{Zo_m, 2, argList{Yml}},
   399  }
   400  
        // yincq is yincl without the Z_rp line.
   401  var yincq = []ytab{
   402  	{Zo_m, 2, argList{Yml}},
   403  }
   404  
        // ycmpb is used in optab by ACMPB. Its lines mirror yxorb with the
        // operand order reversed.
   405  var ycmpb = []ytab{
   406  	{Z_ib, 1, argList{Yal, Yi32}},
   407  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   408  	{Zm_r, 1, argList{Ymb, Yrb}},
   409  	{Zr_m, 1, argList{Yrb, Ymb}},
   410  }
   411  
        // ycmpl is used in optab by ACMPL and ACMPQ. Its lines mirror yaddl with
        // the operand order reversed.
   412  var ycmpl = []ytab{
   413  	{Zm_ibo, 2, argList{Yml, Yi8}},
   414  	{Z_il, 1, argList{Yax, Yi32}},
   415  	{Zm_ilo, 2, argList{Yml, Yi32}},
   416  	{Zm_r, 1, argList{Yml, Yrl}},
   417  	{Zr_m, 1, argList{Yrl, Yml}},
   418  }
   419  
        // Operand-pattern tables whose names suggest shift, test, byte-move and
        // bit-test instructions (only ybtl's users are visible in this chunk).
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

        // yshb accepts a count of Yi1 ($1), Yu8 or Ycx with a Ymb destination.
   420  var yshb = []ytab{
   421  	{Zo_m, 2, argList{Yi1, Ymb}},
   422  	{Zibo_m, 2, argList{Yu8, Ymb}},
   423  	{Zo_m, 2, argList{Ycx, Ymb}},
   424  }
   425  
        // yshl is the Yml counterpart of yshb; it additionally accepts Ycl.
   426  var yshl = []ytab{
   427  	{Zo_m, 2, argList{Yi1, Yml}},
   428  	{Zibo_m, 2, argList{Yu8, Yml}},
   429  	{Zo_m, 2, argList{Ycl, Yml}},
   430  	{Zo_m, 2, argList{Ycx, Yml}},
   431  }
   432  
        // ytestl is yaddl without the leading Yi8 line.
   433  var ytestl = []ytab{
   434  	{Zil_, 1, argList{Yi32, Yax}},
   435  	{Zilo_m, 2, argList{Yi32, Yml}},
   436  	{Zr_m, 1, argList{Yrl, Yml}},
   437  	{Zm_r, 1, argList{Yml, Yrl}},
   438  }
   439  
   440  var ymovb = []ytab{
   441  	{Zr_m, 1, argList{Yrb, Ymb}},
   442  	{Zm_r, 1, argList{Ymb, Yrb}},
   443  	{Zib_rp, 1, argList{Yi32, Yrb}},
   444  	{Zibo_m, 2, argList{Yi32, Ymb}},
   445  }
   446  
        // ybtl is used in optab by the ABT*, ABTC*, ABTR* and ABTS* rows.
   447  var ybtl = []ytab{
   448  	{Zibo_m, 2, argList{Yi8, Yml}},
   449  	{Zr_m, 1, argList{Yrl, Yml}},
   450  }
   451  
        // Operand-pattern tables for the word/long/quad move instructions and
        // for return.
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

   452  var ymovw = []ytab{
   453  	{Zr_m, 1, argList{Yrl, Yml}},
   454  	{Zm_r, 1, argList{Yml, Yrl}},
   455  	{Zil_rp, 1, argList{Yi32, Yrl}},
   456  	{Zilo_m, 2, argList{Yi32, Yml}},
   457  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   458  }
   459  
        // ymovl extends the ymovw forms with the MMX and XMM MOVD lines.
   460  var ymovl = []ytab{
   461  	{Zr_m, 1, argList{Yrl, Yml}},
   462  	{Zm_r, 1, argList{Yml, Yrl}},
   463  	{Zil_rp, 1, argList{Yi32, Yrl}},
   464  	{Zilo_m, 2, argList{Yi32, Yml}},
   465  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   466  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   467  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   468  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   469  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   470  }
   471  
        // yret accepts no operand or a single Yi32 operand.
   472  var yret = []ytab{
   473  	{Zo_iw, 1, argList{}},
   474  	{Zo_iw, 1, argList{Yi32}},
   475  }
   476  
        // ymovq lists the forms valid in 32-bit mode first, followed by the
        // forms that are valid only in 64-bit mode. The trailing comments give
        // the prefix and opcode byte each line is paired with.
   477  var ymovq = []ytab{
   478  	// valid in 32-bit mode
   479  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   480  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   481  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   482  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   483  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   484  
   485  	// valid only in 64-bit mode, usually with 64-bit prefix
   486  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   487  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   488  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   489  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   490  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   491  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   492  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   493  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   494  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   495  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   496  }
   497  
        // Generic two-operand pattern tables, named after the operand classes
        // they accept (source class first, then destination class).
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

   498  var ymovbe = []ytab{
   499  	{Zlitm_r, 3, argList{Ym, Yrl}},
   500  	{Zlitr_m, 3, argList{Yrl, Ym}},
   501  }
   502  
   503  var ym_rl = []ytab{
   504  	{Zm_r, 1, argList{Ym, Yrl}},
   505  }
   506  
        // yrl_m is used in optab by ABOUNDL and ABOUNDW.
   507  var yrl_m = []ytab{
   508  	{Zr_m, 1, argList{Yrl, Ym}},
   509  }
   510  
   511  var ymb_rl = []ytab{
   512  	{Zmb_r, 1, argList{Ymb, Yrl}},
   513  }
   514  
        // yml_rl is used in optab by AADCX*, AADOX*, ABSF*, ABSR* and the
        // ACMOV* rows.
   515  var yml_rl = []ytab{
   516  	{Zm_r, 1, argList{Yml, Yrl}},
   517  }
   518  
        // yrl_ml is used in optab by AARPL.
   519  var yrl_ml = []ytab{
   520  	{Zr_m, 1, argList{Yrl, Yml}},
   521  }
   522  
   523  var yml_mb = []ytab{
   524  	{Zr_m, 1, argList{Yrb, Ymb}},
   525  	{Zm_r, 1, argList{Ymb, Yrb}},
   526  }
   527  
   528  var yrb_mb = []ytab{
   529  	{Zr_m, 1, argList{Yrb, Ymb}},
   530  }
   531  
        // yxchg tries the two Yax forms before the general Yrl/Yml forms.
   532  var yxchg = []ytab{
   533  	{Z_rp, 1, argList{Yax, Yrl}},
   534  	{Zrp_, 1, argList{Yrl, Yax}},
   535  	{Zr_m, 1, argList{Yrl, Yml}},
   536  	{Zm_r, 1, argList{Yml, Yrl}},
   537  }
   538  
        // Operand-pattern tables whose names suggest divide and multiply
        // instructions, plus the table for raw byte emission.
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

   539  var ydivl = []ytab{
   540  	{Zm_o, 2, argList{Yml}},
   541  }
   542  
   543  var ydivb = []ytab{
   544  	{Zm_o, 2, argList{Ymb}},
   545  }
   546  
        // yimul accepts a single Yml operand, or a Yi8, Yi32 or Yml source with a
        // Yrl destination.
   547  var yimul = []ytab{
   548  	{Zm_o, 2, argList{Yml}},
   549  	{Zib_rr, 1, argList{Yi8, Yrl}},
   550  	{Zil_rr, 1, argList{Yi32, Yrl}},
   551  	{Zm_r, 2, argList{Yml, Yrl}},
   552  }
   553  
        // yimul3 is the three-operand form: immediate, Yml, Yrl.
   554  var yimul3 = []ytab{
   555  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   556  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   557  }
   558  
        // ybyte is used in optab by ABYTE.
   559  var ybyte = []ytab{
   560  	{Zbyte, 1, argList{Yi64}},
   561  }
   562  
        // Operand-pattern tables for assorted single-operand instructions.
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

        // yin accepts a single Yi32 operand or no operand at all.
   563  var yin = []ytab{
   564  	{Zib_, 1, argList{Yi32}},
   565  	{Zlit, 1, argList{}},
   566  }
   567  
   568  var yint = []ytab{
   569  	{Zib_, 1, argList{Yi32}},
   570  }
   571  
        // ypushl tries the Yi8 line before the Yi32 line.
   572  var ypushl = []ytab{
   573  	{Zrp_, 1, argList{Yrl}},
   574  	{Zm_o, 2, argList{Ym}},
   575  	{Zib_, 1, argList{Yi8}},
   576  	{Zil_, 1, argList{Yi32}},
   577  }
   578  
   579  var ypopl = []ytab{
   580  	{Z_rp, 1, argList{Yrl}},
   581  	{Zo_m, 2, argList{Ym}},
   582  }
   583  
   584  var ywrfsbase = []ytab{
   585  	{Zm_o, 2, argList{Yrl}},
   586  }
   587  
   588  var yrdrand = []ytab{
   589  	{Zo_m, 2, argList{Yrl}},
   590  }
   591  
        // yclflush is used in optab by ACLDEMOTE, ACLFLUSH, ACLFLUSHOPT and ACLWB.
   592  var yclflush = []ytab{
   593  	{Zo_m, 2, argList{Ym}},
   594  }
   595  
        // ybswap is used in optab by ABSWAPL and ABSWAPQ.
   596  var ybswap = []ytab{
   597  	{Z_rp, 2, argList{Yrl}},
   598  }
   599  
   600  var yscond = []ytab{
   601  	{Zo_m, 2, argList{Ymb}},
   602  }
   603  
        // Operand-pattern tables for branches, loops, calls and jumps.
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

        // yjcond accepts a Ybr target, optionally preceded by a Yi0 ($0) or
        // Yi1 ($1) operand.
        // NOTE(review): what the leading $0/$1 operand means is not visible in
        // this chunk.
   604  var yjcond = []ytab{
   605  	{Zbr, 0, argList{Ybr}},
   606  	{Zbr, 0, argList{Yi0, Ybr}},
   607  	{Zbr, 1, argList{Yi1, Ybr}},
   608  }
   609  
   610  var yloop = []ytab{
   611  	{Zloop, 1, argList{Ybr}},
   612  }
   613  
        // ycall is used in optab by obj.ACALL. Its zoffsets (0, 2, 2, 0, 1) add up
        // to the five bytes of that row's opBytes{0xff, 02, 0xff, 0x15, 0xe8}.
   614  var ycall = []ytab{
   615  	{Zcallindreg, 0, argList{Yml}},
   616  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   617  	{Zcallind, 2, argList{Yindir}},
   618  	{Zcall, 0, argList{Ybr}},
   619  	{Zcallcon, 1, argList{Yi32}},
   620  }
   621  
   622  var yduff = []ytab{
   623  	{Zcallduff, 1, argList{Yi32}},
   624  }
   625  
   626  var yjmp = []ytab{
   627  	{Zo_m64, 2, argList{Yml}},
   628  	{Zjmp, 0, argList{Ybr}},
   629  	{Zjmpcon, 1, argList{Yi32}},
   630  }
   631  
        // Operand-pattern tables built around the Yf0 and Yrf operand classes.
        // NOTE(review): presumably these serve the x87 floating-point
        // instructions — none of their optab rows is visible in this chunk.
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

   632  var yfmvd = []ytab{
   633  	{Zm_o, 2, argList{Ym, Yf0}},
   634  	{Zo_m, 2, argList{Yf0, Ym}},
   635  	{Zm_o, 2, argList{Yrf, Yf0}},
   636  	{Zo_m, 2, argList{Yf0, Yrf}},
   637  }
   638  
        // yfmvdp keeps only the yfmvd lines whose source is Yf0.
   639  var yfmvdp = []ytab{
   640  	{Zo_m, 2, argList{Yf0, Ym}},
   641  	{Zo_m, 2, argList{Yf0, Yrf}},
   642  }
   643  
        // yfmvf keeps only the yfmvd lines that involve Ym.
   644  var yfmvf = []ytab{
   645  	{Zm_o, 2, argList{Ym, Yf0}},
   646  	{Zo_m, 2, argList{Yf0, Ym}},
   647  }
   648  
   649  var yfmvx = []ytab{
   650  	{Zm_o, 2, argList{Ym, Yf0}},
   651  }
   652  
   653  var yfmvp = []ytab{
   654  	{Zo_m, 2, argList{Yf0, Ym}},
   655  }
   656  
   657  var yfcmv = []ytab{
   658  	{Zm_o, 2, argList{Yrf, Yf0}},
   659  }
   660  
   661  var yfadd = []ytab{
   662  	{Zm_o, 2, argList{Ym, Yf0}},
   663  	{Zm_o, 2, argList{Yrf, Yf0}},
   664  	{Zo_m, 2, argList{Yf0, Yrf}},
   665  }
   666  
   667  var yfxch = []ytab{
   668  	{Zo_m, 2, argList{Yf0, Yrf}},
   669  	{Zm_o, 2, argList{Yrf, Yf0}},
   670  }
   671  
   672  var ycompp = []ytab{
   673  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   674  }
   675  
        // ystsw accepts a single Ym operand or a single Yax operand.
   676  var ystsw = []ytab{
   677  	{Zo_m, 2, argList{Ym}},
   678  	{Zlit, 1, argList{Yax}},
   679  }
   680  
   681  var ysvrs_mo = []ytab{
   682  	{Zm_o, 2, argList{Ym}},
   683  }
   684  
   685  // unaryDst version of "ysvrs_mo".
   686  var ysvrs_om = []ytab{
   687  	{Zo_m, 2, argList{Ym}},
   688  }
   689  
        // Operand-pattern tables built around the Ymr/Ymm and Yxr/Yxm operand
        // classes (the trailing comments in ymovl and ymovq associate Ymr with
        // MMX and Yxr with XMM instructions).
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

   690  var ymm = []ytab{
   691  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   692  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   693  }
   694  
        // yxm is used in optab by AADDPD, AADDPS, AADDSD, AADDSS, AADDSUBPD,
        // AADDSUBPS, AANDNPD, AANDNPS, AANDPD and AANDPS.
   695  var yxm = []ytab{
   696  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   697  }
   698  
        // yxm_q4 accepts the same operands as yxm but uses the Zm_r case.
   699  var yxm_q4 = []ytab{
   700  	{Zm_r, 1, argList{Yxm, Yxr}},
   701  }
   702  
   703  var yxcvm1 = []ytab{
   704  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   705  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   706  }
   707  
   708  var yxcvm2 = []ytab{
   709  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   710  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   711  }
   712  
   713  var yxr = []ytab{
   714  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   715  }
   716  
   717  var yxr_ml = []ytab{
   718  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   719  }
   720  
   721  var ymr = []ytab{
   722  	{Zm_r, 1, argList{Ymr, Ymr}},
   723  }
   724  
   725  var ymr_ml = []ytab{
   726  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   727  }
   728  
        // yxcmpi is used in optab by ACMPPD, ACMPPS and ACMPSD. The Yi8 operand
        // is the third entry of the argList.
   729  var yxcmpi = []ytab{
   730  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   731  }
   732  
   733  var yxmov = []ytab{
   734  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   735  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   736  }
   737  
        // Further operand-pattern tables built around the Ymr/Ymm and Yxr/Yxm
        // operand classes: conversions, shifts, shuffles, extracts and inserts,
        // judging by their names. None of their optab rows is visible in this
        // chunk.
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

   738  var yxcvfl = []ytab{
   739  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   740  }
   741  
   742  var yxcvlf = []ytab{
   743  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   744  }
   745  
        // yxcvfq matches the same operands as yxcvfl but owns two opBytes.
   746  var yxcvfq = []ytab{
   747  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   748  }
   749  
        // yxcvqf matches the same operands as yxcvlf but owns two opBytes.
   750  var yxcvqf = []ytab{
   751  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   752  }
   753  
        // yps pairs each register/memory form with a Yi8 immediate form, first
        // for Ymr and then for Yxr.
   754  var yps = []ytab{
   755  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   756  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   757  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   758  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   759  }
   760  
   761  var yxrrl = []ytab{
   762  	{Zm_r, 1, argList{Yxr, Yrl}},
   763  }
   764  
   765  var ymrxr = []ytab{
   766  	{Zm_r, 1, argList{Ymr, Yxr}},
   767  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   768  }
   769  
   770  var ymshuf = []ytab{
   771  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   772  }
   773  
   774  var ymshufb = []ytab{
   775  	{Zm2_r, 2, argList{Yxm, Yxr}},
   776  }
   777  
   778  // It should never have more than 1 entry,
   779  // because some optab entries have opcode sequences that
   780  // are longer than 2 bytes (zoffset=2 here),
   781  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   782  // to name a few.
   783  var yxshuf = []ytab{
   784  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   785  }
   786  
   787  var yextrw = []ytab{
   788  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   789  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   790  }
   791  
   792  var yextr = []ytab{
   793  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   794  }
   795  
   796  var yinsrw = []ytab{
   797  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   798  }
   799  
   800  var yinsr = []ytab{
   801  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   802  }
   803  
   804  var ypsdq = []ytab{
   805  	{Zibo_m, 2, argList{Yi8, Yxr}},
   806  }
   807  
   808  var ymskb = []ytab{
   809  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   810  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   811  }
   812  
        // Operand-pattern tables for assorted newer instructions (CRC32,
        // prefetch, AES, transactional begin/abort, SHA and blends, judging by
        // their names). None of their optab rows is visible in this chunk.
        //
        // Every ytab line reads {Z case, zoffset, argList}: argList lists the Y
        // operand classes the line accepts, the Z case selects the encoding form,
        // and zoffset is the number of optab opBytes entries the line owns.

   813  var ycrc32l = []ytab{
   814  	{Zlitm_r, 0, argList{Yml, Yrl}},
   815  }
   816  
        // ycrc32b is ycrc32l with a Ymb source instead of Yml.
   817  var ycrc32b = []ytab{
   818  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   819  }
   820  
   821  var yprefetch = []ytab{
   822  	{Zm_o, 2, argList{Ym}},
   823  }
   824  
   825  var yaes = []ytab{
   826  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   827  }
   828  
   829  var yxbegin = []ytab{
   830  	{Zjmp, 1, argList{Ybr}},
   831  }
   832  
   833  var yxabort = []ytab{
   834  	{Zib_, 1, argList{Yu8}},
   835  }
   836  
   837  var ylddqu = []ytab{
   838  	{Zm_r, 1, argList{Ym, Yxr}},
   839  }
   840  
   841  var ypalignr = []ytab{
   842  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   843  }
   844  
        // ysha256rnds2 requires Yxr0 (X0 only) as its first operand.
   845  var ysha256rnds2 = []ytab{
   846  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   847  }
   848  
        // yblendvpd requires Yxr0 (X0 only) as its first operand.
   849  var yblendvpd = []ytab{
   850  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   851  }
   852  
        // ymmxmm0f38 has one line for the Ymm/Ymr operands and one for Yxm/Yxr;
        // they own three and five opBytes respectively.
   853  var ymmxmm0f38 = []ytab{
   854  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   855  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   856  }
   857  
        // yextractps takes a Yu2 immediate (fits in uint2) as its first operand.
   858  var yextractps = []ytab{
   859  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   860  }
   861  
        // ysha1rnds4 takes a Yu2 immediate (fits in uint2) as its first operand.
   862  var ysha1rnds4 = []ytab{
   863  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   864  }
   865  
   866  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   867  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   868  // to find the entry with the given p.As and then looks through the ytable for
   869  // that instruction (the second field in the optab struct) for a line whose
   870  // argList entries match the Ytypes of the instruction's operands.  The
   871  // function oclass computes the specific Ytype of an operand and then the set
   872  // of more general Ytypes that it satisfies is implied by the ycover table, set
   873  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   874  // from the more general 8-bit constants, but instinit says
   875  //
   876  //	ycover[Yi0*Ymax+Ys32] = 1
   877  //	ycover[Yi1*Ymax+Ys32] = 1
   878  //	ycover[Yi8*Ymax+Ys32] = 1
   879  //
   880  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   881  // if that's what an instruction can handle.
   882  //
   883  // In parallel with the scan through the ytable for the appropriate line, there
   884  // is a z pointer that starts out pointing at the strange magic byte list in
   885  // the Optab struct.  With each step past a non-matching ytable line, z
   886  // advances by the 2nd entry in the line (its zoffset).  When a matching line is
   887  // found, that z pointer has the extra data to use in laying down the
   888  // instruction bytes.  The actual bytes laid down are a function of the 1st
   889  // entry in the line (that is, the Ztype) and the z bytes.
   890  //
   891  // For example, let's look at AADDL.  The optab line says:
   892  //
   893  //	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   894  //
   895  // and yaddl says
   896  //
   897  //	var yaddl = []ytab{
   898  //	        {Zibo_m, 2, argList{Yi8, Yml}},
   899  //	        {Zil_, 1, argList{Yi32, Yax}},
   900  //	        {Zilo_m, 2, argList{Yi32, Yml}},
   901  //	        {Zr_m, 1, argList{Yrl, Yml}},
   902  //	        {Zm_r, 1, argList{Yml, Yrl}},
   903  //	}
   904  //
   905  // so there are 5 possible types of ADDL instruction that can be laid down, and
   906  // possible states used to lay them down (Ztype and z pointer, assuming z
   907  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   908  //
   909  //	Yi8, Yml -> Zibo_m, z (0x83, 00)
   910  //	Yi32, Yax -> Zil_, z+2 (0x05)
   911  //	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   912  //	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   913  //	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   914  //
   915  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   916  // relatively straightforward as this program goes.
   917  //
   918  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   919  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   920  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   921  // Zilo_m is the same but a long (32-bit) immediate.
   922  var optab =
   923  // as, ytab, andproto, opcode
   924  [...]Optab{
   925  	{obj.AXXX, nil, 0, opBytes{}},
   926  	{AAAA, ynone, P32, opBytes{0x37}},
   927  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   928  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   929  	{AAAS, ynone, P32, opBytes{0x3f}},
   930  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   931  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   933  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   934  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   935  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   936  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   937  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   938  	{AADDPD, yxm, Pq, opBytes{0x58}},
   939  	{AADDPS, yxm, Pm, opBytes{0x58}},
   940  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   941  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   942  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   943  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   944  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   945  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   946  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   947  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   948  	{AADJSP, nil, 0, opBytes{}},
   949  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   950  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   951  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   952  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   953  	{AANDPD, yxm, Pq, opBytes{0x54}},
   954  	{AANDPS, yxm, Pm, opBytes{0x54}},
   955  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   956  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   957  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   958  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   959  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   960  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   961  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   962  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   963  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   964  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   965  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   966  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   967  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   968  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   969  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   970  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   971  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   972  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   973  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   974  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   975  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   976  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   977  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   978  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   979  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   980  	{ABYTE, ybyte, Px, opBytes{1}},
   981  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   982  	{ACBW, ynone, Pe, opBytes{0x98}},
   983  	{ACDQ, ynone, Px, opBytes{0x99}},
   984  	{ACDQE, ynone, Pw, opBytes{0x98}},
   985  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   986  	{ACLC, ynone, Px, opBytes{0xf8}},
   987  	{ACLD, ynone, Px, opBytes{0xfc}},
   988  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   989  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   990  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   991  	{ACLI, ynone, Px, opBytes{0xfa}},
   992  	{ACLTS, ynone, Pm, opBytes{0x06}},
   993  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   994  	{ACMC, ynone, Px, opBytes{0xf5}},
   995  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   996  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   997  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   998  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   999  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
  1000  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
  1001  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1002  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1003  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1004  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1005  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1006  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1007  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1008  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1009  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1010  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1011  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1012  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1013  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1014  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1015  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1016  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1017  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1018  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1019  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1020  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1021  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1022  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1023  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1024  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1025  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1026  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1027  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1028  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1029  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1030  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1031  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1032  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1033  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1034  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1035  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1036  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1037  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1038  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1039  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1040  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1041  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1042  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1043  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1044  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1045  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1046  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1047  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1048  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1049  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1050  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1051  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1052  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1053  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1054  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1055  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1056  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1057  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1058  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1059  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1060  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1061  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1062  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1063  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1064  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1065  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1066  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1067  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1068  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1069  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1070  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1071  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1072  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1073  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1074  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1075  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1076  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1077  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1078  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1079  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1080  	{ACWD, ynone, Pe, opBytes{0x99}},
  1081  	{ACWDE, ynone, Px, opBytes{0x98}},
  1082  	{ACQO, ynone, Pw, opBytes{0x99}},
  1083  	{ADAA, ynone, P32, opBytes{0x27}},
  1084  	{ADAS, ynone, P32, opBytes{0x2f}},
  1085  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1086  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1087  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1088  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1089  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1090  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1091  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1092  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1093  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1094  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1095  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1096  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1097  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1098  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1099  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1100  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1101  	{AENTER, nil, 0, opBytes{}}, // botch
  1102  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1103  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1104  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1105  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1106  	{AHLT, ynone, Px, opBytes{0xf4}},
  1107  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1108  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1109  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1110  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1111  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1112  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1115  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1117  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1118  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1119  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1120  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1121  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1122  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1123  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1124  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1125  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1126  	{AINSL, ynone, Px, opBytes{0x6d}},
  1127  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1128  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1129  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1130  	{AINT, yint, Px, opBytes{0xcd}},
  1131  	{AINTO, ynone, P32, opBytes{0xce}},
  1132  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1133  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1134  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1135  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1136  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1137  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1138  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1139  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1140  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1141  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1142  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1143  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1144  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1145  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1146  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1147  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1148  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1149  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1150  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1151  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1152  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1153  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1154  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1155  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1156  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1157  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1158  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1159  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1160  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1161  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1162  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1163  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1164  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1165  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1166  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1167  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1168  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1169  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1170  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1171  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1172  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1173  	{ALODSL, ynone, Px, opBytes{0xad}},
  1174  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1175  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1176  	{ALONG, ybyte, Px, opBytes{4}},
  1177  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1178  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1179  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1180  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1181  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1182  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1183  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1184  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1185  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1186  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1187  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1188  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1189  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1190  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1191  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1192  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1193  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1194  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1195  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1196  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1197  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1198  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1199  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1200  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1201  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1202  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1203  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1204  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1205  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1206  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1207  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1208  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1209  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1210  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1211  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1212  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1213  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1214  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1215  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1216  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1217  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1218  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1219  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1220  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1221  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1222  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1223  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1224  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1225  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1226  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1227  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1228  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1229  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1230  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1231  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1232  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1233  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1234  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1235  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1236  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1237  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1238  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1239  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1240  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1241  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1242  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1243  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1244  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1245  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1246  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1247  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1248  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1249  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1250  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1251  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1252  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1253  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1254  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1255  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1256  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1257  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1258  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1259  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1260  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1261  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1262  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1263  	{AORPD, yxm, Pq, opBytes{0x56}},
  1264  	{AORPS, yxm, Pm, opBytes{0x56}},
  1265  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1266  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1267  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1268  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1269  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1270  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1271  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1272  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1273  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1274  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1275  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1276  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1277  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1278  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1279  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1280  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1281  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1282  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1283  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1284  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1285  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1286  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1287  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1288  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1289  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1290  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1291  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1292  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1293  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1294  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1295  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1296  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1297  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1298  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1299  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1300  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1301  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1302  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1303  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1304  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1305  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1306  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1307  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1308  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1309  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1310  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1311  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1312  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1313  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1314  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1315  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1316  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1317  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1318  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1319  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1320  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1321  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1322  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1323  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1324  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1325  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1326  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1327  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1328  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1329  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1330  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1331  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1332  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1333  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1334  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1335  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1336  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1337  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1338  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1339  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1340  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1341  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1342  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1343  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1344  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1345  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1346  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1347  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1348  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1349  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1350  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1351  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1352  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1353  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1354  	{APOPAL, ynone, P32, opBytes{0x61}},
  1355  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1356  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1357  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1358  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1359  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1360  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1361  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1362  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1363  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1364  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1365  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1366  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1367  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1368  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1369  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1370  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1371  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1372  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1373  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1374  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1375  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1376  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1377  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1378  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1379  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1380  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1381  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1382  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1383  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1384  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1385  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1386  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1387  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1388  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1389  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1390  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1391  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1392  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1393  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1394  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1395  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1396  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1397  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1398  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1399  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1400  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1401  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1402  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1403  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1404  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1405  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1406  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1407  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1409  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1410  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1411  	{AQUAD, ybyte, Px, opBytes{8}},
  1412  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1413  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1415  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1416  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1417  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1418  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1419  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1421  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1422  	{AREP, ynone, Px, opBytes{0xf3}},
  1423  	{AREPN, ynone, Px, opBytes{0xf2}},
  1424  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1425  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1426  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1427  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1428  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1429  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1431  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1432  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1433  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1435  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1436  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1437  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1438  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1439  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1440  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1442  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1443  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1444  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1446  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1447  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1448  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1450  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1451  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1452  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1453  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1454  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1455  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1456  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1457  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1458  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1459  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1460  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1461  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1462  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1463  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1464  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1465  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1466  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1467  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1468  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1469  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1470  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1471  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1472  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1474  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1475  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1476  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1478  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1479  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1480  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1481  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1482  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1483  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1484  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1485  	{ASTC, ynone, Px, opBytes{0xf9}},
  1486  	{ASTD, ynone, Px, opBytes{0xfd}},
  1487  	{ASTI, ynone, Px, opBytes{0xfb}},
  1488  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1489  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1490  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1491  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1492  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1493  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1494  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1495  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1496  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1497  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1498  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1499  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1500  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1501  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1502  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1503  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1504  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1506  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1507  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1508  	{obj.ATEXT, ytext, Px, opBytes{}},
  1509  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1510  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1511  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1512  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1513  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1514  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1515  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1516  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1517  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1518  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1519  	{AWORD, ybyte, Px, opBytes{2}},
  1520  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1521  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1523  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1524  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1525  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1526  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1527  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1528  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1529  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1531  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1532  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1533  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1534  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1535  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1536  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1537  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1538  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1539  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1540  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1541  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1542  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1543  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1544  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1545  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1546  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1547  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1548  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1549  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1550  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1551  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1552  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1553  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1554  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1555  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1556  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1557  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1558  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1559  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1560  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1561  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1562  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1563  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1564  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1565  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1566  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1567  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1568  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1569  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1570  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1571  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1572  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1573  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1574  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1575  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1576  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1577  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1578  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1579  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1580  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1581  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1582  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1583  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1584  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1585  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1586  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1587  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1588  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1589  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1590  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1591  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1592  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1593  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1594  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1595  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1596  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1597  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1598  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1599  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1600  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1601  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1602  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1603  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1604  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1605  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1606  	{AFFREE, nil, 0, opBytes{}},
  1607  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1608  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1609  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1610  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1611  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1612  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1613  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1614  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1615  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1616  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1617  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1618  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1619  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1620  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1621  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1622  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1623  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1624  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1625  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1626  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1627  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1628  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1629  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1630  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1631  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1632  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1633  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1634  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1635  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1636  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1637  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1638  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1639  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1640  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1641  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1642  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1643  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1644  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1645  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1646  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1647  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1649  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1650  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1651  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1652  	{AINVD, ynone, Pm, opBytes{0x08}},
  1653  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1654  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1655  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1656  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1657  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1658  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1659  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1660  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1661  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1662  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1663  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1664  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1665  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1666  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1667  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1668  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1669  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1670  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1671  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1672  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1673  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1674  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1676  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1677  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1678  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1679  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1680  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1681  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1682  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1683  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1684  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1685  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1686  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1687  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1688  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1689  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1690  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1691  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1692  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1693  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1694  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1695  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1696  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1697  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1698  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1699  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1700  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1701  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1702  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1703  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1704  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1705  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1706  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1707  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1708  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1709  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1710  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1711  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1712  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1713  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1715  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1716  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1717  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1718  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1719  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1720  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1721  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1722  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1723  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1724  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1725  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1726  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1727  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1728  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1729  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1730  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1732  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1733  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1734  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1735  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1736  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1737  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1738  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1739  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1740  	{AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1741  	{AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1742  	{AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1743  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1744  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1745  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1746  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1747  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1748  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1749  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1750  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1751  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1752  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1753  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1754  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1755  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1756  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1757  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1758  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1759  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1760  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1761  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1762  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1763  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1764  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1765  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1766  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1767  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1768  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1769  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1770  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1771  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1772  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1773  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1774  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1775  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1776  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1777  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1778  	{ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}},
  1779  
  1780  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1781  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1782  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1783  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1784  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1785  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1786  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1787  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1788  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1789  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1790  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1791  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1792  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1793  
  1794  	{obj.AEND, nil, 0, opBytes{}},
  1795  	{0, nil, 0, opBytes{}},
  1796  }
  1797  
// opindex maps an instruction opcode, masked with obj.AMask, to its entry in
// avxOptab or optab. It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1799  
  1800  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1801  // This happens on systems like Solaris that call .so functions instead of system calls.
  1802  // It does not seem to be necessary for any other systems. This is probably working
  1803  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1804  // what that bug is. And this does fix it.
  1805  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1806  	if ctxt.Headtype == objabi.Hsolaris {
  1807  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1808  		return strings.HasPrefix(s.Name, "libc_")
  1809  	}
  1810  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1811  }
  1812  
  1813  // single-instruction no-ops of various lengths.
  1814  // constructed by hand and disassembled with gdb to verify.
  1815  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1816  var nop = [][16]uint8{
  1817  	{0x90},
  1818  	{0x66, 0x90},
  1819  	{0x0F, 0x1F, 0x00},
  1820  	{0x0F, 0x1F, 0x40, 0x00},
  1821  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1822  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1823  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1824  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1825  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1826  }
  1827  
  1828  // Native Client rejects the repeated 0x66 prefix.
  1829  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1830  func fillnop(p []byte, n int) {
  1831  	var m int
  1832  
  1833  	for n > 0 {
  1834  		m = n
  1835  		if m > len(nop) {
  1836  			m = len(nop)
  1837  		}
  1838  		copy(p[:m], nop[m-1][:m])
  1839  		p = p[m:]
  1840  		n -= m
  1841  	}
  1842  }
  1843  
  1844  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1845  	s.Grow(int64(c) + int64(pad))
  1846  	fillnop(s.P[c:], int(pad))
  1847  	return c + pad
  1848  }
  1849  
  1850  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1851  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1852  		return l
  1853  	}
  1854  	return q
  1855  }
  1856  
  1857  // isJump returns whether p is a jump instruction.
  1858  // It is used to ensure that no standalone or macro-fused jump will straddle
  1859  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1860  func isJump(p *obj.Prog) bool {
  1861  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1862  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1863  }
  1864  
  1865  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1866  // jump. Otherwise, nil is returned.
  1867  func lookForJCC(p *obj.Prog) *obj.Prog {
  1868  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1869  	var q *obj.Prog
  1870  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1871  	}
  1872  
  1873  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1874  		return nil
  1875  	}
  1876  
  1877  	switch q.As {
  1878  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1879  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1880  	default:
  1881  		return nil
  1882  	}
  1883  
  1884  	return q
  1885  }
  1886  
  1887  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1888  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1889  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1890  func fusedJump(p *obj.Prog) (bool, uint8) {
  1891  	var fusedSize uint8
  1892  
  1893  	// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
  1894  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1895  	// need to be careful to insert any padding before the locks rather than directly after them.
  1896  
  1897  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1898  		fusedSize += p.Isize
  1899  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1900  		}
  1901  		if p == nil {
  1902  			return false, 0
  1903  		}
  1904  	}
  1905  	if p.As == ALOCK {
  1906  		fusedSize += p.Isize
  1907  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1908  		}
  1909  		if p == nil {
  1910  			return false, 0
  1911  		}
  1912  	}
  1913  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1914  
  1915  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1916  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1917  
  1918  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1919  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1920  
  1921  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1922  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1923  
  1924  	if !cmpAddSub && !testAnd && !incDec {
  1925  		return false, 0
  1926  	}
  1927  
  1928  	if !incDec {
  1929  		var argOne obj.AddrType
  1930  		var argTwo obj.AddrType
  1931  		if cmp {
  1932  			argOne = p.From.Type
  1933  			argTwo = p.To.Type
  1934  		} else {
  1935  			argOne = p.To.Type
  1936  			argTwo = p.From.Type
  1937  		}
  1938  		if argOne == obj.TYPE_REG {
  1939  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1940  				return false, 0
  1941  			}
  1942  		} else if argOne == obj.TYPE_MEM {
  1943  			if argTwo != obj.TYPE_REG {
  1944  				return false, 0
  1945  			}
  1946  		} else {
  1947  			return false, 0
  1948  		}
  1949  	}
  1950  
  1951  	fusedSize += p.Isize
  1952  	jmp := lookForJCC(p)
  1953  	if jmp == nil {
  1954  		return false, 0
  1955  	}
  1956  
  1957  	fusedSize += jmp.Isize
  1958  
  1959  	if testAnd {
  1960  		return true, fusedSize
  1961  	}
  1962  
  1963  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1964  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1965  		return false, 0
  1966  	}
  1967  
  1968  	if cmpAddSub {
  1969  		return true, fusedSize
  1970  	}
  1971  
  1972  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1973  		return false, 0
  1974  	}
  1975  
  1976  	return true, fusedSize
  1977  }
  1978  
// padJumpsCtx is the byte boundary that jumps and macro-fused jumps must not
// cross or end on (makePjcCtx uses 32). A value of 0 disables jump padding.
type padJumpsCtx int32
  1980  
  1981  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1982  	// Disable jump padding on 32 bit builds by setting
  1983  	// padJumps to 0.
  1984  	if ctxt.Arch.Family == sys.I386 {
  1985  		return padJumpsCtx(0)
  1986  	}
  1987  
  1988  	// Disable jump padding for hand written assembly code.
  1989  	if ctxt.IsAsm {
  1990  		return padJumpsCtx(0)
  1991  	}
  1992  
  1993  	return padJumpsCtx(32)
  1994  }
  1995  
  1996  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1997  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1998  // not cross or end on a 32 byte boundary.
  1999  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  2000  	if pjc == 0 {
  2001  		return c
  2002  	}
  2003  
  2004  	var toPad int32
  2005  	fj, fjSize := fusedJump(p)
  2006  	mask := int32(pjc - 1)
  2007  	if fj {
  2008  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2009  			toPad = int32(pjc) - (c & mask)
  2010  		}
  2011  	} else if isJump(p) {
  2012  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2013  			toPad = int32(pjc) - (c & mask)
  2014  		}
  2015  	}
  2016  	if toPad <= 0 {
  2017  		return c
  2018  	}
  2019  
  2020  	return noppad(ctxt, s, c, toPad)
  2021  }
  2022  
  2023  // reAssemble is called if an instruction's size changes during assembly. If
  2024  // it does and the instruction is a standalone or a macro-fused jump we need to
  2025  // reassemble.
  2026  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2027  	if pjc == 0 {
  2028  		return false
  2029  	}
  2030  
  2031  	fj, _ := fusedJump(p)
  2032  	return fj || isJump(p)
  2033  }
  2034  
// nopPad records a run of padding bytes emitted during assembly. span6
// collects these and later splices a NOP Prog of size n after p.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
  2039  
  2040  // Padding bytes to add to align code as requested.
  2041  // Alignment is restricted to powers of 2 between 8 and 2048 inclusive.
  2042  //
  2043  // pc: current offset in function, in bytes
  2044  // a: requested alignment, in bytes
  2045  // cursym: current function being assembled
  2046  // returns number of bytes of padding needed
  2047  func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
  2048  	if !((a&(a-1) == 0) && 8 <= a && a <= 2048) {
  2049  		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a)
  2050  		return 0
  2051  	}
  2052  
  2053  	// By default function alignment is 32 bytes for amd64
  2054  	if cursym.Func().Align < int32(a) {
  2055  		cursym.Func().Align = int32(a)
  2056  	}
  2057  
  2058  	if pc&(a-1) != 0 {
  2059  		return int(a - (pc & (a - 1)))
  2060  	}
  2061  
  2062  	return 0
  2063  }
  2064  
// span6 assembles the instructions of the function symbol s into s.P.
//
// It first rewrites a few instructions in place (self-targeting unresolved
// branches, ADJSP, and indirect CALL/JMP under retpoline mode), then encodes
// the whole function repeatedly until a pass completes without requesting
// re-assembly. Each pass assigns p.Pc to every instruction, inserts NOP
// padding for jump alignment, PCALIGN and loop heads, and patches forward
// branch displacements. Afterwards it splices the recorded padding into the
// Prog list as NOP instructions, sets s.Size, marks the two-instruction TLS
// access sequence as an unsafe point where applicable, and writes the jump
// table entries.
//
// span6 returns immediately if s.P is already non-nil.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
		ctxt.Diag("-spectre=ret not supported on 386")
		ctxt.Retpoline = false // don't keep printing
	}

	pjc := makePjcCtx(ctxt)

	// NOTE(review): a non-nil s.P presumably means s was already assembled — confirm.
	if s.P != nil {
		return
	}

	// instinit sets ycover[0]; a zero here means it has not run.
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pre-pass: rewrite instructions that need a different form before encoding.
	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch with no target yet is pointed at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// Under retpoline mode, an indirect CALL/JMP through a register becomes a
		// direct branch to runtime.retpoline<reg>; a memory operand is rejected.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// Back is assigned as each Prog is visited, so a target that already has
		// branchShort set was presumably visited earlier in the list: the branch
		// goes backwards and its target is flagged as a loop head.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current byte offset within the function
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard everything produced by a previous pass: clear and truncate the
		// relocations added since entry, and reset the code bytes and offset.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			// c0 is the offset before any padding inserted ahead of p.
			c0 := c
			c = pjc.padJump(ctxt, s, p, c)

			// PCALIGN emits only padding; no instruction bytes are assembled for it.
			if p.As == obj.APCALIGN {
				aln := p.From.Offset
				v := addpad(int64(c), aln, ctxt, s)
				if v > 0 {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
				}

				c += int32(v)
				pPrev = p
				continue
			}

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				// Only align the loop head when the padding stays within maxLoopPad.
				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// v is the displacement from the end of branch q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// The displacement does not fit the short form: switch q to the
					// long form and request another pass.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// The one-byte displacement sits at offset 2 for JCXZL and XBEGIN
					// and at offset 1 for the other short branches.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the displacement is the last 4 bytes of q.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			// (c > c0 means NOPs were inserted ahead of p in this iteration.)
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Guard against passes that never converge.
		n++
		if n > 1000 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Do not start another pass if errors were reported since entry.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		// The NOP Prog follows pp and covers the n.n padding bytes after it.
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}

	// Now that we know byte offsets, we can generate jump table entries.
	// TODO: could this live in obj instead of obj/$ARCH?
	for _, jt := range s.Func().JumpTables {
		for i, p := range jt.Targets {
			// The ith jumptable entry points to the p.Pc'th
			// byte in the function symbol s.
			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
		}
	}
}
  2284  
  2285  func instinit(ctxt *obj.Link) {
  2286  	if ycover[0] != 0 {
  2287  		// Already initialized; stop now.
  2288  		// This happens in the cmd/asm tests,
  2289  		// each of which re-initializes the arch.
  2290  		return
  2291  	}
  2292  
  2293  	switch ctxt.Headtype {
  2294  	case objabi.Hplan9:
  2295  		plan9privates = ctxt.Lookup("_privates")
  2296  	}
  2297  
  2298  	for i := range avxOptab {
  2299  		c := avxOptab[i].as
  2300  		if opindex[c&obj.AMask] != nil {
  2301  			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
  2302  		}
  2303  		opindex[c&obj.AMask] = &avxOptab[i]
  2304  	}
  2305  	for i := 1; optab[i].as != 0; i++ {
  2306  		c := optab[i].as
  2307  		if opindex[c&obj.AMask] != nil {
  2308  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2309  		}
  2310  		opindex[c&obj.AMask] = &optab[i]
  2311  	}
  2312  
  2313  	for i := 0; i < Ymax; i++ {
  2314  		ycover[i*Ymax+i] = 1
  2315  	}
  2316  
  2317  	ycover[Yi0*Ymax+Yu2] = 1
  2318  	ycover[Yi1*Ymax+Yu2] = 1
  2319  
  2320  	ycover[Yi0*Ymax+Yi8] = 1
  2321  	ycover[Yi1*Ymax+Yi8] = 1
  2322  	ycover[Yu2*Ymax+Yi8] = 1
  2323  	ycover[Yu7*Ymax+Yi8] = 1
  2324  
  2325  	ycover[Yi0*Ymax+Yu7] = 1
  2326  	ycover[Yi1*Ymax+Yu7] = 1
  2327  	ycover[Yu2*Ymax+Yu7] = 1
  2328  
  2329  	ycover[Yi0*Ymax+Yu8] = 1
  2330  	ycover[Yi1*Ymax+Yu8] = 1
  2331  	ycover[Yu2*Ymax+Yu8] = 1
  2332  	ycover[Yu7*Ymax+Yu8] = 1
  2333  
  2334  	ycover[Yi0*Ymax+Ys32] = 1
  2335  	ycover[Yi1*Ymax+Ys32] = 1
  2336  	ycover[Yu2*Ymax+Ys32] = 1
  2337  	ycover[Yu7*Ymax+Ys32] = 1
  2338  	ycover[Yu8*Ymax+Ys32] = 1
  2339  	ycover[Yi8*Ymax+Ys32] = 1
  2340  
  2341  	ycover[Yi0*Ymax+Yi32] = 1
  2342  	ycover[Yi1*Ymax+Yi32] = 1
  2343  	ycover[Yu2*Ymax+Yi32] = 1
  2344  	ycover[Yu7*Ymax+Yi32] = 1
  2345  	ycover[Yu8*Ymax+Yi32] = 1
  2346  	ycover[Yi8*Ymax+Yi32] = 1
  2347  	ycover[Ys32*Ymax+Yi32] = 1
  2348  
  2349  	ycover[Yi0*Ymax+Yi64] = 1
  2350  	ycover[Yi1*Ymax+Yi64] = 1
  2351  	ycover[Yu7*Ymax+Yi64] = 1
  2352  	ycover[Yu2*Ymax+Yi64] = 1
  2353  	ycover[Yu8*Ymax+Yi64] = 1
  2354  	ycover[Yi8*Ymax+Yi64] = 1
  2355  	ycover[Ys32*Ymax+Yi64] = 1
  2356  	ycover[Yi32*Ymax+Yi64] = 1
  2357  
  2358  	ycover[Yal*Ymax+Yrb] = 1
  2359  	ycover[Ycl*Ymax+Yrb] = 1
  2360  	ycover[Yax*Ymax+Yrb] = 1
  2361  	ycover[Ycx*Ymax+Yrb] = 1
  2362  	ycover[Yrx*Ymax+Yrb] = 1
  2363  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2364  
  2365  	ycover[Ycl*Ymax+Ycx] = 1
  2366  
  2367  	ycover[Yax*Ymax+Yrx] = 1
  2368  	ycover[Ycx*Ymax+Yrx] = 1
  2369  
  2370  	ycover[Yax*Ymax+Yrl] = 1
  2371  	ycover[Ycx*Ymax+Yrl] = 1
  2372  	ycover[Yrx*Ymax+Yrl] = 1
  2373  	ycover[Yrl32*Ymax+Yrl] = 1
  2374  
  2375  	ycover[Yf0*Ymax+Yrf] = 1
  2376  
  2377  	ycover[Yal*Ymax+Ymb] = 1
  2378  	ycover[Ycl*Ymax+Ymb] = 1
  2379  	ycover[Yax*Ymax+Ymb] = 1
  2380  	ycover[Ycx*Ymax+Ymb] = 1
  2381  	ycover[Yrx*Ymax+Ymb] = 1
  2382  	ycover[Yrb*Ymax+Ymb] = 1
  2383  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2384  	ycover[Ym*Ymax+Ymb] = 1
  2385  
  2386  	ycover[Yax*Ymax+Yml] = 1
  2387  	ycover[Ycx*Ymax+Yml] = 1
  2388  	ycover[Yrx*Ymax+Yml] = 1
  2389  	ycover[Yrl*Ymax+Yml] = 1
  2390  	ycover[Yrl32*Ymax+Yml] = 1
  2391  	ycover[Ym*Ymax+Yml] = 1
  2392  
  2393  	ycover[Yax*Ymax+Ymm] = 1
  2394  	ycover[Ycx*Ymax+Ymm] = 1
  2395  	ycover[Yrx*Ymax+Ymm] = 1
  2396  	ycover[Yrl*Ymax+Ymm] = 1
  2397  	ycover[Yrl32*Ymax+Ymm] = 1
  2398  	ycover[Ym*Ymax+Ymm] = 1
  2399  	ycover[Ymr*Ymax+Ymm] = 1
  2400  
  2401  	ycover[Yxr0*Ymax+Yxr] = 1
  2402  
  2403  	ycover[Ym*Ymax+Yxm] = 1
  2404  	ycover[Yxr0*Ymax+Yxm] = 1
  2405  	ycover[Yxr*Ymax+Yxm] = 1
  2406  
  2407  	ycover[Ym*Ymax+Yym] = 1
  2408  	ycover[Yyr*Ymax+Yym] = 1
  2409  
  2410  	ycover[Yxr0*Ymax+YxrEvex] = 1
  2411  	ycover[Yxr*Ymax+YxrEvex] = 1
  2412  
  2413  	ycover[Ym*Ymax+YxmEvex] = 1
  2414  	ycover[Yxr0*Ymax+YxmEvex] = 1
  2415  	ycover[Yxr*Ymax+YxmEvex] = 1
  2416  	ycover[YxrEvex*Ymax+YxmEvex] = 1
  2417  
  2418  	ycover[Yyr*Ymax+YyrEvex] = 1
  2419  
  2420  	ycover[Ym*Ymax+YymEvex] = 1
  2421  	ycover[Yyr*Ymax+YymEvex] = 1
  2422  	ycover[YyrEvex*Ymax+YymEvex] = 1
  2423  
  2424  	ycover[Ym*Ymax+Yzm] = 1
  2425  	ycover[Yzr*Ymax+Yzm] = 1
  2426  
  2427  	ycover[Yk0*Ymax+Yk] = 1
  2428  	ycover[Yknot0*Ymax+Yk] = 1
  2429  
  2430  	ycover[Yk0*Ymax+Ykm] = 1
  2431  	ycover[Yknot0*Ymax+Ykm] = 1
  2432  	ycover[Yk*Ymax+Ykm] = 1
  2433  	ycover[Ym*Ymax+Ykm] = 1
  2434  
  2435  	ycover[Yxvm*Ymax+YxvmEvex] = 1
  2436  
  2437  	ycover[Yyvm*Ymax+YyvmEvex] = 1
  2438  
  2439  	for i := 0; i < MAXREG; i++ {
  2440  		reg[i] = -1
  2441  		if i >= REG_AL && i <= REG_R15B {
  2442  			reg[i] = (i - REG_AL) & 7
  2443  			if i >= REG_SPB && i <= REG_DIB {
  2444  				regrex[i] = 0x40
  2445  			}
  2446  			if i >= REG_R8B && i <= REG_R15B {
  2447  				regrex[i] = Rxr | Rxx | Rxb
  2448  			}
  2449  		}
  2450  
  2451  		if i >= REG_AH && i <= REG_BH {
  2452  			reg[i] = 4 + ((i - REG_AH) & 7)
  2453  		}
  2454  		if i >= REG_AX && i <= REG_R15 {
  2455  			reg[i] = (i - REG_AX) & 7
  2456  			if i >= REG_R8 {
  2457  				regrex[i] = Rxr | Rxx | Rxb
  2458  			}
  2459  		}
  2460  
  2461  		if i >= REG_F0 && i <= REG_F0+7 {
  2462  			reg[i] = (i - REG_F0) & 7
  2463  		}
  2464  		if i >= REG_M0 && i <= REG_M0+7 {
  2465  			reg[i] = (i - REG_M0) & 7
  2466  		}
  2467  		if i >= REG_K0 && i <= REG_K0+7 {
  2468  			reg[i] = (i - REG_K0) & 7
  2469  		}
  2470  		if i >= REG_X0 && i <= REG_X0+15 {
  2471  			reg[i] = (i - REG_X0) & 7
  2472  			if i >= REG_X0+8 {
  2473  				regrex[i] = Rxr | Rxx | Rxb
  2474  			}
  2475  		}
  2476  		if i >= REG_X16 && i <= REG_X16+15 {
  2477  			reg[i] = (i - REG_X16) & 7
  2478  			if i >= REG_X16+8 {
  2479  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2480  			} else {
  2481  				regrex[i] = RxrEvex
  2482  			}
  2483  		}
  2484  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2485  			reg[i] = (i - REG_Y0) & 7
  2486  			if i >= REG_Y0+8 {
  2487  				regrex[i] = Rxr | Rxx | Rxb
  2488  			}
  2489  		}
  2490  		if i >= REG_Y16 && i <= REG_Y16+15 {
  2491  			reg[i] = (i - REG_Y16) & 7
  2492  			if i >= REG_Y16+8 {
  2493  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2494  			} else {
  2495  				regrex[i] = RxrEvex
  2496  			}
  2497  		}
  2498  		if i >= REG_Z0 && i <= REG_Z0+15 {
  2499  			reg[i] = (i - REG_Z0) & 7
  2500  			if i > REG_Z0+7 {
  2501  				regrex[i] = Rxr | Rxx | Rxb
  2502  			}
  2503  		}
  2504  		if i >= REG_Z16 && i <= REG_Z16+15 {
  2505  			reg[i] = (i - REG_Z16) & 7
  2506  			if i >= REG_Z16+8 {
  2507  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2508  			} else {
  2509  				regrex[i] = RxrEvex
  2510  			}
  2511  		}
  2512  
  2513  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2514  			regrex[i] = Rxr
  2515  		}
  2516  	}
  2517  }
  2518  
// isAndroid records whether buildcfg.GOOS is "android". It is computed once,
// when the package is initialized, and consulted when choosing TLS encodings.
var isAndroid = buildcfg.GOOS == "android"
  2520  
  2521  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2522  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2523  		return 0
  2524  	}
  2525  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2526  		switch a.Reg {
  2527  		case REG_CS:
  2528  			return 0x2e
  2529  
  2530  		case REG_DS:
  2531  			return 0x3e
  2532  
  2533  		case REG_ES:
  2534  			return 0x26
  2535  
  2536  		case REG_FS:
  2537  			return 0x64
  2538  
  2539  		case REG_GS:
  2540  			return 0x65
  2541  
  2542  		case REG_TLS:
  2543  			// NOTE: Systems listed here should be only systems that
  2544  			// support direct TLS references like 8(TLS) implemented as
  2545  			// direct references from FS or GS. Systems that require
  2546  			// the initial-exec model, where you load the TLS base into
  2547  			// a register and then index from that register, do not reach
  2548  			// this code and should not be listed.
  2549  			if ctxt.Arch.Family == sys.I386 {
  2550  				switch ctxt.Headtype {
  2551  				default:
  2552  					if isAndroid {
  2553  						return 0x65 // GS
  2554  					}
  2555  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2556  
  2557  				case objabi.Hdarwin,
  2558  					objabi.Hdragonfly,
  2559  					objabi.Hfreebsd,
  2560  					objabi.Hnetbsd,
  2561  					objabi.Hopenbsd:
  2562  					return 0x65 // GS
  2563  				}
  2564  			}
  2565  
  2566  			switch ctxt.Headtype {
  2567  			default:
  2568  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2569  
  2570  			case objabi.Hlinux:
  2571  				if isAndroid {
  2572  					return 0x64 // FS
  2573  				}
  2574  
  2575  				if ctxt.Flag_shared {
  2576  					log.Fatalf("unknown TLS base register for linux with -shared")
  2577  				} else {
  2578  					return 0x64 // FS
  2579  				}
  2580  
  2581  			case objabi.Hdragonfly,
  2582  				objabi.Hfreebsd,
  2583  				objabi.Hnetbsd,
  2584  				objabi.Hopenbsd,
  2585  				objabi.Hsolaris:
  2586  				return 0x64 // FS
  2587  
  2588  			case objabi.Hdarwin:
  2589  				return 0x65 // GS
  2590  			}
  2591  		}
  2592  	}
  2593  
  2594  	switch a.Index {
  2595  	case REG_CS:
  2596  		return 0x2e
  2597  
  2598  	case REG_DS:
  2599  		return 0x3e
  2600  
  2601  	case REG_ES:
  2602  		return 0x26
  2603  
  2604  	case REG_TLS:
  2605  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2606  			// When building for inclusion into a shared library, an instruction of the form
  2607  			//     MOV off(CX)(TLS*1), AX
  2608  			// becomes
  2609  			//     mov %gs:off(%ecx), %eax // on i386
  2610  			//     mov %fs:off(%rcx), %rax // on amd64
  2611  			// which assumes that the correct TLS offset has been loaded into CX (today
  2612  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2613  			// a shared library the instruction it becomes
  2614  			//     mov 0x0(%ecx), %eax // on i386
  2615  			//     mov 0x0(%rcx), %rax // on amd64
  2616  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2617  			if ctxt.Arch.Family == sys.I386 {
  2618  				return 0x65 // GS
  2619  			}
  2620  			return 0x64 // FS
  2621  		}
  2622  
  2623  	case REG_FS:
  2624  		return 0x64
  2625  
  2626  	case REG_GS:
  2627  		return 0x65
  2628  	}
  2629  
  2630  	return 0
  2631  }
  2632  
  2633  // oclassRegList returns multisource operand class for addr.
  2634  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2635  	// TODO(quasilyte): when oclass register case is refactored into
  2636  	// lookup table, use it here to get register kind more easily.
  2637  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2638  
  2639  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2640  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2641  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2642  
  2643  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2644  	low := regIndex(int16(reg0))
  2645  	high := regIndex(int16(reg1))
  2646  
  2647  	if ctxt.Arch.Family == sys.I386 {
  2648  		if low >= 8 || high >= 8 {
  2649  			return Yxxx
  2650  		}
  2651  	}
  2652  
  2653  	switch high - low {
  2654  	case 3:
  2655  		switch {
  2656  		case regIsXmm(reg0) && regIsXmm(reg1):
  2657  			return YxrEvexMulti4
  2658  		case regIsYmm(reg0) && regIsYmm(reg1):
  2659  			return YyrEvexMulti4
  2660  		case regIsZmm(reg0) && regIsZmm(reg1):
  2661  			return YzrMulti4
  2662  		default:
  2663  			return Yxxx
  2664  		}
  2665  	default:
  2666  		return Yxxx
  2667  	}
  2668  }
  2669  
  2670  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2671  // For addr that is not V-mem returns (Yxxx, false).
  2672  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2673  	switch addr.Index {
  2674  	case REG_X0 + 0,
  2675  		REG_X0 + 1,
  2676  		REG_X0 + 2,
  2677  		REG_X0 + 3,
  2678  		REG_X0 + 4,
  2679  		REG_X0 + 5,
  2680  		REG_X0 + 6,
  2681  		REG_X0 + 7:
  2682  		return Yxvm, true
  2683  	case REG_X8 + 0,
  2684  		REG_X8 + 1,
  2685  		REG_X8 + 2,
  2686  		REG_X8 + 3,
  2687  		REG_X8 + 4,
  2688  		REG_X8 + 5,
  2689  		REG_X8 + 6,
  2690  		REG_X8 + 7:
  2691  		if ctxt.Arch.Family == sys.I386 {
  2692  			return Yxxx, true
  2693  		}
  2694  		return Yxvm, true
  2695  	case REG_X16 + 0,
  2696  		REG_X16 + 1,
  2697  		REG_X16 + 2,
  2698  		REG_X16 + 3,
  2699  		REG_X16 + 4,
  2700  		REG_X16 + 5,
  2701  		REG_X16 + 6,
  2702  		REG_X16 + 7,
  2703  		REG_X16 + 8,
  2704  		REG_X16 + 9,
  2705  		REG_X16 + 10,
  2706  		REG_X16 + 11,
  2707  		REG_X16 + 12,
  2708  		REG_X16 + 13,
  2709  		REG_X16 + 14,
  2710  		REG_X16 + 15:
  2711  		if ctxt.Arch.Family == sys.I386 {
  2712  			return Yxxx, true
  2713  		}
  2714  		return YxvmEvex, true
  2715  
  2716  	case REG_Y0 + 0,
  2717  		REG_Y0 + 1,
  2718  		REG_Y0 + 2,
  2719  		REG_Y0 + 3,
  2720  		REG_Y0 + 4,
  2721  		REG_Y0 + 5,
  2722  		REG_Y0 + 6,
  2723  		REG_Y0 + 7:
  2724  		return Yyvm, true
  2725  	case REG_Y8 + 0,
  2726  		REG_Y8 + 1,
  2727  		REG_Y8 + 2,
  2728  		REG_Y8 + 3,
  2729  		REG_Y8 + 4,
  2730  		REG_Y8 + 5,
  2731  		REG_Y8 + 6,
  2732  		REG_Y8 + 7:
  2733  		if ctxt.Arch.Family == sys.I386 {
  2734  			return Yxxx, true
  2735  		}
  2736  		return Yyvm, true
  2737  	case REG_Y16 + 0,
  2738  		REG_Y16 + 1,
  2739  		REG_Y16 + 2,
  2740  		REG_Y16 + 3,
  2741  		REG_Y16 + 4,
  2742  		REG_Y16 + 5,
  2743  		REG_Y16 + 6,
  2744  		REG_Y16 + 7,
  2745  		REG_Y16 + 8,
  2746  		REG_Y16 + 9,
  2747  		REG_Y16 + 10,
  2748  		REG_Y16 + 11,
  2749  		REG_Y16 + 12,
  2750  		REG_Y16 + 13,
  2751  		REG_Y16 + 14,
  2752  		REG_Y16 + 15:
  2753  		if ctxt.Arch.Family == sys.I386 {
  2754  			return Yxxx, true
  2755  		}
  2756  		return YyvmEvex, true
  2757  
  2758  	case REG_Z0 + 0,
  2759  		REG_Z0 + 1,
  2760  		REG_Z0 + 2,
  2761  		REG_Z0 + 3,
  2762  		REG_Z0 + 4,
  2763  		REG_Z0 + 5,
  2764  		REG_Z0 + 6,
  2765  		REG_Z0 + 7:
  2766  		return Yzvm, true
  2767  	case REG_Z8 + 0,
  2768  		REG_Z8 + 1,
  2769  		REG_Z8 + 2,
  2770  		REG_Z8 + 3,
  2771  		REG_Z8 + 4,
  2772  		REG_Z8 + 5,
  2773  		REG_Z8 + 6,
  2774  		REG_Z8 + 7,
  2775  		REG_Z8 + 8,
  2776  		REG_Z8 + 9,
  2777  		REG_Z8 + 10,
  2778  		REG_Z8 + 11,
  2779  		REG_Z8 + 12,
  2780  		REG_Z8 + 13,
  2781  		REG_Z8 + 14,
  2782  		REG_Z8 + 15,
  2783  		REG_Z8 + 16,
  2784  		REG_Z8 + 17,
  2785  		REG_Z8 + 18,
  2786  		REG_Z8 + 19,
  2787  		REG_Z8 + 20,
  2788  		REG_Z8 + 21,
  2789  		REG_Z8 + 22,
  2790  		REG_Z8 + 23:
  2791  		if ctxt.Arch.Family == sys.I386 {
  2792  			return Yxxx, true
  2793  		}
  2794  		return Yzvm, true
  2795  	}
  2796  
  2797  	return Yxxx, false
  2798  }
  2799  
  2800  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2801  	switch a.Type {
  2802  	case obj.TYPE_REGLIST:
  2803  		return oclassRegList(ctxt, a)
  2804  
  2805  	case obj.TYPE_NONE:
  2806  		return Ynone
  2807  
  2808  	case obj.TYPE_BRANCH:
  2809  		return Ybr
  2810  
  2811  	case obj.TYPE_INDIR:
  2812  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2813  			return Yindir
  2814  		}
  2815  		return Yxxx
  2816  
  2817  	case obj.TYPE_MEM:
  2818  		// Pseudo registers have negative index, but SP is
  2819  		// not pseudo on x86, hence REG_SP check is not redundant.
  2820  		if a.Index == REG_SP || a.Index < 0 {
  2821  			// Can't use FP/SB/PC/SP as the index register.
  2822  			return Yxxx
  2823  		}
  2824  
  2825  		if vmem, ok := oclassVMem(ctxt, a); ok {
  2826  			return vmem
  2827  		}
  2828  
  2829  		if ctxt.Arch.Family == sys.AMD64 {
  2830  			switch a.Name {
  2831  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2832  				// Global variables can't use index registers and their
  2833  				// base register is %rip (%rip is encoded as REG_NONE).
  2834  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2835  					return Yxxx
  2836  				}
  2837  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2838  				// These names must have a base of SP.  The old compiler
  2839  				// uses 0 for the base register. SSA uses REG_SP.
  2840  				if a.Reg != REG_SP && a.Reg != 0 {
  2841  					return Yxxx
  2842  				}
  2843  			case obj.NAME_NONE:
  2844  				// everything is ok
  2845  			default:
  2846  				// unknown name
  2847  				return Yxxx
  2848  			}
  2849  		}
  2850  		return Ym
  2851  
  2852  	case obj.TYPE_ADDR:
  2853  		switch a.Name {
  2854  		case obj.NAME_GOTREF:
  2855  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2856  			return Yxxx
  2857  
  2858  		case obj.NAME_EXTERN,
  2859  			obj.NAME_STATIC:
  2860  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2861  				return Yi32
  2862  			}
  2863  			return Yiauto // use pc-relative addressing
  2864  
  2865  		case obj.NAME_AUTO,
  2866  			obj.NAME_PARAM:
  2867  			return Yiauto
  2868  		}
  2869  
  2870  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2871  		// and got Yi32 in an earlier version of this code.
  2872  		// Keep doing that until we fix yduff etc.
  2873  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2874  			return Yi32
  2875  		}
  2876  
  2877  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2878  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2879  		}
  2880  		fallthrough
  2881  
  2882  	case obj.TYPE_CONST:
  2883  		if a.Sym != nil {
  2884  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2885  		}
  2886  
  2887  		v := a.Offset
  2888  		if ctxt.Arch.Family == sys.I386 {
  2889  			v = int64(int32(v))
  2890  		}
  2891  		switch {
  2892  		case v == 0:
  2893  			return Yi0
  2894  		case v == 1:
  2895  			return Yi1
  2896  		case v >= 0 && v <= 3:
  2897  			return Yu2
  2898  		case v >= 0 && v <= 127:
  2899  			return Yu7
  2900  		case v >= 0 && v <= 255:
  2901  			return Yu8
  2902  		case v >= -128 && v <= 127:
  2903  			return Yi8
  2904  		}
  2905  		if ctxt.Arch.Family == sys.I386 {
  2906  			return Yi32
  2907  		}
  2908  		l := int32(v)
  2909  		if int64(l) == v {
  2910  			return Ys32 // can sign extend
  2911  		}
  2912  		if v>>32 == 0 {
  2913  			return Yi32 // unsigned
  2914  		}
  2915  		return Yi64
  2916  
  2917  	case obj.TYPE_TEXTSIZE:
  2918  		return Ytextsize
  2919  	}
  2920  
  2921  	if a.Type != obj.TYPE_REG {
  2922  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2923  		return Yxxx
  2924  	}
  2925  
  2926  	switch a.Reg {
  2927  	case REG_AL:
  2928  		return Yal
  2929  
  2930  	case REG_AX:
  2931  		return Yax
  2932  
  2933  		/*
  2934  			case REG_SPB:
  2935  		*/
  2936  	case REG_BPB,
  2937  		REG_SIB,
  2938  		REG_DIB,
  2939  		REG_R8B,
  2940  		REG_R9B,
  2941  		REG_R10B,
  2942  		REG_R11B,
  2943  		REG_R12B,
  2944  		REG_R13B,
  2945  		REG_R14B,
  2946  		REG_R15B:
  2947  		if ctxt.Arch.Family == sys.I386 {
  2948  			return Yxxx
  2949  		}
  2950  		fallthrough
  2951  
  2952  	case REG_DL,
  2953  		REG_BL,
  2954  		REG_AH,
  2955  		REG_CH,
  2956  		REG_DH,
  2957  		REG_BH:
  2958  		return Yrb
  2959  
  2960  	case REG_CL:
  2961  		return Ycl
  2962  
  2963  	case REG_CX:
  2964  		return Ycx
  2965  
  2966  	case REG_DX, REG_BX:
  2967  		return Yrx
  2968  
  2969  	case REG_R8, // not really Yrl
  2970  		REG_R9,
  2971  		REG_R10,
  2972  		REG_R11,
  2973  		REG_R12,
  2974  		REG_R13,
  2975  		REG_R14,
  2976  		REG_R15:
  2977  		if ctxt.Arch.Family == sys.I386 {
  2978  			return Yxxx
  2979  		}
  2980  		fallthrough
  2981  
  2982  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2983  		if ctxt.Arch.Family == sys.I386 {
  2984  			return Yrl32
  2985  		}
  2986  		return Yrl
  2987  
  2988  	case REG_F0 + 0:
  2989  		return Yf0
  2990  
  2991  	case REG_F0 + 1,
  2992  		REG_F0 + 2,
  2993  		REG_F0 + 3,
  2994  		REG_F0 + 4,
  2995  		REG_F0 + 5,
  2996  		REG_F0 + 6,
  2997  		REG_F0 + 7:
  2998  		return Yrf
  2999  
  3000  	case REG_M0 + 0,
  3001  		REG_M0 + 1,
  3002  		REG_M0 + 2,
  3003  		REG_M0 + 3,
  3004  		REG_M0 + 4,
  3005  		REG_M0 + 5,
  3006  		REG_M0 + 6,
  3007  		REG_M0 + 7:
  3008  		return Ymr
  3009  
  3010  	case REG_X0:
  3011  		return Yxr0
  3012  
  3013  	case REG_X0 + 1,
  3014  		REG_X0 + 2,
  3015  		REG_X0 + 3,
  3016  		REG_X0 + 4,
  3017  		REG_X0 + 5,
  3018  		REG_X0 + 6,
  3019  		REG_X0 + 7,
  3020  		REG_X0 + 8,
  3021  		REG_X0 + 9,
  3022  		REG_X0 + 10,
  3023  		REG_X0 + 11,
  3024  		REG_X0 + 12,
  3025  		REG_X0 + 13,
  3026  		REG_X0 + 14,
  3027  		REG_X0 + 15:
  3028  		return Yxr
  3029  
  3030  	case REG_X0 + 16,
  3031  		REG_X0 + 17,
  3032  		REG_X0 + 18,
  3033  		REG_X0 + 19,
  3034  		REG_X0 + 20,
  3035  		REG_X0 + 21,
  3036  		REG_X0 + 22,
  3037  		REG_X0 + 23,
  3038  		REG_X0 + 24,
  3039  		REG_X0 + 25,
  3040  		REG_X0 + 26,
  3041  		REG_X0 + 27,
  3042  		REG_X0 + 28,
  3043  		REG_X0 + 29,
  3044  		REG_X0 + 30,
  3045  		REG_X0 + 31:
  3046  		return YxrEvex
  3047  
  3048  	case REG_Y0 + 0,
  3049  		REG_Y0 + 1,
  3050  		REG_Y0 + 2,
  3051  		REG_Y0 + 3,
  3052  		REG_Y0 + 4,
  3053  		REG_Y0 + 5,
  3054  		REG_Y0 + 6,
  3055  		REG_Y0 + 7,
  3056  		REG_Y0 + 8,
  3057  		REG_Y0 + 9,
  3058  		REG_Y0 + 10,
  3059  		REG_Y0 + 11,
  3060  		REG_Y0 + 12,
  3061  		REG_Y0 + 13,
  3062  		REG_Y0 + 14,
  3063  		REG_Y0 + 15:
  3064  		return Yyr
  3065  
  3066  	case REG_Y0 + 16,
  3067  		REG_Y0 + 17,
  3068  		REG_Y0 + 18,
  3069  		REG_Y0 + 19,
  3070  		REG_Y0 + 20,
  3071  		REG_Y0 + 21,
  3072  		REG_Y0 + 22,
  3073  		REG_Y0 + 23,
  3074  		REG_Y0 + 24,
  3075  		REG_Y0 + 25,
  3076  		REG_Y0 + 26,
  3077  		REG_Y0 + 27,
  3078  		REG_Y0 + 28,
  3079  		REG_Y0 + 29,
  3080  		REG_Y0 + 30,
  3081  		REG_Y0 + 31:
  3082  		return YyrEvex
  3083  
  3084  	case REG_Z0 + 0,
  3085  		REG_Z0 + 1,
  3086  		REG_Z0 + 2,
  3087  		REG_Z0 + 3,
  3088  		REG_Z0 + 4,
  3089  		REG_Z0 + 5,
  3090  		REG_Z0 + 6,
  3091  		REG_Z0 + 7:
  3092  		return Yzr
  3093  
  3094  	case REG_Z0 + 8,
  3095  		REG_Z0 + 9,
  3096  		REG_Z0 + 10,
  3097  		REG_Z0 + 11,
  3098  		REG_Z0 + 12,
  3099  		REG_Z0 + 13,
  3100  		REG_Z0 + 14,
  3101  		REG_Z0 + 15,
  3102  		REG_Z0 + 16,
  3103  		REG_Z0 + 17,
  3104  		REG_Z0 + 18,
  3105  		REG_Z0 + 19,
  3106  		REG_Z0 + 20,
  3107  		REG_Z0 + 21,
  3108  		REG_Z0 + 22,
  3109  		REG_Z0 + 23,
  3110  		REG_Z0 + 24,
  3111  		REG_Z0 + 25,
  3112  		REG_Z0 + 26,
  3113  		REG_Z0 + 27,
  3114  		REG_Z0 + 28,
  3115  		REG_Z0 + 29,
  3116  		REG_Z0 + 30,
  3117  		REG_Z0 + 31:
  3118  		if ctxt.Arch.Family == sys.I386 {
  3119  			return Yxxx
  3120  		}
  3121  		return Yzr
  3122  
  3123  	case REG_K0:
  3124  		return Yk0
  3125  
  3126  	case REG_K0 + 1,
  3127  		REG_K0 + 2,
  3128  		REG_K0 + 3,
  3129  		REG_K0 + 4,
  3130  		REG_K0 + 5,
  3131  		REG_K0 + 6,
  3132  		REG_K0 + 7:
  3133  		return Yknot0
  3134  
  3135  	case REG_CS:
  3136  		return Ycs
  3137  	case REG_SS:
  3138  		return Yss
  3139  	case REG_DS:
  3140  		return Yds
  3141  	case REG_ES:
  3142  		return Yes
  3143  	case REG_FS:
  3144  		return Yfs
  3145  	case REG_GS:
  3146  		return Ygs
  3147  	case REG_TLS:
  3148  		return Ytls
  3149  
  3150  	case REG_GDTR:
  3151  		return Ygdtr
  3152  	case REG_IDTR:
  3153  		return Yidtr
  3154  	case REG_LDTR:
  3155  		return Yldtr
  3156  	case REG_MSW:
  3157  		return Ymsw
  3158  	case REG_TASK:
  3159  		return Ytask
  3160  
  3161  	case REG_CR + 0:
  3162  		return Ycr0
  3163  	case REG_CR + 1:
  3164  		return Ycr1
  3165  	case REG_CR + 2:
  3166  		return Ycr2
  3167  	case REG_CR + 3:
  3168  		return Ycr3
  3169  	case REG_CR + 4:
  3170  		return Ycr4
  3171  	case REG_CR + 5:
  3172  		return Ycr5
  3173  	case REG_CR + 6:
  3174  		return Ycr6
  3175  	case REG_CR + 7:
  3176  		return Ycr7
  3177  	case REG_CR + 8:
  3178  		return Ycr8
  3179  
  3180  	case REG_DR + 0:
  3181  		return Ydr0
  3182  	case REG_DR + 1:
  3183  		return Ydr1
  3184  	case REG_DR + 2:
  3185  		return Ydr2
  3186  	case REG_DR + 3:
  3187  		return Ydr3
  3188  	case REG_DR + 4:
  3189  		return Ydr4
  3190  	case REG_DR + 5:
  3191  		return Ydr5
  3192  	case REG_DR + 6:
  3193  		return Ydr6
  3194  	case REG_DR + 7:
  3195  		return Ydr7
  3196  
  3197  	case REG_TR + 0:
  3198  		return Ytr0
  3199  	case REG_TR + 1:
  3200  		return Ytr1
  3201  	case REG_TR + 2:
  3202  		return Ytr2
  3203  	case REG_TR + 3:
  3204  		return Ytr3
  3205  	case REG_TR + 4:
  3206  		return Ytr4
  3207  	case REG_TR + 5:
  3208  		return Ytr5
  3209  	case REG_TR + 6:
  3210  		return Ytr6
  3211  	case REG_TR + 7:
  3212  		return Ytr7
  3213  	}
  3214  
  3215  	return Yxxx
  3216  }
  3217  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// Bytes are stored in the fixed-size buf array and off counts how many of
// them are in use. The Put methods index buf directly and never grow it, so
// a write past the end of the array panics.
type AsmBuf struct {
	buf      [100]byte // backing storage for the bytes emitted so far
	off      int       // number of bytes in use; the next byte is written to buf[off]
	rexflag  int       // REX bits accumulated while encoding operands (asmandsz ORs into it)
	vexflag  bool      // Per inst: true for VEX-encoded
	evexflag bool      // Per inst: true for EVEX-encoded
	rep      bool      // NOTE(review): presumably a pending REP prefix; set outside this chunk — confirm
	repn     bool      // NOTE(review): presumably a pending REPN prefix; set outside this chunk — confirm
	lock     bool      // NOTE(review): presumably a pending LOCK prefix; set outside this chunk — confirm

	evex evexBits // Initialized when evexflag is true
}
  3232  
  3233  // Put1 appends one byte to the end of the buffer.
  3234  func (ab *AsmBuf) Put1(x byte) {
  3235  	ab.buf[ab.off] = x
  3236  	ab.off++
  3237  }
  3238  
  3239  // Put2 appends two bytes to the end of the buffer.
  3240  func (ab *AsmBuf) Put2(x, y byte) {
  3241  	ab.buf[ab.off+0] = x
  3242  	ab.buf[ab.off+1] = y
  3243  	ab.off += 2
  3244  }
  3245  
  3246  // Put3 appends three bytes to the end of the buffer.
  3247  func (ab *AsmBuf) Put3(x, y, z byte) {
  3248  	ab.buf[ab.off+0] = x
  3249  	ab.buf[ab.off+1] = y
  3250  	ab.buf[ab.off+2] = z
  3251  	ab.off += 3
  3252  }
  3253  
  3254  // Put4 appends four bytes to the end of the buffer.
  3255  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3256  	ab.buf[ab.off+0] = x
  3257  	ab.buf[ab.off+1] = y
  3258  	ab.buf[ab.off+2] = z
  3259  	ab.buf[ab.off+3] = w
  3260  	ab.off += 4
  3261  }
  3262  
  3263  // PutInt16 writes v into the buffer using little-endian encoding.
  3264  func (ab *AsmBuf) PutInt16(v int16) {
  3265  	ab.buf[ab.off+0] = byte(v)
  3266  	ab.buf[ab.off+1] = byte(v >> 8)
  3267  	ab.off += 2
  3268  }
  3269  
  3270  // PutInt32 writes v into the buffer using little-endian encoding.
  3271  func (ab *AsmBuf) PutInt32(v int32) {
  3272  	ab.buf[ab.off+0] = byte(v)
  3273  	ab.buf[ab.off+1] = byte(v >> 8)
  3274  	ab.buf[ab.off+2] = byte(v >> 16)
  3275  	ab.buf[ab.off+3] = byte(v >> 24)
  3276  	ab.off += 4
  3277  }
  3278  
  3279  // PutInt64 writes v into the buffer using little-endian encoding.
  3280  func (ab *AsmBuf) PutInt64(v int64) {
  3281  	ab.buf[ab.off+0] = byte(v)
  3282  	ab.buf[ab.off+1] = byte(v >> 8)
  3283  	ab.buf[ab.off+2] = byte(v >> 16)
  3284  	ab.buf[ab.off+3] = byte(v >> 24)
  3285  	ab.buf[ab.off+4] = byte(v >> 32)
  3286  	ab.buf[ab.off+5] = byte(v >> 40)
  3287  	ab.buf[ab.off+6] = byte(v >> 48)
  3288  	ab.buf[ab.off+7] = byte(v >> 56)
  3289  	ab.off += 8
  3290  }
  3291  
  3292  // Put copies b into the buffer.
  3293  func (ab *AsmBuf) Put(b []byte) {
  3294  	copy(ab.buf[ab.off:], b)
  3295  	ab.off += len(b)
  3296  }
  3297  
  3298  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3299  // starting at specified offset (e.g. z counter value).
  3300  // Trailing 0 is not written.
  3301  //
  3302  // Intended to be used for literal Z cases.
  3303  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3304  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3305  	for int(op[offset]) != 0 {
  3306  		ab.Put1(byte(op[offset]))
  3307  		offset++
  3308  	}
  3309  }
  3310  
  3311  // Insert inserts b at offset i.
  3312  func (ab *AsmBuf) Insert(i int, b byte) {
  3313  	ab.off++
  3314  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3315  	ab.buf[i] = b
  3316  }
  3317  
  3318  // Last returns the byte at the end of the buffer.
  3319  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3320  
  3321  // Len returns the length of the buffer.
  3322  func (ab *AsmBuf) Len() int { return ab.off }
  3323  
  3324  // Bytes returns the contents of the buffer.
  3325  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3326  
  3327  // Reset empties the buffer.
  3328  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3329  
  3330  // At returns the byte at offset i.
  3331  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3332  
  3333  // asmidx emits SIB byte.
  3334  func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  3335  	var i int
  3336  
  3337  	// X/Y index register is used in VSIB.
  3338  	switch index {
  3339  	default:
  3340  		goto bad
  3341  
  3342  	case REG_NONE:
  3343  		i = 4 << 3
  3344  		goto bas
  3345  
  3346  	case REG_R8,
  3347  		REG_R9,
  3348  		REG_R10,
  3349  		REG_R11,
  3350  		REG_R12,
  3351  		REG_R13,
  3352  		REG_R14,
  3353  		REG_R15,
  3354  		REG_X8,
  3355  		REG_X9,
  3356  		REG_X10,
  3357  		REG_X11,
  3358  		REG_X12,
  3359  		REG_X13,
  3360  		REG_X14,
  3361  		REG_X15,
  3362  		REG_X16,
  3363  		REG_X17,
  3364  		REG_X18,
  3365  		REG_X19,
  3366  		REG_X20,
  3367  		REG_X21,
  3368  		REG_X22,
  3369  		REG_X23,
  3370  		REG_X24,
  3371  		REG_X25,
  3372  		REG_X26,
  3373  		REG_X27,
  3374  		REG_X28,
  3375  		REG_X29,
  3376  		REG_X30,
  3377  		REG_X31,
  3378  		REG_Y8,
  3379  		REG_Y9,
  3380  		REG_Y10,
  3381  		REG_Y11,
  3382  		REG_Y12,
  3383  		REG_Y13,
  3384  		REG_Y14,
  3385  		REG_Y15,
  3386  		REG_Y16,
  3387  		REG_Y17,
  3388  		REG_Y18,
  3389  		REG_Y19,
  3390  		REG_Y20,
  3391  		REG_Y21,
  3392  		REG_Y22,
  3393  		REG_Y23,
  3394  		REG_Y24,
  3395  		REG_Y25,
  3396  		REG_Y26,
  3397  		REG_Y27,
  3398  		REG_Y28,
  3399  		REG_Y29,
  3400  		REG_Y30,
  3401  		REG_Y31,
  3402  		REG_Z8,
  3403  		REG_Z9,
  3404  		REG_Z10,
  3405  		REG_Z11,
  3406  		REG_Z12,
  3407  		REG_Z13,
  3408  		REG_Z14,
  3409  		REG_Z15,
  3410  		REG_Z16,
  3411  		REG_Z17,
  3412  		REG_Z18,
  3413  		REG_Z19,
  3414  		REG_Z20,
  3415  		REG_Z21,
  3416  		REG_Z22,
  3417  		REG_Z23,
  3418  		REG_Z24,
  3419  		REG_Z25,
  3420  		REG_Z26,
  3421  		REG_Z27,
  3422  		REG_Z28,
  3423  		REG_Z29,
  3424  		REG_Z30,
  3425  		REG_Z31:
  3426  		if ctxt.Arch.Family == sys.I386 {
  3427  			goto bad
  3428  		}
  3429  		fallthrough
  3430  
  3431  	case REG_AX,
  3432  		REG_CX,
  3433  		REG_DX,
  3434  		REG_BX,
  3435  		REG_BP,
  3436  		REG_SI,
  3437  		REG_DI,
  3438  		REG_X0,
  3439  		REG_X1,
  3440  		REG_X2,
  3441  		REG_X3,
  3442  		REG_X4,
  3443  		REG_X5,
  3444  		REG_X6,
  3445  		REG_X7,
  3446  		REG_Y0,
  3447  		REG_Y1,
  3448  		REG_Y2,
  3449  		REG_Y3,
  3450  		REG_Y4,
  3451  		REG_Y5,
  3452  		REG_Y6,
  3453  		REG_Y7,
  3454  		REG_Z0,
  3455  		REG_Z1,
  3456  		REG_Z2,
  3457  		REG_Z3,
  3458  		REG_Z4,
  3459  		REG_Z5,
  3460  		REG_Z6,
  3461  		REG_Z7:
  3462  		i = reg[index] << 3
  3463  	}
  3464  
  3465  	switch scale {
  3466  	default:
  3467  		goto bad
  3468  
  3469  	case 1:
  3470  		break
  3471  
  3472  	case 2:
  3473  		i |= 1 << 6
  3474  
  3475  	case 4:
  3476  		i |= 2 << 6
  3477  
  3478  	case 8:
  3479  		i |= 3 << 6
  3480  	}
  3481  
  3482  bas:
  3483  	switch base {
  3484  	default:
  3485  		goto bad
  3486  
  3487  	case REG_NONE: // must be mod=00
  3488  		i |= 5
  3489  
  3490  	case REG_R8,
  3491  		REG_R9,
  3492  		REG_R10,
  3493  		REG_R11,
  3494  		REG_R12,
  3495  		REG_R13,
  3496  		REG_R14,
  3497  		REG_R15:
  3498  		if ctxt.Arch.Family == sys.I386 {
  3499  			goto bad
  3500  		}
  3501  		fallthrough
  3502  
  3503  	case REG_AX,
  3504  		REG_CX,
  3505  		REG_DX,
  3506  		REG_BX,
  3507  		REG_SP,
  3508  		REG_BP,
  3509  		REG_SI,
  3510  		REG_DI:
  3511  		i |= reg[base]
  3512  	}
  3513  
  3514  	ab.Put1(byte(i))
  3515  	return
  3516  
  3517  bad:
  3518  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  3519  	ab.Put1(0)
  3520  }
  3521  
  3522  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3523  	var rel obj.Reloc
  3524  
  3525  	v := vaddr(ctxt, p, a, &rel)
  3526  	if rel.Siz != 0 {
  3527  		if rel.Siz != 4 {
  3528  			ctxt.Diag("bad reloc")
  3529  		}
  3530  		r := obj.Addrel(cursym)
  3531  		*r = rel
  3532  		r.Off = int32(p.Pc + int64(ab.Len()))
  3533  	}
  3534  
  3535  	ab.PutInt32(int32(v))
  3536  }
  3537  
  3538  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3539  	if r != nil {
  3540  		*r = obj.Reloc{}
  3541  	}
  3542  
  3543  	switch a.Name {
  3544  	case obj.NAME_STATIC,
  3545  		obj.NAME_GOTREF,
  3546  		obj.NAME_EXTERN:
  3547  		s := a.Sym
  3548  		if r == nil {
  3549  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3550  			log.Fatalf("reloc")
  3551  		}
  3552  
  3553  		if a.Name == obj.NAME_GOTREF {
  3554  			r.Siz = 4
  3555  			r.Type = objabi.R_GOTPCREL
  3556  		} else if useAbs(ctxt, s) {
  3557  			r.Siz = 4
  3558  			r.Type = objabi.R_ADDR
  3559  		} else {
  3560  			r.Siz = 4
  3561  			r.Type = objabi.R_PCREL
  3562  		}
  3563  
  3564  		r.Off = -1 // caller must fill in
  3565  		r.Sym = s
  3566  		r.Add = a.Offset
  3567  
  3568  		return 0
  3569  	}
  3570  
  3571  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3572  		if r == nil {
  3573  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3574  			log.Fatalf("reloc")
  3575  		}
  3576  
  3577  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3578  			r.Type = objabi.R_TLS_LE
  3579  			r.Siz = 4
  3580  			r.Off = -1 // caller must fill in
  3581  			r.Add = a.Offset
  3582  		}
  3583  		return 0
  3584  	}
  3585  
  3586  	return a.Offset
  3587  }
  3588  
  3589  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3590  	var base int
  3591  	var rel obj.Reloc
  3592  
  3593  	rex &= 0x40 | Rxr
  3594  	if a.Offset != int64(int32(a.Offset)) {
  3595  		// The rules are slightly different for 386 and AMD64,
  3596  		// mostly for historical reasons. We may unify them later,
  3597  		// but it must be discussed beforehand.
  3598  		//
  3599  		// For 64bit mode only LEAL is allowed to overflow.
  3600  		// It's how https://golang.org/cl/59630 made it.
  3601  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3602  		//
  3603  		// For 32bit mode rules are more permissive.
  3604  		// If offset fits uint32, it's permitted.
  3605  		// This is allowed for assembly that wants to use 32-bit hex
  3606  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3607  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3608  			(ctxt.Arch.Family != sys.AMD64 &&
  3609  				int64(uint32(a.Offset)) == a.Offset &&
  3610  				ab.rexflag&Rxw == 0)
  3611  		if !overflowOK {
  3612  			ctxt.Diag("offset too large in %s", p)
  3613  		}
  3614  	}
  3615  	v := int32(a.Offset)
  3616  	rel.Siz = 0
  3617  
  3618  	switch a.Type {
  3619  	case obj.TYPE_ADDR:
  3620  		if a.Name == obj.NAME_NONE {
  3621  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3622  		}
  3623  		if a.Index == REG_TLS {
  3624  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3625  		}
  3626  		goto bad
  3627  
  3628  	case obj.TYPE_REG:
  3629  		const regFirst = REG_AL
  3630  		const regLast = REG_Z31
  3631  		if a.Reg < regFirst || regLast < a.Reg {
  3632  			goto bad
  3633  		}
  3634  		if v != 0 {
  3635  			goto bad
  3636  		}
  3637  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3638  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3639  		return
  3640  	}
  3641  
  3642  	if a.Type != obj.TYPE_MEM {
  3643  		goto bad
  3644  	}
  3645  
  3646  	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
  3647  		base := int(a.Reg)
  3648  		switch a.Name {
  3649  		case obj.NAME_EXTERN,
  3650  			obj.NAME_GOTREF,
  3651  			obj.NAME_STATIC:
  3652  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3653  				goto bad
  3654  			}
  3655  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3656  				// The base register has already been set. It holds the PC
  3657  				// of this instruction returned by a PC-reading thunk.
  3658  				// See obj6.go:rewriteToPcrel.
  3659  			} else {
  3660  				base = REG_NONE
  3661  			}
  3662  			v = int32(vaddr(ctxt, p, a, &rel))
  3663  
  3664  		case obj.NAME_AUTO,
  3665  			obj.NAME_PARAM:
  3666  			base = REG_SP
  3667  		}
  3668  
  3669  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3670  		if base == REG_NONE {
  3671  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3672  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3673  			goto putrelv
  3674  		}
  3675  
  3676  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3677  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3678  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3679  			return
  3680  		}
  3681  
  3682  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3683  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3684  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3685  			ab.Put1(disp8)
  3686  			return
  3687  		}
  3688  
  3689  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3690  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3691  		goto putrelv
  3692  	}
  3693  
  3694  	base = int(a.Reg)
  3695  	switch a.Name {
  3696  	case obj.NAME_STATIC,
  3697  		obj.NAME_GOTREF,
  3698  		obj.NAME_EXTERN:
  3699  		if a.Sym == nil {
  3700  			ctxt.Diag("bad addr: %v", p)
  3701  		}
  3702  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3703  			// The base register has already been set. It holds the PC
  3704  			// of this instruction returned by a PC-reading thunk.
  3705  			// See obj6.go:rewriteToPcrel.
  3706  		} else {
  3707  			base = REG_NONE
  3708  		}
  3709  		v = int32(vaddr(ctxt, p, a, &rel))
  3710  
  3711  	case obj.NAME_AUTO,
  3712  		obj.NAME_PARAM:
  3713  		base = REG_SP
  3714  	}
  3715  
  3716  	if base == REG_TLS {
  3717  		v = int32(vaddr(ctxt, p, a, &rel))
  3718  	}
  3719  
  3720  	ab.rexflag |= regrex[base]&Rxb | rex
  3721  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3722  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3723  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3724  				ctxt.Diag("%v has offset against gotref", p)
  3725  			}
  3726  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3727  			goto putrelv
  3728  		}
  3729  
  3730  		// temporary
  3731  		ab.Put2(
  3732  			byte(0<<6|4<<0|r<<3), // sib present
  3733  			0<<6|4<<3|5<<0,       // DS:d32
  3734  		)
  3735  		goto putrelv
  3736  	}
  3737  
  3738  	if base == REG_SP || base == REG_R12 {
  3739  		if v == 0 {
  3740  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3741  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3742  			return
  3743  		}
  3744  
  3745  		if disp8, ok := toDisp8(v, p, ab); ok {
  3746  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3747  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3748  			ab.Put1(disp8)
  3749  			return
  3750  		}
  3751  
  3752  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3753  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3754  		goto putrelv
  3755  	}
  3756  
  3757  	if REG_AX <= base && base <= REG_R15 {
  3758  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
  3759  			ctxt.Headtype != objabi.Hwindows {
  3760  			rel = obj.Reloc{}
  3761  			rel.Type = objabi.R_TLS_LE
  3762  			rel.Siz = 4
  3763  			rel.Sym = nil
  3764  			rel.Add = int64(v)
  3765  			v = 0
  3766  		}
  3767  
  3768  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3769  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3770  			return
  3771  		}
  3772  
  3773  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3774  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3775  			return
  3776  		}
  3777  
  3778  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3779  		goto putrelv
  3780  	}
  3781  
  3782  	goto bad
  3783  
  3784  putrelv:
  3785  	if rel.Siz != 0 {
  3786  		if rel.Siz != 4 {
  3787  			ctxt.Diag("bad rel")
  3788  			goto bad
  3789  		}
  3790  
  3791  		r := obj.Addrel(cursym)
  3792  		*r = rel
  3793  		r.Off = int32(p.Pc + int64(ab.Len()))
  3794  	}
  3795  
  3796  	ab.PutInt32(v)
  3797  	return
  3798  
  3799  bad:
  3800  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3801  }
  3802  
  3803  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3804  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3805  }
  3806  
  3807  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3808  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3809  }
  3810  
  3811  func bytereg(a *obj.Addr, t *uint8) {
  3812  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3813  		a.Reg += REG_AL - REG_AX
  3814  		*t = 0
  3815  	}
  3816  }
  3817  
  3818  func unbytereg(a *obj.Addr, t *uint8) {
  3819  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3820  		a.Reg += REG_AX - REG_AL
  3821  		*t = 0
  3822  	}
  3823  }
  3824  
// Encoding kinds stored in the fifth column of ymovtab rows. The values are
// assigned by iota, so the order of the constants must not change.
const (
	// movLit is like Zlit. In ymovtab it is used by the segment-register
	// push and pop rows.
	movLit uint8 = iota
	// movRegMem is used by the rows that store a segment register
	// (opcode 0x8c).
	movRegMem
	// movMemReg is used by the rows that load a segment register
	// (opcode 0x8e).
	movMemReg
	// movRegMem2op is the counterpart of movRegMem for rows whose opcode
	// starts with the 0x0f escape byte.
	movRegMem2op
	// movMemReg2op is the counterpart of movMemReg for rows whose opcode
	// starts with the 0x0f escape byte.
	movMemReg2op
	// movFullPtr: load full pointer, trash heap (unsupported). Only
	// commented-out ymovtab rows refer to it.
	movFullPtr
	// movDoubleShift is used by the double-shift rows (SHLx/SHRx with three
	// operands).
	movDoubleShift
	// movTLSReg is used by the rows that load the TLS base into a register.
	movTLSReg
)
  3835  
// ymovtab lists the special-case encodings for moves, pushes, pops and double
// shifts that involve segment, control, debug, test and descriptor-table
// registers, plus the TLS base load.
//
// Each row holds an instruction, three operand classes, one of the mov*
// encoding kinds, and up to four opcode bytes.
// NOTE(review): the three operand classes appear to be (from, from3, to) —
// confirm against the movtab type declaration, which is outside this view.
var ymovtab = []movtab{
	// push: the segment register is the first operand class.
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop: the segment register is the last operand class.
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg: opcode 0x8c stores a segment register, 0x8e loads one.
	// The byte after the opcode numbers the segment register
	// (es=0, cs=1, ss=2, ds=3, fs=4, gs=5).
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr: 0x0f 0x20 reads a control register, 0x0f 0x22 writes one.
	// The third byte is the control register number.
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr: 0x0f 0x21 reads a debug register, 0x0f 0x23 writes one.
	// The third byte is the debug register number. Note that the AMOVL
	// rows cover only dr0, dr6 and dr7, while AMOVQ also covers dr2 and dr3.
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr: 0x0f 0x24 reads a test register, 0x0f 0x26 writes one.
	// NOTE(review): the two write rows carry 0xff as their fourth byte,
	// unlike every other row in this table; its meaning is not visible
	// from here — confirm against the code that consumes movMemReg2op.
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt: all share opcode 0x0f 0x01 and are told
	// apart by the third byte.
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt: opcode 0x0f 0x00.
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw: opcode 0x0f 0x01.
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str: opcode 0x0f 0x00.
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	// NOTE(review): the disabled rows below have five fields, one fewer
	// than every live row, so they would need updating before being
	// re-enabled.
	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift: the count is an 8-bit immediate or CL/CX, followed by
	// a register and a register-or-memory operand. Two opcode bytes are
	// listed per row (0xa4/0xa5 for left shifts, 0xac/0xad for right
	// shifts); the Q and W forms put Pw or Pe in front of them.
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base: no opcode bytes are stored in the table for these rows.
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	// All-zero row; presumably the end-of-table sentinel — confirm against
	// the loop that scans ymovtab.
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3979  
  3980  func isax(a *obj.Addr) bool {
  3981  	switch a.Reg {
  3982  	case REG_AX, REG_AL, REG_AH:
  3983  		return true
  3984  	}
  3985  
  3986  	return a.Index == REG_AX
  3987  }
  3988  
  3989  func subreg(p *obj.Prog, from int, to int) {
  3990  	if false { /* debug['Q'] */
  3991  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3992  	}
  3993  
  3994  	if int(p.From.Reg) == from {
  3995  		p.From.Reg = int16(to)
  3996  		p.Ft = 0
  3997  	}
  3998  
  3999  	if int(p.To.Reg) == from {
  4000  		p.To.Reg = int16(to)
  4001  		p.Tt = 0
  4002  	}
  4003  
  4004  	if int(p.From.Index) == from {
  4005  		p.From.Index = int16(to)
  4006  		p.Ft = 0
  4007  	}
  4008  
  4009  	if int(p.To.Index) == from {
  4010  		p.To.Index = int16(to)
  4011  		p.Tt = 0
  4012  	}
  4013  
  4014  	if false { /* debug['Q'] */
  4015  		fmt.Printf("%v\n", p)
  4016  	}
  4017  }
  4018  
  4019  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  4020  	switch op {
  4021  	case Pm, Pe, Pf2, Pf3:
  4022  		if osize != 1 {
  4023  			if op != Pm {
  4024  				ab.Put1(byte(op))
  4025  			}
  4026  			ab.Put1(Pm)
  4027  			z++
  4028  			op = int(o.op[z])
  4029  			break
  4030  		}
  4031  		fallthrough
  4032  
  4033  	default:
  4034  		if ab.Len() == 0 || ab.Last() != Pm {
  4035  			ab.Put1(Pm)
  4036  		}
  4037  	}
  4038  
  4039  	ab.Put1(byte(op))
  4040  	return z
  4041  }
  4042  
  4043  var bpduff1 = []byte{
  4044  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  4045  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  4046  }
  4047  
  4048  var bpduff2 = []byte{
  4049  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  4050  }
  4051  
  4052  // asmevex emits EVEX pregis and opcode byte.
  4053  // In addition to asmvex r/m, vvvv and reg fields also requires optional
  4054  // K-masking register.
  4055  //
  4056  // Expects asmbuf.evex to be properly initialized.
  4057  func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
  4058  	ab.evexflag = true
  4059  	evex := ab.evex
  4060  
  4061  	rexR := byte(1)
  4062  	evexR := byte(1)
  4063  	rexX := byte(1)
  4064  	rexB := byte(1)
  4065  	if r != nil {
  4066  		if regrex[r.Reg]&Rxr != 0 {
  4067  			rexR = 0 // "ModR/M.reg" selector 4th bit.
  4068  		}
  4069  		if regrex[r.Reg]&RxrEvex != 0 {
  4070  			evexR = 0 // "ModR/M.reg" selector 5th bit.
  4071  		}
  4072  	}
  4073  	if rm != nil {
  4074  		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
  4075  			rexX = 0
  4076  		} else if regrex[rm.Index]&Rxx != 0 {
  4077  			rexX = 0
  4078  		}
  4079  		if regrex[rm.Reg]&Rxb != 0 {
  4080  			rexB = 0
  4081  		}
  4082  	}
  4083  	// P0 = [R][X][B][R'][00][mm]
  4084  	p0 := (rexR << 7) |
  4085  		(rexX << 6) |
  4086  		(rexB << 5) |
  4087  		(evexR << 4) |
  4088  		(0 << 2) |
  4089  		(evex.M() << 0)
  4090  
  4091  	vexV := byte(0)
  4092  	if v != nil {
  4093  		// 4bit-wide reg index.
  4094  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4095  	}
  4096  	vexV ^= 0x0F
  4097  	// P1 = [W][vvvv][1][pp]
  4098  	p1 := (evex.W() << 7) |
  4099  		(vexV << 3) |
  4100  		(1 << 2) |
  4101  		(evex.P() << 0)
  4102  
  4103  	suffix := evexSuffixMap[p.Scond]
  4104  	evexZ := byte(0)
  4105  	evexLL := evex.L()
  4106  	evexB := byte(0)
  4107  	evexV := byte(1)
  4108  	evexA := byte(0)
  4109  	if suffix.zeroing {
  4110  		if !evex.ZeroingEnabled() {
  4111  			ctxt.Diag("unsupported zeroing: %v", p)
  4112  		}
  4113  		if k == nil {
  4114  			// When you request zeroing you must specify a mask register.
  4115  			// See issue 57952.
  4116  			ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
  4117  		} else if k.Reg == REG_K0 {
  4118  			// The mask register must not be K0. That restriction is already
  4119  			// handled by the Yknot0 restriction in the opcode tables, so we
  4120  			// won't ever reach here. But put something sensible here just in case.
  4121  			ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
  4122  		}
  4123  		evexZ = 1
  4124  	}
  4125  	switch {
  4126  	case suffix.rounding != rcUnset:
  4127  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4128  			ctxt.Diag("illegal rounding with memory argument: %v", p)
  4129  		} else if !evex.RoundingEnabled() {
  4130  			ctxt.Diag("unsupported rounding: %v", p)
  4131  		}
  4132  		evexB = 1
  4133  		evexLL = suffix.rounding
  4134  	case suffix.broadcast:
  4135  		if rm == nil || rm.Type != obj.TYPE_MEM {
  4136  			ctxt.Diag("illegal broadcast without memory argument: %v", p)
  4137  		} else if !evex.BroadcastEnabled() {
  4138  			ctxt.Diag("unsupported broadcast: %v", p)
  4139  		}
  4140  		evexB = 1
  4141  	case suffix.sae:
  4142  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4143  			ctxt.Diag("illegal SAE with memory argument: %v", p)
  4144  		} else if !evex.SaeEnabled() {
  4145  			ctxt.Diag("unsupported SAE: %v", p)
  4146  		}
  4147  		evexB = 1
  4148  	}
  4149  	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
  4150  		evexV = 0
  4151  	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
  4152  		evexV = 0 // VSR selector 5th bit.
  4153  	}
  4154  	if k != nil {
  4155  		evexA = byte(reg[k.Reg])
  4156  	}
  4157  	// P2 = [z][L'L][b][V'][aaa]
  4158  	p2 := (evexZ << 7) |
  4159  		(evexLL << 5) |
  4160  		(evexB << 4) |
  4161  		(evexV << 3) |
  4162  		(evexA << 0)
  4163  
  4164  	const evexEscapeByte = 0x62
  4165  	ab.Put4(evexEscapeByte, p0, p1, p2)
  4166  	ab.Put1(evex.opcode)
  4167  }
  4168  
  4169  // Emit VEX prefix and opcode byte.
  4170  // The three addresses are the r/m, vvvv, and reg fields.
  4171  // The reg and rm arguments appear in the same order as the
  4172  // arguments to asmand, which typically follows the call to asmvex.
  4173  // The final two arguments are the VEX prefix (see encoding above)
  4174  // and the opcode byte.
  4175  // For details about vex prefix see:
  4176  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4177  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4178  	ab.vexflag = true
  4179  	rexR := 0
  4180  	if r != nil {
  4181  		rexR = regrex[r.Reg] & Rxr
  4182  	}
  4183  	rexB := 0
  4184  	rexX := 0
  4185  	if rm != nil {
  4186  		rexB = regrex[rm.Reg] & Rxb
  4187  		rexX = regrex[rm.Index] & Rxx
  4188  	}
  4189  	vexM := (vex >> 3) & 0x7
  4190  	vexWLP := vex & 0x87
  4191  	vexV := byte(0)
  4192  	if v != nil {
  4193  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4194  	}
  4195  	vexV ^= 0xF
  4196  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4197  		// Can use 2-byte encoding.
  4198  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4199  	} else {
  4200  		// Must use 3-byte encoding.
  4201  		ab.Put3(0xc4,
  4202  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4203  			vexV<<3|vexWLP,
  4204  		)
  4205  	}
  4206  	ab.Put1(opcode)
  4207  }
  4208  
  4209  // regIndex returns register index that fits in 5 bits.
  4210  //
  4211  //	R         : 3 bit | legacy instructions     | N/A
  4212  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4213  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4214  //
  4215  // Examples:
  4216  //
  4217  //	REG_Z30 => 30
  4218  //	REG_X15 => 15
  4219  //	REG_R9  => 9
  4220  //	REG_AX  => 0
  4221  func regIndex(r int16) int {
  4222  	lower3bits := reg[r]
  4223  	high4bit := regrex[r] & Rxr << 1
  4224  	high5bit := regrex[r] & RxrEvex << 0
  4225  	return lower3bits | high4bit | high5bit
  4226  }
  4227  
  4228  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4229  // Reports errors via ctxt.
  4230  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4231  	// If any pair of the index, mask, or destination registers
  4232  	// are the same, illegal instruction trap (#UD) is triggered.
  4233  	index := regIndex(p.GetFrom3().Index)
  4234  	mask := regIndex(p.From.Reg)
  4235  	dest := regIndex(p.To.Reg)
  4236  	if dest == mask || dest == index || mask == index {
  4237  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4238  		return false
  4239  	}
  4240  
  4241  	return true
  4242  }
  4243  
  4244  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4245  // Reports errors via ctxt.
  4246  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4247  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4248  	// register is the same as index vector in VSIB.
  4249  	index := regIndex(p.From.Index)
  4250  	dest := regIndex(p.To.Reg)
  4251  	if dest == index {
  4252  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4253  		return false
  4254  	}
  4255  
  4256  	return true
  4257  }
  4258  
  4259  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4260  	o := opindex[p.As&obj.AMask]
  4261  
  4262  	if o == nil {
  4263  		ctxt.Diag("asmins: missing op %v", p)
  4264  		return
  4265  	}
  4266  
  4267  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4268  		ab.Put1(byte(pre))
  4269  	}
  4270  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4271  		ab.Put1(byte(pre))
  4272  	}
  4273  
  4274  	// Checks to warn about instruction/arguments combinations that
  4275  	// will unconditionally trigger illegal instruction trap (#UD).
  4276  	switch p.As {
  4277  	case AVGATHERDPD,
  4278  		AVGATHERQPD,
  4279  		AVGATHERDPS,
  4280  		AVGATHERQPS,
  4281  		AVPGATHERDD,
  4282  		AVPGATHERQD,
  4283  		AVPGATHERDQ,
  4284  		AVPGATHERQQ:
  4285  		if p.GetFrom3() == nil {
  4286  			// gathers need a 3rd arg. See issue 58822.
  4287  			ctxt.Diag("need a third arg for gather instruction: %v", p)
  4288  			return
  4289  		}
  4290  		// AVX512 gather requires explicit K mask.
  4291  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4292  			if !avx512gatherValid(ctxt, p) {
  4293  				return
  4294  			}
  4295  		} else {
  4296  			if !avx2gatherValid(ctxt, p) {
  4297  				return
  4298  			}
  4299  		}
  4300  	}
  4301  
  4302  	if p.Ft == 0 {
  4303  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4304  	}
  4305  	if p.Tt == 0 {
  4306  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4307  	}
  4308  
  4309  	ft := int(p.Ft) * Ymax
  4310  	var f3t int
  4311  	tt := int(p.Tt) * Ymax
  4312  
  4313  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4314  	z := 0
  4315  	var a *obj.Addr
  4316  	var l int
  4317  	var op int
  4318  	var q *obj.Prog
  4319  	var r *obj.Reloc
  4320  	var rel obj.Reloc
  4321  	var v int64
  4322  
  4323  	args := make([]int, 0, argListMax)
  4324  	if ft != Ynone*Ymax {
  4325  		args = append(args, ft)
  4326  	}
  4327  	for i := range p.RestArgs {
  4328  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4329  	}
  4330  	if tt != Ynone*Ymax {
  4331  		args = append(args, tt)
  4332  	}
  4333  
  4334  	for _, yt := range o.ytab {
  4335  		// ytab matching is purely args-based,
  4336  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4337  		// add EVEX-only filter that will reject non-EVEX matches.
  4338  		//
  4339  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4340  		// Without this rule, operands will lead to VEX-encoded form
  4341  		// and produce "c5b15813" encoding.
  4342  		if !yt.match(args) {
  4343  			// "xo" is always zero for VEX/EVEX encoded insts.
  4344  			z += int(yt.zoffset) + xo
  4345  		} else {
  4346  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4347  				// Do not signal error and continue to search
  4348  				// for matching EVEX-encoded form.
  4349  				z += int(yt.zoffset)
  4350  				continue
  4351  			}
  4352  
  4353  			switch o.prefix {
  4354  			case Px1: // first option valid only in 32-bit mode
  4355  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4356  					z += int(yt.zoffset) + xo
  4357  					continue
  4358  				}
  4359  			case Pq: // 16 bit escape and opcode escape
  4360  				ab.Put2(Pe, Pm)
  4361  
  4362  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4363  				ab.rexflag |= Pw
  4364  				ab.Put2(Pe, Pm)
  4365  
  4366  			case Pq4: // 66 0F 38
  4367  				ab.Put3(0x66, 0x0F, 0x38)
  4368  
  4369  			case Pq4w: // 66 0F 38 + REX.W
  4370  				ab.rexflag |= Pw
  4371  				ab.Put3(0x66, 0x0F, 0x38)
  4372  
  4373  			case Pq5: // F3 0F 38
  4374  				ab.Put3(0xF3, 0x0F, 0x38)
  4375  
  4376  			case Pq5w: //  F3 0F 38 + REX.W
  4377  				ab.rexflag |= Pw
  4378  				ab.Put3(0xF3, 0x0F, 0x38)
  4379  
  4380  			case Pf2, // xmm opcode escape
  4381  				Pf3:
  4382  				ab.Put2(o.prefix, Pm)
  4383  
  4384  			case Pef3:
  4385  				ab.Put3(Pe, Pf3, Pm)
  4386  
  4387  			case Pfw: // xmm opcode escape + REX.W
  4388  				ab.rexflag |= Pw
  4389  				ab.Put2(Pf3, Pm)
  4390  
  4391  			case Pm: // opcode escape
  4392  				ab.Put1(Pm)
  4393  
  4394  			case Pe: // 16 bit escape
  4395  				ab.Put1(Pe)
  4396  
  4397  			case Pw: // 64-bit escape
  4398  				if ctxt.Arch.Family != sys.AMD64 {
  4399  					ctxt.Diag("asmins: illegal 64: %v", p)
  4400  				}
  4401  				ab.rexflag |= Pw
  4402  
  4403  			case Pw8: // 64-bit escape if z >= 8
  4404  				if z >= 8 {
  4405  					if ctxt.Arch.Family != sys.AMD64 {
  4406  						ctxt.Diag("asmins: illegal 64: %v", p)
  4407  					}
  4408  					ab.rexflag |= Pw
  4409  				}
  4410  
  4411  			case Pb: // botch
  4412  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4413  					goto bad
  4414  				}
  4415  				// NOTE(rsc): This is probably safe to do always,
  4416  				// but when enabled it chooses different encodings
  4417  				// than the old github.com/go-asm/go/cmd/obj/i386 code did,
  4418  				// which breaks our "same bits out" checks.
  4419  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4420  				// in the original obj/i386, and it would encode
  4421  				// (using a valid, shorter form) as 3c 00 if we enabled
  4422  				// the call to bytereg here.
  4423  				if ctxt.Arch.Family == sys.AMD64 {
  4424  					bytereg(&p.From, &p.Ft)
  4425  					bytereg(&p.To, &p.Tt)
  4426  				}
  4427  
  4428  			case P32: // 32 bit but illegal if 64-bit mode
  4429  				if ctxt.Arch.Family == sys.AMD64 {
  4430  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4431  				}
  4432  
  4433  			case Py: // 64-bit only, no prefix
  4434  				if ctxt.Arch.Family != sys.AMD64 {
  4435  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4436  				}
  4437  
  4438  			case Py1: // 64-bit only if z < 1, no prefix
  4439  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4440  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4441  				}
  4442  
  4443  			case Py3: // 64-bit only if z < 3, no prefix
  4444  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4445  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4446  				}
  4447  			}
  4448  
  4449  			if z >= len(o.op) {
  4450  				log.Fatalf("asmins bad table %v", p)
  4451  			}
  4452  			op = int(o.op[z])
  4453  			if op == 0x0f {
  4454  				ab.Put1(byte(op))
  4455  				z++
  4456  				op = int(o.op[z])
  4457  			}
  4458  
  4459  			switch yt.zcase {
  4460  			default:
  4461  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4462  				return
  4463  
  4464  			case Zpseudo:
  4465  				break
  4466  
  4467  			case Zlit:
  4468  				ab.PutOpBytesLit(z, &o.op)
  4469  
  4470  			case Zlitr_m:
  4471  				ab.PutOpBytesLit(z, &o.op)
  4472  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4473  
  4474  			case Zlitm_r:
  4475  				ab.PutOpBytesLit(z, &o.op)
  4476  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4477  
  4478  			case Zlit_m_r:
  4479  				ab.PutOpBytesLit(z, &o.op)
  4480  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4481  
  4482  			case Zmb_r:
  4483  				bytereg(&p.From, &p.Ft)
  4484  				fallthrough
  4485  
  4486  			case Zm_r:
  4487  				ab.Put1(byte(op))
  4488  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4489  
  4490  			case Z_m_r:
  4491  				ab.Put1(byte(op))
  4492  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4493  
  4494  			case Zm2_r:
  4495  				ab.Put2(byte(op), o.op[z+1])
  4496  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4497  
  4498  			case Zm_r_xm:
  4499  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4500  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4501  
  4502  			case Zm_r_xm_nr:
  4503  				ab.rexflag = 0
  4504  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4505  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4506  
  4507  			case Zm_r_i_xm:
  4508  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4509  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4510  				ab.Put1(byte(p.To.Offset))
  4511  
  4512  			case Zibm_r, Zibr_m:
  4513  				ab.PutOpBytesLit(z, &o.op)
  4514  				if yt.zcase == Zibr_m {
  4515  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4516  				} else {
  4517  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4518  				}
  4519  				switch {
  4520  				default:
  4521  					ab.Put1(byte(p.From.Offset))
  4522  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4523  					ab.PutInt16(int16(p.From.Offset))
  4524  				case yt.args[0] == Yi32:
  4525  					ab.PutInt32(int32(p.From.Offset))
  4526  				}
  4527  
  4528  			case Zaut_r:
  4529  				ab.Put1(0x8d) // leal
  4530  				if p.From.Type != obj.TYPE_ADDR {
  4531  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4532  				}
  4533  				p.From.Type = obj.TYPE_MEM
  4534  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4535  				p.From.Type = obj.TYPE_ADDR
  4536  
  4537  			case Zm_o:
  4538  				ab.Put1(byte(op))
  4539  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4540  
  4541  			case Zr_m:
  4542  				ab.Put1(byte(op))
  4543  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4544  
  4545  			case Zvex:
  4546  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4547  
  4548  			case Zvex_rm_v_r:
  4549  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4550  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4551  
  4552  			case Zvex_rm_v_ro:
  4553  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4554  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4555  
  4556  			case Zvex_i_rm_vo:
  4557  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4558  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4559  				ab.Put1(byte(p.From.Offset))
  4560  
  4561  			case Zvex_i_r_v:
  4562  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4563  				regnum := byte(0x7)
  4564  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4565  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4566  				} else {
  4567  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4568  				}
  4569  				ab.Put1(o.op[z+2] | regnum)
  4570  				ab.Put1(byte(p.From.Offset))
  4571  
  4572  			case Zvex_i_rm_v_r:
  4573  				imm, from, from3, to := unpackOps4(p)
  4574  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4575  				ab.asmand(ctxt, cursym, p, from, to)
  4576  				ab.Put1(byte(imm.Offset))
  4577  
  4578  			case Zvex_i_rm_r:
  4579  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4580  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4581  				ab.Put1(byte(p.From.Offset))
  4582  
  4583  			case Zvex_v_rm_r:
  4584  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4585  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4586  
  4587  			case Zvex_r_v_rm:
  4588  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4589  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4590  
  4591  			case Zvex_rm_r_vo:
  4592  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4593  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4594  
  4595  			case Zvex_i_r_rm:
  4596  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4597  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4598  				ab.Put1(byte(p.From.Offset))
  4599  
  4600  			case Zvex_hr_rm_v_r:
  4601  				hr, from, from3, to := unpackOps4(p)
  4602  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4603  				ab.asmand(ctxt, cursym, p, from, to)
  4604  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4605  
  4606  			case Zevex_k_rmo:
  4607  				ab.evex = newEVEXBits(z, &o.op)
  4608  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4609  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4610  
  4611  			case Zevex_i_rm_vo:
  4612  				ab.evex = newEVEXBits(z, &o.op)
  4613  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4614  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4615  				ab.Put1(byte(p.From.Offset))
  4616  
  4617  			case Zevex_i_rm_k_vo:
  4618  				imm, from, kmask, to := unpackOps4(p)
  4619  				ab.evex = newEVEXBits(z, &o.op)
  4620  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4621  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4622  				ab.Put1(byte(imm.Offset))
  4623  
  4624  			case Zevex_i_r_rm:
  4625  				ab.evex = newEVEXBits(z, &o.op)
  4626  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4627  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4628  				ab.Put1(byte(p.From.Offset))
  4629  
  4630  			case Zevex_i_r_k_rm:
  4631  				imm, from, kmask, to := unpackOps4(p)
  4632  				ab.evex = newEVEXBits(z, &o.op)
  4633  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4634  				ab.asmand(ctxt, cursym, p, to, from)
  4635  				ab.Put1(byte(imm.Offset))
  4636  
  4637  			case Zevex_i_rm_r:
  4638  				ab.evex = newEVEXBits(z, &o.op)
  4639  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4640  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4641  				ab.Put1(byte(p.From.Offset))
  4642  
  4643  			case Zevex_i_rm_k_r:
  4644  				imm, from, kmask, to := unpackOps4(p)
  4645  				ab.evex = newEVEXBits(z, &o.op)
  4646  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4647  				ab.asmand(ctxt, cursym, p, from, to)
  4648  				ab.Put1(byte(imm.Offset))
  4649  
  4650  			case Zevex_i_rm_v_r:
  4651  				imm, from, from3, to := unpackOps4(p)
  4652  				ab.evex = newEVEXBits(z, &o.op)
  4653  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4654  				ab.asmand(ctxt, cursym, p, from, to)
  4655  				ab.Put1(byte(imm.Offset))
  4656  
  4657  			case Zevex_i_rm_v_k_r:
  4658  				imm, from, from3, kmask, to := unpackOps5(p)
  4659  				ab.evex = newEVEXBits(z, &o.op)
  4660  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4661  				ab.asmand(ctxt, cursym, p, from, to)
  4662  				ab.Put1(byte(imm.Offset))
  4663  
  4664  			case Zevex_r_v_rm:
  4665  				ab.evex = newEVEXBits(z, &o.op)
  4666  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4667  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4668  
  4669  			case Zevex_rm_v_r:
  4670  				ab.evex = newEVEXBits(z, &o.op)
  4671  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4672  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4673  
  4674  			case Zevex_rm_k_r:
  4675  				ab.evex = newEVEXBits(z, &o.op)
  4676  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4677  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4678  
  4679  			case Zevex_r_k_rm:
  4680  				ab.evex = newEVEXBits(z, &o.op)
  4681  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4682  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4683  
  4684  			case Zevex_rm_v_k_r:
  4685  				from, from3, kmask, to := unpackOps4(p)
  4686  				ab.evex = newEVEXBits(z, &o.op)
  4687  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4688  				ab.asmand(ctxt, cursym, p, from, to)
  4689  
  4690  			case Zevex_r_v_k_rm:
  4691  				from, from3, kmask, to := unpackOps4(p)
  4692  				ab.evex = newEVEXBits(z, &o.op)
  4693  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4694  				ab.asmand(ctxt, cursym, p, to, from)
  4695  
  4696  			case Zr_m_xm:
  4697  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4698  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4699  
  4700  			case Zr_m_xm_nr:
  4701  				ab.rexflag = 0
  4702  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4703  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4704  
  4705  			case Zo_m:
  4706  				ab.Put1(byte(op))
  4707  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4708  
  4709  			case Zcallindreg:
  4710  				r = obj.Addrel(cursym)
  4711  				r.Off = int32(p.Pc)
  4712  				r.Type = objabi.R_CALLIND
  4713  				r.Siz = 0
  4714  				fallthrough
  4715  
  4716  			case Zo_m64:
  4717  				ab.Put1(byte(op))
  4718  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4719  
  4720  			case Zm_ibo:
  4721  				ab.Put1(byte(op))
  4722  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4723  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4724  
  4725  			case Zibo_m:
  4726  				ab.Put1(byte(op))
  4727  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4728  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4729  
  4730  			case Zibo_m_xm:
  4731  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4732  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4733  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4734  
  4735  			case Z_ib, Zib_:
  4736  				if yt.zcase == Zib_ {
  4737  					a = &p.From
  4738  				} else {
  4739  					a = &p.To
  4740  				}
  4741  				ab.Put1(byte(op))
  4742  				if p.As == AXABORT {
  4743  					ab.Put1(o.op[z+1])
  4744  				}
  4745  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4746  
  4747  			case Zib_rp:
  4748  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4749  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4750  
  4751  			case Zil_rp:
  4752  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4753  				ab.Put1(byte(op + reg[p.To.Reg]))
  4754  				if o.prefix == Pe {
  4755  					v = vaddr(ctxt, p, &p.From, nil)
  4756  					ab.PutInt16(int16(v))
  4757  				} else {
  4758  					ab.relput4(ctxt, cursym, p, &p.From)
  4759  				}
  4760  
  4761  			case Zo_iw:
  4762  				ab.Put1(byte(op))
  4763  				if p.From.Type != obj.TYPE_NONE {
  4764  					v = vaddr(ctxt, p, &p.From, nil)
  4765  					ab.PutInt16(int16(v))
  4766  				}
  4767  
  4768  			case Ziq_rp:
  4769  				v = vaddr(ctxt, p, &p.From, &rel)
  4770  				l = int(v >> 32)
  4771  				if l == 0 && rel.Siz != 8 {
  4772  					ab.rexflag &^= (0x40 | Rxw)
  4773  
  4774  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4775  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4776  					if rel.Type != 0 {
  4777  						r = obj.Addrel(cursym)
  4778  						*r = rel
  4779  						r.Off = int32(p.Pc + int64(ab.Len()))
  4780  					}
  4781  
  4782  					ab.PutInt32(int32(v))
  4783  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4784  					ab.Put1(0xc7)
  4785  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4786  
  4787  					ab.PutInt32(int32(v)) // need all 8
  4788  				} else {
  4789  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4790  					ab.Put1(byte(op + reg[p.To.Reg]))
  4791  					if rel.Type != 0 {
  4792  						r = obj.Addrel(cursym)
  4793  						*r = rel
  4794  						r.Off = int32(p.Pc + int64(ab.Len()))
  4795  					}
  4796  
  4797  					ab.PutInt64(v)
  4798  				}
  4799  
  4800  			case Zib_rr:
  4801  				ab.Put1(byte(op))
  4802  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4803  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4804  
  4805  			case Z_il, Zil_:
  4806  				if yt.zcase == Zil_ {
  4807  					a = &p.From
  4808  				} else {
  4809  					a = &p.To
  4810  				}
  4811  				ab.Put1(byte(op))
  4812  				if o.prefix == Pe {
  4813  					v = vaddr(ctxt, p, a, nil)
  4814  					ab.PutInt16(int16(v))
  4815  				} else {
  4816  					ab.relput4(ctxt, cursym, p, a)
  4817  				}
  4818  
  4819  			case Zm_ilo, Zilo_m:
  4820  				ab.Put1(byte(op))
  4821  				if yt.zcase == Zilo_m {
  4822  					a = &p.From
  4823  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4824  				} else {
  4825  					a = &p.To
  4826  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4827  				}
  4828  
  4829  				if o.prefix == Pe {
  4830  					v = vaddr(ctxt, p, a, nil)
  4831  					ab.PutInt16(int16(v))
  4832  				} else {
  4833  					ab.relput4(ctxt, cursym, p, a)
  4834  				}
  4835  
  4836  			case Zil_rr:
  4837  				ab.Put1(byte(op))
  4838  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4839  				if o.prefix == Pe {
  4840  					v = vaddr(ctxt, p, &p.From, nil)
  4841  					ab.PutInt16(int16(v))
  4842  				} else {
  4843  					ab.relput4(ctxt, cursym, p, &p.From)
  4844  				}
  4845  
  4846  			case Z_rp:
  4847  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4848  				ab.Put1(byte(op + reg[p.To.Reg]))
  4849  
  4850  			case Zrp_:
  4851  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4852  				ab.Put1(byte(op + reg[p.From.Reg]))
  4853  
  4854  			case Zcallcon, Zjmpcon:
  4855  				if yt.zcase == Zcallcon {
  4856  					ab.Put1(byte(op))
  4857  				} else {
  4858  					ab.Put1(o.op[z+1])
  4859  				}
  4860  				r = obj.Addrel(cursym)
  4861  				r.Off = int32(p.Pc + int64(ab.Len()))
  4862  				r.Type = objabi.R_PCREL
  4863  				r.Siz = 4
  4864  				r.Add = p.To.Offset
  4865  				ab.PutInt32(0)
  4866  
  4867  			case Zcallind:
  4868  				ab.Put2(byte(op), o.op[z+1])
  4869  				r = obj.Addrel(cursym)
  4870  				r.Off = int32(p.Pc + int64(ab.Len()))
  4871  				if ctxt.Arch.Family == sys.AMD64 {
  4872  					r.Type = objabi.R_PCREL
  4873  				} else {
  4874  					r.Type = objabi.R_ADDR
  4875  				}
  4876  				r.Siz = 4
  4877  				r.Add = p.To.Offset
  4878  				r.Sym = p.To.Sym
  4879  				ab.PutInt32(0)
  4880  
  4881  			case Zcall, Zcallduff:
  4882  				if p.To.Sym == nil {
  4883  					ctxt.Diag("call without target")
  4884  					ctxt.DiagFlush()
  4885  					log.Fatalf("bad code")
  4886  				}
  4887  
  4888  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4889  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4890  				}
  4891  
  4892  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4893  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4894  					// (the call jumps into the middle of the function).
  4895  					// This makes it possible to see call sites for duffcopy/duffzero in
  4896  					// BP-based profiling tools like Linux perf (which is the
  4897  					// whole point of maintaining frame pointers in Go).
  4898  					// MOVQ BP, -16(SP)
  4899  					// LEAQ -16(SP), BP
  4900  					ab.Put(bpduff1)
  4901  				}
  4902  				ab.Put1(byte(op))
  4903  				r = obj.Addrel(cursym)
  4904  				r.Off = int32(p.Pc + int64(ab.Len()))
  4905  				r.Sym = p.To.Sym
  4906  				r.Add = p.To.Offset
  4907  				r.Type = objabi.R_CALL
  4908  				r.Siz = 4
  4909  				ab.PutInt32(0)
  4910  
  4911  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4912  					// Pop BP pushed above.
  4913  					// MOVQ 0(BP), BP
  4914  					ab.Put(bpduff2)
  4915  				}
  4916  
  4917  			// TODO: jump across functions needs reloc
  4918  			case Zbr, Zjmp, Zloop:
  4919  				if p.As == AXBEGIN {
  4920  					ab.Put1(byte(op))
  4921  				}
  4922  				if p.To.Sym != nil {
  4923  					if yt.zcase != Zjmp {
  4924  						ctxt.Diag("branch to ATEXT")
  4925  						ctxt.DiagFlush()
  4926  						log.Fatalf("bad code")
  4927  					}
  4928  
  4929  					ab.Put1(o.op[z+1])
  4930  					r = obj.Addrel(cursym)
  4931  					r.Off = int32(p.Pc + int64(ab.Len()))
  4932  					r.Sym = p.To.Sym
  4933  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4934  					// it can point to a trampoline instead of the destination itself.
  4935  					r.Type = objabi.R_CALL
  4936  					r.Siz = 4
  4937  					ab.PutInt32(0)
  4938  					break
  4939  				}
  4940  
  4941  				// Assumes q is in this function.
  4942  				// TODO: Check in input, preserve in brchain.
  4943  
  4944  				// Fill in backward jump now.
  4945  				q = p.To.Target()
  4946  
  4947  				if q == nil {
  4948  					ctxt.Diag("jmp/branch/loop without target")
  4949  					ctxt.DiagFlush()
  4950  					log.Fatalf("bad code")
  4951  				}
  4952  
  4953  				if p.Back&branchBackwards != 0 {
  4954  					v = q.Pc - (p.Pc + 2)
  4955  					if v >= -128 && p.As != AXBEGIN {
  4956  						if p.As == AJCXZL {
  4957  							ab.Put1(0x67)
  4958  						}
  4959  						ab.Put2(byte(op), byte(v))
  4960  					} else if yt.zcase == Zloop {
  4961  						ctxt.Diag("loop too far: %v", p)
  4962  					} else {
  4963  						v -= 5 - 2
  4964  						if p.As == AXBEGIN {
  4965  							v--
  4966  						}
  4967  						if yt.zcase == Zbr {
  4968  							ab.Put1(0x0f)
  4969  							v--
  4970  						}
  4971  
  4972  						ab.Put1(o.op[z+1])
  4973  						ab.PutInt32(int32(v))
  4974  					}
  4975  
  4976  					break
  4977  				}
  4978  
  4979  				// Annotate target; will fill in later.
  4980  				p.Forwd = q.Rel
  4981  
  4982  				q.Rel = p
  4983  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4984  					if p.As == AJCXZL {
  4985  						ab.Put1(0x67)
  4986  					}
  4987  					ab.Put2(byte(op), 0)
  4988  				} else if yt.zcase == Zloop {
  4989  					ctxt.Diag("loop too far: %v", p)
  4990  				} else {
  4991  					if yt.zcase == Zbr {
  4992  						ab.Put1(0x0f)
  4993  					}
  4994  					ab.Put1(o.op[z+1])
  4995  					ab.PutInt32(0)
  4996  				}
  4997  
  4998  			case Zbyte:
  4999  				v = vaddr(ctxt, p, &p.From, &rel)
  5000  				if rel.Siz != 0 {
  5001  					rel.Siz = uint8(op)
  5002  					r = obj.Addrel(cursym)
  5003  					*r = rel
  5004  					r.Off = int32(p.Pc + int64(ab.Len()))
  5005  				}
  5006  
  5007  				ab.Put1(byte(v))
  5008  				if op > 1 {
  5009  					ab.Put1(byte(v >> 8))
  5010  					if op > 2 {
  5011  						ab.PutInt16(int16(v >> 16))
  5012  						if op > 4 {
  5013  							ab.PutInt32(int32(v >> 32))
  5014  						}
  5015  					}
  5016  				}
  5017  			}
  5018  
  5019  			return
  5020  		}
  5021  	}
  5022  	f3t = Ynone * Ymax
  5023  	if p.GetFrom3() != nil {
  5024  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  5025  	}
  5026  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  5027  		var pp obj.Prog
  5028  		var t []byte
  5029  		if p.As == mo[0].as {
  5030  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  5031  				t = mo[0].op[:]
  5032  				switch mo[0].code {
  5033  				default:
  5034  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  5035  
  5036  				case movLit:
  5037  					for z = 0; t[z] != 0; z++ {
  5038  						ab.Put1(t[z])
  5039  					}
  5040  
  5041  				case movRegMem:
  5042  					ab.Put1(t[0])
  5043  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  5044  
  5045  				case movMemReg:
  5046  					ab.Put1(t[0])
  5047  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  5048  
  5049  				case movRegMem2op: // r,m - 2op
  5050  					ab.Put2(t[0], t[1])
  5051  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  5052  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  5053  
  5054  				case movMemReg2op:
  5055  					ab.Put2(t[0], t[1])
  5056  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  5057  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  5058  
  5059  				case movFullPtr:
  5060  					if t[0] != 0 {
  5061  						ab.Put1(t[0])
  5062  					}
  5063  					switch p.To.Index {
  5064  					default:
  5065  						goto bad
  5066  
  5067  					case REG_DS:
  5068  						ab.Put1(0xc5)
  5069  
  5070  					case REG_SS:
  5071  						ab.Put2(0x0f, 0xb2)
  5072  
  5073  					case REG_ES:
  5074  						ab.Put1(0xc4)
  5075  
  5076  					case REG_FS:
  5077  						ab.Put2(0x0f, 0xb4)
  5078  
  5079  					case REG_GS:
  5080  						ab.Put2(0x0f, 0xb5)
  5081  					}
  5082  
  5083  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5084  
  5085  				case movDoubleShift:
  5086  					if t[0] == Pw {
  5087  						if ctxt.Arch.Family != sys.AMD64 {
  5088  							ctxt.Diag("asmins: illegal 64: %v", p)
  5089  						}
  5090  						ab.rexflag |= Pw
  5091  						t = t[1:]
  5092  					} else if t[0] == Pe {
  5093  						ab.Put1(Pe)
  5094  						t = t[1:]
  5095  					}
  5096  
  5097  					switch p.From.Type {
  5098  					default:
  5099  						goto bad
  5100  
  5101  					case obj.TYPE_CONST:
  5102  						ab.Put2(0x0f, t[0])
  5103  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5104  						ab.Put1(byte(p.From.Offset))
  5105  
  5106  					case obj.TYPE_REG:
  5107  						switch p.From.Reg {
  5108  						default:
  5109  							goto bad
  5110  
  5111  						case REG_CL, REG_CX:
  5112  							ab.Put2(0x0f, t[1])
  5113  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5114  						}
  5115  					}
  5116  
  5117  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5118  				// where you load the TLS base register into a register and then index off that
  5119  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5120  				// are handled in prefixof above and should not be listed here.
  5121  				case movTLSReg:
  5122  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5123  						ctxt.Diag("invalid load of TLS: %v", p)
  5124  					}
  5125  
  5126  					if ctxt.Arch.Family == sys.I386 {
  5127  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5128  						// where you load the TLS base register into a register and then index off that
  5129  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5130  						// are handled in prefixof above and should not be listed here.
  5131  						switch ctxt.Headtype {
  5132  						default:
  5133  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5134  
  5135  						case objabi.Hlinux, objabi.Hfreebsd:
  5136  							if ctxt.Flag_shared {
  5137  								// Note that this is not generating the same insns as the other cases.
  5138  								//     MOV TLS, dst
  5139  								// becomes
  5140  								//     call __x86.get_pc_thunk.dst
  5141  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5142  								// which is encoded as
  5143  								//     call __x86.get_pc_thunk.dst
  5144  								//     movq 0(dst), dst
  5145  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5146  								// is g, which we can't check here, but will when we assemble the second
  5147  								// instruction.
  5148  								dst := p.To.Reg
  5149  								ab.Put1(0xe8)
  5150  								r = obj.Addrel(cursym)
  5151  								r.Off = int32(p.Pc + int64(ab.Len()))
  5152  								r.Type = objabi.R_CALL
  5153  								r.Siz = 4
  5154  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  5155  								ab.PutInt32(0)
  5156  
  5157  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5158  								r = obj.Addrel(cursym)
  5159  								r.Off = int32(p.Pc + int64(ab.Len()))
  5160  								r.Type = objabi.R_TLS_IE
  5161  								r.Siz = 4
  5162  								r.Add = 2
  5163  								ab.PutInt32(0)
  5164  							} else {
  5165  								// ELF TLS base is 0(GS).
  5166  								pp.From = p.From
  5167  
  5168  								pp.From.Type = obj.TYPE_MEM
  5169  								pp.From.Reg = REG_GS
  5170  								pp.From.Offset = 0
  5171  								pp.From.Index = REG_NONE
  5172  								pp.From.Scale = 0
  5173  								ab.Put2(0x65, // GS
  5174  									0x8B)
  5175  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5176  							}
  5177  						case objabi.Hplan9:
  5178  							pp.From = obj.Addr{}
  5179  							pp.From.Type = obj.TYPE_MEM
  5180  							pp.From.Name = obj.NAME_EXTERN
  5181  							pp.From.Sym = plan9privates
  5182  							pp.From.Offset = 0
  5183  							pp.From.Index = REG_NONE
  5184  							ab.Put1(0x8B)
  5185  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5186  						}
  5187  						break
  5188  					}
  5189  
  5190  					switch ctxt.Headtype {
  5191  					default:
  5192  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5193  
  5194  					case objabi.Hlinux, objabi.Hfreebsd:
  5195  						if !ctxt.Flag_shared {
  5196  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5197  						}
  5198  						// Note that this is not generating the same insn as the other cases.
  5199  						//     MOV TLS, R_to
  5200  						// becomes
  5201  						//     movq g@gottpoff(%rip), R_to
  5202  						// which is encoded as
  5203  						//     movq 0(%rip), R_to
  5204  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5205  						// is g, which we can't check here, but will when we assemble the second
  5206  						// instruction.
  5207  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5208  
  5209  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5210  						r = obj.Addrel(cursym)
  5211  						r.Off = int32(p.Pc + int64(ab.Len()))
  5212  						r.Type = objabi.R_TLS_IE
  5213  						r.Siz = 4
  5214  						r.Add = -4
  5215  						ab.PutInt32(0)
  5216  
  5217  					case objabi.Hplan9:
  5218  						pp.From = obj.Addr{}
  5219  						pp.From.Type = obj.TYPE_MEM
  5220  						pp.From.Name = obj.NAME_EXTERN
  5221  						pp.From.Sym = plan9privates
  5222  						pp.From.Offset = 0
  5223  						pp.From.Index = REG_NONE
  5224  						ab.rexflag |= Pw
  5225  						ab.Put1(0x8B)
  5226  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5227  
  5228  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5229  						// TLS base is 0(FS).
  5230  						pp.From = p.From
  5231  
  5232  						pp.From.Type = obj.TYPE_MEM
  5233  						pp.From.Name = obj.NAME_NONE
  5234  						pp.From.Reg = REG_NONE
  5235  						pp.From.Offset = 0
  5236  						pp.From.Index = REG_NONE
  5237  						pp.From.Scale = 0
  5238  						ab.rexflag |= Pw
  5239  						ab.Put2(0x64, // FS
  5240  							0x8B)
  5241  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5242  					}
  5243  				}
  5244  				return
  5245  			}
  5246  		}
  5247  	}
  5248  	goto bad
  5249  
  5250  bad:
  5251  	if ctxt.Arch.Family != sys.AMD64 {
  5252  		// here, the assembly has failed.
  5253  		// if it's a byte instruction that has
  5254  		// unaddressable registers, try to
  5255  		// exchange registers and reissue the
  5256  		// instruction with the operands renamed.
  5257  		pp := *p
  5258  
  5259  		unbytereg(&pp.From, &pp.Ft)
  5260  		unbytereg(&pp.To, &pp.Tt)
  5261  
  5262  		z := int(p.From.Reg)
  5263  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5264  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5265  			// For now, different to keep bit-for-bit compatibility.
  5266  			if ctxt.Arch.Family == sys.I386 {
  5267  				breg := byteswapreg(ctxt, &p.To)
  5268  				if breg != REG_AX {
  5269  					ab.Put1(0x87) // xchg lhs,bx
  5270  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5271  					subreg(&pp, z, breg)
  5272  					ab.doasm(ctxt, cursym, &pp)
  5273  					ab.Put1(0x87) // xchg lhs,bx
  5274  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5275  				} else {
  5276  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5277  					subreg(&pp, z, REG_AX)
  5278  					ab.doasm(ctxt, cursym, &pp)
  5279  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5280  				}
  5281  				return
  5282  			}
  5283  
  5284  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5285  				// We certainly don't want to exchange
  5286  				// with AX if the op is MUL or DIV.
  5287  				ab.Put1(0x87) // xchg lhs,bx
  5288  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5289  				subreg(&pp, z, REG_BX)
  5290  				ab.doasm(ctxt, cursym, &pp)
  5291  				ab.Put1(0x87) // xchg lhs,bx
  5292  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5293  			} else {
  5294  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5295  				subreg(&pp, z, REG_AX)
  5296  				ab.doasm(ctxt, cursym, &pp)
  5297  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5298  			}
  5299  			return
  5300  		}
  5301  
  5302  		z = int(p.To.Reg)
  5303  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5304  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5305  			// For now, different to keep bit-for-bit compatibility.
  5306  			if ctxt.Arch.Family == sys.I386 {
  5307  				breg := byteswapreg(ctxt, &p.From)
  5308  				if breg != REG_AX {
  5309  					ab.Put1(0x87) //xchg rhs,bx
  5310  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5311  					subreg(&pp, z, breg)
  5312  					ab.doasm(ctxt, cursym, &pp)
  5313  					ab.Put1(0x87) // xchg rhs,bx
  5314  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5315  				} else {
  5316  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5317  					subreg(&pp, z, REG_AX)
  5318  					ab.doasm(ctxt, cursym, &pp)
  5319  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5320  				}
  5321  				return
  5322  			}
  5323  
  5324  			if isax(&p.From) {
  5325  				ab.Put1(0x87) // xchg rhs,bx
  5326  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5327  				subreg(&pp, z, REG_BX)
  5328  				ab.doasm(ctxt, cursym, &pp)
  5329  				ab.Put1(0x87) // xchg rhs,bx
  5330  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5331  			} else {
  5332  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5333  				subreg(&pp, z, REG_AX)
  5334  				ab.doasm(ctxt, cursym, &pp)
  5335  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5336  			}
  5337  			return
  5338  		}
  5339  	}
  5340  
  5341  	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
  5342  }
  5343  
  5344  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5345  // which is not referenced in a.
  5346  // If a is empty, it returns BX to account for MULB-like instructions
  5347  // that might use DX and AX.
  5348  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5349  	cana, canb, canc, cand := true, true, true, true
  5350  	if a.Type == obj.TYPE_NONE {
  5351  		cana, cand = false, false
  5352  	}
  5353  
  5354  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5355  		switch a.Reg {
  5356  		case REG_NONE:
  5357  			cana, cand = false, false
  5358  		case REG_AX, REG_AL, REG_AH:
  5359  			cana = false
  5360  		case REG_BX, REG_BL, REG_BH:
  5361  			canb = false
  5362  		case REG_CX, REG_CL, REG_CH:
  5363  			canc = false
  5364  		case REG_DX, REG_DL, REG_DH:
  5365  			cand = false
  5366  		}
  5367  	}
  5368  
  5369  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5370  		switch a.Index {
  5371  		case REG_AX:
  5372  			cana = false
  5373  		case REG_BX:
  5374  			canb = false
  5375  		case REG_CX:
  5376  			canc = false
  5377  		case REG_DX:
  5378  			cand = false
  5379  		}
  5380  	}
  5381  
  5382  	switch {
  5383  	case cana:
  5384  		return REG_AX
  5385  	case canb:
  5386  		return REG_BX
  5387  	case canc:
  5388  		return REG_CX
  5389  	case cand:
  5390  		return REG_DX
  5391  	default:
  5392  		ctxt.Diag("impossible byte register")
  5393  		ctxt.DiagFlush()
  5394  		log.Fatalf("bad code")
  5395  		return 0
  5396  	}
  5397  }
  5398  
  5399  func isbadbyte(a *obj.Addr) bool {
  5400  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5401  }
  5402  
  5403  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5404  	ab.Reset()
  5405  
  5406  	ab.rexflag = 0
  5407  	ab.vexflag = false
  5408  	ab.evexflag = false
  5409  	mark := ab.Len()
  5410  	ab.doasm(ctxt, cursym, p)
  5411  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5412  		// as befits the whole approach of the architecture,
  5413  		// the rex prefix must appear before the first opcode byte
  5414  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5415  		// before the 0f opcode escape!), or it might be ignored.
  5416  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5417  		if ctxt.Arch.Family != sys.AMD64 {
  5418  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5419  		}
  5420  		n := ab.Len()
  5421  		var np int
  5422  		for np = mark; np < n; np++ {
  5423  			c := ab.At(np)
  5424  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5425  				break
  5426  			}
  5427  		}
  5428  		ab.Insert(np, byte(0x40|ab.rexflag))
  5429  	}
  5430  
  5431  	n := ab.Len()
  5432  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5433  		r := &cursym.R[i]
  5434  		if int64(r.Off) < p.Pc {
  5435  			break
  5436  		}
  5437  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5438  			r.Off++
  5439  		}
  5440  		if r.Type == objabi.R_PCREL {
  5441  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5442  				// PC-relative addressing is relative to the end of the instruction,
  5443  				// but the relocations applied by the linker are relative to the end
  5444  				// of the relocation. Because immediate instruction
  5445  				// arguments can follow the PC-relative memory reference in the
  5446  				// instruction encoding, the two may not coincide. In this case,
  5447  				// adjust addend so that linker can keep relocating relative to the
  5448  				// end of the relocation.
  5449  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5450  			} else if ctxt.Arch.Family == sys.I386 {
  5451  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5452  				// assumes that the previous instruction loaded the PC of the end
  5453  				// of that instruction into CX, so the adjustment is relative to
  5454  				// that.
  5455  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5456  			}
  5457  		}
  5458  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5459  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5460  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5461  		}
  5462  
  5463  	}
  5464  }
  5465  
  5466  // unpackOps4 extracts 4 operands from p.
  5467  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5468  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5469  }
  5470  
  5471  // unpackOps5 extracts 5 operands from p.
  5472  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5473  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5474  }