github.com/bir3/gocompiler@v0.9.2202/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"github.com/bir3/gocompiler/src/cmd/internal/obj"
    35  	"github.com/bir3/gocompiler/src/cmd/internal/objabi"
    36  	"github.com/bir3/gocompiler/src/cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"github.com/bir3/gocompiler/src/internal/buildcfg"
    40  	"log"
    41  	"strings"
    42  )
    43  
        // plan9privates is a package-level symbol pointer; nothing in this part of
        // the file assigns or reads it.
        // NOTE(review): presumably the symbol for Plan 9 per-process private data
        // used when assembling TLS accesses; confirm at its use sites.
    44  var (
    45  	plan9privates *obj.LSym
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // want to align loop entry to loopAlign-byte boundary,
    52  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  const (
    63  	loopAlign	= 16	// alignment boundary, in bytes, for loop entries
    64  	maxLoopPad	= 0	// maximum bytes of NOP padding per loop entry; 0 disables alignment
    65  )
    66  
    67  // Bit flags that are used to express jump target properties.
        // Each flag is a distinct bit (1, 2 and 4, in declaration order).
    68  const (
    69  	// branchBackwards marks targets that are located behind the jump.
    70  	// Used to express jumps to loop headers.
    71  	branchBackwards	= (1 << iota)
    72  	// branchShort marks branches whose target is close,
    73  	// with the offset in the -128..127 range.
    74  	branchShort
    75  	// branchLoopHead marks a loop entry.
    76  	// Used to insert padding for misaligned loops.
    77  	branchLoopHead
    78  )
    79  
    80  // opBytes holds optab encoding bytes.
    81  // Each ytab line reserves a fixed number of bytes in this array.
    82  //
    83  // The size should be the minimal number of bytes that
    84  // is enough to hold the biggest optab op lines.
        //
        // optab literals list only the bytes they use; the remaining
        // elements keep their zero value.
    85  type opBytes [31]uint8
    86  
        // Optab is one row of the optab instruction table: it ties an instruction
        // to the operand forms it accepts and to the bytes used to encode them.
        // Rows in optab are written as unkeyed literals, so the field order matters.
    87  type Optab struct {
    88  	as	obj.As	// instruction this row describes (e.g. AADDL)
    89  	ytab	[]ytab	// accepted operand forms, scanned in order; nil in rows such as obj.AXXX
    90  	prefix	uint8	// P* constant (Px, Pe, Pw, ...) controlling the prefix bytes to emit
    91  	op	opBytes	// encoding bytes; each ytab line owns a fixed-size slice of them
    92  }
    93  
        // movtab describes one row of a table of special-case moves; the table and
        // the code that interprets it are outside this part of the file.
        // NOTE(review): ft, f3t and tt look like operand classes (Y* constants) for
        // the From, third and To operands, and code looks like a selector for how
        // op is emitted; confirm against the table's consumer.
    94  type movtab struct {
    95  	as	obj.As
    96  	ft	uint8
    97  	f3t	uint8
    98  	tt	uint8
    99  	code	uint8
   100  	op	[4]uint8
   101  }
   102  
        // Operand classes (Ytypes). ytab lines list them in their argList. Per the
        // commentary before optab, oclass computes the most specific class of an
        // operand and ycover records which more general classes it also satisfies.
        // The values come from iota, so the order is significant; Ymax must stay
        // last because it sizes ycover.
   103  const (
   104  	Yxxx	= iota
   105  	Ynone
   106  	Yi0	// $0
   107  	Yi1	// $1
   108  	Yu2	// $x, x fits in uint2
   109  	Yi8	// $x, x fits in int8
   110  	Yu8	// $x, x fits in uint8
   111  	Yu7	// $x, x in 0..127 (fits in both int8 and uint8)
   112  	Ys32	// signed 32; Yi0, Yi1 and Yi8 also count as Ys32 (see ycover)
   113  	Yi32
   114  	Yi64
   115  	Yiauto
   116  	Yal
   117  	Ycl
   118  	Yax
   119  	Ycx
   120  	Yrb
   121  	Yrl
   122  	Yrl32	// Yrl on 32-bit system
   123  	Yrf
   124  	Yf0
   125  	Yrx
   126  	Ymb
   127  	Yml
   128  	Ym
   129  	Ybr
   130  	Ycs
   131  	Yss
   132  	Yds
   133  	Yes
   134  	Yfs
   135  	Ygs
   136  	Ygdtr
   137  	Yidtr
   138  	Yldtr
   139  	Ymsw
   140  	Ytask
   141  	Ycr0
   142  	Ycr1
   143  	Ycr2
   144  	Ycr3
   145  	Ycr4
   146  	Ycr5
   147  	Ycr6
   148  	Ycr7
   149  	Ycr8
   150  	Ydr0
   151  	Ydr1
   152  	Ydr2
   153  	Ydr3
   154  	Ydr4
   155  	Ydr5
   156  	Ydr6
   157  	Ydr7
   158  	Ytr0
   159  	Ytr1
   160  	Ytr2
   161  	Ytr3
   162  	Ytr4
   163  	Ytr5
   164  	Ytr6
   165  	Ytr7
   166  	Ymr
   167  	Ymm
   168  	Yxr0		// X0 only. "<XMM0>" notation in Intel manual.
   169  	YxrEvexMulti4	// [ X<n> - X<n+3> ]; multisource YxrEvex
   170  	Yxr		// X0..X15
   171  	YxrEvex		// X0..X31
   172  	Yxm		// NOTE(review): presumably Yxr+Ym, by analogy with YxmEvex
   173  	YxmEvex		// YxrEvex+Ym
   174  	Yxvm		// VSIB vector array; vm32x/vm64x
   175  	YxvmEvex	// Yxvm which permits High-16 X register as index.
   176  	YyrEvexMulti4	// [ Y<n> - Y<n+3> ]; multisource YyrEvex
   177  	Yyr		// Y0..Y15
   178  	YyrEvex		// Y0..Y31
   179  	Yym		// NOTE(review): presumably Yyr+Ym, by analogy with YymEvex
   180  	YymEvex		// YyrEvex+Ym
   181  	Yyvm		// VSIB vector array; vm32y/vm64y
   182  	YyvmEvex	// Yyvm which permits High-16 Y register as index.
   183  	YzrMulti4	// [ Z<n> - Z<n+3> ]; multisource YzrEvex
   184  	Yzr		// Z0..Z31
   185  	Yzm		// Yzr+Ym
   186  	Yzvm		// VSIB vector array; vm32z/vm64z
   187  	Yk0		// K0
   188  	Yknot0		// K1..K7; write mask
   189  	Yk		// K0..K7; used for KOP
   190  	Ykm		// Yk+Ym; used for KOP
   191  	Ytls
   192  	Ytextsize
   193  	Yindir
   194  	Ymax	// number of operand classes; ycover has Ymax*Ymax entries
   195  )
   196  
        // Encoding forms (Ztypes). The first field of every ytab line is one of
        // these; per the commentary before optab, doasm switches on it (yt.zcase)
        // to decide how the opcode bytes and operands are laid down.
        // The values come from iota, so the order is significant.
   197  const (
   198  	Zxxx	= iota
   199  	Zlit
   200  	Zlitm_r
   201  	Zlitr_m
   202  	Zlit_m_r
   203  	Z_rp
   204  	Zbr
   205  	Zcall
   206  	Zcallcon
   207  	Zcallduff
   208  	Zcallind
   209  	Zcallindreg
   210  	Zib_
   211  	Zib_rp
   212  	Zibo_m	// opcode byte, then the addressing mode, then an 8-bit immediate
   213  	Zibo_m_xm
   214  	Zil_
   215  	Zil_rp
   216  	Ziq_rp
   217  	Zilo_m	// like Zibo_m but with a long (32-bit) immediate
   218  	Zjmp
   219  	Zjmpcon
   220  	Zloop
   221  	Zo_iw
   222  	Zm_o
   223  	Zm_r
   224  	Z_m_r
   225  	Zm2_r
   226  	Zm_r_xm
   227  	Zm_r_i_xm
   228  	Zm_r_xm_nr
   229  	Zr_m_xm_nr
   230  	Zibm_r	// mmx1,mmx2/mem64,imm8
   231  	Zibr_m
   232  	Zmb_r
   233  	Zaut_r
   234  	Zo_m
   235  	Zo_m64
   236  	Zpseudo
   237  	Zr_m
   238  	Zr_m_xm
   239  	Zrp_
   240  	Z_ib
   241  	Z_il
   242  	Zm_ibo
   243  	Zm_ilo
   244  	Zib_rr
   245  	Zil_rr
   246  	Zbyte
   247  
        	// VEX-encoded forms.
   248  	Zvex_rm_v_r
   249  	Zvex_rm_v_ro
   250  	Zvex_r_v_rm
   251  	Zvex_i_rm_vo
   252  	Zvex_v_rm_r
   253  	Zvex_i_rm_r
   254  	Zvex_i_r_v
   255  	Zvex_i_rm_v_r
   256  	Zvex
   257  	Zvex_rm_r_vo
   258  	Zvex_i_r_rm
   259  	Zvex_hr_rm_v_r
   260  
        	// EVEX-encoded forms, bracketed by Zevex_first and Zevex_last.
   261  	Zevex_first	// marker: sorts before every Zevex_* form
   262  	Zevex_i_r_k_rm
   263  	Zevex_i_r_rm
   264  	Zevex_i_rm_k_r
   265  	Zevex_i_rm_k_vo
   266  	Zevex_i_rm_r
   267  	Zevex_i_rm_v_k_r
   268  	Zevex_i_rm_v_r
   269  	Zevex_i_rm_vo
   270  	Zevex_k_rmo
   271  	Zevex_r_k_rm
   272  	Zevex_r_v_k_rm
   273  	Zevex_r_v_rm
   274  	Zevex_rm_k_r
   275  	Zevex_rm_v_k_r
   276  	Zevex_rm_v_r
   277  	Zevex_last	// marker: sorts after every Zevex_* form
   278  
   279  	Zmax	// number of Ztypes
   280  )
   281  
        // P* constants select the prefix/escape bytes of an instruction. They appear
        // as the prefix field of optab rows and, in some rows, as the leading byte
        // of opBytes (e.g. opBytes{Pe, 0xc2} for ACMPPD). Several values are only
        // symbolic tags, as noted on each line.
   282  const (
   283  	Px	= 0
   284  	Px1	= 1	// symbolic; exact value doesn't matter
   285  	P32	= 0x32	// 32-bit only
   286  	Pe	= 0x66	// operand escape
   287  	Pm	= 0x0f	// 2byte opcode escape
   288  	Pq	= 0xff	// both escapes: 66 0f
   289  	Pb	= 0xfe	// byte operands
   290  	Pf2	= 0xf2	// xmm escape 1: f2 0f
   291  	Pf3	= 0xf3	// xmm escape 2: f3 0f
   292  	Pef3	= 0xf5	// xmm escape 2 with 16-bit prefix: 66 f3 0f
   293  	Pq3	= 0x67	// xmm escape 3: 66 48 0f
   294  	Pq4	= 0x68	// xmm escape 4: 66 0F 38
   295  	Pq4w	= 0x69	// Pq4 with Rex.w 66 0F 38
   296  	Pq5	= 0x6a	// xmm escape 5: F3 0F 38
   297  	Pq5w	= 0x6b	// Pq5 with Rex.w F3 0F 38
   298  	Pfw	= 0xf4	// Pf3 with Rex.w: f3 48 0f
   299  	Pw	= 0x48	// Rex.w
   300  	Pw8	= 0x90	// symbolic; exact value doesn't matter
   301  	Py	= 0x80	// defaults to 64-bit mode
   302  	Py1	= 0x81	// symbolic; exact value doesn't matter
   303  	Py3	= 0x83	// symbolic; exact value doesn't matter
   304  	Pavx	= 0x84	// symbolic; exact value doesn't matter
   305  
        	// Rx* are single-bit masks: bits 0-3 are the REX B, X, R and W bits,
        	// and bit 4 is the AVX512 extension of the R bit.
   306  	RxrEvex	= 1 << 4	// AVX512 extension to REX.R/VEX.R
   307  	Rxw	= 1 << 3	// =1, 64-bit operand size
   308  	Rxr	= 1 << 2	// extend modrm reg
   309  	Rxx	= 1 << 1	// extend sib index
   310  	Rxb	= 1 << 0	// extend modrm r/m, sib base, or opcode reg
   311  )
   312  
   313  const (
   314  	// Encoding for VEX prefix in tables.
   315  	// The P, L, and W fields are chosen to match
   316  	// their eventual locations in the VEX prefix bytes.
   317  
   318  	// As declared below, the fields occupy disjoint bits of a single byte:
   319  	// P in bits 0-1, L in bit 2, M in bits 3-4, avxEscape in bit 6
   320  	// and W in bit 7.
   321  
   322  	// Using spare bit to make leading [E]VEX encoding byte different from
   323  	// 0x0f even if all other VEX fields are 0.
   324  	avxEscape	= 1 << 6
   325  
   326  	// P field - 2 bits
   327  	vex66	= 1 << 0
   328  	vexF3	= 2 << 0
   329  	vexF2	= 3 << 0
   330  	// L field - 1 bit
        	// vexLZ, vexLIG and vex128 all have the value 0.
   331  	vexLZ	= 0 << 2
   332  	vexLIG	= 0 << 2
   333  	vex128	= 0 << 2
   334  	vex256	= 1 << 2
   335  	// W field - 1 bit
        	// vexWIG and vexW0 both have the value 0.
   336  	vexWIG	= 0 << 7
   337  	vexW0	= 0 << 7
   338  	vexW1	= 1 << 7
   339  	// M field - 5 bits, but mostly reserved; we can store up to 3
   340  	vex0F	= 1 << 3
   341  	vex0F38	= 2 << 3
   342  	vex0F3A	= 3 << 3
   343  )
   344  
        // ycover is a Ymax-by-Ymax matrix stored flat: ycover[from*Ymax+to] is 1
        // when an operand of class from also counts as class to. The commentary
        // before optab says it is set up in instinit.
   345  var ycover [Ymax * Ymax]uint8
   346  
        // reg has one entry per register number below MAXREG; it is filled in
        // outside this part of the file.
        // NOTE(review): presumably the register's encoding number; confirm in instinit.
   347  var reg [MAXREG]int
   348  
        // regrex has MAXREG+1 entries; it is filled in outside this part of the file.
        // NOTE(review): presumably the Rx* bits a register needs; confirm in instinit.
   349  var regrex [MAXREG + 1]int
   350  
        // Each ytab line below is {Ztype, zoffset, argList{operand classes}}:
        // zoffset is how many bytes of the optab row's opBytes belong to the line.

        // ynone is the table for instructions written without operands
        // (in optab: AAAA, ACBW, ACLC, ...). Its single line owns one opcode byte.
   351  var ynone = []ytab{
   352  	{Zlit, 1, argList{}},
   353  }
   354  
        // ytext holds two Zpseudo forms: (Ymb, Ytextsize) and (Ymb, Yi32, Ytextsize).
        // NOTE(review): presumably used by the TEXT pseudo-instruction; confirm in optab.
   355  var ytext = []ytab{
   356  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   357  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   358  }
   359  
        // ynop holds Zpseudo forms with no operand or a single Yiauto, Yml, Yrf or
        // Yxr operand.
        // NOTE(review): the Yiauto, Yml, Yrf and Yxr lines each appear twice. Since
        // the table is scanned in order, the later copies look unreachable;
        // presumably left over from a layout that distinguished From from To.
   360  var ynop = []ytab{
   361  	{Zpseudo, 0, argList{}},
   362  	{Zpseudo, 0, argList{Yiauto}},
   363  	{Zpseudo, 0, argList{Yml}},
   364  	{Zpseudo, 0, argList{Yrf}},
   365  	{Zpseudo, 0, argList{Yxr}},
   366  	{Zpseudo, 0, argList{Yiauto}},
   367  	{Zpseudo, 0, argList{Yml}},
   368  	{Zpseudo, 0, argList{Yrf}},
   369  	{Zpseudo, 1, argList{Yxr}},
   370  }
   371  
        // yfuncdata holds the single Zpseudo form (Yi32, Ym).
   372  var yfuncdata = []ytab{
   373  	{Zpseudo, 0, argList{Yi32, Ym}},
   374  }
   375  
        // ypcdata holds the single Zpseudo form (Yi32, Yi32).
   376  var ypcdata = []ytab{
   377  	{Zpseudo, 0, argList{Yi32, Yi32}},
   378  }
   379  
        // yxorb holds the operand forms of the byte-sized rows AADCB, AADDB and
        // AANDB in optab. Its lines own 1, 2, 1 and 1 bytes of opBytes, e.g. for
        // AADDB: 0x04 | 0x80, 00 | 0x00 | 0x02.
   380  var yxorb = []ytab{
   381  	{Zib_, 1, argList{Yi32, Yal}},
   382  	{Zibo_m, 2, argList{Yi32, Ymb}},
   383  	{Zr_m, 1, argList{Yrb, Ymb}},
   384  	{Zm_r, 1, argList{Ymb, Yrb}},
   385  }
   386  
        // yaddl holds the operand forms of the L/Q/W rows of ADC, ADD and AND in
        // optab. The Yi8 line is listed first, so an immediate that fits in int8 is
        // matched by it before the Yi32 lines are tried. The commentary before
        // optab walks through this table for AADDL.
   387  var yaddl = []ytab{
   388  	{Zibo_m, 2, argList{Yi8, Yml}},
   389  	{Zil_, 1, argList{Yi32, Yax}},
   390  	{Zilo_m, 2, argList{Yi32, Yml}},
   391  	{Zr_m, 1, argList{Yrl, Yml}},
   392  	{Zm_r, 1, argList{Yml, Yrl}},
   393  }
   394  
        // yincl accepts a single operand: Yrl (Z_rp, 1 byte) or Yml (Zo_m, 2 bytes).
   395  var yincl = []ytab{
   396  	{Z_rp, 1, argList{Yrl}},
   397  	{Zo_m, 2, argList{Yml}},
   398  }
   399  
        // yincq is yincl without the Yrl/Z_rp line: only the (Yml) Zo_m form.
   400  var yincq = []ytab{
   401  	{Zo_m, 2, argList{Yml}},
   402  }
   403  
        // ycmpb holds the operand forms of ACMPB in optab. It mirrors yxorb with
        // the operand order swapped: the immediate is the second operand here.
   404  var ycmpb = []ytab{
   405  	{Z_ib, 1, argList{Yal, Yi32}},
   406  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   407  	{Zm_r, 1, argList{Ymb, Yrb}},
   408  	{Zr_m, 1, argList{Yrb, Ymb}},
   409  }
   410  
        // ycmpl holds the operand forms of ACMPL, ACMPQ and ACMPW in optab. It
        // mirrors yaddl with the immediate as the second operand; the Yi8 line is
        // again tried first.
   411  var ycmpl = []ytab{
   412  	{Zm_ibo, 2, argList{Yml, Yi8}},
   413  	{Z_il, 1, argList{Yax, Yi32}},
   414  	{Zm_ilo, 2, argList{Yml, Yi32}},
   415  	{Zm_r, 1, argList{Yml, Yrl}},
   416  	{Zr_m, 1, argList{Yrl, Yml}},
   417  }
   418  
        // yshb holds forms whose first operand is Yi1, Yu8 or Ycx and whose second
        // operand is Ymb; every line owns 2 bytes of opBytes.
   419  var yshb = []ytab{
   420  	{Zo_m, 2, argList{Yi1, Ymb}},
   421  	{Zibo_m, 2, argList{Yu8, Ymb}},
   422  	{Zo_m, 2, argList{Ycx, Ymb}},
   423  }
   424  
        // yshl is the Yml counterpart of yshb, with an additional (Ycl, Yml) line.
   425  var yshl = []ytab{
   426  	{Zo_m, 2, argList{Yi1, Yml}},
   427  	{Zibo_m, 2, argList{Yu8, Yml}},
   428  	{Zo_m, 2, argList{Ycl, Yml}},
   429  	{Zo_m, 2, argList{Ycx, Yml}},
   430  }
   431  
        // ytestl has the same lines as yaddl except that there is no Yi8 form.
   432  var ytestl = []ytab{
   433  	{Zil_, 1, argList{Yi32, Yax}},
   434  	{Zilo_m, 2, argList{Yi32, Yml}},
   435  	{Zr_m, 1, argList{Yrl, Yml}},
   436  	{Zm_r, 1, argList{Yml, Yrl}},
   437  }
   438  
        // ymovb holds byte move forms: Yrb to Ymb, Ymb to Yrb, and a Yi32 operand
        // to Yrb (Zib_rp) or to Ymb (Zibo_m).
   439  var ymovb = []ytab{
   440  	{Zr_m, 1, argList{Yrb, Ymb}},
   441  	{Zm_r, 1, argList{Ymb, Yrb}},
   442  	{Zib_rp, 1, argList{Yi32, Yrb}},
   443  	{Zibo_m, 2, argList{Yi32, Ymb}},
   444  }
   445  
        // ybtl holds the operand forms of the ABT*, ABTC*, ABTR* and ABTS* rows in
        // optab: (Yi8, Yml) owning 2 bytes and (Yrl, Yml) owning 1 byte.
   446  var ybtl = []ytab{
   447  	{Zibo_m, 2, argList{Yi8, Yml}},
   448  	{Zr_m, 1, argList{Yrl, Yml}},
   449  }
   450  
        // ymovw holds Yrl/Yml move forms, Yi32 to Yrl or Yml, and a
        // (Yiauto, Yrl) form encoded with Zaut_r.
   451  var ymovw = []ytab{
   452  	{Zr_m, 1, argList{Yrl, Yml}},
   453  	{Zm_r, 1, argList{Yml, Yrl}},
   454  	{Zil_rp, 1, argList{Yi32, Yrl}},
   455  	{Zilo_m, 2, argList{Yi32, Yml}},
   456  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   457  }
   458  
        // ymovl has the same lines as ymovw, with the MMX and XMM MOVD forms
        // inserted before the final Zaut_r line.
   459  var ymovl = []ytab{
   460  	{Zr_m, 1, argList{Yrl, Yml}},
   461  	{Zm_r, 1, argList{Yml, Yrl}},
   462  	{Zil_rp, 1, argList{Yi32, Yrl}},
   463  	{Zilo_m, 2, argList{Yi32, Yml}},
   464  	{Zm_r_xm, 1, argList{Yml, Ymr}},	// MMX MOVD
   465  	{Zr_m_xm, 1, argList{Ymr, Yml}},	// MMX MOVD
   466  	{Zm_r_xm, 2, argList{Yml, Yxr}},	// XMM MOVD (32 bit)
   467  	{Zr_m_xm, 2, argList{Yxr, Yml}},	// XMM MOVD (32 bit)
   468  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   469  }
   470  
        // yret accepts no operand or a single Yi32 operand; both lines use Zo_iw.
   471  var yret = []ytab{
   472  	{Zo_iw, 1, argList{}},
   473  	{Zo_iw, 1, argList{Yi32}},
   474  }
   475  
        // ymovq holds the MOVQ operand forms. The trailing comment on each line
        // names the prefix and opcode byte that the line selects.
   476  var ymovq = []ytab{
   477  	// valid in 32-bit mode
   478  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},	// 0x6f MMX MOVQ (shorter encoding)
   479  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},	// 0x7f MMX MOVQ
   480  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}},	// Pf2, 0xd6 MOVDQ2Q
   481  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}},	// Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   482  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}},	// Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   483  
   484  	// valid only in 64-bit mode, usually with 64-bit prefix
   485  	{Zr_m, 1, argList{Yrl, Yml}},		// 0x89
   486  	{Zm_r, 1, argList{Yml, Yrl}},		// 0x8b
   487  	{Zilo_m, 2, argList{Ys32, Yrl}},	// 32 bit signed 0xc7,(0)
   488  	{Ziq_rp, 1, argList{Yi64, Yrl}},	// 0xb8 -- 32/64 bit immediate
   489  	{Zilo_m, 2, argList{Yi32, Yml}},	// 0xc7,(0)
   490  	{Zm_r_xm, 1, argList{Ymm, Ymr}},	// 0x6e MMX MOVD
   491  	{Zr_m_xm, 1, argList{Ymr, Ymm}},	// 0x7e MMX MOVD
   492  	{Zm_r_xm, 2, argList{Yml, Yxr}},	// Pe, 0x6e MOVD xmm load
   493  	{Zr_m_xm, 2, argList{Yxr, Yml}},	// Pe, 0x7e MOVD xmm store
   494  	{Zaut_r, 1, argList{Yiauto, Yrl}},	// 0 built-in LEAQ
   495  }
   496  
        // ymovbe holds the two directions Ym to Yrl (Zlitm_r) and Yrl to Ym
        // (Zlitr_m); each line owns 3 bytes of opBytes.
   497  var ymovbe = []ytab{
   498  	{Zlitm_r, 3, argList{Ym, Yrl}},
   499  	{Zlitr_m, 3, argList{Yrl, Ym}},
   500  }
   501  
        // The tables in this group are named after their operand pattern.

        // ym_rl: (Ym, Yrl) encoded with Zm_r.
   502  var ym_rl = []ytab{
   503  	{Zm_r, 1, argList{Ym, Yrl}},
   504  }
   505  
        // yrl_m: (Yrl, Ym) encoded with Zr_m; used by ABOUNDL and ABOUNDW in optab.
   506  var yrl_m = []ytab{
   507  	{Zr_m, 1, argList{Yrl, Ym}},
   508  }
   509  
        // ymb_rl: (Ymb, Yrl) encoded with Zmb_r.
   510  var ymb_rl = []ytab{
   511  	{Zmb_r, 1, argList{Ymb, Yrl}},
   512  }
   513  
        // yml_rl: (Yml, Yrl) encoded with Zm_r; used by the CMOV*, BSF*, BSR*,
        // ADCX* and ADOX* rows in optab.
   514  var yml_rl = []ytab{
   515  	{Zm_r, 1, argList{Yml, Yrl}},
   516  }
   517  
        // yrl_ml: (Yrl, Yml) encoded with Zr_m; used by AARPL in optab.
   518  var yrl_ml = []ytab{
   519  	{Zr_m, 1, argList{Yrl, Yml}},
   520  }
   521  
        // yml_mb: byte forms in both directions, (Yrb, Ymb) and (Ymb, Yrb).
   522  var yml_mb = []ytab{
   523  	{Zr_m, 1, argList{Yrb, Ymb}},
   524  	{Zm_r, 1, argList{Ymb, Yrb}},
   525  }
   526  
        // yrb_mb: only the (Yrb, Ymb) direction.
   527  var yrb_mb = []ytab{
   528  	{Zr_m, 1, argList{Yrb, Ymb}},
   529  }
   530  
        // yxchg lists the two forms that pair Yax with Yrl (Z_rp and Zrp_) ahead of
        // the general Yrl/Yml forms, so they are tried first.
   531  var yxchg = []ytab{
   532  	{Z_rp, 1, argList{Yax, Yrl}},
   533  	{Zrp_, 1, argList{Yrl, Yax}},
   534  	{Zr_m, 1, argList{Yrl, Yml}},
   535  	{Zm_r, 1, argList{Yml, Yrl}},
   536  }
   537  
        // ydivl accepts a single Yml operand (Zm_o, 2 bytes of opBytes).
   538  var ydivl = []ytab{
   539  	{Zm_o, 2, argList{Yml}},
   540  }
   541  
        // ydivb is the byte counterpart of ydivl: a single Ymb operand.
   542  var ydivb = []ytab{
   543  	{Zm_o, 2, argList{Ymb}},
   544  }
   545  
        // yimul holds the one-operand form (Yml), the two immediate forms
        // (Yi8 or Yi32 with Yrl) and the (Yml, Yrl) form. The Yi8 line precedes the
        // Yi32 line, so it is tried first.
   546  var yimul = []ytab{
   547  	{Zm_o, 2, argList{Yml}},
   548  	{Zib_rr, 1, argList{Yi8, Yrl}},
   549  	{Zil_rr, 1, argList{Yi32, Yrl}},
   550  	{Zm_r, 2, argList{Yml, Yrl}},
   551  }
   552  
        // yimul3 holds the three-operand forms (Yi8 or Yi32, Yml, Yrl); both lines
        // use Zibm_r.
   553  var yimul3 = []ytab{
   554  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   555  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   556  }
   557  
        // ybyte accepts a single Yi64 operand (Zbyte); used by ABYTE in optab.
   558  var ybyte = []ytab{
   559  	{Zbyte, 1, argList{Yi64}},
   560  }
   561  
        // yin accepts either a single Yi32 operand (Zib_) or no operand (Zlit).
   562  var yin = []ytab{
   563  	{Zib_, 1, argList{Yi32}},
   564  	{Zlit, 1, argList{}},
   565  }
   566  
        // yint accepts a single Yi32 operand (Zib_).
   567  var yint = []ytab{
   568  	{Zib_, 1, argList{Yi32}},
   569  }
   570  
        // ypushl accepts one operand: Yrl, Ym, Yi8 or Yi32. The Yi8 line precedes
        // the Yi32 line, so it is tried first.
   571  var ypushl = []ytab{
   572  	{Zrp_, 1, argList{Yrl}},
   573  	{Zm_o, 2, argList{Ym}},
   574  	{Zib_, 1, argList{Yi8}},
   575  	{Zil_, 1, argList{Yi32}},
   576  }
   577  
        // ypopl accepts one operand: Yrl (Z_rp) or Ym (Zo_m).
   578  var ypopl = []ytab{
   579  	{Z_rp, 1, argList{Yrl}},
   580  	{Zo_m, 2, argList{Ym}},
   581  }
   582  
        // ywrfsbase accepts a single Yrl operand, encoded with Zm_o.
   583  var ywrfsbase = []ytab{
   584  	{Zm_o, 2, argList{Yrl}},
   585  }
   586  
        // yrdrand accepts a single Yrl operand, encoded with Zo_m.
   587  var yrdrand = []ytab{
   588  	{Zo_m, 2, argList{Yrl}},
   589  }
   590  
        // yclflush accepts a single Ym operand; used by ACLDEMOTE, ACLFLUSH,
        // ACLFLUSHOPT and ACLWB in optab.
   591  var yclflush = []ytab{
   592  	{Zo_m, 2, argList{Ym}},
   593  }
   594  
        // ybswap accepts a single Yrl operand; its Z_rp line owns 2 bytes of
        // opBytes (0x0f, 0xc8 in the ABSWAPL and ABSWAPQ rows).
   595  var ybswap = []ytab{
   596  	{Z_rp, 2, argList{Yrl}},
   597  }
   598  
        // yscond accepts a single Ymb operand, encoded with Zo_m.
   599  var yscond = []ytab{
   600  	{Zo_m, 2, argList{Ymb}},
   601  }
   602  
        // yjcond accepts a Ybr target, optionally preceded by a Yi0 or Yi1 operand;
        // all three lines use Zbr.
        // NOTE(review): the Yi0/Yi1 operand is presumably a branch hint; confirm in
        // the Zbr case of doasm.
   603  var yjcond = []ytab{
   604  	{Zbr, 0, argList{Ybr}},
   605  	{Zbr, 0, argList{Yi0, Ybr}},
   606  	{Zbr, 1, argList{Yi1, Ybr}},
   607  }
   608  
        // yloop accepts a single Ybr target, encoded with Zloop.
   609  var yloop = []ytab{
   610  	{Zloop, 1, argList{Ybr}},
   611  }
   612  
        // ycall holds the operand forms of obj.ACALL in optab
        // (opBytes{0xff, 02, 0xff, 0x15, 0xe8}): the lines own 0, 2, 2, 0 and 1
        // bytes, so both Zcallindreg lines see 0xff, 02, Zcallind sees 0xff, 0x15,
        // and Zcall and Zcallcon both see 0xe8.
   613  var ycall = []ytab{
   614  	{Zcallindreg, 0, argList{Yml}},
   615  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   616  	{Zcallind, 2, argList{Yindir}},
   617  	{Zcall, 0, argList{Ybr}},
   618  	{Zcallcon, 1, argList{Yi32}},
   619  }
   620  
        // yduff accepts a single Yi32 operand, encoded with Zcallduff.
   621  var yduff = []ytab{
   622  	{Zcallduff, 1, argList{Yi32}},
   623  }
   624  
        // yjmp accepts a Yml operand (Zo_m64), a Ybr target (Zjmp) or a Yi32
        // operand (Zjmpcon). The Zjmp line has zoffset 0, so Zjmp and Zjmpcon
        // start at the same opBytes position.
   625  var yjmp = []ytab{
   626  	{Zo_m64, 2, argList{Yml}},
   627  	{Zjmp, 0, argList{Ybr}},
   628  	{Zjmpcon, 1, argList{Yi32}},
   629  }
   630  
        // The tables in this group pair Yf0 with Ym or Yrf operands. Lines whose
        // second operand is Yf0 use Zm_o; lines whose first operand is Yf0 use Zo_m.
        // Every such line owns 2 bytes of opBytes.

        // yfmvd: Ym or Yrf to Yf0, and Yf0 to Ym or Yrf.
   631  var yfmvd = []ytab{
   632  	{Zm_o, 2, argList{Ym, Yf0}},
   633  	{Zo_m, 2, argList{Yf0, Ym}},
   634  	{Zm_o, 2, argList{Yrf, Yf0}},
   635  	{Zo_m, 2, argList{Yf0, Yrf}},
   636  }
   637  
        // yfmvdp: Yf0 to Ym or Yrf only.
   638  var yfmvdp = []ytab{
   639  	{Zo_m, 2, argList{Yf0, Ym}},
   640  	{Zo_m, 2, argList{Yf0, Yrf}},
   641  }
   642  
        // yfmvf: Ym to Yf0 and Yf0 to Ym.
   643  var yfmvf = []ytab{
   644  	{Zm_o, 2, argList{Ym, Yf0}},
   645  	{Zo_m, 2, argList{Yf0, Ym}},
   646  }
   647  
        // yfmvx: Ym to Yf0 only.
   648  var yfmvx = []ytab{
   649  	{Zm_o, 2, argList{Ym, Yf0}},
   650  }
   651  
        // yfmvp: Yf0 to Ym only.
   652  var yfmvp = []ytab{
   653  	{Zo_m, 2, argList{Yf0, Ym}},
   654  }
   655  
        // yfcmv: Yrf to Yf0 only.
   656  var yfcmv = []ytab{
   657  	{Zm_o, 2, argList{Yrf, Yf0}},
   658  }
   659  
        // yfadd: Ym or Yrf to Yf0, and Yf0 to Yrf.
   660  var yfadd = []ytab{
   661  	{Zm_o, 2, argList{Ym, Yf0}},
   662  	{Zm_o, 2, argList{Yrf, Yf0}},
   663  	{Zo_m, 2, argList{Yf0, Yrf}},
   664  }
   665  
        // yfxch: Yf0 and Yrf in either order.
   666  var yfxch = []ytab{
   667  	{Zo_m, 2, argList{Yf0, Yrf}},
   668  	{Zm_o, 2, argList{Yrf, Yf0}},
   669  }
   670  
        // ycompp: (Yf0, Yrf); see the caveat on the line itself.
   671  var ycompp = []ytab{
   672  	{Zo_m, 2, argList{Yf0, Yrf}},	// botch is really f0,f1
   673  }
   674  
        // ystsw accepts a single Ym operand (Zo_m, 2 bytes) or a single Yax
        // operand (Zlit, 1 byte).
   675  var ystsw = []ytab{
   676  	{Zo_m, 2, argList{Ym}},
   677  	{Zlit, 1, argList{Yax}},
   678  }
   679  
        // ysvrs_mo accepts a single Ym operand, encoded with Zm_o.
   680  var ysvrs_mo = []ytab{
   681  	{Zm_o, 2, argList{Ym}},
   682  }
   683  
   684  // unaryDst version of "ysvrs_mo": the same single Ym operand, encoded with Zo_m.
   685  var ysvrs_om = []ytab{
   686  	{Zo_m, 2, argList{Ym}},
   687  }
   688  
        // ymm holds an MMX-class form (Ymm, Ymr) owning 1 byte and an XMM-class
        // form (Yxm, Yxr) owning 2 bytes; both use Zm_r_xm.
   689  var ymm = []ytab{
   690  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   691  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   692  }
   693  
        // yxm: (Yxm, Yxr) encoded with Zm_r_xm; used by AADDPD, AADDPS, AANDPD and
        // similar rows in optab.
   694  var yxm = []ytab{
   695  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   696  }
   697  
        // yxm_q4: the same operands as yxm, but encoded with Zm_r instead of Zm_r_xm.
   698  var yxm_q4 = []ytab{
   699  	{Zm_r, 1, argList{Yxm, Yxr}},
   700  }
   701  
        // yxcvm1: source Yxm, destination Yxr or Ymr.
   702  var yxcvm1 = []ytab{
   703  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   704  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   705  }
   706  
        // yxcvm2: source Yxm or Ymm, destination Yxr.
   707  var yxcvm2 = []ytab{
   708  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   709  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   710  }
   711  
        // yxr: (Yxr, Yxr) only, encoded with Zm_r_xm.
   712  var yxr = []ytab{
   713  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   714  }
   715  
        // yxr_ml: (Yxr, Yml) encoded with Zr_m_xm.
   716  var yxr_ml = []ytab{
   717  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   718  }
   719  
        // ymr: (Ymr, Ymr) only, encoded with Zm_r.
   720  var ymr = []ytab{
   721  	{Zm_r, 1, argList{Ymr, Ymr}},
   722  }
   723  
        // ymr_ml: (Ymr, Yml) encoded with Zr_m_xm.
   724  var ymr_ml = []ytab{
   725  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   726  }
   727  
        // yxcmpi: three operands (Yxm, Yxr, Yi8); used by ACMPPD, ACMPPS, ACMPSD and
        // ACMPSS in optab. The line owns 2 bytes of opBytes.
   728  var yxcmpi = []ytab{
   729  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   730  }
   731  
        // yxmov: Yxm to Yxr (Zm_r_xm) and Yxr to Yxm (Zr_m_xm).
   732  var yxmov = []ytab{
   733  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   734  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   735  }
   736  
        // yxcvfl: (Yxm, Yrl), owning 1 byte of opBytes.
   737  var yxcvfl = []ytab{
   738  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   739  }
   740  
        // yxcvlf: (Yml, Yxr), owning 1 byte of opBytes.
   741  var yxcvlf = []ytab{
   742  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   743  }
   744  
        // yxcvfq: the same operands as yxcvfl, but the line owns 2 bytes of opBytes.
   745  var yxcvfq = []ytab{
   746  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   747  }
   748  
        // yxcvqf: the same operands as yxcvlf, but the line owns 2 bytes of opBytes.
   749  var yxcvqf = []ytab{
   750  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   751  }
   752  
        // yps holds, for the MMX classes and then the XMM classes, a
        // register/memory source form (Zm_r_xm) followed by a Yi8 source form
        // (Zibo_m_xm). The lines own 1, 2, 2 and 3 bytes of opBytes.
   753  var yps = []ytab{
   754  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   755  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   756  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   757  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   758  }
   759  
        // yxrrl: (Yxr, Yrl) encoded with Zm_r.
   760  var yxrrl = []ytab{
   761  	{Zm_r, 1, argList{Yxr, Yrl}},
   762  }
   763  
        // ymrxr: (Ymr, Yxr) encoded with Zm_r, then (Yxm, Yxr) encoded with Zm_r_xm.
   764  var ymrxr = []ytab{
   765  	{Zm_r, 1, argList{Ymr, Yxr}},
   766  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   767  }
   768  
        // ymshuf: three operands (Yi8, Ymm, Ymr), encoded with Zibm_r.
   769  var ymshuf = []ytab{
   770  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   771  }
   772  
        // ymshufb: (Yxm, Yxr) encoded with Zm2_r; the line owns 2 bytes of opBytes.
   773  var ymshufb = []ytab{
   774  	{Zm2_r, 2, argList{Yxm, Yxr}},
   775  }
   776  
   777  // yxshuf should never have more than 1 entry,
   778  // because some optab entries have opcode sequences that
   779  // are longer than 2 bytes (zoffset=2 here),
   780  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   781  // to name a few.
   782  var yxshuf = []ytab{
   783  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   784  }
   785  
        // yextrw: (Yu8, Yxr, dst) where dst is Yrl (Zibm_r) or Yml (Zibr_m).
   786  var yextrw = []ytab{
   787  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   788  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   789  }
   790  
        // yextr: (Yu8, Yxr, Ymm) encoded with Zibr_m; the line owns 3 bytes of opBytes.
   791  var yextr = []ytab{
   792  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   793  }
   794  
        // yinsrw: (Yu8, Yml, Yxr) encoded with Zibm_r.
   795  var yinsrw = []ytab{
   796  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   797  }
   798  
        // yinsr: (Yu8, Ymm, Yxr) encoded with Zibm_r; the line owns 3 bytes of opBytes.
   799  var yinsr = []ytab{
   800  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   801  }
   802  
        // ypsdq: (Yi8, Yxr) encoded with Zibo_m.
   803  var ypsdq = []ytab{
   804  	{Zibo_m, 2, argList{Yi8, Yxr}},
   805  }
   806  
        // ymskb: source Yxr (2 bytes of opBytes) or Ymr (1 byte), destination Yrl.
   807  var ymskb = []ytab{
   808  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   809  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   810  }
   811  
        // ycrc32l: (Yml, Yrl) encoded with Zlitm_r; zoffset is 0 for this single line.
   812  var ycrc32l = []ytab{
   813  	{Zlitm_r, 0, argList{Yml, Yrl}},
   814  }
   815  
        // ycrc32b is the byte-source counterpart of ycrc32l: (Ymb, Yrl).
   816  var ycrc32b = []ytab{
   817  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   818  }
   819  
        // yprefetch accepts a single Ym operand, encoded with Zm_o.
   820  var yprefetch = []ytab{
   821  	{Zm_o, 2, argList{Ym}},
   822  }
   823  
        // yaes: (Yxm, Yxr) encoded with Zlitm_r; the line owns 2 bytes of opBytes.
   824  var yaes = []ytab{
   825  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   826  }
   827  
        // yxbegin accepts a single Ybr target, encoded with Zjmp.
   828  var yxbegin = []ytab{
   829  	{Zjmp, 1, argList{Ybr}},
   830  }
   831  
        // yxabort accepts a single Yu8 operand, encoded with Zib_.
   832  var yxabort = []ytab{
   833  	{Zib_, 1, argList{Yu8}},
   834  }
   835  
        // ylddqu: (Ym, Yxr) encoded with Zm_r.
   836  var ylddqu = []ytab{
   837  	{Zm_r, 1, argList{Ym, Yxr}},
   838  }
   839  
        // ypalignr: (Yu8, Yxm, Yxr) encoded with Zibm_r; the same line as yxshuf.
   840  var ypalignr = []ytab{
   841  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   842  }
   843  
        // ysha256rnds2: (Yxr0, Yxm, Yxr) encoded with Zlit_m_r. The first operand
        // must be X0 (see Yxr0).
   844  var ysha256rnds2 = []ytab{
   845  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   846  }
   847  
        // yblendvpd: the same operands as ysha256rnds2, but encoded with Z_m_r.
   848  var yblendvpd = []ytab{
   849  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   850  }
   851  
        // ymmxmm0f38: an MMX-class form (Ymm, Ymr) owning 3 bytes of opBytes and an
        // XMM-class form (Yxm, Yxr) owning 5 bytes; both use Zlitm_r.
   852  var ymmxmm0f38 = []ytab{
   853  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   854  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   855  }
   856  
        // yextractps: (Yu2, Yxr, Yml) encoded with Zibr_m; the immediate must fit in
        // uint2 (see Yu2).
   857  var yextractps = []ytab{
   858  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   859  }
   860  
        // ysha1rnds4: (Yu2, Yxm, Yxr) encoded with Zibm_r; the immediate must fit in
        // uint2 (see Yu2).
   861  var ysha1rnds4 = []ytab{
   862  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   863  }
   864  
   865  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   866  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   867  // to find the entry with the given p.As and then looks through the ytable for
   868  // that instruction (the second field in the optab struct) for a line whose
   869  // argList matches the Ytypes of the operands (p.From and p.To).  The
   870  // function oclass computes the specific Ytype of an operand and then the set
   871  // of more general Ytypes that it satisfies is implied by the ycover table, set
   872  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   873  // from the more general 8-bit constants, but instinit says
   874  //
   875  //	ycover[Yi0*Ymax+Ys32] = 1
   876  //	ycover[Yi1*Ymax+Ys32] = 1
   877  //	ycover[Yi8*Ymax+Ys32] = 1
   878  //
   879  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   880  // if that's what an instruction can handle.
   881  //
   882  // In parallel with the scan through the ytable for the appropriate line, there
   883  // is a z pointer that starts out pointing at the strange magic byte list in
   884  // the Optab struct.  With each step past a non-matching ytable line, z
   885  // advances by the line's 2nd entry (zoffset).  When a matching line is found,
   886  // that z pointer has the extra data to use in laying down the instruction
   887  // bytes.  The actual bytes laid down are a function of the 1st entry in the
   888  // line (that is, the Ztype) and the z bytes.
   889  //
   890  // For example, let's look at AADDL.  The optab line says:
   891  //
   892  //	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //
   896  //	var yaddl = []ytab{
   897  //	        {Zibo_m, 2, argList{Yi8, Yml}},
   898  //	        {Zil_, 1, argList{Yi32, Yax}},
   899  //	        {Zilo_m, 2, argList{Yi32, Yml}},
   900  //	        {Zr_m, 1, argList{Yrl, Yml}},
   901  //	        {Zm_r, 1, argList{Yml, Yrl}},
   902  //	}
   903  //
   904  // so there are 5 possible types of ADDL instruction that can be laid down, and
   905  // possible states used to lay them down (Ztype and z pointer, assuming z
   906  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   907  //
   908  //	Yi8, Yml -> Zibo_m, z (0x83, 00)
   909  //	Yi32, Yax -> Zil_, z+2 (0x05)
   910  //	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   911  //	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   912  //	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   913  //
   914  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   915  // relatively straightforward as this program goes.
   916  //
   917  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   918  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   919  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   920  // Zilo_m is the same but a long (32-bit) immediate.
   921  var optab =
   922  // as, ytab, andproto, opcode
   923  [...]Optab{
   924  	{obj.AXXX, nil, 0, opBytes{}},
   925  	{AAAA, ynone, P32, opBytes{0x37}},
   926  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   927  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   928  	{AAAS, ynone, P32, opBytes{0x3f}},
   929  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   930  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   933  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   934  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   935  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   936  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   937  	{AADDPD, yxm, Pq, opBytes{0x58}},
   938  	{AADDPS, yxm, Pm, opBytes{0x58}},
   939  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   940  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   941  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   942  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   943  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   944  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   945  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   946  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   947  	{AADJSP, nil, 0, opBytes{}},
   948  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   949  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   950  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   951  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   952  	{AANDPD, yxm, Pq, opBytes{0x54}},
   953  	{AANDPS, yxm, Pm, opBytes{0x54}},
   954  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   956  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   957  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   958  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   959  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   960  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   961  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   962  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   963  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   964  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   965  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   966  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   967  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   968  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   969  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   970  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   971  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   972  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   973  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   974  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   975  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   976  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   977  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   978  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   979  	{ABYTE, ybyte, Px, opBytes{1}},
   980  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   981  	{ACBW, ynone, Pe, opBytes{0x98}},
   982  	{ACDQ, ynone, Px, opBytes{0x99}},
   983  	{ACDQE, ynone, Pw, opBytes{0x98}},
   984  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   985  	{ACLC, ynone, Px, opBytes{0xf8}},
   986  	{ACLD, ynone, Px, opBytes{0xfc}},
   987  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   988  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   989  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   990  	{ACLI, ynone, Px, opBytes{0xfa}},
   991  	{ACLTS, ynone, Pm, opBytes{0x06}},
   992  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   993  	{ACMC, ynone, Px, opBytes{0xf5}},
   994  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   995  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   996  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   997  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   998  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   999  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
  1000  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1001  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1002  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1003  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1004  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1005  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1006  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1007  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1008  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1009  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1010  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1011  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1012  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1013  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1014  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1015  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1016  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1017  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1018  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1019  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1020  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1021  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1022  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1023  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1024  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1025  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1026  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1027  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1028  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1029  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1030  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1031  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1032  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1033  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1034  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1035  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1036  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1037  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1038  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1039  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1040  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1041  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1042  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1043  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1044  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1045  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1046  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1047  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1048  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1049  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1050  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1051  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1052  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1053  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1054  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1055  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1056  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1057  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1058  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1059  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1060  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1061  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1062  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1063  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1064  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1065  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1066  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1067  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1068  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1069  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1070  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1071  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1072  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1073  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1074  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1075  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1076  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1077  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1078  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1079  	{ACWD, ynone, Pe, opBytes{0x99}},
  1080  	{ACWDE, ynone, Px, opBytes{0x98}},
  1081  	{ACQO, ynone, Pw, opBytes{0x99}},
  1082  	{ADAA, ynone, P32, opBytes{0x27}},
  1083  	{ADAS, ynone, P32, opBytes{0x2f}},
  1084  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1085  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1086  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1087  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1088  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1089  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1090  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1091  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1092  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1093  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1094  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1095  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1096  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1097  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1098  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1099  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1100  	{AENTER, nil, 0, opBytes{}},	// botch
  1101  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1102  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1103  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1104  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1105  	{AHLT, ynone, Px, opBytes{0xf4}},
  1106  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1107  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1108  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1109  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1110  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1111  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1115  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1117  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1118  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1119  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1120  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1121  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1122  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1123  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1124  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1125  	{AINSL, ynone, Px, opBytes{0x6d}},
  1126  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1127  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1128  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1129  	{AINT, yint, Px, opBytes{0xcd}},
  1130  	{AINTO, ynone, P32, opBytes{0xce}},
  1131  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1132  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1133  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1134  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1135  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1136  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1137  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1138  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1139  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1140  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1141  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1142  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1143  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1144  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1145  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1146  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1147  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1148  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1149  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1150  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1151  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1152  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1153  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1154  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1155  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1156  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1157  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1158  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1159  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1160  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1161  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1162  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1163  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1164  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1165  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1166  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1167  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1168  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1169  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1170  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1171  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1172  	{ALODSL, ynone, Px, opBytes{0xad}},
  1173  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1174  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1175  	{ALONG, ybyte, Px, opBytes{4}},
  1176  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1177  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1178  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1179  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1180  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1181  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1182  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1183  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1184  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1185  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1186  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1187  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1188  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1189  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1190  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1191  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1192  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1193  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1194  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1195  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1196  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1197  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1198  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1199  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1200  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1201  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1202  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1203  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1204  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1205  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1206  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1207  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1208  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1209  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1210  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1211  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1212  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1213  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1214  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1215  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1216  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1217  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1218  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1219  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1220  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1221  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1222  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1223  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1224  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1225  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1226  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1227  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1228  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1229  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1230  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1231  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1232  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1233  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1234  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1235  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1236  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1237  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1238  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1239  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1240  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1241  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1242  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1243  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1244  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1245  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1246  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1247  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1248  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1249  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1250  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1251  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1252  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1253  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1254  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1255  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1256  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1257  	{ANOTL, yscond, Px, opBytes{0xf7, 02}},	// TODO(rsc): yscond is wrong here.
  1258  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1259  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1260  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1261  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1262  	{AORPD, yxm, Pq, opBytes{0x56}},
  1263  	{AORPS, yxm, Pm, opBytes{0x56}},
  1264  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1265  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1266  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1267  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1268  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1269  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1270  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1271  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1272  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1273  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1274  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1275  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1276  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1277  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1278  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1279  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1280  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1281  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1282  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1283  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1284  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1285  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1286  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1287  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1288  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1289  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1290  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1291  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1292  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1293  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1294  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1295  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1296  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1297  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1298  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1299  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1300  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1301  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1302  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1303  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1304  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1305  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1306  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1307  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1308  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1309  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1310  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1311  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1312  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1313  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1314  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1315  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1316  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1317  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1318  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1319  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1320  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1321  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1322  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1323  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1324  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1325  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1326  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1327  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1328  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1329  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1330  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1331  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1332  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1333  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1334  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1335  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1336  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1337  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1338  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1339  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1340  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1341  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1342  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1343  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1344  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1345  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1346  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1347  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1348  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1349  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1350  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1351  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1352  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1353  	{APOPAL, ynone, P32, opBytes{0x61}},
  1354  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1355  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1356  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1357  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1358  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1359  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1360  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1361  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1362  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1363  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1364  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1365  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1366  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1367  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1368  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1369  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1370  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1371  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1372  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1373  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1374  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1375  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1376  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1377  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1378  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1379  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1380  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1381  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1382  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1383  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1384  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1385  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1386  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1387  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1388  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1389  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1390  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1391  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1392  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1393  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1394  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1395  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1396  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1397  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1398  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1399  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1400  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1401  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1402  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1403  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1404  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1405  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1406  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1407  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1409  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1410  	{AQUAD, ybyte, Px, opBytes{8}},
  1411  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1412  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1413  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1415  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1416  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1417  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1418  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1419  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1421  	{AREP, ynone, Px, opBytes{0xf3}},
  1422  	{AREPN, ynone, Px, opBytes{0xf2}},
  1423  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1424  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1425  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1426  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1427  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1428  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1429  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1431  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1432  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1433  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1435  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1436  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1437  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}},	// XCHGB AH,AL; PUSH AX; POPFL
  1438  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1439  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1440  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1442  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1443  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1444  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1446  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1447  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1448  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1450  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1451  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1452  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1453  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1454  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1455  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1456  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1457  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1458  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1459  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1460  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1461  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1462  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1463  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1464  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1465  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1466  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1467  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1468  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1469  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1470  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1471  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1472  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1474  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1475  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1476  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1478  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1479  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1480  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1481  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1482  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1483  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1484  	{ASTC, ynone, Px, opBytes{0xf9}},
  1485  	{ASTD, ynone, Px, opBytes{0xfd}},
  1486  	{ASTI, ynone, Px, opBytes{0xfb}},
  1487  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1488  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1489  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1490  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1491  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1492  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1493  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1494  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1495  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1496  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1497  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1498  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1499  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1500  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1501  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}},	// fast syscall
  1502  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1503  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1504  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1506  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1507  	{obj.ATEXT, ytext, Px, opBytes{}},
  1508  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1509  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1510  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1511  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1512  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1513  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1514  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1515  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1516  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1517  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1518  	{AWORD, ybyte, Px, opBytes{2}},
  1519  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1520  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1521  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1523  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1524  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1525  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1526  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1527  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1528  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1531  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1532  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1533  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1534  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1535  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1536  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1537  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1538  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1539  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1540  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1541  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1542  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1543  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1544  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1545  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1546  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1547  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1548  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1549  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1550  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1551  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1552  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1553  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1554  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1555  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1556  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1557  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1558  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},	// botch
  1559  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}},	// botch
  1560  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1561  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1562  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1563  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1564  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1565  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1566  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1567  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1568  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1569  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1570  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1571  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1572  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1573  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1574  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1575  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1576  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1577  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1578  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1579  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1580  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1581  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1582  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1583  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1584  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1585  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1586  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1587  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1588  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1589  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1590  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1591  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1592  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1593  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1594  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1595  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1596  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1597  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1598  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1599  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1600  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1601  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1602  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1603  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1604  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1605  	{AFFREE, nil, 0, opBytes{}},
  1606  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1607  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1608  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1609  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1610  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1611  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1612  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1613  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1614  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1615  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1616  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1617  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1618  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1619  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1620  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1621  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1622  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1623  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1624  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1625  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1626  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1627  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1628  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1629  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1630  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1631  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1632  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1633  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1634  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1635  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1636  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1637  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1638  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1639  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1640  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1641  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1642  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1643  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1644  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1645  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1646  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1647  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1649  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1650  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1651  	{AINVD, ynone, Pm, opBytes{0x08}},
  1652  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1653  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1654  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1655  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1656  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1657  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1658  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1659  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1660  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1661  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1662  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1663  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1664  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1665  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1666  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1667  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1668  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1669  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1670  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1671  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1672  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1673  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1674  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1676  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1677  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1678  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1679  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1680  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1681  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1682  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1683  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1684  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1685  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1686  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1687  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1688  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1689  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1690  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1691  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1692  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1693  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1694  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1695  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1696  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1697  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1698  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1699  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1700  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1701  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1702  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1703  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1704  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1705  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1706  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1707  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1708  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1709  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1710  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1711  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1712  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1713  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1715  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1716  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1717  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1718  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1719  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1720  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1721  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1722  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1723  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1724  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1725  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1726  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1727  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1728  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1729  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1730  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1732  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1733  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1734  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1735  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1736  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1737  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1738  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1739  	{AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1740  	{AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1741  	{AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1742  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1743  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1744  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1745  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1746  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1747  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1748  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1749  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1750  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1751  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1752  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1753  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1754  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1755  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1756  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1757  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1758  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1759  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1760  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1761  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1762  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1763  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1764  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1765  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1766  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1767  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1768  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1769  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1770  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1771  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1772  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1773  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1774  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1775  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1776  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1777  	{ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}},
  1778  
  1779  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1780  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1781  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1782  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1783  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1784  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1785  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1786  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1787  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1788  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1789  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1790  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1791  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1792  
  1793  	{obj.AEND, nil, 0, opBytes{}},
  1794  	{0, nil, 0, opBytes{}},
  1795  }
  1796  
// opindex maps an instruction opcode, masked with obj.AMask, to its Optab
// entry. It is filled in by instinit from avxOptab and optab.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1798  
  1799  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1800  // This happens on systems like Solaris that call .so functions instead of system calls.
  1801  // It does not seem to be necessary for any other systems. This is probably working
  1802  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1803  // what that bug is. And this does fix it.
  1804  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1805  	if ctxt.Headtype == objabi.Hsolaris {
  1806  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1807  		return strings.HasPrefix(s.Name, "libc_")
  1808  	}
  1809  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1810  }
  1811  
  1812  // single-instruction no-ops of various lengths.
  1813  // constructed by hand and disassembled with gdb to verify.
  1814  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1815  var nop = [][16]uint8{
  1816  	{0x90},
  1817  	{0x66, 0x90},
  1818  	{0x0F, 0x1F, 0x00},
  1819  	{0x0F, 0x1F, 0x40, 0x00},
  1820  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1821  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1822  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1823  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1824  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1825  }
  1826  
  1827  // Native Client rejects the repeated 0x66 prefix.
  1828  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1829  func fillnop(p []byte, n int) {
  1830  	var m int
  1831  
  1832  	for n > 0 {
  1833  		m = n
  1834  		if m > len(nop) {
  1835  			m = len(nop)
  1836  		}
  1837  		copy(p[:m], nop[m-1][:m])
  1838  		p = p[m:]
  1839  		n -= m
  1840  	}
  1841  }
  1842  
  1843  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1844  	s.Grow(int64(c) + int64(pad))
  1845  	fillnop(s.P[c:], int(pad))
  1846  	return c + pad
  1847  }
  1848  
  1849  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1850  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1851  		return l
  1852  	}
  1853  	return q
  1854  }
  1855  
  1856  // isJump returns whether p is a jump instruction.
  1857  // It is used to ensure that no standalone or macro-fused jump will straddle
  1858  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1859  func isJump(p *obj.Prog) bool {
  1860  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1861  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1862  }
  1863  
  1864  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1865  // jump. Otherwise, nil is returned.
  1866  func lookForJCC(p *obj.Prog) *obj.Prog {
  1867  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1868  	var q *obj.Prog
  1869  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1870  	}
  1871  
  1872  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1873  		return nil
  1874  	}
  1875  
  1876  	switch q.As {
  1877  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1878  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1879  	default:
  1880  		return nil
  1881  	}
  1882  
  1883  	return q
  1884  }
  1885  
  1886  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1887  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1888  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1889  func fusedJump(p *obj.Prog) (bool, uint8) {
  1890  	var fusedSize uint8
  1891  
  1892  	// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
  1893  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1894  	// need to be careful to insert any padding before the locks rather than directly after them.
  1895  
  1896  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1897  		fusedSize += p.Isize
  1898  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1899  		}
  1900  		if p == nil {
  1901  			return false, 0
  1902  		}
  1903  	}
  1904  	if p.As == ALOCK {
  1905  		fusedSize += p.Isize
  1906  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1907  		}
  1908  		if p == nil {
  1909  			return false, 0
  1910  		}
  1911  	}
  1912  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1913  
  1914  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1915  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1916  
  1917  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1918  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1919  
  1920  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1921  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1922  
  1923  	if !cmpAddSub && !testAnd && !incDec {
  1924  		return false, 0
  1925  	}
  1926  
  1927  	if !incDec {
  1928  		var argOne obj.AddrType
  1929  		var argTwo obj.AddrType
  1930  		if cmp {
  1931  			argOne = p.From.Type
  1932  			argTwo = p.To.Type
  1933  		} else {
  1934  			argOne = p.To.Type
  1935  			argTwo = p.From.Type
  1936  		}
  1937  		if argOne == obj.TYPE_REG {
  1938  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1939  				return false, 0
  1940  			}
  1941  		} else if argOne == obj.TYPE_MEM {
  1942  			if argTwo != obj.TYPE_REG {
  1943  				return false, 0
  1944  			}
  1945  		} else {
  1946  			return false, 0
  1947  		}
  1948  	}
  1949  
  1950  	fusedSize += p.Isize
  1951  	jmp := lookForJCC(p)
  1952  	if jmp == nil {
  1953  		return false, 0
  1954  	}
  1955  
  1956  	fusedSize += jmp.Isize
  1957  
  1958  	if testAnd {
  1959  		return true, fusedSize
  1960  	}
  1961  
  1962  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1963  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1964  		return false, 0
  1965  	}
  1966  
  1967  	if cmpAddSub {
  1968  		return true, fusedSize
  1969  	}
  1970  
  1971  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1972  		return false, 0
  1973  	}
  1974  
  1975  	return true, fusedSize
  1976  }
  1977  
// padJumpsCtx is the jump-padding configuration used while assembling.
// A zero value disables padding. Otherwise the value is the byte boundary
// (32, as set by makePjcCtx) that padJump keeps jumps from crossing or
// ending on.
type padJumpsCtx int32
  1979  
  1980  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1981  	// Disable jump padding on 32 bit builds by setting
  1982  	// padJumps to 0.
  1983  	if ctxt.Arch.Family == sys.I386 {
  1984  		return padJumpsCtx(0)
  1985  	}
  1986  
  1987  	// Disable jump padding for hand written assembly code.
  1988  	if ctxt.IsAsm {
  1989  		return padJumpsCtx(0)
  1990  	}
  1991  
  1992  	return padJumpsCtx(32)
  1993  }
  1994  
  1995  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1996  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1997  // not cross or end on a 32 byte boundary.
  1998  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  1999  	if pjc == 0 {
  2000  		return c
  2001  	}
  2002  
  2003  	var toPad int32
  2004  	fj, fjSize := fusedJump(p)
  2005  	mask := int32(pjc - 1)
  2006  	if fj {
  2007  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2008  			toPad = int32(pjc) - (c & mask)
  2009  		}
  2010  	} else if isJump(p) {
  2011  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2012  			toPad = int32(pjc) - (c & mask)
  2013  		}
  2014  	}
  2015  	if toPad <= 0 {
  2016  		return c
  2017  	}
  2018  
  2019  	return noppad(ctxt, s, c, toPad)
  2020  }
  2021  
  2022  // reAssemble is called if an instruction's size changes during assembly. If
  2023  // it does and the instruction is a standalone or a macro-fused jump we need to
  2024  // reassemble.
  2025  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2026  	if pjc == 0 {
  2027  		return false
  2028  	}
  2029  
  2030  	fj, _ := fusedJump(p)
  2031  	return fj || isJump(p)
  2032  }
  2033  
// nopPad records a run of padding bytes that span6 inserted in front of an
// instruction. After assembly, span6 splices an ANOP Prog of size n into the
// instruction list directly after p.
type nopPad struct {
	p	*obj.Prog	// Instruction before the pad
	n	int32		// Size of the pad
}
  2038  
  2039  // Padding bytes to add to align code as requested.
  2040  // Alignment is restricted to powers of 2 between 8 and 2048 inclusive.
  2041  //
  2042  // pc: current offset in function, in bytes
  2043  // a: requested alignment, in bytes
  2044  // cursym: current function being assembled
  2045  // returns number of bytes of padding needed
  2046  func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
  2047  	if !((a&(a-1) == 0) && 8 <= a && a <= 2048) {
  2048  		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a)
  2049  		return 0
  2050  	}
  2051  
  2052  	// By default function alignment is 32 bytes for amd64
  2053  	if cursym.Func().Align < int32(a) {
  2054  		cursym.Func().Align = int32(a)
  2055  	}
  2056  
  2057  	if pc&(a-1) != 0 {
  2058  		return int(a - (pc & (a - 1)))
  2059  	}
  2060  
  2061  	return 0
  2062  }
  2063  
  2064  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  2065  	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
  2066  		ctxt.Diag("-spectre=ret not supported on 386")
  2067  		ctxt.Retpoline = false	// don't keep printing
  2068  	}
  2069  
  2070  	pjc := makePjcCtx(ctxt)
  2071  
  2072  	if s.P != nil {
  2073  		return
  2074  	}
  2075  
  2076  	if ycover[0] == 0 {
  2077  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  2078  	}
  2079  
  2080  	for p := s.Func().Text; p != nil; p = p.Link {
  2081  		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
  2082  			p.To.SetTarget(p)
  2083  		}
  2084  		if p.As == AADJSP {
  2085  			p.To.Type = obj.TYPE_REG
  2086  			p.To.Reg = REG_SP
  2087  			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
  2088  			// One exception: It is smaller to encode $-0x80 than $0x80.
  2089  			// For that case, flip the sign and the op:
  2090  			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
  2091  			switch v := p.From.Offset; {
  2092  			case v == 0:
  2093  				p.As = obj.ANOP
  2094  			case v == 0x80 || (v < 0 && v != -0x80):
  2095  				p.As = spadjop(ctxt, AADDL, AADDQ)
  2096  				p.From.Offset *= -1
  2097  			default:
  2098  				p.As = spadjop(ctxt, ASUBL, ASUBQ)
  2099  			}
  2100  		}
  2101  		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
  2102  			if p.To.Type != obj.TYPE_REG {
  2103  				ctxt.Diag("non-retpoline-compatible: %v", p)
  2104  				continue
  2105  			}
  2106  			p.To.Type = obj.TYPE_BRANCH
  2107  			p.To.Name = obj.NAME_EXTERN
  2108  			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
  2109  			p.To.Reg = 0
  2110  			p.To.Offset = 0
  2111  		}
  2112  	}
  2113  
  2114  	var count int64	// rough count of number of instructions
  2115  	for p := s.Func().Text; p != nil; p = p.Link {
  2116  		count++
  2117  		p.Back = branchShort	// use short branches first time through
  2118  		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
  2119  			p.Back |= branchBackwards
  2120  			q.Back |= branchLoopHead
  2121  		}
  2122  	}
  2123  	s.GrowCap(count * 5)	// preallocate roughly 5 bytes per instruction
  2124  
  2125  	var ab AsmBuf
  2126  	var n int
  2127  	var c int32
  2128  	errors := ctxt.Errors
  2129  	var nops []nopPad	// Padding for a particular assembly (reuse slice storage if multiple assemblies)
  2130  	nrelocs0 := len(s.R)
  2131  	for {
  2132  		// This loop continues while there are reasons to re-assemble
  2133  		// whole block, like the presence of long forward jumps.
  2134  		reAssemble := false
  2135  		for i := range s.R[nrelocs0:] {
  2136  			s.R[nrelocs0+i] = obj.Reloc{}
  2137  		}
  2138  		s.R = s.R[:nrelocs0]	// preserve marker relocations generated by the compiler
  2139  		s.P = s.P[:0]
  2140  		c = 0
  2141  		var pPrev *obj.Prog
  2142  		nops = nops[:0]
  2143  		for p := s.Func().Text; p != nil; p = p.Link {
  2144  			c0 := c
  2145  			c = pjc.padJump(ctxt, s, p, c)
  2146  
  2147  			if p.As == obj.APCALIGN {
  2148  				aln := p.From.Offset
  2149  				v := addpad(int64(c), aln, ctxt, s)
  2150  				if v > 0 {
  2151  					s.Grow(int64(c) + int64(v))
  2152  					fillnop(s.P[c:], int(v))
  2153  				}
  2154  
  2155  				c += int32(v)
  2156  				pPrev = p
  2157  				continue
  2158  			}
  2159  
  2160  			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
  2161  				// pad with NOPs
  2162  				v := -c & (loopAlign - 1)
  2163  
  2164  				if v <= maxLoopPad {
  2165  					s.Grow(int64(c) + int64(v))
  2166  					fillnop(s.P[c:], int(v))
  2167  					c += v
  2168  				}
  2169  			}
  2170  
  2171  			p.Pc = int64(c)
  2172  
  2173  			// process forward jumps to p
  2174  			for q := p.Rel; q != nil; q = q.Forwd {
  2175  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  2176  				if q.Back&branchShort != 0 {
  2177  					if v > 127 {
  2178  						reAssemble = true
  2179  						q.Back ^= branchShort
  2180  					}
  2181  
  2182  					if q.As == AJCXZL || q.As == AXBEGIN {
  2183  						s.P[q.Pc+2] = byte(v)
  2184  					} else {
  2185  						s.P[q.Pc+1] = byte(v)
  2186  					}
  2187  				} else {
  2188  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  2189  				}
  2190  			}
  2191  
  2192  			p.Rel = nil
  2193  
  2194  			p.Pc = int64(c)
  2195  			ab.asmins(ctxt, s, p)
  2196  			m := ab.Len()
  2197  			if int(p.Isize) != m {
  2198  				p.Isize = uint8(m)
  2199  				if pjc.reAssemble(p) {
  2200  					// We need to re-assemble here to check for jumps and fused jumps
  2201  					// that span or end on 32 byte boundaries.
  2202  					reAssemble = true
  2203  				}
  2204  			}
  2205  
  2206  			s.Grow(p.Pc + int64(m))
  2207  			copy(s.P[p.Pc:], ab.Bytes())
  2208  			// If there was padding, remember it.
  2209  			if pPrev != nil && !ctxt.IsAsm && c > c0 {
  2210  				nops = append(nops, nopPad{p: pPrev, n: c - c0})
  2211  			}
  2212  			c += int32(m)
  2213  			pPrev = p
  2214  		}
  2215  
  2216  		n++
  2217  		if n > 1000 {
  2218  			ctxt.Diag("span must be looping")
  2219  			log.Fatalf("loop")
  2220  		}
  2221  		if !reAssemble {
  2222  			break
  2223  		}
  2224  		if ctxt.Errors > errors {
  2225  			return
  2226  		}
  2227  	}
  2228  	// splice padding nops into Progs
  2229  	for _, n := range nops {
  2230  		pp := n.p
  2231  		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
  2232  		pp.Link = np
  2233  	}
  2234  
  2235  	s.Size = int64(c)
  2236  
  2237  	if false {	/* debug['a'] > 1 */
  2238  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  2239  		var i int
  2240  		for i = 0; i < len(s.P); i++ {
  2241  			fmt.Printf(" %.2x", s.P[i])
  2242  			if i%16 == 15 {
  2243  				fmt.Printf("\n  %.6x", uint(i+1))
  2244  			}
  2245  		}
  2246  
  2247  		if i%16 != 0 {
  2248  			fmt.Printf("\n")
  2249  		}
  2250  
  2251  		for i := 0; i < len(s.R); i++ {
  2252  			r := &s.R[i]
  2253  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  2254  		}
  2255  	}
  2256  
  2257  	// Mark nonpreemptible instruction sequences.
  2258  	// The 2-instruction TLS access sequence
  2259  	//	MOVQ TLS, BX
  2260  	//	MOVQ 0(BX)(TLS*1), BX
  2261  	// is not async preemptible, as if it is preempted and resumed on
  2262  	// a different thread, the TLS address may become invalid.
  2263  	if !CanUse1InsnTLS(ctxt) {
  2264  		useTLS := func(p *obj.Prog) bool {
  2265  			// Only need to mark the second instruction, which has
  2266  			// REG_TLS as Index. (It is okay to interrupt and restart
  2267  			// the first instruction.)
  2268  			return p.From.Index == REG_TLS
  2269  		}
  2270  		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
  2271  	}
  2272  
  2273  	// Now that we know byte offsets, we can generate jump table entries.
  2274  	// TODO: could this live in obj instead of obj/$ARCH?
  2275  	for _, jt := range s.Func().JumpTables {
  2276  		for i, p := range jt.Targets {
  2277  			// The ith jumptable entry points to the p.Pc'th
  2278  			// byte in the function symbol s.
  2279  			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
  2280  		}
  2281  	}
  2282  }
  2283  
  2284  func instinit(ctxt *obj.Link) {
  2285  	if ycover[0] != 0 {
  2286  		// Already initialized; stop now.
  2287  		// This happens in the cmd/asm tests,
  2288  		// each of which re-initializes the arch.
  2289  		return
  2290  	}
  2291  
  2292  	switch ctxt.Headtype {
  2293  	case objabi.Hplan9:
  2294  		plan9privates = ctxt.Lookup("_privates")
  2295  	}
  2296  
  2297  	for i := range avxOptab {
  2298  		c := avxOptab[i].as
  2299  		if opindex[c&obj.AMask] != nil {
  2300  			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
  2301  		}
  2302  		opindex[c&obj.AMask] = &avxOptab[i]
  2303  	}
  2304  	for i := 1; optab[i].as != 0; i++ {
  2305  		c := optab[i].as
  2306  		if opindex[c&obj.AMask] != nil {
  2307  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2308  		}
  2309  		opindex[c&obj.AMask] = &optab[i]
  2310  	}
  2311  
  2312  	for i := 0; i < Ymax; i++ {
  2313  		ycover[i*Ymax+i] = 1
  2314  	}
  2315  
  2316  	ycover[Yi0*Ymax+Yu2] = 1
  2317  	ycover[Yi1*Ymax+Yu2] = 1
  2318  
  2319  	ycover[Yi0*Ymax+Yi8] = 1
  2320  	ycover[Yi1*Ymax+Yi8] = 1
  2321  	ycover[Yu2*Ymax+Yi8] = 1
  2322  	ycover[Yu7*Ymax+Yi8] = 1
  2323  
  2324  	ycover[Yi0*Ymax+Yu7] = 1
  2325  	ycover[Yi1*Ymax+Yu7] = 1
  2326  	ycover[Yu2*Ymax+Yu7] = 1
  2327  
  2328  	ycover[Yi0*Ymax+Yu8] = 1
  2329  	ycover[Yi1*Ymax+Yu8] = 1
  2330  	ycover[Yu2*Ymax+Yu8] = 1
  2331  	ycover[Yu7*Ymax+Yu8] = 1
  2332  
  2333  	ycover[Yi0*Ymax+Ys32] = 1
  2334  	ycover[Yi1*Ymax+Ys32] = 1
  2335  	ycover[Yu2*Ymax+Ys32] = 1
  2336  	ycover[Yu7*Ymax+Ys32] = 1
  2337  	ycover[Yu8*Ymax+Ys32] = 1
  2338  	ycover[Yi8*Ymax+Ys32] = 1
  2339  
  2340  	ycover[Yi0*Ymax+Yi32] = 1
  2341  	ycover[Yi1*Ymax+Yi32] = 1
  2342  	ycover[Yu2*Ymax+Yi32] = 1
  2343  	ycover[Yu7*Ymax+Yi32] = 1
  2344  	ycover[Yu8*Ymax+Yi32] = 1
  2345  	ycover[Yi8*Ymax+Yi32] = 1
  2346  	ycover[Ys32*Ymax+Yi32] = 1
  2347  
  2348  	ycover[Yi0*Ymax+Yi64] = 1
  2349  	ycover[Yi1*Ymax+Yi64] = 1
  2350  	ycover[Yu7*Ymax+Yi64] = 1
  2351  	ycover[Yu2*Ymax+Yi64] = 1
  2352  	ycover[Yu8*Ymax+Yi64] = 1
  2353  	ycover[Yi8*Ymax+Yi64] = 1
  2354  	ycover[Ys32*Ymax+Yi64] = 1
  2355  	ycover[Yi32*Ymax+Yi64] = 1
  2356  
  2357  	ycover[Yal*Ymax+Yrb] = 1
  2358  	ycover[Ycl*Ymax+Yrb] = 1
  2359  	ycover[Yax*Ymax+Yrb] = 1
  2360  	ycover[Ycx*Ymax+Yrb] = 1
  2361  	ycover[Yrx*Ymax+Yrb] = 1
  2362  	ycover[Yrl*Ymax+Yrb] = 1	// but not Yrl32
  2363  
  2364  	ycover[Ycl*Ymax+Ycx] = 1
  2365  
  2366  	ycover[Yax*Ymax+Yrx] = 1
  2367  	ycover[Ycx*Ymax+Yrx] = 1
  2368  
  2369  	ycover[Yax*Ymax+Yrl] = 1
  2370  	ycover[Ycx*Ymax+Yrl] = 1
  2371  	ycover[Yrx*Ymax+Yrl] = 1
  2372  	ycover[Yrl32*Ymax+Yrl] = 1
  2373  
  2374  	ycover[Yf0*Ymax+Yrf] = 1
  2375  
  2376  	ycover[Yal*Ymax+Ymb] = 1
  2377  	ycover[Ycl*Ymax+Ymb] = 1
  2378  	ycover[Yax*Ymax+Ymb] = 1
  2379  	ycover[Ycx*Ymax+Ymb] = 1
  2380  	ycover[Yrx*Ymax+Ymb] = 1
  2381  	ycover[Yrb*Ymax+Ymb] = 1
  2382  	ycover[Yrl*Ymax+Ymb] = 1	// but not Yrl32
  2383  	ycover[Ym*Ymax+Ymb] = 1
  2384  
  2385  	ycover[Yax*Ymax+Yml] = 1
  2386  	ycover[Ycx*Ymax+Yml] = 1
  2387  	ycover[Yrx*Ymax+Yml] = 1
  2388  	ycover[Yrl*Ymax+Yml] = 1
  2389  	ycover[Yrl32*Ymax+Yml] = 1
  2390  	ycover[Ym*Ymax+Yml] = 1
  2391  
  2392  	ycover[Yax*Ymax+Ymm] = 1
  2393  	ycover[Ycx*Ymax+Ymm] = 1
  2394  	ycover[Yrx*Ymax+Ymm] = 1
  2395  	ycover[Yrl*Ymax+Ymm] = 1
  2396  	ycover[Yrl32*Ymax+Ymm] = 1
  2397  	ycover[Ym*Ymax+Ymm] = 1
  2398  	ycover[Ymr*Ymax+Ymm] = 1
  2399  
  2400  	ycover[Yxr0*Ymax+Yxr] = 1
  2401  
  2402  	ycover[Ym*Ymax+Yxm] = 1
  2403  	ycover[Yxr0*Ymax+Yxm] = 1
  2404  	ycover[Yxr*Ymax+Yxm] = 1
  2405  
  2406  	ycover[Ym*Ymax+Yym] = 1
  2407  	ycover[Yyr*Ymax+Yym] = 1
  2408  
  2409  	ycover[Yxr0*Ymax+YxrEvex] = 1
  2410  	ycover[Yxr*Ymax+YxrEvex] = 1
  2411  
  2412  	ycover[Ym*Ymax+YxmEvex] = 1
  2413  	ycover[Yxr0*Ymax+YxmEvex] = 1
  2414  	ycover[Yxr*Ymax+YxmEvex] = 1
  2415  	ycover[YxrEvex*Ymax+YxmEvex] = 1
  2416  
  2417  	ycover[Yyr*Ymax+YyrEvex] = 1
  2418  
  2419  	ycover[Ym*Ymax+YymEvex] = 1
  2420  	ycover[Yyr*Ymax+YymEvex] = 1
  2421  	ycover[YyrEvex*Ymax+YymEvex] = 1
  2422  
  2423  	ycover[Ym*Ymax+Yzm] = 1
  2424  	ycover[Yzr*Ymax+Yzm] = 1
  2425  
  2426  	ycover[Yk0*Ymax+Yk] = 1
  2427  	ycover[Yknot0*Ymax+Yk] = 1
  2428  
  2429  	ycover[Yk0*Ymax+Ykm] = 1
  2430  	ycover[Yknot0*Ymax+Ykm] = 1
  2431  	ycover[Yk*Ymax+Ykm] = 1
  2432  	ycover[Ym*Ymax+Ykm] = 1
  2433  
  2434  	ycover[Yxvm*Ymax+YxvmEvex] = 1
  2435  
  2436  	ycover[Yyvm*Ymax+YyvmEvex] = 1
  2437  
  2438  	for i := 0; i < MAXREG; i++ {
  2439  		reg[i] = -1
  2440  		if i >= REG_AL && i <= REG_R15B {
  2441  			reg[i] = (i - REG_AL) & 7
  2442  			if i >= REG_SPB && i <= REG_DIB {
  2443  				regrex[i] = 0x40
  2444  			}
  2445  			if i >= REG_R8B && i <= REG_R15B {
  2446  				regrex[i] = Rxr | Rxx | Rxb
  2447  			}
  2448  		}
  2449  
  2450  		if i >= REG_AH && i <= REG_BH {
  2451  			reg[i] = 4 + ((i - REG_AH) & 7)
  2452  		}
  2453  		if i >= REG_AX && i <= REG_R15 {
  2454  			reg[i] = (i - REG_AX) & 7
  2455  			if i >= REG_R8 {
  2456  				regrex[i] = Rxr | Rxx | Rxb
  2457  			}
  2458  		}
  2459  
  2460  		if i >= REG_F0 && i <= REG_F0+7 {
  2461  			reg[i] = (i - REG_F0) & 7
  2462  		}
  2463  		if i >= REG_M0 && i <= REG_M0+7 {
  2464  			reg[i] = (i - REG_M0) & 7
  2465  		}
  2466  		if i >= REG_K0 && i <= REG_K0+7 {
  2467  			reg[i] = (i - REG_K0) & 7
  2468  		}
  2469  		if i >= REG_X0 && i <= REG_X0+15 {
  2470  			reg[i] = (i - REG_X0) & 7
  2471  			if i >= REG_X0+8 {
  2472  				regrex[i] = Rxr | Rxx | Rxb
  2473  			}
  2474  		}
  2475  		if i >= REG_X16 && i <= REG_X16+15 {
  2476  			reg[i] = (i - REG_X16) & 7
  2477  			if i >= REG_X16+8 {
  2478  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2479  			} else {
  2480  				regrex[i] = RxrEvex
  2481  			}
  2482  		}
  2483  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2484  			reg[i] = (i - REG_Y0) & 7
  2485  			if i >= REG_Y0+8 {
  2486  				regrex[i] = Rxr | Rxx | Rxb
  2487  			}
  2488  		}
  2489  		if i >= REG_Y16 && i <= REG_Y16+15 {
  2490  			reg[i] = (i - REG_Y16) & 7
  2491  			if i >= REG_Y16+8 {
  2492  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2493  			} else {
  2494  				regrex[i] = RxrEvex
  2495  			}
  2496  		}
  2497  		if i >= REG_Z0 && i <= REG_Z0+15 {
  2498  			reg[i] = (i - REG_Z0) & 7
  2499  			if i > REG_Z0+7 {
  2500  				regrex[i] = Rxr | Rxx | Rxb
  2501  			}
  2502  		}
  2503  		if i >= REG_Z16 && i <= REG_Z16+15 {
  2504  			reg[i] = (i - REG_Z16) & 7
  2505  			if i >= REG_Z16+8 {
  2506  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2507  			} else {
  2508  				regrex[i] = RxrEvex
  2509  			}
  2510  		}
  2511  
  2512  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2513  			regrex[i] = Rxr
  2514  		}
  2515  	}
  2516  }
  2517  
// isAndroid reports whether buildcfg.GOOS is "android".
// prefixof and vaddr consult it when deciding how TLS references are encoded.
var isAndroid = buildcfg.GOOS == "android"
  2519  
  2520  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2521  	if a.Reg < REG_CS && a.Index < REG_CS {	// fast path
  2522  		return 0
  2523  	}
  2524  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2525  		switch a.Reg {
  2526  		case REG_CS:
  2527  			return 0x2e
  2528  
  2529  		case REG_DS:
  2530  			return 0x3e
  2531  
  2532  		case REG_ES:
  2533  			return 0x26
  2534  
  2535  		case REG_FS:
  2536  			return 0x64
  2537  
  2538  		case REG_GS:
  2539  			return 0x65
  2540  
  2541  		case REG_TLS:
  2542  			// NOTE: Systems listed here should be only systems that
  2543  			// support direct TLS references like 8(TLS) implemented as
  2544  			// direct references from FS or GS. Systems that require
  2545  			// the initial-exec model, where you load the TLS base into
  2546  			// a register and then index from that register, do not reach
  2547  			// this code and should not be listed.
  2548  			if ctxt.Arch.Family == sys.I386 {
  2549  				switch ctxt.Headtype {
  2550  				default:
  2551  					if isAndroid {
  2552  						return 0x65	// GS
  2553  					}
  2554  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2555  
  2556  				case objabi.Hdarwin,
  2557  					objabi.Hdragonfly,
  2558  					objabi.Hfreebsd,
  2559  					objabi.Hnetbsd,
  2560  					objabi.Hopenbsd:
  2561  					return 0x65	// GS
  2562  				}
  2563  			}
  2564  
  2565  			switch ctxt.Headtype {
  2566  			default:
  2567  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2568  
  2569  			case objabi.Hlinux:
  2570  				if isAndroid {
  2571  					return 0x64	// FS
  2572  				}
  2573  
  2574  				if ctxt.Flag_shared {
  2575  					log.Fatalf("unknown TLS base register for linux with -shared")
  2576  				} else {
  2577  					return 0x64	// FS
  2578  				}
  2579  
  2580  			case objabi.Hdragonfly,
  2581  				objabi.Hfreebsd,
  2582  				objabi.Hnetbsd,
  2583  				objabi.Hopenbsd,
  2584  				objabi.Hsolaris:
  2585  				return 0x64	// FS
  2586  
  2587  			case objabi.Hdarwin:
  2588  				return 0x65	// GS
  2589  			}
  2590  		}
  2591  	}
  2592  
  2593  	switch a.Index {
  2594  	case REG_CS:
  2595  		return 0x2e
  2596  
  2597  	case REG_DS:
  2598  		return 0x3e
  2599  
  2600  	case REG_ES:
  2601  		return 0x26
  2602  
  2603  	case REG_TLS:
  2604  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2605  			// When building for inclusion into a shared library, an instruction of the form
  2606  			//     MOV off(CX)(TLS*1), AX
  2607  			// becomes
  2608  			//     mov %gs:off(%ecx), %eax // on i386
  2609  			//     mov %fs:off(%rcx), %rax // on amd64
  2610  			// which assumes that the correct TLS offset has been loaded into CX (today
  2611  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2612  			// a shared library the instruction it becomes
  2613  			//     mov 0x0(%ecx), %eax // on i386
  2614  			//     mov 0x0(%rcx), %rax // on amd64
  2615  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2616  			if ctxt.Arch.Family == sys.I386 {
  2617  				return 0x65	// GS
  2618  			}
  2619  			return 0x64	// FS
  2620  		}
  2621  
  2622  	case REG_FS:
  2623  		return 0x64
  2624  
  2625  	case REG_GS:
  2626  		return 0x65
  2627  	}
  2628  
  2629  	return 0
  2630  }
  2631  
  2632  // oclassRegList returns multisource operand class for addr.
  2633  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2634  	// TODO(quasilyte): when oclass register case is refactored into
  2635  	// lookup table, use it here to get register kind more easily.
  2636  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2637  
  2638  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2639  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2640  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2641  
  2642  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2643  	low := regIndex(int16(reg0))
  2644  	high := regIndex(int16(reg1))
  2645  
  2646  	if ctxt.Arch.Family == sys.I386 {
  2647  		if low >= 8 || high >= 8 {
  2648  			return Yxxx
  2649  		}
  2650  	}
  2651  
  2652  	switch high - low {
  2653  	case 3:
  2654  		switch {
  2655  		case regIsXmm(reg0) && regIsXmm(reg1):
  2656  			return YxrEvexMulti4
  2657  		case regIsYmm(reg0) && regIsYmm(reg1):
  2658  			return YyrEvexMulti4
  2659  		case regIsZmm(reg0) && regIsZmm(reg1):
  2660  			return YzrMulti4
  2661  		default:
  2662  			return Yxxx
  2663  		}
  2664  	default:
  2665  		return Yxxx
  2666  	}
  2667  }
  2668  
  2669  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2670  // For addr that is not V-mem returns (Yxxx, false).
  2671  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2672  	switch addr.Index {
  2673  	case REG_X0 + 0,
  2674  		REG_X0 + 1,
  2675  		REG_X0 + 2,
  2676  		REG_X0 + 3,
  2677  		REG_X0 + 4,
  2678  		REG_X0 + 5,
  2679  		REG_X0 + 6,
  2680  		REG_X0 + 7:
  2681  		return Yxvm, true
  2682  	case REG_X8 + 0,
  2683  		REG_X8 + 1,
  2684  		REG_X8 + 2,
  2685  		REG_X8 + 3,
  2686  		REG_X8 + 4,
  2687  		REG_X8 + 5,
  2688  		REG_X8 + 6,
  2689  		REG_X8 + 7:
  2690  		if ctxt.Arch.Family == sys.I386 {
  2691  			return Yxxx, true
  2692  		}
  2693  		return Yxvm, true
  2694  	case REG_X16 + 0,
  2695  		REG_X16 + 1,
  2696  		REG_X16 + 2,
  2697  		REG_X16 + 3,
  2698  		REG_X16 + 4,
  2699  		REG_X16 + 5,
  2700  		REG_X16 + 6,
  2701  		REG_X16 + 7,
  2702  		REG_X16 + 8,
  2703  		REG_X16 + 9,
  2704  		REG_X16 + 10,
  2705  		REG_X16 + 11,
  2706  		REG_X16 + 12,
  2707  		REG_X16 + 13,
  2708  		REG_X16 + 14,
  2709  		REG_X16 + 15:
  2710  		if ctxt.Arch.Family == sys.I386 {
  2711  			return Yxxx, true
  2712  		}
  2713  		return YxvmEvex, true
  2714  
  2715  	case REG_Y0 + 0,
  2716  		REG_Y0 + 1,
  2717  		REG_Y0 + 2,
  2718  		REG_Y0 + 3,
  2719  		REG_Y0 + 4,
  2720  		REG_Y0 + 5,
  2721  		REG_Y0 + 6,
  2722  		REG_Y0 + 7:
  2723  		return Yyvm, true
  2724  	case REG_Y8 + 0,
  2725  		REG_Y8 + 1,
  2726  		REG_Y8 + 2,
  2727  		REG_Y8 + 3,
  2728  		REG_Y8 + 4,
  2729  		REG_Y8 + 5,
  2730  		REG_Y8 + 6,
  2731  		REG_Y8 + 7:
  2732  		if ctxt.Arch.Family == sys.I386 {
  2733  			return Yxxx, true
  2734  		}
  2735  		return Yyvm, true
  2736  	case REG_Y16 + 0,
  2737  		REG_Y16 + 1,
  2738  		REG_Y16 + 2,
  2739  		REG_Y16 + 3,
  2740  		REG_Y16 + 4,
  2741  		REG_Y16 + 5,
  2742  		REG_Y16 + 6,
  2743  		REG_Y16 + 7,
  2744  		REG_Y16 + 8,
  2745  		REG_Y16 + 9,
  2746  		REG_Y16 + 10,
  2747  		REG_Y16 + 11,
  2748  		REG_Y16 + 12,
  2749  		REG_Y16 + 13,
  2750  		REG_Y16 + 14,
  2751  		REG_Y16 + 15:
  2752  		if ctxt.Arch.Family == sys.I386 {
  2753  			return Yxxx, true
  2754  		}
  2755  		return YyvmEvex, true
  2756  
  2757  	case REG_Z0 + 0,
  2758  		REG_Z0 + 1,
  2759  		REG_Z0 + 2,
  2760  		REG_Z0 + 3,
  2761  		REG_Z0 + 4,
  2762  		REG_Z0 + 5,
  2763  		REG_Z0 + 6,
  2764  		REG_Z0 + 7:
  2765  		return Yzvm, true
  2766  	case REG_Z8 + 0,
  2767  		REG_Z8 + 1,
  2768  		REG_Z8 + 2,
  2769  		REG_Z8 + 3,
  2770  		REG_Z8 + 4,
  2771  		REG_Z8 + 5,
  2772  		REG_Z8 + 6,
  2773  		REG_Z8 + 7,
  2774  		REG_Z8 + 8,
  2775  		REG_Z8 + 9,
  2776  		REG_Z8 + 10,
  2777  		REG_Z8 + 11,
  2778  		REG_Z8 + 12,
  2779  		REG_Z8 + 13,
  2780  		REG_Z8 + 14,
  2781  		REG_Z8 + 15,
  2782  		REG_Z8 + 16,
  2783  		REG_Z8 + 17,
  2784  		REG_Z8 + 18,
  2785  		REG_Z8 + 19,
  2786  		REG_Z8 + 20,
  2787  		REG_Z8 + 21,
  2788  		REG_Z8 + 22,
  2789  		REG_Z8 + 23:
  2790  		if ctxt.Arch.Family == sys.I386 {
  2791  			return Yxxx, true
  2792  		}
  2793  		return Yzvm, true
  2794  	}
  2795  
  2796  	return Yxxx, false
  2797  }
  2798  
// oclass returns the operand class (one of the Y* constants) of operand a
// of instruction p. Yxxx is returned for operands that cannot be classified.
//
// Non-register operands are classified by the first switch on a.Type;
// register operands fall through to the second switch on a.Reg.
// p is only used when formatting diagnostics.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_REGLIST:
		return oclassRegList(ctxt, a)

	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		// Only a bare named symbol (no base, index or scale) is accepted.
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		// Pseudo registers have negative index, but SP is
		// not pseudo on x86, hence REG_SP check is not redundant.
		if a.Index == REG_SP || a.Index < 0 {
			// Can't use FP/SB/PC/SP as the index register.
			return Yxxx
		}

		// A vector index register makes this a VSIB operand.
		if vmem, ok := oclassVMem(ctxt, a); ok {
			return vmem
		}

		if ctxt.Arch.Family == sys.AMD64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			if a.Sym != nil && useAbs(ctxt, a.Sym) {
				return Yi32
			}
			return Yiauto	// use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		// A nameless address is classified like a constant.
		fallthrough

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		v := a.Offset
		// On 386 the constant is truncated to 32 bits (and sign-extended)
		// before it is classified.
		if ctxt.Arch.Family == sys.I386 {
			v = int64(int32(v))
		}
		// The cases are ordered so that the first matching immediate
		// class is returned.
		switch {
		case v == 0:
			return Yi0
		case v == 1:
			return Yi1
		case v >= 0 && v <= 3:
			return Yu2
		case v >= 0 && v <= 127:
			return Yu7
		case v >= 0 && v <= 255:
			return Yu8
		case v >= -128 && v <= 127:
			return Yi8
		}
		if ctxt.Arch.Family == sys.I386 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32	// can sign extend
		}
		if v>>32 == 0 {
			return Yi32	// unsigned
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every operand type except TYPE_REG has been handled above.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		// These byte registers are rejected on 386; elsewhere they are
		// classified like the byte registers in the next case.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	case REG_R8,	// not really Yrl
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// R8-R15 are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		if ctxt.Arch.Family == sys.I386 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	case REG_X0:
		return Yxr0

	case REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_X0 + 16,
		REG_X0 + 17,
		REG_X0 + 18,
		REG_X0 + 19,
		REG_X0 + 20,
		REG_X0 + 21,
		REG_X0 + 22,
		REG_X0 + 23,
		REG_X0 + 24,
		REG_X0 + 25,
		REG_X0 + 26,
		REG_X0 + 27,
		REG_X0 + 28,
		REG_X0 + 29,
		REG_X0 + 30,
		REG_X0 + 31:
		return YxrEvex

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_Y0 + 16,
		REG_Y0 + 17,
		REG_Y0 + 18,
		REG_Y0 + 19,
		REG_Y0 + 20,
		REG_Y0 + 21,
		REG_Y0 + 22,
		REG_Y0 + 23,
		REG_Y0 + 24,
		REG_Y0 + 25,
		REG_Y0 + 26,
		REG_Y0 + 27,
		REG_Y0 + 28,
		REG_Y0 + 29,
		REG_Y0 + 30,
		REG_Y0 + 31:
		return YyrEvex

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzr

	case REG_Z0 + 8,
		REG_Z0 + 9,
		REG_Z0 + 10,
		REG_Z0 + 11,
		REG_Z0 + 12,
		REG_Z0 + 13,
		REG_Z0 + 14,
		REG_Z0 + 15,
		REG_Z0 + 16,
		REG_Z0 + 17,
		REG_Z0 + 18,
		REG_Z0 + 19,
		REG_Z0 + 20,
		REG_Z0 + 21,
		REG_Z0 + 22,
		REG_Z0 + 23,
		REG_Z0 + 24,
		REG_Z0 + 25,
		REG_Z0 + 26,
		REG_Z0 + 27,
		REG_Z0 + 28,
		REG_Z0 + 29,
		REG_Z0 + 30,
		REG_Z0 + 31:
		// Z8 and above are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		return Yzr

	case REG_K0:
		return Yk0

	case REG_K0 + 1,
		REG_K0 + 2,
		REG_K0 + 3,
		REG_K0 + 4,
		REG_K0 + 5,
		REG_K0 + 6,
		REG_K0 + 7:
		return Yknot0

	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	// Any register not listed above has no operand class.
	return Yxxx
}
  3216  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
type AsmBuf struct {
	// buf is the fixed-size backing storage; the Put* methods do not grow it.
	buf		[100]byte
	// off is the number of bytes written so far and the index of the next write.
	off		int
	// rexflag accumulates REX-related bits; asmandsz ORs regrex values into it.
	rexflag		int
	vexflag		bool	// Per inst: true for VEX-encoded
	evexflag	bool	// Per inst: true for EVEX-encoded
	// NOTE(review): rep, repn and lock presumably record pending
	// REP/REPN/LOCK prefixes; their use is outside this chunk, confirm.
	rep		bool
	repn		bool
	lock		bool

	evex	evexBits	// Initialized when evexflag is true
}
  3231  
  3232  // Put1 appends one byte to the end of the buffer.
  3233  func (ab *AsmBuf) Put1(x byte) {
  3234  	ab.buf[ab.off] = x
  3235  	ab.off++
  3236  }
  3237  
  3238  // Put2 appends two bytes to the end of the buffer.
  3239  func (ab *AsmBuf) Put2(x, y byte) {
  3240  	ab.buf[ab.off+0] = x
  3241  	ab.buf[ab.off+1] = y
  3242  	ab.off += 2
  3243  }
  3244  
  3245  // Put3 appends three bytes to the end of the buffer.
  3246  func (ab *AsmBuf) Put3(x, y, z byte) {
  3247  	ab.buf[ab.off+0] = x
  3248  	ab.buf[ab.off+1] = y
  3249  	ab.buf[ab.off+2] = z
  3250  	ab.off += 3
  3251  }
  3252  
  3253  // Put4 appends four bytes to the end of the buffer.
  3254  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3255  	ab.buf[ab.off+0] = x
  3256  	ab.buf[ab.off+1] = y
  3257  	ab.buf[ab.off+2] = z
  3258  	ab.buf[ab.off+3] = w
  3259  	ab.off += 4
  3260  }
  3261  
  3262  // PutInt16 writes v into the buffer using little-endian encoding.
  3263  func (ab *AsmBuf) PutInt16(v int16) {
  3264  	ab.buf[ab.off+0] = byte(v)
  3265  	ab.buf[ab.off+1] = byte(v >> 8)
  3266  	ab.off += 2
  3267  }
  3268  
  3269  // PutInt32 writes v into the buffer using little-endian encoding.
  3270  func (ab *AsmBuf) PutInt32(v int32) {
  3271  	ab.buf[ab.off+0] = byte(v)
  3272  	ab.buf[ab.off+1] = byte(v >> 8)
  3273  	ab.buf[ab.off+2] = byte(v >> 16)
  3274  	ab.buf[ab.off+3] = byte(v >> 24)
  3275  	ab.off += 4
  3276  }
  3277  
  3278  // PutInt64 writes v into the buffer using little-endian encoding.
  3279  func (ab *AsmBuf) PutInt64(v int64) {
  3280  	ab.buf[ab.off+0] = byte(v)
  3281  	ab.buf[ab.off+1] = byte(v >> 8)
  3282  	ab.buf[ab.off+2] = byte(v >> 16)
  3283  	ab.buf[ab.off+3] = byte(v >> 24)
  3284  	ab.buf[ab.off+4] = byte(v >> 32)
  3285  	ab.buf[ab.off+5] = byte(v >> 40)
  3286  	ab.buf[ab.off+6] = byte(v >> 48)
  3287  	ab.buf[ab.off+7] = byte(v >> 56)
  3288  	ab.off += 8
  3289  }
  3290  
  3291  // Put copies b into the buffer.
  3292  func (ab *AsmBuf) Put(b []byte) {
  3293  	copy(ab.buf[ab.off:], b)
  3294  	ab.off += len(b)
  3295  }
  3296  
  3297  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3298  // starting at specified offset (e.g. z counter value).
  3299  // Trailing 0 is not written.
  3300  //
  3301  // Intended to be used for literal Z cases.
  3302  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3303  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3304  	for int(op[offset]) != 0 {
  3305  		ab.Put1(byte(op[offset]))
  3306  		offset++
  3307  	}
  3308  }
  3309  
  3310  // Insert inserts b at offset i.
  3311  func (ab *AsmBuf) Insert(i int, b byte) {
  3312  	ab.off++
  3313  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3314  	ab.buf[i] = b
  3315  }
  3316  
  3317  // Last returns the byte at the end of the buffer.
  3318  func (ab *AsmBuf) Last() byte	{ return ab.buf[ab.off-1] }
  3319  
  3320  // Len returns the length of the buffer.
  3321  func (ab *AsmBuf) Len() int	{ return ab.off }
  3322  
  3323  // Bytes returns the contents of the buffer.
  3324  func (ab *AsmBuf) Bytes() []byte	{ return ab.buf[:ab.off] }
  3325  
  3326  // Reset empties the buffer.
  3327  func (ab *AsmBuf) Reset()	{ ab.off = 0 }
  3328  
  3329  // At returns the byte at offset i.
  3330  func (ab *AsmBuf) At(i int) byte	{ return ab.buf[i] }
  3331  
// asmidx emits SIB byte.
//
// The byte is assembled in i: the scale selector goes into bits 7-6, the
// index register number (reg[index]) into bits 5-3 and the base register
// number (reg[base]) into bits 2-0. For an unsupported scale, index or base
// a diagnostic is reported through ctxt.Diag and a zero byte is emitted.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index register: use index field value 4 and skip the scale
		// validation below.
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		// These index registers are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		i = reg[index] << 3
	}

	// Encode the scale factor (1, 2, 4 or 8) as 0-3 in the top two bits.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	// Encode the base register in the low three bits.
	switch base {
	default:
		goto bad

	case REG_NONE:	// must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// R8-R15 are rejected as base registers on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	// Report the problem but still emit one byte.
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}
  3520  
  3521  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3522  	var rel obj.Reloc
  3523  
  3524  	v := vaddr(ctxt, p, a, &rel)
  3525  	if rel.Siz != 0 {
  3526  		if rel.Siz != 4 {
  3527  			ctxt.Diag("bad reloc")
  3528  		}
  3529  		r := obj.Addrel(cursym)
  3530  		*r = rel
  3531  		r.Off = int32(p.Pc + int64(ab.Len()))
  3532  	}
  3533  
  3534  	ab.PutInt32(int32(v))
  3535  }
  3536  
  3537  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3538  	if r != nil {
  3539  		*r = obj.Reloc{}
  3540  	}
  3541  
  3542  	switch a.Name {
  3543  	case obj.NAME_STATIC,
  3544  		obj.NAME_GOTREF,
  3545  		obj.NAME_EXTERN:
  3546  		s := a.Sym
  3547  		if r == nil {
  3548  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3549  			log.Fatalf("reloc")
  3550  		}
  3551  
  3552  		if a.Name == obj.NAME_GOTREF {
  3553  			r.Siz = 4
  3554  			r.Type = objabi.R_GOTPCREL
  3555  		} else if useAbs(ctxt, s) {
  3556  			r.Siz = 4
  3557  			r.Type = objabi.R_ADDR
  3558  		} else {
  3559  			r.Siz = 4
  3560  			r.Type = objabi.R_PCREL
  3561  		}
  3562  
  3563  		r.Off = -1	// caller must fill in
  3564  		r.Sym = s
  3565  		r.Add = a.Offset
  3566  
  3567  		return 0
  3568  	}
  3569  
  3570  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3571  		if r == nil {
  3572  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3573  			log.Fatalf("reloc")
  3574  		}
  3575  
  3576  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3577  			r.Type = objabi.R_TLS_LE
  3578  			r.Siz = 4
  3579  			r.Off = -1	// caller must fill in
  3580  			r.Add = a.Offset
  3581  		}
  3582  		return 0
  3583  	}
  3584  
  3585  	return a.Offset
  3586  }
  3587  
  3588  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3589  	var base int
  3590  	var rel obj.Reloc
  3591  
  3592  	rex &= 0x40 | Rxr
  3593  	if a.Offset != int64(int32(a.Offset)) {
  3594  		// The rules are slightly different for 386 and AMD64,
  3595  		// mostly for historical reasons. We may unify them later,
  3596  		// but it must be discussed beforehand.
  3597  		//
  3598  		// For 64bit mode only LEAL is allowed to overflow.
  3599  		// It's how https://golang.org/cl/59630 made it.
  3600  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3601  		//
  3602  		// For 32bit mode rules are more permissive.
  3603  		// If offset fits uint32, it's permitted.
  3604  		// This is allowed for assembly that wants to use 32-bit hex
  3605  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3606  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3607  			(ctxt.Arch.Family != sys.AMD64 &&
  3608  				int64(uint32(a.Offset)) == a.Offset &&
  3609  				ab.rexflag&Rxw == 0)
  3610  		if !overflowOK {
  3611  			ctxt.Diag("offset too large in %s", p)
  3612  		}
  3613  	}
  3614  	v := int32(a.Offset)
  3615  	rel.Siz = 0
  3616  
  3617  	switch a.Type {
  3618  	case obj.TYPE_ADDR:
  3619  		if a.Name == obj.NAME_NONE {
  3620  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3621  		}
  3622  		if a.Index == REG_TLS {
  3623  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3624  		}
  3625  		goto bad
  3626  
  3627  	case obj.TYPE_REG:
  3628  		const regFirst = REG_AL
  3629  		const regLast = REG_Z31
  3630  		if a.Reg < regFirst || regLast < a.Reg {
  3631  			goto bad
  3632  		}
  3633  		if v != 0 {
  3634  			goto bad
  3635  		}
  3636  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3637  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3638  		return
  3639  	}
  3640  
  3641  	if a.Type != obj.TYPE_MEM {
  3642  		goto bad
  3643  	}
  3644  
  3645  	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
  3646  		base := int(a.Reg)
  3647  		switch a.Name {
  3648  		case obj.NAME_EXTERN,
  3649  			obj.NAME_GOTREF,
  3650  			obj.NAME_STATIC:
  3651  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3652  				goto bad
  3653  			}
  3654  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3655  				// The base register has already been set. It holds the PC
  3656  				// of this instruction returned by a PC-reading thunk.
  3657  				// See obj6.go:rewriteToPcrel.
  3658  			} else {
  3659  				base = REG_NONE
  3660  			}
  3661  			v = int32(vaddr(ctxt, p, a, &rel))
  3662  
  3663  		case obj.NAME_AUTO,
  3664  			obj.NAME_PARAM:
  3665  			base = REG_SP
  3666  		}
  3667  
  3668  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3669  		if base == REG_NONE {
  3670  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3671  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3672  			goto putrelv
  3673  		}
  3674  
  3675  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3676  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3677  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3678  			return
  3679  		}
  3680  
  3681  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3682  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3683  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3684  			ab.Put1(disp8)
  3685  			return
  3686  		}
  3687  
  3688  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3689  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3690  		goto putrelv
  3691  	}
  3692  
  3693  	base = int(a.Reg)
  3694  	switch a.Name {
  3695  	case obj.NAME_STATIC,
  3696  		obj.NAME_GOTREF,
  3697  		obj.NAME_EXTERN:
  3698  		if a.Sym == nil {
  3699  			ctxt.Diag("bad addr: %v", p)
  3700  		}
  3701  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3702  			// The base register has already been set. It holds the PC
  3703  			// of this instruction returned by a PC-reading thunk.
  3704  			// See obj6.go:rewriteToPcrel.
  3705  		} else {
  3706  			base = REG_NONE
  3707  		}
  3708  		v = int32(vaddr(ctxt, p, a, &rel))
  3709  
  3710  	case obj.NAME_AUTO,
  3711  		obj.NAME_PARAM:
  3712  		base = REG_SP
  3713  	}
  3714  
  3715  	if base == REG_TLS {
  3716  		v = int32(vaddr(ctxt, p, a, &rel))
  3717  	}
  3718  
  3719  	ab.rexflag |= regrex[base]&Rxb | rex
  3720  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3721  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3722  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3723  				ctxt.Diag("%v has offset against gotref", p)
  3724  			}
  3725  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3726  			goto putrelv
  3727  		}
  3728  
  3729  		// temporary
  3730  		ab.Put2(
  3731  			byte(0<<6|4<<0|r<<3),	// sib present
  3732  			0<<6|4<<3|5<<0,		// DS:d32
  3733  		)
  3734  		goto putrelv
  3735  	}
  3736  
  3737  	if base == REG_SP || base == REG_R12 {
  3738  		if v == 0 {
  3739  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3740  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3741  			return
  3742  		}
  3743  
  3744  		if disp8, ok := toDisp8(v, p, ab); ok {
  3745  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3746  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3747  			ab.Put1(disp8)
  3748  			return
  3749  		}
  3750  
  3751  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3752  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3753  		goto putrelv
  3754  	}
  3755  
  3756  	if REG_AX <= base && base <= REG_R15 {
  3757  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
  3758  			ctxt.Headtype != objabi.Hwindows {
  3759  			rel = obj.Reloc{}
  3760  			rel.Type = objabi.R_TLS_LE
  3761  			rel.Siz = 4
  3762  			rel.Sym = nil
  3763  			rel.Add = int64(v)
  3764  			v = 0
  3765  		}
  3766  
  3767  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3768  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3769  			return
  3770  		}
  3771  
  3772  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3773  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3774  			return
  3775  		}
  3776  
  3777  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3778  		goto putrelv
  3779  	}
  3780  
  3781  	goto bad
  3782  
  3783  putrelv:
  3784  	if rel.Siz != 0 {
  3785  		if rel.Siz != 4 {
  3786  			ctxt.Diag("bad rel")
  3787  			goto bad
  3788  		}
  3789  
  3790  		r := obj.Addrel(cursym)
  3791  		*r = rel
  3792  		r.Off = int32(p.Pc + int64(ab.Len()))
  3793  	}
  3794  
  3795  	ab.PutInt32(v)
  3796  	return
  3797  
  3798  bad:
  3799  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3800  }
  3801  
  3802  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3803  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3804  }
  3805  
  3806  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3807  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3808  }
  3809  
  3810  func bytereg(a *obj.Addr, t *uint8) {
  3811  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3812  		a.Reg += REG_AL - REG_AX
  3813  		*t = 0
  3814  	}
  3815  }
  3816  
  3817  func unbytereg(a *obj.Addr, t *uint8) {
  3818  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3819  		a.Reg += REG_AX - REG_AL
  3820  		*t = 0
  3821  	}
  3822  }
  3823  
  3824  const (
  3825  	movLit	uint8	= iota	// Like Zlit
  3826  	movRegMem
  3827  	movMemReg
  3828  	movRegMem2op
  3829  	movMemReg2op
  3830  	movFullPtr	// Load full pointer, trash heap (unsupported)
  3831  	movDoubleShift
  3832  	movTLSReg
  3833  )
  3834  
// ymovtab lists the special-case PUSH/POP, MOV and double-shift forms that
// involve segment, control, debug, test and descriptor-table registers or
// the TLS base. Each entry gives the instruction, three operand classes
// (Ynone marks an absent operand), one of the mov* handling kinds, and up
// to four encoding bytes.
// NOTE(review): the precise meaning of each encoding byte depends on the
// handling kind and is defined by the code that consumes this table, which
// is not visible here — confirm there before editing any entry.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	// These are the only entries that use all three operand classes.
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	// All-zero entry; presumably the end-of-table sentinel — confirm
	// against the code that scans this table.
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3978  
  3979  func isax(a *obj.Addr) bool {
  3980  	switch a.Reg {
  3981  	case REG_AX, REG_AL, REG_AH:
  3982  		return true
  3983  	}
  3984  
  3985  	return a.Index == REG_AX
  3986  }
  3987  
  3988  func subreg(p *obj.Prog, from int, to int) {
  3989  	if false {	/* debug['Q'] */
  3990  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3991  	}
  3992  
  3993  	if int(p.From.Reg) == from {
  3994  		p.From.Reg = int16(to)
  3995  		p.Ft = 0
  3996  	}
  3997  
  3998  	if int(p.To.Reg) == from {
  3999  		p.To.Reg = int16(to)
  4000  		p.Tt = 0
  4001  	}
  4002  
  4003  	if int(p.From.Index) == from {
  4004  		p.From.Index = int16(to)
  4005  		p.Ft = 0
  4006  	}
  4007  
  4008  	if int(p.To.Index) == from {
  4009  		p.To.Index = int16(to)
  4010  		p.Tt = 0
  4011  	}
  4012  
  4013  	if false {	/* debug['Q'] */
  4014  		fmt.Printf("%v\n", p)
  4015  	}
  4016  }
  4017  
  4018  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  4019  	switch op {
  4020  	case Pm, Pe, Pf2, Pf3:
  4021  		if osize != 1 {
  4022  			if op != Pm {
  4023  				ab.Put1(byte(op))
  4024  			}
  4025  			ab.Put1(Pm)
  4026  			z++
  4027  			op = int(o.op[z])
  4028  			break
  4029  		}
  4030  		fallthrough
  4031  
  4032  	default:
  4033  		if ab.Len() == 0 || ab.Last() != Pm {
  4034  			ab.Put1(Pm)
  4035  		}
  4036  	}
  4037  
  4038  	ab.Put1(byte(op))
  4039  	return z
  4040  }
  4041  
  4042  var bpduff1 = []byte{
  4043  	0x48, 0x89, 0x6c, 0x24, 0xf0,	// MOVQ BP, -16(SP)
  4044  	0x48, 0x8d, 0x6c, 0x24, 0xf0,	// LEAQ -16(SP), BP
  4045  }
  4046  
  4047  var bpduff2 = []byte{
  4048  	0x48, 0x8b, 0x6d, 0x00,	// MOVQ 0(BP), BP
  4049  }
  4050  
  4051  // asmevex emits EVEX pregis and opcode byte.
  4052  // In addition to asmvex r/m, vvvv and reg fields also requires optional
  4053  // K-masking register.
  4054  //
  4055  // Expects asmbuf.evex to be properly initialized.
  4056  func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
  4057  	ab.evexflag = true
  4058  	evex := ab.evex
  4059  
  4060  	rexR := byte(1)
  4061  	evexR := byte(1)
  4062  	rexX := byte(1)
  4063  	rexB := byte(1)
  4064  	if r != nil {
  4065  		if regrex[r.Reg]&Rxr != 0 {
  4066  			rexR = 0	// "ModR/M.reg" selector 4th bit.
  4067  		}
  4068  		if regrex[r.Reg]&RxrEvex != 0 {
  4069  			evexR = 0	// "ModR/M.reg" selector 5th bit.
  4070  		}
  4071  	}
  4072  	if rm != nil {
  4073  		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
  4074  			rexX = 0
  4075  		} else if regrex[rm.Index]&Rxx != 0 {
  4076  			rexX = 0
  4077  		}
  4078  		if regrex[rm.Reg]&Rxb != 0 {
  4079  			rexB = 0
  4080  		}
  4081  	}
  4082  	// P0 = [R][X][B][R'][00][mm]
  4083  	p0 := (rexR << 7) |
  4084  		(rexX << 6) |
  4085  		(rexB << 5) |
  4086  		(evexR << 4) |
  4087  		(0 << 2) |
  4088  		(evex.M() << 0)
  4089  
  4090  	vexV := byte(0)
  4091  	if v != nil {
  4092  		// 4bit-wide reg index.
  4093  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4094  	}
  4095  	vexV ^= 0x0F
  4096  	// P1 = [W][vvvv][1][pp]
  4097  	p1 := (evex.W() << 7) |
  4098  		(vexV << 3) |
  4099  		(1 << 2) |
  4100  		(evex.P() << 0)
  4101  
  4102  	suffix := evexSuffixMap[p.Scond]
  4103  	evexZ := byte(0)
  4104  	evexLL := evex.L()
  4105  	evexB := byte(0)
  4106  	evexV := byte(1)
  4107  	evexA := byte(0)
  4108  	if suffix.zeroing {
  4109  		if !evex.ZeroingEnabled() {
  4110  			ctxt.Diag("unsupported zeroing: %v", p)
  4111  		}
  4112  		if k == nil {
  4113  			// When you request zeroing you must specify a mask register.
  4114  			// See issue 57952.
  4115  			ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
  4116  		} else if k.Reg == REG_K0 {
  4117  			// The mask register must not be K0. That restriction is already
  4118  			// handled by the Yknot0 restriction in the opcode tables, so we
  4119  			// won't ever reach here. But put something sensible here just in case.
  4120  			ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
  4121  		}
  4122  		evexZ = 1
  4123  	}
  4124  	switch {
  4125  	case suffix.rounding != rcUnset:
  4126  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4127  			ctxt.Diag("illegal rounding with memory argument: %v", p)
  4128  		} else if !evex.RoundingEnabled() {
  4129  			ctxt.Diag("unsupported rounding: %v", p)
  4130  		}
  4131  		evexB = 1
  4132  		evexLL = suffix.rounding
  4133  	case suffix.broadcast:
  4134  		if rm == nil || rm.Type != obj.TYPE_MEM {
  4135  			ctxt.Diag("illegal broadcast without memory argument: %v", p)
  4136  		} else if !evex.BroadcastEnabled() {
  4137  			ctxt.Diag("unsupported broadcast: %v", p)
  4138  		}
  4139  		evexB = 1
  4140  	case suffix.sae:
  4141  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4142  			ctxt.Diag("illegal SAE with memory argument: %v", p)
  4143  		} else if !evex.SaeEnabled() {
  4144  			ctxt.Diag("unsupported SAE: %v", p)
  4145  		}
  4146  		evexB = 1
  4147  	}
  4148  	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
  4149  		evexV = 0
  4150  	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
  4151  		evexV = 0	// VSR selector 5th bit.
  4152  	}
  4153  	if k != nil {
  4154  		evexA = byte(reg[k.Reg])
  4155  	}
  4156  	// P2 = [z][L'L][b][V'][aaa]
  4157  	p2 := (evexZ << 7) |
  4158  		(evexLL << 5) |
  4159  		(evexB << 4) |
  4160  		(evexV << 3) |
  4161  		(evexA << 0)
  4162  
  4163  	const evexEscapeByte = 0x62
  4164  	ab.Put4(evexEscapeByte, p0, p1, p2)
  4165  	ab.Put1(evex.opcode)
  4166  }
  4167  
  4168  // Emit VEX prefix and opcode byte.
  4169  // The three addresses are the r/m, vvvv, and reg fields.
  4170  // The reg and rm arguments appear in the same order as the
  4171  // arguments to asmand, which typically follows the call to asmvex.
  4172  // The final two arguments are the VEX prefix (see encoding above)
  4173  // and the opcode byte.
  4174  // For details about vex prefix see:
  4175  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4176  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4177  	ab.vexflag = true
  4178  	rexR := 0
  4179  	if r != nil {
  4180  		rexR = regrex[r.Reg] & Rxr
  4181  	}
  4182  	rexB := 0
  4183  	rexX := 0
  4184  	if rm != nil {
  4185  		rexB = regrex[rm.Reg] & Rxb
  4186  		rexX = regrex[rm.Index] & Rxx
  4187  	}
  4188  	vexM := (vex >> 3) & 0x7
  4189  	vexWLP := vex & 0x87
  4190  	vexV := byte(0)
  4191  	if v != nil {
  4192  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4193  	}
  4194  	vexV ^= 0xF
  4195  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4196  		// Can use 2-byte encoding.
  4197  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4198  	} else {
  4199  		// Must use 3-byte encoding.
  4200  		ab.Put3(0xc4,
  4201  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4202  			vexV<<3|vexWLP,
  4203  		)
  4204  	}
  4205  	ab.Put1(opcode)
  4206  }
  4207  
  4208  // regIndex returns register index that fits in 5 bits.
  4209  //
  4210  //	R         : 3 bit | legacy instructions     | N/A
  4211  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4212  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4213  //
  4214  // Examples:
  4215  //
  4216  //	REG_Z30 => 30
  4217  //	REG_X15 => 15
  4218  //	REG_R9  => 9
  4219  //	REG_AX  => 0
  4220  func regIndex(r int16) int {
  4221  	lower3bits := reg[r]
  4222  	high4bit := regrex[r] & Rxr << 1
  4223  	high5bit := regrex[r] & RxrEvex << 0
  4224  	return lower3bits | high4bit | high5bit
  4225  }
  4226  
  4227  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4228  // Reports errors via ctxt.
  4229  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4230  	// If any pair of the index, mask, or destination registers
  4231  	// are the same, illegal instruction trap (#UD) is triggered.
  4232  	index := regIndex(p.GetFrom3().Index)
  4233  	mask := regIndex(p.From.Reg)
  4234  	dest := regIndex(p.To.Reg)
  4235  	if dest == mask || dest == index || mask == index {
  4236  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4237  		return false
  4238  	}
  4239  
  4240  	return true
  4241  }
  4242  
  4243  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4244  // Reports errors via ctxt.
  4245  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4246  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4247  	// register is the same as index vector in VSIB.
  4248  	index := regIndex(p.From.Index)
  4249  	dest := regIndex(p.To.Reg)
  4250  	if dest == index {
  4251  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4252  		return false
  4253  	}
  4254  
  4255  	return true
  4256  }
  4257  
  4258  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4259  	o := opindex[p.As&obj.AMask]
  4260  
  4261  	if o == nil {
  4262  		ctxt.Diag("asmins: missing op %v", p)
  4263  		return
  4264  	}
  4265  
  4266  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4267  		ab.Put1(byte(pre))
  4268  	}
  4269  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4270  		ab.Put1(byte(pre))
  4271  	}
  4272  
  4273  	// Checks to warn about instruction/arguments combinations that
  4274  	// will unconditionally trigger illegal instruction trap (#UD).
  4275  	switch p.As {
  4276  	case AVGATHERDPD,
  4277  		AVGATHERQPD,
  4278  		AVGATHERDPS,
  4279  		AVGATHERQPS,
  4280  		AVPGATHERDD,
  4281  		AVPGATHERQD,
  4282  		AVPGATHERDQ,
  4283  		AVPGATHERQQ:
  4284  		if p.GetFrom3() == nil {
  4285  			// gathers need a 3rd arg. See issue 58822.
  4286  			ctxt.Diag("need a third arg for gather instruction: %v", p)
  4287  			return
  4288  		}
  4289  		// AVX512 gather requires explicit K mask.
  4290  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4291  			if !avx512gatherValid(ctxt, p) {
  4292  				return
  4293  			}
  4294  		} else {
  4295  			if !avx2gatherValid(ctxt, p) {
  4296  				return
  4297  			}
  4298  		}
  4299  	}
  4300  
  4301  	if p.Ft == 0 {
  4302  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4303  	}
  4304  	if p.Tt == 0 {
  4305  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4306  	}
  4307  
  4308  	ft := int(p.Ft) * Ymax
  4309  	var f3t int
  4310  	tt := int(p.Tt) * Ymax
  4311  
  4312  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4313  	z := 0
  4314  	var a *obj.Addr
  4315  	var l int
  4316  	var op int
  4317  	var q *obj.Prog
  4318  	var r *obj.Reloc
  4319  	var rel obj.Reloc
  4320  	var v int64
  4321  
  4322  	args := make([]int, 0, argListMax)
  4323  	if ft != Ynone*Ymax {
  4324  		args = append(args, ft)
  4325  	}
  4326  	for i := range p.RestArgs {
  4327  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4328  	}
  4329  	if tt != Ynone*Ymax {
  4330  		args = append(args, tt)
  4331  	}
  4332  
  4333  	for _, yt := range o.ytab {
  4334  		// ytab matching is purely args-based,
  4335  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4336  		// add EVEX-only filter that will reject non-EVEX matches.
  4337  		//
  4338  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4339  		// Without this rule, operands will lead to VEX-encoded form
  4340  		// and produce "c5b15813" encoding.
  4341  		if !yt.match(args) {
  4342  			// "xo" is always zero for VEX/EVEX encoded insts.
  4343  			z += int(yt.zoffset) + xo
  4344  		} else {
  4345  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4346  				// Do not signal error and continue to search
  4347  				// for matching EVEX-encoded form.
  4348  				z += int(yt.zoffset)
  4349  				continue
  4350  			}
  4351  
  4352  			switch o.prefix {
  4353  			case Px1:	// first option valid only in 32-bit mode
  4354  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4355  					z += int(yt.zoffset) + xo
  4356  					continue
  4357  				}
  4358  			case Pq:	// 16 bit escape and opcode escape
  4359  				ab.Put2(Pe, Pm)
  4360  
  4361  			case Pq3:	// 16 bit escape and opcode escape + REX.W
  4362  				ab.rexflag |= Pw
  4363  				ab.Put2(Pe, Pm)
  4364  
  4365  			case Pq4:	// 66 0F 38
  4366  				ab.Put3(0x66, 0x0F, 0x38)
  4367  
  4368  			case Pq4w:	// 66 0F 38 + REX.W
  4369  				ab.rexflag |= Pw
  4370  				ab.Put3(0x66, 0x0F, 0x38)
  4371  
  4372  			case Pq5:	// F3 0F 38
  4373  				ab.Put3(0xF3, 0x0F, 0x38)
  4374  
  4375  			case Pq5w:	//  F3 0F 38 + REX.W
  4376  				ab.rexflag |= Pw
  4377  				ab.Put3(0xF3, 0x0F, 0x38)
  4378  
  4379  			case Pf2,	// xmm opcode escape
  4380  				Pf3:
  4381  				ab.Put2(o.prefix, Pm)
  4382  
  4383  			case Pef3:
  4384  				ab.Put3(Pe, Pf3, Pm)
  4385  
  4386  			case Pfw:	// xmm opcode escape + REX.W
  4387  				ab.rexflag |= Pw
  4388  				ab.Put2(Pf3, Pm)
  4389  
  4390  			case Pm:	// opcode escape
  4391  				ab.Put1(Pm)
  4392  
  4393  			case Pe:	// 16 bit escape
  4394  				ab.Put1(Pe)
  4395  
  4396  			case Pw:	// 64-bit escape
  4397  				if ctxt.Arch.Family != sys.AMD64 {
  4398  					ctxt.Diag("asmins: illegal 64: %v", p)
  4399  				}
  4400  				ab.rexflag |= Pw
  4401  
  4402  			case Pw8:	// 64-bit escape if z >= 8
  4403  				if z >= 8 {
  4404  					if ctxt.Arch.Family != sys.AMD64 {
  4405  						ctxt.Diag("asmins: illegal 64: %v", p)
  4406  					}
  4407  					ab.rexflag |= Pw
  4408  				}
  4409  
  4410  			case Pb:	// botch
  4411  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4412  					goto bad
  4413  				}
  4414  				// NOTE(rsc): This is probably safe to do always,
  4415  				// but when enabled it chooses different encodings
  4416  				// than the old cmd/internal/obj/i386 code did,
  4417  				// which breaks our "same bits out" checks.
  4418  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4419  				// in the original obj/i386, and it would encode
  4420  				// (using a valid, shorter form) as 3c 00 if we enabled
  4421  				// the call to bytereg here.
  4422  				if ctxt.Arch.Family == sys.AMD64 {
  4423  					bytereg(&p.From, &p.Ft)
  4424  					bytereg(&p.To, &p.Tt)
  4425  				}
  4426  
  4427  			case P32:	// 32 bit but illegal if 64-bit mode
  4428  				if ctxt.Arch.Family == sys.AMD64 {
  4429  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4430  				}
  4431  
  4432  			case Py:	// 64-bit only, no prefix
  4433  				if ctxt.Arch.Family != sys.AMD64 {
  4434  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4435  				}
  4436  
  4437  			case Py1:	// 64-bit only if z < 1, no prefix
  4438  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4439  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4440  				}
  4441  
  4442  			case Py3:	// 64-bit only if z < 3, no prefix
  4443  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4444  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4445  				}
  4446  			}
  4447  
  4448  			if z >= len(o.op) {
  4449  				log.Fatalf("asmins bad table %v", p)
  4450  			}
  4451  			op = int(o.op[z])
  4452  			if op == 0x0f {
  4453  				ab.Put1(byte(op))
  4454  				z++
  4455  				op = int(o.op[z])
  4456  			}
  4457  
  4458  			switch yt.zcase {
  4459  			default:
  4460  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4461  				return
  4462  
  4463  			case Zpseudo:
  4464  				break
  4465  
  4466  			case Zlit:
  4467  				ab.PutOpBytesLit(z, &o.op)
  4468  
  4469  			case Zlitr_m:
  4470  				ab.PutOpBytesLit(z, &o.op)
  4471  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4472  
  4473  			case Zlitm_r:
  4474  				ab.PutOpBytesLit(z, &o.op)
  4475  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4476  
  4477  			case Zlit_m_r:
  4478  				ab.PutOpBytesLit(z, &o.op)
  4479  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4480  
  4481  			case Zmb_r:
  4482  				bytereg(&p.From, &p.Ft)
  4483  				fallthrough
  4484  
  4485  			case Zm_r:
  4486  				ab.Put1(byte(op))
  4487  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4488  
  4489  			case Z_m_r:
  4490  				ab.Put1(byte(op))
  4491  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4492  
  4493  			case Zm2_r:
  4494  				ab.Put2(byte(op), o.op[z+1])
  4495  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4496  
  4497  			case Zm_r_xm:
  4498  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4499  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4500  
  4501  			case Zm_r_xm_nr:
  4502  				ab.rexflag = 0
  4503  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4504  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4505  
  4506  			case Zm_r_i_xm:
  4507  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4508  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4509  				ab.Put1(byte(p.To.Offset))
  4510  
  4511  			case Zibm_r, Zibr_m:
  4512  				ab.PutOpBytesLit(z, &o.op)
  4513  				if yt.zcase == Zibr_m {
  4514  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4515  				} else {
  4516  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4517  				}
  4518  				switch {
  4519  				default:
  4520  					ab.Put1(byte(p.From.Offset))
  4521  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4522  					ab.PutInt16(int16(p.From.Offset))
  4523  				case yt.args[0] == Yi32:
  4524  					ab.PutInt32(int32(p.From.Offset))
  4525  				}
  4526  
  4527  			case Zaut_r:
  4528  				ab.Put1(0x8d)	// leal
  4529  				if p.From.Type != obj.TYPE_ADDR {
  4530  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4531  				}
  4532  				p.From.Type = obj.TYPE_MEM
  4533  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4534  				p.From.Type = obj.TYPE_ADDR
  4535  
  4536  			case Zm_o:
  4537  				ab.Put1(byte(op))
  4538  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4539  
  4540  			case Zr_m:
  4541  				ab.Put1(byte(op))
  4542  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4543  
  4544  			case Zvex:
  4545  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4546  
  4547  			case Zvex_rm_v_r:
  4548  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4549  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4550  
  4551  			case Zvex_rm_v_ro:
  4552  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4553  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4554  
  4555  			case Zvex_i_rm_vo:
  4556  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4557  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4558  				ab.Put1(byte(p.From.Offset))
  4559  
  4560  			case Zvex_i_r_v:
  4561  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4562  				regnum := byte(0x7)
  4563  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4564  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4565  				} else {
  4566  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4567  				}
  4568  				ab.Put1(o.op[z+2] | regnum)
  4569  				ab.Put1(byte(p.From.Offset))
  4570  
  4571  			case Zvex_i_rm_v_r:
  4572  				imm, from, from3, to := unpackOps4(p)
  4573  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4574  				ab.asmand(ctxt, cursym, p, from, to)
  4575  				ab.Put1(byte(imm.Offset))
  4576  
  4577  			case Zvex_i_rm_r:
  4578  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4579  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4580  				ab.Put1(byte(p.From.Offset))
  4581  
  4582  			case Zvex_v_rm_r:
  4583  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4584  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4585  
  4586  			case Zvex_r_v_rm:
  4587  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4588  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4589  
  4590  			case Zvex_rm_r_vo:
  4591  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4592  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4593  
  4594  			case Zvex_i_r_rm:
  4595  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4596  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4597  				ab.Put1(byte(p.From.Offset))
  4598  
  4599  			case Zvex_hr_rm_v_r:
  4600  				hr, from, from3, to := unpackOps4(p)
  4601  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4602  				ab.asmand(ctxt, cursym, p, from, to)
  4603  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4604  
  4605  			case Zevex_k_rmo:
  4606  				ab.evex = newEVEXBits(z, &o.op)
  4607  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4608  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4609  
  4610  			case Zevex_i_rm_vo:
  4611  				ab.evex = newEVEXBits(z, &o.op)
  4612  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4613  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4614  				ab.Put1(byte(p.From.Offset))
  4615  
  4616  			case Zevex_i_rm_k_vo:
  4617  				imm, from, kmask, to := unpackOps4(p)
  4618  				ab.evex = newEVEXBits(z, &o.op)
  4619  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4620  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4621  				ab.Put1(byte(imm.Offset))
  4622  
  4623  			case Zevex_i_r_rm:
  4624  				ab.evex = newEVEXBits(z, &o.op)
  4625  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4626  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4627  				ab.Put1(byte(p.From.Offset))
  4628  
  4629  			case Zevex_i_r_k_rm:
  4630  				imm, from, kmask, to := unpackOps4(p)
  4631  				ab.evex = newEVEXBits(z, &o.op)
  4632  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4633  				ab.asmand(ctxt, cursym, p, to, from)
  4634  				ab.Put1(byte(imm.Offset))
  4635  
  4636  			case Zevex_i_rm_r:
  4637  				ab.evex = newEVEXBits(z, &o.op)
  4638  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4639  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4640  				ab.Put1(byte(p.From.Offset))
  4641  
  4642  			case Zevex_i_rm_k_r:
  4643  				imm, from, kmask, to := unpackOps4(p)
  4644  				ab.evex = newEVEXBits(z, &o.op)
  4645  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4646  				ab.asmand(ctxt, cursym, p, from, to)
  4647  				ab.Put1(byte(imm.Offset))
  4648  
  4649  			case Zevex_i_rm_v_r:
  4650  				imm, from, from3, to := unpackOps4(p)
  4651  				ab.evex = newEVEXBits(z, &o.op)
  4652  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4653  				ab.asmand(ctxt, cursym, p, from, to)
  4654  				ab.Put1(byte(imm.Offset))
  4655  
  4656  			case Zevex_i_rm_v_k_r:
  4657  				imm, from, from3, kmask, to := unpackOps5(p)
  4658  				ab.evex = newEVEXBits(z, &o.op)
  4659  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4660  				ab.asmand(ctxt, cursym, p, from, to)
  4661  				ab.Put1(byte(imm.Offset))
  4662  
  4663  			case Zevex_r_v_rm:
  4664  				ab.evex = newEVEXBits(z, &o.op)
  4665  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4666  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4667  
  4668  			case Zevex_rm_v_r:
  4669  				ab.evex = newEVEXBits(z, &o.op)
  4670  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4671  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4672  
  4673  			case Zevex_rm_k_r:
  4674  				ab.evex = newEVEXBits(z, &o.op)
  4675  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4676  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4677  
  4678  			case Zevex_r_k_rm:
  4679  				ab.evex = newEVEXBits(z, &o.op)
  4680  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4681  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4682  
  4683  			case Zevex_rm_v_k_r:
  4684  				from, from3, kmask, to := unpackOps4(p)
  4685  				ab.evex = newEVEXBits(z, &o.op)
  4686  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4687  				ab.asmand(ctxt, cursym, p, from, to)
  4688  
  4689  			case Zevex_r_v_k_rm:
  4690  				from, from3, kmask, to := unpackOps4(p)
  4691  				ab.evex = newEVEXBits(z, &o.op)
  4692  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4693  				ab.asmand(ctxt, cursym, p, to, from)
  4694  
  4695  			case Zr_m_xm:
  4696  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4697  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4698  
  4699  			case Zr_m_xm_nr:
  4700  				ab.rexflag = 0
  4701  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4702  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4703  
  4704  			case Zo_m:
  4705  				ab.Put1(byte(op))
  4706  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4707  
  4708  			case Zcallindreg:
  4709  				r = obj.Addrel(cursym)
  4710  				r.Off = int32(p.Pc)
  4711  				r.Type = objabi.R_CALLIND
  4712  				r.Siz = 0
  4713  				fallthrough
  4714  
  4715  			case Zo_m64:
  4716  				ab.Put1(byte(op))
  4717  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4718  
  4719  			case Zm_ibo:
  4720  				ab.Put1(byte(op))
  4721  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4722  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4723  
  4724  			case Zibo_m:
  4725  				ab.Put1(byte(op))
  4726  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4727  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4728  
  4729  			case Zibo_m_xm:
  4730  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4731  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4732  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4733  
  4734  			case Z_ib, Zib_:
  4735  				if yt.zcase == Zib_ {
  4736  					a = &p.From
  4737  				} else {
  4738  					a = &p.To
  4739  				}
  4740  				ab.Put1(byte(op))
  4741  				if p.As == AXABORT {
  4742  					ab.Put1(o.op[z+1])
  4743  				}
  4744  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4745  
  4746  			case Zib_rp:
  4747  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4748  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4749  
  4750  			case Zil_rp:
  4751  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4752  				ab.Put1(byte(op + reg[p.To.Reg]))
  4753  				if o.prefix == Pe {
  4754  					v = vaddr(ctxt, p, &p.From, nil)
  4755  					ab.PutInt16(int16(v))
  4756  				} else {
  4757  					ab.relput4(ctxt, cursym, p, &p.From)
  4758  				}
  4759  
  4760  			case Zo_iw:
  4761  				ab.Put1(byte(op))
  4762  				if p.From.Type != obj.TYPE_NONE {
  4763  					v = vaddr(ctxt, p, &p.From, nil)
  4764  					ab.PutInt16(int16(v))
  4765  				}
  4766  
  4767  			case Ziq_rp:
  4768  				v = vaddr(ctxt, p, &p.From, &rel)
  4769  				l = int(v >> 32)
  4770  				if l == 0 && rel.Siz != 8 {
  4771  					ab.rexflag &^= (0x40 | Rxw)
  4772  
  4773  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4774  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4775  					if rel.Type != 0 {
  4776  						r = obj.Addrel(cursym)
  4777  						*r = rel
  4778  						r.Off = int32(p.Pc + int64(ab.Len()))
  4779  					}
  4780  
  4781  					ab.PutInt32(int32(v))
  4782  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 {	// sign extend
  4783  					ab.Put1(0xc7)
  4784  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4785  
  4786  					ab.PutInt32(int32(v))	// need all 8
  4787  				} else {
  4788  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4789  					ab.Put1(byte(op + reg[p.To.Reg]))
  4790  					if rel.Type != 0 {
  4791  						r = obj.Addrel(cursym)
  4792  						*r = rel
  4793  						r.Off = int32(p.Pc + int64(ab.Len()))
  4794  					}
  4795  
  4796  					ab.PutInt64(v)
  4797  				}
  4798  
  4799  			case Zib_rr:
  4800  				ab.Put1(byte(op))
  4801  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4802  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4803  
  4804  			case Z_il, Zil_:
  4805  				if yt.zcase == Zil_ {
  4806  					a = &p.From
  4807  				} else {
  4808  					a = &p.To
  4809  				}
  4810  				ab.Put1(byte(op))
  4811  				if o.prefix == Pe {
  4812  					v = vaddr(ctxt, p, a, nil)
  4813  					ab.PutInt16(int16(v))
  4814  				} else {
  4815  					ab.relput4(ctxt, cursym, p, a)
  4816  				}
  4817  
  4818  			case Zm_ilo, Zilo_m:
  4819  				ab.Put1(byte(op))
  4820  				if yt.zcase == Zilo_m {
  4821  					a = &p.From
  4822  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4823  				} else {
  4824  					a = &p.To
  4825  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4826  				}
  4827  
  4828  				if o.prefix == Pe {
  4829  					v = vaddr(ctxt, p, a, nil)
  4830  					ab.PutInt16(int16(v))
  4831  				} else {
  4832  					ab.relput4(ctxt, cursym, p, a)
  4833  				}
  4834  
  4835  			case Zil_rr:
  4836  				ab.Put1(byte(op))
  4837  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4838  				if o.prefix == Pe {
  4839  					v = vaddr(ctxt, p, &p.From, nil)
  4840  					ab.PutInt16(int16(v))
  4841  				} else {
  4842  					ab.relput4(ctxt, cursym, p, &p.From)
  4843  				}
  4844  
  4845  			case Z_rp:
  4846  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4847  				ab.Put1(byte(op + reg[p.To.Reg]))
  4848  
  4849  			case Zrp_:
  4850  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4851  				ab.Put1(byte(op + reg[p.From.Reg]))
  4852  
  4853  			case Zcallcon, Zjmpcon:
  4854  				if yt.zcase == Zcallcon {
  4855  					ab.Put1(byte(op))
  4856  				} else {
  4857  					ab.Put1(o.op[z+1])
  4858  				}
  4859  				r = obj.Addrel(cursym)
  4860  				r.Off = int32(p.Pc + int64(ab.Len()))
  4861  				r.Type = objabi.R_PCREL
  4862  				r.Siz = 4
  4863  				r.Add = p.To.Offset
  4864  				ab.PutInt32(0)
  4865  
  4866  			case Zcallind:
  4867  				ab.Put2(byte(op), o.op[z+1])
  4868  				r = obj.Addrel(cursym)
  4869  				r.Off = int32(p.Pc + int64(ab.Len()))
  4870  				if ctxt.Arch.Family == sys.AMD64 {
  4871  					r.Type = objabi.R_PCREL
  4872  				} else {
  4873  					r.Type = objabi.R_ADDR
  4874  				}
  4875  				r.Siz = 4
  4876  				r.Add = p.To.Offset
  4877  				r.Sym = p.To.Sym
  4878  				ab.PutInt32(0)
  4879  
  4880  			case Zcall, Zcallduff:
  4881  				if p.To.Sym == nil {
  4882  					ctxt.Diag("call without target")
  4883  					ctxt.DiagFlush()
  4884  					log.Fatalf("bad code")
  4885  				}
  4886  
  4887  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4888  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4889  				}
  4890  
  4891  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4892  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4893  					// (the call jumps into the middle of the function).
  4894  					// This makes it possible to see call sites for duffcopy/duffzero in
  4895  					// BP-based profiling tools like Linux perf (which is the
  4896  					// whole point of maintaining frame pointers in Go).
  4897  					// MOVQ BP, -16(SP)
  4898  					// LEAQ -16(SP), BP
  4899  					ab.Put(bpduff1)
  4900  				}
  4901  				ab.Put1(byte(op))
  4902  				r = obj.Addrel(cursym)
  4903  				r.Off = int32(p.Pc + int64(ab.Len()))
  4904  				r.Sym = p.To.Sym
  4905  				r.Add = p.To.Offset
  4906  				r.Type = objabi.R_CALL
  4907  				r.Siz = 4
  4908  				ab.PutInt32(0)
  4909  
  4910  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4911  					// Pop BP pushed above.
  4912  					// MOVQ 0(BP), BP
  4913  					ab.Put(bpduff2)
  4914  				}
  4915  
  4916  			// TODO: jump across functions needs reloc
  4917  			case Zbr, Zjmp, Zloop:
  4918  				if p.As == AXBEGIN {
  4919  					ab.Put1(byte(op))
  4920  				}
  4921  				if p.To.Sym != nil {
  4922  					if yt.zcase != Zjmp {
  4923  						ctxt.Diag("branch to ATEXT")
  4924  						ctxt.DiagFlush()
  4925  						log.Fatalf("bad code")
  4926  					}
  4927  
  4928  					ab.Put1(o.op[z+1])
  4929  					r = obj.Addrel(cursym)
  4930  					r.Off = int32(p.Pc + int64(ab.Len()))
  4931  					r.Sym = p.To.Sym
  4932  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4933  					// it can point to a trampoline instead of the destination itself.
  4934  					r.Type = objabi.R_CALL
  4935  					r.Siz = 4
  4936  					ab.PutInt32(0)
  4937  					break
  4938  				}
  4939  
  4940  				// Assumes q is in this function.
  4941  				// TODO: Check in input, preserve in brchain.
  4942  
  4943  				// Fill in backward jump now.
  4944  				q = p.To.Target()
  4945  
  4946  				if q == nil {
  4947  					ctxt.Diag("jmp/branch/loop without target")
  4948  					ctxt.DiagFlush()
  4949  					log.Fatalf("bad code")
  4950  				}
  4951  
  4952  				if p.Back&branchBackwards != 0 {
  4953  					v = q.Pc - (p.Pc + 2)
  4954  					if v >= -128 && p.As != AXBEGIN {
  4955  						if p.As == AJCXZL {
  4956  							ab.Put1(0x67)
  4957  						}
  4958  						ab.Put2(byte(op), byte(v))
  4959  					} else if yt.zcase == Zloop {
  4960  						ctxt.Diag("loop too far: %v", p)
  4961  					} else {
  4962  						v -= 5 - 2
  4963  						if p.As == AXBEGIN {
  4964  							v--
  4965  						}
  4966  						if yt.zcase == Zbr {
  4967  							ab.Put1(0x0f)
  4968  							v--
  4969  						}
  4970  
  4971  						ab.Put1(o.op[z+1])
  4972  						ab.PutInt32(int32(v))
  4973  					}
  4974  
  4975  					break
  4976  				}
  4977  
  4978  				// Annotate target; will fill in later.
  4979  				p.Forwd = q.Rel
  4980  
  4981  				q.Rel = p
  4982  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4983  					if p.As == AJCXZL {
  4984  						ab.Put1(0x67)
  4985  					}
  4986  					ab.Put2(byte(op), 0)
  4987  				} else if yt.zcase == Zloop {
  4988  					ctxt.Diag("loop too far: %v", p)
  4989  				} else {
  4990  					if yt.zcase == Zbr {
  4991  						ab.Put1(0x0f)
  4992  					}
  4993  					ab.Put1(o.op[z+1])
  4994  					ab.PutInt32(0)
  4995  				}
  4996  
  4997  			case Zbyte:
  4998  				v = vaddr(ctxt, p, &p.From, &rel)
  4999  				if rel.Siz != 0 {
  5000  					rel.Siz = uint8(op)
  5001  					r = obj.Addrel(cursym)
  5002  					*r = rel
  5003  					r.Off = int32(p.Pc + int64(ab.Len()))
  5004  				}
  5005  
  5006  				ab.Put1(byte(v))
  5007  				if op > 1 {
  5008  					ab.Put1(byte(v >> 8))
  5009  					if op > 2 {
  5010  						ab.PutInt16(int16(v >> 16))
  5011  						if op > 4 {
  5012  							ab.PutInt32(int32(v >> 32))
  5013  						}
  5014  					}
  5015  				}
  5016  			}
  5017  
  5018  			return
  5019  		}
  5020  	}
  5021  	f3t = Ynone * Ymax
  5022  	if p.GetFrom3() != nil {
  5023  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  5024  	}
  5025  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  5026  		var pp obj.Prog
  5027  		var t []byte
  5028  		if p.As == mo[0].as {
  5029  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  5030  				t = mo[0].op[:]
  5031  				switch mo[0].code {
  5032  				default:
  5033  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  5034  
  5035  				case movLit:
  5036  					for z = 0; t[z] != 0; z++ {
  5037  						ab.Put1(t[z])
  5038  					}
  5039  
  5040  				case movRegMem:
  5041  					ab.Put1(t[0])
  5042  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  5043  
  5044  				case movMemReg:
  5045  					ab.Put1(t[0])
  5046  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  5047  
  5048  				case movRegMem2op:	// r,m - 2op
  5049  					ab.Put2(t[0], t[1])
  5050  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  5051  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  5052  
  5053  				case movMemReg2op:
  5054  					ab.Put2(t[0], t[1])
  5055  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  5056  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  5057  
  5058  				case movFullPtr:
  5059  					if t[0] != 0 {
  5060  						ab.Put1(t[0])
  5061  					}
  5062  					switch p.To.Index {
  5063  					default:
  5064  						goto bad
  5065  
  5066  					case REG_DS:
  5067  						ab.Put1(0xc5)
  5068  
  5069  					case REG_SS:
  5070  						ab.Put2(0x0f, 0xb2)
  5071  
  5072  					case REG_ES:
  5073  						ab.Put1(0xc4)
  5074  
  5075  					case REG_FS:
  5076  						ab.Put2(0x0f, 0xb4)
  5077  
  5078  					case REG_GS:
  5079  						ab.Put2(0x0f, 0xb5)
  5080  					}
  5081  
  5082  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5083  
  5084  				case movDoubleShift:
  5085  					if t[0] == Pw {
  5086  						if ctxt.Arch.Family != sys.AMD64 {
  5087  							ctxt.Diag("asmins: illegal 64: %v", p)
  5088  						}
  5089  						ab.rexflag |= Pw
  5090  						t = t[1:]
  5091  					} else if t[0] == Pe {
  5092  						ab.Put1(Pe)
  5093  						t = t[1:]
  5094  					}
  5095  
  5096  					switch p.From.Type {
  5097  					default:
  5098  						goto bad
  5099  
  5100  					case obj.TYPE_CONST:
  5101  						ab.Put2(0x0f, t[0])
  5102  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5103  						ab.Put1(byte(p.From.Offset))
  5104  
  5105  					case obj.TYPE_REG:
  5106  						switch p.From.Reg {
  5107  						default:
  5108  							goto bad
  5109  
  5110  						case REG_CL, REG_CX:
  5111  							ab.Put2(0x0f, t[1])
  5112  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5113  						}
  5114  					}
  5115  
  5116  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5117  				// where you load the TLS base register into a register and then index off that
  5118  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5119  				// are handled in prefixof above and should not be listed here.
  5120  				case movTLSReg:
  5121  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5122  						ctxt.Diag("invalid load of TLS: %v", p)
  5123  					}
  5124  
  5125  					if ctxt.Arch.Family == sys.I386 {
  5126  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5127  						// where you load the TLS base register into a register and then index off that
  5128  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5129  						// are handled in prefixof above and should not be listed here.
  5130  						switch ctxt.Headtype {
  5131  						default:
  5132  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5133  
  5134  						case objabi.Hlinux, objabi.Hfreebsd:
  5135  							if ctxt.Flag_shared {
  5136  								// Note that this is not generating the same insns as the other cases.
  5137  								//     MOV TLS, dst
  5138  								// becomes
  5139  								//     call __x86.get_pc_thunk.dst
  5140  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5141  								// which is encoded as
  5142  								//     call __x86.get_pc_thunk.dst
  5143  								//     movq 0(dst), dst
  5144  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5145  								// is g, which we can't check here, but will when we assemble the second
  5146  								// instruction.
  5147  								dst := p.To.Reg
  5148  								ab.Put1(0xe8)
  5149  								r = obj.Addrel(cursym)
  5150  								r.Off = int32(p.Pc + int64(ab.Len()))
  5151  								r.Type = objabi.R_CALL
  5152  								r.Siz = 4
  5153  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  5154  								ab.PutInt32(0)
  5155  
  5156  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5157  								r = obj.Addrel(cursym)
  5158  								r.Off = int32(p.Pc + int64(ab.Len()))
  5159  								r.Type = objabi.R_TLS_IE
  5160  								r.Siz = 4
  5161  								r.Add = 2
  5162  								ab.PutInt32(0)
  5163  							} else {
  5164  								// ELF TLS base is 0(GS).
  5165  								pp.From = p.From
  5166  
  5167  								pp.From.Type = obj.TYPE_MEM
  5168  								pp.From.Reg = REG_GS
  5169  								pp.From.Offset = 0
  5170  								pp.From.Index = REG_NONE
  5171  								pp.From.Scale = 0
  5172  								ab.Put2(0x65,	// GS
  5173  									0x8B)
  5174  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5175  							}
  5176  						case objabi.Hplan9:
  5177  							pp.From = obj.Addr{}
  5178  							pp.From.Type = obj.TYPE_MEM
  5179  							pp.From.Name = obj.NAME_EXTERN
  5180  							pp.From.Sym = plan9privates
  5181  							pp.From.Offset = 0
  5182  							pp.From.Index = REG_NONE
  5183  							ab.Put1(0x8B)
  5184  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5185  						}
  5186  						break
  5187  					}
  5188  
  5189  					switch ctxt.Headtype {
  5190  					default:
  5191  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5192  
  5193  					case objabi.Hlinux, objabi.Hfreebsd:
  5194  						if !ctxt.Flag_shared {
  5195  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5196  						}
  5197  						// Note that this is not generating the same insn as the other cases.
  5198  						//     MOV TLS, R_to
  5199  						// becomes
  5200  						//     movq g@gottpoff(%rip), R_to
  5201  						// which is encoded as
  5202  						//     movq 0(%rip), R_to
  5203  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5204  						// is g, which we can't check here, but will when we assemble the second
  5205  						// instruction.
  5206  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5207  
  5208  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5209  						r = obj.Addrel(cursym)
  5210  						r.Off = int32(p.Pc + int64(ab.Len()))
  5211  						r.Type = objabi.R_TLS_IE
  5212  						r.Siz = 4
  5213  						r.Add = -4
  5214  						ab.PutInt32(0)
  5215  
  5216  					case objabi.Hplan9:
  5217  						pp.From = obj.Addr{}
  5218  						pp.From.Type = obj.TYPE_MEM
  5219  						pp.From.Name = obj.NAME_EXTERN
  5220  						pp.From.Sym = plan9privates
  5221  						pp.From.Offset = 0
  5222  						pp.From.Index = REG_NONE
  5223  						ab.rexflag |= Pw
  5224  						ab.Put1(0x8B)
  5225  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5226  
  5227  					case objabi.Hsolaris:	// TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5228  						// TLS base is 0(FS).
  5229  						pp.From = p.From
  5230  
  5231  						pp.From.Type = obj.TYPE_MEM
  5232  						pp.From.Name = obj.NAME_NONE
  5233  						pp.From.Reg = REG_NONE
  5234  						pp.From.Offset = 0
  5235  						pp.From.Index = REG_NONE
  5236  						pp.From.Scale = 0
  5237  						ab.rexflag |= Pw
  5238  						ab.Put2(0x64,	// FS
  5239  							0x8B)
  5240  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5241  					}
  5242  				}
  5243  				return
  5244  			}
  5245  		}
  5246  	}
  5247  	goto bad
  5248  
  5249  bad:
  5250  	if ctxt.Arch.Family != sys.AMD64 {
  5251  		// here, the assembly has failed.
  5252  		// if it's a byte instruction that has
  5253  		// unaddressable registers, try to
  5254  		// exchange registers and reissue the
  5255  		// instruction with the operands renamed.
  5256  		pp := *p
  5257  
  5258  		unbytereg(&pp.From, &pp.Ft)
  5259  		unbytereg(&pp.To, &pp.Tt)
  5260  
  5261  		z := int(p.From.Reg)
  5262  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5263  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5264  			// For now, different to keep bit-for-bit compatibility.
  5265  			if ctxt.Arch.Family == sys.I386 {
  5266  				breg := byteswapreg(ctxt, &p.To)
  5267  				if breg != REG_AX {
  5268  					ab.Put1(0x87)	// xchg lhs,bx
  5269  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5270  					subreg(&pp, z, breg)
  5271  					ab.doasm(ctxt, cursym, &pp)
  5272  					ab.Put1(0x87)	// xchg lhs,bx
  5273  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5274  				} else {
  5275  					ab.Put1(byte(0x90 + reg[z]))	// xchg lsh,ax
  5276  					subreg(&pp, z, REG_AX)
  5277  					ab.doasm(ctxt, cursym, &pp)
  5278  					ab.Put1(byte(0x90 + reg[z]))	// xchg lsh,ax
  5279  				}
  5280  				return
  5281  			}
  5282  
  5283  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5284  				// We certainly don't want to exchange
  5285  				// with AX if the op is MUL or DIV.
  5286  				ab.Put1(0x87)	// xchg lhs,bx
  5287  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5288  				subreg(&pp, z, REG_BX)
  5289  				ab.doasm(ctxt, cursym, &pp)
  5290  				ab.Put1(0x87)	// xchg lhs,bx
  5291  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5292  			} else {
  5293  				ab.Put1(byte(0x90 + reg[z]))	// xchg lsh,ax
  5294  				subreg(&pp, z, REG_AX)
  5295  				ab.doasm(ctxt, cursym, &pp)
  5296  				ab.Put1(byte(0x90 + reg[z]))	// xchg lsh,ax
  5297  			}
  5298  			return
  5299  		}
  5300  
  5301  		z = int(p.To.Reg)
  5302  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5303  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5304  			// For now, different to keep bit-for-bit compatibility.
  5305  			if ctxt.Arch.Family == sys.I386 {
  5306  				breg := byteswapreg(ctxt, &p.From)
  5307  				if breg != REG_AX {
  5308  					ab.Put1(0x87)	//xchg rhs,bx
  5309  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5310  					subreg(&pp, z, breg)
  5311  					ab.doasm(ctxt, cursym, &pp)
  5312  					ab.Put1(0x87)	// xchg rhs,bx
  5313  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5314  				} else {
  5315  					ab.Put1(byte(0x90 + reg[z]))	// xchg rsh,ax
  5316  					subreg(&pp, z, REG_AX)
  5317  					ab.doasm(ctxt, cursym, &pp)
  5318  					ab.Put1(byte(0x90 + reg[z]))	// xchg rsh,ax
  5319  				}
  5320  				return
  5321  			}
  5322  
  5323  			if isax(&p.From) {
  5324  				ab.Put1(0x87)	// xchg rhs,bx
  5325  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5326  				subreg(&pp, z, REG_BX)
  5327  				ab.doasm(ctxt, cursym, &pp)
  5328  				ab.Put1(0x87)	// xchg rhs,bx
  5329  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5330  			} else {
  5331  				ab.Put1(byte(0x90 + reg[z]))	// xchg rsh,ax
  5332  				subreg(&pp, z, REG_AX)
  5333  				ab.doasm(ctxt, cursym, &pp)
  5334  				ab.Put1(byte(0x90 + reg[z]))	// xchg rsh,ax
  5335  			}
  5336  			return
  5337  		}
  5338  	}
  5339  
  5340  	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
  5341  }
  5342  
  5343  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5344  // which is not referenced in a.
  5345  // If a is empty, it returns BX to account for MULB-like instructions
  5346  // that might use DX and AX.
  5347  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5348  	cana, canb, canc, cand := true, true, true, true
  5349  	if a.Type == obj.TYPE_NONE {
  5350  		cana, cand = false, false
  5351  	}
  5352  
  5353  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5354  		switch a.Reg {
  5355  		case REG_NONE:
  5356  			cana, cand = false, false
  5357  		case REG_AX, REG_AL, REG_AH:
  5358  			cana = false
  5359  		case REG_BX, REG_BL, REG_BH:
  5360  			canb = false
  5361  		case REG_CX, REG_CL, REG_CH:
  5362  			canc = false
  5363  		case REG_DX, REG_DL, REG_DH:
  5364  			cand = false
  5365  		}
  5366  	}
  5367  
  5368  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5369  		switch a.Index {
  5370  		case REG_AX:
  5371  			cana = false
  5372  		case REG_BX:
  5373  			canb = false
  5374  		case REG_CX:
  5375  			canc = false
  5376  		case REG_DX:
  5377  			cand = false
  5378  		}
  5379  	}
  5380  
  5381  	switch {
  5382  	case cana:
  5383  		return REG_AX
  5384  	case canb:
  5385  		return REG_BX
  5386  	case canc:
  5387  		return REG_CX
  5388  	case cand:
  5389  		return REG_DX
  5390  	default:
  5391  		ctxt.Diag("impossible byte register")
  5392  		ctxt.DiagFlush()
  5393  		log.Fatalf("bad code")
  5394  		return 0
  5395  	}
  5396  }
  5397  
  5398  func isbadbyte(a *obj.Addr) bool {
  5399  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5400  }
  5401  
  5402  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5403  	ab.Reset()
  5404  
  5405  	ab.rexflag = 0
  5406  	ab.vexflag = false
  5407  	ab.evexflag = false
  5408  	mark := ab.Len()
  5409  	ab.doasm(ctxt, cursym, p)
  5410  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5411  		// as befits the whole approach of the architecture,
  5412  		// the rex prefix must appear before the first opcode byte
  5413  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5414  		// before the 0f opcode escape!), or it might be ignored.
  5415  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5416  		if ctxt.Arch.Family != sys.AMD64 {
  5417  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5418  		}
  5419  		n := ab.Len()
  5420  		var np int
  5421  		for np = mark; np < n; np++ {
  5422  			c := ab.At(np)
  5423  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5424  				break
  5425  			}
  5426  		}
  5427  		ab.Insert(np, byte(0x40|ab.rexflag))
  5428  	}
  5429  
  5430  	n := ab.Len()
  5431  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5432  		r := &cursym.R[i]
  5433  		if int64(r.Off) < p.Pc {
  5434  			break
  5435  		}
  5436  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5437  			r.Off++
  5438  		}
  5439  		if r.Type == objabi.R_PCREL {
  5440  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5441  				// PC-relative addressing is relative to the end of the instruction,
  5442  				// but the relocations applied by the linker are relative to the end
  5443  				// of the relocation. Because immediate instruction
  5444  				// arguments can follow the PC-relative memory reference in the
  5445  				// instruction encoding, the two may not coincide. In this case,
  5446  				// adjust addend so that linker can keep relocating relative to the
  5447  				// end of the relocation.
  5448  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5449  			} else if ctxt.Arch.Family == sys.I386 {
  5450  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5451  				// assumes that the previous instruction loaded the PC of the end
  5452  				// of that instruction into CX, so the adjustment is relative to
  5453  				// that.
  5454  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5455  			}
  5456  		}
  5457  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5458  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5459  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5460  		}
  5461  
  5462  	}
  5463  }
  5464  
  5465  // unpackOps4 extracts 4 operands from p.
  5466  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5467  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5468  }
  5469  
  5470  // unpackOps5 extracts 5 operands from p.
  5471  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5472  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5473  }