github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"github.com/gagliardetto/golang-go/cmd/internal/obj"
    35  	"github.com/gagliardetto/golang-go/cmd/internal/objabi"
    36  	"github.com/gagliardetto/golang-go/cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
        // Package-level symbol handles.
        // NOTE(review): both are only declared here; where they are assigned
        // and what they refer to is not visible in this part of the file — confirm.
    43  var (
    44  	plan9privates *obj.LSym
    45  	deferreturn   *obj.LSym
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // want to align loop entry to loopAlign-byte boundary,
    52  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  //
    63  const (
    64  	loopAlign  = 16 // loop entries are aligned to this many bytes
    65  	maxLoopPad = 0  // maximum NOP padding per loop entry; 0 disables alignment
    66  )
    67  
    68  // Bit flags that are used to express jump target properties.
    69  const (
    70  	// branchBackwards marks targets that are located behind.
    71  	// Used to express jumps to loop headers.
    72  	branchBackwards = (1 << iota)
    73  	// branchShort marks branches whose target is close,
    74  	// with the offset in the -128..127 range.
    75  	branchShort
    76  	// branchLoopHead marks loop entry.
    77  	// Used to insert padding for misaligned loops.
    78  	branchLoopHead
    79  )
    80  
    81  // opBytes holds optab encoding bytes.
    82  // Each ytab line reserves a fixed number of bytes in this array.
    83  //
    84  // The size should be the minimal number of bytes that
    85  // is enough to hold the biggest optab op lines.
    86  type opBytes [31]uint8
    87  
        // Optab is one row of the optab instruction table: it ties an
        // instruction to the operand forms it accepts and to the bytes
        // used to encode each form.
    88  type Optab struct {
    89  	as     obj.As  // instruction this row describes (matched against p.As)
    90  	ytab   []ytab  // accepted operand forms, scanned in order
    91  	prefix uint8   // P* constant (or 0) that controls the prefix bytes to emit
    92  	op     opBytes // encoding bytes; each ytab line uses its own slice of them
    93  }
    94  
        // movtab describes one row of a move-instruction table.
        // NOTE(review): the table that uses this type is outside this part of
        // the file; the field notes below are inferred from names only — confirm.
    95  type movtab struct {
    96  	as   obj.As   // instruction
    97  	ft   uint8    // presumably the class of the "from" operand
    98  	f3t  uint8    // presumably the class of a third operand
    99  	tt   uint8    // presumably the class of the "to" operand
   100  	code uint8    // presumably selects how op is interpreted
   101  	op   [4]uint8 // presumably the encoding bytes
   102  }
   103  
        // Operand classes ("Ytypes"). Each operand is classified into one of
        // these, and ytab lines list the classes they accept. The ycover table
        // records which specific classes also satisfy a more general class.
   104  const (
   105  	Yxxx = iota
   106  	Ynone
   107  	Yi0 // $0
   108  	Yi1 // $1
   109  	Yu2 // $x, x fits in uint2
   110  	Yi8 // $x, x fits in int8
   111  	Yu8 // $x, x fits in uint8
   112  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   113  	Ys32 // signed 32-bit constant; Yi0, Yi1 and Yi8 also count as Ys32
   114  	Yi32
   115  	Yi64
   116  	Yiauto
   117  	Yal
   118  	Ycl
   119  	Yax
   120  	Ycx
   121  	Yrb
   122  	Yrl
   123  	Yrl32 // Yrl on 32-bit system
   124  	Yrf
   125  	Yf0
   126  	Yrx
   127  	Ymb
   128  	Yml
   129  	Ym
   130  	Ybr
   131  	Ycs
   132  	Yss
   133  	Yds
   134  	Yes
   135  	Yfs
   136  	Ygs
   137  	Ygdtr
   138  	Yidtr
   139  	Yldtr
   140  	Ymsw
   141  	Ytask
   142  	Ycr0
   143  	Ycr1
   144  	Ycr2
   145  	Ycr3
   146  	Ycr4
   147  	Ycr5
   148  	Ycr6
   149  	Ycr7
   150  	Ycr8
   151  	Ydr0
   152  	Ydr1
   153  	Ydr2
   154  	Ydr3
   155  	Ydr4
   156  	Ydr5
   157  	Ydr6
   158  	Ydr7
   159  	Ytr0
   160  	Ytr1
   161  	Ytr2
   162  	Ytr3
   163  	Ytr4
   164  	Ytr5
   165  	Ytr6
   166  	Ytr7
   167  	Ymr
   168  	Ymm
   169  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   170  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   171  	Yxr           // X0..X15
   172  	YxrEvex       // X0..X31
   173  	Yxm
   174  	YxmEvex       // YxrEvex+Ym
   175  	Yxvm          // VSIB vector array; vm32x/vm64x
   176  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   177  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   178  	Yyr           // Y0..Y15
   179  	YyrEvex       // Y0..Y31
   180  	Yym
   181  	YymEvex   // YyrEvex+Ym
   182  	Yyvm      // VSIB vector array; vm32y/vm64y
   183  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   184  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   185  	Yzr       // Z0..Z31
   186  	Yzm       // Yzr+Ym
   187  	Yzvm      // VSIB vector array; vm32z/vm64z
   188  	Yk0       // K0
   189  	Yknot0    // K1..K7; write mask
   190  	Yk        // K0..K7; used for KOP
   191  	Ykm       // Yk+Ym; used for KOP
   192  	Ytls
   193  	Ytextsize
   194  	Yindir
   195  	Ymax // number of classes; also the row stride of ycover
   196  )
   197  
        // Encoding forms ("Ztypes"). Each ytab line names one of these; together
        // with the opBytes selected for that line, the Ztype determines which
        // bytes are laid down (see the switch on yt.zcase in doasm).
   198  const (
   199  	Zxxx = iota
   200  	Zlit
   201  	Zlitm_r
   202  	Zlitr_m
   203  	Zlit_m_r
   204  	Z_rp
   205  	Zbr
   206  	Zcall
   207  	Zcallcon
   208  	Zcallduff
   209  	Zcallind
   210  	Zcallindreg
   211  	Zib_
   212  	Zib_rp
   213  	Zibo_m
   214  	Zibo_m_xm
   215  	Zil_
   216  	Zil_rp
   217  	Ziq_rp
   218  	Zilo_m
   219  	Zjmp
   220  	Zjmpcon
   221  	Zloop
   222  	Zo_iw
   223  	Zm_o
   224  	Zm_r
   225  	Z_m_r
   226  	Zm2_r
   227  	Zm_r_xm
   228  	Zm_r_i_xm
   229  	Zm_r_xm_nr
   230  	Zr_m_xm_nr
   231  	Zibm_r // mmx1,mmx2/mem64,imm8
   232  	Zibr_m
   233  	Zmb_r
   234  	Zaut_r
   235  	Zo_m
   236  	Zo_m64
   237  	Zpseudo
   238  	Zr_m
   239  	Zr_m_xm
   240  	Zrp_
   241  	Z_ib
   242  	Z_il
   243  	Zm_ibo
   244  	Zm_ilo
   245  	Zib_rr
   246  	Zil_rr
   247  	Zbyte
   248  
   249  	Zvex_rm_v_r
   250  	Zvex_rm_v_ro
   251  	Zvex_r_v_rm
   252  	Zvex_i_rm_vo
   253  	Zvex_v_rm_r
   254  	Zvex_i_rm_r
   255  	Zvex_i_r_v
   256  	Zvex_i_rm_v_r
   257  	Zvex
   258  	Zvex_rm_r_vo
   259  	Zvex_i_r_rm
   260  	Zvex_hr_rm_v_r
   261  
   262  	Zevex_first // NOTE(review): appears to mark the start of the Zevex_* range — confirm
   263  	Zevex_i_r_k_rm
   264  	Zevex_i_r_rm
   265  	Zevex_i_rm_k_r
   266  	Zevex_i_rm_k_vo
   267  	Zevex_i_rm_r
   268  	Zevex_i_rm_v_k_r
   269  	Zevex_i_rm_v_r
   270  	Zevex_i_rm_vo
   271  	Zevex_k_rmo
   272  	Zevex_r_k_rm
   273  	Zevex_r_v_k_rm
   274  	Zevex_r_v_rm
   275  	Zevex_rm_k_r
   276  	Zevex_rm_v_k_r
   277  	Zevex_rm_v_r
   278  	Zevex_last // NOTE(review): appears to mark the end of the Zevex_* range — confirm
   279  
   280  	Zmax
   281  )
   282  
        // Prefix selectors used in the prefix field of optab rows,
        // followed by the REX bit masks.
   283  const (
   284  	Px   = 0
   285  	Px1  = 1    // symbolic; exact value doesn't matter
   286  	P32  = 0x32 // 32-bit only
   287  	Pe   = 0x66 // operand escape
   288  	Pm   = 0x0f // 2byte opcode escape
   289  	Pq   = 0xff // both escapes: 66 0f
   290  	Pb   = 0xfe // byte operands
   291  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   292  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   293  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   294  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   295  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   296  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   297  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   298  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   299  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   300  	Pw   = 0x48 // Rex.w
   301  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   302  	Py   = 0x80 // defaults to 64-bit mode
   303  	Py1  = 0x81 // symbolic; exact value doesn't matter
   304  	Py3  = 0x83 // symbolic; exact value doesn't matter
   305  	Pavx = 0x84 // symbolic: exact value doesn't matter
   306  
        	// REX prefix bits (RxrEvex is the AVX512 extension of the R bit).
   307  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   308  	Rxw     = 1 << 3 // =1, 64-bit operand size
   309  	Rxr     = 1 << 2 // extend modrm reg
   310  	Rxx     = 1 << 1 // extend sib index
   311  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   312  )
   313  
   314  const (
   315  	// Encoding for VEX prefix in tables.
   316  	// The P, L, and W fields are chosen to match
   317  	// their eventual locations in the VEX prefix bytes.
   318  
   319  	// Bit layout implied by the shifts below:
   320  	// bits 0-1 hold P, bit 2 holds L, bits 3-4 hold M,
   321  	// bit 6 is avxEscape and bit 7 holds W.
   322  
   323  	// Using spare bit to make leading [E]VEX encoding byte different from
   324  	// 0x0f even if all other VEX fields are 0.
   325  	avxEscape = 1 << 6
   326  
   327  	// P field - 2 bits
   328  	vex66 = 1 << 0
   329  	vexF3 = 2 << 0
   330  	vexF2 = 3 << 0
   331  	// L field - 1 bit
   332  	vexLZ  = 0 << 2
   333  	vexLIG = 0 << 2
   334  	vex128 = 0 << 2
   335  	vex256 = 1 << 2
   336  	// W field - 1 bit
   337  	vexWIG = 0 << 7
   338  	vexW0  = 0 << 7
   339  	vexW1  = 1 << 7
   340  	// M field - 5 bits, but mostly reserved; we can store up to 3
   341  	vex0F   = 1 << 3
   342  	vex0F38 = 2 << 3
   343  	vex0F3A = 3 << 3
   344  )
   345  
        // ycover[a*Ymax+b] = 1 means that operand class a also counts as
        // class b. The table is set up in instinit.
   346  var ycover [Ymax * Ymax]uint8
   347  
        // NOTE(review): reg and regrex are sized by MAXREG, so they are
        // presumably indexed by register number; what they hold is set up
        // outside this part of the file — confirm.
   348  var reg [MAXREG]int
   349  
   350  var regrex [MAXREG + 1]int
   351  
        // ynone is the single form for instructions that take no operands
        // (for example AAA, CLC and CMC in optab).
   352  var ynone = []ytab{
   353  	{Zlit, 1, argList{}},
   354  }
   355  
        // ytext lists two pseudo-instruction forms (Zpseudo) that end in a
        // Ytextsize operand; the second one carries an extra Yi32 operand.
        // NOTE(review): presumably used by the TEXT directive — confirm in optab.
   356  var ytext = []ytab{
   357  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   358  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   359  }
   360  
        // ynop lists pseudo-instruction forms with zero or one operand.
        // NOTE(review): the last four lines repeat the operand classes of the
        // four lines before them (only the final offset differs). This looks
        // like a leftover of a mechanical conversion — confirm before removing
        // anything, since line order drives the offset into opBytes.
   361  var ynop = []ytab{
   362  	{Zpseudo, 0, argList{}},
   363  	{Zpseudo, 0, argList{Yiauto}},
   364  	{Zpseudo, 0, argList{Yml}},
   365  	{Zpseudo, 0, argList{Yrf}},
   366  	{Zpseudo, 0, argList{Yxr}},
   367  	{Zpseudo, 0, argList{Yiauto}},
   368  	{Zpseudo, 0, argList{Yml}},
   369  	{Zpseudo, 0, argList{Yrf}},
   370  	{Zpseudo, 1, argList{Yxr}},
   371  }
   372  
        // yfuncdata and ypcdata are pseudo-instruction forms (Zpseudo) whose
        // first operand is a Yi32 constant.
        // NOTE(review): presumably for the FUNCDATA and PCDATA directives —
        // confirm against optab.
   373  var yfuncdata = []ytab{
   374  	{Zpseudo, 0, argList{Yi32, Ym}},
   375  }
   376  
   377  var ypcdata = []ytab{
   378  	{Zpseudo, 0, argList{Yi32, Yi32}},
   379  }
   380  
        // yxorb lists the operand forms shared by byte-sized two-operand
        // instructions such as ADCB, ADDB and ANDB.
   381  var yxorb = []ytab{
   382  	{Zib_, 1, argList{Yi32, Yal}},
   383  	{Zibo_m, 2, argList{Yi32, Ymb}},
   384  	{Zr_m, 1, argList{Yrb, Ymb}},
   385  	{Zm_r, 1, argList{Ymb, Yrb}},
   386  }
   387  
        // yaddl is used by ADC, ADD and AND in their L, Q and W variants.
        // Compared with yxorb it has an extra leading Yi8 line.
   388  var yaddl = []ytab{
   389  	{Zibo_m, 2, argList{Yi8, Yml}},
   390  	{Zil_, 1, argList{Yi32, Yax}},
   391  	{Zilo_m, 2, argList{Yi32, Yml}},
   392  	{Zr_m, 1, argList{Yrl, Yml}},
   393  	{Zm_r, 1, argList{Yml, Yrl}},
   394  }
   395  
   396  var yincl = []ytab{
   397  	{Z_rp, 1, argList{Yrl}},
   398  	{Zo_m, 2, argList{Yml}},
   399  }
   400  
   401  var yincq = []ytab{
   402  	{Zo_m, 2, argList{Yml}},
   403  }
   404  
        // ycmpb (CMPB) and ycmpl (CMPL, CMPQ) mirror yxorb and yaddl, but with
        // the immediate as the second operand instead of the first.
   405  var ycmpb = []ytab{
   406  	{Z_ib, 1, argList{Yal, Yi32}},
   407  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   408  	{Zm_r, 1, argList{Ymb, Yrb}},
   409  	{Zr_m, 1, argList{Yrb, Ymb}},
   410  }
   411  
   412  var ycmpl = []ytab{
   413  	{Zm_ibo, 2, argList{Yml, Yi8}},
   414  	{Z_il, 1, argList{Yax, Yi32}},
   415  	{Zm_ilo, 2, argList{Yml, Yi32}},
   416  	{Zm_r, 1, argList{Yml, Yrl}},
   417  	{Zr_m, 1, argList{Yrl, Yml}},
   418  }
   419  
        // yshb and yshl take $1 (Yi1), a Yu8 immediate or a count register
        // class as the first operand; yshb targets Ymb and yshl targets Yml.
   420  var yshb = []ytab{
   421  	{Zo_m, 2, argList{Yi1, Ymb}},
   422  	{Zibo_m, 2, argList{Yu8, Ymb}},
   423  	{Zo_m, 2, argList{Ycx, Ymb}},
   424  }
   425  
   426  var yshl = []ytab{
   427  	{Zo_m, 2, argList{Yi1, Yml}},
   428  	{Zibo_m, 2, argList{Yu8, Yml}},
   429  	{Zo_m, 2, argList{Ycl, Yml}},
   430  	{Zo_m, 2, argList{Ycx, Yml}},
   431  }
   432  
   433  var ytestl = []ytab{
   434  	{Zil_, 1, argList{Yi32, Yax}},
   435  	{Zilo_m, 2, argList{Yi32, Yml}},
   436  	{Zr_m, 1, argList{Yrl, Yml}},
   437  	{Zm_r, 1, argList{Yml, Yrl}},
   438  }
   439  
   440  var ymovb = []ytab{
   441  	{Zr_m, 1, argList{Yrb, Ymb}},
   442  	{Zm_r, 1, argList{Ymb, Yrb}},
   443  	{Zib_rp, 1, argList{Yi32, Yrb}},
   444  	{Zibo_m, 2, argList{Yi32, Ymb}},
   445  }
   446  
        // ybtl is used by the BT, BTC, BTR and BTS families: the first operand
        // is either a Yi8 immediate or a Yrl register.
   447  var ybtl = []ytab{
   448  	{Zibo_m, 2, argList{Yi8, Yml}},
   449  	{Zr_m, 1, argList{Yrl, Yml}},
   450  }
   451  
        // ymovw lists register/memory and immediate move forms, plus a
        // final Zaut_r line that takes a Yiauto source.
   452  var ymovw = []ytab{
   453  	{Zr_m, 1, argList{Yrl, Yml}},
   454  	{Zm_r, 1, argList{Yml, Yrl}},
   455  	{Zil_rp, 1, argList{Yi32, Yrl}},
   456  	{Zilo_m, 2, argList{Yi32, Yml}},
   457  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   458  }
   459  
        // ymovl starts with the same four lines as ymovw and inserts the
        // MMX and XMM MOVD forms before the Zaut_r line.
   460  var ymovl = []ytab{
   461  	{Zr_m, 1, argList{Yrl, Yml}},
   462  	{Zm_r, 1, argList{Yml, Yrl}},
   463  	{Zil_rp, 1, argList{Yi32, Yrl}},
   464  	{Zilo_m, 2, argList{Yi32, Yml}},
   465  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   466  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   467  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   468  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   469  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   470  }
   471  
        // yret accepts either no operand or a single Yi32 immediate;
        // both lines use Zo_iw.
   472  var yret = []ytab{
   473  	{Zo_iw, 1, argList{}},
   474  	{Zo_iw, 1, argList{Yi32}},
   475  }
   476  
        // ymovq lists the MOVQ forms; the trailing comments give the prefix
        // and opcode byte that go with each line.
   477  var ymovq = []ytab{
   478  	// valid in 32-bit mode
   479  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   480  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   481  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   482  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   483  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   484  
   485  	// valid only in 64-bit mode, usually with 64-bit prefix
   486  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   487  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   488  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   489  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   490  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   491  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   492  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   493  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   494  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   495  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   496  }
   497  
        // ymovbe lists both directions between Ym and Yrl; each line
        // reserves three opBytes.
   498  var ymovbe = []ytab{
   499  	{Zlitm_r, 3, argList{Ym, Yrl}},
   500  	{Zlitr_m, 3, argList{Yrl, Ym}},
   501  }
   502  
        // The single-line tables below are mostly named after the operand
        // classes they list, in order (ym_rl is Ym then Yrl, and so on).
   503  var ym_rl = []ytab{
   504  	{Zm_r, 1, argList{Ym, Yrl}},
   505  }
   506  
        // yrl_m is used by BOUNDL and BOUNDW.
   507  var yrl_m = []ytab{
   508  	{Zr_m, 1, argList{Yrl, Ym}},
   509  }
   510  
   511  var ymb_rl = []ytab{
   512  	{Zmb_r, 1, argList{Ymb, Yrl}},
   513  }
   514  
        // yml_rl is used by, for example, ADCXL, BSFL and the CMOV family.
   515  var yml_rl = []ytab{
   516  	{Zm_r, 1, argList{Yml, Yrl}},
   517  }
   518  
        // yrl_ml is used by ARPL.
   519  var yrl_ml = []ytab{
   520  	{Zr_m, 1, argList{Yrl, Yml}},
   521  }
   522  
        // yml_mb lists both directions between Yrb and Ymb.
   523  var yml_mb = []ytab{
   524  	{Zr_m, 1, argList{Yrb, Ymb}},
   525  	{Zm_r, 1, argList{Ymb, Yrb}},
   526  }
   527  
   528  var yrb_mb = []ytab{
   529  	{Zr_m, 1, argList{Yrb, Ymb}},
   530  }
   531  
        // yxchg puts the two Yax forms before the general
        // register/memory forms.
   532  var yxchg = []ytab{
   533  	{Z_rp, 1, argList{Yax, Yrl}},
   534  	{Zrp_, 1, argList{Yrl, Yax}},
   535  	{Zr_m, 1, argList{Yrl, Yml}},
   536  	{Zm_r, 1, argList{Yml, Yrl}},
   537  }
   538  
        // ydivl and ydivb are single-operand forms encoded with Zm_o;
        // ydivl takes Yml and ydivb takes Ymb.
   539  var ydivl = []ytab{
   540  	{Zm_o, 2, argList{Yml}},
   541  }
   542  
   543  var ydivb = []ytab{
   544  	{Zm_o, 2, argList{Ymb}},
   545  }
   546  
        // yimul lists one-operand and two-operand forms; yimul3 lists the
        // three-operand forms whose first operand is an immediate.
   547  var yimul = []ytab{
   548  	{Zm_o, 2, argList{Yml}},
   549  	{Zib_rr, 1, argList{Yi8, Yrl}},
   550  	{Zil_rr, 1, argList{Yi32, Yrl}},
   551  	{Zm_r, 2, argList{Yml, Yrl}},
   552  }
   553  
   554  var yimul3 = []ytab{
   555  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   556  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   557  }
   558  
        // ybyte is used by BYTE.
   559  var ybyte = []ytab{
   560  	{Zbyte, 1, argList{Yi64}},
   561  }
   562  
   563  var yin = []ytab{
   564  	{Zib_, 1, argList{Yi32}},
   565  	{Zlit, 1, argList{}},
   566  }
   567  
   568  var yint = []ytab{
   569  	{Zib_, 1, argList{Yi32}},
   570  }
   571  
   572  var ypushl = []ytab{
   573  	{Zrp_, 1, argList{Yrl}},
   574  	{Zm_o, 2, argList{Ym}},
   575  	{Zib_, 1, argList{Yi8}},
   576  	{Zil_, 1, argList{Yi32}},
   577  }
   578  
   579  var ypopl = []ytab{
   580  	{Z_rp, 1, argList{Yrl}},
   581  	{Zo_m, 2, argList{Ym}},
   582  }
   583  
   584  var ywrfsbase = []ytab{
   585  	{Zm_o, 2, argList{Yrl}},
   586  }
   587  
   588  var yrdrand = []ytab{
   589  	{Zo_m, 2, argList{Yrl}},
   590  }
   591  
        // yclflush is used by CLDEMOTE, CLFLUSH, CLFLUSHOPT and CLWB.
   592  var yclflush = []ytab{
   593  	{Zo_m, 2, argList{Ym}},
   594  }
   595  
        // ybswap is used by BSWAPL and BSWAPQ.
   596  var ybswap = []ytab{
   597  	{Z_rp, 2, argList{Yrl}},
   598  }
   599  
   600  var yscond = []ytab{
   601  	{Zo_m, 2, argList{Ymb}},
   602  }
   603  
        // yjcond accepts a bare branch target, or a branch target preceded by
        // a Yi0 or Yi1 constant.
        // NOTE(review): the meaning of that leading constant is not visible
        // in this part of the file — confirm in doasm.
   604  var yjcond = []ytab{
   605  	{Zbr, 0, argList{Ybr}},
   606  	{Zbr, 0, argList{Yi0, Ybr}},
   607  	{Zbr, 1, argList{Yi1, Ybr}},
   608  }
   609  
   610  var yloop = []ytab{
   611  	{Zloop, 1, argList{Ybr}},
   612  }
   613  
        // ycall lists the operand forms of obj.ACALL; each kind of target
        // has its own Zcall* encoding form.
   614  var ycall = []ytab{
   615  	{Zcallindreg, 0, argList{Yml}},
   616  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   617  	{Zcallind, 2, argList{Yindir}},
   618  	{Zcall, 0, argList{Ybr}},
   619  	{Zcallcon, 1, argList{Yi32}},
   620  }
   621  
   622  var yduff = []ytab{
   623  	{Zcallduff, 1, argList{Yi32}},
   624  }
   625  
   626  var yjmp = []ytab{
   627  	{Zo_m64, 2, argList{Yml}},
   628  	{Zjmp, 0, argList{Ybr}},
   629  	{Zjmpcon, 1, argList{Yi32}},
   630  }
   631  
        // The yf* tables below, together with ycompp, pair Yf0 with a Ym or
        // Yrf operand.
        // NOTE(review): presumably the x87 floating-point forms — confirm
        // against the optab rows that use them.
   632  var yfmvd = []ytab{
   633  	{Zm_o, 2, argList{Ym, Yf0}},
   634  	{Zo_m, 2, argList{Yf0, Ym}},
   635  	{Zm_o, 2, argList{Yrf, Yf0}},
   636  	{Zo_m, 2, argList{Yf0, Yrf}},
   637  }
   638  
   639  var yfmvdp = []ytab{
   640  	{Zo_m, 2, argList{Yf0, Ym}},
   641  	{Zo_m, 2, argList{Yf0, Yrf}},
   642  }
   643  
   644  var yfmvf = []ytab{
   645  	{Zm_o, 2, argList{Ym, Yf0}},
   646  	{Zo_m, 2, argList{Yf0, Ym}},
   647  }
   648  
   649  var yfmvx = []ytab{
   650  	{Zm_o, 2, argList{Ym, Yf0}},
   651  }
   652  
   653  var yfmvp = []ytab{
   654  	{Zo_m, 2, argList{Yf0, Ym}},
   655  }
   656  
   657  var yfcmv = []ytab{
   658  	{Zm_o, 2, argList{Yrf, Yf0}},
   659  }
   660  
   661  var yfadd = []ytab{
   662  	{Zm_o, 2, argList{Ym, Yf0}},
   663  	{Zm_o, 2, argList{Yrf, Yf0}},
   664  	{Zo_m, 2, argList{Yf0, Yrf}},
   665  }
   666  
   667  var yfxch = []ytab{
   668  	{Zo_m, 2, argList{Yf0, Yrf}},
   669  	{Zm_o, 2, argList{Yrf, Yf0}},
   670  }
   671  
   672  var ycompp = []ytab{
   673  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   674  }
   675  
   676  var ystsw = []ytab{
   677  	{Zo_m, 2, argList{Ym}},
   678  	{Zlit, 1, argList{Yax}},
   679  }
   680  
        // ysvrs_mo takes a single Ym operand and is encoded with Zm_o.
   681  var ysvrs_mo = []ytab{
   682  	{Zm_o, 2, argList{Ym}},
   683  }
   684  
   685  // unaryDst version of "ysvrs_mo".
        // It takes the same Ym operand but is encoded with Zo_m.
   686  var ysvrs_om = []ytab{
   687  	{Zo_m, 2, argList{Ym}},
   688  }
   689  
        // ymm lists an MMX form (Ymm, Ymr) followed by the matching
        // XMM form (Yxm, Yxr).
   690  var ymm = []ytab{
   691  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   692  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   693  }
   694  
        // yxm is the plain XMM form (Yxm then Yxr) used by, for example,
        // ADDPD, ADDSS and ANDPS.
   695  var yxm = []ytab{
   696  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   697  }
   698  
        // yxm_q4 lists the same operands as yxm but uses Zm_r
        // instead of Zm_r_xm.
   699  var yxm_q4 = []ytab{
   700  	{Zm_r, 1, argList{Yxm, Yxr}},
   701  }
   702  
   703  var yxcvm1 = []ytab{
   704  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   705  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   706  }
   707  
   708  var yxcvm2 = []ytab{
   709  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   710  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   711  }
   712  
   713  var yxr = []ytab{
   714  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   715  }
   716  
   717  var yxr_ml = []ytab{
   718  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   719  }
   720  
   721  var ymr = []ytab{
   722  	{Zm_r, 1, argList{Ymr, Ymr}},
   723  }
   724  
   725  var ymr_ml = []ytab{
   726  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   727  }
   728  
        // yxcmpi is used by CMPPD and CMPPS; the Yi8 immediate is the
        // last operand.
   729  var yxcmpi = []ytab{
   730  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   731  }
   732  
   733  var yxmov = []ytab{
   734  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   735  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   736  }
   737  
   738  var yxcvfl = []ytab{
   739  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   740  }
   741  
   742  var yxcvlf = []ytab{
   743  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   744  }
   745  
   746  var yxcvfq = []ytab{
   747  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   748  }
   749  
   750  var yxcvqf = []ytab{
   751  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   752  }
   753  
        // yps interleaves MMX and XMM lines: each register form is followed
        // by a form that takes a Yi8 immediate.
   754  var yps = []ytab{
   755  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   756  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   757  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   758  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   759  }
   760  
   761  var yxrrl = []ytab{
   762  	{Zm_r, 1, argList{Yxr, Yrl}},
   763  }
   764  
   765  var ymrxr = []ytab{
   766  	{Zm_r, 1, argList{Ymr, Yxr}},
   767  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   768  }
   769  
   770  var ymshuf = []ytab{
   771  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   772  }
   773  
   774  var ymshufb = []ytab{
   775  	{Zm2_r, 2, argList{Yxm, Yxr}},
   776  }
   777  
   778  // yxshuf should never have more than 1 entry,
   779  // because some optab entries that use it have opcode sequences that
   780  // are longer than 2 bytes (zoffset=2 here),
   781  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   782  // to name a few.
   783  var yxshuf = []ytab{
   784  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   785  }
   786  
        // yextrw, yextr, yinsrw and yinsr are three-operand forms whose
        // first operand is a Yu8 immediate.
   787  var yextrw = []ytab{
   788  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   789  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   790  }
   791  
   792  var yextr = []ytab{
   793  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   794  }
   795  
   796  var yinsrw = []ytab{
   797  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   798  }
   799  
   800  var yinsr = []ytab{
   801  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   802  }
   803  
   804  var ypsdq = []ytab{
   805  	{Zibo_m, 2, argList{Yi8, Yxr}},
   806  }
   807  
   808  var ymskb = []ytab{
   809  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   810  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   811  }
   812  
        // ycrc32l and ycrc32b reserve no opBytes of their own (offset 0);
        // they differ only in the source class (Yml or Ymb).
   813  var ycrc32l = []ytab{
   814  	{Zlitm_r, 0, argList{Yml, Yrl}},
   815  }
   816  
   817  var ycrc32b = []ytab{
   818  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   819  }
   820  
   821  var yprefetch = []ytab{
   822  	{Zm_o, 2, argList{Ym}},
   823  }
   824  
   825  var yaes = []ytab{
   826  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   827  }
   828  
   829  var yxbegin = []ytab{
   830  	{Zjmp, 1, argList{Ybr}},
   831  }
   832  
   833  var yxabort = []ytab{
   834  	{Zib_, 1, argList{Yu8}},
   835  }
   836  
   837  var ylddqu = []ytab{
   838  	{Zm_r, 1, argList{Ym, Yxr}},
   839  }
   840  
   841  var ypalignr = []ytab{
   842  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   843  }
   844  
        // ysha256rnds2 and yblendvpd require X0 (Yxr0) as the first operand.
   845  var ysha256rnds2 = []ytab{
   846  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   847  }
   848  
   849  var yblendvpd = []ytab{
   850  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   851  }
   852  
   853  var ymmxmm0f38 = []ytab{
   854  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   855  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   856  }
   857  
        // yextractps and ysha1rnds4 restrict the immediate to Yu2
        // (a value that fits in uint2).
   858  var yextractps = []ytab{
   859  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   860  }
   861  
   862  var ysha1rnds4 = []ytab{
   863  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   864  }
   865  
   866  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   867  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   868  // to find the entry with the given p.As and then looks through the ytable for
   869  // that instruction (the second field in the optab struct) for a line whose
   870  // argList matches the Ytypes of the instruction's operands.  The
   871  // function oclass computes the specific Ytype of an operand and then the set
   872  // of more general Ytypes that it satisfies is implied by the ycover table, set
   873  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   874  // from the more general 8-bit constants, but instinit says
   875  //
   876  //        ycover[Yi0*Ymax+Ys32] = 1
   877  //        ycover[Yi1*Ymax+Ys32] = 1
   878  //        ycover[Yi8*Ymax+Ys32] = 1
   879  //
   880  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   881  // if that's what an instruction can handle.
   882  //
   883  // In parallel with the scan through the ytable for the appropriate line, there
   884  // is a z pointer that starts out pointing at the strange magic byte list in
   885  // the Optab struct.  With each step past a non-matching ytable line, z
   886  // advances by the 2nd entry in the line (its zoffset).  When a matching line is found, that
   887  // z pointer has the extra data to use in laying down the instruction bytes.
   888  // The actual bytes laid down are a function of the 1st entry in the line (that
   889  // is, the Ztype) and the z bytes.
   890  //
   891  // For example, let's look at AADDL.  The optab line says:
   892  //        {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //        var yaddl = []ytab{
   896  //                {Zibo_m, 2, argList{Yi8, Yml}},
   897  //                {Zil_, 1, argList{Yi32, Yax}},
   898  //                {Zilo_m, 2, argList{Yi32, Yml}},
   899  //                {Zr_m, 1, argList{Yrl, Yml}},
   900  //                {Zm_r, 1, argList{Yml, Yrl}},
   901  //        }
   902  //
   903  // so there are 5 possible types of ADDL instruction that can be laid down, and
   904  // possible states used to lay them down (Ztype and z pointer, assuming z
   905  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   906  //
   907  //        Yi8, Yml -> Zibo_m, z (0x83, 00)
   908  //        Yi32, Yax -> Zil_, z+2 (0x05)
   909  //        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   910  //        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   911  //        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   912  //
   913  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   914  // relatively straightforward as this program goes.
   915  //
   916  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   917  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   918  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   919  // Zilo_m is the same but a long (32-bit) immediate.
   920  var optab =
   921  //	as, ytab, andproto, opcode
   922  [...]Optab{
   923  	{obj.AXXX, nil, 0, opBytes{}},
   924  	{AAAA, ynone, P32, opBytes{0x37}},
   925  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   926  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   927  	{AAAS, ynone, P32, opBytes{0x3f}},
   928  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   929  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   930  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   933  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   934  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   935  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   936  	{AADDPD, yxm, Pq, opBytes{0x58}},
   937  	{AADDPS, yxm, Pm, opBytes{0x58}},
   938  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   939  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   940  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   941  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   942  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   943  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   944  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   945  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   946  	{AADJSP, nil, 0, opBytes{}},
   947  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   948  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   949  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   950  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   951  	{AANDPD, yxm, Pq, opBytes{0x54}},
   952  	{AANDPS, yxm, Pm, opBytes{0x54}},
   953  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   954  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   956  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   957  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   958  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   959  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   960  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   961  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   962  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   963  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   964  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   965  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   966  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   967  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   968  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   969  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   970  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   971  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   972  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   973  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   974  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   975  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   976  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   977  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   978  	{ABYTE, ybyte, Px, opBytes{1}},
   979  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   980  	{ACBW, ynone, Pe, opBytes{0x98}},
   981  	{ACDQ, ynone, Px, opBytes{0x99}},
   982  	{ACDQE, ynone, Pw, opBytes{0x98}},
   983  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   984  	{ACLC, ynone, Px, opBytes{0xf8}},
   985  	{ACLD, ynone, Px, opBytes{0xfc}},
   986  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   987  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   988  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   989  	{ACLI, ynone, Px, opBytes{0xfa}},
   990  	{ACLTS, ynone, Pm, opBytes{0x06}},
   991  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   992  	{ACMC, ynone, Px, opBytes{0xf5}},
   993  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   994  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   995  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   996  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   997  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   998  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
   999  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1000  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1001  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1002  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1003  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1004  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1005  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1006  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1007  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1008  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1009  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1010  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1011  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1012  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1013  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1014  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1015  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1016  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1017  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1018  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1019  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1020  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1021  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1022  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1023  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1024  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1025  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1026  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1027  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1028  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1029  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1030  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1031  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1032  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1033  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1034  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1035  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1036  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1037  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1038  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1039  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1040  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1041  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1042  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1043  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1044  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1045  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1046  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1047  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1048  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1049  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1050  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1051  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1052  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1053  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1054  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1055  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1056  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1057  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1058  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1059  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1060  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1061  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1062  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1063  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1064  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1065  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1066  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1067  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1068  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1069  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1070  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1071  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1072  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1073  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1074  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1075  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1076  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1077  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1078  	{ACWD, ynone, Pe, opBytes{0x99}},
  1079  	{ACWDE, ynone, Px, opBytes{0x98}},
  1080  	{ACQO, ynone, Pw, opBytes{0x99}},
  1081  	{ADAA, ynone, P32, opBytes{0x27}},
  1082  	{ADAS, ynone, P32, opBytes{0x2f}},
  1083  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1084  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1085  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1086  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1087  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1088  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1089  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1090  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1091  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1092  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1093  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1094  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1095  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1096  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1097  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1098  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1099  	{AENTER, nil, 0, opBytes{}}, // botch
  1100  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1101  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1102  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1103  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1104  	{AHLT, ynone, Px, opBytes{0xf4}},
  1105  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1106  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1107  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1108  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1109  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1110  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1111  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1114  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1115  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1117  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1118  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1119  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1120  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1121  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1122  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1123  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1124  	{AINSL, ynone, Px, opBytes{0x6d}},
  1125  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1126  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1127  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1128  	{AINT, yint, Px, opBytes{0xcd}},
  1129  	{AINTO, ynone, P32, opBytes{0xce}},
  1130  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1131  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1132  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1133  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1134  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1135  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1136  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1137  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1138  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1139  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1140  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1141  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1142  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1143  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1144  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1145  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1146  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1147  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1148  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1149  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1150  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1151  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1152  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1153  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1154  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1155  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1156  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1157  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1158  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1159  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1160  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1161  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1162  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1163  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1164  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1165  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1166  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1167  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1168  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1169  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1170  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1171  	{ALODSL, ynone, Px, opBytes{0xad}},
  1172  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1173  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1174  	{ALONG, ybyte, Px, opBytes{4}},
  1175  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1176  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1177  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1178  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1179  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1180  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1181  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1182  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1183  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1184  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1185  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1186  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1187  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1188  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1189  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1190  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1191  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1192  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1193  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1194  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1195  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1196  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1197  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1198  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1199  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1200  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1201  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1202  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1203  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1204  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1205  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1206  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1207  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1208  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1209  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1210  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1211  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1212  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1213  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1214  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1215  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1216  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1217  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1218  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1219  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1220  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1221  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1222  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1223  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1224  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1225  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1226  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1227  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1228  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1229  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1230  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1231  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1232  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1233  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1234  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1235  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1236  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1237  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1238  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1239  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1240  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1241  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1242  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1243  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1244  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1245  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1246  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1247  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1248  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1249  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1250  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1251  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1252  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1253  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1254  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1255  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1256  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1257  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1258  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1259  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1260  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1261  	{AORPD, yxm, Pq, opBytes{0x56}},
  1262  	{AORPS, yxm, Pm, opBytes{0x56}},
  1263  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1264  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1265  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1266  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1267  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1268  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1269  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1270  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1271  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1272  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1273  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1274  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1275  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1276  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1277  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1278  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1279  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1280  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1281  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1282  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1283  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1284  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1285  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1286  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1287  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1288  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1289  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1290  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1291  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1292  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1293  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1294  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1295  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1296  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1297  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1298  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1299  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1300  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1301  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1302  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1303  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1304  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1305  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1306  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1307  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1308  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1309  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1310  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1311  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1312  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1313  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1314  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1315  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1316  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1317  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1318  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1319  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1320  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1321  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1322  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1323  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1324  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1325  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1326  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1327  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1328  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1329  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1330  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1331  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1332  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1333  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1334  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1335  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1336  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1337  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1338  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1339  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1340  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1341  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1342  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1343  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1344  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1345  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1346  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1347  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1348  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1349  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1350  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1351  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1352  	{APOPAL, ynone, P32, opBytes{0x61}},
  1353  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1354  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1355  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1356  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1357  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1358  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1359  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1360  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1361  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1362  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1363  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1364  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1365  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1366  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1367  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1368  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1369  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1370  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1371  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1372  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1373  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1374  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1375  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1376  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1377  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1378  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1379  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1380  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1381  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1382  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1383  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1384  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1385  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1386  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1387  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1388  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1389  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1390  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1391  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1392  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1393  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1394  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1395  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1396  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1397  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1398  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1399  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1400  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1401  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1402  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1403  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1404  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1405  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1406  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1407  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1409  	{AQUAD, ybyte, Px, opBytes{8}},
  1410  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1411  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1412  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1413  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1415  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1416  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1417  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1418  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1419  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{AREP, ynone, Px, opBytes{0xf3}},
  1421  	{AREPN, ynone, Px, opBytes{0xf2}},
  1422  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1423  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1424  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1425  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1426  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1427  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1428  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1429  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1431  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1432  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1433  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1435  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1436  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1437  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1438  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1439  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1440  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1442  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1443  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1444  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1446  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1447  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1448  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1450  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1451  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1452  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1453  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1454  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1455  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1456  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1457  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1458  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1459  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1460  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1461  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1462  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1463  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1464  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1465  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1466  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1467  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1468  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1469  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1470  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1471  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1472  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1474  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1475  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1476  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1478  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1479  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1480  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1481  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1482  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1483  	{ASTC, ynone, Px, opBytes{0xf9}},
  1484  	{ASTD, ynone, Px, opBytes{0xfd}},
  1485  	{ASTI, ynone, Px, opBytes{0xfb}},
  1486  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1487  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1488  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1489  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1490  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1491  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1492  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1493  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1494  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1495  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1496  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1497  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1498  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1499  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1500  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1501  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1502  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1503  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1504  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1506  	{obj.ATEXT, ytext, Px, opBytes{}},
  1507  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1508  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1509  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1510  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1511  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1512  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1513  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1514  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1515  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1516  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1517  	{AWORD, ybyte, Px, opBytes{2}},
  1518  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1519  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1520  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1521  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1523  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1524  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1525  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1526  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1527  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1528  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1530  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1531  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1532  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1533  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1534  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1535  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1536  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1537  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1538  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1539  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1540  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1541  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1542  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1543  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1544  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1545  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1546  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1547  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1548  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1549  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1550  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1551  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1552  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1553  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1554  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1555  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1556  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1557  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1558  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1559  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1560  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1561  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1562  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1563  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1564  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1565  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1566  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1567  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1568  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1569  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1570  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1571  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1572  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1573  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1574  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1575  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1576  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1577  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1578  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1579  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1580  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1581  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1582  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1583  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1584  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1585  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1586  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1587  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1588  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1589  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1590  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1591  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1592  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1593  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1594  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1595  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1596  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1597  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1598  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1599  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1600  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1601  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1602  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1603  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1604  	{AFFREE, nil, 0, opBytes{}},
  1605  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1606  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1607  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1608  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1609  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1610  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1611  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1612  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1613  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1614  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1615  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1616  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1617  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1618  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1619  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1620  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1621  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1622  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1623  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1624  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1625  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1626  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1627  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1628  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1629  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1630  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1631  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1632  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1633  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1634  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1635  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1636  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1637  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1638  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1639  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1640  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1641  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1642  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1643  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1644  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1645  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1646  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1647  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1649  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1650  	{AINVD, ynone, Pm, opBytes{0x08}},
  1651  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1652  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1653  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1654  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1655  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1656  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1657  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1658  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1659  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1660  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1661  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1662  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1663  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1664  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1665  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1666  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1667  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1668  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1669  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1670  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1671  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1672  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1673  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1674  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1676  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1677  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1678  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1679  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1680  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1681  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1682  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1683  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1684  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1685  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1686  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1687  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1688  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1689  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1690  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1691  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1692  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1693  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1694  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1695  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1696  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1697  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1698  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1699  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1700  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1701  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1702  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1703  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1704  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1705  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1706  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1707  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1708  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1709  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1710  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1711  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1712  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1713  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1715  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1716  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1717  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1718  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1719  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1720  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1721  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1722  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1723  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1724  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1725  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1726  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1727  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1728  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1729  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1730  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1732  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1733  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1734  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1735  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1736  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1737  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1738  	{AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1739  	{AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1740  	{AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1741  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1742  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1743  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1744  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1745  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1746  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1747  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1748  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1749  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1750  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1751  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1752  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1753  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1754  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1755  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1756  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1757  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1758  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1759  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1760  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1761  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1762  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1763  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1764  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1765  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1766  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1767  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1768  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1769  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1770  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1771  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1772  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1773  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1774  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1775  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1776  
  1777  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1778  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1779  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1780  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1781  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1782  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1783  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1784  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1785  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1786  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1787  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1788  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1789  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1790  
  1791  	{obj.AEND, nil, 0, opBytes{}},
  1792  	{0, nil, 0, opBytes{}},
  1793  }
  1794  
// opindex maps an opcode, masked with obj.AMask, to its table entry.
// instinit fills it from avxOptab and optab and reports a "phase error"
// if two entries claim the same slot.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1796  
  1797  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1798  // This happens on systems like Solaris that call .so functions instead of system calls.
  1799  // It does not seem to be necessary for any other systems. This is probably working
  1800  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1801  // what that bug is. And this does fix it.
  1802  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1803  	if ctxt.Headtype == objabi.Hsolaris {
  1804  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1805  		return strings.HasPrefix(s.Name, "libc_")
  1806  	}
  1807  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1808  }
  1809  
  1810  // single-instruction no-ops of various lengths.
  1811  // constructed by hand and disassembled with gdb to verify.
  1812  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1813  var nop = [][16]uint8{
  1814  	{0x90},
  1815  	{0x66, 0x90},
  1816  	{0x0F, 0x1F, 0x00},
  1817  	{0x0F, 0x1F, 0x40, 0x00},
  1818  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1819  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1820  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1821  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1822  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1823  }
  1824  
  1825  // Native Client rejects the repeated 0x66 prefix.
  1826  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1827  func fillnop(p []byte, n int) {
  1828  	var m int
  1829  
  1830  	for n > 0 {
  1831  		m = n
  1832  		if m > len(nop) {
  1833  			m = len(nop)
  1834  		}
  1835  		copy(p[:m], nop[m-1][:m])
  1836  		p = p[m:]
  1837  		n -= m
  1838  	}
  1839  }
  1840  
  1841  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1842  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1843  		return l
  1844  	}
  1845  	return q
  1846  }
  1847  
// span6 assembles the instruction list of s (starting at s.Func.Text) into
// machine code. The encoded bytes end up in s.P, every Prog gets its final
// Pc, and s.Size is set to the total code size.
//
// It does nothing if s.P is already non-nil. newprog is only handed on to
// obj.MarkUnsafePoints.
//
// The work proceeds in stages:
//  1. Normalize the list: branches without a target are pointed at
//     themselves and AADJSP is rewritten into an ADD/SUB on SP (or a NOP).
//  2. Flag backward branches and their targets (loop heads) and
//     preallocate the output buffer.
//  3. Assemble repeatedly. Every branch starts out in its short form; when
//     a forward jump turns out not to fit, it is switched to the long form
//     and the whole function is assembled again.
//  4. When the one-instruction TLS access cannot be used, mark the
//     instructions that index by REG_TLS as unsafe points.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if s.P != nil {
		return
	}

	// instinit sets ycover[0] (the first diagonal entry), so a zero here
	// means the tables were never initialized.
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	for p := s.Func.Text; p != nil; p = p.Link {
		// A branch with no resolved target is made to target itself.
		if p.To.Type == obj.TYPE_BRANCH && p.Pcond == nil {
			p.Pcond = p
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func.Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already carries branchShort has been visited by
		// this loop, i.e. it is p itself or precedes p: the branch goes
		// backwards and its target is treated as a loop head.
		// NOTE(review): this presumes Back is zero on entry — confirm.
		if q := p.Pcond; q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current offset into the function's code
	errors := ctxt.Errors
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Drop the relocations and code produced by the previous pass.
		for i := range s.R {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		for p := s.Func.Text; p != nil; p = p.Link {

			if (p.Back&branchLoopHead != 0) && c&(loopAlign-1) != 0 {
				// pad with NOPs
				// v is the distance from c up to the next multiple of loopAlign.
				v := -c & (loopAlign - 1)

				// Only align when the padding stays within maxLoopPad bytes.
				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// Displacement measured from the end of the jump q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// The displacement does not fit the short form:
					// switch q to the long form and request another pass.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// Patch the one-byte displacement, which sits at offset 2
					// for JCXZL and XBEGIN and at offset 1 otherwise.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement occupies the
					// last four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
			}

			// Append the encoded instruction to the output.
			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			c += int32(m)
		}

		n++
		// Give up after 20 passes.
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Do not reassemble if this pass reported new errors.
		if ctxt.Errors > errors {
			return
		}
	}

	s.Size = int64(c)

	// Disabled debug dump of the generated bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS)
	}
}
  2004  
  2005  func instinit(ctxt *obj.Link) {
  2006  	if ycover[0] != 0 {
  2007  		// Already initialized; stop now.
  2008  		// This happens in the cmd/asm tests,
  2009  		// each of which re-initializes the arch.
  2010  		return
  2011  	}
  2012  
  2013  	switch ctxt.Headtype {
  2014  	case objabi.Hplan9:
  2015  		plan9privates = ctxt.Lookup("_privates")
  2016  	}
  2017  
  2018  	for i := range avxOptab {
  2019  		c := avxOptab[i].as
  2020  		if opindex[c&obj.AMask] != nil {
  2021  			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
  2022  		}
  2023  		opindex[c&obj.AMask] = &avxOptab[i]
  2024  	}
  2025  	for i := 1; optab[i].as != 0; i++ {
  2026  		c := optab[i].as
  2027  		if opindex[c&obj.AMask] != nil {
  2028  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2029  		}
  2030  		opindex[c&obj.AMask] = &optab[i]
  2031  	}
  2032  
  2033  	for i := 0; i < Ymax; i++ {
  2034  		ycover[i*Ymax+i] = 1
  2035  	}
  2036  
  2037  	ycover[Yi0*Ymax+Yu2] = 1
  2038  	ycover[Yi1*Ymax+Yu2] = 1
  2039  
  2040  	ycover[Yi0*Ymax+Yi8] = 1
  2041  	ycover[Yi1*Ymax+Yi8] = 1
  2042  	ycover[Yu2*Ymax+Yi8] = 1
  2043  	ycover[Yu7*Ymax+Yi8] = 1
  2044  
  2045  	ycover[Yi0*Ymax+Yu7] = 1
  2046  	ycover[Yi1*Ymax+Yu7] = 1
  2047  	ycover[Yu2*Ymax+Yu7] = 1
  2048  
  2049  	ycover[Yi0*Ymax+Yu8] = 1
  2050  	ycover[Yi1*Ymax+Yu8] = 1
  2051  	ycover[Yu2*Ymax+Yu8] = 1
  2052  	ycover[Yu7*Ymax+Yu8] = 1
  2053  
  2054  	ycover[Yi0*Ymax+Ys32] = 1
  2055  	ycover[Yi1*Ymax+Ys32] = 1
  2056  	ycover[Yu2*Ymax+Ys32] = 1
  2057  	ycover[Yu7*Ymax+Ys32] = 1
  2058  	ycover[Yu8*Ymax+Ys32] = 1
  2059  	ycover[Yi8*Ymax+Ys32] = 1
  2060  
  2061  	ycover[Yi0*Ymax+Yi32] = 1
  2062  	ycover[Yi1*Ymax+Yi32] = 1
  2063  	ycover[Yu2*Ymax+Yi32] = 1
  2064  	ycover[Yu7*Ymax+Yi32] = 1
  2065  	ycover[Yu8*Ymax+Yi32] = 1
  2066  	ycover[Yi8*Ymax+Yi32] = 1
  2067  	ycover[Ys32*Ymax+Yi32] = 1
  2068  
  2069  	ycover[Yi0*Ymax+Yi64] = 1
  2070  	ycover[Yi1*Ymax+Yi64] = 1
  2071  	ycover[Yu7*Ymax+Yi64] = 1
  2072  	ycover[Yu2*Ymax+Yi64] = 1
  2073  	ycover[Yu8*Ymax+Yi64] = 1
  2074  	ycover[Yi8*Ymax+Yi64] = 1
  2075  	ycover[Ys32*Ymax+Yi64] = 1
  2076  	ycover[Yi32*Ymax+Yi64] = 1
  2077  
  2078  	ycover[Yal*Ymax+Yrb] = 1
  2079  	ycover[Ycl*Ymax+Yrb] = 1
  2080  	ycover[Yax*Ymax+Yrb] = 1
  2081  	ycover[Ycx*Ymax+Yrb] = 1
  2082  	ycover[Yrx*Ymax+Yrb] = 1
  2083  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2084  
  2085  	ycover[Ycl*Ymax+Ycx] = 1
  2086  
  2087  	ycover[Yax*Ymax+Yrx] = 1
  2088  	ycover[Ycx*Ymax+Yrx] = 1
  2089  
  2090  	ycover[Yax*Ymax+Yrl] = 1
  2091  	ycover[Ycx*Ymax+Yrl] = 1
  2092  	ycover[Yrx*Ymax+Yrl] = 1
  2093  	ycover[Yrl32*Ymax+Yrl] = 1
  2094  
  2095  	ycover[Yf0*Ymax+Yrf] = 1
  2096  
  2097  	ycover[Yal*Ymax+Ymb] = 1
  2098  	ycover[Ycl*Ymax+Ymb] = 1
  2099  	ycover[Yax*Ymax+Ymb] = 1
  2100  	ycover[Ycx*Ymax+Ymb] = 1
  2101  	ycover[Yrx*Ymax+Ymb] = 1
  2102  	ycover[Yrb*Ymax+Ymb] = 1
  2103  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2104  	ycover[Ym*Ymax+Ymb] = 1
  2105  
  2106  	ycover[Yax*Ymax+Yml] = 1
  2107  	ycover[Ycx*Ymax+Yml] = 1
  2108  	ycover[Yrx*Ymax+Yml] = 1
  2109  	ycover[Yrl*Ymax+Yml] = 1
  2110  	ycover[Yrl32*Ymax+Yml] = 1
  2111  	ycover[Ym*Ymax+Yml] = 1
  2112  
  2113  	ycover[Yax*Ymax+Ymm] = 1
  2114  	ycover[Ycx*Ymax+Ymm] = 1
  2115  	ycover[Yrx*Ymax+Ymm] = 1
  2116  	ycover[Yrl*Ymax+Ymm] = 1
  2117  	ycover[Yrl32*Ymax+Ymm] = 1
  2118  	ycover[Ym*Ymax+Ymm] = 1
  2119  	ycover[Ymr*Ymax+Ymm] = 1
  2120  
  2121  	ycover[Yxr0*Ymax+Yxr] = 1
  2122  
  2123  	ycover[Ym*Ymax+Yxm] = 1
  2124  	ycover[Yxr0*Ymax+Yxm] = 1
  2125  	ycover[Yxr*Ymax+Yxm] = 1
  2126  
  2127  	ycover[Ym*Ymax+Yym] = 1
  2128  	ycover[Yyr*Ymax+Yym] = 1
  2129  
  2130  	ycover[Yxr0*Ymax+YxrEvex] = 1
  2131  	ycover[Yxr*Ymax+YxrEvex] = 1
  2132  
  2133  	ycover[Ym*Ymax+YxmEvex] = 1
  2134  	ycover[Yxr0*Ymax+YxmEvex] = 1
  2135  	ycover[Yxr*Ymax+YxmEvex] = 1
  2136  	ycover[YxrEvex*Ymax+YxmEvex] = 1
  2137  
  2138  	ycover[Yyr*Ymax+YyrEvex] = 1
  2139  
  2140  	ycover[Ym*Ymax+YymEvex] = 1
  2141  	ycover[Yyr*Ymax+YymEvex] = 1
  2142  	ycover[YyrEvex*Ymax+YymEvex] = 1
  2143  
  2144  	ycover[Ym*Ymax+Yzm] = 1
  2145  	ycover[Yzr*Ymax+Yzm] = 1
  2146  
  2147  	ycover[Yk0*Ymax+Yk] = 1
  2148  	ycover[Yknot0*Ymax+Yk] = 1
  2149  
  2150  	ycover[Yk0*Ymax+Ykm] = 1
  2151  	ycover[Yknot0*Ymax+Ykm] = 1
  2152  	ycover[Yk*Ymax+Ykm] = 1
  2153  	ycover[Ym*Ymax+Ykm] = 1
  2154  
  2155  	ycover[Yxvm*Ymax+YxvmEvex] = 1
  2156  
  2157  	ycover[Yyvm*Ymax+YyvmEvex] = 1
  2158  
  2159  	for i := 0; i < MAXREG; i++ {
  2160  		reg[i] = -1
  2161  		if i >= REG_AL && i <= REG_R15B {
  2162  			reg[i] = (i - REG_AL) & 7
  2163  			if i >= REG_SPB && i <= REG_DIB {
  2164  				regrex[i] = 0x40
  2165  			}
  2166  			if i >= REG_R8B && i <= REG_R15B {
  2167  				regrex[i] = Rxr | Rxx | Rxb
  2168  			}
  2169  		}
  2170  
  2171  		if i >= REG_AH && i <= REG_BH {
  2172  			reg[i] = 4 + ((i - REG_AH) & 7)
  2173  		}
  2174  		if i >= REG_AX && i <= REG_R15 {
  2175  			reg[i] = (i - REG_AX) & 7
  2176  			if i >= REG_R8 {
  2177  				regrex[i] = Rxr | Rxx | Rxb
  2178  			}
  2179  		}
  2180  
  2181  		if i >= REG_F0 && i <= REG_F0+7 {
  2182  			reg[i] = (i - REG_F0) & 7
  2183  		}
  2184  		if i >= REG_M0 && i <= REG_M0+7 {
  2185  			reg[i] = (i - REG_M0) & 7
  2186  		}
  2187  		if i >= REG_K0 && i <= REG_K0+7 {
  2188  			reg[i] = (i - REG_K0) & 7
  2189  		}
  2190  		if i >= REG_X0 && i <= REG_X0+15 {
  2191  			reg[i] = (i - REG_X0) & 7
  2192  			if i >= REG_X0+8 {
  2193  				regrex[i] = Rxr | Rxx | Rxb
  2194  			}
  2195  		}
  2196  		if i >= REG_X16 && i <= REG_X16+15 {
  2197  			reg[i] = (i - REG_X16) & 7
  2198  			if i >= REG_X16+8 {
  2199  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2200  			} else {
  2201  				regrex[i] = RxrEvex
  2202  			}
  2203  		}
  2204  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2205  			reg[i] = (i - REG_Y0) & 7
  2206  			if i >= REG_Y0+8 {
  2207  				regrex[i] = Rxr | Rxx | Rxb
  2208  			}
  2209  		}
  2210  		if i >= REG_Y16 && i <= REG_Y16+15 {
  2211  			reg[i] = (i - REG_Y16) & 7
  2212  			if i >= REG_Y16+8 {
  2213  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2214  			} else {
  2215  				regrex[i] = RxrEvex
  2216  			}
  2217  		}
  2218  		if i >= REG_Z0 && i <= REG_Z0+15 {
  2219  			reg[i] = (i - REG_Z0) & 7
  2220  			if i > REG_Z0+7 {
  2221  				regrex[i] = Rxr | Rxx | Rxb
  2222  			}
  2223  		}
  2224  		if i >= REG_Z16 && i <= REG_Z16+15 {
  2225  			reg[i] = (i - REG_Z16) & 7
  2226  			if i >= REG_Z16+8 {
  2227  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2228  			} else {
  2229  				regrex[i] = RxrEvex
  2230  			}
  2231  		}
  2232  
  2233  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2234  			regrex[i] = Rxr
  2235  		}
  2236  	}
  2237  }
  2238  
// isAndroid records whether objabi.GOOS is "android". prefixof consults it
// when choosing the segment prefix for TLS references.
var isAndroid = objabi.GOOS == "android"
  2240  
  2241  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2242  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2243  		return 0
  2244  	}
  2245  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2246  		switch a.Reg {
  2247  		case REG_CS:
  2248  			return 0x2e
  2249  
  2250  		case REG_DS:
  2251  			return 0x3e
  2252  
  2253  		case REG_ES:
  2254  			return 0x26
  2255  
  2256  		case REG_FS:
  2257  			return 0x64
  2258  
  2259  		case REG_GS:
  2260  			return 0x65
  2261  
  2262  		case REG_TLS:
  2263  			// NOTE: Systems listed here should be only systems that
  2264  			// support direct TLS references like 8(TLS) implemented as
  2265  			// direct references from FS or GS. Systems that require
  2266  			// the initial-exec model, where you load the TLS base into
  2267  			// a register and then index from that register, do not reach
  2268  			// this code and should not be listed.
  2269  			if ctxt.Arch.Family == sys.I386 {
  2270  				switch ctxt.Headtype {
  2271  				default:
  2272  					if isAndroid {
  2273  						return 0x65 // GS
  2274  					}
  2275  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2276  
  2277  				case objabi.Hdarwin,
  2278  					objabi.Hdragonfly,
  2279  					objabi.Hfreebsd,
  2280  					objabi.Hnetbsd,
  2281  					objabi.Hopenbsd:
  2282  					return 0x65 // GS
  2283  				}
  2284  			}
  2285  
  2286  			switch ctxt.Headtype {
  2287  			default:
  2288  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2289  
  2290  			case objabi.Hlinux:
  2291  				if isAndroid {
  2292  					return 0x64 // FS
  2293  				}
  2294  
  2295  				if ctxt.Flag_shared {
  2296  					log.Fatalf("unknown TLS base register for linux with -shared")
  2297  				} else {
  2298  					return 0x64 // FS
  2299  				}
  2300  
  2301  			case objabi.Hdragonfly,
  2302  				objabi.Hfreebsd,
  2303  				objabi.Hnetbsd,
  2304  				objabi.Hopenbsd,
  2305  				objabi.Hsolaris:
  2306  				return 0x64 // FS
  2307  
  2308  			case objabi.Hdarwin:
  2309  				return 0x65 // GS
  2310  			}
  2311  		}
  2312  	}
  2313  
  2314  	if ctxt.Arch.Family == sys.I386 {
  2315  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2316  			// When building for inclusion into a shared library, an instruction of the form
  2317  			//     MOVL off(CX)(TLS*1), AX
  2318  			// becomes
  2319  			//     mov %gs:off(%ecx), %eax
  2320  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2321  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2322  			// a shared library the instruction it becomes
  2323  			//     mov 0x0(%ecx), %eax
  2324  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2325  			return 0x65 // GS
  2326  		}
  2327  		return 0
  2328  	}
  2329  
  2330  	switch a.Index {
  2331  	case REG_CS:
  2332  		return 0x2e
  2333  
  2334  	case REG_DS:
  2335  		return 0x3e
  2336  
  2337  	case REG_ES:
  2338  		return 0x26
  2339  
  2340  	case REG_TLS:
  2341  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2342  			// When building for inclusion into a shared library, an instruction of the form
  2343  			//     MOV off(CX)(TLS*1), AX
  2344  			// becomes
  2345  			//     mov %fs:off(%rcx), %rax
  2346  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2347  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2348  			// a shared library the instruction does not require a prefix.
  2349  			return 0x64
  2350  		}
  2351  
  2352  	case REG_FS:
  2353  		return 0x64
  2354  
  2355  	case REG_GS:
  2356  		return 0x65
  2357  	}
  2358  
  2359  	return 0
  2360  }
  2361  
  2362  // oclassRegList returns multisource operand class for addr.
  2363  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2364  	// TODO(quasilyte): when oclass register case is refactored into
  2365  	// lookup table, use it here to get register kind more easily.
  2366  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2367  
  2368  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2369  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2370  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2371  
  2372  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2373  	low := regIndex(int16(reg0))
  2374  	high := regIndex(int16(reg1))
  2375  
  2376  	if ctxt.Arch.Family == sys.I386 {
  2377  		if low >= 8 || high >= 8 {
  2378  			return Yxxx
  2379  		}
  2380  	}
  2381  
  2382  	switch high - low {
  2383  	case 3:
  2384  		switch {
  2385  		case regIsXmm(reg0) && regIsXmm(reg1):
  2386  			return YxrEvexMulti4
  2387  		case regIsYmm(reg0) && regIsYmm(reg1):
  2388  			return YyrEvexMulti4
  2389  		case regIsZmm(reg0) && regIsZmm(reg1):
  2390  			return YzrMulti4
  2391  		default:
  2392  			return Yxxx
  2393  		}
  2394  	default:
  2395  		return Yxxx
  2396  	}
  2397  }
  2398  
  2399  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2400  // For addr that is not V-mem returns (Yxxx, false).
  2401  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2402  	switch addr.Index {
  2403  	case REG_X0 + 0,
  2404  		REG_X0 + 1,
  2405  		REG_X0 + 2,
  2406  		REG_X0 + 3,
  2407  		REG_X0 + 4,
  2408  		REG_X0 + 5,
  2409  		REG_X0 + 6,
  2410  		REG_X0 + 7:
  2411  		return Yxvm, true
  2412  	case REG_X8 + 0,
  2413  		REG_X8 + 1,
  2414  		REG_X8 + 2,
  2415  		REG_X8 + 3,
  2416  		REG_X8 + 4,
  2417  		REG_X8 + 5,
  2418  		REG_X8 + 6,
  2419  		REG_X8 + 7:
  2420  		if ctxt.Arch.Family == sys.I386 {
  2421  			return Yxxx, true
  2422  		}
  2423  		return Yxvm, true
  2424  	case REG_X16 + 0,
  2425  		REG_X16 + 1,
  2426  		REG_X16 + 2,
  2427  		REG_X16 + 3,
  2428  		REG_X16 + 4,
  2429  		REG_X16 + 5,
  2430  		REG_X16 + 6,
  2431  		REG_X16 + 7,
  2432  		REG_X16 + 8,
  2433  		REG_X16 + 9,
  2434  		REG_X16 + 10,
  2435  		REG_X16 + 11,
  2436  		REG_X16 + 12,
  2437  		REG_X16 + 13,
  2438  		REG_X16 + 14,
  2439  		REG_X16 + 15:
  2440  		if ctxt.Arch.Family == sys.I386 {
  2441  			return Yxxx, true
  2442  		}
  2443  		return YxvmEvex, true
  2444  
  2445  	case REG_Y0 + 0,
  2446  		REG_Y0 + 1,
  2447  		REG_Y0 + 2,
  2448  		REG_Y0 + 3,
  2449  		REG_Y0 + 4,
  2450  		REG_Y0 + 5,
  2451  		REG_Y0 + 6,
  2452  		REG_Y0 + 7:
  2453  		return Yyvm, true
  2454  	case REG_Y8 + 0,
  2455  		REG_Y8 + 1,
  2456  		REG_Y8 + 2,
  2457  		REG_Y8 + 3,
  2458  		REG_Y8 + 4,
  2459  		REG_Y8 + 5,
  2460  		REG_Y8 + 6,
  2461  		REG_Y8 + 7:
  2462  		if ctxt.Arch.Family == sys.I386 {
  2463  			return Yxxx, true
  2464  		}
  2465  		return Yyvm, true
  2466  	case REG_Y16 + 0,
  2467  		REG_Y16 + 1,
  2468  		REG_Y16 + 2,
  2469  		REG_Y16 + 3,
  2470  		REG_Y16 + 4,
  2471  		REG_Y16 + 5,
  2472  		REG_Y16 + 6,
  2473  		REG_Y16 + 7,
  2474  		REG_Y16 + 8,
  2475  		REG_Y16 + 9,
  2476  		REG_Y16 + 10,
  2477  		REG_Y16 + 11,
  2478  		REG_Y16 + 12,
  2479  		REG_Y16 + 13,
  2480  		REG_Y16 + 14,
  2481  		REG_Y16 + 15:
  2482  		if ctxt.Arch.Family == sys.I386 {
  2483  			return Yxxx, true
  2484  		}
  2485  		return YyvmEvex, true
  2486  
  2487  	case REG_Z0 + 0,
  2488  		REG_Z0 + 1,
  2489  		REG_Z0 + 2,
  2490  		REG_Z0 + 3,
  2491  		REG_Z0 + 4,
  2492  		REG_Z0 + 5,
  2493  		REG_Z0 + 6,
  2494  		REG_Z0 + 7:
  2495  		return Yzvm, true
  2496  	case REG_Z8 + 0,
  2497  		REG_Z8 + 1,
  2498  		REG_Z8 + 2,
  2499  		REG_Z8 + 3,
  2500  		REG_Z8 + 4,
  2501  		REG_Z8 + 5,
  2502  		REG_Z8 + 6,
  2503  		REG_Z8 + 7,
  2504  		REG_Z8 + 8,
  2505  		REG_Z8 + 9,
  2506  		REG_Z8 + 10,
  2507  		REG_Z8 + 11,
  2508  		REG_Z8 + 12,
  2509  		REG_Z8 + 13,
  2510  		REG_Z8 + 14,
  2511  		REG_Z8 + 15,
  2512  		REG_Z8 + 16,
  2513  		REG_Z8 + 17,
  2514  		REG_Z8 + 18,
  2515  		REG_Z8 + 19,
  2516  		REG_Z8 + 20,
  2517  		REG_Z8 + 21,
  2518  		REG_Z8 + 22,
  2519  		REG_Z8 + 23:
  2520  		if ctxt.Arch.Family == sys.I386 {
  2521  			return Yxxx, true
  2522  		}
  2523  		return Yzvm, true
  2524  	}
  2525  
  2526  	return Yxxx, false
  2527  }
  2528  
// oclass returns the operand class (one of the Y* constants) of operand a
// of instruction p. Yxxx is returned when the operand cannot be classified
// or is rejected for the target architecture.
//
// Non-register operands are classified by the first switch (on a.Type);
// register operands are classified by the second switch (on a.Reg).
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_REGLIST:
		return oclassRegList(ctxt, a)

	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		// Only a bare named reference (no base, no index, no scale) is accepted.
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		// Pseudo registers have negative index, but SP is
		// not pseudo on x86, hence REG_SP check is not redundant.
		if a.Index == REG_SP || a.Index < 0 {
			// Can't use FP/SB/PC/SP as the index register.
			return Yxxx
		}

		// Let oclassVMem claim the operand first; fall through to the
		// ordinary memory classification only when it reports !ok.
		if vmem, ok := oclassVMem(ctxt, a); ok {
			return vmem
		}

		if ctxt.Arch.Family == sys.AMD64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			if a.Sym != nil && useAbs(ctxt, a.Sym) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		// Anything left is diagnosed if it still carries a symbol or name,
		// and is then classified like a constant by its offset.
		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		fallthrough

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		v := a.Offset
		if ctxt.Arch.Family == sys.I386 {
			// On 386 only the low 32 bits are considered (sign-extended).
			v = int64(int32(v))
		}
		// The cases are tried in order, so the narrowest matching class wins.
		switch {
		case v == 0:
			return Yi0
		case v == 1:
			return Yi1
		case v >= 0 && v <= 3:
			return Yu2
		case v >= 0 && v <= 127:
			return Yu7
		case v >= 0 && v <= 255:
			return Yu8
		case v >= -128 && v <= 127:
			return Yi8
		}
		if ctxt.Arch.Family == sys.I386 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32 // can sign extend
		}
		if v>>32 == 0 {
			return Yi32 // unsigned
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every remaining operand type other than a register is an error.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		// These byte registers are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	case REG_R8, // not really Yrl
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// R8-R15 are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		if ctxt.Arch.Family == sys.I386 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	case REG_X0:
		return Yxr0

	case REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_X0 + 16,
		REG_X0 + 17,
		REG_X0 + 18,
		REG_X0 + 19,
		REG_X0 + 20,
		REG_X0 + 21,
		REG_X0 + 22,
		REG_X0 + 23,
		REG_X0 + 24,
		REG_X0 + 25,
		REG_X0 + 26,
		REG_X0 + 27,
		REG_X0 + 28,
		REG_X0 + 29,
		REG_X0 + 30,
		REG_X0 + 31:
		return YxrEvex

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_Y0 + 16,
		REG_Y0 + 17,
		REG_Y0 + 18,
		REG_Y0 + 19,
		REG_Y0 + 20,
		REG_Y0 + 21,
		REG_Y0 + 22,
		REG_Y0 + 23,
		REG_Y0 + 24,
		REG_Y0 + 25,
		REG_Y0 + 26,
		REG_Y0 + 27,
		REG_Y0 + 28,
		REG_Y0 + 29,
		REG_Y0 + 30,
		REG_Y0 + 31:
		return YyrEvex

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzr

	case REG_Z0 + 8,
		REG_Z0 + 9,
		REG_Z0 + 10,
		REG_Z0 + 11,
		REG_Z0 + 12,
		REG_Z0 + 13,
		REG_Z0 + 14,
		REG_Z0 + 15,
		REG_Z0 + 16,
		REG_Z0 + 17,
		REG_Z0 + 18,
		REG_Z0 + 19,
		REG_Z0 + 20,
		REG_Z0 + 21,
		REG_Z0 + 22,
		REG_Z0 + 23,
		REG_Z0 + 24,
		REG_Z0 + 25,
		REG_Z0 + 26,
		REG_Z0 + 27,
		REG_Z0 + 28,
		REG_Z0 + 29,
		REG_Z0 + 30,
		REG_Z0 + 31:
		// Z8-Z31 are rejected on 386; Z0-Z7 above are accepted everywhere.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		return Yzr

	case REG_K0:
		return Yk0

	case REG_K0 + 1,
		REG_K0 + 2,
		REG_K0 + 3,
		REG_K0 + 4,
		REG_K0 + 5,
		REG_K0 + 6,
		REG_K0 + 7:
		return Yknot0

	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	// Register not covered by any case above.
	return Yxxx
}
  2946  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// Bytes are appended at offset off inside the fixed-size array buf; the
// Put* methods advance off, and Reset sets it back to zero.
type AsmBuf struct {
	// buf is the fixed-size backing storage for the bytes being assembled.
	buf      [100]byte
	// off is the number of bytes currently held in buf (the next write position).
	off      int
	// rexflag accumulates REX-related bits while operands are encoded
	// (asmandsz ORs bits into it).
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	// rep, repn and lock are presumably instruction-prefix flags; their use
	// is not visible in this part of the file — TODO confirm.
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}
  2961  
  2962  // Put1 appends one byte to the end of the buffer.
  2963  func (ab *AsmBuf) Put1(x byte) {
  2964  	ab.buf[ab.off] = x
  2965  	ab.off++
  2966  }
  2967  
  2968  // Put2 appends two bytes to the end of the buffer.
  2969  func (ab *AsmBuf) Put2(x, y byte) {
  2970  	ab.buf[ab.off+0] = x
  2971  	ab.buf[ab.off+1] = y
  2972  	ab.off += 2
  2973  }
  2974  
  2975  // Put3 appends three bytes to the end of the buffer.
  2976  func (ab *AsmBuf) Put3(x, y, z byte) {
  2977  	ab.buf[ab.off+0] = x
  2978  	ab.buf[ab.off+1] = y
  2979  	ab.buf[ab.off+2] = z
  2980  	ab.off += 3
  2981  }
  2982  
  2983  // Put4 appends four bytes to the end of the buffer.
  2984  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  2985  	ab.buf[ab.off+0] = x
  2986  	ab.buf[ab.off+1] = y
  2987  	ab.buf[ab.off+2] = z
  2988  	ab.buf[ab.off+3] = w
  2989  	ab.off += 4
  2990  }
  2991  
  2992  // PutInt16 writes v into the buffer using little-endian encoding.
  2993  func (ab *AsmBuf) PutInt16(v int16) {
  2994  	ab.buf[ab.off+0] = byte(v)
  2995  	ab.buf[ab.off+1] = byte(v >> 8)
  2996  	ab.off += 2
  2997  }
  2998  
  2999  // PutInt32 writes v into the buffer using little-endian encoding.
  3000  func (ab *AsmBuf) PutInt32(v int32) {
  3001  	ab.buf[ab.off+0] = byte(v)
  3002  	ab.buf[ab.off+1] = byte(v >> 8)
  3003  	ab.buf[ab.off+2] = byte(v >> 16)
  3004  	ab.buf[ab.off+3] = byte(v >> 24)
  3005  	ab.off += 4
  3006  }
  3007  
  3008  // PutInt64 writes v into the buffer using little-endian encoding.
  3009  func (ab *AsmBuf) PutInt64(v int64) {
  3010  	ab.buf[ab.off+0] = byte(v)
  3011  	ab.buf[ab.off+1] = byte(v >> 8)
  3012  	ab.buf[ab.off+2] = byte(v >> 16)
  3013  	ab.buf[ab.off+3] = byte(v >> 24)
  3014  	ab.buf[ab.off+4] = byte(v >> 32)
  3015  	ab.buf[ab.off+5] = byte(v >> 40)
  3016  	ab.buf[ab.off+6] = byte(v >> 48)
  3017  	ab.buf[ab.off+7] = byte(v >> 56)
  3018  	ab.off += 8
  3019  }
  3020  
  3021  // Put copies b into the buffer.
  3022  func (ab *AsmBuf) Put(b []byte) {
  3023  	copy(ab.buf[ab.off:], b)
  3024  	ab.off += len(b)
  3025  }
  3026  
  3027  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3028  // starting at specified offset (e.g. z counter value).
  3029  // Trailing 0 is not written.
  3030  //
  3031  // Intended to be used for literal Z cases.
  3032  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3033  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3034  	for int(op[offset]) != 0 {
  3035  		ab.Put1(byte(op[offset]))
  3036  		offset++
  3037  	}
  3038  }
  3039  
  3040  // Insert inserts b at offset i.
  3041  func (ab *AsmBuf) Insert(i int, b byte) {
  3042  	ab.off++
  3043  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3044  	ab.buf[i] = b
  3045  }
  3046  
  3047  // Last returns the byte at the end of the buffer.
  3048  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3049  
  3050  // Len returns the length of the buffer.
  3051  func (ab *AsmBuf) Len() int { return ab.off }
  3052  
  3053  // Bytes returns the contents of the buffer.
  3054  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3055  
  3056  // Reset empties the buffer.
  3057  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3058  
  3059  // At returns the byte at offset i.
  3060  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3061  
// asmidx emits SIB byte.
//
// The byte is assembled in i from three fields: the scale selector in
// bits 7-6, the index register encoding in bits 5-3 and the base register
// encoding in bits 2-0. If scale, index or base is not acceptable, a
// diagnostic is reported through ctxt.Diag and a zero byte is emitted
// instead, so exactly one byte is always appended.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index register: index field is 4 and the scale argument is
		// not examined (the scale switch below is skipped).
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		// These index registers are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		// Index register encoding goes into bits 5-3.
		i = reg[index] << 3
	}

	// Scale selector (0..3 for scale 1, 2, 4, 8) goes into bits 7-6.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	// Base register encoding goes into bits 2-0.
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// R8-R15 are rejected as base on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	// Report the problem but still emit a placeholder byte.
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}
  3250  
  3251  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3252  	var rel obj.Reloc
  3253  
  3254  	v := vaddr(ctxt, p, a, &rel)
  3255  	if rel.Siz != 0 {
  3256  		if rel.Siz != 4 {
  3257  			ctxt.Diag("bad reloc")
  3258  		}
  3259  		r := obj.Addrel(cursym)
  3260  		*r = rel
  3261  		r.Off = int32(p.Pc + int64(ab.Len()))
  3262  	}
  3263  
  3264  	ab.PutInt32(int32(v))
  3265  }
  3266  
  3267  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3268  	if r != nil {
  3269  		*r = obj.Reloc{}
  3270  	}
  3271  
  3272  	switch a.Name {
  3273  	case obj.NAME_STATIC,
  3274  		obj.NAME_GOTREF,
  3275  		obj.NAME_EXTERN:
  3276  		s := a.Sym
  3277  		if r == nil {
  3278  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3279  			log.Fatalf("reloc")
  3280  		}
  3281  
  3282  		if a.Name == obj.NAME_GOTREF {
  3283  			r.Siz = 4
  3284  			r.Type = objabi.R_GOTPCREL
  3285  		} else if useAbs(ctxt, s) {
  3286  			r.Siz = 4
  3287  			r.Type = objabi.R_ADDR
  3288  		} else {
  3289  			r.Siz = 4
  3290  			r.Type = objabi.R_PCREL
  3291  		}
  3292  
  3293  		r.Off = -1 // caller must fill in
  3294  		r.Sym = s
  3295  		r.Add = a.Offset
  3296  
  3297  		return 0
  3298  	}
  3299  
  3300  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3301  		if r == nil {
  3302  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3303  			log.Fatalf("reloc")
  3304  		}
  3305  
  3306  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3307  			r.Type = objabi.R_TLS_LE
  3308  			r.Siz = 4
  3309  			r.Off = -1 // caller must fill in
  3310  			r.Add = a.Offset
  3311  		}
  3312  		return 0
  3313  	}
  3314  
  3315  	return a.Offset
  3316  }
  3317  
// asmandsz emits the addressing bytes for operand a of instruction p:
// a ModR/M-style byte (top two bits select the form, bits 5-3 hold r,
// bits 2-0 hold the register/form selector), followed when needed by a
// SIB byte (via asmidx) and an 8- or 32-bit displacement.
//
// r is the value placed in bits 5-3 of the first byte. rex supplies
// additional REX bits; only its 0x40 and Rxr bits are kept. REX bits
// required by the registers used are ORed into ab.rexflag. When the
// operand needs a relocation it is appended to cursym at the offset of
// the 32-bit displacement. Unencodable operands are reported with
// ctxt.Diag. The m64 parameter is not used by this function.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the bits of the caller-supplied rex that apply here.
	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// Displacement truncated to 32 bits (overflow was diagnosed above).
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR operands are never encodable here; the two checks
		// only add more specific diagnostics before the generic one.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		// Register-direct form: top bits 3, register encoding in bits 2-0.
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always uses a SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: SIB form followed by a 32-bit displacement.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement needed; this form is not used with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// One-byte displacement when toDisp8 accepts v and no relocation is pending.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// Otherwise a 32-bit displacement.
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a real index register.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// No usable base register (none, a segment register, or TLS):
	// the address is a bare 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always go through asmidx (SIB byte with no index).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Remaining general-purpose base registers.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
			// Move the offset into a R_TLS_LE relocation addend and
			// emit a zero displacement for it to be applied to.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// No displacement needed; this form is not used with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement; if a relocation is pending, record it
	// at the offset where the displacement is written.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}
  3530  
  3531  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3532  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3533  }
  3534  
  3535  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3536  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3537  }
  3538  
  3539  func bytereg(a *obj.Addr, t *uint8) {
  3540  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3541  		a.Reg += REG_AL - REG_AX
  3542  		*t = 0
  3543  	}
  3544  }
  3545  
  3546  func unbytereg(a *obj.Addr, t *uint8) {
  3547  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3548  		a.Reg += REG_AX - REG_AL
  3549  		*t = 0
  3550  	}
  3551  }
  3552  
// Encoding kinds for ymovtab entries: each movtab literal in ymovtab
// carries one of these values as its fifth element. The values are
// assigned by iota, so the order of the names is significant.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem
	movMemReg
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)
  3563  
  3564  var ymovtab = []movtab{
  3565  	// push
  3566  	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
  3567  	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
  3568  	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
  3569  	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
  3570  	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
  3571  	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
  3572  	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
  3573  	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
  3574  	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
  3575  	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
  3576  	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
  3577  	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
  3578  	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
  3579  	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},
  3580  
  3581  	// pop
  3582  	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
  3583  	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
  3584  	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
  3585  	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
  3586  	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
  3587  	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
  3588  	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
  3589  	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
  3590  	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
  3591  	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
  3592  	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
  3593  	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},
  3594  
  3595  	// mov seg
  3596  	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
  3597  	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
  3598  	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
  3599  	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
  3600  	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
  3601  	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
  3602  	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
  3603  	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
  3604  	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
  3605  	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
  3606  	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
  3607  	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},
  3608  
  3609  	// mov cr
  3610  	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
  3611  	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
  3612  	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
  3613  	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
  3614  	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
  3615  	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
  3616  	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
  3617  	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
  3618  	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
  3619  	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
  3620  	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
  3621  	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
  3622  	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
  3623  	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
  3624  	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
  3625  	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
  3626  	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
  3627  	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
  3628  	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
  3629  	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
  3630  
  3631  	// mov dr
  3632  	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
  3633  	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
  3634  	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
  3635  	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
  3636  	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
  3637  	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
  3638  	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
  3639  	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
  3640  	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
  3641  	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
  3642  	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
  3643  	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
  3644  	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
  3645  	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
  3646  	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
  3647  	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
  3648  
  3649  	// mov tr
  3650  	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
  3651  	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
  3652  	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
  3653  	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},
  3654  
  3655  	// lgdt, sgdt, lidt, sidt
  3656  	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
  3657  	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
  3658  	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
  3659  	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
  3660  	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
  3661  	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
  3662  	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
  3663  	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
  3664  
  3665  	// lldt, sldt
  3666  	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
  3667  	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},
  3668  
  3669  	// lmsw, smsw
  3670  	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
  3671  	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},
  3672  
  3673  	// ltr, str
  3674  	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
  3675  	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},
  3676  
  3677  	/* load full pointer - unsupported
  3678  	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
  3679  	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
  3680  	*/
  3681  
  3682  	// double shift
  3683  	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3684  	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3685  	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3686  	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3687  	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3688  	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3689  	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3690  	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3691  	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3692  	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3693  	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3694  	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3695  	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3696  	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3697  	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3698  	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3699  	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3700  	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3701  
  3702  	// load TLS base
  3703  	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
  3704  	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
  3705  	{0, 0, 0, 0, 0, [4]uint8{}},
  3706  }
  3707  
  3708  func isax(a *obj.Addr) bool {
  3709  	switch a.Reg {
  3710  	case REG_AX, REG_AL, REG_AH:
  3711  		return true
  3712  	}
  3713  
  3714  	if a.Index == REG_AX {
  3715  		return true
  3716  	}
  3717  	return false
  3718  }
  3719  
  3720  func subreg(p *obj.Prog, from int, to int) {
  3721  	if false { /* debug['Q'] */
  3722  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3723  	}
  3724  
  3725  	if int(p.From.Reg) == from {
  3726  		p.From.Reg = int16(to)
  3727  		p.Ft = 0
  3728  	}
  3729  
  3730  	if int(p.To.Reg) == from {
  3731  		p.To.Reg = int16(to)
  3732  		p.Tt = 0
  3733  	}
  3734  
  3735  	if int(p.From.Index) == from {
  3736  		p.From.Index = int16(to)
  3737  		p.Ft = 0
  3738  	}
  3739  
  3740  	if int(p.To.Index) == from {
  3741  		p.To.Index = int16(to)
  3742  		p.Tt = 0
  3743  	}
  3744  
  3745  	if false { /* debug['Q'] */
  3746  		fmt.Printf("%v\n", p)
  3747  	}
  3748  }
  3749  
  3750  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3751  	switch op {
  3752  	case Pm, Pe, Pf2, Pf3:
  3753  		if osize != 1 {
  3754  			if op != Pm {
  3755  				ab.Put1(byte(op))
  3756  			}
  3757  			ab.Put1(Pm)
  3758  			z++
  3759  			op = int(o.op[z])
  3760  			break
  3761  		}
  3762  		fallthrough
  3763  
  3764  	default:
  3765  		if ab.Len() == 0 || ab.Last() != Pm {
  3766  			ab.Put1(Pm)
  3767  		}
  3768  	}
  3769  
  3770  	ab.Put1(byte(op))
  3771  	return z
  3772  }
  3773  
  3774  var bpduff1 = []byte{
  3775  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3776  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3777  }
  3778  
  3779  var bpduff2 = []byte{
  3780  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3781  }
  3782  
// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Any of rm, v, r and k may be nil; a nil argument leaves the prefix bits
// derived from it at their default values.
//
// Suffix-driven features (zeroing, rounding, broadcast, SAE) are looked up
// from p.Scond in evexSuffixMap. Combinations that the instruction form or
// the operands do not permit are reported through ctxt.Diag; encoding still
// proceeds after a diagnostic.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The register-extension bits default to 1 and are cleared to 0 when
	// the corresponding regrex flag is set for the operand's register.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, the X bit instead reflects the
		// RxrEvex flag of the base register rm.Reg.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is stored with all four bits flipped;
	// a nil v therefore yields 0b1111.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE all set the same b bit; the first
	// matching case wins.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// With rounding, the L'L field carries the rounding mode
		// instead of the vector length.
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is cleared by the RxrEvex flag of the index register of rm if
	// present, otherwise by that of the v register.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
  3889  
  3890  // Emit VEX prefix and opcode byte.
  3891  // The three addresses are the r/m, vvvv, and reg fields.
  3892  // The reg and rm arguments appear in the same order as the
  3893  // arguments to asmand, which typically follows the call to asmvex.
  3894  // The final two arguments are the VEX prefix (see encoding above)
  3895  // and the opcode byte.
  3896  // For details about vex prefix see:
  3897  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3898  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3899  	ab.vexflag = true
  3900  	rexR := 0
  3901  	if r != nil {
  3902  		rexR = regrex[r.Reg] & Rxr
  3903  	}
  3904  	rexB := 0
  3905  	rexX := 0
  3906  	if rm != nil {
  3907  		rexB = regrex[rm.Reg] & Rxb
  3908  		rexX = regrex[rm.Index] & Rxx
  3909  	}
  3910  	vexM := (vex >> 3) & 0x7
  3911  	vexWLP := vex & 0x87
  3912  	vexV := byte(0)
  3913  	if v != nil {
  3914  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3915  	}
  3916  	vexV ^= 0xF
  3917  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3918  		// Can use 2-byte encoding.
  3919  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3920  	} else {
  3921  		// Must use 3-byte encoding.
  3922  		ab.Put3(0xc4,
  3923  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3924  			vexV<<3|vexWLP,
  3925  		)
  3926  	}
  3927  	ab.Put1(opcode)
  3928  }
  3929  
  3930  // regIndex returns register index that fits in 5 bits.
  3931  //
  3932  //	R         : 3 bit | legacy instructions     | N/A
  3933  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  3934  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  3935  //
  3936  // Examples:
  3937  //	REG_Z30 => 30
  3938  //	REG_X15 => 15
  3939  //	REG_R9  => 9
  3940  //	REG_AX  => 0
  3941  //
  3942  func regIndex(r int16) int {
  3943  	lower3bits := reg[r]
  3944  	high4bit := regrex[r] & Rxr << 1
  3945  	high5bit := regrex[r] & RxrEvex << 0
  3946  	return lower3bits | high4bit | high5bit
  3947  }
  3948  
  3949  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  3950  // Reports errors via ctxt.
  3951  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  3952  	// If any pair of the index, mask, or destination registers
  3953  	// are the same, illegal instruction trap (#UD) is triggered.
  3954  	index := regIndex(p.GetFrom3().Index)
  3955  	mask := regIndex(p.From.Reg)
  3956  	dest := regIndex(p.To.Reg)
  3957  	if dest == mask || dest == index || mask == index {
  3958  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  3959  		return false
  3960  	}
  3961  
  3962  	return true
  3963  }
  3964  
  3965  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  3966  // Reports errors via ctxt.
  3967  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  3968  	// Illegal instruction trap (#UD) is triggered if the destination vector
  3969  	// register is the same as index vector in VSIB.
  3970  	index := regIndex(p.From.Index)
  3971  	dest := regIndex(p.To.Reg)
  3972  	if dest == index {
  3973  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  3974  		return false
  3975  	}
  3976  
  3977  	return true
  3978  }
  3979  
  3980  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  3981  	o := opindex[p.As&obj.AMask]
  3982  
  3983  	if o == nil {
  3984  		ctxt.Diag("asmins: missing op %v", p)
  3985  		return
  3986  	}
  3987  
  3988  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  3989  		ab.Put1(byte(pre))
  3990  	}
  3991  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  3992  		ab.Put1(byte(pre))
  3993  	}
  3994  
  3995  	// Checks to warn about instruction/arguments combinations that
  3996  	// will unconditionally trigger illegal instruction trap (#UD).
  3997  	switch p.As {
  3998  	case AVGATHERDPD,
  3999  		AVGATHERQPD,
  4000  		AVGATHERDPS,
  4001  		AVGATHERQPS,
  4002  		AVPGATHERDD,
  4003  		AVPGATHERQD,
  4004  		AVPGATHERDQ,
  4005  		AVPGATHERQQ:
  4006  		// AVX512 gather requires explicit K mask.
  4007  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4008  			if !avx512gatherValid(ctxt, p) {
  4009  				return
  4010  			}
  4011  		} else {
  4012  			if !avx2gatherValid(ctxt, p) {
  4013  				return
  4014  			}
  4015  		}
  4016  	}
  4017  
  4018  	if p.Ft == 0 {
  4019  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4020  	}
  4021  	if p.Tt == 0 {
  4022  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4023  	}
  4024  
  4025  	ft := int(p.Ft) * Ymax
  4026  	var f3t int
  4027  	tt := int(p.Tt) * Ymax
  4028  
  4029  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4030  	z := 0
  4031  	var a *obj.Addr
  4032  	var l int
  4033  	var op int
  4034  	var q *obj.Prog
  4035  	var r *obj.Reloc
  4036  	var rel obj.Reloc
  4037  	var v int64
  4038  
  4039  	args := make([]int, 0, argListMax)
  4040  	if ft != Ynone*Ymax {
  4041  		args = append(args, ft)
  4042  	}
  4043  	for i := range p.RestArgs {
  4044  		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
  4045  	}
  4046  	if tt != Ynone*Ymax {
  4047  		args = append(args, tt)
  4048  	}
  4049  
  4050  	for _, yt := range o.ytab {
  4051  		// ytab matching is purely args-based,
  4052  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4053  		// add EVEX-only filter that will reject non-EVEX matches.
  4054  		//
  4055  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4056  		// Without this rule, operands will lead to VEX-encoded form
  4057  		// and produce "c5b15813" encoding.
  4058  		if !yt.match(args) {
  4059  			// "xo" is always zero for VEX/EVEX encoded insts.
  4060  			z += int(yt.zoffset) + xo
  4061  		} else {
  4062  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4063  				// Do not signal error and continue to search
  4064  				// for matching EVEX-encoded form.
  4065  				z += int(yt.zoffset)
  4066  				continue
  4067  			}
  4068  
  4069  			switch o.prefix {
  4070  			case Px1: // first option valid only in 32-bit mode
  4071  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4072  					z += int(yt.zoffset) + xo
  4073  					continue
  4074  				}
  4075  			case Pq: // 16 bit escape and opcode escape
  4076  				ab.Put2(Pe, Pm)
  4077  
  4078  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4079  				ab.rexflag |= Pw
  4080  				ab.Put2(Pe, Pm)
  4081  
  4082  			case Pq4: // 66 0F 38
  4083  				ab.Put3(0x66, 0x0F, 0x38)
  4084  
  4085  			case Pq4w: // 66 0F 38 + REX.W
  4086  				ab.rexflag |= Pw
  4087  				ab.Put3(0x66, 0x0F, 0x38)
  4088  
  4089  			case Pq5: // F3 0F 38
  4090  				ab.Put3(0xF3, 0x0F, 0x38)
  4091  
  4092  			case Pq5w: //  F3 0F 38 + REX.W
  4093  				ab.rexflag |= Pw
  4094  				ab.Put3(0xF3, 0x0F, 0x38)
  4095  
  4096  			case Pf2, // xmm opcode escape
  4097  				Pf3:
  4098  				ab.Put2(o.prefix, Pm)
  4099  
  4100  			case Pef3:
  4101  				ab.Put3(Pe, Pf3, Pm)
  4102  
  4103  			case Pfw: // xmm opcode escape + REX.W
  4104  				ab.rexflag |= Pw
  4105  				ab.Put2(Pf3, Pm)
  4106  
  4107  			case Pm: // opcode escape
  4108  				ab.Put1(Pm)
  4109  
  4110  			case Pe: // 16 bit escape
  4111  				ab.Put1(Pe)
  4112  
  4113  			case Pw: // 64-bit escape
  4114  				if ctxt.Arch.Family != sys.AMD64 {
  4115  					ctxt.Diag("asmins: illegal 64: %v", p)
  4116  				}
  4117  				ab.rexflag |= Pw
  4118  
  4119  			case Pw8: // 64-bit escape if z >= 8
  4120  				if z >= 8 {
  4121  					if ctxt.Arch.Family != sys.AMD64 {
  4122  						ctxt.Diag("asmins: illegal 64: %v", p)
  4123  					}
  4124  					ab.rexflag |= Pw
  4125  				}
  4126  
  4127  			case Pb: // botch
  4128  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4129  					goto bad
  4130  				}
  4131  				// NOTE(rsc): This is probably safe to do always,
  4132  				// but when enabled it chooses different encodings
  4133  				// than the old cmd/internal/obj/i386 code did,
  4134  				// which breaks our "same bits out" checks.
  4135  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4136  				// in the original obj/i386, and it would encode
  4137  				// (using a valid, shorter form) as 3c 00 if we enabled
  4138  				// the call to bytereg here.
  4139  				if ctxt.Arch.Family == sys.AMD64 {
  4140  					bytereg(&p.From, &p.Ft)
  4141  					bytereg(&p.To, &p.Tt)
  4142  				}
  4143  
  4144  			case P32: // 32 bit but illegal if 64-bit mode
  4145  				if ctxt.Arch.Family == sys.AMD64 {
  4146  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4147  				}
  4148  
  4149  			case Py: // 64-bit only, no prefix
  4150  				if ctxt.Arch.Family != sys.AMD64 {
  4151  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4152  				}
  4153  
  4154  			case Py1: // 64-bit only if z < 1, no prefix
  4155  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4156  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4157  				}
  4158  
  4159  			case Py3: // 64-bit only if z < 3, no prefix
  4160  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4161  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4162  				}
  4163  			}
  4164  
  4165  			if z >= len(o.op) {
  4166  				log.Fatalf("asmins bad table %v", p)
  4167  			}
  4168  			op = int(o.op[z])
  4169  			if op == 0x0f {
  4170  				ab.Put1(byte(op))
  4171  				z++
  4172  				op = int(o.op[z])
  4173  			}
  4174  
  4175  			switch yt.zcase {
  4176  			default:
  4177  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4178  				return
  4179  
  4180  			case Zpseudo:
  4181  				break
  4182  
  4183  			case Zlit:
  4184  				ab.PutOpBytesLit(z, &o.op)
  4185  
  4186  			case Zlitr_m:
  4187  				ab.PutOpBytesLit(z, &o.op)
  4188  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4189  
  4190  			case Zlitm_r:
  4191  				ab.PutOpBytesLit(z, &o.op)
  4192  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4193  
  4194  			case Zlit_m_r:
  4195  				ab.PutOpBytesLit(z, &o.op)
  4196  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4197  
  4198  			case Zmb_r:
  4199  				bytereg(&p.From, &p.Ft)
  4200  				fallthrough
  4201  
  4202  			case Zm_r:
  4203  				ab.Put1(byte(op))
  4204  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4205  
  4206  			case Z_m_r:
  4207  				ab.Put1(byte(op))
  4208  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4209  
  4210  			case Zm2_r:
  4211  				ab.Put2(byte(op), o.op[z+1])
  4212  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4213  
  4214  			case Zm_r_xm:
  4215  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4216  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4217  
  4218  			case Zm_r_xm_nr:
  4219  				ab.rexflag = 0
  4220  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4221  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4222  
  4223  			case Zm_r_i_xm:
  4224  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4225  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4226  				ab.Put1(byte(p.To.Offset))
  4227  
  4228  			case Zibm_r, Zibr_m:
  4229  				ab.PutOpBytesLit(z, &o.op)
  4230  				if yt.zcase == Zibr_m {
  4231  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4232  				} else {
  4233  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4234  				}
  4235  				switch {
  4236  				default:
  4237  					ab.Put1(byte(p.From.Offset))
  4238  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4239  					ab.PutInt16(int16(p.From.Offset))
  4240  				case yt.args[0] == Yi32:
  4241  					ab.PutInt32(int32(p.From.Offset))
  4242  				}
  4243  
  4244  			case Zaut_r:
  4245  				ab.Put1(0x8d) // leal
  4246  				if p.From.Type != obj.TYPE_ADDR {
  4247  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4248  				}
  4249  				p.From.Type = obj.TYPE_MEM
  4250  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4251  				p.From.Type = obj.TYPE_ADDR
  4252  
  4253  			case Zm_o:
  4254  				ab.Put1(byte(op))
  4255  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4256  
  4257  			case Zr_m:
  4258  				ab.Put1(byte(op))
  4259  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4260  
  4261  			case Zvex:
  4262  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4263  
  4264  			case Zvex_rm_v_r:
  4265  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4266  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4267  
  4268  			case Zvex_rm_v_ro:
  4269  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4270  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4271  
  4272  			case Zvex_i_rm_vo:
  4273  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4274  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4275  				ab.Put1(byte(p.From.Offset))
  4276  
  4277  			case Zvex_i_r_v:
  4278  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4279  				regnum := byte(0x7)
  4280  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4281  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4282  				} else {
  4283  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4284  				}
  4285  				ab.Put1(o.op[z+2] | regnum)
  4286  				ab.Put1(byte(p.From.Offset))
  4287  
  4288  			case Zvex_i_rm_v_r:
  4289  				imm, from, from3, to := unpackOps4(p)
  4290  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4291  				ab.asmand(ctxt, cursym, p, from, to)
  4292  				ab.Put1(byte(imm.Offset))
  4293  
  4294  			case Zvex_i_rm_r:
  4295  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4296  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4297  				ab.Put1(byte(p.From.Offset))
  4298  
  4299  			case Zvex_v_rm_r:
  4300  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4301  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4302  
  4303  			case Zvex_r_v_rm:
  4304  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4305  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4306  
  4307  			case Zvex_rm_r_vo:
  4308  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4309  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4310  
  4311  			case Zvex_i_r_rm:
  4312  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4313  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4314  				ab.Put1(byte(p.From.Offset))
  4315  
  4316  			case Zvex_hr_rm_v_r:
  4317  				hr, from, from3, to := unpackOps4(p)
  4318  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4319  				ab.asmand(ctxt, cursym, p, from, to)
  4320  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4321  
  4322  			case Zevex_k_rmo:
  4323  				ab.evex = newEVEXBits(z, &o.op)
  4324  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4325  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4326  
  4327  			case Zevex_i_rm_vo:
  4328  				ab.evex = newEVEXBits(z, &o.op)
  4329  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4330  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4331  				ab.Put1(byte(p.From.Offset))
  4332  
  4333  			case Zevex_i_rm_k_vo:
  4334  				imm, from, kmask, to := unpackOps4(p)
  4335  				ab.evex = newEVEXBits(z, &o.op)
  4336  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4337  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4338  				ab.Put1(byte(imm.Offset))
  4339  
  4340  			case Zevex_i_r_rm:
  4341  				ab.evex = newEVEXBits(z, &o.op)
  4342  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4343  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4344  				ab.Put1(byte(p.From.Offset))
  4345  
  4346  			case Zevex_i_r_k_rm:
  4347  				imm, from, kmask, to := unpackOps4(p)
  4348  				ab.evex = newEVEXBits(z, &o.op)
  4349  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4350  				ab.asmand(ctxt, cursym, p, to, from)
  4351  				ab.Put1(byte(imm.Offset))
  4352  
  4353  			case Zevex_i_rm_r:
  4354  				ab.evex = newEVEXBits(z, &o.op)
  4355  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4356  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4357  				ab.Put1(byte(p.From.Offset))
  4358  
  4359  			case Zevex_i_rm_k_r:
  4360  				imm, from, kmask, to := unpackOps4(p)
  4361  				ab.evex = newEVEXBits(z, &o.op)
  4362  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4363  				ab.asmand(ctxt, cursym, p, from, to)
  4364  				ab.Put1(byte(imm.Offset))
  4365  
  4366  			case Zevex_i_rm_v_r:
  4367  				imm, from, from3, to := unpackOps4(p)
  4368  				ab.evex = newEVEXBits(z, &o.op)
  4369  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4370  				ab.asmand(ctxt, cursym, p, from, to)
  4371  				ab.Put1(byte(imm.Offset))
  4372  
  4373  			case Zevex_i_rm_v_k_r:
  4374  				imm, from, from3, kmask, to := unpackOps5(p)
  4375  				ab.evex = newEVEXBits(z, &o.op)
  4376  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4377  				ab.asmand(ctxt, cursym, p, from, to)
  4378  				ab.Put1(byte(imm.Offset))
  4379  
  4380  			case Zevex_r_v_rm:
  4381  				ab.evex = newEVEXBits(z, &o.op)
  4382  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4383  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4384  
  4385  			case Zevex_rm_v_r:
  4386  				ab.evex = newEVEXBits(z, &o.op)
  4387  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4388  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4389  
  4390  			case Zevex_rm_k_r:
  4391  				ab.evex = newEVEXBits(z, &o.op)
  4392  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4393  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4394  
  4395  			case Zevex_r_k_rm:
  4396  				ab.evex = newEVEXBits(z, &o.op)
  4397  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4398  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4399  
  4400  			case Zevex_rm_v_k_r:
  4401  				from, from3, kmask, to := unpackOps4(p)
  4402  				ab.evex = newEVEXBits(z, &o.op)
  4403  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4404  				ab.asmand(ctxt, cursym, p, from, to)
  4405  
  4406  			case Zevex_r_v_k_rm:
  4407  				from, from3, kmask, to := unpackOps4(p)
  4408  				ab.evex = newEVEXBits(z, &o.op)
  4409  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4410  				ab.asmand(ctxt, cursym, p, to, from)
  4411  
  4412  			case Zr_m_xm:
  4413  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4414  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4415  
  4416  			case Zr_m_xm_nr:
  4417  				ab.rexflag = 0
  4418  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4419  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4420  
  4421  			case Zo_m:
  4422  				ab.Put1(byte(op))
  4423  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4424  
  4425  			case Zcallindreg:
  4426  				r = obj.Addrel(cursym)
  4427  				r.Off = int32(p.Pc)
  4428  				r.Type = objabi.R_CALLIND
  4429  				r.Siz = 0
  4430  				fallthrough
  4431  
  4432  			case Zo_m64:
  4433  				ab.Put1(byte(op))
  4434  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4435  
  4436  			case Zm_ibo:
  4437  				ab.Put1(byte(op))
  4438  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4439  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4440  
  4441  			case Zibo_m:
  4442  				ab.Put1(byte(op))
  4443  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4444  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4445  
  4446  			case Zibo_m_xm:
  4447  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4448  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4449  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4450  
  4451  			case Z_ib, Zib_:
  4452  				if yt.zcase == Zib_ {
  4453  					a = &p.From
  4454  				} else {
  4455  					a = &p.To
  4456  				}
  4457  				ab.Put1(byte(op))
  4458  				if p.As == AXABORT {
  4459  					ab.Put1(o.op[z+1])
  4460  				}
  4461  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4462  
  4463  			case Zib_rp:
  4464  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4465  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4466  
  4467  			case Zil_rp:
  4468  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4469  				ab.Put1(byte(op + reg[p.To.Reg]))
  4470  				if o.prefix == Pe {
  4471  					v = vaddr(ctxt, p, &p.From, nil)
  4472  					ab.PutInt16(int16(v))
  4473  				} else {
  4474  					ab.relput4(ctxt, cursym, p, &p.From)
  4475  				}
  4476  
  4477  			case Zo_iw:
  4478  				ab.Put1(byte(op))
  4479  				if p.From.Type != obj.TYPE_NONE {
  4480  					v = vaddr(ctxt, p, &p.From, nil)
  4481  					ab.PutInt16(int16(v))
  4482  				}
  4483  
  4484  			case Ziq_rp:
  4485  				v = vaddr(ctxt, p, &p.From, &rel)
  4486  				l = int(v >> 32)
  4487  				if l == 0 && rel.Siz != 8 {
  4488  					ab.rexflag &^= (0x40 | Rxw)
  4489  
  4490  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4491  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4492  					if rel.Type != 0 {
  4493  						r = obj.Addrel(cursym)
  4494  						*r = rel
  4495  						r.Off = int32(p.Pc + int64(ab.Len()))
  4496  					}
  4497  
  4498  					ab.PutInt32(int32(v))
  4499  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4500  					ab.Put1(0xc7)
  4501  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4502  
  4503  					ab.PutInt32(int32(v)) // need all 8
  4504  				} else {
  4505  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4506  					ab.Put1(byte(op + reg[p.To.Reg]))
  4507  					if rel.Type != 0 {
  4508  						r = obj.Addrel(cursym)
  4509  						*r = rel
  4510  						r.Off = int32(p.Pc + int64(ab.Len()))
  4511  					}
  4512  
  4513  					ab.PutInt64(v)
  4514  				}
  4515  
  4516  			case Zib_rr:
  4517  				ab.Put1(byte(op))
  4518  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4519  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4520  
  4521  			case Z_il, Zil_:
  4522  				if yt.zcase == Zil_ {
  4523  					a = &p.From
  4524  				} else {
  4525  					a = &p.To
  4526  				}
  4527  				ab.Put1(byte(op))
  4528  				if o.prefix == Pe {
  4529  					v = vaddr(ctxt, p, a, nil)
  4530  					ab.PutInt16(int16(v))
  4531  				} else {
  4532  					ab.relput4(ctxt, cursym, p, a)
  4533  				}
  4534  
  4535  			case Zm_ilo, Zilo_m:
  4536  				ab.Put1(byte(op))
  4537  				if yt.zcase == Zilo_m {
  4538  					a = &p.From
  4539  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4540  				} else {
  4541  					a = &p.To
  4542  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4543  				}
  4544  
  4545  				if o.prefix == Pe {
  4546  					v = vaddr(ctxt, p, a, nil)
  4547  					ab.PutInt16(int16(v))
  4548  				} else {
  4549  					ab.relput4(ctxt, cursym, p, a)
  4550  				}
  4551  
  4552  			case Zil_rr:
  4553  				ab.Put1(byte(op))
  4554  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4555  				if o.prefix == Pe {
  4556  					v = vaddr(ctxt, p, &p.From, nil)
  4557  					ab.PutInt16(int16(v))
  4558  				} else {
  4559  					ab.relput4(ctxt, cursym, p, &p.From)
  4560  				}
  4561  
  4562  			case Z_rp:
  4563  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4564  				ab.Put1(byte(op + reg[p.To.Reg]))
  4565  
  4566  			case Zrp_:
  4567  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4568  				ab.Put1(byte(op + reg[p.From.Reg]))
  4569  
  4570  			case Zcallcon, Zjmpcon:
  4571  				if yt.zcase == Zcallcon {
  4572  					ab.Put1(byte(op))
  4573  				} else {
  4574  					ab.Put1(o.op[z+1])
  4575  				}
  4576  				r = obj.Addrel(cursym)
  4577  				r.Off = int32(p.Pc + int64(ab.Len()))
  4578  				r.Type = objabi.R_PCREL
  4579  				r.Siz = 4
  4580  				r.Add = p.To.Offset
  4581  				ab.PutInt32(0)
  4582  
  4583  			case Zcallind:
  4584  				ab.Put2(byte(op), o.op[z+1])
  4585  				r = obj.Addrel(cursym)
  4586  				r.Off = int32(p.Pc + int64(ab.Len()))
  4587  				if ctxt.Arch.Family == sys.AMD64 {
  4588  					r.Type = objabi.R_PCREL
  4589  				} else {
  4590  					r.Type = objabi.R_ADDR
  4591  				}
  4592  				r.Siz = 4
  4593  				r.Add = p.To.Offset
  4594  				r.Sym = p.To.Sym
  4595  				ab.PutInt32(0)
  4596  
  4597  			case Zcall, Zcallduff:
  4598  				if p.To.Sym == nil {
  4599  					ctxt.Diag("call without target")
  4600  					ctxt.DiagFlush()
  4601  					log.Fatalf("bad code")
  4602  				}
  4603  
  4604  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4605  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4606  				}
  4607  
  4608  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4609  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4610  					// (the call jumps into the middle of the function).
  4611  					// This makes it possible to see call sites for duffcopy/duffzero in
  4612  					// BP-based profiling tools like Linux perf (which is the
  4613  					// whole point of obj.Framepointer_enabled).
  4614  					// MOVQ BP, -16(SP)
  4615  					// LEAQ -16(SP), BP
  4616  					ab.Put(bpduff1)
  4617  				}
  4618  				ab.Put1(byte(op))
  4619  				r = obj.Addrel(cursym)
  4620  				r.Off = int32(p.Pc + int64(ab.Len()))
  4621  				r.Sym = p.To.Sym
  4622  				r.Add = p.To.Offset
  4623  				r.Type = objabi.R_CALL
  4624  				r.Siz = 4
  4625  				ab.PutInt32(0)
  4626  
  4627  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4628  					// Pop BP pushed above.
  4629  					// MOVQ 0(BP), BP
  4630  					ab.Put(bpduff2)
  4631  				}
  4632  
  4633  			// TODO: jump across functions needs reloc
  4634  			case Zbr, Zjmp, Zloop:
  4635  				if p.As == AXBEGIN {
  4636  					ab.Put1(byte(op))
  4637  				}
  4638  				if p.To.Sym != nil {
  4639  					if yt.zcase != Zjmp {
  4640  						ctxt.Diag("branch to ATEXT")
  4641  						ctxt.DiagFlush()
  4642  						log.Fatalf("bad code")
  4643  					}
  4644  
  4645  					ab.Put1(o.op[z+1])
  4646  					r = obj.Addrel(cursym)
  4647  					r.Off = int32(p.Pc + int64(ab.Len()))
  4648  					r.Sym = p.To.Sym
  4649  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4650  					// it can point to a trampoline instead of the destination itself.
  4651  					r.Type = objabi.R_CALL
  4652  					r.Siz = 4
  4653  					ab.PutInt32(0)
  4654  					break
  4655  				}
  4656  
  4657  				// Assumes q is in this function.
  4658  				// TODO: Check in input, preserve in brchain.
  4659  
  4660  				// Fill in backward jump now.
  4661  				q = p.Pcond
  4662  
  4663  				if q == nil {
  4664  					ctxt.Diag("jmp/branch/loop without target")
  4665  					ctxt.DiagFlush()
  4666  					log.Fatalf("bad code")
  4667  				}
  4668  
  4669  				if p.Back&branchBackwards != 0 {
  4670  					v = q.Pc - (p.Pc + 2)
  4671  					if v >= -128 && p.As != AXBEGIN {
  4672  						if p.As == AJCXZL {
  4673  							ab.Put1(0x67)
  4674  						}
  4675  						ab.Put2(byte(op), byte(v))
  4676  					} else if yt.zcase == Zloop {
  4677  						ctxt.Diag("loop too far: %v", p)
  4678  					} else {
  4679  						v -= 5 - 2
  4680  						if p.As == AXBEGIN {
  4681  							v--
  4682  						}
  4683  						if yt.zcase == Zbr {
  4684  							ab.Put1(0x0f)
  4685  							v--
  4686  						}
  4687  
  4688  						ab.Put1(o.op[z+1])
  4689  						ab.PutInt32(int32(v))
  4690  					}
  4691  
  4692  					break
  4693  				}
  4694  
  4695  				// Annotate target; will fill in later.
  4696  				p.Forwd = q.Rel
  4697  
  4698  				q.Rel = p
  4699  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4700  					if p.As == AJCXZL {
  4701  						ab.Put1(0x67)
  4702  					}
  4703  					ab.Put2(byte(op), 0)
  4704  				} else if yt.zcase == Zloop {
  4705  					ctxt.Diag("loop too far: %v", p)
  4706  				} else {
  4707  					if yt.zcase == Zbr {
  4708  						ab.Put1(0x0f)
  4709  					}
  4710  					ab.Put1(o.op[z+1])
  4711  					ab.PutInt32(0)
  4712  				}
  4713  
  4714  			case Zbyte:
  4715  				v = vaddr(ctxt, p, &p.From, &rel)
  4716  				if rel.Siz != 0 {
  4717  					rel.Siz = uint8(op)
  4718  					r = obj.Addrel(cursym)
  4719  					*r = rel
  4720  					r.Off = int32(p.Pc + int64(ab.Len()))
  4721  				}
  4722  
  4723  				ab.Put1(byte(v))
  4724  				if op > 1 {
  4725  					ab.Put1(byte(v >> 8))
  4726  					if op > 2 {
  4727  						ab.PutInt16(int16(v >> 16))
  4728  						if op > 4 {
  4729  							ab.PutInt32(int32(v >> 32))
  4730  						}
  4731  					}
  4732  				}
  4733  			}
  4734  
  4735  			return
  4736  		}
  4737  	}
  4738  	f3t = Ynone * Ymax
  4739  	if p.GetFrom3() != nil {
  4740  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4741  	}
  4742  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4743  		var pp obj.Prog
  4744  		var t []byte
  4745  		if p.As == mo[0].as {
  4746  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4747  				t = mo[0].op[:]
  4748  				switch mo[0].code {
  4749  				default:
  4750  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4751  
  4752  				case movLit:
  4753  					for z = 0; t[z] != 0; z++ {
  4754  						ab.Put1(t[z])
  4755  					}
  4756  
  4757  				case movRegMem:
  4758  					ab.Put1(t[0])
  4759  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4760  
  4761  				case movMemReg:
  4762  					ab.Put1(t[0])
  4763  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4764  
  4765  				case movRegMem2op: // r,m - 2op
  4766  					ab.Put2(t[0], t[1])
  4767  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4768  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4769  
  4770  				case movMemReg2op:
  4771  					ab.Put2(t[0], t[1])
  4772  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4773  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4774  
  4775  				case movFullPtr:
  4776  					if t[0] != 0 {
  4777  						ab.Put1(t[0])
  4778  					}
  4779  					switch p.To.Index {
  4780  					default:
  4781  						goto bad
  4782  
  4783  					case REG_DS:
  4784  						ab.Put1(0xc5)
  4785  
  4786  					case REG_SS:
  4787  						ab.Put2(0x0f, 0xb2)
  4788  
  4789  					case REG_ES:
  4790  						ab.Put1(0xc4)
  4791  
  4792  					case REG_FS:
  4793  						ab.Put2(0x0f, 0xb4)
  4794  
  4795  					case REG_GS:
  4796  						ab.Put2(0x0f, 0xb5)
  4797  					}
  4798  
  4799  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4800  
  4801  				case movDoubleShift:
  4802  					if t[0] == Pw {
  4803  						if ctxt.Arch.Family != sys.AMD64 {
  4804  							ctxt.Diag("asmins: illegal 64: %v", p)
  4805  						}
  4806  						ab.rexflag |= Pw
  4807  						t = t[1:]
  4808  					} else if t[0] == Pe {
  4809  						ab.Put1(Pe)
  4810  						t = t[1:]
  4811  					}
  4812  
  4813  					switch p.From.Type {
  4814  					default:
  4815  						goto bad
  4816  
  4817  					case obj.TYPE_CONST:
  4818  						ab.Put2(0x0f, t[0])
  4819  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4820  						ab.Put1(byte(p.From.Offset))
  4821  
  4822  					case obj.TYPE_REG:
  4823  						switch p.From.Reg {
  4824  						default:
  4825  							goto bad
  4826  
  4827  						case REG_CL, REG_CX:
  4828  							ab.Put2(0x0f, t[1])
  4829  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4830  						}
  4831  					}
  4832  
  4833  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4834  				// where you load the TLS base register into a register and then index off that
  4835  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4836  				// are handled in prefixof above and should not be listed here.
  4837  				case movTLSReg:
  4838  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4839  						ctxt.Diag("invalid load of TLS: %v", p)
  4840  					}
  4841  
  4842  					if ctxt.Arch.Family == sys.I386 {
  4843  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4844  						// where you load the TLS base register into a register and then index off that
  4845  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4846  						// are handled in prefixof above and should not be listed here.
  4847  						switch ctxt.Headtype {
  4848  						default:
  4849  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4850  
  4851  						case objabi.Hlinux, objabi.Hfreebsd:
  4852  							if ctxt.Flag_shared {
  4853  								// Note that this is not generating the same insns as the other cases.
  4854  								//     MOV TLS, dst
  4855  								// becomes
  4856  								//     call __x86.get_pc_thunk.dst
  4857  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4858  								// which is encoded as
  4859  								//     call __x86.get_pc_thunk.dst
  4860  								//     movq 0(dst), dst
  4861  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4862  								// is g, which we can't check here, but will when we assemble the second
  4863  								// instruction.
  4864  								dst := p.To.Reg
  4865  								ab.Put1(0xe8)
  4866  								r = obj.Addrel(cursym)
  4867  								r.Off = int32(p.Pc + int64(ab.Len()))
  4868  								r.Type = objabi.R_CALL
  4869  								r.Siz = 4
  4870  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4871  								ab.PutInt32(0)
  4872  
  4873  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4874  								r = obj.Addrel(cursym)
  4875  								r.Off = int32(p.Pc + int64(ab.Len()))
  4876  								r.Type = objabi.R_TLS_IE
  4877  								r.Siz = 4
  4878  								r.Add = 2
  4879  								ab.PutInt32(0)
  4880  							} else {
  4881  								// ELF TLS base is 0(GS).
  4882  								pp.From = p.From
  4883  
  4884  								pp.From.Type = obj.TYPE_MEM
  4885  								pp.From.Reg = REG_GS
  4886  								pp.From.Offset = 0
  4887  								pp.From.Index = REG_NONE
  4888  								pp.From.Scale = 0
  4889  								ab.Put2(0x65, // GS
  4890  									0x8B)
  4891  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4892  							}
  4893  						case objabi.Hplan9:
  4894  							pp.From = obj.Addr{}
  4895  							pp.From.Type = obj.TYPE_MEM
  4896  							pp.From.Name = obj.NAME_EXTERN
  4897  							pp.From.Sym = plan9privates
  4898  							pp.From.Offset = 0
  4899  							pp.From.Index = REG_NONE
  4900  							ab.Put1(0x8B)
  4901  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4902  
  4903  						case objabi.Hwindows:
  4904  							// Windows TLS base is always 0x14(FS).
  4905  							pp.From = p.From
  4906  
  4907  							pp.From.Type = obj.TYPE_MEM
  4908  							pp.From.Reg = REG_FS
  4909  							pp.From.Offset = 0x14
  4910  							pp.From.Index = REG_NONE
  4911  							pp.From.Scale = 0
  4912  							ab.Put2(0x64, // FS
  4913  								0x8B)
  4914  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4915  						}
  4916  						break
  4917  					}
  4918  
  4919  					switch ctxt.Headtype {
  4920  					default:
  4921  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4922  
  4923  					case objabi.Hlinux, objabi.Hfreebsd:
  4924  						if !ctxt.Flag_shared {
  4925  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  4926  						}
  4927  						// Note that this is not generating the same insn as the other cases.
  4928  						//     MOV TLS, R_to
  4929  						// becomes
  4930  						//     movq g@gottpoff(%rip), R_to
  4931  						// which is encoded as
  4932  						//     movq 0(%rip), R_to
  4933  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4934  						// is g, which we can't check here, but will when we assemble the second
  4935  						// instruction.
  4936  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4937  
  4938  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4939  						r = obj.Addrel(cursym)
  4940  						r.Off = int32(p.Pc + int64(ab.Len()))
  4941  						r.Type = objabi.R_TLS_IE
  4942  						r.Siz = 4
  4943  						r.Add = -4
  4944  						ab.PutInt32(0)
  4945  
  4946  					case objabi.Hplan9:
  4947  						pp.From = obj.Addr{}
  4948  						pp.From.Type = obj.TYPE_MEM
  4949  						pp.From.Name = obj.NAME_EXTERN
  4950  						pp.From.Sym = plan9privates
  4951  						pp.From.Offset = 0
  4952  						pp.From.Index = REG_NONE
  4953  						ab.rexflag |= Pw
  4954  						ab.Put1(0x8B)
  4955  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4956  
  4957  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4958  						// TLS base is 0(FS).
  4959  						pp.From = p.From
  4960  
  4961  						pp.From.Type = obj.TYPE_MEM
  4962  						pp.From.Name = obj.NAME_NONE
  4963  						pp.From.Reg = REG_NONE
  4964  						pp.From.Offset = 0
  4965  						pp.From.Index = REG_NONE
  4966  						pp.From.Scale = 0
  4967  						ab.rexflag |= Pw
  4968  						ab.Put2(0x64, // FS
  4969  							0x8B)
  4970  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4971  
  4972  					case objabi.Hwindows:
  4973  						// Windows TLS base is always 0x28(GS).
  4974  						pp.From = p.From
  4975  
  4976  						pp.From.Type = obj.TYPE_MEM
  4977  						pp.From.Name = obj.NAME_NONE
  4978  						pp.From.Reg = REG_GS
  4979  						pp.From.Offset = 0x28
  4980  						pp.From.Index = REG_NONE
  4981  						pp.From.Scale = 0
  4982  						ab.rexflag |= Pw
  4983  						ab.Put2(0x65, // GS
  4984  							0x8B)
  4985  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4986  					}
  4987  				}
  4988  				return
  4989  			}
  4990  		}
  4991  	}
  4992  	goto bad
  4993  
  4994  bad:
  4995  	if ctxt.Arch.Family != sys.AMD64 {
  4996  		// here, the assembly has failed.
  4997  		// if it's a byte instruction that has
  4998  		// unaddressable registers, try to
  4999  		// exchange registers and reissue the
  5000  		// instruction with the operands renamed.
  5001  		pp := *p
  5002  
  5003  		unbytereg(&pp.From, &pp.Ft)
  5004  		unbytereg(&pp.To, &pp.Tt)
  5005  
  5006  		z := int(p.From.Reg)
  5007  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5008  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5009  			// For now, different to keep bit-for-bit compatibility.
  5010  			if ctxt.Arch.Family == sys.I386 {
  5011  				breg := byteswapreg(ctxt, &p.To)
  5012  				if breg != REG_AX {
  5013  					ab.Put1(0x87) // xchg lhs,bx
  5014  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5015  					subreg(&pp, z, breg)
  5016  					ab.doasm(ctxt, cursym, &pp)
  5017  					ab.Put1(0x87) // xchg lhs,bx
  5018  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5019  				} else {
  5020  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5021  					subreg(&pp, z, REG_AX)
  5022  					ab.doasm(ctxt, cursym, &pp)
  5023  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5024  				}
  5025  				return
  5026  			}
  5027  
  5028  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5029  				// We certainly don't want to exchange
  5030  				// with AX if the op is MUL or DIV.
  5031  				ab.Put1(0x87) // xchg lhs,bx
  5032  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5033  				subreg(&pp, z, REG_BX)
  5034  				ab.doasm(ctxt, cursym, &pp)
  5035  				ab.Put1(0x87) // xchg lhs,bx
  5036  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5037  			} else {
  5038  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5039  				subreg(&pp, z, REG_AX)
  5040  				ab.doasm(ctxt, cursym, &pp)
  5041  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5042  			}
  5043  			return
  5044  		}
  5045  
  5046  		z = int(p.To.Reg)
  5047  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5048  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5049  			// For now, different to keep bit-for-bit compatibility.
  5050  			if ctxt.Arch.Family == sys.I386 {
  5051  				breg := byteswapreg(ctxt, &p.From)
  5052  				if breg != REG_AX {
  5053  					ab.Put1(0x87) //xchg rhs,bx
  5054  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5055  					subreg(&pp, z, breg)
  5056  					ab.doasm(ctxt, cursym, &pp)
  5057  					ab.Put1(0x87) // xchg rhs,bx
  5058  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5059  				} else {
  5060  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5061  					subreg(&pp, z, REG_AX)
  5062  					ab.doasm(ctxt, cursym, &pp)
  5063  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5064  				}
  5065  				return
  5066  			}
  5067  
  5068  			if isax(&p.From) {
  5069  				ab.Put1(0x87) // xchg rhs,bx
  5070  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5071  				subreg(&pp, z, REG_BX)
  5072  				ab.doasm(ctxt, cursym, &pp)
  5073  				ab.Put1(0x87) // xchg rhs,bx
  5074  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5075  			} else {
  5076  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5077  				subreg(&pp, z, REG_AX)
  5078  				ab.doasm(ctxt, cursym, &pp)
  5079  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5080  			}
  5081  			return
  5082  		}
  5083  	}
  5084  
  5085  	ctxt.Diag("invalid instruction: %v", p)
  5086  }
  5087  
  5088  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5089  // which is not referenced in a.
  5090  // If a is empty, it returns BX to account for MULB-like instructions
  5091  // that might use DX and AX.
  5092  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5093  	cana, canb, canc, cand := true, true, true, true
  5094  	if a.Type == obj.TYPE_NONE {
  5095  		cana, cand = false, false
  5096  	}
  5097  
  5098  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5099  		switch a.Reg {
  5100  		case REG_NONE:
  5101  			cana, cand = false, false
  5102  		case REG_AX, REG_AL, REG_AH:
  5103  			cana = false
  5104  		case REG_BX, REG_BL, REG_BH:
  5105  			canb = false
  5106  		case REG_CX, REG_CL, REG_CH:
  5107  			canc = false
  5108  		case REG_DX, REG_DL, REG_DH:
  5109  			cand = false
  5110  		}
  5111  	}
  5112  
  5113  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5114  		switch a.Index {
  5115  		case REG_AX:
  5116  			cana = false
  5117  		case REG_BX:
  5118  			canb = false
  5119  		case REG_CX:
  5120  			canc = false
  5121  		case REG_DX:
  5122  			cand = false
  5123  		}
  5124  	}
  5125  
  5126  	switch {
  5127  	case cana:
  5128  		return REG_AX
  5129  	case canb:
  5130  		return REG_BX
  5131  	case canc:
  5132  		return REG_CX
  5133  	case cand:
  5134  		return REG_DX
  5135  	default:
  5136  		ctxt.Diag("impossible byte register")
  5137  		ctxt.DiagFlush()
  5138  		log.Fatalf("bad code")
  5139  		return 0
  5140  	}
  5141  }
  5142  
  5143  func isbadbyte(a *obj.Addr) bool {
  5144  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5145  }
  5146  
  5147  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5148  	ab.Reset()
  5149  
  5150  	ab.rexflag = 0
  5151  	ab.vexflag = false
  5152  	ab.evexflag = false
  5153  	mark := ab.Len()
  5154  	ab.doasm(ctxt, cursym, p)
  5155  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5156  		// as befits the whole approach of the architecture,
  5157  		// the rex prefix must appear before the first opcode byte
  5158  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5159  		// before the 0f opcode escape!), or it might be ignored.
  5160  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5161  		if ctxt.Arch.Family != sys.AMD64 {
  5162  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5163  		}
  5164  		n := ab.Len()
  5165  		var np int
  5166  		for np = mark; np < n; np++ {
  5167  			c := ab.At(np)
  5168  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5169  				break
  5170  			}
  5171  		}
  5172  		ab.Insert(np, byte(0x40|ab.rexflag))
  5173  	}
  5174  
  5175  	n := ab.Len()
  5176  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5177  		r := &cursym.R[i]
  5178  		if int64(r.Off) < p.Pc {
  5179  			break
  5180  		}
  5181  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5182  			r.Off++
  5183  		}
  5184  		if r.Type == objabi.R_PCREL {
  5185  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5186  				// PC-relative addressing is relative to the end of the instruction,
  5187  				// but the relocations applied by the linker are relative to the end
  5188  				// of the relocation. Because immediate instruction
  5189  				// arguments can follow the PC-relative memory reference in the
  5190  				// instruction encoding, the two may not coincide. In this case,
  5191  				// adjust addend so that linker can keep relocating relative to the
  5192  				// end of the relocation.
  5193  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5194  			} else if ctxt.Arch.Family == sys.I386 {
  5195  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5196  				// assumes that the previous instruction loaded the PC of the end
  5197  				// of that instruction into CX, so the adjustment is relative to
  5198  				// that.
  5199  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5200  			}
  5201  		}
  5202  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5203  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5204  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5205  		}
  5206  
  5207  	}
  5208  }
  5209  
  5210  // unpackOps4 extracts 4 operands from p.
  5211  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5212  	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
  5213  }
  5214  
  5215  // unpackOps5 extracts 5 operands from p.
  5216  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5217  	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To
  5218  }