github.com/ccccaoqing/test@v0.0.0-20220510085219-3985d23445c0/src/liblink/asm6.c (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // Instruction layout.
    32  
    33  #include <u.h>
    34  #include <libc.h>
    35  #include <bio.h>
    36  #include <link.h>
    37  #include "../cmd/6l/6.out.h"
    38  #include "../runtime/stack.h"
    39  
    40  enum
    41  {
    42  	MaxAlign = 32,	// max data alignment
    43  	
    44  	// Loop alignment constants:
    45  	// want to align loop entry to LoopAlign-byte boundary,
    46  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    47  	// We define a loop entry as the target of a backward jump.
    48  	//
    49  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    50  	// and it aligns all jump targets, not just backward jump targets.
    51  	//
    52  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    53  	// is very slight but negative, so the alignment is disabled by
    54  	// setting MaxLoopPad = 0. The code is here for reference and
    55  	// for future experiments.
    56  	// 
    57  	LoopAlign = 16,
    58  	MaxLoopPad = 0,
    59  
    60  	FuncAlign = 16
    61  };
    62  
    63  typedef	struct	Optab	Optab;
    64  typedef	struct	Movtab	Movtab;
    65  
    66  struct	Optab
    67  {
    68  	short	as;
    69  	uchar*	ytab;
    70  	uchar	prefix;
    71  	uchar	op[23];
    72  };
    73  struct	Movtab
    74  {
    75  	short	as;
    76  	uchar	ft;
    77  	uchar	tt;
    78  	uchar	code;
    79  	uchar	op[4];
    80  };
    81  
    82  enum
    83  {
    84  	Yxxx		= 0,
    85  	Ynone,
    86  	Yi0,
    87  	Yi1,
    88  	Yi8,
    89  	Ys32,
    90  	Yi32,
    91  	Yi64,
    92  	Yiauto,
    93  	Yal,
    94  	Ycl,
    95  	Yax,
    96  	Ycx,
    97  	Yrb,
    98  	Yrl,
    99  	Yrf,
   100  	Yf0,
   101  	Yrx,
   102  	Ymb,
   103  	Yml,
   104  	Ym,
   105  	Ybr,
   106  	Ycol,
   107  
   108  	Ycs,	Yss,	Yds,	Yes,	Yfs,	Ygs,
   109  	Ygdtr,	Yidtr,	Yldtr,	Ymsw,	Ytask,
   110  	Ycr0,	Ycr1,	Ycr2,	Ycr3,	Ycr4,	Ycr5,	Ycr6,	Ycr7,	Ycr8,
   111  	Ydr0,	Ydr1,	Ydr2,	Ydr3,	Ydr4,	Ydr5,	Ydr6,	Ydr7,
   112  	Ytr0,	Ytr1,	Ytr2,	Ytr3,	Ytr4,	Ytr5,	Ytr6,	Ytr7,	Yrl32,	Yrl64,
   113  	Ymr, Ymm,
   114  	Yxr, Yxm,
   115  	Ytls,
   116  	Ymax,
   117  
   118  	Zxxx		= 0,
   119  
   120  	Zlit,
   121  	Zlitm_r,
   122  	Z_rp,
   123  	Zbr,
   124  	Zcall,
   125  	Zcallindreg,
   126  	Zib_,
   127  	Zib_rp,
   128  	Zibo_m,
   129  	Zibo_m_xm,
   130  	Zil_,
   131  	Zil_rp,
   132  	Ziq_rp,
   133  	Zilo_m,
   134  	Ziqo_m,
   135  	Zjmp,
   136  	Zloop,
   137  	Zo_iw,
   138  	Zm_o,
   139  	Zm_r,
   140  	Zm2_r,
   141  	Zm_r_xm,
   142  	Zm_r_i_xm,
   143  	Zm_r_3d,
   144  	Zm_r_xm_nr,
   145  	Zr_m_xm_nr,
   146  	Zibm_r,	/* mmx1,mmx2/mem64,imm8 */
   147  	Zmb_r,
   148  	Zaut_r,
   149  	Zo_m,
   150  	Zo_m64,
   151  	Zpseudo,
   152  	Zr_m,
   153  	Zr_m_xm,
   154  	Zr_m_i_xm,
   155  	Zrp_,
   156  	Z_ib,
   157  	Z_il,
   158  	Zm_ibo,
   159  	Zm_ilo,
   160  	Zib_rr,
   161  	Zil_rr,
   162  	Zclr,
   163  	Zbyte,
   164  	Zmax,
   165  
   166  	Px		= 0,
   167  	P32		= 0x32,	/* 32-bit only */
   168  	Pe		= 0x66,	/* operand escape */
   169  	Pm		= 0x0f,	/* 2byte opcode escape */
   170  	Pq		= 0xff,	/* both escapes: 66 0f */
   171  	Pb		= 0xfe,	/* byte operands */
   172  	Pf2		= 0xf2,	/* xmm escape 1: f2 0f */
   173  	Pf3		= 0xf3,	/* xmm escape 2: f3 0f */
   174  	Pq3		= 0x67, /* xmm escape 3: 66 48 0f */
   175  	Pw		= 0x48,	/* Rex.w */
   176  	Py		= 0x80,	/* defaults to 64-bit mode */
   177  
   178  	Rxf		= 1<<9,	/* internal flag for Rxr on from */
   179  	Rxt		= 1<<8,	/* internal flag for Rxr on to */
   180  	Rxw		= 1<<3,	/* =1, 64-bit operand size */
   181  	Rxr		= 1<<2,	/* extend modrm reg */
   182  	Rxx		= 1<<1,	/* extend sib index */
   183  	Rxb		= 1<<0,	/* extend modrm r/m, sib base, or opcode reg */
   184  
   185  	Maxand	= 10,		/* in -a output width of the byte codes */
   186  };
   187  
   188  static uchar ycover[Ymax*Ymax];
   189  static	int	reg[D_NONE];
   190  static	int	regrex[D_NONE+1];
   191  static	void	asmins(Link *ctxt, Prog *p);
   192  
   193  static uchar	ynone[] =
   194  {
   195  	Ynone,	Ynone,	Zlit,	1,
   196  	0
   197  };
   198  static uchar	ytext[] =
   199  {
   200  	Ymb,	Yi64,	Zpseudo,1,
   201  	0
   202  };
   203  static uchar	ynop[] =
   204  {
   205  	Ynone,	Ynone,	Zpseudo,0,
   206  	Ynone,	Yiauto,	Zpseudo,0,
   207  	Ynone,	Yml,	Zpseudo,0,
   208  	Ynone,	Yrf,	Zpseudo,0,
   209  	Ynone,	Yxr,	Zpseudo,0,
   210  	Yiauto,	Ynone,	Zpseudo,0,
   211  	Yml,	Ynone,	Zpseudo,0,
   212  	Yrf,	Ynone,	Zpseudo,0,
   213  	Yxr,	Ynone,	Zpseudo,1,
   214  	0
   215  };
   216  static uchar	yfuncdata[] =
   217  {
   218  	Yi32,	Ym,	Zpseudo,	0,
   219  	0
   220  };
   221  static uchar	ypcdata[] = 
   222  {
   223  	Yi32,	Yi32,	Zpseudo,	0,
   224  	0
   225  };
   226  static uchar	yxorb[] =
   227  {
   228  	Yi32,	Yal,	Zib_,	1,
   229  	Yi32,	Ymb,	Zibo_m,	2,
   230  	Yrb,	Ymb,	Zr_m,	1,
   231  	Ymb,	Yrb,	Zm_r,	1,
   232  	0
   233  };
   234  static uchar	yxorl[] =
   235  {
   236  	Yi8,	Yml,	Zibo_m,	2,
   237  	Yi32,	Yax,	Zil_,	1,
   238  	Yi32,	Yml,	Zilo_m,	2,
   239  	Yrl,	Yml,	Zr_m,	1,
   240  	Yml,	Yrl,	Zm_r,	1,
   241  	0
   242  };
   243  static uchar	yaddl[] =
   244  {
   245  	Yi8,	Yml,	Zibo_m,	2,
   246  	Yi32,	Yax,	Zil_,	1,
   247  	Yi32,	Yml,	Zilo_m,	2,
   248  	Yrl,	Yml,	Zr_m,	1,
   249  	Yml,	Yrl,	Zm_r,	1,
   250  	0
   251  };
   252  static uchar	yincb[] =
   253  {
   254  	Ynone,	Ymb,	Zo_m,	2,
   255  	0
   256  };
   257  static uchar	yincw[] =
   258  {
   259  	Ynone,	Yml,	Zo_m,	2,
   260  	0
   261  };
   262  static uchar	yincl[] =
   263  {
   264  	Ynone,	Yml,	Zo_m,	2,
   265  	0
   266  };
   267  static uchar	ycmpb[] =
   268  {
   269  	Yal,	Yi32,	Z_ib,	1,
   270  	Ymb,	Yi32,	Zm_ibo,	2,
   271  	Ymb,	Yrb,	Zm_r,	1,
   272  	Yrb,	Ymb,	Zr_m,	1,
   273  	0
   274  };
   275  static uchar	ycmpl[] =
   276  {
   277  	Yml,	Yi8,	Zm_ibo,	2,
   278  	Yax,	Yi32,	Z_il,	1,
   279  	Yml,	Yi32,	Zm_ilo,	2,
   280  	Yml,	Yrl,	Zm_r,	1,
   281  	Yrl,	Yml,	Zr_m,	1,
   282  	0
   283  };
   284  static uchar	yshb[] =
   285  {
   286  	Yi1,	Ymb,	Zo_m,	2,
   287  	Yi32,	Ymb,	Zibo_m,	2,
   288  	Ycx,	Ymb,	Zo_m,	2,
   289  	0
   290  };
   291  static uchar	yshl[] =
   292  {
   293  	Yi1,	Yml,	Zo_m,	2,
   294  	Yi32,	Yml,	Zibo_m,	2,
   295  	Ycl,	Yml,	Zo_m,	2,
   296  	Ycx,	Yml,	Zo_m,	2,
   297  	0
   298  };
   299  static uchar	ytestb[] =
   300  {
   301  	Yi32,	Yal,	Zib_,	1,
   302  	Yi32,	Ymb,	Zibo_m,	2,
   303  	Yrb,	Ymb,	Zr_m,	1,
   304  	Ymb,	Yrb,	Zm_r,	1,
   305  	0
   306  };
   307  static uchar	ytestl[] =
   308  {
   309  	Yi32,	Yax,	Zil_,	1,
   310  	Yi32,	Yml,	Zilo_m,	2,
   311  	Yrl,	Yml,	Zr_m,	1,
   312  	Yml,	Yrl,	Zm_r,	1,
   313  	0
   314  };
   315  static uchar	ymovb[] =
   316  {
   317  	Yrb,	Ymb,	Zr_m,	1,
   318  	Ymb,	Yrb,	Zm_r,	1,
   319  	Yi32,	Yrb,	Zib_rp,	1,
   320  	Yi32,	Ymb,	Zibo_m,	2,
   321  	0
   322  };
   323  static uchar	ymbs[] =
   324  {
   325  	Ymb,	Ynone,	Zm_o,	2,
   326  	0
   327  };
   328  static uchar	ybtl[] =
   329  {
   330  	Yi8,	Yml,	Zibo_m,	2,
   331  	Yrl,	Yml,	Zr_m,	1,
   332  	0
   333  };
   334  static uchar	ymovw[] =
   335  {
   336  	Yrl,	Yml,	Zr_m,	1,
   337  	Yml,	Yrl,	Zm_r,	1,
   338  	Yi0,	Yrl,	Zclr,	1,
   339  	Yi32,	Yrl,	Zil_rp,	1,
   340  	Yi32,	Yml,	Zilo_m,	2,
   341  	Yiauto,	Yrl,	Zaut_r,	2,
   342  	0
   343  };
   344  static uchar	ymovl[] =
   345  {
   346  	Yrl,	Yml,	Zr_m,	1,
   347  	Yml,	Yrl,	Zm_r,	1,
   348  	Yi0,	Yrl,	Zclr,	1,
   349  	Yi32,	Yrl,	Zil_rp,	1,
   350  	Yi32,	Yml,	Zilo_m,	2,
   351  	Yml,	Ymr,	Zm_r_xm,	1,	// MMX MOVD
   352  	Ymr,	Yml,	Zr_m_xm,	1,	// MMX MOVD
   353  	Yml,	Yxr,	Zm_r_xm,	2,	// XMM MOVD (32 bit)
   354  	Yxr,	Yml,	Zr_m_xm,	2,	// XMM MOVD (32 bit)
   355  	Yiauto,	Yrl,	Zaut_r,	2,
   356  	0
   357  };
   358  static uchar	yret[] =
   359  {
   360  	Ynone,	Ynone,	Zo_iw,	1,
   361  	Yi32,	Ynone,	Zo_iw,	1,
   362  	0
   363  };
   364  static uchar	ymovq[] =
   365  {
   366  	Yrl,	Yml,	Zr_m,	1,	// 0x89
   367  	Yml,	Yrl,	Zm_r,	1,	// 0x8b
   368  	Yi0,	Yrl,	Zclr,	1,	// 0x31
   369  	Ys32,	Yrl,	Zilo_m,	2,	// 32 bit signed 0xc7,(0)
   370  	Yi64,	Yrl,	Ziq_rp,	1,	// 0xb8 -- 32/64 bit immediate
   371  	Yi32,	Yml,	Zilo_m,	2,	// 0xc7,(0)
   372  	Ym,	Ymr,	Zm_r_xm_nr,	1,	// MMX MOVQ (shorter encoding)
   373  	Ymr,	Ym,	Zr_m_xm_nr,	1,	// MMX MOVQ
   374  	Ymm,	Ymr,	Zm_r_xm,	1,	// MMX MOVD
   375  	Ymr,	Ymm,	Zr_m_xm,	1,	// MMX MOVD
   376  	Yxr,	Ymr,	Zm_r_xm_nr,	2,	// MOVDQ2Q
   377  	Yxm,	Yxr,	Zm_r_xm_nr,	2, // MOVQ xmm1/m64 -> xmm2
   378  	Yxr,	Yxm,	Zr_m_xm_nr,	2, // MOVQ xmm1 -> xmm2/m64
   379  	Yml,	Yxr,	Zm_r_xm,	2,	// MOVD xmm load
   380  	Yxr,	Yml,	Zr_m_xm,	2,	// MOVD xmm store
   381  	Yiauto,	Yrl,	Zaut_r,	2,	// built-in LEAQ
   382  	0
   383  };
   384  static uchar	ym_rl[] =
   385  {
   386  	Ym,	Yrl,	Zm_r,	1,
   387  	0
   388  };
   389  static uchar	yrl_m[] =
   390  {
   391  	Yrl,	Ym,	Zr_m,	1,
   392  	0
   393  };
   394  static uchar	ymb_rl[] =
   395  {
   396  	Ymb,	Yrl,	Zmb_r,	1,
   397  	0
   398  };
   399  static uchar	yml_rl[] =
   400  {
   401  	Yml,	Yrl,	Zm_r,	1,
   402  	0
   403  };
   404  static uchar	yrl_ml[] =
   405  {
   406  	Yrl,	Yml,	Zr_m,	1,
   407  	0
   408  };
   409  static uchar	yml_mb[] =
   410  {
   411  	Yrb,	Ymb,	Zr_m,	1,
   412  	Ymb,	Yrb,	Zm_r,	1,
   413  	0
   414  };
   415  static uchar	yrb_mb[] =
   416  {
   417  	Yrb,	Ymb,	Zr_m,	1,
   418  	0
   419  };
   420  static uchar	yxchg[] =
   421  {
   422  	Yax,	Yrl,	Z_rp,	1,
   423  	Yrl,	Yax,	Zrp_,	1,
   424  	Yrl,	Yml,	Zr_m,	1,
   425  	Yml,	Yrl,	Zm_r,	1,
   426  	0
   427  };
   428  static uchar	ydivl[] =
   429  {
   430  	Yml,	Ynone,	Zm_o,	2,
   431  	0
   432  };
   433  static uchar	ydivb[] =
   434  {
   435  	Ymb,	Ynone,	Zm_o,	2,
   436  	0
   437  };
   438  static uchar	yimul[] =
   439  {
   440  	Yml,	Ynone,	Zm_o,	2,
   441  	Yi8,	Yrl,	Zib_rr,	1,
   442  	Yi32,	Yrl,	Zil_rr,	1,
   443  	Yml,	Yrl,	Zm_r,	2,
   444  	0
   445  };
   446  static uchar	yimul3[] =
   447  {
   448  	Yml,	Yrl,	Zibm_r,	2,
   449  	0
   450  };
   451  static uchar	ybyte[] =
   452  {
   453  	Yi64,	Ynone,	Zbyte,	1,
   454  	0
   455  };
   456  static uchar	yin[] =
   457  {
   458  	Yi32,	Ynone,	Zib_,	1,
   459  	Ynone,	Ynone,	Zlit,	1,
   460  	0
   461  };
   462  static uchar	yint[] =
   463  {
   464  	Yi32,	Ynone,	Zib_,	1,
   465  	0
   466  };
   467  static uchar	ypushl[] =
   468  {
   469  	Yrl,	Ynone,	Zrp_,	1,
   470  	Ym,	Ynone,	Zm_o,	2,
   471  	Yi8,	Ynone,	Zib_,	1,
   472  	Yi32,	Ynone,	Zil_,	1,
   473  	0
   474  };
   475  static uchar	ypopl[] =
   476  {
   477  	Ynone,	Yrl,	Z_rp,	1,
   478  	Ynone,	Ym,	Zo_m,	2,
   479  	0
   480  };
   481  static uchar	ybswap[] =
   482  {
   483  	Ynone,	Yrl,	Z_rp,	2,
   484  	0,
   485  };
   486  static uchar	yscond[] =
   487  {
   488  	Ynone,	Ymb,	Zo_m,	2,
   489  	0
   490  };
   491  static uchar	yjcond[] =
   492  {
   493  	Ynone,	Ybr,	Zbr,	0,
   494  	Yi0,	Ybr,	Zbr,	0,
   495  	Yi1,	Ybr,	Zbr,	1,
   496  	0
   497  };
   498  static uchar	yloop[] =
   499  {
   500  	Ynone,	Ybr,	Zloop,	1,
   501  	0
   502  };
   503  static uchar	ycall[] =
   504  {
   505  	Ynone,	Yml,	Zcallindreg,	0,
   506  	Yrx,	Yrx,	Zcallindreg,	2,
   507  	Ynone,	Ybr,	Zcall,	1,
   508  	0
   509  };
   510  static uchar	yduff[] =
   511  {
   512  	Ynone,	Yi32,	Zcall,	1,
   513  	0
   514  };
   515  static uchar	yjmp[] =
   516  {
   517  	Ynone,	Yml,	Zo_m64,	2,
   518  	Ynone,	Ybr,	Zjmp,	1,
   519  	0
   520  };
   521  
   522  static uchar	yfmvd[] =
   523  {
   524  	Ym,	Yf0,	Zm_o,	2,
   525  	Yf0,	Ym,	Zo_m,	2,
   526  	Yrf,	Yf0,	Zm_o,	2,
   527  	Yf0,	Yrf,	Zo_m,	2,
   528  	0
   529  };
   530  static uchar	yfmvdp[] =
   531  {
   532  	Yf0,	Ym,	Zo_m,	2,
   533  	Yf0,	Yrf,	Zo_m,	2,
   534  	0
   535  };
   536  static uchar	yfmvf[] =
   537  {
   538  	Ym,	Yf0,	Zm_o,	2,
   539  	Yf0,	Ym,	Zo_m,	2,
   540  	0
   541  };
   542  static uchar	yfmvx[] =
   543  {
   544  	Ym,	Yf0,	Zm_o,	2,
   545  	0
   546  };
   547  static uchar	yfmvp[] =
   548  {
   549  	Yf0,	Ym,	Zo_m,	2,
   550  	0
   551  };
   552  static uchar	yfadd[] =
   553  {
   554  	Ym,	Yf0,	Zm_o,	2,
   555  	Yrf,	Yf0,	Zm_o,	2,
   556  	Yf0,	Yrf,	Zo_m,	2,
   557  	0
   558  };
   559  static uchar	yfaddp[] =
   560  {
   561  	Yf0,	Yrf,	Zo_m,	2,
   562  	0
   563  };
   564  static uchar	yfxch[] =
   565  {
   566  	Yf0,	Yrf,	Zo_m,	2,
   567  	Yrf,	Yf0,	Zm_o,	2,
   568  	0
   569  };
   570  static uchar	ycompp[] =
   571  {
   572  	Yf0,	Yrf,	Zo_m,	2,	/* botch is really f0,f1 */
   573  	0
   574  };
   575  static uchar	ystsw[] =
   576  {
   577  	Ynone,	Ym,	Zo_m,	2,
   578  	Ynone,	Yax,	Zlit,	1,
   579  	0
   580  };
   581  static uchar	ystcw[] =
   582  {
   583  	Ynone,	Ym,	Zo_m,	2,
   584  	Ym,	Ynone,	Zm_o,	2,
   585  	0
   586  };
   587  static uchar	ysvrs[] =
   588  {
   589  	Ynone,	Ym,	Zo_m,	2,
   590  	Ym,	Ynone,	Zm_o,	2,
   591  	0
   592  };
   593  static uchar	ymm[] = 
   594  {
   595  	Ymm,	Ymr,	Zm_r_xm,	1,
   596  	Yxm,	Yxr,	Zm_r_xm,	2,
   597  	0
   598  };
   599  static uchar	yxm[] = 
   600  {
   601  	Yxm,	Yxr,	Zm_r_xm,	1,
   602  	0
   603  };
   604  static uchar	yxcvm1[] = 
   605  {
   606  	Yxm,	Yxr,	Zm_r_xm,	2,
   607  	Yxm,	Ymr,	Zm_r_xm,	2,
   608  	0
   609  };
   610  static uchar	yxcvm2[] =
   611  {
   612  	Yxm,	Yxr,	Zm_r_xm,	2,
   613  	Ymm,	Yxr,	Zm_r_xm,	2,
   614  	0
   615  };
   616  /*
   617  static uchar	yxmq[] = 
   618  {
   619  	Yxm,	Yxr,	Zm_r_xm,	2,
   620  	0
   621  };
   622  */
   623  static uchar	yxr[] = 
   624  {
   625  	Yxr,	Yxr,	Zm_r_xm,	1,
   626  	0
   627  };
   628  static uchar	yxr_ml[] =
   629  {
   630  	Yxr,	Yml,	Zr_m_xm,	1,
   631  	0
   632  };
   633  static uchar	ymr[] =
   634  {
   635  	Ymr,	Ymr,	Zm_r,	1,
   636  	0
   637  };
   638  static uchar	ymr_ml[] =
   639  {
   640  	Ymr,	Yml,	Zr_m_xm,	1,
   641  	0
   642  };
   643  static uchar	yxcmp[] =
   644  {
   645  	Yxm,	Yxr, Zm_r_xm,	1,
   646  	0
   647  };
   648  static uchar	yxcmpi[] =
   649  {
   650  	Yxm,	Yxr, Zm_r_i_xm,	2,
   651  	0
   652  };
   653  static uchar	yxmov[] =
   654  {
   655  	Yxm,	Yxr,	Zm_r_xm,	1,
   656  	Yxr,	Yxm,	Zr_m_xm,	1,
   657  	0
   658  };
   659  static uchar	yxcvfl[] = 
   660  {
   661  	Yxm,	Yrl,	Zm_r_xm,	1,
   662  	0
   663  };
   664  static uchar	yxcvlf[] =
   665  {
   666  	Yml,	Yxr,	Zm_r_xm,	1,
   667  	0
   668  };
   669  static uchar	yxcvfq[] = 
   670  {
   671  	Yxm,	Yrl,	Zm_r_xm,	2,
   672  	0
   673  };
   674  static uchar	yxcvqf[] =
   675  {
   676  	Yml,	Yxr,	Zm_r_xm,	2,
   677  	0
   678  };
   679  static uchar	yps[] = 
   680  {
   681  	Ymm,	Ymr,	Zm_r_xm,	1,
   682  	Yi8,	Ymr,	Zibo_m_xm,	2,
   683  	Yxm,	Yxr,	Zm_r_xm,	2,
   684  	Yi8,	Yxr,	Zibo_m_xm,	3,
   685  	0
   686  };
   687  static uchar	yxrrl[] =
   688  {
   689  	Yxr,	Yrl,	Zm_r,	1,
   690  	0
   691  };
   692  static uchar	ymfp[] =
   693  {
   694  	Ymm,	Ymr,	Zm_r_3d,	1,
   695  	0,
   696  };
   697  static uchar	ymrxr[] =
   698  {
   699  	Ymr,	Yxr,	Zm_r,	1,
   700  	Yxm,	Yxr,	Zm_r_xm,	1,
   701  	0
   702  };
   703  static uchar	ymshuf[] =
   704  {
   705  	Ymm,	Ymr,	Zibm_r,	2,
   706  	0
   707  };
   708  static uchar	ymshufb[] =
   709  {
   710  	Yxm,	Yxr,	Zm2_r,	2,
   711  	0
   712  };
   713  static uchar	yxshuf[] =
   714  {
   715  	Yxm,	Yxr,	Zibm_r,	2,
   716  	0
   717  };
   718  static uchar	yextrw[] =
   719  {
   720  	Yxr,	Yrl,	Zibm_r,	2,
   721  	0
   722  };
   723  static uchar	yinsrw[] =
   724  {
   725  	Yml,	Yxr,	Zibm_r,	2,
   726  	0
   727  };
   728  static uchar	yinsr[] =
   729  {
   730  	Ymm,	Yxr,	Zibm_r,	3,
   731  	0
   732  };
   733  static uchar	ypsdq[] =
   734  {
   735  	Yi8,	Yxr,	Zibo_m,	2,
   736  	0
   737  };
   738  static uchar	ymskb[] =
   739  {
   740  	Yxr,	Yrl,	Zm_r_xm,	2,
   741  	Ymr,	Yrl,	Zm_r_xm,	1,
   742  	0
   743  };
   744  static uchar	ycrc32l[] =
   745  {
   746  	Yml,	Yrl,	Zlitm_r,	0,
   747  };
   748  static uchar	yprefetch[] =
   749  {
   750  	Ym,	Ynone,	Zm_o,	2,
   751  	0,
   752  };
   753  static uchar	yaes[] =
   754  {
   755  	Yxm,	Yxr,	Zlitm_r,	2,
   756  	0
   757  };
   758  static uchar	yaes2[] =
   759  {
   760  	Yxm,	Yxr,	Zibm_r,	2,
   761  	0
   762  };
   763  
   764  /*
   765   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   766   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   767   * the entry with the given p->as and then looks through the ytable for that
   768   * instruction (the second field in the optab struct) for a line whose first
   769   * two values match the Ytypes of the p->from and p->to operands.  The function
   770   * oclass in span.c computes the specific Ytype of an operand and then the set
   771   * of more general Ytypes that it satisfies is implied by the ycover table, set
   772   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   773   * from the more general 8-bit constants, but instinit says
   774   *
   775   *        ycover[Yi0*Ymax + Ys32] = 1;
   776   *        ycover[Yi1*Ymax + Ys32] = 1;
   777   *        ycover[Yi8*Ymax + Ys32] = 1;
   778   *
   779   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   780   * if that's what an instruction can handle.
   781   *
   782   * In parallel with the scan through the ytable for the appropriate line, there
   783   * is a z pointer that starts out pointing at the strange magic byte list in
   784   * the Optab struct.  With each step past a non-matching ytable line, z
   785   * advances by the 4th entry in the line.  When a matching line is found, that
   786   * z pointer has the extra data to use in laying down the instruction bytes.
   787   * The actual bytes laid down are a function of the 3rd entry in the line (that
   788   * is, the Ztype) and the z bytes.
   789   *
   790   * For example, let's look at AADDL.  The optab line says:
   791   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   792   *
   793   * and yaddl says
   794   *        uchar   yaddl[] =
   795   *        {
   796   *                Yi8,    Yml,    Zibo_m, 2,
   797   *                Yi32,   Yax,    Zil_,   1,
   798   *                Yi32,   Yml,    Zilo_m, 2,
   799   *                Yrl,    Yml,    Zr_m,   1,
   800   *                Yml,    Yrl,    Zm_r,   1,
   801   *                0
   802   *        };
   803   *
   804   * so there are 5 possible types of ADDL instruction that can be laid down, and
   805   * possible states used to lay them down (Ztype and z pointer, assuming z
   806   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   807   *
   808   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   809   *        Yi32, Yax -> Zil_, z+2 (0x05)
   810   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   811   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   812   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   813   *
   814   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   815   * relatively straightforward as this program goes.
   816   *
   817   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   818   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   819   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   820   * Zilo_m is the same but a long (32-bit) immediate.
   821   */
   822  static Optab optab[] =
   823  /*	as, ytab, andproto, opcode */
   824  {
   825  	{ AXXX },
   826  	{ AAAA,		ynone,	P32, {0x37} },
   827  	{ AAAD,		ynone,	P32, {0xd5,0x0a} },
   828  	{ AAAM,		ynone,	P32, {0xd4,0x0a} },
   829  	{ AAAS,		ynone,	P32, {0x3f} },
   830  	{ AADCB,	yxorb,	Pb, {0x14,0x80,(02),0x10,0x10} },
   831  	{ AADCL,	yxorl,	Px, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   832  	{ AADCQ,	yxorl,	Pw, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   833  	{ AADCW,	yxorl,	Pe, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   834  	{ AADDB,	yxorb,	Pb, {0x04,0x80,(00),0x00,0x02} },
   835  	{ AADDL,	yaddl,	Px, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   836  	{ AADDPD,	yxm,	Pq, {0x58} },
   837  	{ AADDPS,	yxm,	Pm, {0x58} },
   838  	{ AADDQ,	yaddl,	Pw, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   839  	{ AADDSD,	yxm,	Pf2, {0x58} },
   840  	{ AADDSS,	yxm,	Pf3, {0x58} },
   841  	{ AADDW,	yaddl,	Pe, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   842  	{ AADJSP },
   843  	{ AANDB,	yxorb,	Pb, {0x24,0x80,(04),0x20,0x22} },
   844  	{ AANDL,	yxorl,	Px, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   845  	{ AANDNPD,	yxm,	Pq, {0x55} },
   846  	{ AANDNPS,	yxm,	Pm, {0x55} },
   847  	{ AANDPD,	yxm,	Pq, {0x54} },
   848  	{ AANDPS,	yxm,	Pq, {0x54} },
   849  	{ AANDQ,	yxorl,	Pw, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   850  	{ AANDW,	yxorl,	Pe, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   851  	{ AARPL,	yrl_ml,	P32, {0x63} },
   852  	{ ABOUNDL,	yrl_m,	P32, {0x62} },
   853  	{ ABOUNDW,	yrl_m,	Pe, {0x62} },
   854  	{ ABSFL,	yml_rl,	Pm, {0xbc} },
   855  	{ ABSFQ,	yml_rl,	Pw, {0x0f,0xbc} },
   856  	{ ABSFW,	yml_rl,	Pq, {0xbc} },
   857  	{ ABSRL,	yml_rl,	Pm, {0xbd} },
   858  	{ ABSRQ,	yml_rl,	Pw, {0x0f,0xbd} },
   859  	{ ABSRW,	yml_rl,	Pq, {0xbd} },
   860  	{ ABSWAPL,	ybswap,	Px, {0x0f,0xc8} },
   861  	{ ABSWAPQ,	ybswap,	Pw, {0x0f,0xc8} },
   862  	{ ABTCL,	ybtl,	Pm, {0xba,(07),0xbb} },
   863  	{ ABTCQ,	ybtl,	Pw, {0x0f,0xba,(07),0x0f,0xbb} },
   864  	{ ABTCW,	ybtl,	Pq, {0xba,(07),0xbb} },
   865  	{ ABTL,		ybtl,	Pm, {0xba,(04),0xa3} },
   866  	{ ABTQ,		ybtl,	Pw, {0x0f,0xba,(04),0x0f,0xa3}},
   867  	{ ABTRL,	ybtl,	Pm, {0xba,(06),0xb3} },
   868  	{ ABTRQ,	ybtl,	Pw, {0x0f,0xba,(06),0x0f,0xb3} },
   869  	{ ABTRW,	ybtl,	Pq, {0xba,(06),0xb3} },
   870  	{ ABTSL,	ybtl,	Pm, {0xba,(05),0xab } },
   871  	{ ABTSQ,	ybtl,	Pw, {0x0f,0xba,(05),0x0f,0xab} },
   872  	{ ABTSW,	ybtl,	Pq, {0xba,(05),0xab } },
   873  	{ ABTW,		ybtl,	Pq, {0xba,(04),0xa3} },
   874  	{ ABYTE,	ybyte,	Px, {1} },
   875  	{ ACALL,	ycall,	Px, {0xff,(02),0xe8} },
   876  	{ ACDQ,		ynone,	Px, {0x99} },
   877  	{ ACLC,		ynone,	Px, {0xf8} },
   878  	{ ACLD,		ynone,	Px, {0xfc} },
   879  	{ ACLI,		ynone,	Px, {0xfa} },
   880  	{ ACLTS,	ynone,	Pm, {0x06} },
   881  	{ ACMC,		ynone,	Px, {0xf5} },
   882  	{ ACMOVLCC,	yml_rl,	Pm, {0x43} },
   883  	{ ACMOVLCS,	yml_rl,	Pm, {0x42} },
   884  	{ ACMOVLEQ,	yml_rl,	Pm, {0x44} },
   885  	{ ACMOVLGE,	yml_rl,	Pm, {0x4d} },
   886  	{ ACMOVLGT,	yml_rl,	Pm, {0x4f} },
   887  	{ ACMOVLHI,	yml_rl,	Pm, {0x47} },
   888  	{ ACMOVLLE,	yml_rl,	Pm, {0x4e} },
   889  	{ ACMOVLLS,	yml_rl,	Pm, {0x46} },
   890  	{ ACMOVLLT,	yml_rl,	Pm, {0x4c} },
   891  	{ ACMOVLMI,	yml_rl,	Pm, {0x48} },
   892  	{ ACMOVLNE,	yml_rl,	Pm, {0x45} },
   893  	{ ACMOVLOC,	yml_rl,	Pm, {0x41} },
   894  	{ ACMOVLOS,	yml_rl,	Pm, {0x40} },
   895  	{ ACMOVLPC,	yml_rl,	Pm, {0x4b} },
   896  	{ ACMOVLPL,	yml_rl,	Pm, {0x49} },
   897  	{ ACMOVLPS,	yml_rl,	Pm, {0x4a} },
   898  	{ ACMOVQCC,	yml_rl,	Pw, {0x0f,0x43} },
   899  	{ ACMOVQCS,	yml_rl,	Pw, {0x0f,0x42} },
   900  	{ ACMOVQEQ,	yml_rl,	Pw, {0x0f,0x44} },
   901  	{ ACMOVQGE,	yml_rl,	Pw, {0x0f,0x4d} },
   902  	{ ACMOVQGT,	yml_rl,	Pw, {0x0f,0x4f} },
   903  	{ ACMOVQHI,	yml_rl,	Pw, {0x0f,0x47} },
   904  	{ ACMOVQLE,	yml_rl,	Pw, {0x0f,0x4e} },
   905  	{ ACMOVQLS,	yml_rl,	Pw, {0x0f,0x46} },
   906  	{ ACMOVQLT,	yml_rl,	Pw, {0x0f,0x4c} },
   907  	{ ACMOVQMI,	yml_rl,	Pw, {0x0f,0x48} },
   908  	{ ACMOVQNE,	yml_rl,	Pw, {0x0f,0x45} },
   909  	{ ACMOVQOC,	yml_rl,	Pw, {0x0f,0x41} },
   910  	{ ACMOVQOS,	yml_rl,	Pw, {0x0f,0x40} },
   911  	{ ACMOVQPC,	yml_rl,	Pw, {0x0f,0x4b} },
   912  	{ ACMOVQPL,	yml_rl,	Pw, {0x0f,0x49} },
   913  	{ ACMOVQPS,	yml_rl,	Pw, {0x0f,0x4a} },
   914  	{ ACMOVWCC,	yml_rl,	Pq, {0x43} },
   915  	{ ACMOVWCS,	yml_rl,	Pq, {0x42} },
   916  	{ ACMOVWEQ,	yml_rl,	Pq, {0x44} },
   917  	{ ACMOVWGE,	yml_rl,	Pq, {0x4d} },
   918  	{ ACMOVWGT,	yml_rl,	Pq, {0x4f} },
   919  	{ ACMOVWHI,	yml_rl,	Pq, {0x47} },
   920  	{ ACMOVWLE,	yml_rl,	Pq, {0x4e} },
   921  	{ ACMOVWLS,	yml_rl,	Pq, {0x46} },
   922  	{ ACMOVWLT,	yml_rl,	Pq, {0x4c} },
   923  	{ ACMOVWMI,	yml_rl,	Pq, {0x48} },
   924  	{ ACMOVWNE,	yml_rl,	Pq, {0x45} },
   925  	{ ACMOVWOC,	yml_rl,	Pq, {0x41} },
   926  	{ ACMOVWOS,	yml_rl,	Pq, {0x40} },
   927  	{ ACMOVWPC,	yml_rl,	Pq, {0x4b} },
   928  	{ ACMOVWPL,	yml_rl,	Pq, {0x49} },
   929  	{ ACMOVWPS,	yml_rl,	Pq, {0x4a} },
   930  	{ ACMPB,	ycmpb,	Pb, {0x3c,0x80,(07),0x38,0x3a} },
   931  	{ ACMPL,	ycmpl,	Px, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   932  	{ ACMPPD,	yxcmpi,	Px, {Pe,0xc2} },
   933  	{ ACMPPS,	yxcmpi,	Pm, {0xc2,0} },
   934  	{ ACMPQ,	ycmpl,	Pw, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   935  	{ ACMPSB,	ynone,	Pb, {0xa6} },
   936  	{ ACMPSD,	yxcmpi,	Px, {Pf2,0xc2} },
   937  	{ ACMPSL,	ynone,	Px, {0xa7} },
   938  	{ ACMPSQ,	ynone,	Pw, {0xa7} },
   939  	{ ACMPSS,	yxcmpi,	Px, {Pf3,0xc2} },
   940  	{ ACMPSW,	ynone,	Pe, {0xa7} },
   941  	{ ACMPW,	ycmpl,	Pe, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   942  	{ ACOMISD,	yxcmp,	Pe, {0x2f} },
   943  	{ ACOMISS,	yxcmp,	Pm, {0x2f} },
   944  	{ ACPUID,	ynone,	Pm, {0xa2} },
   945  	{ ACVTPL2PD,	yxcvm2,	Px, {Pf3,0xe6,Pe,0x2a} },
   946  	{ ACVTPL2PS,	yxcvm2,	Pm, {0x5b,0,0x2a,0,} },
   947  	{ ACVTPD2PL,	yxcvm1,	Px, {Pf2,0xe6,Pe,0x2d} },
   948  	{ ACVTPD2PS,	yxm,	Pe, {0x5a} },
   949  	{ ACVTPS2PL,	yxcvm1, Px, {Pe,0x5b,Pm,0x2d} },
   950  	{ ACVTPS2PD,	yxm,	Pm, {0x5a} },
   951  	{ API2FW,	ymfp,	Px, {0x0c} },
   952  	{ ACVTSD2SL,	yxcvfl, Pf2, {0x2d} },
   953  	{ ACVTSD2SQ,	yxcvfq, Pw, {Pf2,0x2d} },
   954  	{ ACVTSD2SS,	yxm,	Pf2, {0x5a} },
   955  	{ ACVTSL2SD,	yxcvlf, Pf2, {0x2a} },
   956  	{ ACVTSQ2SD,	yxcvqf, Pw, {Pf2,0x2a} },
   957  	{ ACVTSL2SS,	yxcvlf, Pf3, {0x2a} },
   958  	{ ACVTSQ2SS,	yxcvqf, Pw, {Pf3,0x2a} },
   959  	{ ACVTSS2SD,	yxm,	Pf3, {0x5a} },
   960  	{ ACVTSS2SL,	yxcvfl, Pf3, {0x2d} },
   961  	{ ACVTSS2SQ,	yxcvfq, Pw, {Pf3,0x2d} },
   962  	{ ACVTTPD2PL,	yxcvm1,	Px, {Pe,0xe6,Pe,0x2c} },
   963  	{ ACVTTPS2PL,	yxcvm1,	Px, {Pf3,0x5b,Pm,0x2c} },
   964  	{ ACVTTSD2SL,	yxcvfl, Pf2, {0x2c} },
   965  	{ ACVTTSD2SQ,	yxcvfq, Pw, {Pf2,0x2c} },
   966  	{ ACVTTSS2SL,	yxcvfl,	Pf3, {0x2c} },
   967  	{ ACVTTSS2SQ,	yxcvfq, Pw, {Pf3,0x2c} },
   968  	{ ACWD,		ynone,	Pe, {0x99} },
   969  	{ ACQO,		ynone,	Pw, {0x99} },
   970  	{ ADAA,		ynone,	P32, {0x27} },
   971  	{ ADAS,		ynone,	P32, {0x2f} },
   972  	{ ADATA },
   973  	{ ADECB,	yincb,	Pb, {0xfe,(01)} },
   974  	{ ADECL,	yincl,	Px, {0xff,(01)} },
   975  	{ ADECQ,	yincl,	Pw, {0xff,(01)} },
   976  	{ ADECW,	yincw,	Pe, {0xff,(01)} },
   977  	{ ADIVB,	ydivb,	Pb, {0xf6,(06)} },
   978  	{ ADIVL,	ydivl,	Px, {0xf7,(06)} },
   979  	{ ADIVPD,	yxm,	Pe, {0x5e} },
   980  	{ ADIVPS,	yxm,	Pm, {0x5e} },
   981  	{ ADIVQ,	ydivl,	Pw, {0xf7,(06)} },
   982  	{ ADIVSD,	yxm,	Pf2, {0x5e} },
   983  	{ ADIVSS,	yxm,	Pf3, {0x5e} },
   984  	{ ADIVW,	ydivl,	Pe, {0xf7,(06)} },
   985  	{ AEMMS,	ynone,	Pm, {0x77} },
   986  	{ AENTER },				/* botch */
   987  	{ AFXRSTOR,	ysvrs,	Pm, {0xae,(01),0xae,(01)} },
   988  	{ AFXSAVE,	ysvrs,	Pm, {0xae,(00),0xae,(00)} },
   989  	{ AFXRSTOR64,	ysvrs,	Pw, {0x0f,0xae,(01),0x0f,0xae,(01)} },
   990  	{ AFXSAVE64,	ysvrs,	Pw, {0x0f,0xae,(00),0x0f,0xae,(00)} },
   991  	{ AGLOBL },
   992  	{ AGOK },
   993  	{ AHISTORY },
   994  	{ AHLT,		ynone,	Px, {0xf4} },
   995  	{ AIDIVB,	ydivb,	Pb, {0xf6,(07)} },
   996  	{ AIDIVL,	ydivl,	Px, {0xf7,(07)} },
   997  	{ AIDIVQ,	ydivl,	Pw, {0xf7,(07)} },
   998  	{ AIDIVW,	ydivl,	Pe, {0xf7,(07)} },
   999  	{ AIMULB,	ydivb,	Pb, {0xf6,(05)} },
  1000  	{ AIMULL,	yimul,	Px, {0xf7,(05),0x6b,0x69,Pm,0xaf} },
  1001  	{ AIMULQ,	yimul,	Pw, {0xf7,(05),0x6b,0x69,Pm,0xaf} },
  1002  	{ AIMULW,	yimul,	Pe, {0xf7,(05),0x6b,0x69,Pm,0xaf} },
  1003  	{ AIMUL3Q,	yimul3,	Pw, {0x6b,(00)} },
  1004  	{ AINB,		yin,	Pb, {0xe4,0xec} },
  1005  	{ AINCB,	yincb,	Pb, {0xfe,(00)} },
  1006  	{ AINCL,	yincl,	Px, {0xff,(00)} },
  1007  	{ AINCQ,	yincl,	Pw, {0xff,(00)} },
  1008  	{ AINCW,	yincw,	Pe, {0xff,(00)} },
  1009  	{ AINL,		yin,	Px, {0xe5,0xed} },
  1010  	{ AINSB,	ynone,	Pb, {0x6c} },
  1011  	{ AINSL,	ynone,	Px, {0x6d} },
  1012  	{ AINSW,	ynone,	Pe, {0x6d} },
  1013  	{ AINT,		yint,	Px, {0xcd} },
  1014  	{ AINTO,	ynone,	P32, {0xce} },
  1015  	{ AINW,		yin,	Pe, {0xe5,0xed} },
  1016  	{ AIRETL,	ynone,	Px, {0xcf} },
  1017  	{ AIRETQ,	ynone,	Pw, {0xcf} },
  1018  	{ AIRETW,	ynone,	Pe, {0xcf} },
  1019  	{ AJCC,		yjcond,	Px, {0x73,0x83,(00)} },
  1020  	{ AJCS,		yjcond,	Px, {0x72,0x82} },
  1021  	{ AJCXZL,	yloop,	Px, {0xe3} },
  1022  	{ AJCXZQ,	yloop,	Px, {0xe3} },
  1023  	{ AJEQ,		yjcond,	Px, {0x74,0x84} },
  1024  	{ AJGE,		yjcond,	Px, {0x7d,0x8d} },
  1025  	{ AJGT,		yjcond,	Px, {0x7f,0x8f} },
  1026  	{ AJHI,		yjcond,	Px, {0x77,0x87} },
  1027  	{ AJLE,		yjcond,	Px, {0x7e,0x8e} },
  1028  	{ AJLS,		yjcond,	Px, {0x76,0x86} },
  1029  	{ AJLT,		yjcond,	Px, {0x7c,0x8c} },
  1030  	{ AJMI,		yjcond,	Px, {0x78,0x88} },
  1031  	{ AJMP,		yjmp,	Px, {0xff,(04),0xeb,0xe9} },
  1032  	{ AJNE,		yjcond,	Px, {0x75,0x85} },
  1033  	{ AJOC,		yjcond,	Px, {0x71,0x81,(00)} },
  1034  	{ AJOS,		yjcond,	Px, {0x70,0x80,(00)} },
  1035  	{ AJPC,		yjcond,	Px, {0x7b,0x8b} },
  1036  	{ AJPL,		yjcond,	Px, {0x79,0x89} },
  1037  	{ AJPS,		yjcond,	Px, {0x7a,0x8a} },
  1038  	{ ALAHF,	ynone,	Px, {0x9f} },
  1039  	{ ALARL,	yml_rl,	Pm, {0x02} },
  1040  	{ ALARW,	yml_rl,	Pq, {0x02} },
  1041  	{ ALDMXCSR,	ysvrs,	Pm, {0xae,(02),0xae,(02)} },
  1042  	{ ALEAL,	ym_rl,	Px, {0x8d} },
  1043  	{ ALEAQ,	ym_rl,	Pw, {0x8d} },
  1044  	{ ALEAVEL,	ynone,	P32, {0xc9} },
  1045  	{ ALEAVEQ,	ynone,	Py, {0xc9} },
  1046  	{ ALEAVEW,	ynone,	Pe, {0xc9} },
  1047  	{ ALEAW,	ym_rl,	Pe, {0x8d} },
  1048  	{ ALOCK,	ynone,	Px, {0xf0} },
  1049  	{ ALODSB,	ynone,	Pb, {0xac} },
  1050  	{ ALODSL,	ynone,	Px, {0xad} },
  1051  	{ ALODSQ,	ynone,	Pw, {0xad} },
  1052  	{ ALODSW,	ynone,	Pe, {0xad} },
  1053  	{ ALONG,	ybyte,	Px, {4} },
  1054  	{ ALOOP,	yloop,	Px, {0xe2} },
  1055  	{ ALOOPEQ,	yloop,	Px, {0xe1} },
  1056  	{ ALOOPNE,	yloop,	Px, {0xe0} },
  1057  	{ ALSLL,	yml_rl,	Pm, {0x03 } },
  1058  	{ ALSLW,	yml_rl,	Pq, {0x03 } },
  1059  	{ AMASKMOVOU,	yxr,	Pe, {0xf7} },
  1060  	{ AMASKMOVQ,	ymr,	Pm, {0xf7} },
  1061  	{ AMAXPD,	yxm,	Pe, {0x5f} },
  1062  	{ AMAXPS,	yxm,	Pm, {0x5f} },
  1063  	{ AMAXSD,	yxm,	Pf2, {0x5f} },
  1064  	{ AMAXSS,	yxm,	Pf3, {0x5f} },
  1065  	{ AMINPD,	yxm,	Pe, {0x5d} },
  1066  	{ AMINPS,	yxm,	Pm, {0x5d} },
  1067  	{ AMINSD,	yxm,	Pf2, {0x5d} },
  1068  	{ AMINSS,	yxm,	Pf3, {0x5d} },
  1069  	{ AMOVAPD,	yxmov,	Pe, {0x28,0x29} },
  1070  	{ AMOVAPS,	yxmov,	Pm, {0x28,0x29} },
  1071  	{ AMOVB,	ymovb,	Pb, {0x88,0x8a,0xb0,0xc6,(00)} },
  1072  	{ AMOVBLSX,	ymb_rl,	Pm, {0xbe} },
  1073  	{ AMOVBLZX,	ymb_rl,	Pm, {0xb6} },
  1074  	{ AMOVBQSX,	ymb_rl,	Pw, {0x0f,0xbe} },
  1075  	{ AMOVBQZX,	ymb_rl,	Pm, {0xb6} },
  1076  	{ AMOVBWSX,	ymb_rl,	Pq, {0xbe} },
  1077  	{ AMOVBWZX,	ymb_rl,	Pq, {0xb6} },
  1078  	{ AMOVO,	yxmov,	Pe, {0x6f,0x7f} },
  1079  	{ AMOVOU,	yxmov,	Pf3, {0x6f,0x7f} },
  1080  	{ AMOVHLPS,	yxr,	Pm, {0x12} },
  1081  	{ AMOVHPD,	yxmov,	Pe, {0x16,0x17} },
  1082  	{ AMOVHPS,	yxmov,	Pm, {0x16,0x17} },
  1083  	{ AMOVL,	ymovl,	Px, {0x89,0x8b,0x31,0xb8,0xc7,(00),0x6e,0x7e,Pe,0x6e,Pe,0x7e,0} },
  1084  	{ AMOVLHPS,	yxr,	Pm, {0x16} },
  1085  	{ AMOVLPD,	yxmov,	Pe, {0x12,0x13} },
  1086  	{ AMOVLPS,	yxmov,	Pm, {0x12,0x13} },
  1087  	{ AMOVLQSX,	yml_rl,	Pw, {0x63} },
  1088  	{ AMOVLQZX,	yml_rl,	Px, {0x8b} },
  1089  	{ AMOVMSKPD,	yxrrl,	Pq, {0x50} },
  1090  	{ AMOVMSKPS,	yxrrl,	Pm, {0x50} },
  1091  	{ AMOVNTO,	yxr_ml,	Pe, {0xe7} },
  1092  	{ AMOVNTPD,	yxr_ml,	Pe, {0x2b} },
  1093  	{ AMOVNTPS,	yxr_ml,	Pm, {0x2b} },
  1094  	{ AMOVNTQ,	ymr_ml,	Pm, {0xe7} },
  1095  	{ AMOVQ,	ymovq,	Pw, {0x89, 0x8b, 0x31, 0xc7,(00), 0xb8, 0xc7,(00), 0x6f, 0x7f, 0x6e, 0x7e, Pf2,0xd6, Pf3,0x7e, Pe,0xd6, Pe,0x6e, Pe,0x7e,0} },
  1096  	{ AMOVQOZX,	ymrxr,	Pf3, {0xd6,0x7e} },
  1097  	{ AMOVSB,	ynone,	Pb, {0xa4} },
  1098  	{ AMOVSD,	yxmov,	Pf2, {0x10,0x11} },
  1099  	{ AMOVSL,	ynone,	Px, {0xa5} },
  1100  	{ AMOVSQ,	ynone,	Pw, {0xa5} },
  1101  	{ AMOVSS,	yxmov,	Pf3, {0x10,0x11} },
  1102  	{ AMOVSW,	ynone,	Pe, {0xa5} },
  1103  	{ AMOVUPD,	yxmov,	Pe, {0x10,0x11} },
  1104  	{ AMOVUPS,	yxmov,	Pm, {0x10,0x11} },
  1105  	{ AMOVW,	ymovw,	Pe, {0x89,0x8b,0x31,0xb8,0xc7,(00),0} },
  1106  	{ AMOVWLSX,	yml_rl,	Pm, {0xbf} },
  1107  	{ AMOVWLZX,	yml_rl,	Pm, {0xb7} },
  1108  	{ AMOVWQSX,	yml_rl,	Pw, {0x0f,0xbf} },
  1109  	{ AMOVWQZX,	yml_rl,	Pw, {0x0f,0xb7} },
  1110  	{ AMULB,	ydivb,	Pb, {0xf6,(04)} },
  1111  	{ AMULL,	ydivl,	Px, {0xf7,(04)} },
  1112  	{ AMULPD,	yxm,	Pe, {0x59} },
  1113  	{ AMULPS,	yxm,	Ym, {0x59} },
  1114  	{ AMULQ,	ydivl,	Pw, {0xf7,(04)} },
  1115  	{ AMULSD,	yxm,	Pf2, {0x59} },
  1116  	{ AMULSS,	yxm,	Pf3, {0x59} },
  1117  	{ AMULW,	ydivl,	Pe, {0xf7,(04)} },
  1118  	{ ANAME },
  1119  	{ ANEGB,	yscond,	Pb, {0xf6,(03)} },
  1120  	{ ANEGL,	yscond,	Px, {0xf7,(03)} },
  1121  	{ ANEGQ,	yscond,	Pw, {0xf7,(03)} },
  1122  	{ ANEGW,	yscond,	Pe, {0xf7,(03)} },
  1123  	{ ANOP,		ynop,	Px, {0,0} },
  1124  	{ ANOTB,	yscond,	Pb, {0xf6,(02)} },
  1125  	{ ANOTL,	yscond,	Px, {0xf7,(02)} },
  1126  	{ ANOTQ,	yscond,	Pw, {0xf7,(02)} },
  1127  	{ ANOTW,	yscond,	Pe, {0xf7,(02)} },
  1128  	{ AORB,		yxorb,	Pb, {0x0c,0x80,(01),0x08,0x0a} },
  1129  	{ AORL,		yxorl,	Px, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
  1130  	{ AORPD,	yxm,	Pq, {0x56} },
  1131  	{ AORPS,	yxm,	Pm, {0x56} },
  1132  	{ AORQ,		yxorl,	Pw, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
  1133  	{ AORW,		yxorl,	Pe, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
  1134  	{ AOUTB,	yin,	Pb, {0xe6,0xee} },
  1135  	{ AOUTL,	yin,	Px, {0xe7,0xef} },
  1136  	{ AOUTSB,	ynone,	Pb, {0x6e} },
  1137  	{ AOUTSL,	ynone,	Px, {0x6f} },
  1138  	{ AOUTSW,	ynone,	Pe, {0x6f} },
  1139  	{ AOUTW,	yin,	Pe, {0xe7,0xef} },
  1140  	{ APACKSSLW,	ymm,	Py, {0x6b,Pe,0x6b} },
  1141  	{ APACKSSWB,	ymm,	Py, {0x63,Pe,0x63} },
  1142  	{ APACKUSWB,	ymm,	Py, {0x67,Pe,0x67} },
  1143  	{ APADDB,	ymm,	Py, {0xfc,Pe,0xfc} },
  1144  	{ APADDL,	ymm,	Py, {0xfe,Pe,0xfe} },
  1145  	{ APADDQ,	yxm,	Pe, {0xd4} },
  1146  	{ APADDSB,	ymm,	Py, {0xec,Pe,0xec} },
  1147  	{ APADDSW,	ymm,	Py, {0xed,Pe,0xed} },
  1148  	{ APADDUSB,	ymm,	Py, {0xdc,Pe,0xdc} },
  1149  	{ APADDUSW,	ymm,	Py, {0xdd,Pe,0xdd} },
  1150  	{ APADDW,	ymm,	Py, {0xfd,Pe,0xfd} },
  1151  	{ APAND,	ymm,	Py, {0xdb,Pe,0xdb} },
  1152  	{ APANDN,	ymm,	Py, {0xdf,Pe,0xdf} },
  1153  	{ APAUSE,	ynone,	Px, {0xf3,0x90} },
  1154  	{ APAVGB,	ymm,	Py, {0xe0,Pe,0xe0} },
  1155  	{ APAVGW,	ymm,	Py, {0xe3,Pe,0xe3} },
  1156  	{ APCMPEQB,	ymm,	Py, {0x74,Pe,0x74} },
  1157  	{ APCMPEQL,	ymm,	Py, {0x76,Pe,0x76} },
  1158  	{ APCMPEQW,	ymm,	Py, {0x75,Pe,0x75} },
  1159  	{ APCMPGTB,	ymm,	Py, {0x64,Pe,0x64} },
  1160  	{ APCMPGTL,	ymm,	Py, {0x66,Pe,0x66} },
  1161  	{ APCMPGTW,	ymm,	Py, {0x65,Pe,0x65} },
  1162  	{ APEXTRW,	yextrw,	Pq, {0xc5,(00)} },
  1163  	{ APF2IL,	ymfp,	Px, {0x1d} },
  1164  	{ APF2IW,	ymfp,	Px, {0x1c} },
  1165  	{ API2FL,	ymfp,	Px, {0x0d} },
  1166  	{ APFACC,	ymfp,	Px, {0xae} },
  1167  	{ APFADD,	ymfp,	Px, {0x9e} },
  1168  	{ APFCMPEQ,	ymfp,	Px, {0xb0} },
  1169  	{ APFCMPGE,	ymfp,	Px, {0x90} },
  1170  	{ APFCMPGT,	ymfp,	Px, {0xa0} },
  1171  	{ APFMAX,	ymfp,	Px, {0xa4} },
  1172  	{ APFMIN,	ymfp,	Px, {0x94} },
  1173  	{ APFMUL,	ymfp,	Px, {0xb4} },
  1174  	{ APFNACC,	ymfp,	Px, {0x8a} },
  1175  	{ APFPNACC,	ymfp,	Px, {0x8e} },
  1176  	{ APFRCP,	ymfp,	Px, {0x96} },
  1177  	{ APFRCPIT1,	ymfp,	Px, {0xa6} },
  1178  	{ APFRCPI2T,	ymfp,	Px, {0xb6} },
  1179  	{ APFRSQIT1,	ymfp,	Px, {0xa7} },
  1180  	{ APFRSQRT,	ymfp,	Px, {0x97} },
  1181  	{ APFSUB,	ymfp,	Px, {0x9a} },
  1182  	{ APFSUBR,	ymfp,	Px, {0xaa} },
  1183  	{ APINSRW,	yinsrw,	Pq, {0xc4,(00)} },
  1184  	{ APINSRD,	yinsr,	Pq, {0x3a, 0x22, (00)} },
  1185  	{ APINSRQ,	yinsr,	Pq3, {0x3a, 0x22, (00)} },
  1186  	{ APMADDWL,	ymm,	Py, {0xf5,Pe,0xf5} },
  1187  	{ APMAXSW,	yxm,	Pe, {0xee} },
  1188  	{ APMAXUB,	yxm,	Pe, {0xde} },
  1189  	{ APMINSW,	yxm,	Pe, {0xea} },
  1190  	{ APMINUB,	yxm,	Pe, {0xda} },
  1191  	{ APMOVMSKB,	ymskb,	Px, {Pe,0xd7,0xd7} },
  1192  	{ APMULHRW,	ymfp,	Px, {0xb7} },
  1193  	{ APMULHUW,	ymm,	Py, {0xe4,Pe,0xe4} },
  1194  	{ APMULHW,	ymm,	Py, {0xe5,Pe,0xe5} },
  1195  	{ APMULLW,	ymm,	Py, {0xd5,Pe,0xd5} },
  1196  	{ APMULULQ,	ymm,	Py, {0xf4,Pe,0xf4} },
  1197  	{ APOPAL,	ynone,	P32, {0x61} },
  1198  	{ APOPAW,	ynone,	Pe, {0x61} },
  1199  	{ APOPFL,	ynone,	P32, {0x9d} },
  1200  	{ APOPFQ,	ynone,	Py, {0x9d} },
  1201  	{ APOPFW,	ynone,	Pe, {0x9d} },
  1202  	{ APOPL,	ypopl,	P32, {0x58,0x8f,(00)} },
  1203  	{ APOPQ,	ypopl,	Py, {0x58,0x8f,(00)} },
  1204  	{ APOPW,	ypopl,	Pe, {0x58,0x8f,(00)} },
  1205  	{ APOR,		ymm,	Py, {0xeb,Pe,0xeb} },
  1206  	{ APSADBW,	yxm,	Pq, {0xf6} },
  1207  	{ APSHUFHW,	yxshuf,	Pf3, {0x70,(00)} },
  1208  	{ APSHUFL,	yxshuf,	Pq, {0x70,(00)} },
  1209  	{ APSHUFLW,	yxshuf,	Pf2, {0x70,(00)} },
  1210  	{ APSHUFW,	ymshuf,	Pm, {0x70,(00)} },
  1211  	{ APSHUFB,	ymshufb,Pq, {0x38, 0x00} },
  1212  	{ APSLLO,	ypsdq,	Pq, {0x73,(07)} },
  1213  	{ APSLLL,	yps,	Py, {0xf2, 0x72,(06), Pe,0xf2, Pe,0x72,(06)} },
  1214  	{ APSLLQ,	yps,	Py, {0xf3, 0x73,(06), Pe,0xf3, Pe,0x73,(06)} },
  1215  	{ APSLLW,	yps,	Py, {0xf1, 0x71,(06), Pe,0xf1, Pe,0x71,(06)} },
  1216  	{ APSRAL,	yps,	Py, {0xe2, 0x72,(04), Pe,0xe2, Pe,0x72,(04)} },
  1217  	{ APSRAW,	yps,	Py, {0xe1, 0x71,(04), Pe,0xe1, Pe,0x71,(04)} },
  1218  	{ APSRLO,	ypsdq,	Pq, {0x73,(03)} },
  1219  	{ APSRLL,	yps,	Py, {0xd2, 0x72,(02), Pe,0xd2, Pe,0x72,(02)} },
  1220  	{ APSRLQ,	yps,	Py, {0xd3, 0x73,(02), Pe,0xd3, Pe,0x73,(02)} },
  1221  	{ APSRLW,	yps,	Py, {0xd1, 0x71,(02), Pe,0xe1, Pe,0x71,(02)} },
  1222  	{ APSUBB,	yxm,	Pe, {0xf8} },
  1223  	{ APSUBL,	yxm,	Pe, {0xfa} },
  1224  	{ APSUBQ,	yxm,	Pe, {0xfb} },
  1225  	{ APSUBSB,	yxm,	Pe, {0xe8} },
  1226  	{ APSUBSW,	yxm,	Pe, {0xe9} },
  1227  	{ APSUBUSB,	yxm,	Pe, {0xd8} },
  1228  	{ APSUBUSW,	yxm,	Pe, {0xd9} },
  1229  	{ APSUBW,	yxm,	Pe, {0xf9} },
  1230  	{ APSWAPL,	ymfp,	Px, {0xbb} },
  1231  	{ APUNPCKHBW,	ymm,	Py, {0x68,Pe,0x68} },
  1232  	{ APUNPCKHLQ,	ymm,	Py, {0x6a,Pe,0x6a} },
  1233  	{ APUNPCKHQDQ,	yxm,	Pe, {0x6d} },
  1234  	{ APUNPCKHWL,	ymm,	Py, {0x69,Pe,0x69} },
  1235  	{ APUNPCKLBW,	ymm,	Py, {0x60,Pe,0x60} },
  1236  	{ APUNPCKLLQ,	ymm,	Py, {0x62,Pe,0x62} },
  1237  	{ APUNPCKLQDQ,	yxm,	Pe, {0x6c} },
  1238  	{ APUNPCKLWL,	ymm,	Py, {0x61,Pe,0x61} },
  1239  	{ APUSHAL,	ynone,	P32, {0x60} },
  1240  	{ APUSHAW,	ynone,	Pe, {0x60} },
  1241  	{ APUSHFL,	ynone,	P32, {0x9c} },
  1242  	{ APUSHFQ,	ynone,	Py, {0x9c} },
  1243  	{ APUSHFW,	ynone,	Pe, {0x9c} },
  1244  	{ APUSHL,	ypushl,	P32, {0x50,0xff,(06),0x6a,0x68} },
  1245  	{ APUSHQ,	ypushl,	Py, {0x50,0xff,(06),0x6a,0x68} },
  1246  	{ APUSHW,	ypushl,	Pe, {0x50,0xff,(06),0x6a,0x68} },
  1247  	{ APXOR,	ymm,	Py, {0xef,Pe,0xef} },
  1248  	{ AQUAD,	ybyte,	Px, {8} },
  1249  	{ ARCLB,	yshb,	Pb, {0xd0,(02),0xc0,(02),0xd2,(02)} },
  1250  	{ ARCLL,	yshl,	Px, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
  1251  	{ ARCLQ,	yshl,	Pw, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
  1252  	{ ARCLW,	yshl,	Pe, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
  1253  	{ ARCPPS,	yxm,	Pm, {0x53} },
  1254  	{ ARCPSS,	yxm,	Pf3, {0x53} },
  1255  	{ ARCRB,	yshb,	Pb, {0xd0,(03),0xc0,(03),0xd2,(03)} },
  1256  	{ ARCRL,	yshl,	Px, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
  1257  	{ ARCRQ,	yshl,	Pw, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
  1258  	{ ARCRW,	yshl,	Pe, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
  1259  	{ AREP,		ynone,	Px, {0xf3} },
  1260  	{ AREPN,	ynone,	Px, {0xf2} },
  1261  	{ ARET,		ynone,	Px, {0xc3} },
  1262  	{ ARETFW,	yret,	Pe, {0xcb,0xca} },
  1263  	{ ARETFL,	yret,	Px, {0xcb,0xca} },
  1264  	{ ARETFQ,	yret,	Pw, {0xcb,0xca} },
  1265  	{ AROLB,	yshb,	Pb, {0xd0,(00),0xc0,(00),0xd2,(00)} },
  1266  	{ AROLL,	yshl,	Px, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
  1267  	{ AROLQ,	yshl,	Pw, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
  1268  	{ AROLW,	yshl,	Pe, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
  1269  	{ ARORB,	yshb,	Pb, {0xd0,(01),0xc0,(01),0xd2,(01)} },
  1270  	{ ARORL,	yshl,	Px, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
  1271  	{ ARORQ,	yshl,	Pw, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
  1272  	{ ARORW,	yshl,	Pe, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
  1273  	{ ARSQRTPS,	yxm,	Pm, {0x52} },
  1274  	{ ARSQRTSS,	yxm,	Pf3, {0x52} },
  1275  	{ ASAHF,	ynone,	Px, {0x86,0xe0,0x50,0x9d} },	/* XCHGB AH,AL; PUSH AX; POPFL */
  1276  	{ ASALB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
  1277  	{ ASALL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1278  	{ ASALQ,	yshl,	Pw, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1279  	{ ASALW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1280  	{ ASARB,	yshb,	Pb, {0xd0,(07),0xc0,(07),0xd2,(07)} },
  1281  	{ ASARL,	yshl,	Px, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
  1282  	{ ASARQ,	yshl,	Pw, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
  1283  	{ ASARW,	yshl,	Pe, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
  1284  	{ ASBBB,	yxorb,	Pb, {0x1c,0x80,(03),0x18,0x1a} },
  1285  	{ ASBBL,	yxorl,	Px, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
  1286  	{ ASBBQ,	yxorl,	Pw, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
  1287  	{ ASBBW,	yxorl,	Pe, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
  1288  	{ ASCASB,	ynone,	Pb, {0xae} },
  1289  	{ ASCASL,	ynone,	Px, {0xaf} },
  1290  	{ ASCASQ,	ynone,	Pw, {0xaf} },
  1291  	{ ASCASW,	ynone,	Pe, {0xaf} },
  1292  	{ ASETCC,	yscond,	Pm, {0x93,(00)} },
  1293  	{ ASETCS,	yscond,	Pm, {0x92,(00)} },
  1294  	{ ASETEQ,	yscond,	Pm, {0x94,(00)} },
  1295  	{ ASETGE,	yscond,	Pm, {0x9d,(00)} },
  1296  	{ ASETGT,	yscond,	Pm, {0x9f,(00)} },
  1297  	{ ASETHI,	yscond,	Pm, {0x97,(00)} },
  1298  	{ ASETLE,	yscond,	Pm, {0x9e,(00)} },
  1299  	{ ASETLS,	yscond,	Pm, {0x96,(00)} },
  1300  	{ ASETLT,	yscond,	Pm, {0x9c,(00)} },
  1301  	{ ASETMI,	yscond,	Pm, {0x98,(00)} },
  1302  	{ ASETNE,	yscond,	Pm, {0x95,(00)} },
  1303  	{ ASETOC,	yscond,	Pm, {0x91,(00)} },
  1304  	{ ASETOS,	yscond,	Pm, {0x90,(00)} },
  1305  	{ ASETPC,	yscond,	Pm, {0x9b,(00)} },
  1306  	{ ASETPL,	yscond,	Pm, {0x99,(00)} },
  1307  	{ ASETPS,	yscond,	Pm, {0x9a,(00)} },
  1308  	{ ASHLB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
  1309  	{ ASHLL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1310  	{ ASHLQ,	yshl,	Pw, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1311  	{ ASHLW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1312  	{ ASHRB,	yshb,	Pb, {0xd0,(05),0xc0,(05),0xd2,(05)} },
  1313  	{ ASHRL,	yshl,	Px, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
  1314  	{ ASHRQ,	yshl,	Pw, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
  1315  	{ ASHRW,	yshl,	Pe, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
  1316  	{ ASHUFPD,	yxshuf,	Pq, {0xc6,(00)} },
  1317  	{ ASHUFPS,	yxshuf,	Pm, {0xc6,(00)} },
  1318  	{ ASQRTPD,	yxm,	Pe, {0x51} },
  1319  	{ ASQRTPS,	yxm,	Pm, {0x51} },
  1320  	{ ASQRTSD,	yxm,	Pf2, {0x51} },
  1321  	{ ASQRTSS,	yxm,	Pf3, {0x51} },
  1322  	{ ASTC,		ynone,	Px, {0xf9} },
  1323  	{ ASTD,		ynone,	Px, {0xfd} },
  1324  	{ ASTI,		ynone,	Px, {0xfb} },
  1325  	{ ASTMXCSR,	ysvrs,	Pm, {0xae,(03),0xae,(03)} },
  1326  	{ ASTOSB,	ynone,	Pb, {0xaa} },
  1327  	{ ASTOSL,	ynone,	Px, {0xab} },
  1328  	{ ASTOSQ,	ynone,	Pw, {0xab} },
  1329  	{ ASTOSW,	ynone,	Pe, {0xab} },
  1330  	{ ASUBB,	yxorb,	Pb, {0x2c,0x80,(05),0x28,0x2a} },
  1331  	{ ASUBL,	yaddl,	Px, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
  1332  	{ ASUBPD,	yxm,	Pe, {0x5c} },
  1333  	{ ASUBPS,	yxm,	Pm, {0x5c} },
  1334  	{ ASUBQ,	yaddl,	Pw, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
  1335  	{ ASUBSD,	yxm,	Pf2, {0x5c} },
  1336  	{ ASUBSS,	yxm,	Pf3, {0x5c} },
  1337  	{ ASUBW,	yaddl,	Pe, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
  1338  	{ ASWAPGS,	ynone,	Pm, {0x01,0xf8} },
  1339  	{ ASYSCALL,	ynone,	Px, {0x0f,0x05} },	/* fast syscall */
  1340  	{ ATESTB,	ytestb,	Pb, {0xa8,0xf6,(00),0x84,0x84} },
  1341  	{ ATESTL,	ytestl,	Px, {0xa9,0xf7,(00),0x85,0x85} },
  1342  	{ ATESTQ,	ytestl,	Pw, {0xa9,0xf7,(00),0x85,0x85} },
  1343  	{ ATESTW,	ytestl,	Pe, {0xa9,0xf7,(00),0x85,0x85} },
  1344  	{ ATEXT,	ytext,	Px },
  1345  	{ AUCOMISD,	yxcmp,	Pe, {0x2e} },
  1346  	{ AUCOMISS,	yxcmp,	Pm, {0x2e} },
  1347  	{ AUNPCKHPD,	yxm,	Pe, {0x15} },
  1348  	{ AUNPCKHPS,	yxm,	Pm, {0x15} },
  1349  	{ AUNPCKLPD,	yxm,	Pe, {0x14} },
  1350  	{ AUNPCKLPS,	yxm,	Pm, {0x14} },
  1351  	{ AVERR,	ydivl,	Pm, {0x00,(04)} },
  1352  	{ AVERW,	ydivl,	Pm, {0x00,(05)} },
  1353  	{ AWAIT,	ynone,	Px, {0x9b} },
  1354  	{ AWORD,	ybyte,	Px, {2} },
  1355  	{ AXCHGB,	yml_mb,	Pb, {0x86,0x86} },
  1356  	{ AXCHGL,	yxchg,	Px, {0x90,0x90,0x87,0x87} },
  1357  	{ AXCHGQ,	yxchg,	Pw, {0x90,0x90,0x87,0x87} },
  1358  	{ AXCHGW,	yxchg,	Pe, {0x90,0x90,0x87,0x87} },
  1359  	{ AXLAT,	ynone,	Px, {0xd7} },
  1360  	{ AXORB,	yxorb,	Pb, {0x34,0x80,(06),0x30,0x32} },
  1361  	{ AXORL,	yxorl,	Px, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
  1362  	{ AXORPD,	yxm,	Pe, {0x57} },
  1363  	{ AXORPS,	yxm,	Pm, {0x57} },
  1364  	{ AXORQ,	yxorl,	Pw, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
  1365  	{ AXORW,	yxorl,	Pe, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
  1366  
  1367  	{ AFMOVB,	yfmvx,	Px, {0xdf,(04)} },
  1368  	{ AFMOVBP,	yfmvp,	Px, {0xdf,(06)} },
  1369  	{ AFMOVD,	yfmvd,	Px, {0xdd,(00),0xdd,(02),0xd9,(00),0xdd,(02)} },
  1370  	{ AFMOVDP,	yfmvdp,	Px, {0xdd,(03),0xdd,(03)} },
  1371  	{ AFMOVF,	yfmvf,	Px, {0xd9,(00),0xd9,(02)} },
  1372  	{ AFMOVFP,	yfmvp,	Px, {0xd9,(03)} },
  1373  	{ AFMOVL,	yfmvf,	Px, {0xdb,(00),0xdb,(02)} },
  1374  	{ AFMOVLP,	yfmvp,	Px, {0xdb,(03)} },
  1375  	{ AFMOVV,	yfmvx,	Px, {0xdf,(05)} },
  1376  	{ AFMOVVP,	yfmvp,	Px, {0xdf,(07)} },
  1377  	{ AFMOVW,	yfmvf,	Px, {0xdf,(00),0xdf,(02)} },
  1378  	{ AFMOVWP,	yfmvp,	Px, {0xdf,(03)} },
  1379  	{ AFMOVX,	yfmvx,	Px, {0xdb,(05)} },
  1380  	{ AFMOVXP,	yfmvp,	Px, {0xdb,(07)} },
  1381  
  1382  	{ AFCOMB },
  1383  	{ AFCOMBP },
  1384  	{ AFCOMD,	yfadd,	Px, {0xdc,(02),0xd8,(02),0xdc,(02)} },	/* botch */
  1385  	{ AFCOMDP,	yfadd,	Px, {0xdc,(03),0xd8,(03),0xdc,(03)} },	/* botch */
  1386  	{ AFCOMDPP,	ycompp,	Px, {0xde,(03)} },
  1387  	{ AFCOMF,	yfmvx,	Px, {0xd8,(02)} },
  1388  	{ AFCOMFP,	yfmvx,	Px, {0xd8,(03)} },
  1389  	{ AFCOML,	yfmvx,	Px, {0xda,(02)} },
  1390  	{ AFCOMLP,	yfmvx,	Px, {0xda,(03)} },
  1391  	{ AFCOMW,	yfmvx,	Px, {0xde,(02)} },
  1392  	{ AFCOMWP,	yfmvx,	Px, {0xde,(03)} },
  1393  
  1394  	{ AFUCOM,	ycompp,	Px, {0xdd,(04)} },
  1395  	{ AFUCOMP,	ycompp, Px, {0xdd,(05)} },
  1396  	{ AFUCOMPP,	ycompp,	Px, {0xda,(13)} },
  1397  
  1398  	{ AFADDDP,	yfaddp,	Px, {0xde,(00)} },
  1399  	{ AFADDW,	yfmvx,	Px, {0xde,(00)} },
  1400  	{ AFADDL,	yfmvx,	Px, {0xda,(00)} },
  1401  	{ AFADDF,	yfmvx,	Px, {0xd8,(00)} },
  1402  	{ AFADDD,	yfadd,	Px, {0xdc,(00),0xd8,(00),0xdc,(00)} },
  1403  
  1404  	{ AFMULDP,	yfaddp,	Px, {0xde,(01)} },
  1405  	{ AFMULW,	yfmvx,	Px, {0xde,(01)} },
  1406  	{ AFMULL,	yfmvx,	Px, {0xda,(01)} },
  1407  	{ AFMULF,	yfmvx,	Px, {0xd8,(01)} },
  1408  	{ AFMULD,	yfadd,	Px, {0xdc,(01),0xd8,(01),0xdc,(01)} },
  1409  
  1410  	{ AFSUBDP,	yfaddp,	Px, {0xde,(05)} },
  1411  	{ AFSUBW,	yfmvx,	Px, {0xde,(04)} },
  1412  	{ AFSUBL,	yfmvx,	Px, {0xda,(04)} },
  1413  	{ AFSUBF,	yfmvx,	Px, {0xd8,(04)} },
  1414  	{ AFSUBD,	yfadd,	Px, {0xdc,(04),0xd8,(04),0xdc,(05)} },
  1415  
  1416  	{ AFSUBRDP,	yfaddp,	Px, {0xde,(04)} },
  1417  	{ AFSUBRW,	yfmvx,	Px, {0xde,(05)} },
  1418  	{ AFSUBRL,	yfmvx,	Px, {0xda,(05)} },
  1419  	{ AFSUBRF,	yfmvx,	Px, {0xd8,(05)} },
  1420  	{ AFSUBRD,	yfadd,	Px, {0xdc,(05),0xd8,(05),0xdc,(04)} },
  1421  
  1422  	{ AFDIVDP,	yfaddp,	Px, {0xde,(07)} },
  1423  	{ AFDIVW,	yfmvx,	Px, {0xde,(06)} },
  1424  	{ AFDIVL,	yfmvx,	Px, {0xda,(06)} },
  1425  	{ AFDIVF,	yfmvx,	Px, {0xd8,(06)} },
  1426  	{ AFDIVD,	yfadd,	Px, {0xdc,(06),0xd8,(06),0xdc,(07)} },
  1427  
  1428  	{ AFDIVRDP,	yfaddp,	Px, {0xde,(06)} },
  1429  	{ AFDIVRW,	yfmvx,	Px, {0xde,(07)} },
  1430  	{ AFDIVRL,	yfmvx,	Px, {0xda,(07)} },
  1431  	{ AFDIVRF,	yfmvx,	Px, {0xd8,(07)} },
  1432  	{ AFDIVRD,	yfadd,	Px, {0xdc,(07),0xd8,(07),0xdc,(06)} },
  1433  
  1434  	{ AFXCHD,	yfxch,	Px, {0xd9,(01),0xd9,(01)} },
  1435  	{ AFFREE },
  1436  	{ AFLDCW,	ystcw,	Px, {0xd9,(05),0xd9,(05)} },
  1437  	{ AFLDENV,	ystcw,	Px, {0xd9,(04),0xd9,(04)} },
  1438  	{ AFRSTOR,	ysvrs,	Px, {0xdd,(04),0xdd,(04)} },
  1439  	{ AFSAVE,	ysvrs,	Px, {0xdd,(06),0xdd,(06)} },
  1440  	{ AFSTCW,	ystcw,	Px, {0xd9,(07),0xd9,(07)} },
  1441  	{ AFSTENV,	ystcw,	Px, {0xd9,(06),0xd9,(06)} },
  1442  	{ AFSTSW,	ystsw,	Px, {0xdd,(07),0xdf,0xe0} },
  1443  	{ AF2XM1,	ynone,	Px, {0xd9, 0xf0} },
  1444  	{ AFABS,	ynone,	Px, {0xd9, 0xe1} },
  1445  	{ AFCHS,	ynone,	Px, {0xd9, 0xe0} },
  1446  	{ AFCLEX,	ynone,	Px, {0xdb, 0xe2} },
  1447  	{ AFCOS,	ynone,	Px, {0xd9, 0xff} },
  1448  	{ AFDECSTP,	ynone,	Px, {0xd9, 0xf6} },
  1449  	{ AFINCSTP,	ynone,	Px, {0xd9, 0xf7} },
  1450  	{ AFINIT,	ynone,	Px, {0xdb, 0xe3} },
  1451  	{ AFLD1,	ynone,	Px, {0xd9, 0xe8} },
  1452  	{ AFLDL2E,	ynone,	Px, {0xd9, 0xea} },
  1453  	{ AFLDL2T,	ynone,	Px, {0xd9, 0xe9} },
  1454  	{ AFLDLG2,	ynone,	Px, {0xd9, 0xec} },
  1455  	{ AFLDLN2,	ynone,	Px, {0xd9, 0xed} },
  1456  	{ AFLDPI,	ynone,	Px, {0xd9, 0xeb} },
  1457  	{ AFLDZ,	ynone,	Px, {0xd9, 0xee} },
  1458  	{ AFNOP,	ynone,	Px, {0xd9, 0xd0} },
  1459  	{ AFPATAN,	ynone,	Px, {0xd9, 0xf3} },
  1460  	{ AFPREM,	ynone,	Px, {0xd9, 0xf8} },
  1461  	{ AFPREM1,	ynone,	Px, {0xd9, 0xf5} },
  1462  	{ AFPTAN,	ynone,	Px, {0xd9, 0xf2} },
  1463  	{ AFRNDINT,	ynone,	Px, {0xd9, 0xfc} },
  1464  	{ AFSCALE,	ynone,	Px, {0xd9, 0xfd} },
  1465  	{ AFSIN,	ynone,	Px, {0xd9, 0xfe} },
  1466  	{ AFSINCOS,	ynone,	Px, {0xd9, 0xfb} },
  1467  	{ AFSQRT,	ynone,	Px, {0xd9, 0xfa} },
  1468  	{ AFTST,	ynone,	Px, {0xd9, 0xe4} },
  1469  	{ AFXAM,	ynone,	Px, {0xd9, 0xe5} },
  1470  	{ AFXTRACT,	ynone,	Px, {0xd9, 0xf4} },
  1471  	{ AFYL2X,	ynone,	Px, {0xd9, 0xf1} },
  1472  	{ AFYL2XP1,	ynone,	Px, {0xd9, 0xf9} },
  1473  
  1474  	{ ACMPXCHGB,	yrb_mb,	Pb, {0x0f,0xb0} },
  1475  	{ ACMPXCHGL,	yrl_ml,	Px, {0x0f,0xb1} },
  1476  	{ ACMPXCHGW,	yrl_ml,	Pe, {0x0f,0xb1} },
  1477  	{ ACMPXCHGQ,	yrl_ml,	Pw, {0x0f,0xb1} },
  1478  	{ ACMPXCHG8B,	yscond,	Pm, {0xc7,(01)} },
  1479  	{ AINVD,	ynone,	Pm, {0x08} },
  1480  	{ AINVLPG,	ymbs,	Pm, {0x01,(07)} },
  1481  	{ ALFENCE,	ynone,	Pm, {0xae,0xe8} },
  1482  	{ AMFENCE,	ynone,	Pm, {0xae,0xf0} },
  1483  	{ AMOVNTIL,	yrl_ml,	Pm, {0xc3} },
  1484  	{ AMOVNTIQ,	yrl_ml, Pw, {0x0f,0xc3} },
  1485  	{ ARDMSR,	ynone,	Pm, {0x32} },
  1486  	{ ARDPMC,	ynone,	Pm, {0x33} },
  1487  	{ ARDTSC,	ynone,	Pm, {0x31} },
  1488  	{ ARSM,		ynone,	Pm, {0xaa} },
  1489  	{ ASFENCE,	ynone,	Pm, {0xae,0xf8} },
  1490  	{ ASYSRET,	ynone,	Pm, {0x07} },
  1491  	{ AWBINVD,	ynone,	Pm, {0x09} },
  1492  	{ AWRMSR,	ynone,	Pm, {0x30} },
  1493  
  1494  	{ AXADDB,	yrb_mb,	Pb, {0x0f,0xc0} },
  1495  	{ AXADDL,	yrl_ml,	Px, {0x0f,0xc1} },
  1496  	{ AXADDQ,	yrl_ml,	Pw, {0x0f,0xc1} },
  1497  	{ AXADDW,	yrl_ml,	Pe, {0x0f,0xc1} },
  1498  
  1499  	{ ACRC32B,       ycrc32l,Px, {0xf2,0x0f,0x38,0xf0,0} },
  1500  	{ ACRC32Q,       ycrc32l,Pw, {0xf2,0x0f,0x38,0xf1,0} },
  1501  	
  1502  	{ APREFETCHT0,	yprefetch,	Pm,	{0x18,(01)} },
  1503  	{ APREFETCHT1,	yprefetch,	Pm,	{0x18,(02)} },
  1504  	{ APREFETCHT2,	yprefetch,	Pm,	{0x18,(03)} },
  1505  	{ APREFETCHNTA,	yprefetch,	Pm,	{0x18,(00)} },
  1506  	
  1507  	{ AMOVQL,	yrl_ml,	Px, {0x89} },
  1508  
  1509  	{ AUNDEF,		ynone,	Px, {0x0f, 0x0b} },
  1510  
  1511  	{ AAESENC,	yaes,	Pq, {0x38,0xdc,(0)} },
  1512  	{ AAESENCLAST,	yaes,	Pq, {0x38,0xdd,(0)} },
  1513  	{ AAESDEC,	yaes,	Pq, {0x38,0xde,(0)} },
  1514  	{ AAESDECLAST,	yaes,	Pq, {0x38,0xdf,(0)} },
  1515  	{ AAESIMC,	yaes,	Pq, {0x38,0xdb,(0)} },
  1516  	{ AAESKEYGENASSIST,	yaes2,	Pq, {0x3a,0xdf,(0)} },
  1517  
  1518  	{ APSHUFD,	yaes2,	Pq,	{0x70,(0)} },
  1519  	{ APCLMULQDQ,	yxshuf,	Pq, {0x3a,0x44,0} },
  1520  
  1521  	{ AUSEFIELD,	ynop,	Px, {0,0} },
  1522  	{ ATYPE },
  1523  	{ AFUNCDATA,	yfuncdata,	Px, {0,0} },
  1524  	{ APCDATA,	ypcdata,	Px, {0,0} },
  1525  	{ ACHECKNIL },
  1526  	{ AVARDEF },
  1527  	{ AVARKILL },
  1528  	{ ADUFFCOPY,	yduff,	Px, {0xe8} },
  1529  	{ ADUFFZERO,	yduff,	Px, {0xe8} },
  1530  
  1531  	{ AEND },
  1532  	{0}
  1533  };
  1534  
// opindex[as] points at the optab row for opcode `as`; instinit fills it in
// by walking optab once.
static Optab*	opindex[ALAST+1];
// Forward declaration of vaddr so code above its definition can call it.
static vlong	vaddr(Link*, Addr*, Reloc*);
  1537  
  1538  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1539  // This happens on systems like Solaris that call .so functions instead of system calls.
  1540  // It does not seem to be necessary for any other systems. This is probably working
  1541  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1542  // what that bug is. And this does fix it.
  1543  static int
  1544  isextern(LSym *s)
  1545  {
  1546  	// All the Solaris dynamic imports from libc.so begin with "libc·", which
  1547  	// the compiler rewrites to "libc." by the time liblink gets it.
  1548  	return strncmp(s->name, "libc.", 5) == 0;
  1549  }
  1550  
  1551  // single-instruction no-ops of various lengths.
  1552  // constructed by hand and disassembled with gdb to verify.
  1553  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1554  static uchar nop[][16] = {
  1555  	{0x90},
  1556  	{0x66, 0x90},
  1557  	{0x0F, 0x1F, 0x00},
  1558  	{0x0F, 0x1F, 0x40, 0x00},
  1559  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1560  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1561  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1562  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1563  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1564  	// Native Client rejects the repeated 0x66 prefix.
  1565  	// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1566  };
  1567  
  1568  static void
  1569  fillnop(uchar *p, int n)
  1570  {
  1571  	int m;
  1572  
  1573  	while(n > 0) {
  1574  		m = n;
  1575  		if(m > nelem(nop))
  1576  			m = nelem(nop);
  1577  		memmove(p, nop[m-1], m);
  1578  		p += m;
  1579  		n -= m;
  1580  	}
  1581  }
  1582  
// Forward declaration: span6 calls instinit before its definition appears.
static void instinit(void);
  1584  
  1585  static int32
  1586  naclpad(Link *ctxt, LSym *s, int32 c, int32 pad)
  1587  {
  1588  	symgrow(ctxt, s, c+pad);
  1589  	fillnop(s->p+c, pad);
  1590  	return c+pad;
  1591  }
  1592  
  1593  static int
  1594  spadjop(Link *ctxt, Prog *p, int l, int q)
  1595  {
  1596  	if(p->mode != 64 || ctxt->arch->ptrsize == 4)
  1597  		return l;
  1598  	return q;
  1599  }
  1600  
  1601  void
  1602  span6(Link *ctxt, LSym *s)
  1603  {
  1604  	Prog *p, *q;
  1605  	int32 c, v, loop;
  1606  	uchar *bp;
  1607  	int n, m, i;
  1608  
  1609  	ctxt->cursym = s;
  1610  	
  1611  	if(s->p != nil)
  1612  		return;
  1613  	
  1614  	if(ycover[0] == 0)
  1615  		instinit();
  1616  	
  1617  	for(p = ctxt->cursym->text; p != nil; p = p->link) {
  1618  		n = 0;
  1619  		if(p->to.type == D_BRANCH)
  1620  			if(p->pcond == nil)
  1621  				p->pcond = p;
  1622  		if((q = p->pcond) != nil)
  1623  			if(q->back != 2)
  1624  				n = 1;
  1625  		p->back = n;
  1626  		if(p->as == AADJSP) {
  1627  			p->to.type = D_SP;
  1628  			v = -p->from.offset;
  1629  			p->from.offset = v;
  1630  			p->as = spadjop(ctxt, p, AADDL, AADDQ);
  1631  			if(v < 0) {
  1632  				p->as = spadjop(ctxt, p, ASUBL, ASUBQ);
  1633  				v = -v;
  1634  				p->from.offset = v;
  1635  			}
  1636  			if(v == 0)
  1637  				p->as = ANOP;
  1638  		}
  1639  	}
  1640  
  1641  	for(p = s->text; p != nil; p = p->link) {
  1642  		p->back = 2;	// use short branches first time through
  1643  		if((q = p->pcond) != nil && (q->back & 2)) {
  1644  			p->back |= 1;	// backward jump
  1645  			q->back |= 4;   // loop head
  1646  		}
  1647  
  1648  		if(p->as == AADJSP) {
  1649  			p->to.type = D_SP;
  1650  			v = -p->from.offset;
  1651  			p->from.offset = v;
  1652  			p->as = spadjop(ctxt, p, AADDL, AADDQ);
  1653  			if(v < 0) {
  1654  				p->as = spadjop(ctxt, p, ASUBL, ASUBQ);
  1655  				v = -v;
  1656  				p->from.offset = v;
  1657  			}
  1658  			if(v == 0)
  1659  				p->as = ANOP;
  1660  		}
  1661  	}
  1662  	
  1663  	n = 0;
  1664  	do {
  1665  		loop = 0;
  1666  		memset(s->r, 0, s->nr*sizeof s->r[0]);
  1667  		s->nr = 0;
  1668  		s->np = 0;
  1669  		c = 0;
  1670  		for(p = s->text; p != nil; p = p->link) {
  1671  			if(ctxt->headtype == Hnacl && p->isize > 0) {
  1672  				static LSym *deferreturn;
  1673  				
  1674  				if(deferreturn == nil)
  1675  					deferreturn = linklookup(ctxt, "runtime.deferreturn", 0);
  1676  
  1677  				// pad everything to avoid crossing 32-byte boundary
  1678  				if((c>>5) != ((c+p->isize-1)>>5))
  1679  					c = naclpad(ctxt, s, c, -c&31);
  1680  				// pad call deferreturn to start at 32-byte boundary
  1681  				// so that subtracting 5 in jmpdefer will jump back
  1682  				// to that boundary and rerun the call.
  1683  				if(p->as == ACALL && p->to.sym == deferreturn)
  1684  					c = naclpad(ctxt, s, c, -c&31);
  1685  				// pad call to end at 32-byte boundary
  1686  				if(p->as == ACALL)
  1687  					c = naclpad(ctxt, s, c, -(c+p->isize)&31);
  1688  				
  1689  				// the linker treats REP and STOSQ as different instructions
  1690  				// but in fact the REP is a prefix on the STOSQ.
  1691  				// make sure REP has room for 2 more bytes, so that
  1692  				// padding will not be inserted before the next instruction.
  1693  				if((p->as == AREP || p->as == AREPN) && (c>>5) != ((c+3-1)>>5))
  1694  					c = naclpad(ctxt, s, c, -c&31);
  1695  				
  1696  				// same for LOCK.
  1697  				// various instructions follow; the longest is 4 bytes.
  1698  				// give ourselves 8 bytes so as to avoid surprises.
  1699  				if(p->as == ALOCK && (c>>5) != ((c+8-1)>>5))
  1700  					c = naclpad(ctxt, s, c, -c&31);
  1701  			}
  1702  
  1703  			if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
  1704  				// pad with NOPs
  1705  				v = -c&(LoopAlign-1);
  1706  				if(v <= MaxLoopPad) {
  1707  					symgrow(ctxt, s, c+v);
  1708  					fillnop(s->p+c, v);
  1709  					c += v;
  1710  				}
  1711  			}
  1712  
  1713  			p->pc = c;
  1714  
  1715  			// process forward jumps to p
  1716  			for(q = p->comefrom; q != nil; q = q->forwd) {
  1717  				v = p->pc - (q->pc + q->mark);
  1718  				if(q->back & 2)	{	// short
  1719  					if(v > 127) {
  1720  						loop++;
  1721  						q->back ^= 2;
  1722  					}
  1723  					if(q->as == AJCXZL)
  1724  						s->p[q->pc+2] = v;
  1725  					else
  1726  						s->p[q->pc+1] = v;
  1727  				} else {
  1728  					bp = s->p + q->pc + q->mark - 4;
  1729  					*bp++ = v;
  1730  					*bp++ = v>>8;
  1731  					*bp++ = v>>16;
  1732  					*bp = v>>24;
  1733  				}	
  1734  			}
  1735  			p->comefrom = nil;
  1736  
  1737  			p->pc = c;
  1738  			asmins(ctxt, p);
  1739  			m = ctxt->andptr-ctxt->and;
  1740  			if(p->isize != m) {
  1741  				p->isize = m;
  1742  				loop++;
  1743  			}
  1744  			symgrow(ctxt, s, p->pc+m);
  1745  			memmove(s->p+p->pc, ctxt->and, m);
  1746  			p->mark = m;
  1747  			c += m;
  1748  		}
  1749  		if(++n > 20) {
  1750  			ctxt->diag("span must be looping");
  1751  			sysfatal("loop");
  1752  		}
  1753  	} while(loop);
  1754  	
  1755  	if(ctxt->headtype == Hnacl)
  1756  		c = naclpad(ctxt, s, c, -c&31);
  1757  	
  1758  	c += -c&(FuncAlign-1);
  1759  	s->size = c;
  1760  
  1761  	if(0 /* debug['a'] > 1 */) {
  1762  		print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
  1763  		for(i=0; i<s->np; i++) {
  1764  			print(" %.2ux", s->p[i]);
  1765  			if(i%16 == 15)
  1766  				print("\n  %.6ux", i+1);
  1767  		}
  1768  		if(i%16)
  1769  			print("\n");
  1770  	
  1771  		for(i=0; i<s->nr; i++) {
  1772  			Reloc *r;
  1773  			
  1774  			r = &s->r[i];
  1775  			print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
  1776  		}
  1777  	}
  1778  }
  1779  
  1780  static void
  1781  instinit(void)
  1782  {
  1783  	int c, i;
  1784  
  1785  	for(i=1; optab[i].as; i++) {
  1786  		c = optab[i].as;
  1787  		if(opindex[c] != nil)
  1788  			sysfatal("phase error in optab: %d (%A)", i, c);
  1789  		opindex[c] = &optab[i];
  1790  	}
  1791  
  1792  	for(i=0; i<Ymax; i++)
  1793  		ycover[i*Ymax + i] = 1;
  1794  
  1795  	ycover[Yi0*Ymax + Yi8] = 1;
  1796  	ycover[Yi1*Ymax + Yi8] = 1;
  1797  
  1798  	ycover[Yi0*Ymax + Ys32] = 1;
  1799  	ycover[Yi1*Ymax + Ys32] = 1;
  1800  	ycover[Yi8*Ymax + Ys32] = 1;
  1801  
  1802  	ycover[Yi0*Ymax + Yi32] = 1;
  1803  	ycover[Yi1*Ymax + Yi32] = 1;
  1804  	ycover[Yi8*Ymax + Yi32] = 1;
  1805  	ycover[Ys32*Ymax + Yi32] = 1;
  1806  
  1807  	ycover[Yi0*Ymax + Yi64] = 1;
  1808  	ycover[Yi1*Ymax + Yi64] = 1;
  1809  	ycover[Yi8*Ymax + Yi64] = 1;
  1810  	ycover[Ys32*Ymax + Yi64] = 1;
  1811  	ycover[Yi32*Ymax + Yi64] = 1;
  1812  
  1813  	ycover[Yal*Ymax + Yrb] = 1;
  1814  	ycover[Ycl*Ymax + Yrb] = 1;
  1815  	ycover[Yax*Ymax + Yrb] = 1;
  1816  	ycover[Ycx*Ymax + Yrb] = 1;
  1817  	ycover[Yrx*Ymax + Yrb] = 1;
  1818  	ycover[Yrl*Ymax + Yrb] = 1;
  1819  
  1820  	ycover[Ycl*Ymax + Ycx] = 1;
  1821  
  1822  	ycover[Yax*Ymax + Yrx] = 1;
  1823  	ycover[Ycx*Ymax + Yrx] = 1;
  1824  
  1825  	ycover[Yax*Ymax + Yrl] = 1;
  1826  	ycover[Ycx*Ymax + Yrl] = 1;
  1827  	ycover[Yrx*Ymax + Yrl] = 1;
  1828  
  1829  	ycover[Yf0*Ymax + Yrf] = 1;
  1830  
  1831  	ycover[Yal*Ymax + Ymb] = 1;
  1832  	ycover[Ycl*Ymax + Ymb] = 1;
  1833  	ycover[Yax*Ymax + Ymb] = 1;
  1834  	ycover[Ycx*Ymax + Ymb] = 1;
  1835  	ycover[Yrx*Ymax + Ymb] = 1;
  1836  	ycover[Yrb*Ymax + Ymb] = 1;
  1837  	ycover[Yrl*Ymax + Ymb] = 1;
  1838  	ycover[Ym*Ymax + Ymb] = 1;
  1839  
  1840  	ycover[Yax*Ymax + Yml] = 1;
  1841  	ycover[Ycx*Ymax + Yml] = 1;
  1842  	ycover[Yrx*Ymax + Yml] = 1;
  1843  	ycover[Yrl*Ymax + Yml] = 1;
  1844  	ycover[Ym*Ymax + Yml] = 1;
  1845  
  1846  	ycover[Yax*Ymax + Ymm] = 1;
  1847  	ycover[Ycx*Ymax + Ymm] = 1;
  1848  	ycover[Yrx*Ymax + Ymm] = 1;
  1849  	ycover[Yrl*Ymax + Ymm] = 1;
  1850  	ycover[Ym*Ymax + Ymm] = 1;
  1851  	ycover[Ymr*Ymax + Ymm] = 1;
  1852  
  1853  	ycover[Ym*Ymax + Yxm] = 1;
  1854  	ycover[Yxr*Ymax + Yxm] = 1;
  1855  
  1856  	for(i=0; i<D_NONE; i++) {
  1857  		reg[i] = -1;
  1858  		if(i >= D_AL && i <= D_R15B) {
  1859  			reg[i] = (i-D_AL) & 7;
  1860  			if(i >= D_SPB && i <= D_DIB)
  1861  				regrex[i] = 0x40;
  1862  			if(i >= D_R8B && i <= D_R15B)
  1863  				regrex[i] = Rxr | Rxx | Rxb;
  1864  		}
  1865  		if(i >= D_AH && i<= D_BH)
  1866  			reg[i] = 4 + ((i-D_AH) & 7);
  1867  		if(i >= D_AX && i <= D_R15) {
  1868  			reg[i] = (i-D_AX) & 7;
  1869  			if(i >= D_R8)
  1870  				regrex[i] = Rxr | Rxx | Rxb;
  1871  		}
  1872  		if(i >= D_F0 && i <= D_F0+7)
  1873  			reg[i] = (i-D_F0) & 7;
  1874  		if(i >= D_M0 && i <= D_M0+7)
  1875  			reg[i] = (i-D_M0) & 7;
  1876  		if(i >= D_X0 && i <= D_X0+15) {
  1877  			reg[i] = (i-D_X0) & 7;
  1878  			if(i >= D_X0+8)
  1879  				regrex[i] = Rxr | Rxx | Rxb;
  1880  		}
  1881  		if(i >= D_CR+8 && i <= D_CR+15)
  1882  			regrex[i] = Rxr;
  1883  	}
  1884  }
  1885  
  1886  static int
  1887  prefixof(Link *ctxt, Addr *a)
  1888  {
  1889  	switch(a->type) {
  1890  	case D_INDIR+D_CS:
  1891  		return 0x2e;
  1892  	case D_INDIR+D_DS:
  1893  		return 0x3e;
  1894  	case D_INDIR+D_ES:
  1895  		return 0x26;
  1896  	case D_INDIR+D_FS:
  1897  		return 0x64;
  1898  	case D_INDIR+D_GS:
  1899  		return 0x65;
  1900  	case D_INDIR+D_TLS:
  1901  		// NOTE: Systems listed here should be only systems that
  1902  		// support direct TLS references like 8(TLS) implemented as
  1903  		// direct references from FS or GS. Systems that require
  1904  		// the initial-exec model, where you load the TLS base into
  1905  		// a register and then index from that register, do not reach
  1906  		// this code and should not be listed.
  1907  		switch(ctxt->headtype) {
  1908  		default:
  1909  			sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype));
  1910  		case Hdragonfly:
  1911  		case Hfreebsd:
  1912  		case Hlinux:
  1913  		case Hnetbsd:
  1914  		case Hopenbsd:
  1915  		case Hsolaris:
  1916  			return 0x64; // FS
  1917  		case Hdarwin:
  1918  			return 0x65; // GS
  1919  		}
  1920  	}
  1921  	switch(a->index) {
  1922  	case D_CS:
  1923  		return 0x2e;
  1924  	case D_DS:
  1925  		return 0x3e;
  1926  	case D_ES:
  1927  		return 0x26;
  1928  	case D_FS:
  1929  		return 0x64;
  1930  	case D_GS:
  1931  		return 0x65;
  1932  	}
  1933  	return 0;
  1934  }
  1935  
/*
 * oclass returns the operand class (one of the Y* constants) of a.
 * doasm stores the result in p->ft / p->tt and uses it, through ycover,
 * to match the operand against an instruction's ytab patterns.
 * Yxxx is returned for an operand that cannot be classified, including
 * the registers that exist only when ctxt->asmode is 64.
 */
static int
oclass(Link *ctxt, Addr *a)
{
	vlong v;
	int32 l;

	/* indirect operands and anything carrying an index field */
	if(a->type >= D_INDIR || a->index != D_NONE) {
		if(a->index != D_NONE && a->scale == 0) {
			/* index set but no scale: not a scaled-index memory reference */
			if(a->type == D_ADDR) {
				/* address of a name; the kind of name is kept in index */
				switch(a->index) {
				case D_EXTERN:
				case D_STATIC:
					if(a->sym != nil && isextern(a->sym))
						return Yi32;
					return Yiauto; // use pc-relative addressing
				case D_AUTO:
				case D_PARAM:
					return Yiauto;
				}
				return Yxxx;
			}
			return Ycol;
		}
		return Ym;
	}
	switch(a->type)
	{
	case D_AL:
		return Yal;

	case D_AX:
		return Yax;

/*
	case D_SPB:
*/
	/* these byte registers are rejected unless assembling in 64-bit mode */
	case D_BPB:
	case D_SIB:
	case D_DIB:
	case D_R8B:
	case D_R9B:
	case D_R10B:
	case D_R11B:
	case D_R12B:
	case D_R13B:
	case D_R14B:
	case D_R15B:
		if(ctxt->asmode != 64)
			return Yxxx;
		/* fall through: in 64-bit mode they are ordinary byte registers */
	case D_DL:
	case D_BL:
	case D_AH:
	case D_CH:
	case D_DH:
	case D_BH:
		return Yrb;

	case D_CL:
		return Ycl;

	case D_CX:
		return Ycx;

	case D_DX:
	case D_BX:
		return Yrx;

	case D_R8:	/* not really Yrl */
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(ctxt->asmode != 64)
			return Yxxx;
		/* fall through: classified together with SP, BP, SI, DI */
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		return Yrl;

	case D_F0+0:
		return	Yf0;

	case D_F0+1:
	case D_F0+2:
	case D_F0+3:
	case D_F0+4:
	case D_F0+5:
	case D_F0+6:
	case D_F0+7:
		return	Yrf;

	case D_M0+0:
	case D_M0+1:
	case D_M0+2:
	case D_M0+3:
	case D_M0+4:
	case D_M0+5:
	case D_M0+6:
	case D_M0+7:
		return	Ymr;

	case D_X0+0:
	case D_X0+1:
	case D_X0+2:
	case D_X0+3:
	case D_X0+4:
	case D_X0+5:
	case D_X0+6:
	case D_X0+7:
	case D_X0+8:
	case D_X0+9:
	case D_X0+10:
	case D_X0+11:
	case D_X0+12:
	case D_X0+13:
	case D_X0+14:
	case D_X0+15:
		return	Yxr;

	case D_NONE:
		return Ynone;

	/* segment registers and the TLS pseudo-register */
	case D_CS:	return	Ycs;
	case D_SS:	return	Yss;
	case D_DS:	return	Yds;
	case D_ES:	return	Yes;
	case D_FS:	return	Yfs;
	case D_GS:	return	Ygs;
	case D_TLS:	return	Ytls;

	case D_GDTR:	return	Ygdtr;
	case D_IDTR:	return	Yidtr;
	case D_LDTR:	return	Yldtr;
	case D_MSW:	return	Ymsw;
	case D_TASK:	return	Ytask;

	/* only D_CR+0 through D_CR+8 have a class; higher ones end up as Yxxx */
	case D_CR+0:	return	Ycr0;
	case D_CR+1:	return	Ycr1;
	case D_CR+2:	return	Ycr2;
	case D_CR+3:	return	Ycr3;
	case D_CR+4:	return	Ycr4;
	case D_CR+5:	return	Ycr5;
	case D_CR+6:	return	Ycr6;
	case D_CR+7:	return	Ycr7;
	case D_CR+8:	return	Ycr8;

	case D_DR+0:	return	Ydr0;
	case D_DR+1:	return	Ydr1;
	case D_DR+2:	return	Ydr2;
	case D_DR+3:	return	Ydr3;
	case D_DR+4:	return	Ydr4;
	case D_DR+5:	return	Ydr5;
	case D_DR+6:	return	Ydr6;
	case D_DR+7:	return	Ydr7;

	case D_TR+0:	return	Ytr0;
	case D_TR+1:	return	Ytr1;
	case D_TR+2:	return	Ytr2;
	case D_TR+3:	return	Ytr3;
	case D_TR+4:	return	Ytr4;
	case D_TR+5:	return	Ytr5;
	case D_TR+6:	return	Ytr6;
	case D_TR+7:	return	Ytr7;

	/* a named location with no index is a memory operand */
	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return Ym;

	case D_CONST:
	case D_ADDR:
		/* a plain number is given the narrowest class that holds its value */
		if(a->sym == nil) {
			v = a->offset;
			if(v == 0)
				return Yi0;
			if(v == 1)
				return Yi1;
			if(v >= -128 && v <= 127)
				return Yi8;
			/* narrow to 32 bits and widen again: unchanged means it sign-extends */
			l = v;
			if((vlong)l == v)
				return Ys32;	/* can sign extend */
			if((v>>32) == 0)
				return Yi32;	/* unsigned */
			return Yi64;
		}
		/* a constant that refers to a symbol is always classed as Yi32 */
		return Yi32;

	case D_BRANCH:
		return Ybr;
	}
	return Yxxx;
}
  2134  
  2135  static void
  2136  asmidx(Link *ctxt, int scale, int index, int base)
  2137  {
  2138  	int i;
  2139  
  2140  	switch(index) {
  2141  	default:
  2142  		goto bad;
  2143  
  2144  	case D_NONE:
  2145  		i = 4 << 3;
  2146  		goto bas;
  2147  
  2148  	case D_R8:
  2149  	case D_R9:
  2150  	case D_R10:
  2151  	case D_R11:
  2152  	case D_R12:
  2153  	case D_R13:
  2154  	case D_R14:
  2155  	case D_R15:
  2156  		if(ctxt->asmode != 64)
  2157  			goto bad;
  2158  	case D_AX:
  2159  	case D_CX:
  2160  	case D_DX:
  2161  	case D_BX:
  2162  	case D_BP:
  2163  	case D_SI:
  2164  	case D_DI:
  2165  		i = reg[index] << 3;
  2166  		break;
  2167  	}
  2168  	switch(scale) {
  2169  	default:
  2170  		goto bad;
  2171  	case 1:
  2172  		break;
  2173  	case 2:
  2174  		i |= (1<<6);
  2175  		break;
  2176  	case 4:
  2177  		i |= (2<<6);
  2178  		break;
  2179  	case 8:
  2180  		i |= (3<<6);
  2181  		break;
  2182  	}
  2183  bas:
  2184  	switch(base) {
  2185  	default:
  2186  		goto bad;
  2187  	case D_NONE:	/* must be mod=00 */
  2188  		i |= 5;
  2189  		break;
  2190  	case D_R8:
  2191  	case D_R9:
  2192  	case D_R10:
  2193  	case D_R11:
  2194  	case D_R12:
  2195  	case D_R13:
  2196  	case D_R14:
  2197  	case D_R15:
  2198  		if(ctxt->asmode != 64)
  2199  			goto bad;
  2200  	case D_AX:
  2201  	case D_CX:
  2202  	case D_DX:
  2203  	case D_BX:
  2204  	case D_SP:
  2205  	case D_BP:
  2206  	case D_SI:
  2207  	case D_DI:
  2208  		i |= reg[base];
  2209  		break;
  2210  	}
  2211  	*ctxt->andptr++ = i;
  2212  	return;
  2213  bad:
  2214  	ctxt->diag("asmidx: bad address %d/%d/%d", scale, index, base);
  2215  	*ctxt->andptr++ = 0;
  2216  	return;
  2217  }
  2218  
  2219  static void
  2220  put4(Link *ctxt, int32 v)
  2221  {
  2222  	ctxt->andptr[0] = v;
  2223  	ctxt->andptr[1] = v>>8;
  2224  	ctxt->andptr[2] = v>>16;
  2225  	ctxt->andptr[3] = v>>24;
  2226  	ctxt->andptr += 4;
  2227  }
  2228  
  2229  static void
  2230  relput4(Link *ctxt, Prog *p, Addr *a)
  2231  {
  2232  	vlong v;
  2233  	Reloc rel, *r;
  2234  	
  2235  	v = vaddr(ctxt, a, &rel);
  2236  	if(rel.siz != 0) {
  2237  		if(rel.siz != 4)
  2238  			ctxt->diag("bad reloc");
  2239  		r = addrel(ctxt->cursym);
  2240  		*r = rel;
  2241  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2242  	}
  2243  	put4(ctxt, v);
  2244  }
  2245  
  2246  static void
  2247  put8(Link *ctxt, vlong v)
  2248  {
  2249  	ctxt->andptr[0] = v;
  2250  	ctxt->andptr[1] = v>>8;
  2251  	ctxt->andptr[2] = v>>16;
  2252  	ctxt->andptr[3] = v>>24;
  2253  	ctxt->andptr[4] = v>>32;
  2254  	ctxt->andptr[5] = v>>40;
  2255  	ctxt->andptr[6] = v>>48;
  2256  	ctxt->andptr[7] = v>>56;
  2257  	ctxt->andptr += 8;
  2258  }
  2259  
  2260  /*
  2261  static void
  2262  relput8(Prog *p, Addr *a)
  2263  {
  2264  	vlong v;
  2265  	Reloc rel, *r;
  2266  	
  2267  	v = vaddr(ctxt, a, &rel);
  2268  	if(rel.siz != 0) {
  2269  		r = addrel(ctxt->cursym);
  2270  		*r = rel;
  2271  		r->siz = 8;
  2272  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2273  	}
  2274  	put8(ctxt, v);
  2275  }
  2276  */
  2277  
  2278  static vlong
  2279  vaddr(Link *ctxt, Addr *a, Reloc *r)
  2280  {
  2281  	int t;
  2282  	vlong v;
  2283  	LSym *s;
  2284  	
  2285  	if(r != nil)
  2286  		memset(r, 0, sizeof *r);
  2287  
  2288  	t = a->type;
  2289  	v = a->offset;
  2290  	if(t == D_ADDR)
  2291  		t = a->index;
  2292  	switch(t) {
  2293  	case D_STATIC:
  2294  	case D_EXTERN:
  2295  		s = a->sym;
  2296  		if(r == nil) {
  2297  			ctxt->diag("need reloc for %D", a);
  2298  			sysfatal("reloc");
  2299  		}
  2300  		if(isextern(s)) {
  2301  			r->siz = 4;
  2302  			r->type = R_ADDR;
  2303  		} else {
  2304  			r->siz = 4;
  2305  			r->type = R_PCREL;
  2306  		}
  2307  		r->off = -1;	// caller must fill in
  2308  		r->sym = s;
  2309  		r->add = v;
  2310  		v = 0;
  2311  		if(s->type == STLSBSS) {
  2312  			r->xadd = r->add - r->siz;
  2313  			r->type = R_TLS;
  2314  			r->xsym = s;
  2315  		}
  2316  		break;
  2317  	
  2318  	case D_INDIR+D_TLS:
  2319  		if(r == nil) {
  2320  			ctxt->diag("need reloc for %D", a);
  2321  			sysfatal("reloc");
  2322  		}
  2323  		r->type = R_TLS_LE;
  2324  		r->siz = 4;
  2325  		r->off = -1;	// caller must fill in
  2326  		r->add = v;
  2327  		v = 0;
  2328  		break;
  2329  	}
  2330  	return v;
  2331  }
  2332  
/*
 * asmandsz appends to ctxt->andptr the addressing bytes for operand a:
 * a first byte built as (mod << 6) | (r << 3) | rm, followed where
 * needed by a scale/index/base byte from asmidx and by a one-byte or
 * four-byte displacement.  r is the value for the middle three-bit
 * field (a register number or an opcode extension).  rex carries
 * prefix flag bits for r; only the 0x40 and Rxr bits are kept, and they
 * are merged into ctxt->rexflag together with the bits required by the
 * base and index registers.  m64 is not used.
 *
 * Relocations needed by the displacement are added to ctxt->cursym.
 * An operand that cannot be encoded is reported with ctxt->diag; bytes
 * already emitted at that point are left in place.
 */
static void
asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64)
{
	int32 v;
	int t, scale;
	Reloc rel;

	USED(m64);
	rex &= (0x40 | Rxr);
	v = a->offset;
	t = a->type;
	rel.siz = 0;	/* only siz is initialised here; it marks "no relocation" */

	/* operand with an index register: always uses the rm=4 form plus an asmidx byte */
	if(a->index != D_NONE && a->index != D_TLS) {
		/* reduce t to the base register (or D_NONE) */
		if(t < D_INDIR) { 
			switch(t) {
			default:
				goto bad;
			case D_EXTERN:
			case D_STATIC:
				if(!isextern(a->sym))
					goto bad;
				t = D_NONE;
				v = vaddr(ctxt, a, &rel);
				break;
			case D_AUTO:
			case D_PARAM:
				t = D_SP;
				break;
			}
		} else
			t -= D_INDIR;
		ctxt->rexflag |= (regrex[(int)a->index] & Rxx) | (regrex[t] & Rxb) | rex;
		if(t == D_NONE) {
			/* no base register: mod=0 followed by a 32-bit displacement */
			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, a->scale, a->index, t);
			goto putrelv;
		}
		/* BP and R13 never take the displacement-free form */
		if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, a->scale, a->index, t);
			return;
		}
		/* displacement fits in a signed byte: mod=1 */
		if(v >= -128 && v < 128 && rel.siz == 0) {
			*ctxt->andptr++ = (1 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, a->scale, a->index, t);
			*ctxt->andptr++ = v;
			return;
		}
		/* otherwise mod=2 with a 32-bit displacement */
		*ctxt->andptr++ = (2 << 6) | (4 << 0) | (r << 3);
		asmidx(ctxt, a->scale, a->index, t);
		goto putrelv;
	}

	/* register operand: mod=3, register number in the rm field; an offset is an error */
	if(t >= D_AL && t <= D_X0+15) {
		if(v)
			goto bad;
		*ctxt->andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
		ctxt->rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
		return;
	}

	/* memory operand without an index register: reduce t to its base */
	scale = a->scale;
	if(t < D_INDIR) {
		switch(a->type) {
		default:
			goto bad;
		case D_STATIC:
		case D_EXTERN:
			t = D_NONE;
			v = vaddr(ctxt, a, &rel);
			break;
		case D_AUTO:
		case D_PARAM:
			t = D_SP;
			break;
		}
		scale = 1;
	} else
		t -= D_INDIR;
	if(t == D_TLS)
		v = vaddr(ctxt, a, &rel);

	ctxt->rexflag |= (regrex[t] & Rxb) | rex;

	/* no base register (absolute, segment-relative or TLS-relative): 32-bit displacement only */
	if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) {
		/*
		 * NOTE: && binds tighter than ||, so the rm=5 form below is chosen
		 * either for a static/extern name whose symbol is nil or not
		 * isextern, or for any operand when not assembling in 64-bit mode.
		 */
		if((a->sym == nil || !isextern(a->sym)) && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || ctxt->asmode != 64) {
			*ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3);
			goto putrelv;
		}
		/* temporary */
		*ctxt->andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
		*ctxt->andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
		goto putrelv;
	}

	/* SP and R12 as base: an asmidx byte with no index always follows the first byte */
	if(t == D_SP || t == D_R12) {
		if(v == 0) {
			*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(ctxt, scale, D_NONE, t);
			return;
		}
		if(v >= -128 && v < 128) {
			*ctxt->andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(ctxt, scale, D_NONE, t);
			*ctxt->andptr++ = v;
			return;
		}
		*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
		asmidx(ctxt, scale, D_NONE, t);
		goto putrelv;
	}

	/* any other general register as base */
	if(t >= D_AX && t <= D_R15) {
		if(a->index == D_TLS) {
			/* TLS-indexed reference: the offset moves into an R_TLS_IE relocation */
			memset(&rel, 0, sizeof rel);
			rel.type = R_TLS_IE;
			rel.siz = 4;
			rel.sym = nil;
			rel.add = v;
			v = 0;
		}
		/* BP and R13 never take the displacement-free form */
		if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
			*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
			return;
		}
		if(v >= -128 && v < 128 && rel.siz == 0) {
			ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
			ctxt->andptr[1] = v;
			ctxt->andptr += 2;
			return;
		}
		*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
		goto putrelv;
	}
	goto bad;

	/* emit the 32-bit displacement, recording its relocation first if there is one */
putrelv:
	if(rel.siz != 0) {
		/* note: this local r hides the int parameter r inside this block */
		Reloc *r;

		if(rel.siz != 4) {
			ctxt->diag("bad rel");
			goto bad;
		}
		r = addrel(ctxt->cursym);
		*r = rel;
		/* offset of the displacement: instruction pc plus bytes assembled so far */
		r->off = ctxt->curp->pc + ctxt->andptr - ctxt->and;
	}

	put4(ctxt, v);
	return;

bad:
	ctxt->diag("asmand: bad address %D", a);
	return;
}
  2485  
  2486  static void
  2487  asmand(Link *ctxt, Addr *a, Addr *ra)
  2488  {
  2489  	asmandsz(ctxt, a, reg[ra->type], regrex[ra->type], 0);
  2490  }
  2491  
  2492  static void
  2493  asmando(Link *ctxt, Addr *a, int o)
  2494  {
  2495  	asmandsz(ctxt, a, o, 0, 0);
  2496  }
  2497  
  2498  static void
  2499  bytereg(Addr *a, uint8 *t)
  2500  {
  2501  	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15)) {
  2502  		a->type = D_AL + (a->type-D_AX);
  2503  		*t = 0;
  2504  	}
  2505  }
  2506  
/*
 * E is a marker byte used inside the ymovtab rows below.
 * NOTE(review): in those rows it appears directly after the last
 * literal byte, so it looks like an end-of-sequence marker; the code
 * that reads the table is outside this view - confirm there.
 */
enum {
	E = 0xff,
};
/*
 * ymovtab lists special-case instruction forms, mostly moves to and
 * from segment, control, debug, test and descriptor-table registers.
 *
 * Each row holds: the opcode, the class of the source operand, the
 * class of the destination operand, a small code (0-7 in this table)
 * and up to four data bytes.  The table ends with an all-zero row.
 *
 * NOTE(review): doasm jumps to its `domov` label when no ytab pattern
 * matches; that code is outside this view.  Presumably it searches this
 * table and the code column selects how the data bytes are used -
 * confirm against domov before changing any row.
 */
static Movtab	ymovtab[] =
{
/* push */
	{APUSHL,	Ycs,	Ynone,	0,	{0x0e,E,0,0}},
	{APUSHL,	Yss,	Ynone,	0,	{0x16,E,0,0}},
	{APUSHL,	Yds,	Ynone,	0,	{0x1e,E,0,0}},
	{APUSHL,	Yes,	Ynone,	0,	{0x06,E,0,0}},
	{APUSHL,	Yfs,	Ynone,	0,	{0x0f,0xa0,E,0}},
	{APUSHL,	Ygs,	Ynone,	0,	{0x0f,0xa8,E,0}},
	{APUSHQ,	Yfs,	Ynone,	0,	{0x0f,0xa0,E,0}},
	{APUSHQ,	Ygs,	Ynone,	0,	{0x0f,0xa8,E,0}},

	{APUSHW,	Ycs,	Ynone,	0,	{Pe,0x0e,E,0}},
	{APUSHW,	Yss,	Ynone,	0,	{Pe,0x16,E,0}},
	{APUSHW,	Yds,	Ynone,	0,	{Pe,0x1e,E,0}},
	{APUSHW,	Yes,	Ynone,	0,	{Pe,0x06,E,0}},
	{APUSHW,	Yfs,	Ynone,	0,	{Pe,0x0f,0xa0,E}},
	{APUSHW,	Ygs,	Ynone,	0,	{Pe,0x0f,0xa8,E}},

/* pop */
	{APOPL,	Ynone,	Yds,	0,	{0x1f,E,0,0}},
	{APOPL,	Ynone,	Yes,	0,	{0x07,E,0,0}},
	{APOPL,	Ynone,	Yss,	0,	{0x17,E,0,0}},
	{APOPL,	Ynone,	Yfs,	0,	{0x0f,0xa1,E,0}},
	{APOPL,	Ynone,	Ygs,	0,	{0x0f,0xa9,E,0}},
	{APOPQ,	Ynone,	Yfs,	0,	{0x0f,0xa1,E,0}},
	{APOPQ,	Ynone,	Ygs,	0,	{0x0f,0xa9,E,0}},

	{APOPW,	Ynone,	Yds,	0,	{Pe,0x1f,E,0}},
	{APOPW,	Ynone,	Yes,	0,	{Pe,0x07,E,0}},
	{APOPW,	Ynone,	Yss,	0,	{Pe,0x17,E,0}},
	{APOPW,	Ynone,	Yfs,	0,	{Pe,0x0f,0xa1,E}},
	{APOPW,	Ynone,	Ygs,	0,	{Pe,0x0f,0xa9,E}},

/* mov seg */
	{AMOVW,	Yes,	Yml,	1,	{0x8c,0,0,0}},
	{AMOVW,	Ycs,	Yml,	1,	{0x8c,1,0,0}},
	{AMOVW,	Yss,	Yml,	1,	{0x8c,2,0,0}},
	{AMOVW,	Yds,	Yml,	1,	{0x8c,3,0,0}},
	{AMOVW,	Yfs,	Yml,	1,	{0x8c,4,0,0}},
	{AMOVW,	Ygs,	Yml,	1,	{0x8c,5,0,0}},

	{AMOVW,	Yml,	Yes,	2,	{0x8e,0,0,0}},
	{AMOVW,	Yml,	Ycs,	2,	{0x8e,1,0,0}},
	{AMOVW,	Yml,	Yss,	2,	{0x8e,2,0,0}},
	{AMOVW,	Yml,	Yds,	2,	{0x8e,3,0,0}},
	{AMOVW,	Yml,	Yfs,	2,	{0x8e,4,0,0}},
	{AMOVW,	Yml,	Ygs,	2,	{0x8e,5,0,0}},

/* mov cr */
	{AMOVL,	Ycr0,	Yml,	3,	{0x0f,0x20,0,0}},
	{AMOVL,	Ycr2,	Yml,	3,	{0x0f,0x20,2,0}},
	{AMOVL,	Ycr3,	Yml,	3,	{0x0f,0x20,3,0}},
	{AMOVL,	Ycr4,	Yml,	3,	{0x0f,0x20,4,0}},
	{AMOVL,	Ycr8,	Yml,	3,	{0x0f,0x20,8,0}},
	{AMOVQ,	Ycr0,	Yml,	3,	{0x0f,0x20,0,0}},
	{AMOVQ,	Ycr2,	Yml,	3,	{0x0f,0x20,2,0}},
	{AMOVQ,	Ycr3,	Yml,	3,	{0x0f,0x20,3,0}},
	{AMOVQ,	Ycr4,	Yml,	3,	{0x0f,0x20,4,0}},
	{AMOVQ,	Ycr8,	Yml,	3,	{0x0f,0x20,8,0}},

	{AMOVL,	Yml,	Ycr0,	4,	{0x0f,0x22,0,0}},
	{AMOVL,	Yml,	Ycr2,	4,	{0x0f,0x22,2,0}},
	{AMOVL,	Yml,	Ycr3,	4,	{0x0f,0x22,3,0}},
	{AMOVL,	Yml,	Ycr4,	4,	{0x0f,0x22,4,0}},
	{AMOVL,	Yml,	Ycr8,	4,	{0x0f,0x22,8,0}},
	{AMOVQ,	Yml,	Ycr0,	4,	{0x0f,0x22,0,0}},
	{AMOVQ,	Yml,	Ycr2,	4,	{0x0f,0x22,2,0}},
	{AMOVQ,	Yml,	Ycr3,	4,	{0x0f,0x22,3,0}},
	{AMOVQ,	Yml,	Ycr4,	4,	{0x0f,0x22,4,0}},
	{AMOVQ,	Yml,	Ycr8,	4,	{0x0f,0x22,8,0}},

/* mov dr */
	{AMOVL,	Ydr0,	Yml,	3,	{0x0f,0x21,0,0}},
	{AMOVL,	Ydr6,	Yml,	3,	{0x0f,0x21,6,0}},
	{AMOVL,	Ydr7,	Yml,	3,	{0x0f,0x21,7,0}},
	{AMOVQ,	Ydr0,	Yml,	3,	{0x0f,0x21,0,0}},
	{AMOVQ,	Ydr6,	Yml,	3,	{0x0f,0x21,6,0}},
	{AMOVQ,	Ydr7,	Yml,	3,	{0x0f,0x21,7,0}},

	{AMOVL,	Yml,	Ydr0,	4,	{0x0f,0x23,0,0}},
	{AMOVL,	Yml,	Ydr6,	4,	{0x0f,0x23,6,0}},
	{AMOVL,	Yml,	Ydr7,	4,	{0x0f,0x23,7,0}},
	{AMOVQ,	Yml,	Ydr0,	4,	{0x0f,0x23,0,0}},
	{AMOVQ,	Yml,	Ydr6,	4,	{0x0f,0x23,6,0}},
	{AMOVQ,	Yml,	Ydr7,	4,	{0x0f,0x23,7,0}},

/* mov tr */
	{AMOVL,	Ytr6,	Yml,	3,	{0x0f,0x24,6,0}},
	{AMOVL,	Ytr7,	Yml,	3,	{0x0f,0x24,7,0}},

	{AMOVL,	Yml,	Ytr6,	4,	{0x0f,0x26,6,E}},
	{AMOVL,	Yml,	Ytr7,	4,	{0x0f,0x26,7,E}},

/* lgdt, sgdt, lidt, sidt */
	{AMOVL,	Ym,	Ygdtr,	4,	{0x0f,0x01,2,0}},
	{AMOVL,	Ygdtr,	Ym,	3,	{0x0f,0x01,0,0}},
	{AMOVL,	Ym,	Yidtr,	4,	{0x0f,0x01,3,0}},
	{AMOVL,	Yidtr,	Ym,	3,	{0x0f,0x01,1,0}},
	{AMOVQ,	Ym,	Ygdtr,	4,	{0x0f,0x01,2,0}},
	{AMOVQ,	Ygdtr,	Ym,	3,	{0x0f,0x01,0,0}},
	{AMOVQ,	Ym,	Yidtr,	4,	{0x0f,0x01,3,0}},
	{AMOVQ,	Yidtr,	Ym,	3,	{0x0f,0x01,1,0}},

/* lldt, sldt */
	{AMOVW,	Yml,	Yldtr,	4,	{0x0f,0x00,2,0}},
	{AMOVW,	Yldtr,	Yml,	3,	{0x0f,0x00,0,0}},

/* lmsw, smsw */
	{AMOVW,	Yml,	Ymsw,	4,	{0x0f,0x01,6,0}},
	{AMOVW,	Ymsw,	Yml,	3,	{0x0f,0x01,4,0}},

/* ltr, str */
	{AMOVW,	Yml,	Ytask,	4,	{0x0f,0x00,3,0}},
	{AMOVW,	Ytask,	Yml,	3,	{0x0f,0x00,1,0}},

/* load full pointer */
	{AMOVL,	Yml,	Ycol,	5,	{0,0,0,0}},
	{AMOVW,	Yml,	Ycol,	5,	{Pe,0,0,0}},

/* double shift */
	{ASHLL,	Ycol,	Yml,	6,	{0xa4,0xa5,0,0}},
	{ASHRL,	Ycol,	Yml,	6,	{0xac,0xad,0,0}},
	{ASHLQ,	Ycol,	Yml,	6,	{Pw,0xa4,0xa5,0}},
	{ASHRQ,	Ycol,	Yml,	6,	{Pw,0xac,0xad,0}},
	{ASHLW,	Ycol,	Yml,	6,	{Pe,0xa4,0xa5,0}},
	{ASHRW,	Ycol,	Yml,	6,	{Pe,0xac,0xad,0}},

/* load TLS base */
	{AMOVQ,	Ytls,	Yrl,	7,	{0,0,0,0}},

	/* all-zero row ends the table */
	{0}
};
  2643  
  2644  static int
  2645  isax(Addr *a)
  2646  {
  2647  
  2648  	switch(a->type) {
  2649  	case D_AX:
  2650  	case D_AL:
  2651  	case D_AH:
  2652  	case D_INDIR+D_AX:
  2653  		return 1;
  2654  	}
  2655  	if(a->index == D_AX)
  2656  		return 1;
  2657  	return 0;
  2658  }
  2659  
  2660  static void
  2661  subreg(Prog *p, int from, int to)
  2662  {
  2663  
  2664  	if(0 /*debug['Q']*/)
  2665  		print("\n%P	s/%R/%R/\n", p, from, to);
  2666  
  2667  	if(p->from.type == from)
  2668  		p->from.type = to;
  2669  	if(p->to.type == from)
  2670  		p->to.type = to;
  2671  
  2672  	if(p->from.index == from)
  2673  		p->from.index = to;
  2674  	if(p->to.index == from)
  2675  		p->to.index = to;
  2676  
  2677  	from += D_INDIR;
  2678  	if(p->from.type == from)
  2679  		p->from.type = to+D_INDIR;
  2680  	if(p->to.type == from)
  2681  		p->to.type = to+D_INDIR;
  2682  
  2683  	if(0 /*debug['Q']*/)
  2684  		print("%P\n", p);
  2685  }
  2686  
  2687  static int
  2688  mediaop(Link *ctxt, Optab *o, int op, int osize, int z)
  2689  {
  2690  	switch(op){
  2691  	case Pm:
  2692  	case Pe:
  2693  	case Pf2:
  2694  	case Pf3:
  2695  		if(osize != 1){
  2696  			if(op != Pm)
  2697  				*ctxt->andptr++ = op;
  2698  			*ctxt->andptr++ = Pm;
  2699  			op = o->op[++z];
  2700  			break;
  2701  		}
  2702  	default:
  2703  		if(ctxt->andptr == ctxt->and || ctxt->and[ctxt->andptr - ctxt->and - 1] != Pm)
  2704  			*ctxt->andptr++ = Pm;
  2705  		break;
  2706  	}
  2707  	*ctxt->andptr++ = op;
  2708  	return z;
  2709  }
  2710  
  2711  static void
  2712  doasm(Link *ctxt, Prog *p)
  2713  {
  2714  	Optab *o;
  2715  	Prog *q, pp;
  2716  	uchar *t;
  2717  	Movtab *mo;
  2718  	int z, op, ft, tt, xo, l, pre;
  2719  	vlong v;
  2720  	Reloc rel, *r;
  2721  	Addr *a;
  2722  	
  2723  	ctxt->curp = p;	// TODO
  2724  
  2725  	o = opindex[p->as];
  2726  	if(o == nil) {
  2727  		ctxt->diag("asmins: missing op %P", p);
  2728  		return;
  2729  	}
  2730  	
  2731  	pre = prefixof(ctxt, &p->from);
  2732  	if(pre)
  2733  		*ctxt->andptr++ = pre;
  2734  	pre = prefixof(ctxt, &p->to);
  2735  	if(pre)
  2736  		*ctxt->andptr++ = pre;
  2737  
  2738  	if(p->ft == 0)
  2739  		p->ft = oclass(ctxt, &p->from);
  2740  	if(p->tt == 0)
  2741  		p->tt = oclass(ctxt, &p->to);
  2742  
  2743  	ft = p->ft * Ymax;
  2744  	tt = p->tt * Ymax;
  2745  
  2746  	t = o->ytab;
  2747  	if(t == 0) {
  2748  		ctxt->diag("asmins: noproto %P", p);
  2749  		return;
  2750  	}
  2751  	xo = o->op[0] == 0x0f;
  2752  	for(z=0; *t; z+=t[3]+xo,t+=4)
  2753  		if(ycover[ft+t[0]])
  2754  		if(ycover[tt+t[1]])
  2755  			goto found;
  2756  	goto domov;
  2757  
  2758  found:
  2759  	switch(o->prefix) {
  2760  	case Pq:	/* 16 bit escape and opcode escape */
  2761  		*ctxt->andptr++ = Pe;
  2762  		*ctxt->andptr++ = Pm;
  2763  		break;
  2764  	case Pq3:	/* 16 bit escape, Rex.w, and opcode escape */
  2765  		*ctxt->andptr++ = Pe;
  2766  		*ctxt->andptr++ = Pw;
  2767  		*ctxt->andptr++ = Pm;
  2768  		break;
  2769  
  2770  	case Pf2:	/* xmm opcode escape */
  2771  	case Pf3:
  2772  		*ctxt->andptr++ = o->prefix;
  2773  		*ctxt->andptr++ = Pm;
  2774  		break;
  2775  
  2776  	case Pm:	/* opcode escape */
  2777  		*ctxt->andptr++ = Pm;
  2778  		break;
  2779  
  2780  	case Pe:	/* 16 bit escape */
  2781  		*ctxt->andptr++ = Pe;
  2782  		break;
  2783  
  2784  	case Pw:	/* 64-bit escape */
  2785  		if(p->mode != 64)
  2786  			ctxt->diag("asmins: illegal 64: %P", p);
  2787  		ctxt->rexflag |= Pw;
  2788  		break;
  2789  
  2790  	case Pb:	/* botch */
  2791  		bytereg(&p->from, &p->ft);
  2792  		bytereg(&p->to, &p->tt);
  2793  		break;
  2794  
  2795  	case P32:	/* 32 bit but illegal if 64-bit mode */
  2796  		if(p->mode == 64)
  2797  			ctxt->diag("asmins: illegal in 64-bit mode: %P", p);
  2798  		break;
  2799  
  2800  	case Py:	/* 64-bit only, no prefix */
  2801  		if(p->mode != 64)
  2802  			ctxt->diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
  2803  		break;
  2804  	}
  2805  
  2806  	if(z >= nelem(o->op))
  2807  		sysfatal("asmins bad table %P", p);
  2808  	op = o->op[z];
  2809  	if(op == 0x0f) {
  2810  		*ctxt->andptr++ = op;
  2811  		op = o->op[++z];
  2812  	}
  2813  	switch(t[2]) {
  2814  	default:
  2815  		ctxt->diag("asmins: unknown z %d %P", t[2], p);
  2816  		return;
  2817  
  2818  	case Zpseudo:
  2819  		break;
  2820  
  2821  	case Zlit:
  2822  		for(; op = o->op[z]; z++)
  2823  			*ctxt->andptr++ = op;
  2824  		break;
  2825  
  2826  	case Zlitm_r:
  2827  		for(; op = o->op[z]; z++)
  2828  			*ctxt->andptr++ = op;
  2829  		asmand(ctxt, &p->from, &p->to);
  2830  		break;
  2831  
  2832  	case Zmb_r:
  2833  		bytereg(&p->from, &p->ft);
  2834  		/* fall through */
  2835  	case Zm_r:
  2836  		*ctxt->andptr++ = op;
  2837  		asmand(ctxt, &p->from, &p->to);
  2838  		break;
  2839  	case Zm2_r:
  2840  		*ctxt->andptr++ = op;
  2841  		*ctxt->andptr++ = o->op[z+1];
  2842  		asmand(ctxt, &p->from, &p->to);
  2843  		break;
  2844  
  2845  	case Zm_r_xm:
  2846  		mediaop(ctxt, o, op, t[3], z);
  2847  		asmand(ctxt, &p->from, &p->to);
  2848  		break;
  2849  
  2850  	case Zm_r_xm_nr:
  2851  		ctxt->rexflag = 0;
  2852  		mediaop(ctxt, o, op, t[3], z);
  2853  		asmand(ctxt, &p->from, &p->to);
  2854  		break;
  2855  
  2856  	case Zm_r_i_xm:
  2857  		mediaop(ctxt, o, op, t[3], z);
  2858  		asmand(ctxt, &p->from, &p->to);
  2859  		*ctxt->andptr++ = p->to.offset;
  2860  		break;
  2861  
  2862  	case Zm_r_3d:
  2863  		*ctxt->andptr++ = 0x0f;
  2864  		*ctxt->andptr++ = 0x0f;
  2865  		asmand(ctxt, &p->from, &p->to);
  2866  		*ctxt->andptr++ = op;
  2867  		break;
  2868  
  2869  	case Zibm_r:
  2870  		while ((op = o->op[z++]) != 0)
  2871  			*ctxt->andptr++ = op;
  2872  		asmand(ctxt, &p->from, &p->to);
  2873  		*ctxt->andptr++ = p->to.offset;
  2874  		break;
  2875  
  2876  	case Zaut_r:
  2877  		*ctxt->andptr++ = 0x8d;	/* leal */
  2878  		if(p->from.type != D_ADDR)
  2879  			ctxt->diag("asmins: Zaut sb type ADDR");
  2880  		p->from.type = p->from.index;
  2881  		p->from.index = D_NONE;
  2882  		asmand(ctxt, &p->from, &p->to);
  2883  		p->from.index = p->from.type;
  2884  		p->from.type = D_ADDR;
  2885  		break;
  2886  
  2887  	case Zm_o:
  2888  		*ctxt->andptr++ = op;
  2889  		asmando(ctxt, &p->from, o->op[z+1]);
  2890  		break;
  2891  
  2892  	case Zr_m:
  2893  		*ctxt->andptr++ = op;
  2894  		asmand(ctxt, &p->to, &p->from);
  2895  		break;
  2896  
  2897  	case Zr_m_xm:
  2898  		mediaop(ctxt, o, op, t[3], z);
  2899  		asmand(ctxt, &p->to, &p->from);
  2900  		break;
  2901  
  2902  	case Zr_m_xm_nr:
  2903  		ctxt->rexflag = 0;
  2904  		mediaop(ctxt, o, op, t[3], z);
  2905  		asmand(ctxt, &p->to, &p->from);
  2906  		break;
  2907  
  2908  	case Zr_m_i_xm:
  2909  		mediaop(ctxt, o, op, t[3], z);
  2910  		asmand(ctxt, &p->to, &p->from);
  2911  		*ctxt->andptr++ = p->from.offset;
  2912  		break;
  2913  
  2914  	case Zo_m:
  2915  		*ctxt->andptr++ = op;
  2916  		asmando(ctxt, &p->to, o->op[z+1]);
  2917  		break;
  2918  
  2919  	case Zcallindreg:
  2920  		r = addrel(ctxt->cursym);
  2921  		r->off = p->pc;
  2922  		r->type = R_CALLIND;
  2923  		r->siz = 0;
  2924  		// fallthrough
  2925  	case Zo_m64:
  2926  		*ctxt->andptr++ = op;
  2927  		asmandsz(ctxt, &p->to, o->op[z+1], 0, 1);
  2928  		break;
  2929  
  2930  	case Zm_ibo:
  2931  		*ctxt->andptr++ = op;
  2932  		asmando(ctxt, &p->from, o->op[z+1]);
  2933  		*ctxt->andptr++ = vaddr(ctxt, &p->to, nil);
  2934  		break;
  2935  
  2936  	case Zibo_m:
  2937  		*ctxt->andptr++ = op;
  2938  		asmando(ctxt, &p->to, o->op[z+1]);
  2939  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2940  		break;
  2941  
  2942  	case Zibo_m_xm:
  2943  		z = mediaop(ctxt, o, op, t[3], z);
  2944  		asmando(ctxt, &p->to, o->op[z+1]);
  2945  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2946  		break;
  2947  
  2948  	case Z_ib:
  2949  	case Zib_:
  2950  		if(t[2] == Zib_)
  2951  			a = &p->from;
  2952  		else
  2953  			a = &p->to;
  2954  		*ctxt->andptr++ = op;
  2955  		*ctxt->andptr++ = vaddr(ctxt, a, nil);
  2956  		break;
  2957  
  2958  	case Zib_rp:
  2959  		ctxt->rexflag |= regrex[p->to.type] & (Rxb|0x40);
  2960  		*ctxt->andptr++ = op + reg[p->to.type];
  2961  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2962  		break;
  2963  
  2964  	case Zil_rp:
  2965  		ctxt->rexflag |= regrex[p->to.type] & Rxb;
  2966  		*ctxt->andptr++ = op + reg[p->to.type];
  2967  		if(o->prefix == Pe) {
  2968  			v = vaddr(ctxt, &p->from, nil);
  2969  			*ctxt->andptr++ = v;
  2970  			*ctxt->andptr++ = v>>8;
  2971  		}
  2972  		else
  2973  			relput4(ctxt, p, &p->from);
  2974  		break;
  2975  
  2976  	case Zo_iw:
  2977  		*ctxt->andptr++ = op;
  2978  		if(p->from.type != D_NONE){
  2979  			v = vaddr(ctxt, &p->from, nil);
  2980  			*ctxt->andptr++ = v;
  2981  			*ctxt->andptr++ = v>>8;
  2982  		}
  2983  		break;
  2984  
  2985  	case Ziq_rp:
  2986  		v = vaddr(ctxt, &p->from, &rel);
  2987  		l = v>>32;
  2988  		if(l == 0 && rel.siz != 8){
  2989  			//p->mark |= 0100;
  2990  			//print("zero: %llux %P\n", v, p);
  2991  			ctxt->rexflag &= ~(0x40|Rxw);
  2992  			ctxt->rexflag |= regrex[p->to.type] & Rxb;
  2993  			*ctxt->andptr++ = 0xb8 + reg[p->to.type];
  2994  			if(rel.type != 0) {
  2995  				r = addrel(ctxt->cursym);
  2996  				*r = rel;
  2997  				r->off = p->pc + ctxt->andptr - ctxt->and;
  2998  			}
  2999  			put4(ctxt, v);
  3000  		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
  3001  			//p->mark |= 0100;
  3002  			//print("sign: %llux %P\n", v, p);
  3003  			*ctxt->andptr ++ = 0xc7;
  3004  			asmando(ctxt, &p->to, 0);
  3005  			put4(ctxt, v);
  3006  		}else{	/* need all 8 */
  3007  			//print("all: %llux %P\n", v, p);
  3008  			ctxt->rexflag |= regrex[p->to.type] & Rxb;
  3009  			*ctxt->andptr++ = op + reg[p->to.type];
  3010  			if(rel.type != 0) {
  3011  				r = addrel(ctxt->cursym);
  3012  				*r = rel;
  3013  				r->off = p->pc + ctxt->andptr - ctxt->and;
  3014  			}
  3015  			put8(ctxt, v);
  3016  		}
  3017  		break;
  3018  
  3019  	case Zib_rr:
  3020  		*ctxt->andptr++ = op;
  3021  		asmand(ctxt, &p->to, &p->to);
  3022  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  3023  		break;
  3024  
  3025  	case Z_il:
  3026  	case Zil_:
  3027  		if(t[2] == Zil_)
  3028  			a = &p->from;
  3029  		else
  3030  			a = &p->to;
  3031  		*ctxt->andptr++ = op;
  3032  		if(o->prefix == Pe) {
  3033  			v = vaddr(ctxt, a, nil);
  3034  			*ctxt->andptr++ = v;
  3035  			*ctxt->andptr++ = v>>8;
  3036  		}
  3037  		else
  3038  			relput4(ctxt, p, a);
  3039  		break;
  3040  
  3041  	case Zm_ilo:
  3042  	case Zilo_m:
  3043  		*ctxt->andptr++ = op;
  3044  		if(t[2] == Zilo_m) {
  3045  			a = &p->from;
  3046  			asmando(ctxt, &p->to, o->op[z+1]);
  3047  		} else {
  3048  			a = &p->to;
  3049  			asmando(ctxt, &p->from, o->op[z+1]);
  3050  		}
  3051  		if(o->prefix == Pe) {
  3052  			v = vaddr(ctxt, a, nil);
  3053  			*ctxt->andptr++ = v;
  3054  			*ctxt->andptr++ = v>>8;
  3055  		}
  3056  		else
  3057  			relput4(ctxt, p, a);
  3058  		break;
  3059  
  3060  	case Zil_rr:
  3061  		*ctxt->andptr++ = op;
  3062  		asmand(ctxt, &p->to, &p->to);
  3063  		if(o->prefix == Pe) {
  3064  			v = vaddr(ctxt, &p->from, nil);
  3065  			*ctxt->andptr++ = v;
  3066  			*ctxt->andptr++ = v>>8;
  3067  		}
  3068  		else
  3069  			relput4(ctxt, p, &p->from);
  3070  		break;
  3071  
  3072  	case Z_rp:
  3073  		ctxt->rexflag |= regrex[p->to.type] & (Rxb|0x40);
  3074  		*ctxt->andptr++ = op + reg[p->to.type];
  3075  		break;
  3076  
  3077  	case Zrp_:
  3078  		ctxt->rexflag |= regrex[p->from.type] & (Rxb|0x40);
  3079  		*ctxt->andptr++ = op + reg[p->from.type];
  3080  		break;
  3081  
  3082  	case Zclr:
  3083  		ctxt->rexflag &= ~Pw;
  3084  		*ctxt->andptr++ = op;
  3085  		asmand(ctxt, &p->to, &p->to);
  3086  		break;
  3087  
  3088  	case Zcall:
  3089  		if(p->to.sym == nil) {
  3090  			ctxt->diag("call without target");
  3091  			sysfatal("bad code");
  3092  		}
  3093  		*ctxt->andptr++ = op;
  3094  		r = addrel(ctxt->cursym);
  3095  		r->off = p->pc + ctxt->andptr - ctxt->and;
  3096  		r->sym = p->to.sym;
  3097  		r->add = p->to.offset;
  3098  		r->type = R_CALL;
  3099  		r->siz = 4;
  3100  		put4(ctxt, 0);
  3101  		break;
  3102  
  3103  	case Zbr:
  3104  	case Zjmp:
  3105  	case Zloop:
  3106  		// TODO: jump across functions needs reloc
  3107  		if(p->to.sym != nil) {
  3108  			if(t[2] != Zjmp) {
  3109  				ctxt->diag("branch to ATEXT");
  3110  				sysfatal("bad code");
  3111  			}
  3112  			*ctxt->andptr++ = o->op[z+1];
  3113  			r = addrel(ctxt->cursym);
  3114  			r->off = p->pc + ctxt->andptr - ctxt->and;
  3115  			r->sym = p->to.sym;
  3116  			r->type = R_PCREL;
  3117  			r->siz = 4;
  3118  			put4(ctxt, 0);
  3119  			break;
  3120  		}
  3121  		// Assumes q is in this function.
  3122  		// TODO: Check in input, preserve in brchain.
  3123  
  3124  		// Fill in backward jump now.
  3125  		q = p->pcond;
  3126  		if(q == nil) {
  3127  			ctxt->diag("jmp/branch/loop without target");
  3128  			sysfatal("bad code");
  3129  		}
  3130  		if(p->back & 1) {
  3131  			v = q->pc - (p->pc + 2);
  3132  			if(v >= -128) {
  3133  				if(p->as == AJCXZL)
  3134  					*ctxt->andptr++ = 0x67;
  3135  				*ctxt->andptr++ = op;
  3136  				*ctxt->andptr++ = v;
  3137  			} else if(t[2] == Zloop) {
  3138  				ctxt->diag("loop too far: %P", p);
  3139  			} else {
  3140  				v -= 5-2;
  3141  				if(t[2] == Zbr) {
  3142  					*ctxt->andptr++ = 0x0f;
  3143  					v--;
  3144  				}
  3145  				*ctxt->andptr++ = o->op[z+1];
  3146  				*ctxt->andptr++ = v;
  3147  				*ctxt->andptr++ = v>>8;
  3148  				*ctxt->andptr++ = v>>16;
  3149  				*ctxt->andptr++ = v>>24;
  3150  			}
  3151  			break;
  3152  		}
  3153  		
  3154  		// Annotate target; will fill in later.
  3155  		p->forwd = q->comefrom;
  3156  		q->comefrom = p;
  3157  		if(p->back & 2)	{ // short
  3158  			if(p->as == AJCXZL)
  3159  				*ctxt->andptr++ = 0x67;
  3160  			*ctxt->andptr++ = op;
  3161  			*ctxt->andptr++ = 0;
  3162  		} else if(t[2] == Zloop) {
  3163  			ctxt->diag("loop too far: %P", p);
  3164  		} else {
  3165  			if(t[2] == Zbr)
  3166  				*ctxt->andptr++ = 0x0f;
  3167  			*ctxt->andptr++ = o->op[z+1];
  3168  			*ctxt->andptr++ = 0;
  3169  			*ctxt->andptr++ = 0;
  3170  			*ctxt->andptr++ = 0;
  3171  			*ctxt->andptr++ = 0;
  3172  		}
  3173  		break;
  3174  				
  3175  /*
  3176  		v = q->pc - p->pc - 2;
  3177  		if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3178  			*ctxt->andptr++ = op;
  3179  			*ctxt->andptr++ = v;
  3180  		} else {
  3181  			v -= 5-2;
  3182  			if(t[2] == Zbr) {
  3183  				*ctxt->andptr++ = 0x0f;
  3184  				v--;
  3185  			}
  3186  			*ctxt->andptr++ = o->op[z+1];
  3187  			*ctxt->andptr++ = v;
  3188  			*ctxt->andptr++ = v>>8;
  3189  			*ctxt->andptr++ = v>>16;
  3190  			*ctxt->andptr++ = v>>24;
  3191  		}
  3192  */
  3193  		break;
  3194  
  3195  	case Zbyte:
  3196  		v = vaddr(ctxt, &p->from, &rel);
  3197  		if(rel.siz != 0) {
  3198  			rel.siz = op;
  3199  			r = addrel(ctxt->cursym);
  3200  			*r = rel;
  3201  			r->off = p->pc + ctxt->andptr - ctxt->and;
  3202  		}
  3203  		*ctxt->andptr++ = v;
  3204  		if(op > 1) {
  3205  			*ctxt->andptr++ = v>>8;
  3206  			if(op > 2) {
  3207  				*ctxt->andptr++ = v>>16;
  3208  				*ctxt->andptr++ = v>>24;
  3209  				if(op > 4) {
  3210  					*ctxt->andptr++ = v>>32;
  3211  					*ctxt->andptr++ = v>>40;
  3212  					*ctxt->andptr++ = v>>48;
  3213  					*ctxt->andptr++ = v>>56;
  3214  				}
  3215  			}
  3216  		}
  3217  		break;
  3218  	}
  3219  	return;
  3220  
  3221  domov:
  3222  	for(mo=ymovtab; mo->as; mo++)
  3223  		if(p->as == mo->as)
  3224  		if(ycover[ft+mo->ft])
  3225  		if(ycover[tt+mo->tt]){
  3226  			t = mo->op;
  3227  			goto mfound;
  3228  		}
  3229  bad:
  3230  	if(p->mode != 64){
  3231  		/*
  3232  		 * here, the assembly has failed.
  3233  		 * if it's a byte instruction that has
  3234  		 * unaddressable registers, try to
  3235  		 * exchange registers and reissue the
  3236  		 * instruction with the operands renamed.
  3237  		 */
  3238  		pp = *p;
  3239  		z = p->from.type;
  3240  		if(z >= D_BP && z <= D_DI) {
  3241  			if(isax(&p->to) || p->to.type == D_NONE) {
  3242  				// We certainly don't want to exchange
  3243  				// with AX if the op is MUL or DIV.
  3244  				*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  3245  				asmando(ctxt, &p->from, reg[D_BX]);
  3246  				subreg(&pp, z, D_BX);
  3247  				doasm(ctxt, &pp);
  3248  				*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  3249  				asmando(ctxt, &p->from, reg[D_BX]);
  3250  			} else {
  3251  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  3252  				subreg(&pp, z, D_AX);
  3253  				doasm(ctxt, &pp);
  3254  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  3255  			}
  3256  			return;
  3257  		}
  3258  		z = p->to.type;
  3259  		if(z >= D_BP && z <= D_DI) {
  3260  			if(isax(&p->from)) {
  3261  				*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  3262  				asmando(ctxt, &p->to, reg[D_BX]);
  3263  				subreg(&pp, z, D_BX);
  3264  				doasm(ctxt, &pp);
  3265  				*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  3266  				asmando(ctxt, &p->to, reg[D_BX]);
  3267  			} else {
  3268  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  3269  				subreg(&pp, z, D_AX);
  3270  				doasm(ctxt, &pp);
  3271  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  3272  			}
  3273  			return;
  3274  		}
  3275  	}
  3276  	ctxt->diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
  3277  	return;
  3278  
  3279  mfound:
  3280  	switch(mo->code) {
  3281  	default:
  3282  		ctxt->diag("asmins: unknown mov %d %P", mo->code, p);
  3283  		break;
  3284  
  3285  	case 0:	/* lit */
  3286  		for(z=0; t[z]!=E; z++)
  3287  			*ctxt->andptr++ = t[z];
  3288  		break;
  3289  
  3290  	case 1:	/* r,m */
  3291  		*ctxt->andptr++ = t[0];
  3292  		asmando(ctxt, &p->to, t[1]);
  3293  		break;
  3294  
  3295  	case 2:	/* m,r */
  3296  		*ctxt->andptr++ = t[0];
  3297  		asmando(ctxt, &p->from, t[1]);
  3298  		break;
  3299  
  3300  	case 3:	/* r,m - 2op */
  3301  		*ctxt->andptr++ = t[0];
  3302  		*ctxt->andptr++ = t[1];
  3303  		asmando(ctxt, &p->to, t[2]);
  3304  		ctxt->rexflag |= regrex[p->from.type] & (Rxr|0x40);
  3305  		break;
  3306  
  3307  	case 4:	/* m,r - 2op */
  3308  		*ctxt->andptr++ = t[0];
  3309  		*ctxt->andptr++ = t[1];
  3310  		asmando(ctxt, &p->from, t[2]);
  3311  		ctxt->rexflag |= regrex[p->to.type] & (Rxr|0x40);
  3312  		break;
  3313  
  3314  	case 5:	/* load full pointer, trash heap */
  3315  		if(t[0])
  3316  			*ctxt->andptr++ = t[0];
  3317  		switch(p->to.index) {
  3318  		default:
  3319  			goto bad;
  3320  		case D_DS:
  3321  			*ctxt->andptr++ = 0xc5;
  3322  			break;
  3323  		case D_SS:
  3324  			*ctxt->andptr++ = 0x0f;
  3325  			*ctxt->andptr++ = 0xb2;
  3326  			break;
  3327  		case D_ES:
  3328  			*ctxt->andptr++ = 0xc4;
  3329  			break;
  3330  		case D_FS:
  3331  			*ctxt->andptr++ = 0x0f;
  3332  			*ctxt->andptr++ = 0xb4;
  3333  			break;
  3334  		case D_GS:
  3335  			*ctxt->andptr++ = 0x0f;
  3336  			*ctxt->andptr++ = 0xb5;
  3337  			break;
  3338  		}
  3339  		asmand(ctxt, &p->from, &p->to);
  3340  		break;
  3341  
  3342  	case 6:	/* double shift */
  3343  		if(t[0] == Pw){
  3344  			if(p->mode != 64)
  3345  				ctxt->diag("asmins: illegal 64: %P", p);
  3346  			ctxt->rexflag |= Pw;
  3347  			t++;
  3348  		}else if(t[0] == Pe){
  3349  			*ctxt->andptr++ = Pe;
  3350  			t++;
  3351  		}
  3352  		z = p->from.type;
  3353  		switch(z) {
  3354  		default:
  3355  			goto bad;
  3356  		case D_CONST:
  3357  			*ctxt->andptr++ = 0x0f;
  3358  			*ctxt->andptr++ = t[0];
  3359  			asmandsz(ctxt, &p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  3360  			*ctxt->andptr++ = p->from.offset;
  3361  			break;
  3362  		case D_CL:
  3363  		case D_CX:
  3364  			*ctxt->andptr++ = 0x0f;
  3365  			*ctxt->andptr++ = t[1];
  3366  			asmandsz(ctxt, &p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  3367  			break;
  3368  		}
  3369  		break;
  3370  	
  3371  	case 7:	/* mov tls, r */
  3372  		// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3373  		// where you load the TLS base register into a register and then index off that
  3374  		// register to access the actual TLS variables. Systems that allow direct TLS access
  3375  		// are handled in prefixof above and should not be listed here.
  3376  		switch(ctxt->headtype) {
  3377  		default:
  3378  			sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype));
  3379  
  3380  		case Hplan9:
  3381  			if(ctxt->plan9privates == nil)
  3382  				ctxt->plan9privates = linklookup(ctxt, "_privates", 0);
  3383  			memset(&pp.from, 0, sizeof pp.from);
  3384  			pp.from.type = D_EXTERN;
  3385  			pp.from.sym = ctxt->plan9privates;
  3386  			pp.from.offset = 0;
  3387  			pp.from.index = D_NONE;
  3388  			ctxt->rexflag |= Pw;
  3389  			*ctxt->andptr++ = 0x8B;
  3390  			asmand(ctxt, &pp.from, &p->to);
  3391  			break;
  3392  
  3393  		case Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  3394  			// TLS base is 0(FS).
  3395  			pp.from = p->from;
  3396  			pp.from.type = D_INDIR+D_NONE;
  3397  			pp.from.offset = 0;
  3398  			pp.from.index = D_NONE;
  3399  			pp.from.scale = 0;
  3400  			ctxt->rexflag |= Pw;
  3401  			*ctxt->andptr++ = 0x64; // FS
  3402  			*ctxt->andptr++ = 0x8B;
  3403  			asmand(ctxt, &pp.from, &p->to);
  3404  			break;
  3405  		
  3406  		case Hwindows:
  3407  			// Windows TLS base is always 0x28(GS).
  3408  			pp.from = p->from;
  3409  			pp.from.type = D_INDIR+D_GS;
  3410  			pp.from.offset = 0x28;
  3411  			pp.from.index = D_NONE;
  3412  			pp.from.scale = 0;
  3413  			ctxt->rexflag |= Pw;
  3414  			*ctxt->andptr++ = 0x65; // GS
  3415  			*ctxt->andptr++ = 0x8B;
  3416  			asmand(ctxt, &pp.from, &p->to);
  3417  			break;
  3418  		}
  3419  		break;
  3420  	}
  3421  }
  3422  
  3423  static uchar naclret[] = {
        	// Replacement byte sequence for RET on NaCl: asmins copies these
        	// bytes into the output instead of assembling ARET when
        	// ctxt->headtype == Hnacl. Per the per-line notes below, the
        	// sequence pops into SI, clears the low five bits of SI, adds R15
        	// and then jumps through SI.
  3424  	0x5e, // POPL SI
  3425  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  3426  	0x83, 0xe6, 0xe0,	// ANDL $~31, SI
  3427  	0x4c, 0x01, 0xfe,	// ADDQ R15, SI
  3428  	0xff, 0xe6, // JMP SI
  3429  };
  3430  
  3431  static uchar naclspfix[] = {
        	// Appended by asmins after the assembled instruction when
        	// ctxt->headtype == Hnacl, the instruction is neither ACMPL nor
        	// ACMPQ, and p->to.type is D_SP.
  3432  	0x4c, 0x01, 0xfc, // ADDQ R15, SP
  3433  };
  3434  
  3435  static uchar naclbpfix[] = {
        	// Appended by asmins after the assembled instruction when
        	// ctxt->headtype == Hnacl, the instruction is neither ACMPL nor
        	// ACMPQ, and p->to.type is D_BP.
  3436  	0x4c, 0x01, 0xfd, // ADDQ R15, BP
  3437  };
  3438  
  3439  static uchar naclmovs[] = {
        	// Copied into the output by asmins ahead of AMOVSB/AMOVSW/AMOVSL/
        	// AMOVSQ when ctxt->headtype == Hnacl. It rewrites both SI and DI:
        	// a 32-bit self-move followed by an LEAQ that adds R15.
  3440  	0x89, 0xf6,	// MOVL SI, SI
  3441  	0x49, 0x8d, 0x34, 0x37,	// LEAQ (R15)(SI*1), SI
  3442  	0x89, 0xff,	// MOVL DI, DI
  3443  	0x49, 0x8d, 0x3c, 0x3f,	// LEAQ (R15)(DI*1), DI
  3444  };
  3445  
  3446  static uchar naclstos[] = {
        	// Copied into the output by asmins ahead of ASCASB/W/L/Q and
        	// ASTOSB/W/L/Q when ctxt->headtype == Hnacl. It rewrites DI only:
        	// a 32-bit self-move followed by an LEAQ that adds R15.
  3447  	0x89, 0xff,	// MOVL DI, DI
  3448  	0x49, 0x8d, 0x3c, 0x3f,	// LEAQ (R15)(DI*1), DI
  3449  };
  3450  
  3451  static void
  3452  nacltrunc(Link *ctxt, int reg)
  3453  {	
        	// Appends a 32-bit register-to-itself move for reg at ctxt->andptr:
        	// opcode 0x89 followed by a ModRM byte with mod=3 and both register
        	// fields set to the low three bits of reg-D_AX. Registers at or
        	// above D_R8 get a leading 0x45 byte (a REX prefix with the R and B
        	// bits set) so that both ModRM fields select the extended register.
        	//
        	// asmins calls this on NaCl for the scaled index register of
        	// non-LEA instructions. NOTE(review): presumably the self-move is
        	// there to clear the upper 32 bits of the index -- confirm against
        	// the NaCl sandbox rules.
  3454  	if(reg >= D_R8)
  3455  		*ctxt->andptr++ = 0x45;
        	// Reduce reg to three bits for the ModRM fields (assumes the D_AX..
        	// register constants are laid out in hardware encoding order --
        	// TODO confirm against the register enum).
  3456  	reg = (reg - D_AX) & 7;
  3457  	*ctxt->andptr++ = 0x89;
  3458  	*ctxt->andptr++ = (3<<6) | (reg<<3) | reg;
  3459  }
  3460  
  3461  static void
  3462  asmins(Link *ctxt, Prog *p)
  3463  {
        	// Assembles the single instruction p into the buffer ctxt->and and
        	// leaves ctxt->andptr pointing one past the last byte written.
        	//
        	// Outline:
        	//   1. AUSEFIELD emits no bytes; it only records an R_USEFIELD reloc.
        	//   2. On NaCl (ctxt->headtype == Hnacl) extra bytes may be written
        	//      before the instruction, and some instructions are replaced
        	//      outright by fixed byte sequences.
        	//   3. doasm writes the instruction body.
        	//   4. If doasm set ctxt->rexflag, a REX byte is inserted after any
        	//      leading prefix bytes of doasm's output.
        	//   5. Relocations created for this instruction are adjusted.
        	//   6. On NaCl, writes to SP/BP are followed by an ADDQ R15 fix-up.
  3464  	int i, n, np, c;
  3465  	uchar *and0;
  3466  	Reloc *r;
  3467  	
        	// Start writing at the beginning of the per-instruction buffer.
  3468  	ctxt->andptr = ctxt->and;
  3469  	ctxt->asmode = p->mode;
  3470  	
        	// USEFIELD: zero-size relocation at offset 0 naming p->from.sym;
        	// returns with the buffer left empty.
  3471  	if(p->as == AUSEFIELD) {
  3472  		r = addrel(ctxt->cursym);
  3473  		r->off = 0;
  3474  		r->siz = 0;
  3475  		r->sym = p->from.sym;
  3476  		r->type = R_USEFIELD;
  3477  		return;
  3478  	}
  3479  	
  3480  	if(ctxt->headtype == Hnacl) {
        		// REP/REPN/LOCK emit nothing themselves here. They only bump a
        		// counter in ctxt; the matching prefix byte is written further
        		// down, on the next call that reaches the prefix emission code.
  3481  		if(p->as == AREP) {
  3482  			ctxt->rep++;
  3483  			return;
  3484  		}
  3485  		if(p->as == AREPN) {
  3486  			ctxt->repn++;
  3487  			return;
  3488  		}
  3489  		if(p->as == ALOCK) {
  3490  			ctxt->lock++;
  3491  			return;
  3492  		}
        		// For everything except LEAQ/LEAL, prepend a 32-bit self-move
        		// (see nacltrunc) for each operand's index register that is
        		// used with a scale greater than zero.
  3493  		if(p->as != ALEAQ && p->as != ALEAL) {
  3494  			if(p->from.index != D_NONE && p->from.scale > 0)
  3495  				nacltrunc(ctxt, p->from.index);
  3496  			if(p->to.index != D_NONE && p->to.scale > 0)
  3497  				nacltrunc(ctxt, p->to.index);
  3498  		}
  3499  		switch(p->as) {
  3500  		case ARET:
        			// RET is replaced entirely by the naclret sequence.
  3501  			memmove(ctxt->andptr, naclret, sizeof naclret);
  3502  			ctxt->andptr += sizeof naclret;
  3503  			return;
  3504  		case ACALL:
  3505  		case AJMP:
        			// When the target operand is a register, mask and rebase that
        			// register first; the CALL/JMP itself is still assembled by
        			// doasm below (this case ends in break, not return).
        			// The first form handles D_AX..D_DI, the second D_R8..D_R15,
        			// which needs the extra REX bytes (0x41 / 0x4d).
  3506  			if(D_AX <= p->to.type && p->to.type <= D_DI) {
  3507  				// ANDL $~31, reg
  3508  				*ctxt->andptr++ = 0x83;
  3509  				*ctxt->andptr++ = 0xe0 | (p->to.type - D_AX);
  3510  				*ctxt->andptr++ = 0xe0;
  3511  				// ADDQ R15, reg
  3512  				*ctxt->andptr++ = 0x4c;
  3513  				*ctxt->andptr++ = 0x01;
  3514  				*ctxt->andptr++ = 0xf8 | (p->to.type - D_AX);
  3515  			}
  3516  			if(D_R8 <= p->to.type && p->to.type <= D_R15) {
  3517  				// ANDL $~31, reg
  3518  				*ctxt->andptr++ = 0x41;
  3519  				*ctxt->andptr++ = 0x83;
  3520  				*ctxt->andptr++ = 0xe0 | (p->to.type - D_R8);
  3521  				*ctxt->andptr++ = 0xe0;
  3522  				// ADDQ R15, reg
  3523  				*ctxt->andptr++ = 0x4d;
  3524  				*ctxt->andptr++ = 0x01;
  3525  				*ctxt->andptr++ = 0xf8 | (p->to.type - D_R8);
  3526  			}
  3527  			break;
  3528  		case AINT:
        			// INT is replaced by the single byte 0xf4 (the x86 HLT
        			// opcode); the operand of p is not looked at.
  3529  			*ctxt->andptr++ = 0xf4;
  3530  			return;
  3531  		case ASCASB:
  3532  		case ASCASW:
  3533  		case ASCASL:
  3534  		case ASCASQ:
  3535  		case ASTOSB:
  3536  		case ASTOSW:
  3537  		case ASTOSL:
  3538  		case ASTOSQ:
        			// String ops using DI: prepend the DI fix-up sequence.
  3539  			memmove(ctxt->andptr, naclstos, sizeof naclstos);
  3540  			ctxt->andptr += sizeof naclstos;
  3541  			break;
  3542  		case AMOVSB:
  3543  		case AMOVSW:
  3544  		case AMOVSL:
  3545  		case AMOVSQ:
        			// String moves using SI and DI: prepend the fix-up for both.
  3546  			memmove(ctxt->andptr, naclmovs, sizeof naclmovs);
  3547  			ctxt->andptr += sizeof naclmovs;
  3548  			break;
  3549  		}
        		// Emit any prefix recorded by an earlier AREP/AREPN/ALOCK so that
        		// it lands after the NaCl fix-up bytes written above and directly
        		// before the instruction body, then clear the pending counter.
  3550  		if(ctxt->rep) {
  3551  			*ctxt->andptr++ = 0xf3;
  3552  			ctxt->rep = 0;
  3553  		}
  3554  		if(ctxt->repn) {
  3555  			*ctxt->andptr++ = 0xf2;
  3556  			ctxt->repn = 0;
  3557  		}
  3558  		if(ctxt->lock) {
  3559  			*ctxt->andptr++ = 0xf0;
  3560  			ctxt->lock = 0;
  3561  		}
  3562  	}		
  3563  
        	// and0 marks where the instruction proper begins, i.e. after any
        	// bytes the NaCl code above has already written. The REX scan below
        	// starts from here rather than from ctxt->and.
  3564  	ctxt->rexflag = 0;
  3565  	and0 = ctxt->andptr;
        	// Repeats the assignment made at the top of the function.
  3566  	ctxt->asmode = p->mode;
  3567  	doasm(ctxt, p);
  3568  	if(ctxt->rexflag){
  3569  		/*
  3570  		 * as befits the whole approach of the architecture,
  3571  		 * the rex prefix must appear before the first opcode byte
  3572  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  3573  		 * before the 0f opcode escape!), or it might be ignored.
  3574  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  3575  		 */
  3576  		if(p->mode != 64)
  3577  			ctxt->diag("asmins: illegal in mode %d: %P", p->mode, p);
        		// n = number of bytes doasm wrote; np = how many of the leading
        		// ones are prefix bytes (f2, f3, 64..67, 2e, 3e, 26) to skip.
  3578  		n = ctxt->andptr - and0;
  3579  		for(np = 0; np < n; np++) {
  3580  			c = and0[np];
  3581  			if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
  3582  				break;
  3583  		}
        		// Shift everything from the first non-prefix byte up by one and
        		// drop the REX byte (0x40 | flag bits) into the gap.
  3584  		memmove(and0+np+1, and0+np, n-np);
  3585  		and0[np] = 0x40 | ctxt->rexflag;
  3586  		ctxt->andptr++;
  3587  	}
        	// From here n is the total length of everything in ctxt->and so far
        	// (NaCl prologue bytes + prefixes + REX + instruction), so p->pc + n
        	// is the address just past it. The SP/BP fix-up appended at the end
        	// of this function is not included in n.
  3588  	n = ctxt->andptr - ctxt->and;
        	// Walk the symbol's relocations from the newest backwards and stop
        	// at the first one whose offset lies before this instruction.
        	// NOTE(review): this relies on relocations being appended in
        	// increasing offset order -- confirm against addrel's callers.
  3589  	for(i=ctxt->cursym->nr-1; i>=0; i--) {
  3590  		r = ctxt->cursym->r+i;
  3591  		if(r->off < p->pc)
  3592  			break;
        		// A REX byte was inserted after r->off was recorded, so move the
        		// relocation one byte later (assumes the relocated field sits
        		// after the insertion point).
  3593  		if(ctxt->rexflag)
  3594  			r->off++;
        		// For PC-relative relocations, subtract from the addend the
        		// number of bytes between the end of the relocated field
        		// (r->off + r->siz) and the end of the instruction (p->pc + n).
  3595  		if(r->type == R_PCREL || r->type == R_CALL)
  3596  			r->add -= p->pc + n - (r->off + r->siz);
  3597  	}
  3598  
        	// NaCl: after any instruction other than CMPL/CMPQ whose destination
        	// operand is SP or BP, append the matching ADDQ R15 fix-up sequence.
  3599  	if(ctxt->headtype == Hnacl && p->as != ACMPL && p->as != ACMPQ) {
  3600  		switch(p->to.type) {
  3601  		case D_SP:
  3602  			memmove(ctxt->andptr, naclspfix, sizeof naclspfix);
  3603  			ctxt->andptr += sizeof naclspfix;
  3604  			break;
  3605  		case D_BP:
  3606  			memmove(ctxt->andptr, naclbpfix, sizeof naclbpfix);
  3607  			ctxt->andptr += sizeof naclbpfix;
  3608  			break;
  3609  		}
  3610  	}
  3611  }