github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/liblink/asm6.c (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // Instruction layout.
    32  
    33  #include <u.h>
    34  #include <libc.h>
    35  #include <bio.h>
    36  #include <link.h>
    37  #include "../cmd/6l/6.out.h"
    38  #include "../runtime/stack.h"
    39  
    40  enum
    41  {
    42  	MaxAlign = 32,	// max data alignment
    43  	
    44  	// Loop alignment constants:
    45  	// want to align loop entry to LoopAlign-byte boundary,
    46  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    47  	// We define a loop entry as the target of a backward jump.
    48  	//
    49  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    50  	// and it aligns all jump targets, not just backward jump targets.
    51  	//
    52  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    53  	// is very slight but negative, so the alignment is disabled by
    54  	// setting MaxLoopPad = 0. The code is here for reference and
    55  	// for future experiments.
    56  	// 
    57  	LoopAlign = 16,
    58  	MaxLoopPad = 0,
    59  
    60  	FuncAlign = 16
    61  };
    62  
    63  typedef	struct	Optab	Optab;
    64  typedef	struct	Movtab	Movtab;
    65  
    66  struct	Optab
    67  {
    68  	short	as;
    69  	uchar*	ytab;
    70  	uchar	prefix;
    71  	uchar	op[23];
    72  };
    73  struct	Movtab
    74  {
    75  	short	as;
    76  	uchar	ft;
    77  	uchar	tt;
    78  	uchar	code;
    79  	uchar	op[4];
    80  };
    81  
    82  enum
    83  {
    84  	Yxxx		= 0,
    85  	Ynone,
    86  	Yi0,
    87  	Yi1,
    88  	Yi8,
    89  	Ys32,
    90  	Yi32,
    91  	Yi64,
    92  	Yiauto,
    93  	Yal,
    94  	Ycl,
    95  	Yax,
    96  	Ycx,
    97  	Yrb,
    98  	Yrl,
    99  	Yrf,
   100  	Yf0,
   101  	Yrx,
   102  	Ymb,
   103  	Yml,
   104  	Ym,
   105  	Ybr,
   106  	Ycol,
   107  
   108  	Ycs,	Yss,	Yds,	Yes,	Yfs,	Ygs,
   109  	Ygdtr,	Yidtr,	Yldtr,	Ymsw,	Ytask,
   110  	Ycr0,	Ycr1,	Ycr2,	Ycr3,	Ycr4,	Ycr5,	Ycr6,	Ycr7,	Ycr8,
   111  	Ydr0,	Ydr1,	Ydr2,	Ydr3,	Ydr4,	Ydr5,	Ydr6,	Ydr7,
   112  	Ytr0,	Ytr1,	Ytr2,	Ytr3,	Ytr4,	Ytr5,	Ytr6,	Ytr7,	Yrl32,	Yrl64,
   113  	Ymr, Ymm,
   114  	Yxr, Yxm,
   115  	Ytls,
   116  	Ymax,
   117  
   118  	Zxxx		= 0,
   119  
   120  	Zlit,
   121  	Zlitm_r,
   122  	Z_rp,
   123  	Zbr,
   124  	Zcall,
   125  	Zcallindreg,
   126  	Zib_,
   127  	Zib_rp,
   128  	Zibo_m,
   129  	Zibo_m_xm,
   130  	Zil_,
   131  	Zil_rp,
   132  	Ziq_rp,
   133  	Zilo_m,
   134  	Ziqo_m,
   135  	Zjmp,
   136  	Zloop,
   137  	Zo_iw,
   138  	Zm_o,
   139  	Zm_r,
   140  	Zm2_r,
   141  	Zm_r_xm,
   142  	Zm_r_i_xm,
   143  	Zm_r_3d,
   144  	Zm_r_xm_nr,
   145  	Zr_m_xm_nr,
   146  	Zibm_r,	/* mmx1,mmx2/mem64,imm8 */
   147  	Zmb_r,
   148  	Zaut_r,
   149  	Zo_m,
   150  	Zo_m64,
   151  	Zpseudo,
   152  	Zr_m,
   153  	Zr_m_xm,
   154  	Zr_m_i_xm,
   155  	Zrp_,
   156  	Z_ib,
   157  	Z_il,
   158  	Zm_ibo,
   159  	Zm_ilo,
   160  	Zib_rr,
   161  	Zil_rr,
   162  	Zclr,
   163  	Zbyte,
   164  	Zmax,
   165  
   166  	Px		= 0,
   167  	P32		= 0x32,	/* 32-bit only */
   168  	Pe		= 0x66,	/* operand escape */
   169  	Pm		= 0x0f,	/* 2byte opcode escape */
   170  	Pq		= 0xff,	/* both escapes: 66 0f */
   171  	Pb		= 0xfe,	/* byte operands */
   172  	Pf2		= 0xf2,	/* xmm escape 1: f2 0f */
   173  	Pf3		= 0xf3,	/* xmm escape 2: f3 0f */
   174  	Pq3		= 0x67, /* xmm escape 3: 66 48 0f */
   175  	Pw		= 0x48,	/* Rex.w */
   176  	Py		= 0x80,	/* defaults to 64-bit mode */
   177  
   178  	Rxf		= 1<<9,	/* internal flag for Rxr on from */
   179  	Rxt		= 1<<8,	/* internal flag for Rxr on to */
   180  	Rxw		= 1<<3,	/* =1, 64-bit operand size */
   181  	Rxr		= 1<<2,	/* extend modrm reg */
   182  	Rxx		= 1<<1,	/* extend sib index */
   183  	Rxb		= 1<<0,	/* extend modrm r/m, sib base, or opcode reg */
   184  
   185  	Maxand	= 10,		/* in -a output width of the byte codes */
   186  };
   187  
// ycover[a*Ymax + b] is nonzero when an operand of class a also counts as
// class b; see the ycover example in the comment above optab.
static uchar ycover[Ymax*Ymax];
// Tables indexed by register/operand number (sized from D_NONE).
// NOTE(review): their contents are filled in outside this part of the file.
static	int	reg[D_NONE];
static	int	regrex[D_NONE+1];
// Forward declaration; the definition is outside this part of the file.
static	void	asmins(Link *ctxt, Prog *p);
   192  
/*
 * ytab tables.  Each row is four bytes: the Ytype required of p->from,
 * the Ytype required of p->to, the Ztype selecting the encoding form, and
 * the number of optab op bytes the z pointer skips when stepping past the
 * row.  A single 0 byte ends each table.  See the long comment above optab.
 */

// No operand on either side; one opcode byte.
static uchar	ynone[] =
{
	Ynone,	Ynone,	Zlit,	1,
	0
};
// Ymb from, Yi64 to, handled as Zpseudo.
static uchar	ytext[] =
{
	Ymb,	Yi64,	Zpseudo,1,
	0
};
// Zpseudo rows with one side Ynone and the other side any of several
// classes.  Only the last row carries a byte count of 1; the others are 0.
static uchar	ynop[] =
{
	Ynone,	Ynone,	Zpseudo,0,
	Ynone,	Yiauto,	Zpseudo,0,
	Ynone,	Yml,	Zpseudo,0,
	Ynone,	Yrf,	Zpseudo,0,
	Ynone,	Yxr,	Zpseudo,0,
	Yiauto,	Ynone,	Zpseudo,0,
	Yml,	Ynone,	Zpseudo,0,
	Yrf,	Ynone,	Zpseudo,0,
	Yxr,	Ynone,	Zpseudo,1,
	0
};
// Yi32 from, Ym to, Zpseudo, no opcode bytes.
static uchar	yfuncdata[] =
{
	Yi32,	Ym,	Zpseudo,	0,
	0
};
// Yi32 on both sides, Zpseudo, no opcode bytes.
static uchar	ypcdata[] = 
{
	Yi32,	Yi32,	Zpseudo,	0,
	0
};
// Byte-sized two-operand forms; used in optab by AADCB, AADDB and AANDB.
static uchar	yxorb[] =
{
	Yi32,	Yal,	Zib_,	1,
	Yi32,	Ymb,	Zibo_m,	2,
	Yrb,	Ymb,	Zr_m,	1,
	Ymb,	Yrb,	Zm_r,	1,
	0
};
// Two-operand forms used in optab by the AADC and AAND L/Q/W variants.
// The Yi8 row comes first so it is tried before the Yi32 rows.
static uchar	yxorl[] =
{
	Yi8,	Yml,	Zibo_m,	2,
	Yi32,	Yax,	Zil_,	1,
	Yi32,	Yml,	Zilo_m,	2,
	Yrl,	Yml,	Zr_m,	1,
	Yml,	Yrl,	Zm_r,	1,
	0
};
// Same rows as yxorl; used in optab by AADDL, AADDQ and AADDW.  The
// comment above optab walks through this table as its worked example.
static uchar	yaddl[] =
{
	Yi8,	Yml,	Zibo_m,	2,
	Yi32,	Yax,	Zil_,	1,
	Yi32,	Yml,	Zilo_m,	2,
	Yrl,	Yml,	Zr_m,	1,
	Yml,	Yrl,	Zm_r,	1,
	0
};
// Single-destination forms (no from operand); two op bytes each.
// yincb is used in optab by ADECB and AINCB.
static uchar	yincb[] =
{
	Ynone,	Ymb,	Zo_m,	2,
	0
};
// Used in optab by ADECW and AINCW; same row as yincl.
static uchar	yincw[] =
{
	Ynone,	Yml,	Zo_m,	2,
	0
};
// Used in optab by ADECL, ADECQ, AINCL and AINCQ.
static uchar	yincl[] =
{
	Ynone,	Yml,	Zo_m,	2,
	0
};
// Compare forms: unlike yxorb/yxorl, the immediate class appears in the
// `to` column.  ycmpb is used in optab by ACMPB.
static uchar	ycmpb[] =
{
	Yal,	Yi32,	Z_ib,	1,
	Ymb,	Yi32,	Zm_ibo,	2,
	Ymb,	Yrb,	Zm_r,	1,
	Yrb,	Ymb,	Zr_m,	1,
	0
};
// Used in optab by ACMPL, ACMPQ and ACMPW.
static uchar	ycmpl[] =
{
	Yml,	Yi8,	Zm_ibo,	2,
	Yax,	Yi32,	Z_il,	1,
	Yml,	Yi32,	Zm_ilo,	2,
	Yml,	Yrl,	Zm_r,	1,
	Yrl,	Yml,	Zr_m,	1,
	0
};
// Forms whose from operand is the constant 1 (Yi1), a general immediate,
// or Ycx; destination is Ymb.  NOTE(review): presumably the byte shift and
// rotate instructions -- their optab rows are outside this chunk.
static uchar	yshb[] =
{
	Yi1,	Ymb,	Zo_m,	2,
	Yi32,	Ymb,	Zibo_m,	2,
	Ycx,	Ymb,	Zo_m,	2,
	0
};
// As yshb but with a Yml destination and an additional Ycl row.
static uchar	yshl[] =
{
	Yi1,	Yml,	Zo_m,	2,
	Yi32,	Yml,	Zibo_m,	2,
	Ycl,	Yml,	Zo_m,	2,
	Ycx,	Yml,	Zo_m,	2,
	0
};
// Same rows as yxorb.  NOTE(review): presumably used by the byte TEST
// instruction; its optab row is outside this chunk.
static uchar	ytestb[] =
{
	Yi32,	Yal,	Zib_,	1,
	Yi32,	Ymb,	Zibo_m,	2,
	Yrb,	Ymb,	Zr_m,	1,
	Ymb,	Yrb,	Zm_r,	1,
	0
};
// Like yxorl but without the leading Yi8 row.
static uchar	ytestl[] =
{
	Yi32,	Yax,	Zil_,	1,
	Yi32,	Yml,	Zilo_m,	2,
	Yrl,	Yml,	Zr_m,	1,
	Yml,	Yrl,	Zm_r,	1,
	0
};
// Used in optab by AMOVB.
static uchar	ymovb[] =
{
	Yrb,	Ymb,	Zr_m,	1,
	Ymb,	Yrb,	Zm_r,	1,
	Yi32,	Yrb,	Zib_rp,	1,
	Yi32,	Ymb,	Zibo_m,	2,
	0
};
// Single Ymb source, no destination.
static uchar	ymbs[] =
{
	Ymb,	Ynone,	Zm_o,	2,
	0
};
// Used in optab by the ABT, ABTC, ABTR and ABTS L/Q/W variants.
static uchar	ybtl[] =
{
	Yi8,	Yml,	Zibo_m,	2,
	Yrl,	Yml,	Zr_m,	1,
	0
};
// Move forms including the Yi0 -> Zclr special case and a Yiauto source.
static uchar	ymovw[] =
{
	Yrl,	Yml,	Zr_m,	1,
	Yml,	Yrl,	Zm_r,	1,
	Yi0,	Yrl,	Zclr,	1,
	Yi32,	Yrl,	Zil_rp,	1,
	Yi32,	Yml,	Zilo_m,	2,
	Yiauto,	Yrl,	Zaut_r,	2,
	0
};
// Used in optab by AMOVL.  Extends ymovw's rows with Ymr/Yxr forms; the
// Yiauto row is last, after the rows that index the AMOVL op bytes.
static uchar	ymovl[] =
{
	Yrl,	Yml,	Zr_m,	1,
	Yml,	Yrl,	Zm_r,	1,
	Yi0,	Yrl,	Zclr,	1,
	Yi32,	Yrl,	Zil_rp,	1,
	Yi32,	Yml,	Zilo_m,	2,
	Yml,	Ymr,	Zm_r_xm,	1,	// MMX MOVD
	Ymr,	Yml,	Zr_m_xm,	1,	// MMX MOVD
	Yml,	Yxr,	Zm_r_xm,	2,	// XMM MOVD (32 bit)
	Yxr,	Yml,	Zr_m_xm,	2,	// XMM MOVD (32 bit)
	Yiauto,	Yrl,	Zaut_r,	2,
	0
};
// Either no operand or a Yi32 source; both rows use Zo_iw.
static uchar	yret[] =
{
	Ynone,	Ynone,	Zo_iw,	1,
	Yi32,	Ynone,	Zo_iw,	1,
	0
};
// Used in optab by AMOVQ.  The per-row byte counts add up to 22, matching
// the 22 op bytes (plus trailing 0) listed on the AMOVQ optab line, which
// fills Optab.op[23] exactly.  The trailing comments give the opcode byte
// or instruction each row selects.
static uchar	ymovq[] =
{
	Yrl,	Yml,	Zr_m,	1,	// 0x89
	Yml,	Yrl,	Zm_r,	1,	// 0x8b
	Yi0,	Yrl,	Zclr,	1,	// 0x31
	Ys32,	Yrl,	Zilo_m,	2,	// 32 bit signed 0xc7,(0)
	Yi64,	Yrl,	Ziq_rp,	1,	// 0xb8 -- 32/64 bit immediate
	Yi32,	Yml,	Zilo_m,	2,	// 0xc7,(0)
	Ym,	Ymr,	Zm_r_xm_nr,	1,	// MMX MOVQ (shorter encoding)
	Ymr,	Ym,	Zr_m_xm_nr,	1,	// MMX MOVQ
	Ymm,	Ymr,	Zm_r_xm,	1,	// MMX MOVD
	Ymr,	Ymm,	Zr_m_xm,	1,	// MMX MOVD
	Yxr,	Ymr,	Zm_r_xm_nr,	2,	// MOVDQ2Q
	Yxm,	Yxr,	Zm_r_xm_nr,	2, // MOVQ xmm1/m64 -> xmm2
	Yxr,	Yxm,	Zr_m_xm_nr,	2, // MOVQ xmm1 -> xmm2/m64
	Yml,	Yxr,	Zm_r_xm,	2,	// MOVD xmm load
	Yxr,	Yml,	Zr_m_xm,	2,	// MOVD xmm store
	Yiauto,	Yrl,	Zaut_r,	2,	// built-in LEAQ
	0
};
// Single-row tables named after their from_to operand classes.

// Used in optab by ALEAL, ALEAQ and ALEAW.
static uchar	ym_rl[] =
{
	Ym,	Yrl,	Zm_r,	1,
	0
};
// Used in optab by ABOUNDL and ABOUNDW.
static uchar	yrl_m[] =
{
	Yrl,	Ym,	Zr_m,	1,
	0
};
// Used in optab by the AMOVB?SX / AMOVB?ZX family.
static uchar	ymb_rl[] =
{
	Ymb,	Yrl,	Zmb_r,	1,
	0
};
// Used in optab by ABSF*, ABSR*, ACMOV*, ALAR*, ALSL*, AMOVLQSX, AMOVLQZX.
static uchar	yml_rl[] =
{
	Yml,	Yrl,	Zm_r,	1,
	0
};
// Used in optab by AARPL.
static uchar	yrl_ml[] =
{
	Yrl,	Yml,	Zr_m,	1,
	0
};
// Two rows covering both directions between Yrb and Ymb.
static uchar	yml_mb[] =
{
	Yrb,	Ymb,	Zr_m,	1,
	Ymb,	Yrb,	Zm_r,	1,
	0
};
// Yrb source to Ymb destination only.
static uchar	yrb_mb[] =
{
	Yrb,	Ymb,	Zr_m,	1,
	0
};
// Exchange forms: the Yax rows come first, ahead of the general
// Yrl/Yml rows.
static uchar	yxchg[] =
{
	Yax,	Yrl,	Z_rp,	1,
	Yrl,	Yax,	Zrp_,	1,
	Yrl,	Yml,	Zr_m,	1,
	Yml,	Yrl,	Zm_r,	1,
	0
};
// Single Yml source; used in optab by ADIVL/Q/W and AIDIVL/Q/W.
static uchar	ydivl[] =
{
	Yml,	Ynone,	Zm_o,	2,
	0
};
// Single Ymb source; used in optab by ADIVB, AIDIVB and AIMULB.
static uchar	ydivb[] =
{
	Ymb,	Ynone,	Zm_o,	2,
	0
};
// Used in optab by AIMULL, AIMULQ and AIMULW: a one-operand form, two
// immediate forms and a two-operand form.
static uchar	yimul[] =
{
	Yml,	Ynone,	Zm_o,	2,
	Yi8,	Yrl,	Zib_rr,	1,
	Yi32,	Yrl,	Zil_rr,	1,
	Yml,	Yrl,	Zm_r,	2,
	0
};
// Used in optab by AIMUL3Q.
static uchar	yimul3[] =
{
	Yml,	Yrl,	Zibm_r,	2,
	0
};
// Used in optab by ABYTE and ALONG, whose single op byte is 1 and 4
// respectively.
static uchar	ybyte[] =
{
	Yi64,	Ynone,	Zbyte,	1,
	0
};
// Used in optab by AINB, AINL and AINW: immediate form, then the
// operand-less form.
static uchar	yin[] =
{
	Yi32,	Ynone,	Zib_,	1,
	Ynone,	Ynone,	Zlit,	1,
	0
};
// Used in optab by AINT.
static uchar	yint[] =
{
	Yi32,	Ynone,	Zib_,	1,
	0
};
// Source-only forms: Yrl, Ym, Yi8 and Yi32 sources.
static uchar	ypushl[] =
{
	Yrl,	Ynone,	Zrp_,	1,
	Ym,	Ynone,	Zm_o,	2,
	Yi8,	Ynone,	Zib_,	1,
	Yi32,	Ynone,	Zil_,	1,
	0
};
// Destination-only forms: Yrl or Ym destination.
static uchar	ypopl[] =
{
	Ynone,	Yrl,	Z_rp,	1,
	Ynone,	Ym,	Zo_m,	2,
	0
};
// Used in optab by ABSWAPL and ABSWAPQ.
static uchar	ybswap[] =
{
	Ynone,	Yrl,	Z_rp,	2,
	0,
};
// Destination-only Ymb form.
static uchar	yscond[] =
{
	Ynone,	Ymb,	Zo_m,	2,
	0
};
// Used in optab by the conditional jumps (AJCC, AJCS, AJEQ, ...).  The
// first two rows have a byte count of 0, so all three rows see the same
// op bytes; the from operand may be absent or the constant 0 or 1.
static uchar	yjcond[] =
{
	Ynone,	Ybr,	Zbr,	0,
	Yi0,	Ybr,	Zbr,	0,
	Yi1,	Ybr,	Zbr,	1,
	0
};
// Used in optab by AJCXZL, AJCXZQ, ALOOP, ALOOPEQ and ALOOPNE.
static uchar	yloop[] =
{
	Ynone,	Ybr,	Zloop,	1,
	0
};
// Used in optab by ACALL.  The first row's count of 0 means the first
// two rows share the same op bytes.
static uchar	ycall[] =
{
	Ynone,	Yml,	Zcallindreg,	0,
	Yrx,	Yrx,	Zcallindreg,	2,
	Ynone,	Ybr,	Zcall,	1,
	0
};
// Zcall with a Yi32 destination instead of Ybr.
static uchar	yduff[] =
{
	Ynone,	Yi32,	Zcall,	1,
	0
};
// Used in optab by AJMP.
static uchar	yjmp[] =
{
	Ynone,	Yml,	Zo_m64,	2,
	Ynone,	Ybr,	Zjmp,	1,
	0
};
   521  
// Tables whose rows use the Yf0 and Yrf operand classes.
// NOTE(review): presumably the x87 floating-point instructions; except
// for ysvrs, their optab rows are outside this chunk.

// Both directions between Yf0 and either Ym or Yrf.
static uchar	yfmvd[] =
{
	Ym,	Yf0,	Zm_o,	2,
	Yf0,	Ym,	Zo_m,	2,
	Yrf,	Yf0,	Zm_o,	2,
	Yf0,	Yrf,	Zo_m,	2,
	0
};
// Yf0 as source only, to Ym or Yrf.
static uchar	yfmvdp[] =
{
	Yf0,	Ym,	Zo_m,	2,
	Yf0,	Yrf,	Zo_m,	2,
	0
};
// Both directions between Ym and Yf0.
static uchar	yfmvf[] =
{
	Ym,	Yf0,	Zm_o,	2,
	Yf0,	Ym,	Zo_m,	2,
	0
};
// Ym to Yf0 only.
static uchar	yfmvx[] =
{
	Ym,	Yf0,	Zm_o,	2,
	0
};
// Yf0 to Ym only.
static uchar	yfmvp[] =
{
	Yf0,	Ym,	Zo_m,	2,
	0
};
// Ym or Yrf into Yf0, or Yf0 into Yrf.
static uchar	yfadd[] =
{
	Ym,	Yf0,	Zm_o,	2,
	Yrf,	Yf0,	Zm_o,	2,
	Yf0,	Yrf,	Zo_m,	2,
	0
};
// Yf0 into Yrf only.
static uchar	yfaddp[] =
{
	Yf0,	Yrf,	Zo_m,	2,
	0
};
// Both directions between Yf0 and Yrf.
static uchar	yfxch[] =
{
	Yf0,	Yrf,	Zo_m,	2,
	Yrf,	Yf0,	Zm_o,	2,
	0
};
static uchar	ycompp[] =
{
	Yf0,	Yrf,	Zo_m,	2,	/* botch is really f0,f1 */
	0
};
// Destination-only: Ym, or Yax via Zlit.
static uchar	ystsw[] =
{
	Ynone,	Ym,	Zo_m,	2,
	Ynone,	Yax,	Zlit,	1,
	0
};
// A lone Ym operand on either side.
static uchar	ystcw[] =
{
	Ynone,	Ym,	Zo_m,	2,
	Ym,	Ynone,	Zm_o,	2,
	0
};
// Same rows as ystcw; used in optab by AFXRSTOR, AFXSAVE, their 64
// variants and ALDMXCSR.
static uchar	ysvrs[] =
{
	Ynone,	Ym,	Zo_m,	2,
	Ym,	Ynone,	Zm_o,	2,
	0
};
// Tables over the Ymr/Ymm and Yxr/Yxm operand classes.  Comments in
// ymovl/ymovq label Ymr rows "MMX" and Yxr rows "XMM".

// One Ymm->Ymr row (1 op byte) and one Yxm->Yxr row (2 op bytes).
static uchar	ymm[] = 
{
	Ymm,	Ymr,	Zm_r_xm,	1,
	Yxm,	Yxr,	Zm_r_xm,	2,
	0
};
// Yxm source, Yxr destination; used in optab by AADDPD, AADDPS, AANDPD,
// ADIVSD, AMAXPS and many similar rows.
static uchar	yxm[] = 
{
	Yxm,	Yxr,	Zm_r_xm,	1,
	0
};
// Yxm source to either a Yxr or a Ymr destination; used in optab by
// ACVTPD2PL, ACVTPS2PL, ACVTTPD2PL and ACVTTPS2PL.
static uchar	yxcvm1[] = 
{
	Yxm,	Yxr,	Zm_r_xm,	2,
	Yxm,	Ymr,	Zm_r_xm,	2,
	0
};
// Yxm or Ymm source to a Yxr destination; used in optab by ACVTPL2PD and
// ACVTPL2PS.
static uchar	yxcvm2[] =
{
	Yxm,	Yxr,	Zm_r_xm,	2,
	Ymm,	Yxr,	Zm_r_xm,	2,
	0
};
   616  /*
   617  static uchar	yxmq[] = 
   618  {
   619  	Yxm,	Yxr,	Zm_r_xm,	2,
   620  	0
   621  };
   622  */
// Yxr on both sides; used in optab by AMASKMOVOU, AMOVHLPS and AMOVLHPS.
static uchar	yxr[] = 
{
	Yxr,	Yxr,	Zm_r_xm,	1,
	0
};
// Yxr source to Yml; used in optab by AMOVNTO, AMOVNTPD and AMOVNTPS.
static uchar	yxr_ml[] =
{
	Yxr,	Yml,	Zr_m_xm,	1,
	0
};
// Ymr on both sides; used in optab by AMASKMOVQ.
static uchar	ymr[] =
{
	Ymr,	Ymr,	Zm_r,	1,
	0
};
// Ymr source to Yml; used in optab by AMOVNTQ.
static uchar	ymr_ml[] =
{
	Ymr,	Yml,	Zr_m_xm,	1,
	0
};
// Used in optab by ACOMISD and ACOMISS.
static uchar	yxcmp[] =
{
	Yxm,	Yxr, Zm_r_xm,	1,
	0
};
// Used in optab by ACMPPD, ACMPPS, ACMPSD and ACMPSS.
static uchar	yxcmpi[] =
{
	Yxm,	Yxr, Zm_r_i_xm,	2,
	0
};
// Both directions between Yxm and Yxr, one op byte each; used in optab by
// AMOVAPD, AMOVAPS, AMOVO, AMOVOU, AMOVHPD, AMOVLPS, AMOVSD and others.
static uchar	yxmov[] =
{
	Yxm,	Yxr,	Zm_r_xm,	1,
	Yxr,	Yxm,	Zr_m_xm,	1,
	0
};
// Yxm source to Yrl; used in optab by ACVTSD2SL, ACVTSS2SL and the
// truncating ACVTT* L variants.
static uchar	yxcvfl[] = 
{
	Yxm,	Yrl,	Zm_r_xm,	1,
	0
};
// Yml source to Yxr; used in optab by ACVTSL2SD and ACVTSL2SS.
static uchar	yxcvlf[] =
{
	Yml,	Yxr,	Zm_r_xm,	1,
	0
};
// As yxcvfl but with two op bytes; used in optab by the ...2SQ variants.
static uchar	yxcvfq[] = 
{
	Yxm,	Yrl,	Zm_r_xm,	2,
	0
};
// As yxcvlf but with two op bytes; used in optab by ACVTSQ2SD and
// ACVTSQ2SS.
static uchar	yxcvqf[] =
{
	Yml,	Yxr,	Zm_r_xm,	2,
	0
};
// Ymr and Yxr destinations, each with a Ymm/Yxm source row and a Yi8
// source row.  Byte counts are 1, 2, 2, 3 in that order.
static uchar	yps[] = 
{
	Ymm,	Ymr,	Zm_r_xm,	1,
	Yi8,	Ymr,	Zibo_m_xm,	2,
	Yxm,	Yxr,	Zm_r_xm,	2,
	Yi8,	Yxr,	Zibo_m_xm,	3,
	0
};
// Yxr source to Yrl; used in optab by AMOVMSKPD and AMOVMSKPS.
static uchar	yxrrl[] =
{
	Yxr,	Yrl,	Zm_r,	1,
	0
};
// Used in optab by API2FW.
static uchar	ymfp[] =
{
	Ymm,	Ymr,	Zm_r_3d,	1,
	0,
};
// Ymr or Yxm source to Yxr; used in optab by AMOVQOZX.
static uchar	ymrxr[] =
{
	Ymr,	Yxr,	Zm_r,	1,
	Yxm,	Yxr,	Zm_r_xm,	1,
	0
};
// Zibm_r form over Ymm/Ymr.
static uchar	ymshuf[] =
{
	Ymm,	Ymr,	Zibm_r,	2,
	0
};
// Zm2_r form over Yxm/Yxr.
static uchar	ymshufb[] =
{
	Yxm,	Yxr,	Zm2_r,	2,
	0
};
// Zibm_r form over Yxm/Yxr.
static uchar	yxshuf[] =
{
	Yxm,	Yxr,	Zibm_r,	2,
	0
};
// Zibm_r form, Yxr source to Yrl.
static uchar	yextrw[] =
{
	Yxr,	Yrl,	Zibm_r,	2,
	0
};
// Zibm_r form, Yml source to Yxr.
static uchar	yinsrw[] =
{
	Yml,	Yxr,	Zibm_r,	2,
	0
};
// Zibm_r form, Ymm source to Yxr, three op bytes.
static uchar	yinsr[] =
{
	Ymm,	Yxr,	Zibm_r,	3,
	0
};
// Yi8 source with a Yxr destination.
static uchar	ypsdq[] =
{
	Yi8,	Yxr,	Zibo_m,	2,
	0
};
// Yxr or Ymr source to Yrl; the Yxr row comes first and skips 2 op bytes.
static uchar	ymskb[] =
{
	Yxr,	Yrl,	Zm_r_xm,	2,
	Ymr,	Yrl,	Zm_r_xm,	1,
	0
};
   744  static uchar	ycrc32l[] =
   745  {
   746  	Yml,	Yrl,	Zlitm_r,	0,
   747  };
// Single Ym source, no destination.
static uchar	yprefetch[] =
{
	Ym,	Ynone,	Zm_o,	2,
	0,
};
// Zlitm_r form over Yxm/Yxr, two op bytes.
static uchar	yaes[] =
{
	Yxm,	Yxr,	Zlitm_r,	2,
	0
};
// Zibm_r form over Yxm/Yxr, two op bytes.
static uchar	yaes2[] =
{
	Yxm,	Yxr,	Zibm_r,	2,
	0
};
   763  
   764  /*
   765   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   766   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   767   * the entry with the given p->as and then looks through the ytable for that
   768   * instruction (the second field in the optab struct) for a line whose first
   769   * two values match the Ytypes of the p->from and p->to operands.  The function
   770   * oclass in span.c computes the specific Ytype of an operand and then the set
   771   * of more general Ytypes that it satisfies is implied by the ycover table, set
   772   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   773   * from the more general 8-bit constants, but instinit says
   774   *
   775   *        ycover[Yi0*Ymax + Ys32] = 1;
   776   *        ycover[Yi1*Ymax + Ys32] = 1;
   777   *        ycover[Yi8*Ymax + Ys32] = 1;
   778   *
   779   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   780   * if that's what an instruction can handle.
   781   *
   782   * In parallel with the scan through the ytable for the appropriate line, there
   783   * is a z pointer that starts out pointing at the strange magic byte list in
   784   * the Optab struct.  With each step past a non-matching ytable line, z
   785   * advances by the 4th entry in the line.  When a matching line is found, that
   786   * z pointer has the extra data to use in laying down the instruction bytes.
   787   * The actual bytes laid down are a function of the 3rd entry in the line (that
   788   * is, the Ztype) and the z bytes.
   789   *
   790   * For example, let's look at AADDL.  The optab line says:
   791   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   792   *
   793   * and yaddl says
   794   *        uchar   yaddl[] =
   795   *        {
   796   *                Yi8,    Yml,    Zibo_m, 2,
   797   *                Yi32,   Yax,    Zil_,   1,
   798   *                Yi32,   Yml,    Zilo_m, 2,
   799   *                Yrl,    Yml,    Zr_m,   1,
   800   *                Yml,    Yrl,    Zm_r,   1,
   801   *                0
   802   *        };
   803   *
   804   * so there are 5 possible types of ADDL instruction that can be laid down, and
   805   * possible states used to lay them down (Ztype and z pointer, assuming z
   806   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   807   *
   808   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   809   *        Yi32, Yax -> Zil_, z+2 (0x05)
   810   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   811   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   812   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   813   *
   814   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   815   * relatively straightforward as this program goes.
   816   *
   817   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   818   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   819   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   820   * Zilo_m is the same but a long (32-bit) immediate.
   821   */
   822  static Optab optab[] =
   823  /*	as, ytab, andproto, opcode */
   824  {
   825  	{ AXXX },
   826  	{ AAAA,		ynone,	P32, {0x37} },
   827  	{ AAAD,		ynone,	P32, {0xd5,0x0a} },
   828  	{ AAAM,		ynone,	P32, {0xd4,0x0a} },
   829  	{ AAAS,		ynone,	P32, {0x3f} },
   830  	{ AADCB,	yxorb,	Pb, {0x14,0x80,(02),0x10,0x10} },
   831  	{ AADCL,	yxorl,	Px, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   832  	{ AADCQ,	yxorl,	Pw, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   833  	{ AADCW,	yxorl,	Pe, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   834  	{ AADDB,	yxorb,	Pb, {0x04,0x80,(00),0x00,0x02} },
   835  	{ AADDL,	yaddl,	Px, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   836  	{ AADDPD,	yxm,	Pq, {0x58} },
   837  	{ AADDPS,	yxm,	Pm, {0x58} },
   838  	{ AADDQ,	yaddl,	Pw, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   839  	{ AADDSD,	yxm,	Pf2, {0x58} },
   840  	{ AADDSS,	yxm,	Pf3, {0x58} },
   841  	{ AADDW,	yaddl,	Pe, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   842  	{ AADJSP },
   843  	{ AANDB,	yxorb,	Pb, {0x24,0x80,(04),0x20,0x22} },
   844  	{ AANDL,	yxorl,	Px, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   845  	{ AANDNPD,	yxm,	Pq, {0x55} },
   846  	{ AANDNPS,	yxm,	Pm, {0x55} },
   847  	{ AANDPD,	yxm,	Pq, {0x54} },
   848  	{ AANDPS,	yxm,	Pq, {0x54} },
   849  	{ AANDQ,	yxorl,	Pw, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   850  	{ AANDW,	yxorl,	Pe, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   851  	{ AARPL,	yrl_ml,	P32, {0x63} },
   852  	{ ABOUNDL,	yrl_m,	P32, {0x62} },
   853  	{ ABOUNDW,	yrl_m,	Pe, {0x62} },
   854  	{ ABSFL,	yml_rl,	Pm, {0xbc} },
   855  	{ ABSFQ,	yml_rl,	Pw, {0x0f,0xbc} },
   856  	{ ABSFW,	yml_rl,	Pq, {0xbc} },
   857  	{ ABSRL,	yml_rl,	Pm, {0xbd} },
   858  	{ ABSRQ,	yml_rl,	Pw, {0x0f,0xbd} },
   859  	{ ABSRW,	yml_rl,	Pq, {0xbd} },
   860  	{ ABSWAPL,	ybswap,	Px, {0x0f,0xc8} },
   861  	{ ABSWAPQ,	ybswap,	Pw, {0x0f,0xc8} },
   862  	{ ABTCL,	ybtl,	Pm, {0xba,(07),0xbb} },
   863  	{ ABTCQ,	ybtl,	Pw, {0x0f,0xba,(07),0x0f,0xbb} },
   864  	{ ABTCW,	ybtl,	Pq, {0xba,(07),0xbb} },
   865  	{ ABTL,		ybtl,	Pm, {0xba,(04),0xa3} },
   866  	{ ABTQ,		ybtl,	Pw, {0x0f,0xba,(04),0x0f,0xa3}},
   867  	{ ABTRL,	ybtl,	Pm, {0xba,(06),0xb3} },
   868  	{ ABTRQ,	ybtl,	Pw, {0x0f,0xba,(06),0x0f,0xb3} },
   869  	{ ABTRW,	ybtl,	Pq, {0xba,(06),0xb3} },
   870  	{ ABTSL,	ybtl,	Pm, {0xba,(05),0xab } },
   871  	{ ABTSQ,	ybtl,	Pw, {0x0f,0xba,(05),0x0f,0xab} },
   872  	{ ABTSW,	ybtl,	Pq, {0xba,(05),0xab } },
   873  	{ ABTW,		ybtl,	Pq, {0xba,(04),0xa3} },
   874  	{ ABYTE,	ybyte,	Px, {1} },
   875  	{ ACALL,	ycall,	Px, {0xff,(02),0xe8} },
   876  	{ ACDQ,		ynone,	Px, {0x99} },
   877  	{ ACLC,		ynone,	Px, {0xf8} },
   878  	{ ACLD,		ynone,	Px, {0xfc} },
   879  	{ ACLI,		ynone,	Px, {0xfa} },
   880  	{ ACLTS,	ynone,	Pm, {0x06} },
   881  	{ ACMC,		ynone,	Px, {0xf5} },
   882  	{ ACMOVLCC,	yml_rl,	Pm, {0x43} },
   883  	{ ACMOVLCS,	yml_rl,	Pm, {0x42} },
   884  	{ ACMOVLEQ,	yml_rl,	Pm, {0x44} },
   885  	{ ACMOVLGE,	yml_rl,	Pm, {0x4d} },
   886  	{ ACMOVLGT,	yml_rl,	Pm, {0x4f} },
   887  	{ ACMOVLHI,	yml_rl,	Pm, {0x47} },
   888  	{ ACMOVLLE,	yml_rl,	Pm, {0x4e} },
   889  	{ ACMOVLLS,	yml_rl,	Pm, {0x46} },
   890  	{ ACMOVLLT,	yml_rl,	Pm, {0x4c} },
   891  	{ ACMOVLMI,	yml_rl,	Pm, {0x48} },
   892  	{ ACMOVLNE,	yml_rl,	Pm, {0x45} },
   893  	{ ACMOVLOC,	yml_rl,	Pm, {0x41} },
   894  	{ ACMOVLOS,	yml_rl,	Pm, {0x40} },
   895  	{ ACMOVLPC,	yml_rl,	Pm, {0x4b} },
   896  	{ ACMOVLPL,	yml_rl,	Pm, {0x49} },
   897  	{ ACMOVLPS,	yml_rl,	Pm, {0x4a} },
   898  	{ ACMOVQCC,	yml_rl,	Pw, {0x0f,0x43} },
   899  	{ ACMOVQCS,	yml_rl,	Pw, {0x0f,0x42} },
   900  	{ ACMOVQEQ,	yml_rl,	Pw, {0x0f,0x44} },
   901  	{ ACMOVQGE,	yml_rl,	Pw, {0x0f,0x4d} },
   902  	{ ACMOVQGT,	yml_rl,	Pw, {0x0f,0x4f} },
   903  	{ ACMOVQHI,	yml_rl,	Pw, {0x0f,0x47} },
   904  	{ ACMOVQLE,	yml_rl,	Pw, {0x0f,0x4e} },
   905  	{ ACMOVQLS,	yml_rl,	Pw, {0x0f,0x46} },
   906  	{ ACMOVQLT,	yml_rl,	Pw, {0x0f,0x4c} },
   907  	{ ACMOVQMI,	yml_rl,	Pw, {0x0f,0x48} },
   908  	{ ACMOVQNE,	yml_rl,	Pw, {0x0f,0x45} },
   909  	{ ACMOVQOC,	yml_rl,	Pw, {0x0f,0x41} },
   910  	{ ACMOVQOS,	yml_rl,	Pw, {0x0f,0x40} },
   911  	{ ACMOVQPC,	yml_rl,	Pw, {0x0f,0x4b} },
   912  	{ ACMOVQPL,	yml_rl,	Pw, {0x0f,0x49} },
   913  	{ ACMOVQPS,	yml_rl,	Pw, {0x0f,0x4a} },
   914  	{ ACMOVWCC,	yml_rl,	Pq, {0x43} },
   915  	{ ACMOVWCS,	yml_rl,	Pq, {0x42} },
   916  	{ ACMOVWEQ,	yml_rl,	Pq, {0x44} },
   917  	{ ACMOVWGE,	yml_rl,	Pq, {0x4d} },
   918  	{ ACMOVWGT,	yml_rl,	Pq, {0x4f} },
   919  	{ ACMOVWHI,	yml_rl,	Pq, {0x47} },
   920  	{ ACMOVWLE,	yml_rl,	Pq, {0x4e} },
   921  	{ ACMOVWLS,	yml_rl,	Pq, {0x46} },
   922  	{ ACMOVWLT,	yml_rl,	Pq, {0x4c} },
   923  	{ ACMOVWMI,	yml_rl,	Pq, {0x48} },
   924  	{ ACMOVWNE,	yml_rl,	Pq, {0x45} },
   925  	{ ACMOVWOC,	yml_rl,	Pq, {0x41} },
   926  	{ ACMOVWOS,	yml_rl,	Pq, {0x40} },
   927  	{ ACMOVWPC,	yml_rl,	Pq, {0x4b} },
   928  	{ ACMOVWPL,	yml_rl,	Pq, {0x49} },
   929  	{ ACMOVWPS,	yml_rl,	Pq, {0x4a} },
   930  	{ ACMPB,	ycmpb,	Pb, {0x3c,0x80,(07),0x38,0x3a} },
   931  	{ ACMPL,	ycmpl,	Px, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   932  	{ ACMPPD,	yxcmpi,	Px, {Pe,0xc2} },
   933  	{ ACMPPS,	yxcmpi,	Pm, {0xc2,0} },
   934  	{ ACMPQ,	ycmpl,	Pw, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   935  	{ ACMPSB,	ynone,	Pb, {0xa6} },
   936  	{ ACMPSD,	yxcmpi,	Px, {Pf2,0xc2} },
   937  	{ ACMPSL,	ynone,	Px, {0xa7} },
   938  	{ ACMPSQ,	ynone,	Pw, {0xa7} },
   939  	{ ACMPSS,	yxcmpi,	Px, {Pf3,0xc2} },
   940  	{ ACMPSW,	ynone,	Pe, {0xa7} },
   941  	{ ACMPW,	ycmpl,	Pe, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   942  	{ ACOMISD,	yxcmp,	Pe, {0x2f} },
   943  	{ ACOMISS,	yxcmp,	Pm, {0x2f} },
   944  	{ ACPUID,	ynone,	Pm, {0xa2} },
   945  	{ ACVTPL2PD,	yxcvm2,	Px, {Pf3,0xe6,Pe,0x2a} },
   946  	{ ACVTPL2PS,	yxcvm2,	Pm, {0x5b,0,0x2a,0,} },
   947  	{ ACVTPD2PL,	yxcvm1,	Px, {Pf2,0xe6,Pe,0x2d} },
   948  	{ ACVTPD2PS,	yxm,	Pe, {0x5a} },
   949  	{ ACVTPS2PL,	yxcvm1, Px, {Pe,0x5b,Pm,0x2d} },
   950  	{ ACVTPS2PD,	yxm,	Pm, {0x5a} },
   951  	{ API2FW,	ymfp,	Px, {0x0c} },
   952  	{ ACVTSD2SL,	yxcvfl, Pf2, {0x2d} },
   953  	{ ACVTSD2SQ,	yxcvfq, Pw, {Pf2,0x2d} },
   954  	{ ACVTSD2SS,	yxm,	Pf2, {0x5a} },
   955  	{ ACVTSL2SD,	yxcvlf, Pf2, {0x2a} },
   956  	{ ACVTSQ2SD,	yxcvqf, Pw, {Pf2,0x2a} },
   957  	{ ACVTSL2SS,	yxcvlf, Pf3, {0x2a} },
   958  	{ ACVTSQ2SS,	yxcvqf, Pw, {Pf3,0x2a} },
   959  	{ ACVTSS2SD,	yxm,	Pf3, {0x5a} },
   960  	{ ACVTSS2SL,	yxcvfl, Pf3, {0x2d} },
   961  	{ ACVTSS2SQ,	yxcvfq, Pw, {Pf3,0x2d} },
   962  	{ ACVTTPD2PL,	yxcvm1,	Px, {Pe,0xe6,Pe,0x2c} },
   963  	{ ACVTTPS2PL,	yxcvm1,	Px, {Pf3,0x5b,Pm,0x2c} },
   964  	{ ACVTTSD2SL,	yxcvfl, Pf2, {0x2c} },
   965  	{ ACVTTSD2SQ,	yxcvfq, Pw, {Pf2,0x2c} },
   966  	{ ACVTTSS2SL,	yxcvfl,	Pf3, {0x2c} },
   967  	{ ACVTTSS2SQ,	yxcvfq, Pw, {Pf3,0x2c} },
   968  	{ ACWD,		ynone,	Pe, {0x99} },
   969  	{ ACQO,		ynone,	Pw, {0x99} },
   970  	{ ADAA,		ynone,	P32, {0x27} },
   971  	{ ADAS,		ynone,	P32, {0x2f} },
   972  	{ ADATA },
   973  	{ ADECB,	yincb,	Pb, {0xfe,(01)} },
   974  	{ ADECL,	yincl,	Px, {0xff,(01)} },
   975  	{ ADECQ,	yincl,	Pw, {0xff,(01)} },
   976  	{ ADECW,	yincw,	Pe, {0xff,(01)} },
   977  	{ ADIVB,	ydivb,	Pb, {0xf6,(06)} },
   978  	{ ADIVL,	ydivl,	Px, {0xf7,(06)} },
   979  	{ ADIVPD,	yxm,	Pe, {0x5e} },
   980  	{ ADIVPS,	yxm,	Pm, {0x5e} },
   981  	{ ADIVQ,	ydivl,	Pw, {0xf7,(06)} },
   982  	{ ADIVSD,	yxm,	Pf2, {0x5e} },
   983  	{ ADIVSS,	yxm,	Pf3, {0x5e} },
   984  	{ ADIVW,	ydivl,	Pe, {0xf7,(06)} },
   985  	{ AEMMS,	ynone,	Pm, {0x77} },
   986  	{ AENTER },				/* botch */
   987  	{ AFXRSTOR,	ysvrs,	Pm, {0xae,(01),0xae,(01)} },
   988  	{ AFXSAVE,	ysvrs,	Pm, {0xae,(00),0xae,(00)} },
   989  	{ AFXRSTOR64,	ysvrs,	Pw, {0x0f,0xae,(01),0x0f,0xae,(01)} },
   990  	{ AFXSAVE64,	ysvrs,	Pw, {0x0f,0xae,(00),0x0f,0xae,(00)} },
   991  	{ AGLOBL },
   992  	{ AGOK },
   993  	{ AHISTORY },
   994  	{ AHLT,		ynone,	Px, {0xf4} },
   995  	{ AIDIVB,	ydivb,	Pb, {0xf6,(07)} },
   996  	{ AIDIVL,	ydivl,	Px, {0xf7,(07)} },
   997  	{ AIDIVQ,	ydivl,	Pw, {0xf7,(07)} },
   998  	{ AIDIVW,	ydivl,	Pe, {0xf7,(07)} },
   999  	{ AIMULB,	ydivb,	Pb, {0xf6,(05)} },
  1000  	{ AIMULL,	yimul,	Px, {0xf7,(05),0x6b,0x69,Pm,0xaf} },
  1001  	{ AIMULQ,	yimul,	Pw, {0xf7,(05),0x6b,0x69,Pm,0xaf} },
  1002  	{ AIMULW,	yimul,	Pe, {0xf7,(05),0x6b,0x69,Pm,0xaf} },
  1003  	{ AIMUL3Q,	yimul3,	Pw, {0x6b,(00)} },
  1004  	{ AINB,		yin,	Pb, {0xe4,0xec} },
  1005  	{ AINCB,	yincb,	Pb, {0xfe,(00)} },
  1006  	{ AINCL,	yincl,	Px, {0xff,(00)} },
  1007  	{ AINCQ,	yincl,	Pw, {0xff,(00)} },
  1008  	{ AINCW,	yincw,	Pe, {0xff,(00)} },
  1009  	{ AINL,		yin,	Px, {0xe5,0xed} },
  1010  	{ AINSB,	ynone,	Pb, {0x6c} },
  1011  	{ AINSL,	ynone,	Px, {0x6d} },
  1012  	{ AINSW,	ynone,	Pe, {0x6d} },
  1013  	{ AINT,		yint,	Px, {0xcd} },
  1014  	{ AINTO,	ynone,	P32, {0xce} },
  1015  	{ AINW,		yin,	Pe, {0xe5,0xed} },
  1016  	{ AIRETL,	ynone,	Px, {0xcf} },
  1017  	{ AIRETQ,	ynone,	Pw, {0xcf} },
  1018  	{ AIRETW,	ynone,	Pe, {0xcf} },
  1019  	{ AJCC,		yjcond,	Px, {0x73,0x83,(00)} },
  1020  	{ AJCS,		yjcond,	Px, {0x72,0x82} },
  1021  	{ AJCXZL,	yloop,	Px, {0xe3} },
  1022  	{ AJCXZQ,	yloop,	Px, {0xe3} },
  1023  	{ AJEQ,		yjcond,	Px, {0x74,0x84} },
  1024  	{ AJGE,		yjcond,	Px, {0x7d,0x8d} },
  1025  	{ AJGT,		yjcond,	Px, {0x7f,0x8f} },
  1026  	{ AJHI,		yjcond,	Px, {0x77,0x87} },
  1027  	{ AJLE,		yjcond,	Px, {0x7e,0x8e} },
  1028  	{ AJLS,		yjcond,	Px, {0x76,0x86} },
  1029  	{ AJLT,		yjcond,	Px, {0x7c,0x8c} },
  1030  	{ AJMI,		yjcond,	Px, {0x78,0x88} },
  1031  	{ AJMP,		yjmp,	Px, {0xff,(04),0xeb,0xe9} },
  1032  	{ AJNE,		yjcond,	Px, {0x75,0x85} },
  1033  	{ AJOC,		yjcond,	Px, {0x71,0x81,(00)} },
  1034  	{ AJOS,		yjcond,	Px, {0x70,0x80,(00)} },
  1035  	{ AJPC,		yjcond,	Px, {0x7b,0x8b} },
  1036  	{ AJPL,		yjcond,	Px, {0x79,0x89} },
  1037  	{ AJPS,		yjcond,	Px, {0x7a,0x8a} },
  1038  	{ ALAHF,	ynone,	Px, {0x9f} },
  1039  	{ ALARL,	yml_rl,	Pm, {0x02} },
  1040  	{ ALARW,	yml_rl,	Pq, {0x02} },
  1041  	{ ALDMXCSR,	ysvrs,	Pm, {0xae,(02),0xae,(02)} },
  1042  	{ ALEAL,	ym_rl,	Px, {0x8d} },
  1043  	{ ALEAQ,	ym_rl,	Pw, {0x8d} },
  1044  	{ ALEAVEL,	ynone,	P32, {0xc9} },
  1045  	{ ALEAVEQ,	ynone,	Py, {0xc9} },
  1046  	{ ALEAVEW,	ynone,	Pe, {0xc9} },
  1047  	{ ALEAW,	ym_rl,	Pe, {0x8d} },
  1048  	{ ALOCK,	ynone,	Px, {0xf0} },
  1049  	{ ALODSB,	ynone,	Pb, {0xac} },
  1050  	{ ALODSL,	ynone,	Px, {0xad} },
  1051  	{ ALODSQ,	ynone,	Pw, {0xad} },
  1052  	{ ALODSW,	ynone,	Pe, {0xad} },
  1053  	{ ALONG,	ybyte,	Px, {4} },
  1054  	{ ALOOP,	yloop,	Px, {0xe2} },
  1055  	{ ALOOPEQ,	yloop,	Px, {0xe1} },
  1056  	{ ALOOPNE,	yloop,	Px, {0xe0} },
  1057  	{ ALSLL,	yml_rl,	Pm, {0x03 } },
  1058  	{ ALSLW,	yml_rl,	Pq, {0x03 } },
  1059  	{ AMASKMOVOU,	yxr,	Pe, {0xf7} },
  1060  	{ AMASKMOVQ,	ymr,	Pm, {0xf7} },
  1061  	{ AMAXPD,	yxm,	Pe, {0x5f} },
  1062  	{ AMAXPS,	yxm,	Pm, {0x5f} },
  1063  	{ AMAXSD,	yxm,	Pf2, {0x5f} },
  1064  	{ AMAXSS,	yxm,	Pf3, {0x5f} },
  1065  	{ AMINPD,	yxm,	Pe, {0x5d} },
  1066  	{ AMINPS,	yxm,	Pm, {0x5d} },
  1067  	{ AMINSD,	yxm,	Pf2, {0x5d} },
  1068  	{ AMINSS,	yxm,	Pf3, {0x5d} },
  1069  	{ AMOVAPD,	yxmov,	Pe, {0x28,0x29} },
  1070  	{ AMOVAPS,	yxmov,	Pm, {0x28,0x29} },
  1071  	{ AMOVB,	ymovb,	Pb, {0x88,0x8a,0xb0,0xc6,(00)} },
  1072  	{ AMOVBLSX,	ymb_rl,	Pm, {0xbe} },
  1073  	{ AMOVBLZX,	ymb_rl,	Pm, {0xb6} },
  1074  	{ AMOVBQSX,	ymb_rl,	Pw, {0x0f,0xbe} },
  1075  	{ AMOVBQZX,	ymb_rl,	Pm, {0xb6} },
  1076  	{ AMOVBWSX,	ymb_rl,	Pq, {0xbe} },
  1077  	{ AMOVBWZX,	ymb_rl,	Pq, {0xb6} },
  1078  	{ AMOVO,	yxmov,	Pe, {0x6f,0x7f} },
  1079  	{ AMOVOU,	yxmov,	Pf3, {0x6f,0x7f} },
  1080  	{ AMOVHLPS,	yxr,	Pm, {0x12} },
  1081  	{ AMOVHPD,	yxmov,	Pe, {0x16,0x17} },
  1082  	{ AMOVHPS,	yxmov,	Pm, {0x16,0x17} },
  1083  	{ AMOVL,	ymovl,	Px, {0x89,0x8b,0x31,0xb8,0xc7,(00),0x6e,0x7e,Pe,0x6e,Pe,0x7e,0} },
  1084  	{ AMOVLHPS,	yxr,	Pm, {0x16} },
  1085  	{ AMOVLPD,	yxmov,	Pe, {0x12,0x13} },
  1086  	{ AMOVLPS,	yxmov,	Pm, {0x12,0x13} },
  1087  	{ AMOVLQSX,	yml_rl,	Pw, {0x63} },
  1088  	{ AMOVLQZX,	yml_rl,	Px, {0x8b} },
  1089  	{ AMOVMSKPD,	yxrrl,	Pq, {0x50} },
  1090  	{ AMOVMSKPS,	yxrrl,	Pm, {0x50} },
  1091  	{ AMOVNTO,	yxr_ml,	Pe, {0xe7} },
  1092  	{ AMOVNTPD,	yxr_ml,	Pe, {0x2b} },
  1093  	{ AMOVNTPS,	yxr_ml,	Pm, {0x2b} },
  1094  	{ AMOVNTQ,	ymr_ml,	Pm, {0xe7} },
  1095  	{ AMOVQ,	ymovq,	Pw, {0x89, 0x8b, 0x31, 0xc7,(00), 0xb8, 0xc7,(00), 0x6f, 0x7f, 0x6e, 0x7e, Pf2,0xd6, Pf3,0x7e, Pe,0xd6, Pe,0x6e, Pe,0x7e,0} },
  1096  	{ AMOVQOZX,	ymrxr,	Pf3, {0xd6,0x7e} },
  1097  	{ AMOVSB,	ynone,	Pb, {0xa4} },
  1098  	{ AMOVSD,	yxmov,	Pf2, {0x10,0x11} },
  1099  	{ AMOVSL,	ynone,	Px, {0xa5} },
  1100  	{ AMOVSQ,	ynone,	Pw, {0xa5} },
  1101  	{ AMOVSS,	yxmov,	Pf3, {0x10,0x11} },
  1102  	{ AMOVSW,	ynone,	Pe, {0xa5} },
  1103  	{ AMOVUPD,	yxmov,	Pe, {0x10,0x11} },
  1104  	{ AMOVUPS,	yxmov,	Pm, {0x10,0x11} },
  1105  	{ AMOVW,	ymovw,	Pe, {0x89,0x8b,0x31,0xb8,0xc7,(00),0} },
  1106  	{ AMOVWLSX,	yml_rl,	Pm, {0xbf} },
  1107  	{ AMOVWLZX,	yml_rl,	Pm, {0xb7} },
  1108  	{ AMOVWQSX,	yml_rl,	Pw, {0x0f,0xbf} },
  1109  	{ AMOVWQZX,	yml_rl,	Pw, {0x0f,0xb7} },
  1110  	{ AMULB,	ydivb,	Pb, {0xf6,(04)} },
  1111  	{ AMULL,	ydivl,	Px, {0xf7,(04)} },
  1112  	{ AMULPD,	yxm,	Pe, {0x59} },
  1113  	{ AMULPS,	yxm,	Ym, {0x59} },
  1114  	{ AMULQ,	ydivl,	Pw, {0xf7,(04)} },
  1115  	{ AMULSD,	yxm,	Pf2, {0x59} },
  1116  	{ AMULSS,	yxm,	Pf3, {0x59} },
  1117  	{ AMULW,	ydivl,	Pe, {0xf7,(04)} },
  1118  	{ ANAME },
  1119  	{ ANEGB,	yscond,	Pb, {0xf6,(03)} },
  1120  	{ ANEGL,	yscond,	Px, {0xf7,(03)} },
  1121  	{ ANEGQ,	yscond,	Pw, {0xf7,(03)} },
  1122  	{ ANEGW,	yscond,	Pe, {0xf7,(03)} },
  1123  	{ ANOP,		ynop,	Px, {0,0} },
  1124  	{ ANOTB,	yscond,	Pb, {0xf6,(02)} },
  1125  	{ ANOTL,	yscond,	Px, {0xf7,(02)} },
  1126  	{ ANOTQ,	yscond,	Pw, {0xf7,(02)} },
  1127  	{ ANOTW,	yscond,	Pe, {0xf7,(02)} },
  1128  	{ AORB,		yxorb,	Pb, {0x0c,0x80,(01),0x08,0x0a} },
  1129  	{ AORL,		yxorl,	Px, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
  1130  	{ AORPD,	yxm,	Pq, {0x56} },
  1131  	{ AORPS,	yxm,	Pm, {0x56} },
  1132  	{ AORQ,		yxorl,	Pw, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
  1133  	{ AORW,		yxorl,	Pe, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
  1134  	{ AOUTB,	yin,	Pb, {0xe6,0xee} },
  1135  	{ AOUTL,	yin,	Px, {0xe7,0xef} },
  1136  	{ AOUTSB,	ynone,	Pb, {0x6e} },
  1137  	{ AOUTSL,	ynone,	Px, {0x6f} },
  1138  	{ AOUTSW,	ynone,	Pe, {0x6f} },
  1139  	{ AOUTW,	yin,	Pe, {0xe7,0xef} },
  1140  	{ APACKSSLW,	ymm,	Py, {0x6b,Pe,0x6b} },
  1141  	{ APACKSSWB,	ymm,	Py, {0x63,Pe,0x63} },
  1142  	{ APACKUSWB,	ymm,	Py, {0x67,Pe,0x67} },
  1143  	{ APADDB,	ymm,	Py, {0xfc,Pe,0xfc} },
  1144  	{ APADDL,	ymm,	Py, {0xfe,Pe,0xfe} },
  1145  	{ APADDQ,	yxm,	Pe, {0xd4} },
  1146  	{ APADDSB,	ymm,	Py, {0xec,Pe,0xec} },
  1147  	{ APADDSW,	ymm,	Py, {0xed,Pe,0xed} },
  1148  	{ APADDUSB,	ymm,	Py, {0xdc,Pe,0xdc} },
  1149  	{ APADDUSW,	ymm,	Py, {0xdd,Pe,0xdd} },
  1150  	{ APADDW,	ymm,	Py, {0xfd,Pe,0xfd} },
  1151  	{ APAND,	ymm,	Py, {0xdb,Pe,0xdb} },
  1152  	{ APANDN,	ymm,	Py, {0xdf,Pe,0xdf} },
  1153  	{ APAUSE,	ynone,	Px, {0xf3,0x90} },
  1154  	{ APAVGB,	ymm,	Py, {0xe0,Pe,0xe0} },
  1155  	{ APAVGW,	ymm,	Py, {0xe3,Pe,0xe3} },
  1156  	{ APCMPEQB,	ymm,	Py, {0x74,Pe,0x74} },
  1157  	{ APCMPEQL,	ymm,	Py, {0x76,Pe,0x76} },
  1158  	{ APCMPEQW,	ymm,	Py, {0x75,Pe,0x75} },
  1159  	{ APCMPGTB,	ymm,	Py, {0x64,Pe,0x64} },
  1160  	{ APCMPGTL,	ymm,	Py, {0x66,Pe,0x66} },
  1161  	{ APCMPGTW,	ymm,	Py, {0x65,Pe,0x65} },
  1162  	{ APEXTRW,	yextrw,	Pq, {0xc5,(00)} },
  1163  	{ APF2IL,	ymfp,	Px, {0x1d} },
  1164  	{ APF2IW,	ymfp,	Px, {0x1c} },
  1165  	{ API2FL,	ymfp,	Px, {0x0d} },
  1166  	{ APFACC,	ymfp,	Px, {0xae} },
  1167  	{ APFADD,	ymfp,	Px, {0x9e} },
  1168  	{ APFCMPEQ,	ymfp,	Px, {0xb0} },
  1169  	{ APFCMPGE,	ymfp,	Px, {0x90} },
  1170  	{ APFCMPGT,	ymfp,	Px, {0xa0} },
  1171  	{ APFMAX,	ymfp,	Px, {0xa4} },
  1172  	{ APFMIN,	ymfp,	Px, {0x94} },
  1173  	{ APFMUL,	ymfp,	Px, {0xb4} },
  1174  	{ APFNACC,	ymfp,	Px, {0x8a} },
  1175  	{ APFPNACC,	ymfp,	Px, {0x8e} },
  1176  	{ APFRCP,	ymfp,	Px, {0x96} },
  1177  	{ APFRCPIT1,	ymfp,	Px, {0xa6} },
  1178  	{ APFRCPI2T,	ymfp,	Px, {0xb6} },
  1179  	{ APFRSQIT1,	ymfp,	Px, {0xa7} },
  1180  	{ APFRSQRT,	ymfp,	Px, {0x97} },
  1181  	{ APFSUB,	ymfp,	Px, {0x9a} },
  1182  	{ APFSUBR,	ymfp,	Px, {0xaa} },
  1183  	{ APINSRW,	yinsrw,	Pq, {0xc4,(00)} },
  1184  	{ APINSRD,	yinsr,	Pq, {0x3a, 0x22, (00)} },
  1185  	{ APINSRQ,	yinsr,	Pq3, {0x3a, 0x22, (00)} },
  1186  	{ APMADDWL,	ymm,	Py, {0xf5,Pe,0xf5} },
  1187  	{ APMAXSW,	yxm,	Pe, {0xee} },
  1188  	{ APMAXUB,	yxm,	Pe, {0xde} },
  1189  	{ APMINSW,	yxm,	Pe, {0xea} },
  1190  	{ APMINUB,	yxm,	Pe, {0xda} },
  1191  	{ APMOVMSKB,	ymskb,	Px, {Pe,0xd7,0xd7} },
  1192  	{ APMULHRW,	ymfp,	Px, {0xb7} },
  1193  	{ APMULHUW,	ymm,	Py, {0xe4,Pe,0xe4} },
  1194  	{ APMULHW,	ymm,	Py, {0xe5,Pe,0xe5} },
  1195  	{ APMULLW,	ymm,	Py, {0xd5,Pe,0xd5} },
  1196  	{ APMULULQ,	ymm,	Py, {0xf4,Pe,0xf4} },
  1197  	{ APOPAL,	ynone,	P32, {0x61} },
  1198  	{ APOPAW,	ynone,	Pe, {0x61} },
  1199  	{ APOPFL,	ynone,	P32, {0x9d} },
  1200  	{ APOPFQ,	ynone,	Py, {0x9d} },
  1201  	{ APOPFW,	ynone,	Pe, {0x9d} },
  1202  	{ APOPL,	ypopl,	P32, {0x58,0x8f,(00)} },
  1203  	{ APOPQ,	ypopl,	Py, {0x58,0x8f,(00)} },
  1204  	{ APOPW,	ypopl,	Pe, {0x58,0x8f,(00)} },
  1205  	{ APOR,		ymm,	Py, {0xeb,Pe,0xeb} },
  1206  	{ APSADBW,	yxm,	Pq, {0xf6} },
  1207  	{ APSHUFHW,	yxshuf,	Pf3, {0x70,(00)} },
  1208  	{ APSHUFL,	yxshuf,	Pq, {0x70,(00)} },
  1209  	{ APSHUFLW,	yxshuf,	Pf2, {0x70,(00)} },
  1210  	{ APSHUFW,	ymshuf,	Pm, {0x70,(00)} },
  1211  	{ APSHUFB,	ymshufb,Pq, {0x38, 0x00} },
  1212  	{ APSLLO,	ypsdq,	Pq, {0x73,(07)} },
  1213  	{ APSLLL,	yps,	Py, {0xf2, 0x72,(06), Pe,0xf2, Pe,0x72,(06)} },
  1214  	{ APSLLQ,	yps,	Py, {0xf3, 0x73,(06), Pe,0xf3, Pe,0x73,(06)} },
  1215  	{ APSLLW,	yps,	Py, {0xf1, 0x71,(06), Pe,0xf1, Pe,0x71,(06)} },
  1216  	{ APSRAL,	yps,	Py, {0xe2, 0x72,(04), Pe,0xe2, Pe,0x72,(04)} },
  1217  	{ APSRAW,	yps,	Py, {0xe1, 0x71,(04), Pe,0xe1, Pe,0x71,(04)} },
  1218  	{ APSRLO,	ypsdq,	Pq, {0x73,(03)} },
  1219  	{ APSRLL,	yps,	Py, {0xd2, 0x72,(02), Pe,0xd2, Pe,0x72,(02)} },
  1220  	{ APSRLQ,	yps,	Py, {0xd3, 0x73,(02), Pe,0xd3, Pe,0x73,(02)} },
  1221  	{ APSRLW,	yps,	Py, {0xd1, 0x71,(02), Pe,0xe1, Pe,0x71,(02)} },
  1222  	{ APSUBB,	yxm,	Pe, {0xf8} },
  1223  	{ APSUBL,	yxm,	Pe, {0xfa} },
  1224  	{ APSUBQ,	yxm,	Pe, {0xfb} },
  1225  	{ APSUBSB,	yxm,	Pe, {0xe8} },
  1226  	{ APSUBSW,	yxm,	Pe, {0xe9} },
  1227  	{ APSUBUSB,	yxm,	Pe, {0xd8} },
  1228  	{ APSUBUSW,	yxm,	Pe, {0xd9} },
  1229  	{ APSUBW,	yxm,	Pe, {0xf9} },
  1230  	{ APSWAPL,	ymfp,	Px, {0xbb} },
  1231  	{ APUNPCKHBW,	ymm,	Py, {0x68,Pe,0x68} },
  1232  	{ APUNPCKHLQ,	ymm,	Py, {0x6a,Pe,0x6a} },
  1233  	{ APUNPCKHQDQ,	yxm,	Pe, {0x6d} },
  1234  	{ APUNPCKHWL,	ymm,	Py, {0x69,Pe,0x69} },
  1235  	{ APUNPCKLBW,	ymm,	Py, {0x60,Pe,0x60} },
  1236  	{ APUNPCKLLQ,	ymm,	Py, {0x62,Pe,0x62} },
  1237  	{ APUNPCKLQDQ,	yxm,	Pe, {0x6c} },
  1238  	{ APUNPCKLWL,	ymm,	Py, {0x61,Pe,0x61} },
  1239  	{ APUSHAL,	ynone,	P32, {0x60} },
  1240  	{ APUSHAW,	ynone,	Pe, {0x60} },
  1241  	{ APUSHFL,	ynone,	P32, {0x9c} },
  1242  	{ APUSHFQ,	ynone,	Py, {0x9c} },
  1243  	{ APUSHFW,	ynone,	Pe, {0x9c} },
  1244  	{ APUSHL,	ypushl,	P32, {0x50,0xff,(06),0x6a,0x68} },
  1245  	{ APUSHQ,	ypushl,	Py, {0x50,0xff,(06),0x6a,0x68} },
  1246  	{ APUSHW,	ypushl,	Pe, {0x50,0xff,(06),0x6a,0x68} },
  1247  	{ APXOR,	ymm,	Py, {0xef,Pe,0xef} },
  1248  	{ AQUAD,	ybyte,	Px, {8} },
  1249  	{ ARCLB,	yshb,	Pb, {0xd0,(02),0xc0,(02),0xd2,(02)} },
  1250  	{ ARCLL,	yshl,	Px, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
  1251  	{ ARCLQ,	yshl,	Pw, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
  1252  	{ ARCLW,	yshl,	Pe, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
  1253  	{ ARCPPS,	yxm,	Pm, {0x53} },
  1254  	{ ARCPSS,	yxm,	Pf3, {0x53} },
  1255  	{ ARCRB,	yshb,	Pb, {0xd0,(03),0xc0,(03),0xd2,(03)} },
  1256  	{ ARCRL,	yshl,	Px, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
  1257  	{ ARCRQ,	yshl,	Pw, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
  1258  	{ ARCRW,	yshl,	Pe, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
  1259  	{ AREP,		ynone,	Px, {0xf3} },
  1260  	{ AREPN,	ynone,	Px, {0xf2} },
  1261  	{ ARET,		ynone,	Px, {0xc3} },
  1262  	{ ARETFW,	yret,	Pe, {0xcb,0xca} },
  1263  	{ ARETFL,	yret,	Px, {0xcb,0xca} },
  1264  	{ ARETFQ,	yret,	Pw, {0xcb,0xca} },
  1265  	{ AROLB,	yshb,	Pb, {0xd0,(00),0xc0,(00),0xd2,(00)} },
  1266  	{ AROLL,	yshl,	Px, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
  1267  	{ AROLQ,	yshl,	Pw, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
  1268  	{ AROLW,	yshl,	Pe, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
  1269  	{ ARORB,	yshb,	Pb, {0xd0,(01),0xc0,(01),0xd2,(01)} },
  1270  	{ ARORL,	yshl,	Px, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
  1271  	{ ARORQ,	yshl,	Pw, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
  1272  	{ ARORW,	yshl,	Pe, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
  1273  	{ ARSQRTPS,	yxm,	Pm, {0x52} },
  1274  	{ ARSQRTSS,	yxm,	Pf3, {0x52} },
  1275  	{ ASAHF,	ynone,	Px, {0x86,0xe0,0x50,0x9d} },	/* XCHGB AH,AL; PUSH AX; POPFL */
  1276  	{ ASALB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
  1277  	{ ASALL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1278  	{ ASALQ,	yshl,	Pw, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1279  	{ ASALW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1280  	{ ASARB,	yshb,	Pb, {0xd0,(07),0xc0,(07),0xd2,(07)} },
  1281  	{ ASARL,	yshl,	Px, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
  1282  	{ ASARQ,	yshl,	Pw, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
  1283  	{ ASARW,	yshl,	Pe, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
  1284  	{ ASBBB,	yxorb,	Pb, {0x1c,0x80,(03),0x18,0x1a} },
  1285  	{ ASBBL,	yxorl,	Px, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
  1286  	{ ASBBQ,	yxorl,	Pw, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
  1287  	{ ASBBW,	yxorl,	Pe, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
  1288  	{ ASCASB,	ynone,	Pb, {0xae} },
  1289  	{ ASCASL,	ynone,	Px, {0xaf} },
  1290  	{ ASCASQ,	ynone,	Pw, {0xaf} },
  1291  	{ ASCASW,	ynone,	Pe, {0xaf} },
  1292  	{ ASETCC,	yscond,	Pb, {0x0f,0x93,(00)} },
  1293  	{ ASETCS,	yscond,	Pb, {0x0f,0x92,(00)} },
  1294  	{ ASETEQ,	yscond,	Pb, {0x0f,0x94,(00)} },
  1295  	{ ASETGE,	yscond,	Pb, {0x0f,0x9d,(00)} },
  1296  	{ ASETGT,	yscond,	Pb, {0x0f,0x9f,(00)} },
  1297  	{ ASETHI,	yscond,	Pb, {0x0f,0x97,(00)} },
  1298  	{ ASETLE,	yscond,	Pb, {0x0f,0x9e,(00)} },
  1299  	{ ASETLS,	yscond,	Pb, {0x0f,0x96,(00)} },
  1300  	{ ASETLT,	yscond,	Pb, {0x0f,0x9c,(00)} },
  1301  	{ ASETMI,	yscond,	Pb, {0x0f,0x98,(00)} },
  1302  	{ ASETNE,	yscond,	Pb, {0x0f,0x95,(00)} },
  1303  	{ ASETOC,	yscond,	Pb, {0x0f,0x91,(00)} },
  1304  	{ ASETOS,	yscond,	Pb, {0x0f,0x90,(00)} },
  1305  	{ ASETPC,	yscond,	Pb, {0x0f,0x9b,(00)} },
  1306  	{ ASETPL,	yscond,	Pb, {0x0f,0x99,(00)} },
  1307  	{ ASETPS,	yscond,	Pb, {0x0f,0x9a,(00)} },
  1308  	{ ASHLB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
  1309  	{ ASHLL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1310  	{ ASHLQ,	yshl,	Pw, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1311  	{ ASHLW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1312  	{ ASHRB,	yshb,	Pb, {0xd0,(05),0xc0,(05),0xd2,(05)} },
  1313  	{ ASHRL,	yshl,	Px, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
  1314  	{ ASHRQ,	yshl,	Pw, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
  1315  	{ ASHRW,	yshl,	Pe, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
  1316  	{ ASHUFPD,	yxshuf,	Pq, {0xc6,(00)} },
  1317  	{ ASHUFPS,	yxshuf,	Pm, {0xc6,(00)} },
  1318  	{ ASQRTPD,	yxm,	Pe, {0x51} },
  1319  	{ ASQRTPS,	yxm,	Pm, {0x51} },
  1320  	{ ASQRTSD,	yxm,	Pf2, {0x51} },
  1321  	{ ASQRTSS,	yxm,	Pf3, {0x51} },
  1322  	{ ASTC,		ynone,	Px, {0xf9} },
  1323  	{ ASTD,		ynone,	Px, {0xfd} },
  1324  	{ ASTI,		ynone,	Px, {0xfb} },
  1325  	{ ASTMXCSR,	ysvrs,	Pm, {0xae,(03),0xae,(03)} },
  1326  	{ ASTOSB,	ynone,	Pb, {0xaa} },
  1327  	{ ASTOSL,	ynone,	Px, {0xab} },
  1328  	{ ASTOSQ,	ynone,	Pw, {0xab} },
  1329  	{ ASTOSW,	ynone,	Pe, {0xab} },
  1330  	{ ASUBB,	yxorb,	Pb, {0x2c,0x80,(05),0x28,0x2a} },
  1331  	{ ASUBL,	yaddl,	Px, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
  1332  	{ ASUBPD,	yxm,	Pe, {0x5c} },
  1333  	{ ASUBPS,	yxm,	Pm, {0x5c} },
  1334  	{ ASUBQ,	yaddl,	Pw, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
  1335  	{ ASUBSD,	yxm,	Pf2, {0x5c} },
  1336  	{ ASUBSS,	yxm,	Pf3, {0x5c} },
  1337  	{ ASUBW,	yaddl,	Pe, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
  1338  	{ ASWAPGS,	ynone,	Pm, {0x01,0xf8} },
  1339  	{ ASYSCALL,	ynone,	Px, {0x0f,0x05} },	/* fast syscall */
  1340  	{ ATESTB,	ytestb,	Pb, {0xa8,0xf6,(00),0x84,0x84} },
  1341  	{ ATESTL,	ytestl,	Px, {0xa9,0xf7,(00),0x85,0x85} },
  1342  	{ ATESTQ,	ytestl,	Pw, {0xa9,0xf7,(00),0x85,0x85} },
  1343  	{ ATESTW,	ytestl,	Pe, {0xa9,0xf7,(00),0x85,0x85} },
  1344  	{ ATEXT,	ytext,	Px },
  1345  	{ AUCOMISD,	yxcmp,	Pe, {0x2e} },
  1346  	{ AUCOMISS,	yxcmp,	Pm, {0x2e} },
  1347  	{ AUNPCKHPD,	yxm,	Pe, {0x15} },
  1348  	{ AUNPCKHPS,	yxm,	Pm, {0x15} },
  1349  	{ AUNPCKLPD,	yxm,	Pe, {0x14} },
  1350  	{ AUNPCKLPS,	yxm,	Pm, {0x14} },
  1351  	{ AVERR,	ydivl,	Pm, {0x00,(04)} },
  1352  	{ AVERW,	ydivl,	Pm, {0x00,(05)} },
  1353  	{ AWAIT,	ynone,	Px, {0x9b} },
  1354  	{ AWORD,	ybyte,	Px, {2} },
  1355  	{ AXCHGB,	yml_mb,	Pb, {0x86,0x86} },
  1356  	{ AXCHGL,	yxchg,	Px, {0x90,0x90,0x87,0x87} },
  1357  	{ AXCHGQ,	yxchg,	Pw, {0x90,0x90,0x87,0x87} },
  1358  	{ AXCHGW,	yxchg,	Pe, {0x90,0x90,0x87,0x87} },
  1359  	{ AXLAT,	ynone,	Px, {0xd7} },
  1360  	{ AXORB,	yxorb,	Pb, {0x34,0x80,(06),0x30,0x32} },
  1361  	{ AXORL,	yxorl,	Px, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
  1362  	{ AXORPD,	yxm,	Pe, {0x57} },
  1363  	{ AXORPS,	yxm,	Pm, {0x57} },
  1364  	{ AXORQ,	yxorl,	Pw, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
  1365  	{ AXORW,	yxorl,	Pe, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
  1366  
  1367  	{ AFMOVB,	yfmvx,	Px, {0xdf,(04)} },
  1368  	{ AFMOVBP,	yfmvp,	Px, {0xdf,(06)} },
  1369  	{ AFMOVD,	yfmvd,	Px, {0xdd,(00),0xdd,(02),0xd9,(00),0xdd,(02)} },
  1370  	{ AFMOVDP,	yfmvdp,	Px, {0xdd,(03),0xdd,(03)} },
  1371  	{ AFMOVF,	yfmvf,	Px, {0xd9,(00),0xd9,(02)} },
  1372  	{ AFMOVFP,	yfmvp,	Px, {0xd9,(03)} },
  1373  	{ AFMOVL,	yfmvf,	Px, {0xdb,(00),0xdb,(02)} },
  1374  	{ AFMOVLP,	yfmvp,	Px, {0xdb,(03)} },
  1375  	{ AFMOVV,	yfmvx,	Px, {0xdf,(05)} },
  1376  	{ AFMOVVP,	yfmvp,	Px, {0xdf,(07)} },
  1377  	{ AFMOVW,	yfmvf,	Px, {0xdf,(00),0xdf,(02)} },
  1378  	{ AFMOVWP,	yfmvp,	Px, {0xdf,(03)} },
  1379  	{ AFMOVX,	yfmvx,	Px, {0xdb,(05)} },
  1380  	{ AFMOVXP,	yfmvp,	Px, {0xdb,(07)} },
  1381  
  1382  	{ AFCOMB },
  1383  	{ AFCOMBP },
  1384  	{ AFCOMD,	yfadd,	Px, {0xdc,(02),0xd8,(02),0xdc,(02)} },	/* botch */
  1385  	{ AFCOMDP,	yfadd,	Px, {0xdc,(03),0xd8,(03),0xdc,(03)} },	/* botch */
  1386  	{ AFCOMDPP,	ycompp,	Px, {0xde,(03)} },
  1387  	{ AFCOMF,	yfmvx,	Px, {0xd8,(02)} },
  1388  	{ AFCOMFP,	yfmvx,	Px, {0xd8,(03)} },
  1389  	{ AFCOML,	yfmvx,	Px, {0xda,(02)} },
  1390  	{ AFCOMLP,	yfmvx,	Px, {0xda,(03)} },
  1391  	{ AFCOMW,	yfmvx,	Px, {0xde,(02)} },
  1392  	{ AFCOMWP,	yfmvx,	Px, {0xde,(03)} },
  1393  
  1394  	{ AFUCOM,	ycompp,	Px, {0xdd,(04)} },
  1395  	{ AFUCOMP,	ycompp, Px, {0xdd,(05)} },
  1396  	{ AFUCOMPP,	ycompp,	Px, {0xda,(13)} },
  1397  
  1398  	{ AFADDDP,	yfaddp,	Px, {0xde,(00)} },
  1399  	{ AFADDW,	yfmvx,	Px, {0xde,(00)} },
  1400  	{ AFADDL,	yfmvx,	Px, {0xda,(00)} },
  1401  	{ AFADDF,	yfmvx,	Px, {0xd8,(00)} },
  1402  	{ AFADDD,	yfadd,	Px, {0xdc,(00),0xd8,(00),0xdc,(00)} },
  1403  
  1404  	{ AFMULDP,	yfaddp,	Px, {0xde,(01)} },
  1405  	{ AFMULW,	yfmvx,	Px, {0xde,(01)} },
  1406  	{ AFMULL,	yfmvx,	Px, {0xda,(01)} },
  1407  	{ AFMULF,	yfmvx,	Px, {0xd8,(01)} },
  1408  	{ AFMULD,	yfadd,	Px, {0xdc,(01),0xd8,(01),0xdc,(01)} },
  1409  
  1410  	{ AFSUBDP,	yfaddp,	Px, {0xde,(05)} },
  1411  	{ AFSUBW,	yfmvx,	Px, {0xde,(04)} },
  1412  	{ AFSUBL,	yfmvx,	Px, {0xda,(04)} },
  1413  	{ AFSUBF,	yfmvx,	Px, {0xd8,(04)} },
  1414  	{ AFSUBD,	yfadd,	Px, {0xdc,(04),0xd8,(04),0xdc,(05)} },
  1415  
  1416  	{ AFSUBRDP,	yfaddp,	Px, {0xde,(04)} },
  1417  	{ AFSUBRW,	yfmvx,	Px, {0xde,(05)} },
  1418  	{ AFSUBRL,	yfmvx,	Px, {0xda,(05)} },
  1419  	{ AFSUBRF,	yfmvx,	Px, {0xd8,(05)} },
  1420  	{ AFSUBRD,	yfadd,	Px, {0xdc,(05),0xd8,(05),0xdc,(04)} },
  1421  
  1422  	{ AFDIVDP,	yfaddp,	Px, {0xde,(07)} },
  1423  	{ AFDIVW,	yfmvx,	Px, {0xde,(06)} },
  1424  	{ AFDIVL,	yfmvx,	Px, {0xda,(06)} },
  1425  	{ AFDIVF,	yfmvx,	Px, {0xd8,(06)} },
  1426  	{ AFDIVD,	yfadd,	Px, {0xdc,(06),0xd8,(06),0xdc,(07)} },
  1427  
  1428  	{ AFDIVRDP,	yfaddp,	Px, {0xde,(06)} },
  1429  	{ AFDIVRW,	yfmvx,	Px, {0xde,(07)} },
  1430  	{ AFDIVRL,	yfmvx,	Px, {0xda,(07)} },
  1431  	{ AFDIVRF,	yfmvx,	Px, {0xd8,(07)} },
  1432  	{ AFDIVRD,	yfadd,	Px, {0xdc,(07),0xd8,(07),0xdc,(06)} },
  1433  
  1434  	{ AFXCHD,	yfxch,	Px, {0xd9,(01),0xd9,(01)} },
  1435  	{ AFFREE },
  1436  	{ AFLDCW,	ystcw,	Px, {0xd9,(05),0xd9,(05)} },
  1437  	{ AFLDENV,	ystcw,	Px, {0xd9,(04),0xd9,(04)} },
  1438  	{ AFRSTOR,	ysvrs,	Px, {0xdd,(04),0xdd,(04)} },
  1439  	{ AFSAVE,	ysvrs,	Px, {0xdd,(06),0xdd,(06)} },
  1440  	{ AFSTCW,	ystcw,	Px, {0xd9,(07),0xd9,(07)} },
  1441  	{ AFSTENV,	ystcw,	Px, {0xd9,(06),0xd9,(06)} },
  1442  	{ AFSTSW,	ystsw,	Px, {0xdd,(07),0xdf,0xe0} },
  1443  	{ AF2XM1,	ynone,	Px, {0xd9, 0xf0} },
  1444  	{ AFABS,	ynone,	Px, {0xd9, 0xe1} },
  1445  	{ AFCHS,	ynone,	Px, {0xd9, 0xe0} },
  1446  	{ AFCLEX,	ynone,	Px, {0xdb, 0xe2} },
  1447  	{ AFCOS,	ynone,	Px, {0xd9, 0xff} },
  1448  	{ AFDECSTP,	ynone,	Px, {0xd9, 0xf6} },
  1449  	{ AFINCSTP,	ynone,	Px, {0xd9, 0xf7} },
  1450  	{ AFINIT,	ynone,	Px, {0xdb, 0xe3} },
  1451  	{ AFLD1,	ynone,	Px, {0xd9, 0xe8} },
  1452  	{ AFLDL2E,	ynone,	Px, {0xd9, 0xea} },
  1453  	{ AFLDL2T,	ynone,	Px, {0xd9, 0xe9} },
  1454  	{ AFLDLG2,	ynone,	Px, {0xd9, 0xec} },
  1455  	{ AFLDLN2,	ynone,	Px, {0xd9, 0xed} },
  1456  	{ AFLDPI,	ynone,	Px, {0xd9, 0xeb} },
  1457  	{ AFLDZ,	ynone,	Px, {0xd9, 0xee} },
  1458  	{ AFNOP,	ynone,	Px, {0xd9, 0xd0} },
  1459  	{ AFPATAN,	ynone,	Px, {0xd9, 0xf3} },
  1460  	{ AFPREM,	ynone,	Px, {0xd9, 0xf8} },
  1461  	{ AFPREM1,	ynone,	Px, {0xd9, 0xf5} },
  1462  	{ AFPTAN,	ynone,	Px, {0xd9, 0xf2} },
  1463  	{ AFRNDINT,	ynone,	Px, {0xd9, 0xfc} },
  1464  	{ AFSCALE,	ynone,	Px, {0xd9, 0xfd} },
  1465  	{ AFSIN,	ynone,	Px, {0xd9, 0xfe} },
  1466  	{ AFSINCOS,	ynone,	Px, {0xd9, 0xfb} },
  1467  	{ AFSQRT,	ynone,	Px, {0xd9, 0xfa} },
  1468  	{ AFTST,	ynone,	Px, {0xd9, 0xe4} },
  1469  	{ AFXAM,	ynone,	Px, {0xd9, 0xe5} },
  1470  	{ AFXTRACT,	ynone,	Px, {0xd9, 0xf4} },
  1471  	{ AFYL2X,	ynone,	Px, {0xd9, 0xf1} },
  1472  	{ AFYL2XP1,	ynone,	Px, {0xd9, 0xf9} },
  1473  
  1474  	{ ACMPXCHGB,	yrb_mb,	Pb, {0x0f,0xb0} },
  1475  	{ ACMPXCHGL,	yrl_ml,	Px, {0x0f,0xb1} },
  1476  	{ ACMPXCHGW,	yrl_ml,	Pe, {0x0f,0xb1} },
  1477  	{ ACMPXCHGQ,	yrl_ml,	Pw, {0x0f,0xb1} },
  1478  	{ ACMPXCHG8B,	yscond,	Pm, {0xc7,(01)} },
  1479  	{ AINVD,	ynone,	Pm, {0x08} },
  1480  	{ AINVLPG,	ymbs,	Pm, {0x01,(07)} },
  1481  	{ ALFENCE,	ynone,	Pm, {0xae,0xe8} },
  1482  	{ AMFENCE,	ynone,	Pm, {0xae,0xf0} },
  1483  	{ AMOVNTIL,	yrl_ml,	Pm, {0xc3} },
  1484  	{ AMOVNTIQ,	yrl_ml, Pw, {0x0f,0xc3} },
  1485  	{ ARDMSR,	ynone,	Pm, {0x32} },
  1486  	{ ARDPMC,	ynone,	Pm, {0x33} },
  1487  	{ ARDTSC,	ynone,	Pm, {0x31} },
  1488  	{ ARSM,		ynone,	Pm, {0xaa} },
  1489  	{ ASFENCE,	ynone,	Pm, {0xae,0xf8} },
  1490  	{ ASYSRET,	ynone,	Pm, {0x07} },
  1491  	{ AWBINVD,	ynone,	Pm, {0x09} },
  1492  	{ AWRMSR,	ynone,	Pm, {0x30} },
  1493  
  1494  	{ AXADDB,	yrb_mb,	Pb, {0x0f,0xc0} },
  1495  	{ AXADDL,	yrl_ml,	Px, {0x0f,0xc1} },
  1496  	{ AXADDQ,	yrl_ml,	Pw, {0x0f,0xc1} },
  1497  	{ AXADDW,	yrl_ml,	Pe, {0x0f,0xc1} },
  1498  
  1499  	{ ACRC32B,       ycrc32l,Px, {0xf2,0x0f,0x38,0xf0,0} },
  1500  	{ ACRC32Q,       ycrc32l,Pw, {0xf2,0x0f,0x38,0xf1,0} },
  1501  	
  1502  	{ APREFETCHT0,	yprefetch,	Pm,	{0x18,(01)} },
  1503  	{ APREFETCHT1,	yprefetch,	Pm,	{0x18,(02)} },
  1504  	{ APREFETCHT2,	yprefetch,	Pm,	{0x18,(03)} },
  1505  	{ APREFETCHNTA,	yprefetch,	Pm,	{0x18,(00)} },
  1506  	
  1507  	{ AMOVQL,	yrl_ml,	Px, {0x89} },
  1508  
  1509  	{ AUNDEF,		ynone,	Px, {0x0f, 0x0b} },
  1510  
  1511  	{ AAESENC,	yaes,	Pq, {0x38,0xdc,(0)} },
  1512  	{ AAESENCLAST,	yaes,	Pq, {0x38,0xdd,(0)} },
  1513  	{ AAESDEC,	yaes,	Pq, {0x38,0xde,(0)} },
  1514  	{ AAESDECLAST,	yaes,	Pq, {0x38,0xdf,(0)} },
  1515  	{ AAESIMC,	yaes,	Pq, {0x38,0xdb,(0)} },
  1516  	{ AAESKEYGENASSIST,	yaes2,	Pq, {0x3a,0xdf,(0)} },
  1517  
  1518  	{ APSHUFD,	yaes2,	Pq,	{0x70,(0)} },
  1519  	{ APCLMULQDQ,	yxshuf,	Pq, {0x3a,0x44,0} },
  1520  
  1521  	{ AUSEFIELD,	ynop,	Px, {0,0} },
  1522  	{ ATYPE },
  1523  	{ AFUNCDATA,	yfuncdata,	Px, {0,0} },
  1524  	{ APCDATA,	ypcdata,	Px, {0,0} },
  1525  	{ ACHECKNIL },
  1526  	{ AVARDEF },
  1527  	{ AVARKILL },
  1528  	{ ADUFFCOPY,	yduff,	Px, {0xe8} },
  1529  	{ ADUFFZERO,	yduff,	Px, {0xe8} },
  1530  
  1531  	{ AEND },
  1532  	{0}
  1533  };
  1534  
// opindex is indexed by the opcode stored in an Optab's `as` field and
// points at the optab entry for that opcode. instinit fills it in.
static Optab*	opindex[ALAST+1];
// Forward declaration; the definition is not in this part of the file.
static vlong	vaddr(Link*, Addr*, Reloc*);
  1537  
  1538  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1539  // This happens on systems like Solaris that call .so functions instead of system calls.
  1540  // It does not seem to be necessary for any other systems. This is probably working
  1541  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1542  // what that bug is. And this does fix it.
  1543  static int
  1544  isextern(LSym *s)
  1545  {
  1546  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1547  	return strncmp(s->name, "libc_", 5) == 0;
  1548  }
  1549  
  1550  // single-instruction no-ops of various lengths.
  1551  // constructed by hand and disassembled with gdb to verify.
  1552  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1553  static uchar nop[][16] = {
  1554  	{0x90},
  1555  	{0x66, 0x90},
  1556  	{0x0F, 0x1F, 0x00},
  1557  	{0x0F, 0x1F, 0x40, 0x00},
  1558  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1559  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1560  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1561  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1562  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1563  	// Native Client rejects the repeated 0x66 prefix.
  1564  	// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1565  };
  1566  
  1567  static void
  1568  fillnop(uchar *p, int n)
  1569  {
  1570  	int m;
  1571  
  1572  	while(n > 0) {
  1573  		m = n;
  1574  		if(m > nelem(nop))
  1575  			m = nelem(nop);
  1576  		memmove(p, nop[m-1], m);
  1577  		p += m;
  1578  		n -= m;
  1579  	}
  1580  }
  1581  
// Forward declaration: span6 calls instinit before its definition appears.
static void instinit(void);
  1583  
  1584  static int32
  1585  naclpad(Link *ctxt, LSym *s, int32 c, int32 pad)
  1586  {
  1587  	symgrow(ctxt, s, c+pad);
  1588  	fillnop(s->p+c, pad);
  1589  	return c+pad;
  1590  }
  1591  
  1592  static int
  1593  spadjop(Link *ctxt, Prog *p, int l, int q)
  1594  {
  1595  	if(p->mode != 64 || ctxt->arch->ptrsize == 4)
  1596  		return l;
  1597  	return q;
  1598  }
  1599  
  1600  void
  1601  span6(Link *ctxt, LSym *s)
  1602  {
  1603  	Prog *p, *q;
  1604  	int32 c, v, loop;
  1605  	uchar *bp;
  1606  	int n, m, i;
  1607  
  1608  	ctxt->cursym = s;
  1609  	
  1610  	if(s->p != nil)
  1611  		return;
  1612  	
  1613  	if(ycover[0] == 0)
  1614  		instinit();
  1615  	
  1616  	for(p = ctxt->cursym->text; p != nil; p = p->link) {
  1617  		n = 0;
  1618  		if(p->to.type == D_BRANCH)
  1619  			if(p->pcond == nil)
  1620  				p->pcond = p;
  1621  		if((q = p->pcond) != nil)
  1622  			if(q->back != 2)
  1623  				n = 1;
  1624  		p->back = n;
  1625  		if(p->as == AADJSP) {
  1626  			p->to.type = D_SP;
  1627  			v = -p->from.offset;
  1628  			p->from.offset = v;
  1629  			p->as = spadjop(ctxt, p, AADDL, AADDQ);
  1630  			if(v < 0) {
  1631  				p->as = spadjop(ctxt, p, ASUBL, ASUBQ);
  1632  				v = -v;
  1633  				p->from.offset = v;
  1634  			}
  1635  			if(v == 0)
  1636  				p->as = ANOP;
  1637  		}
  1638  	}
  1639  
  1640  	for(p = s->text; p != nil; p = p->link) {
  1641  		p->back = 2;	// use short branches first time through
  1642  		if((q = p->pcond) != nil && (q->back & 2)) {
  1643  			p->back |= 1;	// backward jump
  1644  			q->back |= 4;   // loop head
  1645  		}
  1646  
  1647  		if(p->as == AADJSP) {
  1648  			p->to.type = D_SP;
  1649  			v = -p->from.offset;
  1650  			p->from.offset = v;
  1651  			p->as = spadjop(ctxt, p, AADDL, AADDQ);
  1652  			if(v < 0) {
  1653  				p->as = spadjop(ctxt, p, ASUBL, ASUBQ);
  1654  				v = -v;
  1655  				p->from.offset = v;
  1656  			}
  1657  			if(v == 0)
  1658  				p->as = ANOP;
  1659  		}
  1660  	}
  1661  	
  1662  	n = 0;
  1663  	do {
  1664  		loop = 0;
  1665  		memset(s->r, 0, s->nr*sizeof s->r[0]);
  1666  		s->nr = 0;
  1667  		s->np = 0;
  1668  		c = 0;
  1669  		for(p = s->text; p != nil; p = p->link) {
  1670  			if(ctxt->headtype == Hnacl && p->isize > 0) {
  1671  				static LSym *deferreturn;
  1672  				
  1673  				if(deferreturn == nil)
  1674  					deferreturn = linklookup(ctxt, "runtime.deferreturn", 0);
  1675  
  1676  				// pad everything to avoid crossing 32-byte boundary
  1677  				if((c>>5) != ((c+p->isize-1)>>5))
  1678  					c = naclpad(ctxt, s, c, -c&31);
  1679  				// pad call deferreturn to start at 32-byte boundary
  1680  				// so that subtracting 5 in jmpdefer will jump back
  1681  				// to that boundary and rerun the call.
  1682  				if(p->as == ACALL && p->to.sym == deferreturn)
  1683  					c = naclpad(ctxt, s, c, -c&31);
  1684  				// pad call to end at 32-byte boundary
  1685  				if(p->as == ACALL)
  1686  					c = naclpad(ctxt, s, c, -(c+p->isize)&31);
  1687  				
  1688  				// the linker treats REP and STOSQ as different instructions
  1689  				// but in fact the REP is a prefix on the STOSQ.
  1690  				// make sure REP has room for 2 more bytes, so that
  1691  				// padding will not be inserted before the next instruction.
  1692  				if((p->as == AREP || p->as == AREPN) && (c>>5) != ((c+3-1)>>5))
  1693  					c = naclpad(ctxt, s, c, -c&31);
  1694  				
  1695  				// same for LOCK.
  1696  				// various instructions follow; the longest is 4 bytes.
  1697  				// give ourselves 8 bytes so as to avoid surprises.
  1698  				if(p->as == ALOCK && (c>>5) != ((c+8-1)>>5))
  1699  					c = naclpad(ctxt, s, c, -c&31);
  1700  			}
  1701  
  1702  			if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
  1703  				// pad with NOPs
  1704  				v = -c&(LoopAlign-1);
  1705  				if(v <= MaxLoopPad) {
  1706  					symgrow(ctxt, s, c+v);
  1707  					fillnop(s->p+c, v);
  1708  					c += v;
  1709  				}
  1710  			}
  1711  
  1712  			p->pc = c;
  1713  
  1714  			// process forward jumps to p
  1715  			for(q = p->comefrom; q != nil; q = q->forwd) {
  1716  				v = p->pc - (q->pc + q->mark);
  1717  				if(q->back & 2)	{	// short
  1718  					if(v > 127) {
  1719  						loop++;
  1720  						q->back ^= 2;
  1721  					}
  1722  					if(q->as == AJCXZL)
  1723  						s->p[q->pc+2] = v;
  1724  					else
  1725  						s->p[q->pc+1] = v;
  1726  				} else {
  1727  					bp = s->p + q->pc + q->mark - 4;
  1728  					*bp++ = v;
  1729  					*bp++ = v>>8;
  1730  					*bp++ = v>>16;
  1731  					*bp = v>>24;
  1732  				}	
  1733  			}
  1734  			p->comefrom = nil;
  1735  
  1736  			p->pc = c;
  1737  			asmins(ctxt, p);
  1738  			m = ctxt->andptr-ctxt->and;
  1739  			if(p->isize != m) {
  1740  				p->isize = m;
  1741  				loop++;
  1742  			}
  1743  			symgrow(ctxt, s, p->pc+m);
  1744  			memmove(s->p+p->pc, ctxt->and, m);
  1745  			p->mark = m;
  1746  			c += m;
  1747  		}
  1748  		if(++n > 20) {
  1749  			ctxt->diag("span must be looping");
  1750  			sysfatal("loop");
  1751  		}
  1752  	} while(loop);
  1753  	
  1754  	if(ctxt->headtype == Hnacl)
  1755  		c = naclpad(ctxt, s, c, -c&31);
  1756  	
  1757  	c += -c&(FuncAlign-1);
  1758  	s->size = c;
  1759  
  1760  	if(0 /* debug['a'] > 1 */) {
  1761  		print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
  1762  		for(i=0; i<s->np; i++) {
  1763  			print(" %.2ux", s->p[i]);
  1764  			if(i%16 == 15)
  1765  				print("\n  %.6ux", i+1);
  1766  		}
  1767  		if(i%16)
  1768  			print("\n");
  1769  	
  1770  		for(i=0; i<s->nr; i++) {
  1771  			Reloc *r;
  1772  			
  1773  			r = &s->r[i];
  1774  			print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
  1775  		}
  1776  	}
  1777  }
  1778  
  1779  static void
  1780  instinit(void)
  1781  {
  1782  	int c, i;
  1783  
  1784  	for(i=1; optab[i].as; i++) {
  1785  		c = optab[i].as;
  1786  		if(opindex[c] != nil)
  1787  			sysfatal("phase error in optab: %d (%A)", i, c);
  1788  		opindex[c] = &optab[i];
  1789  	}
  1790  
  1791  	for(i=0; i<Ymax; i++)
  1792  		ycover[i*Ymax + i] = 1;
  1793  
  1794  	ycover[Yi0*Ymax + Yi8] = 1;
  1795  	ycover[Yi1*Ymax + Yi8] = 1;
  1796  
  1797  	ycover[Yi0*Ymax + Ys32] = 1;
  1798  	ycover[Yi1*Ymax + Ys32] = 1;
  1799  	ycover[Yi8*Ymax + Ys32] = 1;
  1800  
  1801  	ycover[Yi0*Ymax + Yi32] = 1;
  1802  	ycover[Yi1*Ymax + Yi32] = 1;
  1803  	ycover[Yi8*Ymax + Yi32] = 1;
  1804  	ycover[Ys32*Ymax + Yi32] = 1;
  1805  
  1806  	ycover[Yi0*Ymax + Yi64] = 1;
  1807  	ycover[Yi1*Ymax + Yi64] = 1;
  1808  	ycover[Yi8*Ymax + Yi64] = 1;
  1809  	ycover[Ys32*Ymax + Yi64] = 1;
  1810  	ycover[Yi32*Ymax + Yi64] = 1;
  1811  
  1812  	ycover[Yal*Ymax + Yrb] = 1;
  1813  	ycover[Ycl*Ymax + Yrb] = 1;
  1814  	ycover[Yax*Ymax + Yrb] = 1;
  1815  	ycover[Ycx*Ymax + Yrb] = 1;
  1816  	ycover[Yrx*Ymax + Yrb] = 1;
  1817  	ycover[Yrl*Ymax + Yrb] = 1;
  1818  
  1819  	ycover[Ycl*Ymax + Ycx] = 1;
  1820  
  1821  	ycover[Yax*Ymax + Yrx] = 1;
  1822  	ycover[Ycx*Ymax + Yrx] = 1;
  1823  
  1824  	ycover[Yax*Ymax + Yrl] = 1;
  1825  	ycover[Ycx*Ymax + Yrl] = 1;
  1826  	ycover[Yrx*Ymax + Yrl] = 1;
  1827  
  1828  	ycover[Yf0*Ymax + Yrf] = 1;
  1829  
  1830  	ycover[Yal*Ymax + Ymb] = 1;
  1831  	ycover[Ycl*Ymax + Ymb] = 1;
  1832  	ycover[Yax*Ymax + Ymb] = 1;
  1833  	ycover[Ycx*Ymax + Ymb] = 1;
  1834  	ycover[Yrx*Ymax + Ymb] = 1;
  1835  	ycover[Yrb*Ymax + Ymb] = 1;
  1836  	ycover[Yrl*Ymax + Ymb] = 1;
  1837  	ycover[Ym*Ymax + Ymb] = 1;
  1838  
  1839  	ycover[Yax*Ymax + Yml] = 1;
  1840  	ycover[Ycx*Ymax + Yml] = 1;
  1841  	ycover[Yrx*Ymax + Yml] = 1;
  1842  	ycover[Yrl*Ymax + Yml] = 1;
  1843  	ycover[Ym*Ymax + Yml] = 1;
  1844  
  1845  	ycover[Yax*Ymax + Ymm] = 1;
  1846  	ycover[Ycx*Ymax + Ymm] = 1;
  1847  	ycover[Yrx*Ymax + Ymm] = 1;
  1848  	ycover[Yrl*Ymax + Ymm] = 1;
  1849  	ycover[Ym*Ymax + Ymm] = 1;
  1850  	ycover[Ymr*Ymax + Ymm] = 1;
  1851  
  1852  	ycover[Ym*Ymax + Yxm] = 1;
  1853  	ycover[Yxr*Ymax + Yxm] = 1;
  1854  
  1855  	for(i=0; i<D_NONE; i++) {
  1856  		reg[i] = -1;
  1857  		if(i >= D_AL && i <= D_R15B) {
  1858  			reg[i] = (i-D_AL) & 7;
  1859  			if(i >= D_SPB && i <= D_DIB)
  1860  				regrex[i] = 0x40;
  1861  			if(i >= D_R8B && i <= D_R15B)
  1862  				regrex[i] = Rxr | Rxx | Rxb;
  1863  		}
  1864  		if(i >= D_AH && i<= D_BH)
  1865  			reg[i] = 4 + ((i-D_AH) & 7);
  1866  		if(i >= D_AX && i <= D_R15) {
  1867  			reg[i] = (i-D_AX) & 7;
  1868  			if(i >= D_R8)
  1869  				regrex[i] = Rxr | Rxx | Rxb;
  1870  		}
  1871  		if(i >= D_F0 && i <= D_F0+7)
  1872  			reg[i] = (i-D_F0) & 7;
  1873  		if(i >= D_M0 && i <= D_M0+7)
  1874  			reg[i] = (i-D_M0) & 7;
  1875  		if(i >= D_X0 && i <= D_X0+15) {
  1876  			reg[i] = (i-D_X0) & 7;
  1877  			if(i >= D_X0+8)
  1878  				regrex[i] = Rxr | Rxx | Rxb;
  1879  		}
  1880  		if(i >= D_CR+8 && i <= D_CR+15)
  1881  			regrex[i] = Rxr;
  1882  	}
  1883  }
  1884  
  1885  static int
  1886  prefixof(Link *ctxt, Addr *a)
  1887  {
  1888  	switch(a->type) {
  1889  	case D_INDIR+D_CS:
  1890  		return 0x2e;
  1891  	case D_INDIR+D_DS:
  1892  		return 0x3e;
  1893  	case D_INDIR+D_ES:
  1894  		return 0x26;
  1895  	case D_INDIR+D_FS:
  1896  		return 0x64;
  1897  	case D_INDIR+D_GS:
  1898  		return 0x65;
  1899  	case D_INDIR+D_TLS:
  1900  		// NOTE: Systems listed here should be only systems that
  1901  		// support direct TLS references like 8(TLS) implemented as
  1902  		// direct references from FS or GS. Systems that require
  1903  		// the initial-exec model, where you load the TLS base into
  1904  		// a register and then index from that register, do not reach
  1905  		// this code and should not be listed.
  1906  		switch(ctxt->headtype) {
  1907  		default:
  1908  			sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype));
  1909  		case Hdragonfly:
  1910  		case Hfreebsd:
  1911  		case Hlinux:
  1912  		case Hnetbsd:
  1913  		case Hopenbsd:
  1914  		case Hsolaris:
  1915  			return 0x64; // FS
  1916  		case Hdarwin:
  1917  			return 0x65; // GS
  1918  		}
  1919  	}
  1920  	switch(a->index) {
  1921  	case D_CS:
  1922  		return 0x2e;
  1923  	case D_DS:
  1924  		return 0x3e;
  1925  	case D_ES:
  1926  		return 0x26;
  1927  	case D_FS:
  1928  		return 0x64;
  1929  	case D_GS:
  1930  		return 0x65;
  1931  	}
  1932  	return 0;
  1933  }
  1934  
/*
 * oclass classifies operand a and returns its Y* operand class.
 * doasm caches the result in p->ft / p->tt and uses it as the row index
 * into ycover when matching an instruction's operand table.
 * Yxxx is returned for operands that cannot be classified (including
 * 64-bit-only registers when ctxt->asmode != 64).
 */
static int
oclass(Link *ctxt, Addr *a)
{
	vlong v;
	int32 l;

	/* Indirect or indexed operands. */
	if(a->type >= D_INDIR || a->index != D_NONE) {
		/* An index with scale 0 is not a real index register. */
		if(a->index != D_NONE && a->scale == 0) {
			if(a->type == D_ADDR) {
				/* For D_ADDR the index field holds the kind of name addressed. */
				switch(a->index) {
				case D_EXTERN:
				case D_STATIC:
					if(a->sym != nil && isextern(a->sym))
						return Yi32;
					return Yiauto; // use pc-relative addressing
				case D_AUTO:
				case D_PARAM:
					return Yiauto;
				}
				return Yxxx;
			}
			/* Ycol is the class named by the ymovtab full-pointer and double-shift entries. */
			return Ycol;
		}
		return Ym;
	}
	switch(a->type)
	{
	case D_AL:
		return Yal;

	case D_AX:
		return Yax;

/*
	case D_SPB:
*/
	case D_BPB:
	case D_SIB:
	case D_DIB:
	case D_R8B:
	case D_R9B:
	case D_R10B:
	case D_R11B:
	case D_R12B:
	case D_R13B:
	case D_R14B:
	case D_R15B:
		/* These byte registers are accepted only in 64-bit mode. */
		if(ctxt->asmode != 64)
			return Yxxx;
		/* fall through */
	case D_DL:
	case D_BL:
	case D_AH:
	case D_CH:
	case D_DH:
	case D_BH:
		return Yrb;

	case D_CL:
		return Ycl;

	case D_CX:
		return Ycx;

	case D_DX:
	case D_BX:
		return Yrx;

	case D_R8:	/* not really Yrl */
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		/* R8-R15 are accepted only in 64-bit mode. */
		if(ctxt->asmode != 64)
			return Yxxx;
		/* fall through */
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		return Yrl;

	case D_F0+0:
		return	Yf0;

	case D_F0+1:
	case D_F0+2:
	case D_F0+3:
	case D_F0+4:
	case D_F0+5:
	case D_F0+6:
	case D_F0+7:
		return	Yrf;

	case D_M0+0:
	case D_M0+1:
	case D_M0+2:
	case D_M0+3:
	case D_M0+4:
	case D_M0+5:
	case D_M0+6:
	case D_M0+7:
		return	Ymr;

	case D_X0+0:
	case D_X0+1:
	case D_X0+2:
	case D_X0+3:
	case D_X0+4:
	case D_X0+5:
	case D_X0+6:
	case D_X0+7:
	case D_X0+8:
	case D_X0+9:
	case D_X0+10:
	case D_X0+11:
	case D_X0+12:
	case D_X0+13:
	case D_X0+14:
	case D_X0+15:
		return	Yxr;

	case D_NONE:
		return Ynone;

	/* Segment registers and the TLS pseudo-register. */
	case D_CS:	return	Ycs;
	case D_SS:	return	Yss;
	case D_DS:	return	Yds;
	case D_ES:	return	Yes;
	case D_FS:	return	Yfs;
	case D_GS:	return	Ygs;
	case D_TLS:	return	Ytls;

	case D_GDTR:	return	Ygdtr;
	case D_IDTR:	return	Yidtr;
	case D_LDTR:	return	Yldtr;
	case D_MSW:	return	Ymsw;
	case D_TASK:	return	Ytask;

	case D_CR+0:	return	Ycr0;
	case D_CR+1:	return	Ycr1;
	case D_CR+2:	return	Ycr2;
	case D_CR+3:	return	Ycr3;
	case D_CR+4:	return	Ycr4;
	case D_CR+5:	return	Ycr5;
	case D_CR+6:	return	Ycr6;
	case D_CR+7:	return	Ycr7;
	case D_CR+8:	return	Ycr8;

	case D_DR+0:	return	Ydr0;
	case D_DR+1:	return	Ydr1;
	case D_DR+2:	return	Ydr2;
	case D_DR+3:	return	Ydr3;
	case D_DR+4:	return	Ydr4;
	case D_DR+5:	return	Ydr5;
	case D_DR+6:	return	Ydr6;
	case D_DR+7:	return	Ydr7;

	case D_TR+0:	return	Ytr0;
	case D_TR+1:	return	Ytr1;
	case D_TR+2:	return	Ytr2;
	case D_TR+3:	return	Ytr3;
	case D_TR+4:	return	Ytr4;
	case D_TR+5:	return	Ytr5;
	case D_TR+6:	return	Ytr6;
	case D_TR+7:	return	Ytr7;

	/* Named memory operands without an index. */
	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return Ym;

	case D_CONST:
	case D_ADDR:
		/*
		 * A constant with no symbol is classified by the smallest
		 * immediate class that holds its value; anything involving
		 * a symbol is treated as a 32-bit immediate.
		 */
		if(a->sym == nil) {
			v = a->offset;
			if(v == 0)
				return Yi0;
			if(v == 1)
				return Yi1;
			if(v >= -128 && v <= 127)
				return Yi8;
			/* Round-trip through int32 to test whether v survives sign extension. */
			l = v;
			if((vlong)l == v)
				return Ys32;	/* can sign extend */
			if((v>>32) == 0)
				return Yi32;	/* unsigned */
			return Yi64;
		}
		return Yi32;

	case D_BRANCH:
		return Ybr;
	}
	return Yxxx;
}
  2133  
  2134  static void
  2135  asmidx(Link *ctxt, int scale, int index, int base)
  2136  {
  2137  	int i;
  2138  
  2139  	switch(index) {
  2140  	default:
  2141  		goto bad;
  2142  
  2143  	case D_NONE:
  2144  		i = 4 << 3;
  2145  		goto bas;
  2146  
  2147  	case D_R8:
  2148  	case D_R9:
  2149  	case D_R10:
  2150  	case D_R11:
  2151  	case D_R12:
  2152  	case D_R13:
  2153  	case D_R14:
  2154  	case D_R15:
  2155  		if(ctxt->asmode != 64)
  2156  			goto bad;
  2157  	case D_AX:
  2158  	case D_CX:
  2159  	case D_DX:
  2160  	case D_BX:
  2161  	case D_BP:
  2162  	case D_SI:
  2163  	case D_DI:
  2164  		i = reg[index] << 3;
  2165  		break;
  2166  	}
  2167  	switch(scale) {
  2168  	default:
  2169  		goto bad;
  2170  	case 1:
  2171  		break;
  2172  	case 2:
  2173  		i |= (1<<6);
  2174  		break;
  2175  	case 4:
  2176  		i |= (2<<6);
  2177  		break;
  2178  	case 8:
  2179  		i |= (3<<6);
  2180  		break;
  2181  	}
  2182  bas:
  2183  	switch(base) {
  2184  	default:
  2185  		goto bad;
  2186  	case D_NONE:	/* must be mod=00 */
  2187  		i |= 5;
  2188  		break;
  2189  	case D_R8:
  2190  	case D_R9:
  2191  	case D_R10:
  2192  	case D_R11:
  2193  	case D_R12:
  2194  	case D_R13:
  2195  	case D_R14:
  2196  	case D_R15:
  2197  		if(ctxt->asmode != 64)
  2198  			goto bad;
  2199  	case D_AX:
  2200  	case D_CX:
  2201  	case D_DX:
  2202  	case D_BX:
  2203  	case D_SP:
  2204  	case D_BP:
  2205  	case D_SI:
  2206  	case D_DI:
  2207  		i |= reg[base];
  2208  		break;
  2209  	}
  2210  	*ctxt->andptr++ = i;
  2211  	return;
  2212  bad:
  2213  	ctxt->diag("asmidx: bad address %d/%d/%d", scale, index, base);
  2214  	*ctxt->andptr++ = 0;
  2215  	return;
  2216  }
  2217  
  2218  static void
  2219  put4(Link *ctxt, int32 v)
  2220  {
  2221  	ctxt->andptr[0] = v;
  2222  	ctxt->andptr[1] = v>>8;
  2223  	ctxt->andptr[2] = v>>16;
  2224  	ctxt->andptr[3] = v>>24;
  2225  	ctxt->andptr += 4;
  2226  }
  2227  
  2228  static void
  2229  relput4(Link *ctxt, Prog *p, Addr *a)
  2230  {
  2231  	vlong v;
  2232  	Reloc rel, *r;
  2233  	
  2234  	v = vaddr(ctxt, a, &rel);
  2235  	if(rel.siz != 0) {
  2236  		if(rel.siz != 4)
  2237  			ctxt->diag("bad reloc");
  2238  		r = addrel(ctxt->cursym);
  2239  		*r = rel;
  2240  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2241  	}
  2242  	put4(ctxt, v);
  2243  }
  2244  
  2245  static void
  2246  put8(Link *ctxt, vlong v)
  2247  {
  2248  	ctxt->andptr[0] = v;
  2249  	ctxt->andptr[1] = v>>8;
  2250  	ctxt->andptr[2] = v>>16;
  2251  	ctxt->andptr[3] = v>>24;
  2252  	ctxt->andptr[4] = v>>32;
  2253  	ctxt->andptr[5] = v>>40;
  2254  	ctxt->andptr[6] = v>>48;
  2255  	ctxt->andptr[7] = v>>56;
  2256  	ctxt->andptr += 8;
  2257  }
  2258  
  2259  /*
  2260  static void
  2261  relput8(Prog *p, Addr *a)
  2262  {
  2263  	vlong v;
  2264  	Reloc rel, *r;
  2265  	
  2266  	v = vaddr(ctxt, a, &rel);
  2267  	if(rel.siz != 0) {
  2268  		r = addrel(ctxt->cursym);
  2269  		*r = rel;
  2270  		r->siz = 8;
  2271  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2272  	}
  2273  	put8(ctxt, v);
  2274  }
  2275  */
  2276  
  2277  static vlong
  2278  vaddr(Link *ctxt, Addr *a, Reloc *r)
  2279  {
  2280  	int t;
  2281  	vlong v;
  2282  	LSym *s;
  2283  	
  2284  	if(r != nil)
  2285  		memset(r, 0, sizeof *r);
  2286  
  2287  	t = a->type;
  2288  	v = a->offset;
  2289  	if(t == D_ADDR)
  2290  		t = a->index;
  2291  	switch(t) {
  2292  	case D_STATIC:
  2293  	case D_EXTERN:
  2294  		s = a->sym;
  2295  		if(r == nil) {
  2296  			ctxt->diag("need reloc for %D", a);
  2297  			sysfatal("reloc");
  2298  		}
  2299  		if(isextern(s)) {
  2300  			r->siz = 4;
  2301  			r->type = R_ADDR;
  2302  		} else {
  2303  			r->siz = 4;
  2304  			r->type = R_PCREL;
  2305  		}
  2306  		r->off = -1;	// caller must fill in
  2307  		r->sym = s;
  2308  		r->add = v;
  2309  		v = 0;
  2310  		if(s->type == STLSBSS) {
  2311  			r->xadd = r->add - r->siz;
  2312  			r->type = R_TLS;
  2313  			r->xsym = s;
  2314  		}
  2315  		break;
  2316  	
  2317  	case D_INDIR+D_TLS:
  2318  		if(r == nil) {
  2319  			ctxt->diag("need reloc for %D", a);
  2320  			sysfatal("reloc");
  2321  		}
  2322  		r->type = R_TLS_LE;
  2323  		r->siz = 4;
  2324  		r->off = -1;	// caller must fill in
  2325  		r->add = v;
  2326  		v = 0;
  2327  		break;
  2328  	}
  2329  	return v;
  2330  }
  2331  
/*
 * asmandsz appends the addressing bytes for operand a at ctxt->andptr:
 * a mod/reg/rm byte, an optional scale/index/base byte (via asmidx or
 * emitted directly), and an optional 1- or 4-byte displacement.
 *
 *	r	value placed in bits 3-5 of the first byte
 *	rex	prefix bits for r; only 0x40 and Rxr are kept and OR'd into
 *		ctxt->rexflag
 *	m64	unused (marked with USED)
 *
 * When the displacement needs a relocation, it is added to ctxt->cursym at
 * the displacement's offset.  Unencodable operands are reported through
 * ctxt->diag ("asmand: bad address").
 */
static void
asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64)
{
	int32 v;
	int t, scale;
	Reloc rel;

	USED(m64);
	rex &= (0x40 | Rxr);
	v = a->offset;
	t = a->type;
	/* rel.siz == 0 means "no relocation"; the other fields are set only by vaddr or the TLS case below. */
	rel.siz = 0;

	/* Case 1: operand has a real index register (base may be absent). */
	if(a->index != D_NONE && a->index != D_TLS) {
		if(t < D_INDIR) { 
			switch(t) {
			default:
				goto bad;
			case D_EXTERN:
			case D_STATIC:
				if(!isextern(a->sym))
					goto bad;
				t = D_NONE;
				v = vaddr(ctxt, a, &rel);
				break;
			case D_AUTO:
			case D_PARAM:
				t = D_SP;
				break;
			}
		} else
			t -= D_INDIR;
		ctxt->rexflag |= (regrex[(int)a->index] & Rxx) | (regrex[t] & Rxb) | rex;
		/* No base register: mod=00 with a 4-byte displacement. */
		if(t == D_NONE) {
			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, a->scale, a->index, t);
			goto putrelv;
		}
		/*
		 * Zero displacement, no relocation: mod=00, no displacement bytes.
		 * BP and R13 are excluded -- presumably because their base
		 * encoding with mod=00 is the "no base" form asmidx emits for
		 * D_NONE (NOTE(review): confirm against the ISA manual).
		 */
		if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, a->scale, a->index, t);
			return;
		}
		/* Displacement fits in a signed byte: mod=01. */
		if(v >= -128 && v < 128 && rel.siz == 0) {
			*ctxt->andptr++ = (1 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, a->scale, a->index, t);
			*ctxt->andptr++ = v;
			return;
		}
		/* Otherwise mod=10 with a 4-byte displacement. */
		*ctxt->andptr++ = (2 << 6) | (4 << 0) | (r << 3);
		asmidx(ctxt, a->scale, a->index, t);
		goto putrelv;
	}

	/* Case 2: register-direct operand (mod=11); an offset makes no sense here. */
	if(t >= D_AL && t <= D_X0+15) {
		if(v)
			goto bad;
		*ctxt->andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
		ctxt->rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
		return;
	}
	
	/* Case 3: memory operand without an index register. */
	scale = a->scale;
	if(t < D_INDIR) {
		switch(a->type) {
		default:
			goto bad;
		case D_STATIC:
		case D_EXTERN:
			t = D_NONE;
			v = vaddr(ctxt, a, &rel);
			break;
		case D_AUTO:
		case D_PARAM:
			t = D_SP;
			break;
		}
		scale = 1;
	} else
		t -= D_INDIR;
	if(t == D_TLS)
		v = vaddr(ctxt, a, &rel);

	ctxt->rexflag |= (regrex[t] & Rxb) | rex;
	/* No base register (absolute, segment-relative or TLS-relative address). */
	if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) {
		/*
		 * Note the precedence: the whole && chain is one alternative and
		 * ctxt->asmode != 64 is the other.  This branch emits mod=00
		 * rm=5 followed by the 4-byte value, with no second byte.
		 */
		if((a->sym == nil || !isextern(a->sym)) && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || ctxt->asmode != 64) {
			*ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3);
			goto putrelv;
		}
		/* temporary */
		*ctxt->andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
		*ctxt->andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
		goto putrelv;
	}
	/* SP and R12 as base always get the extra byte from asmidx. */
	if(t == D_SP || t == D_R12) {
		if(v == 0) {
			*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(ctxt, scale, D_NONE, t);
			return;
		}
		if(v >= -128 && v < 128) {
			*ctxt->andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(ctxt, scale, D_NONE, t);
			*ctxt->andptr++ = v;
			return;
		}
		*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
		asmidx(ctxt, scale, D_NONE, t);
		goto putrelv;
	}
	/* Any other general register as base. */
	if(t >= D_AX && t <= D_R15) {
		/* An index of D_TLS turns the displacement into an R_TLS_IE relocation. */
		if(a->index == D_TLS) {
			memset(&rel, 0, sizeof rel);
			rel.type = R_TLS_IE;
			rel.siz = 4;
			rel.sym = nil;
			rel.add = v;
			v = 0;
		}
		if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
			*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
			return;
		}
		if(v >= -128 && v < 128 && rel.siz == 0) {
			ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
			ctxt->andptr[1] = v;
			ctxt->andptr += 2;
			return;
		}
		*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
		goto putrelv;
	}
	goto bad;
	
	/* Emit the 4-byte displacement, recording a relocation for it if needed. */
putrelv:
	if(rel.siz != 0) {
		/* NOTE: this local r shadows the int parameter r inside this block. */
		Reloc *r;

		if(rel.siz != 4) {
			ctxt->diag("bad rel");
			goto bad;
		}
		r = addrel(ctxt->cursym);
		*r = rel;
		/* offset = pc of the current instruction + bytes assembled so far */
		r->off = ctxt->curp->pc + ctxt->andptr - ctxt->and;
	}
		
	put4(ctxt, v);
	return;

bad:
	ctxt->diag("asmand: bad address %D", a);
	return;
}
  2484  
/*
 * asmand encodes operand a through asmandsz, taking the register-field
 * value and prefix bits from register operand ra (reg[ra->type] and
 * regrex[ra->type]).  m64 is passed as 0.
 */
static void
asmand(Link *ctxt, Addr *a, Addr *ra)
{
	asmandsz(ctxt, a, reg[ra->type], regrex[ra->type], 0);
}
  2490  
/*
 * asmando encodes operand a through asmandsz with the literal value o in
 * the register field (bits 3-5 of the first byte) and no extra prefix
 * bits.  m64 is passed as 0.
 */
static void
asmando(Link *ctxt, Addr *a, int o)
{
	asmandsz(ctxt, a, o, 0, 0);
}
  2496  
  2497  static void
  2498  bytereg(Addr *a, uint8 *t)
  2499  {
  2500  	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15)) {
  2501  		a->type = D_AL + (a->type-D_AX);
  2502  		*t = 0;
  2503  	}
  2504  }
  2505  
/*
 * E is a marker value stored in Movtab op-byte lists.  Judging by its
 * position right after the last meaningful byte it presumably ends the
 * byte sequence (NOTE(review): confirm against the code that consumes
 * ymovtab, which is outside this section).
 */
enum {
	E = 0xff,
};
/*
 * ymovtab lists special-case pushes, pops, moves and double shifts.
 * Each entry is {opcode, from-class, to-class, code, {op bytes}} and the
 * table ends with an all-zero entry.
 * NOTE(review): the numeric code (0-7) is interpreted by the table's
 * consumer, which is not visible here; entries sharing a code appear to
 * share an encoding recipe -- confirm before editing.
 */
static Movtab	ymovtab[] =
{
/* push */
	{APUSHL,	Ycs,	Ynone,	0,	{0x0e,E,0,0}},
	{APUSHL,	Yss,	Ynone,	0,	{0x16,E,0,0}},
	{APUSHL,	Yds,	Ynone,	0,	{0x1e,E,0,0}},
	{APUSHL,	Yes,	Ynone,	0,	{0x06,E,0,0}},
	{APUSHL,	Yfs,	Ynone,	0,	{0x0f,0xa0,E,0}},
	{APUSHL,	Ygs,	Ynone,	0,	{0x0f,0xa8,E,0}},
	{APUSHQ,	Yfs,	Ynone,	0,	{0x0f,0xa0,E,0}},
	{APUSHQ,	Ygs,	Ynone,	0,	{0x0f,0xa8,E,0}},

	{APUSHW,	Ycs,	Ynone,	0,	{Pe,0x0e,E,0}},
	{APUSHW,	Yss,	Ynone,	0,	{Pe,0x16,E,0}},
	{APUSHW,	Yds,	Ynone,	0,	{Pe,0x1e,E,0}},
	{APUSHW,	Yes,	Ynone,	0,	{Pe,0x06,E,0}},
	{APUSHW,	Yfs,	Ynone,	0,	{Pe,0x0f,0xa0,E}},
	{APUSHW,	Ygs,	Ynone,	0,	{Pe,0x0f,0xa8,E}},

/* pop */
	{APOPL,	Ynone,	Yds,	0,	{0x1f,E,0,0}},
	{APOPL,	Ynone,	Yes,	0,	{0x07,E,0,0}},
	{APOPL,	Ynone,	Yss,	0,	{0x17,E,0,0}},
	{APOPL,	Ynone,	Yfs,	0,	{0x0f,0xa1,E,0}},
	{APOPL,	Ynone,	Ygs,	0,	{0x0f,0xa9,E,0}},
	{APOPQ,	Ynone,	Yfs,	0,	{0x0f,0xa1,E,0}},
	{APOPQ,	Ynone,	Ygs,	0,	{0x0f,0xa9,E,0}},

	{APOPW,	Ynone,	Yds,	0,	{Pe,0x1f,E,0}},
	{APOPW,	Ynone,	Yes,	0,	{Pe,0x07,E,0}},
	{APOPW,	Ynone,	Yss,	0,	{Pe,0x17,E,0}},
	{APOPW,	Ynone,	Yfs,	0,	{Pe,0x0f,0xa1,E}},
	{APOPW,	Ynone,	Ygs,	0,	{Pe,0x0f,0xa9,E}},

/* mov seg */
	{AMOVW,	Yes,	Yml,	1,	{0x8c,0,0,0}},
	{AMOVW,	Ycs,	Yml,	1,	{0x8c,1,0,0}},
	{AMOVW,	Yss,	Yml,	1,	{0x8c,2,0,0}},
	{AMOVW,	Yds,	Yml,	1,	{0x8c,3,0,0}},
	{AMOVW,	Yfs,	Yml,	1,	{0x8c,4,0,0}},
	{AMOVW,	Ygs,	Yml,	1,	{0x8c,5,0,0}},

	{AMOVW,	Yml,	Yes,	2,	{0x8e,0,0,0}},
	{AMOVW,	Yml,	Ycs,	2,	{0x8e,1,0,0}},
	{AMOVW,	Yml,	Yss,	2,	{0x8e,2,0,0}},
	{AMOVW,	Yml,	Yds,	2,	{0x8e,3,0,0}},
	{AMOVW,	Yml,	Yfs,	2,	{0x8e,4,0,0}},
	{AMOVW,	Yml,	Ygs,	2,	{0x8e,5,0,0}},

/* mov cr */
	{AMOVL,	Ycr0,	Yml,	3,	{0x0f,0x20,0,0}},
	{AMOVL,	Ycr2,	Yml,	3,	{0x0f,0x20,2,0}},
	{AMOVL,	Ycr3,	Yml,	3,	{0x0f,0x20,3,0}},
	{AMOVL,	Ycr4,	Yml,	3,	{0x0f,0x20,4,0}},
	{AMOVL,	Ycr8,	Yml,	3,	{0x0f,0x20,8,0}},
	{AMOVQ,	Ycr0,	Yml,	3,	{0x0f,0x20,0,0}},
	{AMOVQ,	Ycr2,	Yml,	3,	{0x0f,0x20,2,0}},
	{AMOVQ,	Ycr3,	Yml,	3,	{0x0f,0x20,3,0}},
	{AMOVQ,	Ycr4,	Yml,	3,	{0x0f,0x20,4,0}},
	{AMOVQ,	Ycr8,	Yml,	3,	{0x0f,0x20,8,0}},

	{AMOVL,	Yml,	Ycr0,	4,	{0x0f,0x22,0,0}},
	{AMOVL,	Yml,	Ycr2,	4,	{0x0f,0x22,2,0}},
	{AMOVL,	Yml,	Ycr3,	4,	{0x0f,0x22,3,0}},
	{AMOVL,	Yml,	Ycr4,	4,	{0x0f,0x22,4,0}},
	{AMOVL,	Yml,	Ycr8,	4,	{0x0f,0x22,8,0}},
	{AMOVQ,	Yml,	Ycr0,	4,	{0x0f,0x22,0,0}},
	{AMOVQ,	Yml,	Ycr2,	4,	{0x0f,0x22,2,0}},
	{AMOVQ,	Yml,	Ycr3,	4,	{0x0f,0x22,3,0}},
	{AMOVQ,	Yml,	Ycr4,	4,	{0x0f,0x22,4,0}},
	{AMOVQ,	Yml,	Ycr8,	4,	{0x0f,0x22,8,0}},

/* mov dr */
	{AMOVL,	Ydr0,	Yml,	3,	{0x0f,0x21,0,0}},
	{AMOVL,	Ydr6,	Yml,	3,	{0x0f,0x21,6,0}},
	{AMOVL,	Ydr7,	Yml,	3,	{0x0f,0x21,7,0}},
	{AMOVQ,	Ydr0,	Yml,	3,	{0x0f,0x21,0,0}},
	{AMOVQ,	Ydr6,	Yml,	3,	{0x0f,0x21,6,0}},
	{AMOVQ,	Ydr7,	Yml,	3,	{0x0f,0x21,7,0}},

	{AMOVL,	Yml,	Ydr0,	4,	{0x0f,0x23,0,0}},
	{AMOVL,	Yml,	Ydr6,	4,	{0x0f,0x23,6,0}},
	{AMOVL,	Yml,	Ydr7,	4,	{0x0f,0x23,7,0}},
	{AMOVQ,	Yml,	Ydr0,	4,	{0x0f,0x23,0,0}},
	{AMOVQ,	Yml,	Ydr6,	4,	{0x0f,0x23,6,0}},
	{AMOVQ,	Yml,	Ydr7,	4,	{0x0f,0x23,7,0}},

/* mov tr */
	{AMOVL,	Ytr6,	Yml,	3,	{0x0f,0x24,6,0}},
	{AMOVL,	Ytr7,	Yml,	3,	{0x0f,0x24,7,0}},

	{AMOVL,	Yml,	Ytr6,	4,	{0x0f,0x26,6,E}},
	{AMOVL,	Yml,	Ytr7,	4,	{0x0f,0x26,7,E}},

/* lgdt, sgdt, lidt, sidt */
	{AMOVL,	Ym,	Ygdtr,	4,	{0x0f,0x01,2,0}},
	{AMOVL,	Ygdtr,	Ym,	3,	{0x0f,0x01,0,0}},
	{AMOVL,	Ym,	Yidtr,	4,	{0x0f,0x01,3,0}},
	{AMOVL,	Yidtr,	Ym,	3,	{0x0f,0x01,1,0}},
	{AMOVQ,	Ym,	Ygdtr,	4,	{0x0f,0x01,2,0}},
	{AMOVQ,	Ygdtr,	Ym,	3,	{0x0f,0x01,0,0}},
	{AMOVQ,	Ym,	Yidtr,	4,	{0x0f,0x01,3,0}},
	{AMOVQ,	Yidtr,	Ym,	3,	{0x0f,0x01,1,0}},

/* lldt, sldt */
	{AMOVW,	Yml,	Yldtr,	4,	{0x0f,0x00,2,0}},
	{AMOVW,	Yldtr,	Yml,	3,	{0x0f,0x00,0,0}},

/* lmsw, smsw */
	{AMOVW,	Yml,	Ymsw,	4,	{0x0f,0x01,6,0}},
	{AMOVW,	Ymsw,	Yml,	3,	{0x0f,0x01,4,0}},

/* ltr, str */
	{AMOVW,	Yml,	Ytask,	4,	{0x0f,0x00,3,0}},
	{AMOVW,	Ytask,	Yml,	3,	{0x0f,0x00,1,0}},

/* load full pointer */
	{AMOVL,	Yml,	Ycol,	5,	{0,0,0,0}},
	{AMOVW,	Yml,	Ycol,	5,	{Pe,0,0,0}},

/* double shift */
	{ASHLL,	Ycol,	Yml,	6,	{0xa4,0xa5,0,0}},
	{ASHRL,	Ycol,	Yml,	6,	{0xac,0xad,0,0}},
	{ASHLQ,	Ycol,	Yml,	6,	{Pw,0xa4,0xa5,0}},
	{ASHRQ,	Ycol,	Yml,	6,	{Pw,0xac,0xad,0}},
	{ASHLW,	Ycol,	Yml,	6,	{Pe,0xa4,0xa5,0}},
	{ASHRW,	Ycol,	Yml,	6,	{Pe,0xac,0xad,0}},

/* load TLS base */
	{AMOVQ,	Ytls,	Yrl,	7,	{0,0,0,0}},

	{0}
};
  2642  
  2643  static int
  2644  isax(Addr *a)
  2645  {
  2646  
  2647  	switch(a->type) {
  2648  	case D_AX:
  2649  	case D_AL:
  2650  	case D_AH:
  2651  	case D_INDIR+D_AX:
  2652  		return 1;
  2653  	}
  2654  	if(a->index == D_AX)
  2655  		return 1;
  2656  	return 0;
  2657  }
  2658  
  2659  static void
  2660  subreg(Prog *p, int from, int to)
  2661  {
  2662  
  2663  	if(0 /*debug['Q']*/)
  2664  		print("\n%P	s/%R/%R/\n", p, from, to);
  2665  
  2666  	if(p->from.type == from)
  2667  		p->from.type = to;
  2668  	if(p->to.type == from)
  2669  		p->to.type = to;
  2670  
  2671  	if(p->from.index == from)
  2672  		p->from.index = to;
  2673  	if(p->to.index == from)
  2674  		p->to.index = to;
  2675  
  2676  	from += D_INDIR;
  2677  	if(p->from.type == from)
  2678  		p->from.type = to+D_INDIR;
  2679  	if(p->to.type == from)
  2680  		p->to.type = to+D_INDIR;
  2681  
  2682  	if(0 /*debug['Q']*/)
  2683  		print("%P\n", p);
  2684  }
  2685  
  2686  static int
  2687  mediaop(Link *ctxt, Optab *o, int op, int osize, int z)
  2688  {
  2689  	switch(op){
  2690  	case Pm:
  2691  	case Pe:
  2692  	case Pf2:
  2693  	case Pf3:
  2694  		if(osize != 1){
  2695  			if(op != Pm)
  2696  				*ctxt->andptr++ = op;
  2697  			*ctxt->andptr++ = Pm;
  2698  			op = o->op[++z];
  2699  			break;
  2700  		}
  2701  	default:
  2702  		if(ctxt->andptr == ctxt->and || ctxt->and[ctxt->andptr - ctxt->and - 1] != Pm)
  2703  			*ctxt->andptr++ = Pm;
  2704  		break;
  2705  	}
  2706  	*ctxt->andptr++ = op;
  2707  	return z;
  2708  }
  2709  
  2710  static void
  2711  doasm(Link *ctxt, Prog *p)
  2712  {
  2713  	Optab *o;
  2714  	Prog *q, pp;
  2715  	uchar *t;
  2716  	Movtab *mo;
  2717  	int z, op, ft, tt, xo, l, pre;
  2718  	vlong v;
  2719  	Reloc rel, *r;
  2720  	Addr *a;
  2721  	
  2722  	ctxt->curp = p;	// TODO
  2723  
  2724  	o = opindex[p->as];
  2725  	if(o == nil) {
  2726  		ctxt->diag("asmins: missing op %P", p);
  2727  		return;
  2728  	}
  2729  	
  2730  	pre = prefixof(ctxt, &p->from);
  2731  	if(pre)
  2732  		*ctxt->andptr++ = pre;
  2733  	pre = prefixof(ctxt, &p->to);
  2734  	if(pre)
  2735  		*ctxt->andptr++ = pre;
  2736  
  2737  	if(p->ft == 0)
  2738  		p->ft = oclass(ctxt, &p->from);
  2739  	if(p->tt == 0)
  2740  		p->tt = oclass(ctxt, &p->to);
  2741  
  2742  	ft = p->ft * Ymax;
  2743  	tt = p->tt * Ymax;
  2744  
  2745  	t = o->ytab;
  2746  	if(t == 0) {
  2747  		ctxt->diag("asmins: noproto %P", p);
  2748  		return;
  2749  	}
  2750  	xo = o->op[0] == 0x0f;
  2751  	for(z=0; *t; z+=t[3]+xo,t+=4)
  2752  		if(ycover[ft+t[0]])
  2753  		if(ycover[tt+t[1]])
  2754  			goto found;
  2755  	goto domov;
  2756  
  2757  found:
  2758  	switch(o->prefix) {
  2759  	case Pq:	/* 16 bit escape and opcode escape */
  2760  		*ctxt->andptr++ = Pe;
  2761  		*ctxt->andptr++ = Pm;
  2762  		break;
  2763  	case Pq3:	/* 16 bit escape, Rex.w, and opcode escape */
  2764  		*ctxt->andptr++ = Pe;
  2765  		*ctxt->andptr++ = Pw;
  2766  		*ctxt->andptr++ = Pm;
  2767  		break;
  2768  
  2769  	case Pf2:	/* xmm opcode escape */
  2770  	case Pf3:
  2771  		*ctxt->andptr++ = o->prefix;
  2772  		*ctxt->andptr++ = Pm;
  2773  		break;
  2774  
  2775  	case Pm:	/* opcode escape */
  2776  		*ctxt->andptr++ = Pm;
  2777  		break;
  2778  
  2779  	case Pe:	/* 16 bit escape */
  2780  		*ctxt->andptr++ = Pe;
  2781  		break;
  2782  
  2783  	case Pw:	/* 64-bit escape */
  2784  		if(p->mode != 64)
  2785  			ctxt->diag("asmins: illegal 64: %P", p);
  2786  		ctxt->rexflag |= Pw;
  2787  		break;
  2788  
  2789  	case Pb:	/* botch */
  2790  		bytereg(&p->from, &p->ft);
  2791  		bytereg(&p->to, &p->tt);
  2792  		break;
  2793  
  2794  	case P32:	/* 32 bit but illegal if 64-bit mode */
  2795  		if(p->mode == 64)
  2796  			ctxt->diag("asmins: illegal in 64-bit mode: %P", p);
  2797  		break;
  2798  
  2799  	case Py:	/* 64-bit only, no prefix */
  2800  		if(p->mode != 64)
  2801  			ctxt->diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
  2802  		break;
  2803  	}
  2804  
  2805  	if(z >= nelem(o->op))
  2806  		sysfatal("asmins bad table %P", p);
  2807  	op = o->op[z];
  2808  	if(op == 0x0f) {
  2809  		*ctxt->andptr++ = op;
  2810  		op = o->op[++z];
  2811  	}
  2812  	switch(t[2]) {
  2813  	default:
  2814  		ctxt->diag("asmins: unknown z %d %P", t[2], p);
  2815  		return;
  2816  
  2817  	case Zpseudo:
  2818  		break;
  2819  
  2820  	case Zlit:
  2821  		for(; op = o->op[z]; z++)
  2822  			*ctxt->andptr++ = op;
  2823  		break;
  2824  
  2825  	case Zlitm_r:
  2826  		for(; op = o->op[z]; z++)
  2827  			*ctxt->andptr++ = op;
  2828  		asmand(ctxt, &p->from, &p->to);
  2829  		break;
  2830  
  2831  	case Zmb_r:
  2832  		bytereg(&p->from, &p->ft);
  2833  		/* fall through */
  2834  	case Zm_r:
  2835  		*ctxt->andptr++ = op;
  2836  		asmand(ctxt, &p->from, &p->to);
  2837  		break;
  2838  	case Zm2_r:
  2839  		*ctxt->andptr++ = op;
  2840  		*ctxt->andptr++ = o->op[z+1];
  2841  		asmand(ctxt, &p->from, &p->to);
  2842  		break;
  2843  
  2844  	case Zm_r_xm:
  2845  		mediaop(ctxt, o, op, t[3], z);
  2846  		asmand(ctxt, &p->from, &p->to);
  2847  		break;
  2848  
  2849  	case Zm_r_xm_nr:
  2850  		ctxt->rexflag = 0;
  2851  		mediaop(ctxt, o, op, t[3], z);
  2852  		asmand(ctxt, &p->from, &p->to);
  2853  		break;
  2854  
  2855  	case Zm_r_i_xm:
  2856  		mediaop(ctxt, o, op, t[3], z);
  2857  		asmand(ctxt, &p->from, &p->to);
  2858  		*ctxt->andptr++ = p->to.offset;
  2859  		break;
  2860  
  2861  	case Zm_r_3d:
  2862  		*ctxt->andptr++ = 0x0f;
  2863  		*ctxt->andptr++ = 0x0f;
  2864  		asmand(ctxt, &p->from, &p->to);
  2865  		*ctxt->andptr++ = op;
  2866  		break;
  2867  
  2868  	case Zibm_r:
  2869  		while ((op = o->op[z++]) != 0)
  2870  			*ctxt->andptr++ = op;
  2871  		asmand(ctxt, &p->from, &p->to);
  2872  		*ctxt->andptr++ = p->to.offset;
  2873  		break;
  2874  
  2875  	case Zaut_r:
  2876  		*ctxt->andptr++ = 0x8d;	/* leal */
  2877  		if(p->from.type != D_ADDR)
  2878  			ctxt->diag("asmins: Zaut sb type ADDR");
  2879  		p->from.type = p->from.index;
  2880  		p->from.index = D_NONE;
  2881  		asmand(ctxt, &p->from, &p->to);
  2882  		p->from.index = p->from.type;
  2883  		p->from.type = D_ADDR;
  2884  		break;
  2885  
  2886  	case Zm_o:
  2887  		*ctxt->andptr++ = op;
  2888  		asmando(ctxt, &p->from, o->op[z+1]);
  2889  		break;
  2890  
  2891  	case Zr_m:
  2892  		*ctxt->andptr++ = op;
  2893  		asmand(ctxt, &p->to, &p->from);
  2894  		break;
  2895  
  2896  	case Zr_m_xm:
  2897  		mediaop(ctxt, o, op, t[3], z);
  2898  		asmand(ctxt, &p->to, &p->from);
  2899  		break;
  2900  
  2901  	case Zr_m_xm_nr:
  2902  		ctxt->rexflag = 0;
  2903  		mediaop(ctxt, o, op, t[3], z);
  2904  		asmand(ctxt, &p->to, &p->from);
  2905  		break;
  2906  
  2907  	case Zr_m_i_xm:
  2908  		mediaop(ctxt, o, op, t[3], z);
  2909  		asmand(ctxt, &p->to, &p->from);
  2910  		*ctxt->andptr++ = p->from.offset;
  2911  		break;
  2912  
  2913  	case Zo_m:
  2914  		*ctxt->andptr++ = op;
  2915  		asmando(ctxt, &p->to, o->op[z+1]);
  2916  		break;
  2917  
  2918  	case Zcallindreg:
  2919  		r = addrel(ctxt->cursym);
  2920  		r->off = p->pc;
  2921  		r->type = R_CALLIND;
  2922  		r->siz = 0;
  2923  		// fallthrough
  2924  	case Zo_m64:
  2925  		*ctxt->andptr++ = op;
  2926  		asmandsz(ctxt, &p->to, o->op[z+1], 0, 1);
  2927  		break;
  2928  
  2929  	case Zm_ibo:
  2930  		*ctxt->andptr++ = op;
  2931  		asmando(ctxt, &p->from, o->op[z+1]);
  2932  		*ctxt->andptr++ = vaddr(ctxt, &p->to, nil);
  2933  		break;
  2934  
  2935  	case Zibo_m:
  2936  		*ctxt->andptr++ = op;
  2937  		asmando(ctxt, &p->to, o->op[z+1]);
  2938  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2939  		break;
  2940  
  2941  	case Zibo_m_xm:
  2942  		z = mediaop(ctxt, o, op, t[3], z);
  2943  		asmando(ctxt, &p->to, o->op[z+1]);
  2944  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2945  		break;
  2946  
  2947  	case Z_ib:
  2948  	case Zib_:
  2949  		if(t[2] == Zib_)
  2950  			a = &p->from;
  2951  		else
  2952  			a = &p->to;
  2953  		*ctxt->andptr++ = op;
  2954  		*ctxt->andptr++ = vaddr(ctxt, a, nil);
  2955  		break;
  2956  
  2957  	case Zib_rp:
  2958  		ctxt->rexflag |= regrex[p->to.type] & (Rxb|0x40);
  2959  		*ctxt->andptr++ = op + reg[p->to.type];
  2960  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2961  		break;
  2962  
  2963  	case Zil_rp:
  2964  		ctxt->rexflag |= regrex[p->to.type] & Rxb;
  2965  		*ctxt->andptr++ = op + reg[p->to.type];
  2966  		if(o->prefix == Pe) {
  2967  			v = vaddr(ctxt, &p->from, nil);
  2968  			*ctxt->andptr++ = v;
  2969  			*ctxt->andptr++ = v>>8;
  2970  		}
  2971  		else
  2972  			relput4(ctxt, p, &p->from);
  2973  		break;
  2974  
  2975  	case Zo_iw:
  2976  		*ctxt->andptr++ = op;
  2977  		if(p->from.type != D_NONE){
  2978  			v = vaddr(ctxt, &p->from, nil);
  2979  			*ctxt->andptr++ = v;
  2980  			*ctxt->andptr++ = v>>8;
  2981  		}
  2982  		break;
  2983  
  2984  	case Ziq_rp:
  2985  		v = vaddr(ctxt, &p->from, &rel);
  2986  		l = v>>32;
  2987  		if(l == 0 && rel.siz != 8){
  2988  			//p->mark |= 0100;
  2989  			//print("zero: %llux %P\n", v, p);
  2990  			ctxt->rexflag &= ~(0x40|Rxw);
  2991  			ctxt->rexflag |= regrex[p->to.type] & Rxb;
  2992  			*ctxt->andptr++ = 0xb8 + reg[p->to.type];
  2993  			if(rel.type != 0) {
  2994  				r = addrel(ctxt->cursym);
  2995  				*r = rel;
  2996  				r->off = p->pc + ctxt->andptr - ctxt->and;
  2997  			}
  2998  			put4(ctxt, v);
  2999  		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
  3000  			//p->mark |= 0100;
  3001  			//print("sign: %llux %P\n", v, p);
  3002  			*ctxt->andptr ++ = 0xc7;
  3003  			asmando(ctxt, &p->to, 0);
  3004  			put4(ctxt, v);
  3005  		}else{	/* need all 8 */
  3006  			//print("all: %llux %P\n", v, p);
  3007  			ctxt->rexflag |= regrex[p->to.type] & Rxb;
  3008  			*ctxt->andptr++ = op + reg[p->to.type];
  3009  			if(rel.type != 0) {
  3010  				r = addrel(ctxt->cursym);
  3011  				*r = rel;
  3012  				r->off = p->pc + ctxt->andptr - ctxt->and;
  3013  			}
  3014  			put8(ctxt, v);
  3015  		}
  3016  		break;
  3017  
  3018  	case Zib_rr:
  3019  		*ctxt->andptr++ = op;
  3020  		asmand(ctxt, &p->to, &p->to);
  3021  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  3022  		break;
  3023  
  3024  	case Z_il:
  3025  	case Zil_:
  3026  		if(t[2] == Zil_)
  3027  			a = &p->from;
  3028  		else
  3029  			a = &p->to;
  3030  		*ctxt->andptr++ = op;
  3031  		if(o->prefix == Pe) {
  3032  			v = vaddr(ctxt, a, nil);
  3033  			*ctxt->andptr++ = v;
  3034  			*ctxt->andptr++ = v>>8;
  3035  		}
  3036  		else
  3037  			relput4(ctxt, p, a);
  3038  		break;
  3039  
  3040  	case Zm_ilo:
  3041  	case Zilo_m:
  3042  		*ctxt->andptr++ = op;
  3043  		if(t[2] == Zilo_m) {
  3044  			a = &p->from;
  3045  			asmando(ctxt, &p->to, o->op[z+1]);
  3046  		} else {
  3047  			a = &p->to;
  3048  			asmando(ctxt, &p->from, o->op[z+1]);
  3049  		}
  3050  		if(o->prefix == Pe) {
  3051  			v = vaddr(ctxt, a, nil);
  3052  			*ctxt->andptr++ = v;
  3053  			*ctxt->andptr++ = v>>8;
  3054  		}
  3055  		else
  3056  			relput4(ctxt, p, a);
  3057  		break;
  3058  
  3059  	case Zil_rr:
  3060  		*ctxt->andptr++ = op;
  3061  		asmand(ctxt, &p->to, &p->to);
  3062  		if(o->prefix == Pe) {
  3063  			v = vaddr(ctxt, &p->from, nil);
  3064  			*ctxt->andptr++ = v;
  3065  			*ctxt->andptr++ = v>>8;
  3066  		}
  3067  		else
  3068  			relput4(ctxt, p, &p->from);
  3069  		break;
  3070  
  3071  	case Z_rp:
  3072  		ctxt->rexflag |= regrex[p->to.type] & (Rxb|0x40);
  3073  		*ctxt->andptr++ = op + reg[p->to.type];
  3074  		break;
  3075  
  3076  	case Zrp_:
  3077  		ctxt->rexflag |= regrex[p->from.type] & (Rxb|0x40);
  3078  		*ctxt->andptr++ = op + reg[p->from.type];
  3079  		break;
  3080  
  3081  	case Zclr:
  3082  		ctxt->rexflag &= ~Pw;
  3083  		*ctxt->andptr++ = op;
  3084  		asmand(ctxt, &p->to, &p->to);
  3085  		break;
  3086  
  3087  	case Zcall:
  3088  		if(p->to.sym == nil) {
  3089  			ctxt->diag("call without target");
  3090  			sysfatal("bad code");
  3091  		}
  3092  		*ctxt->andptr++ = op;
  3093  		r = addrel(ctxt->cursym);
  3094  		r->off = p->pc + ctxt->andptr - ctxt->and;
  3095  		r->sym = p->to.sym;
  3096  		r->add = p->to.offset;
  3097  		r->type = R_CALL;
  3098  		r->siz = 4;
  3099  		put4(ctxt, 0);
  3100  		break;
  3101  
  3102  	case Zbr:
  3103  	case Zjmp:
  3104  	case Zloop:
  3105  		// TODO: jump across functions needs reloc
  3106  		if(p->to.sym != nil) {
  3107  			if(t[2] != Zjmp) {
  3108  				ctxt->diag("branch to ATEXT");
  3109  				sysfatal("bad code");
  3110  			}
  3111  			*ctxt->andptr++ = o->op[z+1];
  3112  			r = addrel(ctxt->cursym);
  3113  			r->off = p->pc + ctxt->andptr - ctxt->and;
  3114  			r->sym = p->to.sym;
  3115  			r->type = R_PCREL;
  3116  			r->siz = 4;
  3117  			put4(ctxt, 0);
  3118  			break;
  3119  		}
  3120  		// Assumes q is in this function.
  3121  		// TODO: Check in input, preserve in brchain.
  3122  
  3123  		// Fill in backward jump now.
  3124  		q = p->pcond;
  3125  		if(q == nil) {
  3126  			ctxt->diag("jmp/branch/loop without target");
  3127  			sysfatal("bad code");
  3128  		}
  3129  		if(p->back & 1) {
  3130  			v = q->pc - (p->pc + 2);
  3131  			if(v >= -128) {
  3132  				if(p->as == AJCXZL)
  3133  					*ctxt->andptr++ = 0x67;
  3134  				*ctxt->andptr++ = op;
  3135  				*ctxt->andptr++ = v;
  3136  			} else if(t[2] == Zloop) {
  3137  				ctxt->diag("loop too far: %P", p);
  3138  			} else {
  3139  				v -= 5-2;
  3140  				if(t[2] == Zbr) {
  3141  					*ctxt->andptr++ = 0x0f;
  3142  					v--;
  3143  				}
  3144  				*ctxt->andptr++ = o->op[z+1];
  3145  				*ctxt->andptr++ = v;
  3146  				*ctxt->andptr++ = v>>8;
  3147  				*ctxt->andptr++ = v>>16;
  3148  				*ctxt->andptr++ = v>>24;
  3149  			}
  3150  			break;
  3151  		}
  3152  		
  3153  		// Annotate target; will fill in later.
  3154  		p->forwd = q->comefrom;
  3155  		q->comefrom = p;
  3156  		if(p->back & 2)	{ // short
  3157  			if(p->as == AJCXZL)
  3158  				*ctxt->andptr++ = 0x67;
  3159  			*ctxt->andptr++ = op;
  3160  			*ctxt->andptr++ = 0;
  3161  		} else if(t[2] == Zloop) {
  3162  			ctxt->diag("loop too far: %P", p);
  3163  		} else {
  3164  			if(t[2] == Zbr)
  3165  				*ctxt->andptr++ = 0x0f;
  3166  			*ctxt->andptr++ = o->op[z+1];
  3167  			*ctxt->andptr++ = 0;
  3168  			*ctxt->andptr++ = 0;
  3169  			*ctxt->andptr++ = 0;
  3170  			*ctxt->andptr++ = 0;
  3171  		}
  3172  		break;
  3173  				
  3174  /*
  3175  		v = q->pc - p->pc - 2;
  3176  		if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3177  			*ctxt->andptr++ = op;
  3178  			*ctxt->andptr++ = v;
  3179  		} else {
  3180  			v -= 5-2;
  3181  			if(t[2] == Zbr) {
  3182  				*ctxt->andptr++ = 0x0f;
  3183  				v--;
  3184  			}
  3185  			*ctxt->andptr++ = o->op[z+1];
  3186  			*ctxt->andptr++ = v;
  3187  			*ctxt->andptr++ = v>>8;
  3188  			*ctxt->andptr++ = v>>16;
  3189  			*ctxt->andptr++ = v>>24;
  3190  		}
  3191  */
  3192  		break;
  3193  
  3194  	case Zbyte:
  3195  		v = vaddr(ctxt, &p->from, &rel);
  3196  		if(rel.siz != 0) {
  3197  			rel.siz = op;
  3198  			r = addrel(ctxt->cursym);
  3199  			*r = rel;
  3200  			r->off = p->pc + ctxt->andptr - ctxt->and;
  3201  		}
  3202  		*ctxt->andptr++ = v;
  3203  		if(op > 1) {
  3204  			*ctxt->andptr++ = v>>8;
  3205  			if(op > 2) {
  3206  				*ctxt->andptr++ = v>>16;
  3207  				*ctxt->andptr++ = v>>24;
  3208  				if(op > 4) {
  3209  					*ctxt->andptr++ = v>>32;
  3210  					*ctxt->andptr++ = v>>40;
  3211  					*ctxt->andptr++ = v>>48;
  3212  					*ctxt->andptr++ = v>>56;
  3213  				}
  3214  			}
  3215  		}
  3216  		break;
  3217  	}
  3218  	return;
  3219  
  3220  domov:
  3221  	for(mo=ymovtab; mo->as; mo++)
  3222  		if(p->as == mo->as)
  3223  		if(ycover[ft+mo->ft])
  3224  		if(ycover[tt+mo->tt]){
  3225  			t = mo->op;
  3226  			goto mfound;
  3227  		}
  3228  bad:
  3229  	if(p->mode != 64){
  3230  		/*
  3231  		 * here, the assembly has failed.
  3232  		 * if its a byte instruction that has
  3233  		 * unaddressable registers, try to
  3234  		 * exchange registers and reissue the
  3235  		 * instruction with the operands renamed.
  3236  		 */
  3237  		pp = *p;
  3238  		z = p->from.type;
  3239  		if(z >= D_BP && z <= D_DI) {
  3240  			if(isax(&p->to) || p->to.type == D_NONE) {
  3241  				// We certainly don't want to exchange
  3242  				// with AX if the op is MUL or DIV.
  3243  				*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  3244  				asmando(ctxt, &p->from, reg[D_BX]);
  3245  				subreg(&pp, z, D_BX);
  3246  				doasm(ctxt, &pp);
  3247  				*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  3248  				asmando(ctxt, &p->from, reg[D_BX]);
  3249  			} else {
  3250  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  3251  				subreg(&pp, z, D_AX);
  3252  				doasm(ctxt, &pp);
  3253  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  3254  			}
  3255  			return;
  3256  		}
  3257  		z = p->to.type;
  3258  		if(z >= D_BP && z <= D_DI) {
  3259  			if(isax(&p->from)) {
  3260  				*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  3261  				asmando(ctxt, &p->to, reg[D_BX]);
  3262  				subreg(&pp, z, D_BX);
  3263  				doasm(ctxt, &pp);
  3264  				*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  3265  				asmando(ctxt, &p->to, reg[D_BX]);
  3266  			} else {
  3267  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  3268  				subreg(&pp, z, D_AX);
  3269  				doasm(ctxt, &pp);
  3270  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  3271  			}
  3272  			return;
  3273  		}
  3274  	}
  3275  	ctxt->diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
  3276  	return;
  3277  
  3278  mfound:
  3279  	switch(mo->code) {
  3280  	default:
  3281  		ctxt->diag("asmins: unknown mov %d %P", mo->code, p);
  3282  		break;
  3283  
  3284  	case 0:	/* lit */
  3285  		for(z=0; t[z]!=E; z++)
  3286  			*ctxt->andptr++ = t[z];
  3287  		break;
  3288  
  3289  	case 1:	/* r,m */
  3290  		*ctxt->andptr++ = t[0];
  3291  		asmando(ctxt, &p->to, t[1]);
  3292  		break;
  3293  
  3294  	case 2:	/* m,r */
  3295  		*ctxt->andptr++ = t[0];
  3296  		asmando(ctxt, &p->from, t[1]);
  3297  		break;
  3298  
  3299  	case 3:	/* r,m - 2op */
  3300  		*ctxt->andptr++ = t[0];
  3301  		*ctxt->andptr++ = t[1];
  3302  		asmando(ctxt, &p->to, t[2]);
  3303  		ctxt->rexflag |= regrex[p->from.type] & (Rxr|0x40);
  3304  		break;
  3305  
  3306  	case 4:	/* m,r - 2op */
  3307  		*ctxt->andptr++ = t[0];
  3308  		*ctxt->andptr++ = t[1];
  3309  		asmando(ctxt, &p->from, t[2]);
  3310  		ctxt->rexflag |= regrex[p->to.type] & (Rxr|0x40);
  3311  		break;
  3312  
  3313  	case 5:	/* load full pointer, trash heap */
  3314  		if(t[0])
  3315  			*ctxt->andptr++ = t[0];
  3316  		switch(p->to.index) {
  3317  		default:
  3318  			goto bad;
  3319  		case D_DS:
  3320  			*ctxt->andptr++ = 0xc5;
  3321  			break;
  3322  		case D_SS:
  3323  			*ctxt->andptr++ = 0x0f;
  3324  			*ctxt->andptr++ = 0xb2;
  3325  			break;
  3326  		case D_ES:
  3327  			*ctxt->andptr++ = 0xc4;
  3328  			break;
  3329  		case D_FS:
  3330  			*ctxt->andptr++ = 0x0f;
  3331  			*ctxt->andptr++ = 0xb4;
  3332  			break;
  3333  		case D_GS:
  3334  			*ctxt->andptr++ = 0x0f;
  3335  			*ctxt->andptr++ = 0xb5;
  3336  			break;
  3337  		}
  3338  		asmand(ctxt, &p->from, &p->to);
  3339  		break;
  3340  
  3341  	case 6:	/* double shift */
  3342  		if(t[0] == Pw){
  3343  			if(p->mode != 64)
  3344  				ctxt->diag("asmins: illegal 64: %P", p);
  3345  			ctxt->rexflag |= Pw;
  3346  			t++;
  3347  		}else if(t[0] == Pe){
  3348  			*ctxt->andptr++ = Pe;
  3349  			t++;
  3350  		}
  3351  		z = p->from.type;
  3352  		switch(z) {
  3353  		default:
  3354  			goto bad;
  3355  		case D_CONST:
  3356  			*ctxt->andptr++ = 0x0f;
  3357  			*ctxt->andptr++ = t[0];
  3358  			asmandsz(ctxt, &p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  3359  			*ctxt->andptr++ = p->from.offset;
  3360  			break;
  3361  		case D_CL:
  3362  		case D_CX:
  3363  			*ctxt->andptr++ = 0x0f;
  3364  			*ctxt->andptr++ = t[1];
  3365  			asmandsz(ctxt, &p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  3366  			break;
  3367  		}
  3368  		break;
  3369  	
  3370  	case 7:	/* mov tls, r */
  3371  		// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3372  		// where you load the TLS base register into a register and then index off that
  3373  		// register to access the actual TLS variables. Systems that allow direct TLS access
  3374  		// are handled in prefixof above and should not be listed here.
  3375  		switch(ctxt->headtype) {
  3376  		default:
  3377  			sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype));
  3378  
  3379  		case Hplan9:
  3380  			if(ctxt->plan9privates == nil)
  3381  				ctxt->plan9privates = linklookup(ctxt, "_privates", 0);
  3382  			memset(&pp.from, 0, sizeof pp.from);
  3383  			pp.from.type = D_EXTERN;
  3384  			pp.from.sym = ctxt->plan9privates;
  3385  			pp.from.offset = 0;
  3386  			pp.from.index = D_NONE;
  3387  			ctxt->rexflag |= Pw;
  3388  			*ctxt->andptr++ = 0x8B;
  3389  			asmand(ctxt, &pp.from, &p->to);
  3390  			break;
  3391  
  3392  		case Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  3393  			// TLS base is 0(FS).
  3394  			pp.from = p->from;
  3395  			pp.from.type = D_INDIR+D_NONE;
  3396  			pp.from.offset = 0;
  3397  			pp.from.index = D_NONE;
  3398  			pp.from.scale = 0;
  3399  			ctxt->rexflag |= Pw;
  3400  			*ctxt->andptr++ = 0x64; // FS
  3401  			*ctxt->andptr++ = 0x8B;
  3402  			asmand(ctxt, &pp.from, &p->to);
  3403  			break;
  3404  		
  3405  		case Hwindows:
  3406  			// Windows TLS base is always 0x28(GS).
  3407  			pp.from = p->from;
  3408  			pp.from.type = D_INDIR+D_GS;
  3409  			pp.from.offset = 0x28;
  3410  			pp.from.index = D_NONE;
  3411  			pp.from.scale = 0;
  3412  			ctxt->rexflag |= Pw;
  3413  			*ctxt->andptr++ = 0x65; // GS
  3414  			*ctxt->andptr++ = 0x8B;
  3415  			asmand(ctxt, &pp.from, &p->to);
  3416  			break;
  3417  		}
  3418  		break;
  3419  	}
  3420  }
  3421  
  3422  static uchar naclret[] = {
  3423  	0x5e, // POPL SI
  3424  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  3425  	0x83, 0xe6, 0xe0,	// ANDL $~31, SI
  3426  	0x4c, 0x01, 0xfe,	// ADDQ R15, SI
  3427  	0xff, 0xe6, // JMP SI
  3428  };
  3429  
  3430  static uchar naclspfix[] = {
  3431  	0x4c, 0x01, 0xfc, // ADDQ R15, SP
  3432  };
  3433  
  3434  static uchar naclbpfix[] = {
  3435  	0x4c, 0x01, 0xfd, // ADDQ R15, BP
  3436  };
  3437  
  3438  static uchar naclmovs[] = {
  3439  	0x89, 0xf6,	// MOVL SI, SI
  3440  	0x49, 0x8d, 0x34, 0x37,	// LEAQ (R15)(SI*1), SI
  3441  	0x89, 0xff,	// MOVL DI, DI
  3442  	0x49, 0x8d, 0x3c, 0x3f,	// LEAQ (R15)(DI*1), DI
  3443  };
  3444  
  3445  static uchar naclstos[] = {
  3446  	0x89, 0xff,	// MOVL DI, DI
  3447  	0x49, 0x8d, 0x3c, 0x3f,	// LEAQ (R15)(DI*1), DI
  3448  };
  3449  
  3450  static void
  3451  nacltrunc(Link *ctxt, int reg)
  3452  {	
  3453  	if(reg >= D_R8)
  3454  		*ctxt->andptr++ = 0x45;
  3455  	reg = (reg - D_AX) & 7;
  3456  	*ctxt->andptr++ = 0x89;
  3457  	*ctxt->andptr++ = (3<<6) | (reg<<3) | reg;
  3458  }
  3459  
  3460  static void
  3461  asmins(Link *ctxt, Prog *p)
  3462  {
  3463  	int i, n, np, c;
  3464  	uchar *and0;
  3465  	Reloc *r;
  3466  	
  3467  	ctxt->andptr = ctxt->and;
  3468  	ctxt->asmode = p->mode;
  3469  	
  3470  	if(p->as == AUSEFIELD) {
  3471  		r = addrel(ctxt->cursym);
  3472  		r->off = 0;
  3473  		r->siz = 0;
  3474  		r->sym = p->from.sym;
  3475  		r->type = R_USEFIELD;
  3476  		return;
  3477  	}
  3478  	
  3479  	if(ctxt->headtype == Hnacl) {
  3480  		if(p->as == AREP) {
  3481  			ctxt->rep++;
  3482  			return;
  3483  		}
  3484  		if(p->as == AREPN) {
  3485  			ctxt->repn++;
  3486  			return;
  3487  		}
  3488  		if(p->as == ALOCK) {
  3489  			ctxt->lock++;
  3490  			return;
  3491  		}
  3492  		if(p->as != ALEAQ && p->as != ALEAL) {
  3493  			if(p->from.index != D_NONE && p->from.scale > 0)
  3494  				nacltrunc(ctxt, p->from.index);
  3495  			if(p->to.index != D_NONE && p->to.scale > 0)
  3496  				nacltrunc(ctxt, p->to.index);
  3497  		}
  3498  		switch(p->as) {
  3499  		case ARET:
  3500  			memmove(ctxt->andptr, naclret, sizeof naclret);
  3501  			ctxt->andptr += sizeof naclret;
  3502  			return;
  3503  		case ACALL:
  3504  		case AJMP:
  3505  			if(D_AX <= p->to.type && p->to.type <= D_DI) {
  3506  				// ANDL $~31, reg
  3507  				*ctxt->andptr++ = 0x83;
  3508  				*ctxt->andptr++ = 0xe0 | (p->to.type - D_AX);
  3509  				*ctxt->andptr++ = 0xe0;
  3510  				// ADDQ R15, reg
  3511  				*ctxt->andptr++ = 0x4c;
  3512  				*ctxt->andptr++ = 0x01;
  3513  				*ctxt->andptr++ = 0xf8 | (p->to.type - D_AX);
  3514  			}
  3515  			if(D_R8 <= p->to.type && p->to.type <= D_R15) {
  3516  				// ANDL $~31, reg
  3517  				*ctxt->andptr++ = 0x41;
  3518  				*ctxt->andptr++ = 0x83;
  3519  				*ctxt->andptr++ = 0xe0 | (p->to.type - D_R8);
  3520  				*ctxt->andptr++ = 0xe0;
  3521  				// ADDQ R15, reg
  3522  				*ctxt->andptr++ = 0x4d;
  3523  				*ctxt->andptr++ = 0x01;
  3524  				*ctxt->andptr++ = 0xf8 | (p->to.type - D_R8);
  3525  			}
  3526  			break;
  3527  		case AINT:
  3528  			*ctxt->andptr++ = 0xf4;
  3529  			return;
  3530  		case ASCASB:
  3531  		case ASCASW:
  3532  		case ASCASL:
  3533  		case ASCASQ:
  3534  		case ASTOSB:
  3535  		case ASTOSW:
  3536  		case ASTOSL:
  3537  		case ASTOSQ:
  3538  			memmove(ctxt->andptr, naclstos, sizeof naclstos);
  3539  			ctxt->andptr += sizeof naclstos;
  3540  			break;
  3541  		case AMOVSB:
  3542  		case AMOVSW:
  3543  		case AMOVSL:
  3544  		case AMOVSQ:
  3545  			memmove(ctxt->andptr, naclmovs, sizeof naclmovs);
  3546  			ctxt->andptr += sizeof naclmovs;
  3547  			break;
  3548  		}
  3549  		if(ctxt->rep) {
  3550  			*ctxt->andptr++ = 0xf3;
  3551  			ctxt->rep = 0;
  3552  		}
  3553  		if(ctxt->repn) {
  3554  			*ctxt->andptr++ = 0xf2;
  3555  			ctxt->repn = 0;
  3556  		}
  3557  		if(ctxt->lock) {
  3558  			*ctxt->andptr++ = 0xf0;
  3559  			ctxt->lock = 0;
  3560  		}
  3561  	}		
  3562  
  3563  	ctxt->rexflag = 0;
  3564  	and0 = ctxt->andptr;
  3565  	ctxt->asmode = p->mode;
  3566  	doasm(ctxt, p);
  3567  	if(ctxt->rexflag){
  3568  		/*
  3569  		 * as befits the whole approach of the architecture,
  3570  		 * the rex prefix must appear before the first opcode byte
  3571  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  3572  		 * before the 0f opcode escape!), or it might be ignored.
  3573  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  3574  		 */
  3575  		if(p->mode != 64)
  3576  			ctxt->diag("asmins: illegal in mode %d: %P", p->mode, p);
  3577  		n = ctxt->andptr - and0;
  3578  		for(np = 0; np < n; np++) {
  3579  			c = and0[np];
  3580  			if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
  3581  				break;
  3582  		}
  3583  		memmove(and0+np+1, and0+np, n-np);
  3584  		and0[np] = 0x40 | ctxt->rexflag;
  3585  		ctxt->andptr++;
  3586  	}
  3587  	n = ctxt->andptr - ctxt->and;
  3588  	for(i=ctxt->cursym->nr-1; i>=0; i--) {
  3589  		r = ctxt->cursym->r+i;
  3590  		if(r->off < p->pc)
  3591  			break;
  3592  		if(ctxt->rexflag)
  3593  			r->off++;
  3594  		if(r->type == R_PCREL || r->type == R_CALL)
  3595  			r->add -= p->pc + n - (r->off + r->siz);
  3596  	}
  3597  
  3598  	if(ctxt->headtype == Hnacl && p->as != ACMPL && p->as != ACMPQ) {
  3599  		switch(p->to.type) {
  3600  		case D_SP:
  3601  			memmove(ctxt->andptr, naclspfix, sizeof naclspfix);
  3602  			ctxt->andptr += sizeof naclspfix;
  3603  			break;
  3604  		case D_BP:
  3605  			memmove(ctxt->andptr, naclbpfix, sizeof naclbpfix);
  3606  			ctxt->andptr += sizeof naclbpfix;
  3607  			break;
  3608  		}
  3609  	}
  3610  }