github.com/golang-haiku/go-1.4.3@v0.0.0-20190609233734-1f5ae41cc308/src/liblink/asm6.c (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // Instruction layout.
    32  
    33  #include <u.h>
    34  #include <libc.h>
    35  #include <bio.h>
    36  #include <link.h>
    37  #include "../cmd/6l/6.out.h"
    38  #include "../runtime/stack.h"
    39  
    40  enum
        // Alignment constants for data, loop entries and function starts.
    41  {
    42  	MaxAlign = 32,	// max data alignment
    43  	
    44  	// Loop alignment constants:
    45  	// want to align loop entry to LoopAlign-byte boundary,
    46  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    47  	// We define a loop entry as the target of a backward jump.
    48  	//
    49  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    50  	// and it aligns all jump targets, not just backward jump targets.
    51  	//
    52  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    53  	// is very slight but negative, so the alignment is disabled by
    54  	// setting MaxLoopPad = 0. The code is here for reference and
    55  	// for future experiments.
    56  	// 
    57  	LoopAlign = 16,
    58  	MaxLoopPad = 0,	// 0 disables loop alignment (see above)
    59  
    60  	FuncAlign = 16	// NOTE(review): presumably the alignment of function entry points; the use is not in this part of the file
    61  };
    62  
    63  typedef	struct	Optab	Optab;	// entry type of the optab[] instruction table
    64  typedef	struct	Movtab	Movtab;	// NOTE(review): entry type of a second table that is not in this part of the file
    65  
    66  struct	Optab
        // One optab[] entry: ties an instruction to its operand table, prefix
        // selector and opcode bytes.  optab[] initializes these fields
        // positionally, so their order must not change.
    67  {
    68  	short	as;	// instruction (an A* constant such as AADDL)
    69  	uchar*	ytab;	// operand-pattern table, one of the y* arrays
    70  	uchar	prefix;	// P* constant controlling the prefix bytes to emit
    71  	uchar	op[23];	// opcode bytes; the matching ytab row selects where to start reading
    72  };
    73  struct	Movtab
        // NOTE(review): no table of this type is visible in this part of the
        // file, so the field notes below are read off the names only — confirm
        // against the table that uses it.
    74  {
    75  	short	as;	// instruction, as in Optab
    76  	uchar	ft;	// presumably the "from" operand type
    77  	uchar	tt;	// presumably the "to" operand type
    78  	uchar	code;	// presumably selects how op[] is interpreted
    79  	uchar	op[4];	// opcode bytes
    80  };
    81  
    82  enum
        // Constants shared by the y* operand tables and optab[].  All values
        // without an explicit initializer are implicit and sequential, so
        // entries must not be reordered.
    83  {
        	// Ytypes: operand classes.  They fill the first two columns of
        	// every y* table row (from operand, to operand).
    84  	Yxxx		= 0,
    85  	Ynone,
        	// Constants: per the comment above optab, 0 and 1 are kept apart
        	// from general 8-bit constants, and Ys32 is a signed 32-bit value.
    86  	Yi0,
    87  	Yi1,
    88  	Yi8,
    89  	Ys32,
    90  	Yi32,
    91  	Yi64,
    92  	Yiauto,
    93  	Yal,
    94  	Ycl,
    95  	Yax,
    96  	Ycx,
    97  	Yrb,
    98  	Yrl,
    99  	Yrf,
   100  	Yf0,
   101  	Yrx,
   102  	Ymb,
   103  	Yml,
   104  	Ym,
   105  	Ybr,
   106  	Ycol,
   107  
   108  	Ycs,	Yss,	Yds,	Yes,	Yfs,	Ygs,
   109  	Ygdtr,	Yidtr,	Yldtr,	Ymsw,	Ytask,
   110  	Ycr0,	Ycr1,	Ycr2,	Ycr3,	Ycr4,	Ycr5,	Ycr6,	Ycr7,	Ycr8,
   111  	Ydr0,	Ydr1,	Ydr2,	Ydr3,	Ydr4,	Ydr5,	Ydr6,	Ydr7,
   112  	Ytr0,	Ytr1,	Ytr2,	Ytr3,	Ytr4,	Ytr5,	Ytr6,	Ytr7,	Yrl32,	Yrl64,
   113  	Ymr, Ymm,
   114  	Yxr, Yxm,
   115  	Ytls,
   116  	Ymax,	// number of Ytypes; ycover has Ymax*Ymax entries
   117  
        	// Ztypes: encoding forms.  They fill the third column of every y*
        	// table row; doasm switches on this value to lay down the bytes.
   118  	Zxxx		= 0,
   119  
   120  	Zlit,
   121  	Zlitm_r,
   122  	Z_rp,
   123  	Zbr,
   124  	Zcall,
   125  	Zcallindreg,
   126  	Zib_,
   127  	Zib_rp,
   128  	Zibo_m,
   129  	Zibo_m_xm,
   130  	Zil_,
   131  	Zil_rp,
   132  	Ziq_rp,
   133  	Zilo_m,
   134  	Ziqo_m,
   135  	Zjmp,
   136  	Zloop,
   137  	Zo_iw,
   138  	Zm_o,
   139  	Zm_r,
   140  	Zm2_r,
   141  	Zm_r_xm,
   142  	Zm_r_i_xm,
   143  	Zm_r_3d,
   144  	Zm_r_xm_nr,
   145  	Zr_m_xm_nr,
   146  	Zibm_r,	/* mmx1,mmx2/mem64,imm8 */
   147  	Zmb_r,
   148  	Zaut_r,
   149  	Zo_m,
   150  	Zo_m64,
   151  	Zpseudo,
   152  	Zr_m,
   153  	Zr_m_xm,
   154  	Zr_m_i_xm,
   155  	Zrp_,
   156  	Z_ib,
   157  	Z_il,
   158  	Zm_ibo,
   159  	Zm_ilo,
   160  	Zib_rr,
   161  	Zil_rr,
   162  	Zclr,
   163  	Zbyte,
   164  	Zmax,
   165  
        	// P constants: values for Optab.prefix.  Some also appear inside an
        	// op[] list (e.g. Pe, Pf2, Pf3, Pm in the entries for AMOVQ and AIMULL).
   166  	Px		= 0,
   167  	P32		= 0x32,	/* 32-bit only */
   168  	Pe		= 0x66,	/* operand escape */
   169  	Pm		= 0x0f,	/* 2byte opcode escape */
   170  	Pq		= 0xff,	/* both escapes: 66 0f */
   171  	Pb		= 0xfe,	/* byte operands */
   172  	Pf2		= 0xf2,	/* xmm escape 1: f2 0f */
   173  	Pf3		= 0xf3,	/* xmm escape 2: f3 0f */
   174  	Pq3		= 0x67, /* xmm escape 3: 66 48 0f */
   175  	Pw		= 0x48,	/* Rex.w */
   176  	Py		= 0x80,	/* defaults to 64-bit mode */
   177  
        	// Rx constants: the low four are the REX prefix bits; Rxf and Rxt
        	// are internal flags that sit above them.
   178  	Rxf		= 1<<9,	/* internal flag for Rxr on from */
   179  	Rxt		= 1<<8,	/* internal flag for Rxr on to */
   180  	Rxw		= 1<<3,	/* =1, 64-bit operand size */
   181  	Rxr		= 1<<2,	/* extend modrm reg */
   182  	Rxx		= 1<<1,	/* extend sib index */
   183  	Rxb		= 1<<0,	/* extend modrm r/m, sib base, or opcode reg */
   184  
   185  	Maxand	= 10,		/* in -a output width of the byte codes */
   186  };
   187  
   188  static uchar ycover[Ymax*Ymax];	// ycover[a*Ymax + b] = 1 means Ytype a is accepted where b is asked for; set up in instinit
   189  static	int	reg[D_NONE];	// NOTE(review): indexed by D_* operand value; filled in outside this part of the file
   190  static	int	regrex[D_NONE+1];	// NOTE(review): presumably the Rx* bits each register needs — confirm where it is filled in
   191  static	void	asmins(Link *ctxt, Prog *p);	// forward declaration; the definition is not in this part of the file
   192  
   193  static uchar	ynone[] =
        // Operand table for instructions written without operands (e.g. AAAA,
        // ACLC, AHLT in optab).  Row format for every y* table:
        // from Ytype, to Ytype, Ztype, number of op[] bytes the row owns.
   194  {
   195  	Ynone,	Ynone,	Zlit,	1,
   196  	0	// end of table
   197  };
   198  static uchar	ytext[] =
        // One Zpseudo row: from Ymb, to Yi64.
        // NOTE(review): presumably for the TEXT pseudo-instruction; its optab
        // entry is not in this part of the file.
   199  {
   200  	Ymb,	Yi64,	Zpseudo,1,
   201  	0
   202  };
   203  static uchar	ynop[] =
        // All rows are Zpseudo.  Accepted shapes: no operands, or a single
        // operand of class Yiauto, Yml, Yrf or Yxr on either the from or the
        // to side.  NOTE(review): presumably used by ANOP; the optab entry is
        // not in this part of the file.
   204  {
   205  	Ynone,	Ynone,	Zpseudo,0,
   206  	Ynone,	Yiauto,	Zpseudo,0,
   207  	Ynone,	Yml,	Zpseudo,0,
   208  	Ynone,	Yrf,	Zpseudo,0,
   209  	Ynone,	Yxr,	Zpseudo,0,
   210  	Yiauto,	Ynone,	Zpseudo,0,
   211  	Yml,	Ynone,	Zpseudo,0,
   212  	Yrf,	Ynone,	Zpseudo,0,
   213  	Yxr,	Ynone,	Zpseudo,1,	// count differs from the rows above; as the last row it advances z past nothing that follows
   214  	0
   215  };
   216  static uchar	yfuncdata[] =
        // One Zpseudo row owning no op[] bytes: from Yi32, to Ym.
        // NOTE(review): presumably for AFUNCDATA; no user is visible here.
   217  {
   218  	Yi32,	Ym,	Zpseudo,	0,
   219  	0
   220  };
   221  static uchar	ypcdata[] = 
        // One Zpseudo row owning no op[] bytes: both operands are Yi32.
        // NOTE(review): presumably for APCDATA; no user is visible here.
   222  {
   223  	Yi32,	Yi32,	Zpseudo,	0,
   224  	0
   225  };
   226  static uchar	yxorb[] =
        // Byte-sized two-operand forms, used by AADCB, AADDB and AANDB in
        // optab.  The rows own 1+2+1+1 = 5 op[] bytes in this order.
   227  {
   228  	Yi32,	Yal,	Zib_,	1,	// immediate with Yal
   229  	Yi32,	Ymb,	Zibo_m,	2,	// immediate with Ymb: opcode byte plus a second op[] byte
   230  	Yrb,	Ymb,	Zr_m,	1,
   231  	Ymb,	Yrb,	Zm_r,	1,
   232  	0
   233  };
   234  static uchar	yxorl[] =
        // Two-operand forms used by AADCL/AADCQ/AADCW and AANDL/AANDQ/AANDW in
        // optab.  The rows are the same as in yaddl; see the worked AADDL
        // example in the comment above optab for how they index op[].
   235  {
   236  	Yi8,	Yml,	Zibo_m,	2,	// 8-bit immediate is tried first
   237  	Yi32,	Yax,	Zil_,	1,
   238  	Yi32,	Yml,	Zilo_m,	2,
   239  	Yrl,	Yml,	Zr_m,	1,
   240  	Yml,	Yrl,	Zm_r,	1,
   241  	0
   242  };
   243  static uchar	yaddl[] =
        // Two-operand forms used by AADDL, AADDQ and AADDW in optab.  This is
        // the table walked through in the comment above optab: the rows start
        // at op[] offsets 0, 2, 3, 5 and 6.
   244  {
   245  	Yi8,	Yml,	Zibo_m,	2,	// opcode, addressing mode, one immediate byte
   246  	Yi32,	Yax,	Zil_,	1,
   247  	Yi32,	Yml,	Zilo_m,	2,	// as Zibo_m but with a 32-bit immediate
   248  	Yrl,	Yml,	Zr_m,	1,
   249  	Yml,	Yrl,	Zm_r,	1,
   250  	0
   251  };
   252  static uchar	yincb[] =
        // Single Ymb destination, no source; used by ADECB and AINCB in optab.
        // The row owns 2 op[] bytes, e.g. {0xfe,(01)} for ADECB.
   253  {
   254  	Ynone,	Ymb,	Zo_m,	2,
   255  	0
   256  };
   257  static uchar	yincw[] =
        // Single Yml destination, no source; used by ADECW and AINCW in optab.
        // The row is the same as in yincl.
   258  {
   259  	Ynone,	Yml,	Zo_m,	2,
   260  	0
   261  };
   262  static uchar	yincl[] =
        // Single Yml destination, no source; used by ADECL/ADECQ and
        // AINCL/AINCQ in optab.
   263  {
   264  	Ynone,	Yml,	Zo_m,	2,
   265  	0
   266  };
   267  static uchar	ycmpb[] =
        // Byte compare forms, used by ACMPB in optab.  Same shapes as yxorb
        // but with the immediate as the to operand, hence the Z_ib/Zm_ibo
        // Ztypes.
   268  {
   269  	Yal,	Yi32,	Z_ib,	1,
   270  	Ymb,	Yi32,	Zm_ibo,	2,
   271  	Ymb,	Yrb,	Zm_r,	1,
   272  	Yrb,	Ymb,	Zr_m,	1,
   273  	0
   274  };
   275  static uchar	ycmpl[] =
        // Compare forms used by ACMPL, ACMPQ and ACMPW in optab.  Same shapes
        // as yxorl but with the immediate as the to operand.
   276  {
   277  	Yml,	Yi8,	Zm_ibo,	2,	// 8-bit immediate is tried first
   278  	Yax,	Yi32,	Z_il,	1,
   279  	Yml,	Yi32,	Zm_ilo,	2,
   280  	Yml,	Yrl,	Zm_r,	1,
   281  	Yrl,	Yml,	Zr_m,	1,
   282  	0
   283  };
   284  static uchar	yshb[] =
        // Three forms with a Ymb destination, each owning 2 op[] bytes; the
        // source is the constant 1, a general immediate, or Ycx.
        // NOTE(review): presumably the byte shift/rotate table; no user is
        // visible in this part of optab.
   285  {
   286  	Yi1,	Ymb,	Zo_m,	2,
   287  	Yi32,	Ymb,	Zibo_m,	2,
   288  	Ycx,	Ymb,	Zo_m,	2,
   289  	0
   290  };
   291  static uchar	yshl[] =
        // Four forms with a Yml destination, each owning 2 op[] bytes; the
        // source is the constant 1, a general immediate, Ycl or Ycx.
        // NOTE(review): presumably the word/long shift table; no user is
        // visible in this part of optab.
   292  {
   293  	Yi1,	Yml,	Zo_m,	2,
   294  	Yi32,	Yml,	Zibo_m,	2,
   295  	Ycl,	Yml,	Zo_m,	2,
   296  	Ycx,	Yml,	Zo_m,	2,
   297  	0
   298  };
   299  static uchar	ytestb[] =
        // Same four rows as yxorb.  No user is visible in this part of optab.
   300  {
   301  	Yi32,	Yal,	Zib_,	1,
   302  	Yi32,	Ymb,	Zibo_m,	2,
   303  	Yrb,	Ymb,	Zr_m,	1,
   304  	Ymb,	Yrb,	Zm_r,	1,
   305  	0
   306  };
   307  static uchar	ytestl[] =
        // Same rows as yxorl except that there is no Yi8 form.  No user is
        // visible in this part of optab.
   308  {
   309  	Yi32,	Yax,	Zil_,	1,
   310  	Yi32,	Yml,	Zilo_m,	2,
   311  	Yrl,	Yml,	Zr_m,	1,
   312  	Yml,	Yrl,	Zm_r,	1,
   313  	0
   314  };
   315  static uchar	ymovb[] =
        // Byte move forms, used by AMOVB in optab, whose op[] list
        // {0x88,0x8a,0xb0,0xc6,(00)} supplies the 1+1+1+2 bytes in row order.
   316  {
   317  	Yrb,	Ymb,	Zr_m,	1,
   318  	Ymb,	Yrb,	Zm_r,	1,
   319  	Yi32,	Yrb,	Zib_rp,	1,
   320  	Yi32,	Ymb,	Zibo_m,	2,
   321  	0
   322  };
   323  static uchar	ymbs[] =
        // Single Ymb source, no destination; the row owns 2 op[] bytes.
        // No user is visible in this part of optab.
   324  {
   325  	Ymb,	Ynone,	Zm_o,	2,
   326  	0
   327  };
   328  static uchar	ybtl[] =
        // Forms for the ABT*, ABTC*, ABTR* and ABTS* entries in optab: an 8-bit
        // immediate (2 op[] bytes) or a Yrl source (1 byte) with a Yml
        // destination.
   329  {
   330  	Yi8,	Yml,	Zibo_m,	2,
   331  	Yrl,	Yml,	Zr_m,	1,
   332  	0
   333  };
   334  static uchar	ymovw[] =
        // Same rows as the start and end of ymovl, without the Ymr/Yxr rows.
        // NOTE(review): presumably used by AMOVW; its optab entry is not in
        // this part of the file.
   335  {
   336  	Yrl,	Yml,	Zr_m,	1,
   337  	Yml,	Yrl,	Zm_r,	1,
   338  	Yi0,	Yrl,	Zclr,	1,	// constant 0 gets its own form, ahead of the general immediate
   339  	Yi32,	Yrl,	Zil_rp,	1,
   340  	Yi32,	Yml,	Zilo_m,	2,
   341  	Yiauto,	Yrl,	Zaut_r,	2,
   342  	0
   343  };
   344  static uchar	ymovl[] =
        // Forms used by AMOVL in optab.  The counts in the last column must
        // track the AMOVL op[] list byte for byte, so rows cannot be
        // reordered on their own.
   345  {
   346  	Yrl,	Yml,	Zr_m,	1,
   347  	Yml,	Yrl,	Zm_r,	1,
   348  	Yi0,	Yrl,	Zclr,	1,	// constant 0 gets its own form, ahead of the general immediate
   349  	Yi32,	Yrl,	Zil_rp,	1,
   350  	Yi32,	Yml,	Zilo_m,	2,
   351  	Yml,	Ymr,	Zm_r_xm,	1,	// MMX MOVD
   352  	Ymr,	Yml,	Zr_m_xm,	1,	// MMX MOVD
   353  	Yml,	Yxr,	Zm_r_xm,	2,	// XMM MOVD (32 bit)
   354  	Yxr,	Yml,	Zr_m_xm,	2,	// XMM MOVD (32 bit)
   355  	Yiauto,	Yrl,	Zaut_r,	2,
   356  	0
   357  };
   358  static uchar	yret[] =
        // Two Zo_iw forms: no operand, or a Yi32 source.
        // NOTE(review): presumably for the return instructions; no user is
        // visible in this part of optab.
   359  {
   360  	Ynone,	Ynone,	Zo_iw,	1,
   361  	Yi32,	Ynone,	Zo_iw,	1,
   362  	0
   363  };
   364  static uchar	ymovq[] =
        // Forms used by AMOVQ in optab.  Row order matters twice over: the
        // first matching row wins (so Ys32 is tried before Yi64), and the
        // counts in the last column must track the AMOVQ op[] list.
   365  {
   366  	Yrl,	Yml,	Zr_m,	1,	// 0x89
   367  	Yml,	Yrl,	Zm_r,	1,	// 0x8b
   368  	Yi0,	Yrl,	Zclr,	1,	// 0x31
   369  	Ys32,	Yrl,	Zilo_m,	2,	// 32 bit signed 0xc7,(0)
   370  	Yi64,	Yrl,	Ziq_rp,	1,	// 0xb8 -- 32/64 bit immediate
   371  	Yi32,	Yml,	Zilo_m,	2,	// 0xc7,(0)
   372  	Ym,	Ymr,	Zm_r_xm_nr,	1,	// MMX MOVQ (shorter encoding)
   373  	Ymr,	Ym,	Zr_m_xm_nr,	1,	// MMX MOVQ
   374  	Ymm,	Ymr,	Zm_r_xm,	1,	// MMX MOVD
   375  	Ymr,	Ymm,	Zr_m_xm,	1,	// MMX MOVD
   376  	Yxr,	Ymr,	Zm_r_xm_nr,	2,	// MOVDQ2Q
   377  	Yxm,	Yxr,	Zm_r_xm_nr,	2, // MOVQ xmm1/m64 -> xmm2
   378  	Yxr,	Yxm,	Zr_m_xm_nr,	2, // MOVQ xmm1 -> xmm2/m64
   379  	Yml,	Yxr,	Zm_r_xm,	2,	// MOVD xmm load
   380  	Yxr,	Yml,	Zr_m_xm,	2,	// MOVD xmm store
   381  	Yiauto,	Yrl,	Zaut_r,	2,	// built-in LEAQ
   382  	0
   383  };
   384  static uchar	ym_rl[] =
        // Ym source into a Yrl destination; used by ALEAL, ALEAQ and ALEAW in
        // optab.
   385  {
   386  	Ym,	Yrl,	Zm_r,	1,
   387  	0
   388  };
   389  static uchar	yrl_m[] =
        // Yrl source with a Ym destination; used by ABOUNDL and ABOUNDW in
        // optab.
   390  {
   391  	Yrl,	Ym,	Zr_m,	1,
   392  	0
   393  };
   394  static uchar	ymb_rl[] =
        // Ymb source into a Yrl destination via Zmb_r; used by the AMOVB?SX and
        // AMOVB?ZX entries in optab.
   395  {
   396  	Ymb,	Yrl,	Zmb_r,	1,
   397  	0
   398  };
   399  static uchar	yml_rl[] =
        // Yml source into a Yrl destination; the most widely shared table in
        // the visible part of optab (ABSF*, ABSR*, all ACMOV*, ALAR*, ALSL*,
        // AMOVLQSX, AMOVLQZX).
   400  {
   401  	Yml,	Yrl,	Zm_r,	1,
   402  	0
   403  };
   404  static uchar	yrl_ml[] =
        // Yrl source with a Yml destination; used by AARPL in optab.
   405  {
   406  	Yrl,	Yml,	Zr_m,	1,
   407  	0
   408  };
   409  static uchar	yml_mb[] =
        // Byte register/memory pair in both directions (Zr_m, then Zm_r).
        // No user is visible in this part of optab.
   410  {
   411  	Yrb,	Ymb,	Zr_m,	1,
   412  	Ymb,	Yrb,	Zm_r,	1,
   413  	0
   414  };
   415  static uchar	yrb_mb[] =
        // Yrb source with a Ymb destination only.  No user is visible in this
        // part of optab.
   416  {
   417  	Yrb,	Ymb,	Zr_m,	1,
   418  	0
   419  };
   420  static uchar	yxchg[] =
        // Four forms; the two involving Yax (Z_rp, Zrp_) are listed ahead of
        // the general Yrl/Yml forms so they match first.
        // NOTE(review): presumably the exchange table; no user is visible in
        // this part of optab.
   421  {
   422  	Yax,	Yrl,	Z_rp,	1,
   423  	Yrl,	Yax,	Zrp_,	1,
   424  	Yrl,	Yml,	Zr_m,	1,
   425  	Yml,	Yrl,	Zm_r,	1,
   426  	0
   427  };
   428  static uchar	ydivl[] =
        // Single Yml source, no destination; used by ADIVL/Q/W and
        // AIDIVL/Q/W in optab.  The row owns 2 op[] bytes, e.g. {0xf7,(06)}.
   429  {
   430  	Yml,	Ynone,	Zm_o,	2,
   431  	0
   432  };
   433  static uchar	ydivb[] =
        // Single Ymb source, no destination; used by ADIVB, AIDIVB and AIMULB
        // in optab.
   434  {
   435  	Ymb,	Ynone,	Zm_o,	2,
   436  	0
   437  };
   438  static uchar	yimul[] =
        // Forms used by AIMULL, AIMULQ and AIMULW in optab, whose op[] list
        // {0xf7,(05),0x6b,0x69,Pm,0xaf} supplies the 2+1+1+2 bytes in row
        // order.
   439  {
   440  	Yml,	Ynone,	Zm_o,	2,	// one-operand form
   441  	Yi8,	Yrl,	Zib_rr,	1,
   442  	Yi32,	Yrl,	Zil_rr,	1,
   443  	Yml,	Yrl,	Zm_r,	2,	// two op[] bytes: Pm then 0xaf
   444  	0
   445  };
   446  static uchar	yimul3[] =
        // Yml source into a Yrl destination via Zibm_r; used by AIMUL3Q in
        // optab.
   447  {
   448  	Yml,	Yrl,	Zibm_r,	2,
   449  	0
   450  };
   451  static uchar	ybyte[] =
        // Yi64 source, no destination, Zbyte form; used by ABYTE and ALONG in
        // optab.  NOTE(review): their op[0] values (1 and 4) look like the
        // number of bytes to emit — confirm in doasm.
   452  {
   453  	Yi64,	Ynone,	Zbyte,	1,
   454  	0
   455  };
   456  static uchar	yin[] =
        // Used by AINB, AINL and AINW in optab: an immediate-operand form
        // (first op[] byte) and a no-operand form (second op[] byte).
   457  {
   458  	Yi32,	Ynone,	Zib_,	1,
   459  	Ynone,	Ynone,	Zlit,	1,
   460  	0
   461  };
   462  static uchar	yint[] =
        // Immediate source only (Zib_); used by AINT in optab.
   463  {
   464  	Yi32,	Ynone,	Zib_,	1,
   465  	0
   466  };
   467  static uchar	ypushl[] =
        // Source-only forms: Yrl, Ym, then an 8-bit immediate ahead of the
        // general immediate.  NOTE(review): presumably the push table; no
        // user is visible in this part of optab.
   468  {
   469  	Yrl,	Ynone,	Zrp_,	1,
   470  	Ym,	Ynone,	Zm_o,	2,
   471  	Yi8,	Ynone,	Zib_,	1,
   472  	Yi32,	Ynone,	Zil_,	1,
   473  	0
   474  };
   475  static uchar	ypopl[] =
        // Destination-only forms: Yrl, then Ym.  NOTE(review): presumably the
        // pop table; no user is visible in this part of optab.
   476  {
   477  	Ynone,	Yrl,	Z_rp,	1,
   478  	Ynone,	Ym,	Zo_m,	2,
   479  	0
   480  };
   481  static uchar	ybswap[] =
        // Single Yrl destination; used by ABSWAPL and ABSWAPQ in optab, whose
        // op[] list {0x0f,0xc8} supplies the row's 2 bytes.
   482  {
   483  	Ynone,	Yrl,	Z_rp,	2,
   484  	0,
   485  };
   486  static uchar	yscond[] =
        // Single Ymb destination, no source; same row as yincb.
        // NOTE(review): presumably for the set-on-condition instructions; no
        // user is visible in this part of optab.
   487  {
   488  	Ynone,	Ymb,	Zo_m,	2,
   489  	0
   490  };
   491  static uchar	yjcond[] =
        // Conditional-branch forms, used by the AJ?? entries in optab (AJCC,
        // AJEQ, ...).  The first two rows own no op[] bytes, so all three
        // rows see the same two opcode bytes, e.g. {0x73,0x83} for AJCC.
        // NOTE(review): the Yi0/Yi1 source presumably carries a branch hint —
        // confirm in doasm.
   492  {
   493  	Ynone,	Ybr,	Zbr,	0,
   494  	Yi0,	Ybr,	Zbr,	0,
   495  	Yi1,	Ybr,	Zbr,	1,
   496  	0
   497  };
   498  static uchar	yloop[] =
        // Branch-target destination via Zloop; used by AJCXZL, AJCXZQ, ALOOP,
        // ALOOPEQ and ALOOPNE in optab.
   499  {
   500  	Ynone,	Ybr,	Zloop,	1,
   501  	0
   502  };
   503  static uchar	ycall[] =
        // Forms used by ACALL in optab with op[] {0xff,(02),0xe8}.  The first
        // row owns no op[] bytes, so both Zcallindreg rows read the same
        // 0xff,(02) pair; the Zcall row reads 0xe8.
   504  {
   505  	Ynone,	Yml,	Zcallindreg,	0,
   506  	Yrx,	Yrx,	Zcallindreg,	2,
   507  	Ynone,	Ybr,	Zcall,	1,
   508  	0
   509  };
   510  static uchar	yduff[] =
        // Zcall form whose destination is an immediate (Yi32) instead of a
        // branch target.  NOTE(review): presumably for the Duff's-device
        // pseudo-instructions; no user is visible in this part of optab.
   511  {
   512  	Ynone,	Yi32,	Zcall,	1,
   513  	0
   514  };
   515  static uchar	yjmp[] =
        // Forms used by AJMP in optab with op[] {0xff,(04),0xeb,0xe9}: the
        // Yml form reads 0xff,(04); the branch form starts at 0xeb.
   516  {
   517  	Ynone,	Yml,	Zo_m64,	2,
   518  	Ynone,	Ybr,	Zjmp,	1,
   519  	0
   520  };
   521  
   522  static uchar	yfmvd[] =
        // Moves between Yf0 and either Ym or Yrf, in both directions; each
        // row owns 2 op[] bytes.  No user is visible in this part of optab.
   523  {
   524  	Ym,	Yf0,	Zm_o,	2,
   525  	Yf0,	Ym,	Zo_m,	2,
   526  	Yrf,	Yf0,	Zm_o,	2,
   527  	Yf0,	Yrf,	Zo_m,	2,
   528  	0
   529  };
   530  static uchar	yfmvdp[] =
        // Yf0 source only, with a Ym or Yrf destination.  No user is visible
        // in this part of optab.
   531  {
   532  	Yf0,	Ym,	Zo_m,	2,
   533  	Yf0,	Yrf,	Zo_m,	2,
   534  	0
   535  };
   536  static uchar	yfmvf[] =
        // The two memory rows of yfmvd, without the Yrf rows.  No user is
        // visible in this part of optab.
   537  {
   538  	Ym,	Yf0,	Zm_o,	2,
   539  	Yf0,	Ym,	Zo_m,	2,
   540  	0
   541  };
   542  static uchar	yfmvx[] =
        // Ym source into Yf0 only.  No user is visible in this part of optab.
   543  {
   544  	Ym,	Yf0,	Zm_o,	2,
   545  	0
   546  };
   547  static uchar	yfmvp[] =
        // Yf0 source into a Ym destination only.  No user is visible in this
        // part of optab.
   548  {
   549  	Yf0,	Ym,	Zo_m,	2,
   550  	0
   551  };
   552  static uchar	yfadd[] =
        // Ym or Yrf source into Yf0, or Yf0 source into Yrf; each row owns 2
        // op[] bytes.  No user is visible in this part of optab.
   553  {
   554  	Ym,	Yf0,	Zm_o,	2,
   555  	Yrf,	Yf0,	Zm_o,	2,
   556  	Yf0,	Yrf,	Zo_m,	2,
   557  	0
   558  };
   559  static uchar	yfaddp[] =
        // Yf0 source into a Yrf destination only.  No user is visible in this
        // part of optab.
   560  {
   561  	Yf0,	Yrf,	Zo_m,	2,
   562  	0
   563  };
   564  static uchar	yfxch[] =
        // Yf0/Yrf pair accepted in either operand order.  No user is visible
        // in this part of optab.
   565  {
   566  	Yf0,	Yrf,	Zo_m,	2,
   567  	Yrf,	Yf0,	Zm_o,	2,
   568  	0
   569  };
   570  static uchar	ycompp[] =
        // Same row as yfaddp; the existing note records that the accepted
        // operands are looser than the instruction really allows.  No user is
        // visible in this part of optab.
   571  {
   572  	Yf0,	Yrf,	Zo_m,	2,	/* botch is really f0,f1 */
   573  	0
   574  };
   575  static uchar	ystsw[] =
        // Destination-only forms: Ym via Zo_m (2 op[] bytes), or Yax via Zlit.
        // No user is visible in this part of optab.
   576  {
   577  	Ynone,	Ym,	Zo_m,	2,
   578  	Ynone,	Yax,	Zlit,	1,
   579  	0
   580  };
   581  static uchar	ystcw[] =
        // A lone Ym operand accepted on either side; same rows as ysvrs.
        // No user is visible in this part of optab.
   582  {
   583  	Ynone,	Ym,	Zo_m,	2,
   584  	Ym,	Ynone,	Zm_o,	2,
   585  	0
   586  };
   587  static uchar	ysvrs[] =
        // A lone Ym operand accepted on either side; used by AFXRSTOR,
        // AFXSAVE, AFXRSTOR64, AFXSAVE64 and ALDMXCSR in optab, which is why
        // their op[] lists repeat the same byte pair twice.
   588  {
   589  	Ynone,	Ym,	Zo_m,	2,
   590  	Ym,	Ynone,	Zm_o,	2,
   591  	0
   592  };
   593  static uchar	ymm[] = 
        // Two Zm_r_xm forms: Ymm into Ymr (1 op[] byte) and Yxm into Yxr
        // (2 op[] bytes).  No user is visible in this part of optab.
   594  {
   595  	Ymm,	Ymr,	Zm_r_xm,	1,
   596  	Yxm,	Yxr,	Zm_r_xm,	2,
   597  	0
   598  };
   599  static uchar	yxm[] = 
        // Yxm source into a Yxr destination, one op[] byte; shared by many
        // optab entries (AADDPD/PS/SD/SS, AANDPD, ADIVSD, AMAXPS, AMINSS, ...).
   600  {
   601  	Yxm,	Yxr,	Zm_r_xm,	1,
   602  	0
   603  };
   604  static uchar	yxcvm1[] = 
        // Yxm source into either a Yxr or a Ymr destination, 2 op[] bytes per
        // row; used by ACVTPD2PL, ACVTPS2PL, ACVTTPD2PL and ACVTTPS2PL in
        // optab.
   605  {
   606  	Yxm,	Yxr,	Zm_r_xm,	2,
   607  	Yxm,	Ymr,	Zm_r_xm,	2,
   608  	0
   609  };
   610  static uchar	yxcvm2[] =
        // Either a Yxm or a Ymm source into a Yxr destination, 2 op[] bytes
        // per row; used by ACVTPL2PD and ACVTPL2PS in optab.
   611  {
   612  	Yxm,	Yxr,	Zm_r_xm,	2,
   613  	Ymm,	Yxr,	Zm_r_xm,	2,
   614  	0
   615  };
   616  /*
   617  static uchar	yxmq[] = 
   618  {
   619  	Yxm,	Yxr,	Zm_r_xm,	2,
   620  	0
   621  };
   622  */
   623  static uchar	yxr[] = 
        // Both operands must be Yxr; used by AMASKMOVOU, AMOVHLPS and AMOVLHPS
        // in optab.
   624  {
   625  	Yxr,	Yxr,	Zm_r_xm,	1,
   626  	0
   627  };
   628  static uchar	yxr_ml[] =
        // Yxr source with a Yml destination; used by AMOVNTO, AMOVNTPD and
        // AMOVNTPS in optab.
   629  {
   630  	Yxr,	Yml,	Zr_m_xm,	1,
   631  	0
   632  };
   633  static uchar	ymr[] =
        // Both operands must be Ymr; used by AMASKMOVQ in optab.
   634  {
   635  	Ymr,	Ymr,	Zm_r,	1,
   636  	0
   637  };
   638  static uchar	ymr_ml[] =
        // Ymr source with a Yml destination; used by AMOVNTQ in optab.
   639  {
   640  	Ymr,	Yml,	Zr_m_xm,	1,
   641  	0
   642  };
   643  static uchar	yxcmp[] =
        // Same row as yxm; used by ACOMISD and ACOMISS in optab.
   644  {
   645  	Yxm,	Yxr, Zm_r_xm,	1,
   646  	0
   647  };
   648  static uchar	yxcmpi[] =
        // Yxm source into Yxr via Zm_r_i_xm, 2 op[] bytes; used by ACMPPD,
        // ACMPPS, ACMPSD and ACMPSS in optab.
   649  {
   650  	Yxm,	Yxr, Zm_r_i_xm,	2,
   651  	0
   652  };
   653  static uchar	yxmov[] =
        // Yxm/Yxr moves in both directions, one op[] byte each; used by
        // AMOVAPD/PS, AMOVO, AMOVOU, AMOVHPD/PS and AMOVLPD/PS in optab,
        // whose op[] lists give the load opcode first and the store second.
   654  {
   655  	Yxm,	Yxr,	Zm_r_xm,	1,
   656  	Yxr,	Yxm,	Zr_m_xm,	1,
   657  	0
   658  };
   659  static uchar	yxcvfl[] = 
        // Yxm source into a Yrl destination, one op[] byte; used by ACVTSD2SL,
        // ACVTSS2SL, ACVTTSD2SL and ACVTTSS2SL in optab.
   660  {
   661  	Yxm,	Yrl,	Zm_r_xm,	1,
   662  	0
   663  };
   664  static uchar	yxcvlf[] =
        // Yml source into a Yxr destination, one op[] byte; used by ACVTSL2SD
        // and ACVTSL2SS in optab.
   665  {
   666  	Yml,	Yxr,	Zm_r_xm,	1,
   667  	0
   668  };
   669  static uchar	yxcvfq[] = 
        // As yxcvfl but the row owns 2 op[] bytes; used by the Pw entries
        // ACVTSD2SQ, ACVTSS2SQ, ACVTTSD2SQ and ACVTTSS2SQ in optab, whose
        // op[] lists start with Pf2 or Pf3.
   670  {
   671  	Yxm,	Yrl,	Zm_r_xm,	2,
   672  	0
   673  };
   674  static uchar	yxcvqf[] =
        // As yxcvlf but the row owns 2 op[] bytes; used by the Pw entries
        // ACVTSQ2SD and ACVTSQ2SS in optab.
   675  {
   676  	Yml,	Yxr,	Zm_r_xm,	2,
   677  	0
   678  };
   679  static uchar	yps[] = 
        // Four forms: a Ymm/Yxm source or an 8-bit immediate source, each
        // with a Ymr or Yxr destination; the rows own 1, 2, 2 and 3 op[]
        // bytes.  No user is visible in this part of optab.
   680  {
   681  	Ymm,	Ymr,	Zm_r_xm,	1,
   682  	Yi8,	Ymr,	Zibo_m_xm,	2,
   683  	Yxm,	Yxr,	Zm_r_xm,	2,
   684  	Yi8,	Yxr,	Zibo_m_xm,	3,
   685  	0
   686  };
   687  static uchar	yxrrl[] =
        // Yxr source into a Yrl destination via Zm_r; used by AMOVMSKPD and
        // AMOVMSKPS in optab.
   688  {
   689  	Yxr,	Yrl,	Zm_r,	1,
   690  	0
   691  };
   692  static uchar	ymfp[] =
        // Ymm source into a Ymr destination via Zm_r_3d; used by API2FW in
        // optab.
   693  {
   694  	Ymm,	Ymr,	Zm_r_3d,	1,
   695  	0,
   696  };
   697  static uchar	ymrxr[] =
        // Ymr or Yxm source into a Yxr destination; used by AMOVQOZX in optab
        // with op[] {0xd6,0x7e}, one byte per row.
   698  {
   699  	Ymr,	Yxr,	Zm_r,	1,
   700  	Yxm,	Yxr,	Zm_r_xm,	1,
   701  	0
   702  };
   703  static uchar	ymshuf[] =
        // Ymm source into a Ymr destination via Zibm_r, 2 op[] bytes.
        // No user is visible in this part of optab.
   704  {
   705  	Ymm,	Ymr,	Zibm_r,	2,
   706  	0
   707  };
   708  static uchar	ymshufb[] =
        // Yxm source into a Yxr destination via Zm2_r, 2 op[] bytes.
        // No user is visible in this part of optab.
   709  {
   710  	Yxm,	Yxr,	Zm2_r,	2,
   711  	0
   712  };
   713  static uchar	yxshuf[] =
        // Yxm source into a Yxr destination via Zibm_r, 2 op[] bytes.
        // No user is visible in this part of optab.
   714  {
   715  	Yxm,	Yxr,	Zibm_r,	2,
   716  	0
   717  };
   718  static uchar	yextrw[] =
        // Yxr source into a Yrl destination via Zibm_r, 2 op[] bytes.
        // No user is visible in this part of optab.
   719  {
   720  	Yxr,	Yrl,	Zibm_r,	2,
   721  	0
   722  };
   723  static uchar	yinsrw[] =
        // Yml source into a Yxr destination via Zibm_r, 2 op[] bytes.
        // No user is visible in this part of optab.
   724  {
   725  	Yml,	Yxr,	Zibm_r,	2,
   726  	0
   727  };
   728  static uchar	yinsr[] =
        // Ymm source into a Yxr destination via Zibm_r; unlike the other
        // Zibm_r tables this row owns 3 op[] bytes.  No user is visible in
        // this part of optab.
   729  {
   730  	Ymm,	Yxr,	Zibm_r,	3,
   731  	0
   732  };
   733  static uchar	ypsdq[] =
        // 8-bit immediate source with a Yxr destination via Zibo_m, 2 op[]
        // bytes.  No user is visible in this part of optab.
   734  {
   735  	Yi8,	Yxr,	Zibo_m,	2,
   736  	0
   737  };
   738  static uchar	ymskb[] =
        // Yxr (2 op[] bytes) or Ymr (1 op[] byte) source into a Yrl
        // destination.  No user is visible in this part of optab.
   739  {
   740  	Yxr,	Yrl,	Zm_r_xm,	2,
   741  	Ymr,	Yrl,	Zm_r_xm,	1,
   742  	0
   743  };
   744  static uchar	ycrc32l[] =
        // Yml source into a Yrl destination via Zlitm_r; the row owns no op[]
        // bytes.  NOTE(review): by its name this is the table for the CRC32
        // instructions; their optab entries are not in this part of the file.
        //
        // The table must end with a 0 marker like every other y* table:
        // without it, a scan whose operands do not match the single row
        // would continue past the end of the array.
   745  {
   746  	Yml,	Yrl,	Zlitm_r,	0,
        	0	// end of table (was missing)
   747  };
   748  static uchar	yprefetch[] =
        // Single Ym source, no destination; the row owns 2 op[] bytes.
        // NOTE(review): presumably for the prefetch instructions; no user is
        // visible in this part of optab.
   749  {
   750  	Ym,	Ynone,	Zm_o,	2,
   751  	0,
   752  };
   753  static uchar	yaes[] =
        // Yxm source into a Yxr destination via Zlitm_r, 2 op[] bytes.
        // NOTE(review): presumably for the AES instructions; no user is
        // visible in this part of optab.
   754  {
   755  	Yxm,	Yxr,	Zlitm_r,	2,
   756  	0
   757  };
   758  static uchar	yaes2[] =
        // Yxm source into a Yxr destination via Zibm_r, 2 op[] bytes; same
        // row as yxshuf.  No user is visible in this part of optab.
   759  {
   760  	Yxm,	Yxr,	Zibm_r,	2,
   761  	0
   762  };
   763  
   764  /*
   765   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   766   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   767   * the entry with the given p->as and then looks through the ytable for that
   768   * instruction (the second field in the optab struct) for a line whose first
   769   * two values match the Ytypes of the p->from and p->to operands.  The function
   770   * oclass in span.c computes the specific Ytype of an operand and then the set
   771   * of more general Ytypes that it satisfies is implied by the ycover table, set
   772   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   773   * from the more general 8-bit constants, but instinit says
   774   *
   775   *        ycover[Yi0*Ymax + Ys32] = 1;
   776   *        ycover[Yi1*Ymax + Ys32] = 1;
   777   *        ycover[Yi8*Ymax + Ys32] = 1;
   778   *
   779   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   780   * if that's what an instruction can handle.
   781   *
   782   * In parallel with the scan through the ytable for the appropriate line, there
   783   * is a z pointer that starts out pointing at the strange magic byte list in
   784   * the Optab struct.  With each step past a non-matching ytable line, z
   785   * advances by the 4th entry in the line.  When a matching line is found, that
   786   * z pointer has the extra data to use in laying down the instruction bytes.
   787   * The actual bytes laid down are a function of the 3rd entry in the line (that
   788   * is, the Ztype) and the z bytes.
   789   *
   790   * For example, let's look at AADDL.  The optab line says:
   791   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   792   *
   793   * and yaddl says
   794   *        uchar   yaddl[] =
   795   *        {
   796   *                Yi8,    Yml,    Zibo_m, 2,
   797   *                Yi32,   Yax,    Zil_,   1,
   798   *                Yi32,   Yml,    Zilo_m, 2,
   799   *                Yrl,    Yml,    Zr_m,   1,
   800   *                Yml,    Yrl,    Zm_r,   1,
   801   *                0
   802   *        };
   803   *
   804   * so there are 5 possible types of ADDL instruction that can be laid down, and
   805   * possible states used to lay them down (Ztype and z pointer, assuming z
   806   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   807   *
   808   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   809   *        Yi32, Yax -> Zil_, z+2 (0x05)
   810   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   811   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   812   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   813   *
   814   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   815   * relatively straightforward as this program goes.
   816   *
   817   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   818   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   819   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   820   * Zilo_m is the same but a long (32-bit) immediate.
   821   */
   822  static Optab optab[] =
   823  /*	as, ytab, andproto, opcode */
   824  {
   825  	{ AXXX },
   826  	{ AAAA,		ynone,	P32, {0x37} },
   827  	{ AAAD,		ynone,	P32, {0xd5,0x0a} },
   828  	{ AAAM,		ynone,	P32, {0xd4,0x0a} },
   829  	{ AAAS,		ynone,	P32, {0x3f} },
   830  	{ AADCB,	yxorb,	Pb, {0x14,0x80,(02),0x10,0x10} },
   831  	{ AADCL,	yxorl,	Px, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   832  	{ AADCQ,	yxorl,	Pw, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   833  	{ AADCW,	yxorl,	Pe, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   834  	{ AADDB,	yxorb,	Pb, {0x04,0x80,(00),0x00,0x02} },
   835  	{ AADDL,	yaddl,	Px, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   836  	{ AADDPD,	yxm,	Pq, {0x58} },
   837  	{ AADDPS,	yxm,	Pm, {0x58} },
   838  	{ AADDQ,	yaddl,	Pw, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   839  	{ AADDSD,	yxm,	Pf2, {0x58} },
   840  	{ AADDSS,	yxm,	Pf3, {0x58} },
   841  	{ AADDW,	yaddl,	Pe, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   842  	{ AADJSP },
   843  	{ AANDB,	yxorb,	Pb, {0x24,0x80,(04),0x20,0x22} },
   844  	{ AANDL,	yxorl,	Px, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   845  	{ AANDNPD,	yxm,	Pq, {0x55} },
   846  	{ AANDNPS,	yxm,	Pm, {0x55} },
   847  	{ AANDPD,	yxm,	Pq, {0x54} },
   848  	{ AANDPS,	yxm,	Pq, {0x54} },
   849  	{ AANDQ,	yxorl,	Pw, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   850  	{ AANDW,	yxorl,	Pe, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   851  	{ AARPL,	yrl_ml,	P32, {0x63} },
   852  	{ ABOUNDL,	yrl_m,	P32, {0x62} },
   853  	{ ABOUNDW,	yrl_m,	Pe, {0x62} },
   854  	{ ABSFL,	yml_rl,	Pm, {0xbc} },
   855  	{ ABSFQ,	yml_rl,	Pw, {0x0f,0xbc} },
   856  	{ ABSFW,	yml_rl,	Pq, {0xbc} },
   857  	{ ABSRL,	yml_rl,	Pm, {0xbd} },
   858  	{ ABSRQ,	yml_rl,	Pw, {0x0f,0xbd} },
   859  	{ ABSRW,	yml_rl,	Pq, {0xbd} },
   860  	{ ABSWAPL,	ybswap,	Px, {0x0f,0xc8} },
   861  	{ ABSWAPQ,	ybswap,	Pw, {0x0f,0xc8} },
   862  	{ ABTCL,	ybtl,	Pm, {0xba,(07),0xbb} },
   863  	{ ABTCQ,	ybtl,	Pw, {0x0f,0xba,(07),0x0f,0xbb} },
   864  	{ ABTCW,	ybtl,	Pq, {0xba,(07),0xbb} },
   865  	{ ABTL,		ybtl,	Pm, {0xba,(04),0xa3} },
   866  	{ ABTQ,		ybtl,	Pw, {0x0f,0xba,(04),0x0f,0xa3}},
   867  	{ ABTRL,	ybtl,	Pm, {0xba,(06),0xb3} },
   868  	{ ABTRQ,	ybtl,	Pw, {0x0f,0xba,(06),0x0f,0xb3} },
   869  	{ ABTRW,	ybtl,	Pq, {0xba,(06),0xb3} },
   870  	{ ABTSL,	ybtl,	Pm, {0xba,(05),0xab } },
   871  	{ ABTSQ,	ybtl,	Pw, {0x0f,0xba,(05),0x0f,0xab} },
   872  	{ ABTSW,	ybtl,	Pq, {0xba,(05),0xab } },
   873  	{ ABTW,		ybtl,	Pq, {0xba,(04),0xa3} },
   874  	{ ABYTE,	ybyte,	Px, {1} },
   875  	{ ACALL,	ycall,	Px, {0xff,(02),0xe8} },
   876  	{ ACDQ,		ynone,	Px, {0x99} },
   877  	{ ACLC,		ynone,	Px, {0xf8} },
   878  	{ ACLD,		ynone,	Px, {0xfc} },
   879  	{ ACLI,		ynone,	Px, {0xfa} },
   880  	{ ACLTS,	ynone,	Pm, {0x06} },
   881  	{ ACMC,		ynone,	Px, {0xf5} },
   882  	{ ACMOVLCC,	yml_rl,	Pm, {0x43} },
   883  	{ ACMOVLCS,	yml_rl,	Pm, {0x42} },
   884  	{ ACMOVLEQ,	yml_rl,	Pm, {0x44} },
   885  	{ ACMOVLGE,	yml_rl,	Pm, {0x4d} },
   886  	{ ACMOVLGT,	yml_rl,	Pm, {0x4f} },
   887  	{ ACMOVLHI,	yml_rl,	Pm, {0x47} },
   888  	{ ACMOVLLE,	yml_rl,	Pm, {0x4e} },
   889  	{ ACMOVLLS,	yml_rl,	Pm, {0x46} },
   890  	{ ACMOVLLT,	yml_rl,	Pm, {0x4c} },
   891  	{ ACMOVLMI,	yml_rl,	Pm, {0x48} },
   892  	{ ACMOVLNE,	yml_rl,	Pm, {0x45} },
   893  	{ ACMOVLOC,	yml_rl,	Pm, {0x41} },
   894  	{ ACMOVLOS,	yml_rl,	Pm, {0x40} },
   895  	{ ACMOVLPC,	yml_rl,	Pm, {0x4b} },
   896  	{ ACMOVLPL,	yml_rl,	Pm, {0x49} },
   897  	{ ACMOVLPS,	yml_rl,	Pm, {0x4a} },
   898  	{ ACMOVQCC,	yml_rl,	Pw, {0x0f,0x43} },
   899  	{ ACMOVQCS,	yml_rl,	Pw, {0x0f,0x42} },
   900  	{ ACMOVQEQ,	yml_rl,	Pw, {0x0f,0x44} },
   901  	{ ACMOVQGE,	yml_rl,	Pw, {0x0f,0x4d} },
   902  	{ ACMOVQGT,	yml_rl,	Pw, {0x0f,0x4f} },
   903  	{ ACMOVQHI,	yml_rl,	Pw, {0x0f,0x47} },
   904  	{ ACMOVQLE,	yml_rl,	Pw, {0x0f,0x4e} },
   905  	{ ACMOVQLS,	yml_rl,	Pw, {0x0f,0x46} },
   906  	{ ACMOVQLT,	yml_rl,	Pw, {0x0f,0x4c} },
   907  	{ ACMOVQMI,	yml_rl,	Pw, {0x0f,0x48} },
   908  	{ ACMOVQNE,	yml_rl,	Pw, {0x0f,0x45} },
   909  	{ ACMOVQOC,	yml_rl,	Pw, {0x0f,0x41} },
   910  	{ ACMOVQOS,	yml_rl,	Pw, {0x0f,0x40} },
   911  	{ ACMOVQPC,	yml_rl,	Pw, {0x0f,0x4b} },
   912  	{ ACMOVQPL,	yml_rl,	Pw, {0x0f,0x49} },
   913  	{ ACMOVQPS,	yml_rl,	Pw, {0x0f,0x4a} },
   914  	{ ACMOVWCC,	yml_rl,	Pq, {0x43} },
   915  	{ ACMOVWCS,	yml_rl,	Pq, {0x42} },
   916  	{ ACMOVWEQ,	yml_rl,	Pq, {0x44} },
   917  	{ ACMOVWGE,	yml_rl,	Pq, {0x4d} },
   918  	{ ACMOVWGT,	yml_rl,	Pq, {0x4f} },
   919  	{ ACMOVWHI,	yml_rl,	Pq, {0x47} },
   920  	{ ACMOVWLE,	yml_rl,	Pq, {0x4e} },
   921  	{ ACMOVWLS,	yml_rl,	Pq, {0x46} },
   922  	{ ACMOVWLT,	yml_rl,	Pq, {0x4c} },
   923  	{ ACMOVWMI,	yml_rl,	Pq, {0x48} },
   924  	{ ACMOVWNE,	yml_rl,	Pq, {0x45} },
   925  	{ ACMOVWOC,	yml_rl,	Pq, {0x41} },
   926  	{ ACMOVWOS,	yml_rl,	Pq, {0x40} },
   927  	{ ACMOVWPC,	yml_rl,	Pq, {0x4b} },
   928  	{ ACMOVWPL,	yml_rl,	Pq, {0x49} },
   929  	{ ACMOVWPS,	yml_rl,	Pq, {0x4a} },
   930  	{ ACMPB,	ycmpb,	Pb, {0x3c,0x80,(07),0x38,0x3a} },
   931  	{ ACMPL,	ycmpl,	Px, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   932  	{ ACMPPD,	yxcmpi,	Px, {Pe,0xc2} },
   933  	{ ACMPPS,	yxcmpi,	Pm, {0xc2,0} },
   934  	{ ACMPQ,	ycmpl,	Pw, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   935  	{ ACMPSB,	ynone,	Pb, {0xa6} },
   936  	{ ACMPSD,	yxcmpi,	Px, {Pf2,0xc2} },
   937  	{ ACMPSL,	ynone,	Px, {0xa7} },
   938  	{ ACMPSQ,	ynone,	Pw, {0xa7} },
   939  	{ ACMPSS,	yxcmpi,	Px, {Pf3,0xc2} },
   940  	{ ACMPSW,	ynone,	Pe, {0xa7} },
   941  	{ ACMPW,	ycmpl,	Pe, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   942  	{ ACOMISD,	yxcmp,	Pe, {0x2f} },
   943  	{ ACOMISS,	yxcmp,	Pm, {0x2f} },
   944  	{ ACPUID,	ynone,	Pm, {0xa2} },
   945  	{ ACVTPL2PD,	yxcvm2,	Px, {Pf3,0xe6,Pe,0x2a} },
   946  	{ ACVTPL2PS,	yxcvm2,	Pm, {0x5b,0,0x2a,0,} },
   947  	{ ACVTPD2PL,	yxcvm1,	Px, {Pf2,0xe6,Pe,0x2d} },
   948  	{ ACVTPD2PS,	yxm,	Pe, {0x5a} },
   949  	{ ACVTPS2PL,	yxcvm1, Px, {Pe,0x5b,Pm,0x2d} },
   950  	{ ACVTPS2PD,	yxm,	Pm, {0x5a} },
   951  	{ API2FW,	ymfp,	Px, {0x0c} },
   952  	{ ACVTSD2SL,	yxcvfl, Pf2, {0x2d} },
   953  	{ ACVTSD2SQ,	yxcvfq, Pw, {Pf2,0x2d} },
   954  	{ ACVTSD2SS,	yxm,	Pf2, {0x5a} },
   955  	{ ACVTSL2SD,	yxcvlf, Pf2, {0x2a} },
   956  	{ ACVTSQ2SD,	yxcvqf, Pw, {Pf2,0x2a} },
   957  	{ ACVTSL2SS,	yxcvlf, Pf3, {0x2a} },
   958  	{ ACVTSQ2SS,	yxcvqf, Pw, {Pf3,0x2a} },
   959  	{ ACVTSS2SD,	yxm,	Pf3, {0x5a} },
   960  	{ ACVTSS2SL,	yxcvfl, Pf3, {0x2d} },
   961  	{ ACVTSS2SQ,	yxcvfq, Pw, {Pf3,0x2d} },
   962  	{ ACVTTPD2PL,	yxcvm1,	Px, {Pe,0xe6,Pe,0x2c} },
   963  	{ ACVTTPS2PL,	yxcvm1,	Px, {Pf3,0x5b,Pm,0x2c} },
   964  	{ ACVTTSD2SL,	yxcvfl, Pf2, {0x2c} },
   965  	{ ACVTTSD2SQ,	yxcvfq, Pw, {Pf2,0x2c} },
   966  	{ ACVTTSS2SL,	yxcvfl,	Pf3, {0x2c} },
   967  	{ ACVTTSS2SQ,	yxcvfq, Pw, {Pf3,0x2c} },
   968  	{ ACWD,		ynone,	Pe, {0x99} },
   969  	{ ACQO,		ynone,	Pw, {0x99} },
   970  	{ ADAA,		ynone,	P32, {0x27} },
   971  	{ ADAS,		ynone,	P32, {0x2f} },
   972  	{ ADATA },
   973  	{ ADECB,	yincb,	Pb, {0xfe,(01)} },
   974  	{ ADECL,	yincl,	Px, {0xff,(01)} },
   975  	{ ADECQ,	yincl,	Pw, {0xff,(01)} },
   976  	{ ADECW,	yincw,	Pe, {0xff,(01)} },
   977  	{ ADIVB,	ydivb,	Pb, {0xf6,(06)} },
   978  	{ ADIVL,	ydivl,	Px, {0xf7,(06)} },
   979  	{ ADIVPD,	yxm,	Pe, {0x5e} },
   980  	{ ADIVPS,	yxm,	Pm, {0x5e} },
   981  	{ ADIVQ,	ydivl,	Pw, {0xf7,(06)} },
   982  	{ ADIVSD,	yxm,	Pf2, {0x5e} },
   983  	{ ADIVSS,	yxm,	Pf3, {0x5e} },
   984  	{ ADIVW,	ydivl,	Pe, {0xf7,(06)} },
   985  	{ AEMMS,	ynone,	Pm, {0x77} },
   986  	{ AENTER },				/* botch */
   987  	{ AFXRSTOR,	ysvrs,	Pm, {0xae,(01),0xae,(01)} },
   988  	{ AFXSAVE,	ysvrs,	Pm, {0xae,(00),0xae,(00)} },
   989  	{ AFXRSTOR64,	ysvrs,	Pw, {0x0f,0xae,(01),0x0f,0xae,(01)} },
   990  	{ AFXSAVE64,	ysvrs,	Pw, {0x0f,0xae,(00),0x0f,0xae,(00)} },
   991  	{ AGLOBL },
   992  	{ AGOK },
   993  	{ AHISTORY },
   994  	{ AHLT,		ynone,	Px, {0xf4} },
   995  	{ AIDIVB,	ydivb,	Pb, {0xf6,(07)} },
   996  	{ AIDIVL,	ydivl,	Px, {0xf7,(07)} },
   997  	{ AIDIVQ,	ydivl,	Pw, {0xf7,(07)} },
   998  	{ AIDIVW,	ydivl,	Pe, {0xf7,(07)} },
   999  	{ AIMULB,	ydivb,	Pb, {0xf6,(05)} },
  1000  	{ AIMULL,	yimul,	Px, {0xf7,(05),0x6b,0x69,Pm,0xaf} },
  1001  	{ AIMULQ,	yimul,	Pw, {0xf7,(05),0x6b,0x69,Pm,0xaf} },
  1002  	{ AIMULW,	yimul,	Pe, {0xf7,(05),0x6b,0x69,Pm,0xaf} },
  1003  	{ AIMUL3Q,	yimul3,	Pw, {0x6b,(00)} },
  1004  	{ AINB,		yin,	Pb, {0xe4,0xec} },
  1005  	{ AINCB,	yincb,	Pb, {0xfe,(00)} },
  1006  	{ AINCL,	yincl,	Px, {0xff,(00)} },
  1007  	{ AINCQ,	yincl,	Pw, {0xff,(00)} },
  1008  	{ AINCW,	yincw,	Pe, {0xff,(00)} },
  1009  	{ AINL,		yin,	Px, {0xe5,0xed} },
  1010  	{ AINSB,	ynone,	Pb, {0x6c} },
  1011  	{ AINSL,	ynone,	Px, {0x6d} },
  1012  	{ AINSW,	ynone,	Pe, {0x6d} },
  1013  	{ AINT,		yint,	Px, {0xcd} },
  1014  	{ AINTO,	ynone,	P32, {0xce} },
  1015  	{ AINW,		yin,	Pe, {0xe5,0xed} },
  1016  	{ AIRETL,	ynone,	Px, {0xcf} },
  1017  	{ AIRETQ,	ynone,	Pw, {0xcf} },
  1018  	{ AIRETW,	ynone,	Pe, {0xcf} },
  1019  	{ AJCC,		yjcond,	Px, {0x73,0x83,(00)} },
  1020  	{ AJCS,		yjcond,	Px, {0x72,0x82} },
  1021  	{ AJCXZL,	yloop,	Px, {0xe3} },
  1022  	{ AJCXZQ,	yloop,	Px, {0xe3} },
  1023  	{ AJEQ,		yjcond,	Px, {0x74,0x84} },
  1024  	{ AJGE,		yjcond,	Px, {0x7d,0x8d} },
  1025  	{ AJGT,		yjcond,	Px, {0x7f,0x8f} },
  1026  	{ AJHI,		yjcond,	Px, {0x77,0x87} },
  1027  	{ AJLE,		yjcond,	Px, {0x7e,0x8e} },
  1028  	{ AJLS,		yjcond,	Px, {0x76,0x86} },
  1029  	{ AJLT,		yjcond,	Px, {0x7c,0x8c} },
  1030  	{ AJMI,		yjcond,	Px, {0x78,0x88} },
  1031  	{ AJMP,		yjmp,	Px, {0xff,(04),0xeb,0xe9} },
  1032  	{ AJNE,		yjcond,	Px, {0x75,0x85} },
  1033  	{ AJOC,		yjcond,	Px, {0x71,0x81,(00)} },
  1034  	{ AJOS,		yjcond,	Px, {0x70,0x80,(00)} },
  1035  	{ AJPC,		yjcond,	Px, {0x7b,0x8b} },
  1036  	{ AJPL,		yjcond,	Px, {0x79,0x89} },
  1037  	{ AJPS,		yjcond,	Px, {0x7a,0x8a} },
  1038  	{ ALAHF,	ynone,	Px, {0x9f} },
  1039  	{ ALARL,	yml_rl,	Pm, {0x02} },
  1040  	{ ALARW,	yml_rl,	Pq, {0x02} },
  1041  	{ ALDMXCSR,	ysvrs,	Pm, {0xae,(02),0xae,(02)} },
  1042  	{ ALEAL,	ym_rl,	Px, {0x8d} },
  1043  	{ ALEAQ,	ym_rl,	Pw, {0x8d} },
  1044  	{ ALEAVEL,	ynone,	P32, {0xc9} },
  1045  	{ ALEAVEQ,	ynone,	Py, {0xc9} },
  1046  	{ ALEAVEW,	ynone,	Pe, {0xc9} },
  1047  	{ ALEAW,	ym_rl,	Pe, {0x8d} },
  1048  	{ ALOCK,	ynone,	Px, {0xf0} },
  1049  	{ ALODSB,	ynone,	Pb, {0xac} },
  1050  	{ ALODSL,	ynone,	Px, {0xad} },
  1051  	{ ALODSQ,	ynone,	Pw, {0xad} },
  1052  	{ ALODSW,	ynone,	Pe, {0xad} },
  1053  	{ ALONG,	ybyte,	Px, {4} },
  1054  	{ ALOOP,	yloop,	Px, {0xe2} },
  1055  	{ ALOOPEQ,	yloop,	Px, {0xe1} },
  1056  	{ ALOOPNE,	yloop,	Px, {0xe0} },
  1057  	{ ALSLL,	yml_rl,	Pm, {0x03 } },
  1058  	{ ALSLW,	yml_rl,	Pq, {0x03 } },
  1059  	{ AMASKMOVOU,	yxr,	Pe, {0xf7} },
  1060  	{ AMASKMOVQ,	ymr,	Pm, {0xf7} },
  1061  	{ AMAXPD,	yxm,	Pe, {0x5f} },
  1062  	{ AMAXPS,	yxm,	Pm, {0x5f} },
  1063  	{ AMAXSD,	yxm,	Pf2, {0x5f} },
  1064  	{ AMAXSS,	yxm,	Pf3, {0x5f} },
  1065  	{ AMINPD,	yxm,	Pe, {0x5d} },
  1066  	{ AMINPS,	yxm,	Pm, {0x5d} },
  1067  	{ AMINSD,	yxm,	Pf2, {0x5d} },
  1068  	{ AMINSS,	yxm,	Pf3, {0x5d} },
  1069  	{ AMOVAPD,	yxmov,	Pe, {0x28,0x29} },
  1070  	{ AMOVAPS,	yxmov,	Pm, {0x28,0x29} },
  1071  	{ AMOVB,	ymovb,	Pb, {0x88,0x8a,0xb0,0xc6,(00)} },
  1072  	{ AMOVBLSX,	ymb_rl,	Pm, {0xbe} },
  1073  	{ AMOVBLZX,	ymb_rl,	Pm, {0xb6} },
  1074  	{ AMOVBQSX,	ymb_rl,	Pw, {0x0f,0xbe} },
  1075  	{ AMOVBQZX,	ymb_rl,	Pm, {0xb6} },
  1076  	{ AMOVBWSX,	ymb_rl,	Pq, {0xbe} },
  1077  	{ AMOVBWZX,	ymb_rl,	Pq, {0xb6} },
  1078  	{ AMOVO,	yxmov,	Pe, {0x6f,0x7f} },
  1079  	{ AMOVOU,	yxmov,	Pf3, {0x6f,0x7f} },
  1080  	{ AMOVHLPS,	yxr,	Pm, {0x12} },
  1081  	{ AMOVHPD,	yxmov,	Pe, {0x16,0x17} },
  1082  	{ AMOVHPS,	yxmov,	Pm, {0x16,0x17} },
  1083  	{ AMOVL,	ymovl,	Px, {0x89,0x8b,0x31,0xb8,0xc7,(00),0x6e,0x7e,Pe,0x6e,Pe,0x7e,0} },
  1084  	{ AMOVLHPS,	yxr,	Pm, {0x16} },
  1085  	{ AMOVLPD,	yxmov,	Pe, {0x12,0x13} },
  1086  	{ AMOVLPS,	yxmov,	Pm, {0x12,0x13} },
  1087  	{ AMOVLQSX,	yml_rl,	Pw, {0x63} },
  1088  	{ AMOVLQZX,	yml_rl,	Px, {0x8b} },
  1089  	{ AMOVMSKPD,	yxrrl,	Pq, {0x50} },
  1090  	{ AMOVMSKPS,	yxrrl,	Pm, {0x50} },
  1091  	{ AMOVNTO,	yxr_ml,	Pe, {0xe7} },
  1092  	{ AMOVNTPD,	yxr_ml,	Pe, {0x2b} },
  1093  	{ AMOVNTPS,	yxr_ml,	Pm, {0x2b} },
  1094  	{ AMOVNTQ,	ymr_ml,	Pm, {0xe7} },
  1095  	{ AMOVQ,	ymovq,	Pw, {0x89, 0x8b, 0x31, 0xc7,(00), 0xb8, 0xc7,(00), 0x6f, 0x7f, 0x6e, 0x7e, Pf2,0xd6, Pf3,0x7e, Pe,0xd6, Pe,0x6e, Pe,0x7e,0} },
  1096  	{ AMOVQOZX,	ymrxr,	Pf3, {0xd6,0x7e} },
  1097  	{ AMOVSB,	ynone,	Pb, {0xa4} },
  1098  	{ AMOVSD,	yxmov,	Pf2, {0x10,0x11} },
  1099  	{ AMOVSL,	ynone,	Px, {0xa5} },
  1100  	{ AMOVSQ,	ynone,	Pw, {0xa5} },
  1101  	{ AMOVSS,	yxmov,	Pf3, {0x10,0x11} },
  1102  	{ AMOVSW,	ynone,	Pe, {0xa5} },
  1103  	{ AMOVUPD,	yxmov,	Pe, {0x10,0x11} },
  1104  	{ AMOVUPS,	yxmov,	Pm, {0x10,0x11} },
  1105  	{ AMOVW,	ymovw,	Pe, {0x89,0x8b,0x31,0xb8,0xc7,(00),0} },
  1106  	{ AMOVWLSX,	yml_rl,	Pm, {0xbf} },
  1107  	{ AMOVWLZX,	yml_rl,	Pm, {0xb7} },
  1108  	{ AMOVWQSX,	yml_rl,	Pw, {0x0f,0xbf} },
  1109  	{ AMOVWQZX,	yml_rl,	Pw, {0x0f,0xb7} },
  1110  	{ AMULB,	ydivb,	Pb, {0xf6,(04)} },
  1111  	{ AMULL,	ydivl,	Px, {0xf7,(04)} },
  1112  	{ AMULPD,	yxm,	Pe, {0x59} },
  1113  	{ AMULPS,	yxm,	Ym, {0x59} },
  1114  	{ AMULQ,	ydivl,	Pw, {0xf7,(04)} },
  1115  	{ AMULSD,	yxm,	Pf2, {0x59} },
  1116  	{ AMULSS,	yxm,	Pf3, {0x59} },
  1117  	{ AMULW,	ydivl,	Pe, {0xf7,(04)} },
  1118  	{ ANAME },
  1119  	{ ANEGB,	yscond,	Pb, {0xf6,(03)} },
  1120  	{ ANEGL,	yscond,	Px, {0xf7,(03)} },
  1121  	{ ANEGQ,	yscond,	Pw, {0xf7,(03)} },
  1122  	{ ANEGW,	yscond,	Pe, {0xf7,(03)} },
  1123  	{ ANOP,		ynop,	Px, {0,0} },
  1124  	{ ANOTB,	yscond,	Pb, {0xf6,(02)} },
  1125  	{ ANOTL,	yscond,	Px, {0xf7,(02)} },
  1126  	{ ANOTQ,	yscond,	Pw, {0xf7,(02)} },
  1127  	{ ANOTW,	yscond,	Pe, {0xf7,(02)} },
  1128  	{ AORB,		yxorb,	Pb, {0x0c,0x80,(01),0x08,0x0a} },
  1129  	{ AORL,		yxorl,	Px, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
  1130  	{ AORPD,	yxm,	Pq, {0x56} },
  1131  	{ AORPS,	yxm,	Pm, {0x56} },
  1132  	{ AORQ,		yxorl,	Pw, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
  1133  	{ AORW,		yxorl,	Pe, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
  1134  	{ AOUTB,	yin,	Pb, {0xe6,0xee} },
  1135  	{ AOUTL,	yin,	Px, {0xe7,0xef} },
  1136  	{ AOUTSB,	ynone,	Pb, {0x6e} },
  1137  	{ AOUTSL,	ynone,	Px, {0x6f} },
  1138  	{ AOUTSW,	ynone,	Pe, {0x6f} },
  1139  	{ AOUTW,	yin,	Pe, {0xe7,0xef} },
  1140  	{ APACKSSLW,	ymm,	Py, {0x6b,Pe,0x6b} },
  1141  	{ APACKSSWB,	ymm,	Py, {0x63,Pe,0x63} },
  1142  	{ APACKUSWB,	ymm,	Py, {0x67,Pe,0x67} },
  1143  	{ APADDB,	ymm,	Py, {0xfc,Pe,0xfc} },
  1144  	{ APADDL,	ymm,	Py, {0xfe,Pe,0xfe} },
  1145  	{ APADDQ,	yxm,	Pe, {0xd4} },
  1146  	{ APADDSB,	ymm,	Py, {0xec,Pe,0xec} },
  1147  	{ APADDSW,	ymm,	Py, {0xed,Pe,0xed} },
  1148  	{ APADDUSB,	ymm,	Py, {0xdc,Pe,0xdc} },
  1149  	{ APADDUSW,	ymm,	Py, {0xdd,Pe,0xdd} },
  1150  	{ APADDW,	ymm,	Py, {0xfd,Pe,0xfd} },
  1151  	{ APAND,	ymm,	Py, {0xdb,Pe,0xdb} },
  1152  	{ APANDN,	ymm,	Py, {0xdf,Pe,0xdf} },
  1153  	{ APAUSE,	ynone,	Px, {0xf3,0x90} },
  1154  	{ APAVGB,	ymm,	Py, {0xe0,Pe,0xe0} },
  1155  	{ APAVGW,	ymm,	Py, {0xe3,Pe,0xe3} },
  1156  	{ APCMPEQB,	ymm,	Py, {0x74,Pe,0x74} },
  1157  	{ APCMPEQL,	ymm,	Py, {0x76,Pe,0x76} },
  1158  	{ APCMPEQW,	ymm,	Py, {0x75,Pe,0x75} },
  1159  	{ APCMPGTB,	ymm,	Py, {0x64,Pe,0x64} },
  1160  	{ APCMPGTL,	ymm,	Py, {0x66,Pe,0x66} },
  1161  	{ APCMPGTW,	ymm,	Py, {0x65,Pe,0x65} },
  1162  	{ APEXTRW,	yextrw,	Pq, {0xc5,(00)} },
  1163  	{ APF2IL,	ymfp,	Px, {0x1d} },
  1164  	{ APF2IW,	ymfp,	Px, {0x1c} },
  1165  	{ API2FL,	ymfp,	Px, {0x0d} },
  1166  	{ APFACC,	ymfp,	Px, {0xae} },
  1167  	{ APFADD,	ymfp,	Px, {0x9e} },
  1168  	{ APFCMPEQ,	ymfp,	Px, {0xb0} },
  1169  	{ APFCMPGE,	ymfp,	Px, {0x90} },
  1170  	{ APFCMPGT,	ymfp,	Px, {0xa0} },
  1171  	{ APFMAX,	ymfp,	Px, {0xa4} },
  1172  	{ APFMIN,	ymfp,	Px, {0x94} },
  1173  	{ APFMUL,	ymfp,	Px, {0xb4} },
  1174  	{ APFNACC,	ymfp,	Px, {0x8a} },
  1175  	{ APFPNACC,	ymfp,	Px, {0x8e} },
  1176  	{ APFRCP,	ymfp,	Px, {0x96} },
  1177  	{ APFRCPIT1,	ymfp,	Px, {0xa6} },
  1178  	{ APFRCPI2T,	ymfp,	Px, {0xb6} },
  1179  	{ APFRSQIT1,	ymfp,	Px, {0xa7} },
  1180  	{ APFRSQRT,	ymfp,	Px, {0x97} },
  1181  	{ APFSUB,	ymfp,	Px, {0x9a} },
  1182  	{ APFSUBR,	ymfp,	Px, {0xaa} },
  1183  	{ APINSRW,	yinsrw,	Pq, {0xc4,(00)} },
  1184  	{ APINSRD,	yinsr,	Pq, {0x3a, 0x22, (00)} },
  1185  	{ APINSRQ,	yinsr,	Pq3, {0x3a, 0x22, (00)} },
  1186  	{ APMADDWL,	ymm,	Py, {0xf5,Pe,0xf5} },
  1187  	{ APMAXSW,	yxm,	Pe, {0xee} },
  1188  	{ APMAXUB,	yxm,	Pe, {0xde} },
  1189  	{ APMINSW,	yxm,	Pe, {0xea} },
  1190  	{ APMINUB,	yxm,	Pe, {0xda} },
  1191  	{ APMOVMSKB,	ymskb,	Px, {Pe,0xd7,0xd7} },
  1192  	{ APMULHRW,	ymfp,	Px, {0xb7} },
  1193  	{ APMULHUW,	ymm,	Py, {0xe4,Pe,0xe4} },
  1194  	{ APMULHW,	ymm,	Py, {0xe5,Pe,0xe5} },
  1195  	{ APMULLW,	ymm,	Py, {0xd5,Pe,0xd5} },
  1196  	{ APMULULQ,	ymm,	Py, {0xf4,Pe,0xf4} },
  1197  	{ APOPAL,	ynone,	P32, {0x61} },
  1198  	{ APOPAW,	ynone,	Pe, {0x61} },
  1199  	{ APOPFL,	ynone,	P32, {0x9d} },
  1200  	{ APOPFQ,	ynone,	Py, {0x9d} },
  1201  	{ APOPFW,	ynone,	Pe, {0x9d} },
  1202  	{ APOPL,	ypopl,	P32, {0x58,0x8f,(00)} },
  1203  	{ APOPQ,	ypopl,	Py, {0x58,0x8f,(00)} },
  1204  	{ APOPW,	ypopl,	Pe, {0x58,0x8f,(00)} },
  1205  	{ APOR,		ymm,	Py, {0xeb,Pe,0xeb} },
  1206  	{ APSADBW,	yxm,	Pq, {0xf6} },
  1207  	{ APSHUFHW,	yxshuf,	Pf3, {0x70,(00)} },
  1208  	{ APSHUFL,	yxshuf,	Pq, {0x70,(00)} },
  1209  	{ APSHUFLW,	yxshuf,	Pf2, {0x70,(00)} },
  1210  	{ APSHUFW,	ymshuf,	Pm, {0x70,(00)} },
  1211  	{ APSHUFB,	ymshufb,Pq, {0x38, 0x00} },
  1212  	{ APSLLO,	ypsdq,	Pq, {0x73,(07)} },
  1213  	{ APSLLL,	yps,	Py, {0xf2, 0x72,(06), Pe,0xf2, Pe,0x72,(06)} },
  1214  	{ APSLLQ,	yps,	Py, {0xf3, 0x73,(06), Pe,0xf3, Pe,0x73,(06)} },
  1215  	{ APSLLW,	yps,	Py, {0xf1, 0x71,(06), Pe,0xf1, Pe,0x71,(06)} },
  1216  	{ APSRAL,	yps,	Py, {0xe2, 0x72,(04), Pe,0xe2, Pe,0x72,(04)} },
  1217  	{ APSRAW,	yps,	Py, {0xe1, 0x71,(04), Pe,0xe1, Pe,0x71,(04)} },
  1218  	{ APSRLO,	ypsdq,	Pq, {0x73,(03)} },
  1219  	{ APSRLL,	yps,	Py, {0xd2, 0x72,(02), Pe,0xd2, Pe,0x72,(02)} },
  1220  	{ APSRLQ,	yps,	Py, {0xd3, 0x73,(02), Pe,0xd3, Pe,0x73,(02)} },
  1221  	{ APSRLW,	yps,	Py, {0xd1, 0x71,(02), Pe,0xe1, Pe,0x71,(02)} },
  1222  	{ APSUBB,	yxm,	Pe, {0xf8} },
  1223  	{ APSUBL,	yxm,	Pe, {0xfa} },
  1224  	{ APSUBQ,	yxm,	Pe, {0xfb} },
  1225  	{ APSUBSB,	yxm,	Pe, {0xe8} },
  1226  	{ APSUBSW,	yxm,	Pe, {0xe9} },
  1227  	{ APSUBUSB,	yxm,	Pe, {0xd8} },
  1228  	{ APSUBUSW,	yxm,	Pe, {0xd9} },
  1229  	{ APSUBW,	yxm,	Pe, {0xf9} },
  1230  	{ APSWAPL,	ymfp,	Px, {0xbb} },
  1231  	{ APUNPCKHBW,	ymm,	Py, {0x68,Pe,0x68} },
  1232  	{ APUNPCKHLQ,	ymm,	Py, {0x6a,Pe,0x6a} },
  1233  	{ APUNPCKHQDQ,	yxm,	Pe, {0x6d} },
  1234  	{ APUNPCKHWL,	ymm,	Py, {0x69,Pe,0x69} },
  1235  	{ APUNPCKLBW,	ymm,	Py, {0x60,Pe,0x60} },
  1236  	{ APUNPCKLLQ,	ymm,	Py, {0x62,Pe,0x62} },
  1237  	{ APUNPCKLQDQ,	yxm,	Pe, {0x6c} },
  1238  	{ APUNPCKLWL,	ymm,	Py, {0x61,Pe,0x61} },
  1239  	{ APUSHAL,	ynone,	P32, {0x60} },
  1240  	{ APUSHAW,	ynone,	Pe, {0x60} },
  1241  	{ APUSHFL,	ynone,	P32, {0x9c} },
  1242  	{ APUSHFQ,	ynone,	Py, {0x9c} },
  1243  	{ APUSHFW,	ynone,	Pe, {0x9c} },
  1244  	{ APUSHL,	ypushl,	P32, {0x50,0xff,(06),0x6a,0x68} },
  1245  	{ APUSHQ,	ypushl,	Py, {0x50,0xff,(06),0x6a,0x68} },
  1246  	{ APUSHW,	ypushl,	Pe, {0x50,0xff,(06),0x6a,0x68} },
  1247  	{ APXOR,	ymm,	Py, {0xef,Pe,0xef} },
  1248  	{ AQUAD,	ybyte,	Px, {8} },
  1249  	{ ARCLB,	yshb,	Pb, {0xd0,(02),0xc0,(02),0xd2,(02)} },
  1250  	{ ARCLL,	yshl,	Px, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
  1251  	{ ARCLQ,	yshl,	Pw, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
  1252  	{ ARCLW,	yshl,	Pe, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
  1253  	{ ARCPPS,	yxm,	Pm, {0x53} },
  1254  	{ ARCPSS,	yxm,	Pf3, {0x53} },
  1255  	{ ARCRB,	yshb,	Pb, {0xd0,(03),0xc0,(03),0xd2,(03)} },
  1256  	{ ARCRL,	yshl,	Px, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
  1257  	{ ARCRQ,	yshl,	Pw, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
  1258  	{ ARCRW,	yshl,	Pe, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
  1259  	{ AREP,		ynone,	Px, {0xf3} },
  1260  	{ AREPN,	ynone,	Px, {0xf2} },
  1261  	{ ARET,		ynone,	Px, {0xc3} },
  1262  	{ ARETFW,	yret,	Pe, {0xcb,0xca} },
  1263  	{ ARETFL,	yret,	Px, {0xcb,0xca} },
  1264  	{ ARETFQ,	yret,	Pw, {0xcb,0xca} },
  1265  	{ AROLB,	yshb,	Pb, {0xd0,(00),0xc0,(00),0xd2,(00)} },
  1266  	{ AROLL,	yshl,	Px, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
  1267  	{ AROLQ,	yshl,	Pw, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
  1268  	{ AROLW,	yshl,	Pe, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
  1269  	{ ARORB,	yshb,	Pb, {0xd0,(01),0xc0,(01),0xd2,(01)} },
  1270  	{ ARORL,	yshl,	Px, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
  1271  	{ ARORQ,	yshl,	Pw, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
  1272  	{ ARORW,	yshl,	Pe, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
  1273  	{ ARSQRTPS,	yxm,	Pm, {0x52} },
  1274  	{ ARSQRTSS,	yxm,	Pf3, {0x52} },
  1275  	{ ASAHF,	ynone,	Px, {0x86,0xe0,0x50,0x9d} },	/* XCHGB AH,AL; PUSH AX; POPFL */
  1276  	{ ASALB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
  1277  	{ ASALL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1278  	{ ASALQ,	yshl,	Pw, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1279  	{ ASALW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1280  	{ ASARB,	yshb,	Pb, {0xd0,(07),0xc0,(07),0xd2,(07)} },
  1281  	{ ASARL,	yshl,	Px, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
  1282  	{ ASARQ,	yshl,	Pw, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
  1283  	{ ASARW,	yshl,	Pe, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
  1284  	{ ASBBB,	yxorb,	Pb, {0x1c,0x80,(03),0x18,0x1a} },
  1285  	{ ASBBL,	yxorl,	Px, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
  1286  	{ ASBBQ,	yxorl,	Pw, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
  1287  	{ ASBBW,	yxorl,	Pe, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
  1288  	{ ASCASB,	ynone,	Pb, {0xae} },
  1289  	{ ASCASL,	ynone,	Px, {0xaf} },
  1290  	{ ASCASQ,	ynone,	Pw, {0xaf} },
  1291  	{ ASCASW,	ynone,	Pe, {0xaf} },
  1292  	{ ASETCC,	yscond,	Pm, {0x93,(00)} },
  1293  	{ ASETCS,	yscond,	Pm, {0x92,(00)} },
  1294  	{ ASETEQ,	yscond,	Pm, {0x94,(00)} },
  1295  	{ ASETGE,	yscond,	Pm, {0x9d,(00)} },
  1296  	{ ASETGT,	yscond,	Pm, {0x9f,(00)} },
  1297  	{ ASETHI,	yscond,	Pm, {0x97,(00)} },
  1298  	{ ASETLE,	yscond,	Pm, {0x9e,(00)} },
  1299  	{ ASETLS,	yscond,	Pm, {0x96,(00)} },
  1300  	{ ASETLT,	yscond,	Pm, {0x9c,(00)} },
  1301  	{ ASETMI,	yscond,	Pm, {0x98,(00)} },
  1302  	{ ASETNE,	yscond,	Pm, {0x95,(00)} },
  1303  	{ ASETOC,	yscond,	Pm, {0x91,(00)} },
  1304  	{ ASETOS,	yscond,	Pm, {0x90,(00)} },
  1305  	{ ASETPC,	yscond,	Pm, {0x9b,(00)} },
  1306  	{ ASETPL,	yscond,	Pm, {0x99,(00)} },
  1307  	{ ASETPS,	yscond,	Pm, {0x9a,(00)} },
  1308  	{ ASHLB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
  1309  	{ ASHLL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1310  	{ ASHLQ,	yshl,	Pw, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1311  	{ ASHLW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
  1312  	{ ASHRB,	yshb,	Pb, {0xd0,(05),0xc0,(05),0xd2,(05)} },
  1313  	{ ASHRL,	yshl,	Px, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
  1314  	{ ASHRQ,	yshl,	Pw, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
  1315  	{ ASHRW,	yshl,	Pe, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
  1316  	{ ASHUFPD,	yxshuf,	Pq, {0xc6,(00)} },
  1317  	{ ASHUFPS,	yxshuf,	Pm, {0xc6,(00)} },
  1318  	{ ASQRTPD,	yxm,	Pe, {0x51} },
  1319  	{ ASQRTPS,	yxm,	Pm, {0x51} },
  1320  	{ ASQRTSD,	yxm,	Pf2, {0x51} },
  1321  	{ ASQRTSS,	yxm,	Pf3, {0x51} },
  1322  	{ ASTC,		ynone,	Px, {0xf9} },
  1323  	{ ASTD,		ynone,	Px, {0xfd} },
  1324  	{ ASTI,		ynone,	Px, {0xfb} },
  1325  	{ ASTMXCSR,	ysvrs,	Pm, {0xae,(03),0xae,(03)} },
  1326  	{ ASTOSB,	ynone,	Pb, {0xaa} },
  1327  	{ ASTOSL,	ynone,	Px, {0xab} },
  1328  	{ ASTOSQ,	ynone,	Pw, {0xab} },
  1329  	{ ASTOSW,	ynone,	Pe, {0xab} },
  1330  	{ ASUBB,	yxorb,	Pb, {0x2c,0x80,(05),0x28,0x2a} },
  1331  	{ ASUBL,	yaddl,	Px, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
  1332  	{ ASUBPD,	yxm,	Pe, {0x5c} },
  1333  	{ ASUBPS,	yxm,	Pm, {0x5c} },
  1334  	{ ASUBQ,	yaddl,	Pw, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
  1335  	{ ASUBSD,	yxm,	Pf2, {0x5c} },
  1336  	{ ASUBSS,	yxm,	Pf3, {0x5c} },
  1337  	{ ASUBW,	yaddl,	Pe, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
  1338  	{ ASWAPGS,	ynone,	Pm, {0x01,0xf8} },
  1339  	{ ASYSCALL,	ynone,	Px, {0x0f,0x05} },	/* fast syscall */
  1340  	{ ATESTB,	ytestb,	Pb, {0xa8,0xf6,(00),0x84,0x84} },
  1341  	{ ATESTL,	ytestl,	Px, {0xa9,0xf7,(00),0x85,0x85} },
  1342  	{ ATESTQ,	ytestl,	Pw, {0xa9,0xf7,(00),0x85,0x85} },
  1343  	{ ATESTW,	ytestl,	Pe, {0xa9,0xf7,(00),0x85,0x85} },
  1344  	{ ATEXT,	ytext,	Px },
  1345  	{ AUCOMISD,	yxcmp,	Pe, {0x2e} },
  1346  	{ AUCOMISS,	yxcmp,	Pm, {0x2e} },
  1347  	{ AUNPCKHPD,	yxm,	Pe, {0x15} },
  1348  	{ AUNPCKHPS,	yxm,	Pm, {0x15} },
  1349  	{ AUNPCKLPD,	yxm,	Pe, {0x14} },
  1350  	{ AUNPCKLPS,	yxm,	Pm, {0x14} },
  1351  	{ AVERR,	ydivl,	Pm, {0x00,(04)} },
  1352  	{ AVERW,	ydivl,	Pm, {0x00,(05)} },
  1353  	{ AWAIT,	ynone,	Px, {0x9b} },
  1354  	{ AWORD,	ybyte,	Px, {2} },
  1355  	{ AXCHGB,	yml_mb,	Pb, {0x86,0x86} },
  1356  	{ AXCHGL,	yxchg,	Px, {0x90,0x90,0x87,0x87} },
  1357  	{ AXCHGQ,	yxchg,	Pw, {0x90,0x90,0x87,0x87} },
  1358  	{ AXCHGW,	yxchg,	Pe, {0x90,0x90,0x87,0x87} },
  1359  	{ AXLAT,	ynone,	Px, {0xd7} },
  1360  	{ AXORB,	yxorb,	Pb, {0x34,0x80,(06),0x30,0x32} },
  1361  	{ AXORL,	yxorl,	Px, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
  1362  	{ AXORPD,	yxm,	Pe, {0x57} },
  1363  	{ AXORPS,	yxm,	Pm, {0x57} },
  1364  	{ AXORQ,	yxorl,	Pw, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
  1365  	{ AXORW,	yxorl,	Pe, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
  1366  
  1367  	{ AFMOVB,	yfmvx,	Px, {0xdf,(04)} },
  1368  	{ AFMOVBP,	yfmvp,	Px, {0xdf,(06)} },
  1369  	{ AFMOVD,	yfmvd,	Px, {0xdd,(00),0xdd,(02),0xd9,(00),0xdd,(02)} },
  1370  	{ AFMOVDP,	yfmvdp,	Px, {0xdd,(03),0xdd,(03)} },
  1371  	{ AFMOVF,	yfmvf,	Px, {0xd9,(00),0xd9,(02)} },
  1372  	{ AFMOVFP,	yfmvp,	Px, {0xd9,(03)} },
  1373  	{ AFMOVL,	yfmvf,	Px, {0xdb,(00),0xdb,(02)} },
  1374  	{ AFMOVLP,	yfmvp,	Px, {0xdb,(03)} },
  1375  	{ AFMOVV,	yfmvx,	Px, {0xdf,(05)} },
  1376  	{ AFMOVVP,	yfmvp,	Px, {0xdf,(07)} },
  1377  	{ AFMOVW,	yfmvf,	Px, {0xdf,(00),0xdf,(02)} },
  1378  	{ AFMOVWP,	yfmvp,	Px, {0xdf,(03)} },
  1379  	{ AFMOVX,	yfmvx,	Px, {0xdb,(05)} },
  1380  	{ AFMOVXP,	yfmvp,	Px, {0xdb,(07)} },
  1381  
  1382  	{ AFCOMB },
  1383  	{ AFCOMBP },
  1384  	{ AFCOMD,	yfadd,	Px, {0xdc,(02),0xd8,(02),0xdc,(02)} },	/* botch */
  1385  	{ AFCOMDP,	yfadd,	Px, {0xdc,(03),0xd8,(03),0xdc,(03)} },	/* botch */
  1386  	{ AFCOMDPP,	ycompp,	Px, {0xde,(03)} },
  1387  	{ AFCOMF,	yfmvx,	Px, {0xd8,(02)} },
  1388  	{ AFCOMFP,	yfmvx,	Px, {0xd8,(03)} },
  1389  	{ AFCOML,	yfmvx,	Px, {0xda,(02)} },
  1390  	{ AFCOMLP,	yfmvx,	Px, {0xda,(03)} },
  1391  	{ AFCOMW,	yfmvx,	Px, {0xde,(02)} },
  1392  	{ AFCOMWP,	yfmvx,	Px, {0xde,(03)} },
  1393  
  1394  	{ AFUCOM,	ycompp,	Px, {0xdd,(04)} },
  1395  	{ AFUCOMP,	ycompp, Px, {0xdd,(05)} },
  1396  	{ AFUCOMPP,	ycompp,	Px, {0xda,(13)} },
  1397  
  1398  	{ AFADDDP,	yfaddp,	Px, {0xde,(00)} },
  1399  	{ AFADDW,	yfmvx,	Px, {0xde,(00)} },
  1400  	{ AFADDL,	yfmvx,	Px, {0xda,(00)} },
  1401  	{ AFADDF,	yfmvx,	Px, {0xd8,(00)} },
  1402  	{ AFADDD,	yfadd,	Px, {0xdc,(00),0xd8,(00),0xdc,(00)} },
  1403  
  1404  	{ AFMULDP,	yfaddp,	Px, {0xde,(01)} },
  1405  	{ AFMULW,	yfmvx,	Px, {0xde,(01)} },
  1406  	{ AFMULL,	yfmvx,	Px, {0xda,(01)} },
  1407  	{ AFMULF,	yfmvx,	Px, {0xd8,(01)} },
  1408  	{ AFMULD,	yfadd,	Px, {0xdc,(01),0xd8,(01),0xdc,(01)} },
  1409  
  1410  	{ AFSUBDP,	yfaddp,	Px, {0xde,(05)} },
  1411  	{ AFSUBW,	yfmvx,	Px, {0xde,(04)} },
  1412  	{ AFSUBL,	yfmvx,	Px, {0xda,(04)} },
  1413  	{ AFSUBF,	yfmvx,	Px, {0xd8,(04)} },
  1414  	{ AFSUBD,	yfadd,	Px, {0xdc,(04),0xd8,(04),0xdc,(05)} },
  1415  
  1416  	{ AFSUBRDP,	yfaddp,	Px, {0xde,(04)} },
  1417  	{ AFSUBRW,	yfmvx,	Px, {0xde,(05)} },
  1418  	{ AFSUBRL,	yfmvx,	Px, {0xda,(05)} },
  1419  	{ AFSUBRF,	yfmvx,	Px, {0xd8,(05)} },
  1420  	{ AFSUBRD,	yfadd,	Px, {0xdc,(05),0xd8,(05),0xdc,(04)} },
  1421  
  1422  	{ AFDIVDP,	yfaddp,	Px, {0xde,(07)} },
  1423  	{ AFDIVW,	yfmvx,	Px, {0xde,(06)} },
  1424  	{ AFDIVL,	yfmvx,	Px, {0xda,(06)} },
  1425  	{ AFDIVF,	yfmvx,	Px, {0xd8,(06)} },
  1426  	{ AFDIVD,	yfadd,	Px, {0xdc,(06),0xd8,(06),0xdc,(07)} },
  1427  
  1428  	{ AFDIVRDP,	yfaddp,	Px, {0xde,(06)} },
  1429  	{ AFDIVRW,	yfmvx,	Px, {0xde,(07)} },
  1430  	{ AFDIVRL,	yfmvx,	Px, {0xda,(07)} },
  1431  	{ AFDIVRF,	yfmvx,	Px, {0xd8,(07)} },
  1432  	{ AFDIVRD,	yfadd,	Px, {0xdc,(07),0xd8,(07),0xdc,(06)} },
  1433  
  1434  	{ AFXCHD,	yfxch,	Px, {0xd9,(01),0xd9,(01)} },
  1435  	{ AFFREE },
  1436  	{ AFLDCW,	ystcw,	Px, {0xd9,(05),0xd9,(05)} },
  1437  	{ AFLDENV,	ystcw,	Px, {0xd9,(04),0xd9,(04)} },
  1438  	{ AFRSTOR,	ysvrs,	Px, {0xdd,(04),0xdd,(04)} },
  1439  	{ AFSAVE,	ysvrs,	Px, {0xdd,(06),0xdd,(06)} },
  1440  	{ AFSTCW,	ystcw,	Px, {0xd9,(07),0xd9,(07)} },
  1441  	{ AFSTENV,	ystcw,	Px, {0xd9,(06),0xd9,(06)} },
  1442  	{ AFSTSW,	ystsw,	Px, {0xdd,(07),0xdf,0xe0} },
  1443  	{ AF2XM1,	ynone,	Px, {0xd9, 0xf0} },
  1444  	{ AFABS,	ynone,	Px, {0xd9, 0xe1} },
  1445  	{ AFCHS,	ynone,	Px, {0xd9, 0xe0} },
  1446  	{ AFCLEX,	ynone,	Px, {0xdb, 0xe2} },
  1447  	{ AFCOS,	ynone,	Px, {0xd9, 0xff} },
  1448  	{ AFDECSTP,	ynone,	Px, {0xd9, 0xf6} },
  1449  	{ AFINCSTP,	ynone,	Px, {0xd9, 0xf7} },
  1450  	{ AFINIT,	ynone,	Px, {0xdb, 0xe3} },
  1451  	{ AFLD1,	ynone,	Px, {0xd9, 0xe8} },
  1452  	{ AFLDL2E,	ynone,	Px, {0xd9, 0xea} },
  1453  	{ AFLDL2T,	ynone,	Px, {0xd9, 0xe9} },
  1454  	{ AFLDLG2,	ynone,	Px, {0xd9, 0xec} },
  1455  	{ AFLDLN2,	ynone,	Px, {0xd9, 0xed} },
  1456  	{ AFLDPI,	ynone,	Px, {0xd9, 0xeb} },
  1457  	{ AFLDZ,	ynone,	Px, {0xd9, 0xee} },
  1458  	{ AFNOP,	ynone,	Px, {0xd9, 0xd0} },
  1459  	{ AFPATAN,	ynone,	Px, {0xd9, 0xf3} },
  1460  	{ AFPREM,	ynone,	Px, {0xd9, 0xf8} },
  1461  	{ AFPREM1,	ynone,	Px, {0xd9, 0xf5} },
  1462  	{ AFPTAN,	ynone,	Px, {0xd9, 0xf2} },
  1463  	{ AFRNDINT,	ynone,	Px, {0xd9, 0xfc} },
  1464  	{ AFSCALE,	ynone,	Px, {0xd9, 0xfd} },
  1465  	{ AFSIN,	ynone,	Px, {0xd9, 0xfe} },
  1466  	{ AFSINCOS,	ynone,	Px, {0xd9, 0xfb} },
  1467  	{ AFSQRT,	ynone,	Px, {0xd9, 0xfa} },
  1468  	{ AFTST,	ynone,	Px, {0xd9, 0xe4} },
  1469  	{ AFXAM,	ynone,	Px, {0xd9, 0xe5} },
  1470  	{ AFXTRACT,	ynone,	Px, {0xd9, 0xf4} },
  1471  	{ AFYL2X,	ynone,	Px, {0xd9, 0xf1} },
  1472  	{ AFYL2XP1,	ynone,	Px, {0xd9, 0xf9} },
  1473  
  1474  	{ ACMPXCHGB,	yrb_mb,	Pb, {0x0f,0xb0} },
  1475  	{ ACMPXCHGL,	yrl_ml,	Px, {0x0f,0xb1} },
  1476  	{ ACMPXCHGW,	yrl_ml,	Pe, {0x0f,0xb1} },
  1477  	{ ACMPXCHGQ,	yrl_ml,	Pw, {0x0f,0xb1} },
  1478  	{ ACMPXCHG8B,	yscond,	Pm, {0xc7,(01)} },
  1479  	{ AINVD,	ynone,	Pm, {0x08} },
  1480  	{ AINVLPG,	ymbs,	Pm, {0x01,(07)} },
  1481  	{ ALFENCE,	ynone,	Pm, {0xae,0xe8} },
  1482  	{ AMFENCE,	ynone,	Pm, {0xae,0xf0} },
  1483  	{ AMOVNTIL,	yrl_ml,	Pm, {0xc3} },
  1484  	{ AMOVNTIQ,	yrl_ml, Pw, {0x0f,0xc3} },
  1485  	{ ARDMSR,	ynone,	Pm, {0x32} },
  1486  	{ ARDPMC,	ynone,	Pm, {0x33} },
  1487  	{ ARDTSC,	ynone,	Pm, {0x31} },
  1488  	{ ARSM,		ynone,	Pm, {0xaa} },
  1489  	{ ASFENCE,	ynone,	Pm, {0xae,0xf8} },
  1490  	{ ASYSRET,	ynone,	Pm, {0x07} },
  1491  	{ AWBINVD,	ynone,	Pm, {0x09} },
  1492  	{ AWRMSR,	ynone,	Pm, {0x30} },
  1493  
  1494  	{ AXADDB,	yrb_mb,	Pb, {0x0f,0xc0} },
  1495  	{ AXADDL,	yrl_ml,	Px, {0x0f,0xc1} },
  1496  	{ AXADDQ,	yrl_ml,	Pw, {0x0f,0xc1} },
  1497  	{ AXADDW,	yrl_ml,	Pe, {0x0f,0xc1} },
  1498  
  1499  	{ ACRC32B,       ycrc32l,Px, {0xf2,0x0f,0x38,0xf0,0} },
  1500  	{ ACRC32Q,       ycrc32l,Pw, {0xf2,0x0f,0x38,0xf1,0} },
  1501  	
  1502  	{ APREFETCHT0,	yprefetch,	Pm,	{0x18,(01)} },
  1503  	{ APREFETCHT1,	yprefetch,	Pm,	{0x18,(02)} },
  1504  	{ APREFETCHT2,	yprefetch,	Pm,	{0x18,(03)} },
  1505  	{ APREFETCHNTA,	yprefetch,	Pm,	{0x18,(00)} },
  1506  	
  1507  	{ AMOVQL,	yrl_ml,	Px, {0x89} },
  1508  
  1509  	{ AUNDEF,		ynone,	Px, {0x0f, 0x0b} },
  1510  
  1511  	{ AAESENC,	yaes,	Pq, {0x38,0xdc,(0)} },
  1512  	{ AAESENCLAST,	yaes,	Pq, {0x38,0xdd,(0)} },
  1513  	{ AAESDEC,	yaes,	Pq, {0x38,0xde,(0)} },
  1514  	{ AAESDECLAST,	yaes,	Pq, {0x38,0xdf,(0)} },
  1515  	{ AAESIMC,	yaes,	Pq, {0x38,0xdb,(0)} },
  1516  	{ AAESKEYGENASSIST,	yaes2,	Pq, {0x3a,0xdf,(0)} },
  1517  
  1518  	{ APSHUFD,	yaes2,	Pq,	{0x70,(0)} },
  1519  	{ APCLMULQDQ,	yxshuf,	Pq, {0x3a,0x44,0} },
  1520  
  1521  	{ AUSEFIELD,	ynop,	Px, {0,0} },
  1522  	{ ATYPE },
  1523  	{ AFUNCDATA,	yfuncdata,	Px, {0,0} },
  1524  	{ APCDATA,	ypcdata,	Px, {0,0} },
  1525  	{ ACHECKNIL },
  1526  	{ AVARDEF },
  1527  	{ AVARKILL },
  1528  	{ ADUFFCOPY,	yduff,	Px, {0xe8} },
  1529  	{ ADUFFZERO,	yduff,	Px, {0xe8} },
  1530  
  1531  	{ AEND },
  1532  	{0}
  1533  };
  1534  
  1535  static Optab*	opindex[ALAST+1];
  1536  static vlong	vaddr(Link*, Addr*, Reloc*);
  1537  
  1538  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1539  // This happens on systems like Solaris that call .so functions instead of system calls.
  1540  // It does not seem to be necessary for any other systems. This is probably working
  1541  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1542  // what that bug is. And this does fix it.
  1543  static int
  1544  isextern(LSym *s)
  1545  {
  1546  	// All the Solaris dynamic imports from libc.so begin with "libc·", which
  1547  	// the compiler rewrites to "libc." by the time liblink gets it.
  1548  	// Haiku also uses this for dynamic imports but from libroot instead.
  1549  #if defined(__HAIKU__)
  1550  	return strncmp(s->name, "libroot.", 9) == 0;
  1551  #else
  1552  	return strncmp(s->name, "libc.", 5) == 0;
  1553  #endif
  1554  }
  1555  
  1556  // single-instruction no-ops of various lengths.
  1557  // constructed by hand and disassembled with gdb to verify.
  1558  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1559  static uchar nop[][16] = {
  1560  	{0x90},
  1561  	{0x66, 0x90},
  1562  	{0x0F, 0x1F, 0x00},
  1563  	{0x0F, 0x1F, 0x40, 0x00},
  1564  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1565  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1566  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1567  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1568  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1569  	// Native Client rejects the repeated 0x66 prefix.
  1570  	// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1571  };
  1572  
  1573  static void
  1574  fillnop(uchar *p, int n)
  1575  {
  1576  	int m;
  1577  
  1578  	while(n > 0) {
  1579  		m = n;
  1580  		if(m > nelem(nop))
  1581  			m = nelem(nop);
  1582  		memmove(p, nop[m-1], m);
  1583  		p += m;
  1584  		n -= m;
  1585  	}
  1586  }
  1587  
  1588  static void instinit(void);
  1589  
  1590  static int32
  1591  naclpad(Link *ctxt, LSym *s, int32 c, int32 pad)
  1592  {
  1593  	symgrow(ctxt, s, c+pad);
  1594  	fillnop(s->p+c, pad);
  1595  	return c+pad;
  1596  }
  1597  
  1598  static int
  1599  spadjop(Link *ctxt, Prog *p, int l, int q)
  1600  {
  1601  	if(p->mode != 64 || ctxt->arch->ptrsize == 4)
  1602  		return l;
  1603  	return q;
  1604  }
  1605  
  1606  void
  1607  span6(Link *ctxt, LSym *s)
  1608  {
  1609  	Prog *p, *q;
  1610  	int32 c, v, loop;
  1611  	uchar *bp;
  1612  	int n, m, i;
  1613  
  1614  	ctxt->cursym = s;
  1615  	
  1616  	if(s->p != nil)
  1617  		return;
  1618  	
  1619  	if(ycover[0] == 0)
  1620  		instinit();
  1621  	
  1622  	for(p = ctxt->cursym->text; p != nil; p = p->link) {
  1623  		n = 0;
  1624  		if(p->to.type == D_BRANCH)
  1625  			if(p->pcond == nil)
  1626  				p->pcond = p;
  1627  		if((q = p->pcond) != nil)
  1628  			if(q->back != 2)
  1629  				n = 1;
  1630  		p->back = n;
  1631  		if(p->as == AADJSP) {
  1632  			p->to.type = D_SP;
  1633  			v = -p->from.offset;
  1634  			p->from.offset = v;
  1635  			p->as = spadjop(ctxt, p, AADDL, AADDQ);
  1636  			if(v < 0) {
  1637  				p->as = spadjop(ctxt, p, ASUBL, ASUBQ);
  1638  				v = -v;
  1639  				p->from.offset = v;
  1640  			}
  1641  			if(v == 0)
  1642  				p->as = ANOP;
  1643  		}
  1644  	}
  1645  
  1646  	for(p = s->text; p != nil; p = p->link) {
  1647  		p->back = 2;	// use short branches first time through
  1648  		if((q = p->pcond) != nil && (q->back & 2)) {
  1649  			p->back |= 1;	// backward jump
  1650  			q->back |= 4;   // loop head
  1651  		}
  1652  
  1653  		if(p->as == AADJSP) {
  1654  			p->to.type = D_SP;
  1655  			v = -p->from.offset;
  1656  			p->from.offset = v;
  1657  			p->as = spadjop(ctxt, p, AADDL, AADDQ);
  1658  			if(v < 0) {
  1659  				p->as = spadjop(ctxt, p, ASUBL, ASUBQ);
  1660  				v = -v;
  1661  				p->from.offset = v;
  1662  			}
  1663  			if(v == 0)
  1664  				p->as = ANOP;
  1665  		}
  1666  	}
  1667  	
  1668  	n = 0;
  1669  	do {
  1670  		loop = 0;
  1671  		memset(s->r, 0, s->nr*sizeof s->r[0]);
  1672  		s->nr = 0;
  1673  		s->np = 0;
  1674  		c = 0;
  1675  		for(p = s->text; p != nil; p = p->link) {
  1676  			if(ctxt->headtype == Hnacl && p->isize > 0) {
  1677  				static LSym *deferreturn;
  1678  				
  1679  				if(deferreturn == nil)
  1680  					deferreturn = linklookup(ctxt, "runtime.deferreturn", 0);
  1681  
  1682  				// pad everything to avoid crossing 32-byte boundary
  1683  				if((c>>5) != ((c+p->isize-1)>>5))
  1684  					c = naclpad(ctxt, s, c, -c&31);
  1685  				// pad call deferreturn to start at 32-byte boundary
  1686  				// so that subtracting 5 in jmpdefer will jump back
  1687  				// to that boundary and rerun the call.
  1688  				if(p->as == ACALL && p->to.sym == deferreturn)
  1689  					c = naclpad(ctxt, s, c, -c&31);
  1690  				// pad call to end at 32-byte boundary
  1691  				if(p->as == ACALL)
  1692  					c = naclpad(ctxt, s, c, -(c+p->isize)&31);
  1693  				
  1694  				// the linker treats REP and STOSQ as different instructions
  1695  				// but in fact the REP is a prefix on the STOSQ.
  1696  				// make sure REP has room for 2 more bytes, so that
  1697  				// padding will not be inserted before the next instruction.
  1698  				if((p->as == AREP || p->as == AREPN) && (c>>5) != ((c+3-1)>>5))
  1699  					c = naclpad(ctxt, s, c, -c&31);
  1700  				
  1701  				// same for LOCK.
  1702  				// various instructions follow; the longest is 4 bytes.
  1703  				// give ourselves 8 bytes so as to avoid surprises.
  1704  				if(p->as == ALOCK && (c>>5) != ((c+8-1)>>5))
  1705  					c = naclpad(ctxt, s, c, -c&31);
  1706  			}
  1707  
  1708  			if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
  1709  				// pad with NOPs
  1710  				v = -c&(LoopAlign-1);
  1711  				if(v <= MaxLoopPad) {
  1712  					symgrow(ctxt, s, c+v);
  1713  					fillnop(s->p+c, v);
  1714  					c += v;
  1715  				}
  1716  			}
  1717  
  1718  			p->pc = c;
  1719  
  1720  			// process forward jumps to p
  1721  			for(q = p->comefrom; q != nil; q = q->forwd) {
  1722  				v = p->pc - (q->pc + q->mark);
  1723  				if(q->back & 2)	{	// short
  1724  					if(v > 127) {
  1725  						loop++;
  1726  						q->back ^= 2;
  1727  					}
  1728  					if(q->as == AJCXZL)
  1729  						s->p[q->pc+2] = v;
  1730  					else
  1731  						s->p[q->pc+1] = v;
  1732  				} else {
  1733  					bp = s->p + q->pc + q->mark - 4;
  1734  					*bp++ = v;
  1735  					*bp++ = v>>8;
  1736  					*bp++ = v>>16;
  1737  					*bp = v>>24;
  1738  				}	
  1739  			}
  1740  			p->comefrom = nil;
  1741  
  1742  			p->pc = c;
  1743  			asmins(ctxt, p);
  1744  			m = ctxt->andptr-ctxt->and;
  1745  			if(p->isize != m) {
  1746  				p->isize = m;
  1747  				loop++;
  1748  			}
  1749  			symgrow(ctxt, s, p->pc+m);
  1750  			memmove(s->p+p->pc, ctxt->and, m);
  1751  			p->mark = m;
  1752  			c += m;
  1753  		}
  1754  		if(++n > 20) {
  1755  			ctxt->diag("span must be looping");
  1756  			sysfatal("loop");
  1757  		}
  1758  	} while(loop);
  1759  	
  1760  	if(ctxt->headtype == Hnacl)
  1761  		c = naclpad(ctxt, s, c, -c&31);
  1762  	
  1763  	c += -c&(FuncAlign-1);
  1764  	s->size = c;
  1765  
  1766  	if(0 /* debug['a'] > 1 */) {
  1767  		print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
  1768  		for(i=0; i<s->np; i++) {
  1769  			print(" %.2ux", s->p[i]);
  1770  			if(i%16 == 15)
  1771  				print("\n  %.6ux", i+1);
  1772  		}
  1773  		if(i%16)
  1774  			print("\n");
  1775  	
  1776  		for(i=0; i<s->nr; i++) {
  1777  			Reloc *r;
  1778  			
  1779  			r = &s->r[i];
  1780  			print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
  1781  		}
  1782  	}
  1783  }
  1784  
  1785  static void
  1786  instinit(void)
  1787  {
  1788  	int c, i;
  1789  
  1790  	for(i=1; optab[i].as; i++) {
  1791  		c = optab[i].as;
  1792  		if(opindex[c] != nil)
  1793  			sysfatal("phase error in optab: %d (%A)", i, c);
  1794  		opindex[c] = &optab[i];
  1795  	}
  1796  
  1797  	for(i=0; i<Ymax; i++)
  1798  		ycover[i*Ymax + i] = 1;
  1799  
  1800  	ycover[Yi0*Ymax + Yi8] = 1;
  1801  	ycover[Yi1*Ymax + Yi8] = 1;
  1802  
  1803  	ycover[Yi0*Ymax + Ys32] = 1;
  1804  	ycover[Yi1*Ymax + Ys32] = 1;
  1805  	ycover[Yi8*Ymax + Ys32] = 1;
  1806  
  1807  	ycover[Yi0*Ymax + Yi32] = 1;
  1808  	ycover[Yi1*Ymax + Yi32] = 1;
  1809  	ycover[Yi8*Ymax + Yi32] = 1;
  1810  	ycover[Ys32*Ymax + Yi32] = 1;
  1811  
  1812  	ycover[Yi0*Ymax + Yi64] = 1;
  1813  	ycover[Yi1*Ymax + Yi64] = 1;
  1814  	ycover[Yi8*Ymax + Yi64] = 1;
  1815  	ycover[Ys32*Ymax + Yi64] = 1;
  1816  	ycover[Yi32*Ymax + Yi64] = 1;
  1817  
  1818  	ycover[Yal*Ymax + Yrb] = 1;
  1819  	ycover[Ycl*Ymax + Yrb] = 1;
  1820  	ycover[Yax*Ymax + Yrb] = 1;
  1821  	ycover[Ycx*Ymax + Yrb] = 1;
  1822  	ycover[Yrx*Ymax + Yrb] = 1;
  1823  	ycover[Yrl*Ymax + Yrb] = 1;
  1824  
  1825  	ycover[Ycl*Ymax + Ycx] = 1;
  1826  
  1827  	ycover[Yax*Ymax + Yrx] = 1;
  1828  	ycover[Ycx*Ymax + Yrx] = 1;
  1829  
  1830  	ycover[Yax*Ymax + Yrl] = 1;
  1831  	ycover[Ycx*Ymax + Yrl] = 1;
  1832  	ycover[Yrx*Ymax + Yrl] = 1;
  1833  
  1834  	ycover[Yf0*Ymax + Yrf] = 1;
  1835  
  1836  	ycover[Yal*Ymax + Ymb] = 1;
  1837  	ycover[Ycl*Ymax + Ymb] = 1;
  1838  	ycover[Yax*Ymax + Ymb] = 1;
  1839  	ycover[Ycx*Ymax + Ymb] = 1;
  1840  	ycover[Yrx*Ymax + Ymb] = 1;
  1841  	ycover[Yrb*Ymax + Ymb] = 1;
  1842  	ycover[Yrl*Ymax + Ymb] = 1;
  1843  	ycover[Ym*Ymax + Ymb] = 1;
  1844  
  1845  	ycover[Yax*Ymax + Yml] = 1;
  1846  	ycover[Ycx*Ymax + Yml] = 1;
  1847  	ycover[Yrx*Ymax + Yml] = 1;
  1848  	ycover[Yrl*Ymax + Yml] = 1;
  1849  	ycover[Ym*Ymax + Yml] = 1;
  1850  
  1851  	ycover[Yax*Ymax + Ymm] = 1;
  1852  	ycover[Ycx*Ymax + Ymm] = 1;
  1853  	ycover[Yrx*Ymax + Ymm] = 1;
  1854  	ycover[Yrl*Ymax + Ymm] = 1;
  1855  	ycover[Ym*Ymax + Ymm] = 1;
  1856  	ycover[Ymr*Ymax + Ymm] = 1;
  1857  
  1858  	ycover[Ym*Ymax + Yxm] = 1;
  1859  	ycover[Yxr*Ymax + Yxm] = 1;
  1860  
  1861  	for(i=0; i<D_NONE; i++) {
  1862  		reg[i] = -1;
  1863  		if(i >= D_AL && i <= D_R15B) {
  1864  			reg[i] = (i-D_AL) & 7;
  1865  			if(i >= D_SPB && i <= D_DIB)
  1866  				regrex[i] = 0x40;
  1867  			if(i >= D_R8B && i <= D_R15B)
  1868  				regrex[i] = Rxr | Rxx | Rxb;
  1869  		}
  1870  		if(i >= D_AH && i<= D_BH)
  1871  			reg[i] = 4 + ((i-D_AH) & 7);
  1872  		if(i >= D_AX && i <= D_R15) {
  1873  			reg[i] = (i-D_AX) & 7;
  1874  			if(i >= D_R8)
  1875  				regrex[i] = Rxr | Rxx | Rxb;
  1876  		}
  1877  		if(i >= D_F0 && i <= D_F0+7)
  1878  			reg[i] = (i-D_F0) & 7;
  1879  		if(i >= D_M0 && i <= D_M0+7)
  1880  			reg[i] = (i-D_M0) & 7;
  1881  		if(i >= D_X0 && i <= D_X0+15) {
  1882  			reg[i] = (i-D_X0) & 7;
  1883  			if(i >= D_X0+8)
  1884  				regrex[i] = Rxr | Rxx | Rxb;
  1885  		}
  1886  		if(i >= D_CR+8 && i <= D_CR+15)
  1887  			regrex[i] = Rxr;
  1888  	}
  1889  }
  1890  
  1891  static int
  1892  prefixof(Link *ctxt, Addr *a)
  1893  {
  1894  	switch(a->type) {
  1895  	case D_INDIR+D_CS:
  1896  		return 0x2e;
  1897  	case D_INDIR+D_DS:
  1898  		return 0x3e;
  1899  	case D_INDIR+D_ES:
  1900  		return 0x26;
  1901  	case D_INDIR+D_FS:
  1902  		return 0x64;
  1903  	case D_INDIR+D_GS:
  1904  		return 0x65;
  1905  	case D_INDIR+D_TLS:
  1906  		// NOTE: Systems listed here should be only systems that
  1907  		// support direct TLS references like 8(TLS) implemented as
  1908  		// direct references from FS or GS. Systems that require
  1909  		// the initial-exec model, where you load the TLS base into
  1910  		// a register and then index from that register, do not reach
  1911  		// this code and should not be listed.
  1912  		switch(ctxt->headtype) {
  1913  		default:
  1914  			sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype));
  1915  		case Hdragonfly:
  1916  		case Hfreebsd:
  1917  		case Hlinux:
  1918  		case Hnetbsd:
  1919  		case Hopenbsd:
  1920  		case Hsolaris:
  1921  			return 0x64; // FS
  1922  		case Hdarwin:
  1923  			return 0x65; // GS
  1924  		}
  1925  	}
  1926  	switch(a->index) {
  1927  	case D_CS:
  1928  		return 0x2e;
  1929  	case D_DS:
  1930  		return 0x3e;
  1931  	case D_ES:
  1932  		return 0x26;
  1933  	case D_FS:
  1934  		return 0x64;
  1935  	case D_GS:
  1936  		return 0x65;
  1937  	}
  1938  	return 0;
  1939  }
  1940  
  1941  static int
  1942  oclass(Link *ctxt, Addr *a)
  1943  {
  1944  	vlong v;
  1945  	int32 l;
  1946  
  1947  	if(a->type >= D_INDIR || a->index != D_NONE) {
  1948  		if(a->index != D_NONE && a->scale == 0) {
  1949  			if(a->type == D_ADDR) {
  1950  				switch(a->index) {
  1951  				case D_EXTERN:
  1952  				case D_STATIC:
  1953  					if(a->sym != nil && isextern(a->sym))
  1954  						return Yi32;
  1955  					return Yiauto; // use pc-relative addressing
  1956  				case D_AUTO:
  1957  				case D_PARAM:
  1958  					return Yiauto;
  1959  				}
  1960  				return Yxxx;
  1961  			}
  1962  			return Ycol;
  1963  		}
  1964  		return Ym;
  1965  	}
  1966  	switch(a->type)
  1967  	{
  1968  	case D_AL:
  1969  		return Yal;
  1970  
  1971  	case D_AX:
  1972  		return Yax;
  1973  
  1974  /*
  1975  	case D_SPB:
  1976  */
  1977  	case D_BPB:
  1978  	case D_SIB:
  1979  	case D_DIB:
  1980  	case D_R8B:
  1981  	case D_R9B:
  1982  	case D_R10B:
  1983  	case D_R11B:
  1984  	case D_R12B:
  1985  	case D_R13B:
  1986  	case D_R14B:
  1987  	case D_R15B:
  1988  		if(ctxt->asmode != 64)
  1989  			return Yxxx;
  1990  	case D_DL:
  1991  	case D_BL:
  1992  	case D_AH:
  1993  	case D_CH:
  1994  	case D_DH:
  1995  	case D_BH:
  1996  		return Yrb;
  1997  
  1998  	case D_CL:
  1999  		return Ycl;
  2000  
  2001  	case D_CX:
  2002  		return Ycx;
  2003  
  2004  	case D_DX:
  2005  	case D_BX:
  2006  		return Yrx;
  2007  
  2008  	case D_R8:	/* not really Yrl */
  2009  	case D_R9:
  2010  	case D_R10:
  2011  	case D_R11:
  2012  	case D_R12:
  2013  	case D_R13:
  2014  	case D_R14:
  2015  	case D_R15:
  2016  		if(ctxt->asmode != 64)
  2017  			return Yxxx;
  2018  	case D_SP:
  2019  	case D_BP:
  2020  	case D_SI:
  2021  	case D_DI:
  2022  		return Yrl;
  2023  
  2024  	case D_F0+0:
  2025  		return	Yf0;
  2026  
  2027  	case D_F0+1:
  2028  	case D_F0+2:
  2029  	case D_F0+3:
  2030  	case D_F0+4:
  2031  	case D_F0+5:
  2032  	case D_F0+6:
  2033  	case D_F0+7:
  2034  		return	Yrf;
  2035  
  2036  	case D_M0+0:
  2037  	case D_M0+1:
  2038  	case D_M0+2:
  2039  	case D_M0+3:
  2040  	case D_M0+4:
  2041  	case D_M0+5:
  2042  	case D_M0+6:
  2043  	case D_M0+7:
  2044  		return	Ymr;
  2045  
  2046  	case D_X0+0:
  2047  	case D_X0+1:
  2048  	case D_X0+2:
  2049  	case D_X0+3:
  2050  	case D_X0+4:
  2051  	case D_X0+5:
  2052  	case D_X0+6:
  2053  	case D_X0+7:
  2054  	case D_X0+8:
  2055  	case D_X0+9:
  2056  	case D_X0+10:
  2057  	case D_X0+11:
  2058  	case D_X0+12:
  2059  	case D_X0+13:
  2060  	case D_X0+14:
  2061  	case D_X0+15:
  2062  		return	Yxr;
  2063  
  2064  	case D_NONE:
  2065  		return Ynone;
  2066  
  2067  	case D_CS:	return	Ycs;
  2068  	case D_SS:	return	Yss;
  2069  	case D_DS:	return	Yds;
  2070  	case D_ES:	return	Yes;
  2071  	case D_FS:	return	Yfs;
  2072  	case D_GS:	return	Ygs;
  2073  	case D_TLS:	return	Ytls;
  2074  
  2075  	case D_GDTR:	return	Ygdtr;
  2076  	case D_IDTR:	return	Yidtr;
  2077  	case D_LDTR:	return	Yldtr;
  2078  	case D_MSW:	return	Ymsw;
  2079  	case D_TASK:	return	Ytask;
  2080  
  2081  	case D_CR+0:	return	Ycr0;
  2082  	case D_CR+1:	return	Ycr1;
  2083  	case D_CR+2:	return	Ycr2;
  2084  	case D_CR+3:	return	Ycr3;
  2085  	case D_CR+4:	return	Ycr4;
  2086  	case D_CR+5:	return	Ycr5;
  2087  	case D_CR+6:	return	Ycr6;
  2088  	case D_CR+7:	return	Ycr7;
  2089  	case D_CR+8:	return	Ycr8;
  2090  
  2091  	case D_DR+0:	return	Ydr0;
  2092  	case D_DR+1:	return	Ydr1;
  2093  	case D_DR+2:	return	Ydr2;
  2094  	case D_DR+3:	return	Ydr3;
  2095  	case D_DR+4:	return	Ydr4;
  2096  	case D_DR+5:	return	Ydr5;
  2097  	case D_DR+6:	return	Ydr6;
  2098  	case D_DR+7:	return	Ydr7;
  2099  
  2100  	case D_TR+0:	return	Ytr0;
  2101  	case D_TR+1:	return	Ytr1;
  2102  	case D_TR+2:	return	Ytr2;
  2103  	case D_TR+3:	return	Ytr3;
  2104  	case D_TR+4:	return	Ytr4;
  2105  	case D_TR+5:	return	Ytr5;
  2106  	case D_TR+6:	return	Ytr6;
  2107  	case D_TR+7:	return	Ytr7;
  2108  
  2109  	case D_EXTERN:
  2110  	case D_STATIC:
  2111  	case D_AUTO:
  2112  	case D_PARAM:
  2113  		return Ym;
  2114  
  2115  	case D_CONST:
  2116  	case D_ADDR:
  2117  		if(a->sym == nil) {
  2118  			v = a->offset;
  2119  			if(v == 0)
  2120  				return Yi0;
  2121  			if(v == 1)
  2122  				return Yi1;
  2123  			if(v >= -128 && v <= 127)
  2124  				return Yi8;
  2125  			l = v;
  2126  			if((vlong)l == v)
  2127  				return Ys32;	/* can sign extend */
  2128  			if((v>>32) == 0)
  2129  				return Yi32;	/* unsigned */
  2130  			return Yi64;
  2131  		}
  2132  		return Yi32;
  2133  
  2134  	case D_BRANCH:
  2135  		return Ybr;
  2136  	}
  2137  	return Yxxx;
  2138  }
  2139  
  2140  static void
  2141  asmidx(Link *ctxt, int scale, int index, int base)
  2142  {
  2143  	int i;
  2144  
  2145  	switch(index) {
  2146  	default:
  2147  		goto bad;
  2148  
  2149  	case D_NONE:
  2150  		i = 4 << 3;
  2151  		goto bas;
  2152  
  2153  	case D_R8:
  2154  	case D_R9:
  2155  	case D_R10:
  2156  	case D_R11:
  2157  	case D_R12:
  2158  	case D_R13:
  2159  	case D_R14:
  2160  	case D_R15:
  2161  		if(ctxt->asmode != 64)
  2162  			goto bad;
  2163  	case D_AX:
  2164  	case D_CX:
  2165  	case D_DX:
  2166  	case D_BX:
  2167  	case D_BP:
  2168  	case D_SI:
  2169  	case D_DI:
  2170  		i = reg[index] << 3;
  2171  		break;
  2172  	}
  2173  	switch(scale) {
  2174  	default:
  2175  		goto bad;
  2176  	case 1:
  2177  		break;
  2178  	case 2:
  2179  		i |= (1<<6);
  2180  		break;
  2181  	case 4:
  2182  		i |= (2<<6);
  2183  		break;
  2184  	case 8:
  2185  		i |= (3<<6);
  2186  		break;
  2187  	}
  2188  bas:
  2189  	switch(base) {
  2190  	default:
  2191  		goto bad;
  2192  	case D_NONE:	/* must be mod=00 */
  2193  		i |= 5;
  2194  		break;
  2195  	case D_R8:
  2196  	case D_R9:
  2197  	case D_R10:
  2198  	case D_R11:
  2199  	case D_R12:
  2200  	case D_R13:
  2201  	case D_R14:
  2202  	case D_R15:
  2203  		if(ctxt->asmode != 64)
  2204  			goto bad;
  2205  	case D_AX:
  2206  	case D_CX:
  2207  	case D_DX:
  2208  	case D_BX:
  2209  	case D_SP:
  2210  	case D_BP:
  2211  	case D_SI:
  2212  	case D_DI:
  2213  		i |= reg[base];
  2214  		break;
  2215  	}
  2216  	*ctxt->andptr++ = i;
  2217  	return;
  2218  bad:
  2219  	ctxt->diag("asmidx: bad address %d/%d/%d", scale, index, base);
  2220  	*ctxt->andptr++ = 0;
  2221  	return;
  2222  }
  2223  
  2224  static void
  2225  put4(Link *ctxt, int32 v)
  2226  {
  2227  	ctxt->andptr[0] = v;
  2228  	ctxt->andptr[1] = v>>8;
  2229  	ctxt->andptr[2] = v>>16;
  2230  	ctxt->andptr[3] = v>>24;
  2231  	ctxt->andptr += 4;
  2232  }
  2233  
  2234  static void
  2235  relput4(Link *ctxt, Prog *p, Addr *a)
  2236  {
  2237  	vlong v;
  2238  	Reloc rel, *r;
  2239  	
  2240  	v = vaddr(ctxt, a, &rel);
  2241  	if(rel.siz != 0) {
  2242  		if(rel.siz != 4)
  2243  			ctxt->diag("bad reloc");
  2244  		r = addrel(ctxt->cursym);
  2245  		*r = rel;
  2246  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2247  	}
  2248  	put4(ctxt, v);
  2249  }
  2250  
  2251  static void
  2252  put8(Link *ctxt, vlong v)
  2253  {
  2254  	ctxt->andptr[0] = v;
  2255  	ctxt->andptr[1] = v>>8;
  2256  	ctxt->andptr[2] = v>>16;
  2257  	ctxt->andptr[3] = v>>24;
  2258  	ctxt->andptr[4] = v>>32;
  2259  	ctxt->andptr[5] = v>>40;
  2260  	ctxt->andptr[6] = v>>48;
  2261  	ctxt->andptr[7] = v>>56;
  2262  	ctxt->andptr += 8;
  2263  }
  2264  
  2265  /*
  2266  static void
  2267  relput8(Prog *p, Addr *a)
  2268  {
  2269  	vlong v;
  2270  	Reloc rel, *r;
  2271  	
  2272  	v = vaddr(ctxt, a, &rel);
  2273  	if(rel.siz != 0) {
  2274  		r = addrel(ctxt->cursym);
  2275  		*r = rel;
  2276  		r->siz = 8;
  2277  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2278  	}
  2279  	put8(ctxt, v);
  2280  }
  2281  */
  2282  
  2283  static vlong
  2284  vaddr(Link *ctxt, Addr *a, Reloc *r)
  2285  {
  2286  	int t;
  2287  	vlong v;
  2288  	LSym *s;
  2289  	
  2290  	if(r != nil)
  2291  		memset(r, 0, sizeof *r);
  2292  
  2293  	t = a->type;
  2294  	v = a->offset;
  2295  	if(t == D_ADDR)
  2296  		t = a->index;
  2297  	switch(t) {
  2298  	case D_STATIC:
  2299  	case D_EXTERN:
  2300  		s = a->sym;
  2301  		if(r == nil) {
  2302  			ctxt->diag("need reloc for %D", a);
  2303  			sysfatal("reloc");
  2304  		}
  2305  		if(isextern(s)) {
  2306  			r->siz = 4;
  2307  			r->type = R_ADDR;
  2308  		} else {
  2309  			r->siz = 4;
  2310  			r->type = R_PCREL;
  2311  		}
  2312  		r->off = -1;	// caller must fill in
  2313  		r->sym = s;
  2314  		r->add = v;
  2315  		v = 0;
  2316  		if(s->type == STLSBSS) {
  2317  			r->xadd = r->add - r->siz;
  2318  			r->type = R_TLS;
  2319  			r->xsym = s;
  2320  		}
  2321  		break;
  2322  	
  2323  	case D_INDIR+D_TLS:
  2324  		if(r == nil) {
  2325  			ctxt->diag("need reloc for %D", a);
  2326  			sysfatal("reloc");
  2327  		}
  2328  		r->type = R_TLS_LE;
  2329  		r->siz = 4;
  2330  		r->off = -1;	// caller must fill in
  2331  		r->add = v;
  2332  		v = 0;
  2333  		break;
  2334  	}
  2335  	return v;
  2336  }
  2337  
        /*
         * asmandsz appends the addressing bytes for operand a at ctxt->andptr.
         * The first byte has the form (mod<<6) | (r<<3) | rm; depending on the
         * operand it is followed by a byte produced by asmidx and by a 1-byte
         * or 4-byte displacement.
         *
         * r is placed in bits 3-5 of the first byte. rex is masked to
         * (0x40 | Rxr) and merged into ctxt->rexflag on the encoding paths.
         * m64 is not used (see USED below).
         *
         * When the operand needs a relocation (rel.siz != 0), it is added to
         * ctxt->cursym at the position of the 4-byte displacement (label
         * putrelv). Operands that cannot be encoded are reported through
         * ctxt->diag (label bad).
         */
  2338  static void
  2339  asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64)
  2340  {
  2341  	int32 v;
  2342  	int t, scale;
  2343  	Reloc rel;
  2344  
  2345  	USED(m64);
  2346  	rex &= (0x40 | Rxr);
  2347  	v = a->offset;
  2348  	t = a->type;
  2349  	rel.siz = 0;
        	// Case 1: the operand has an index register (D_TLS as index is
        	// handled further down). The first byte always uses rm=4 and is
        	// followed by the byte produced by asmidx.
  2350  	if(a->index != D_NONE && a->index != D_TLS) {
  2351  		if(t < D_INDIR) { 
  2352  			switch(t) {
  2353  			default:
  2354  				goto bad;
  2355  			case D_EXTERN:
  2356  			case D_STATIC:
  2357  				if(!isextern(a->sym))
  2358  					goto bad;
  2359  				t = D_NONE;
  2360  				v = vaddr(ctxt, a, &rel);
  2361  				break;
  2362  			case D_AUTO:
  2363  			case D_PARAM:
  2364  				t = D_SP;
  2365  				break;
  2366  			}
  2367  		} else
  2368  			t -= D_INDIR;
  2369  		ctxt->rexflag |= (regrex[(int)a->index] & Rxx) | (regrex[t] & Rxb) | rex;
        		// No base register: mod=0 with a 4-byte displacement.
  2370  		if(t == D_NONE) {
  2371  			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
  2372  			asmidx(ctxt, a->scale, a->index, t);
  2373  			goto putrelv;
  2374  		}
        		// Zero offset, no relocation: mod=0 with no displacement.
        		// Bases D_BP and D_R13 are excluded from this form.
  2375  		if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
  2376  			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
  2377  			asmidx(ctxt, a->scale, a->index, t);
  2378  			return;
  2379  		}
        		// Offset fits in a signed byte: mod=1 with a 1-byte displacement.
  2380  		if(v >= -128 && v < 128 && rel.siz == 0) {
  2381  			*ctxt->andptr++ = (1 << 6) | (4 << 0) | (r << 3);
  2382  			asmidx(ctxt, a->scale, a->index, t);
  2383  			*ctxt->andptr++ = v;
  2384  			return;
  2385  		}
        		// Otherwise: mod=2 with a 4-byte displacement.
  2386  		*ctxt->andptr++ = (2 << 6) | (4 << 0) | (r << 3);
  2387  		asmidx(ctxt, a->scale, a->index, t);
  2388  		goto putrelv;
  2389  	}
        	// Case 2: register operand (D_AL..D_X0+15): mod=3, register number
        	// in the low bits. A nonzero offset is rejected.
  2390  	if(t >= D_AL && t <= D_X0+15) {
  2391  		if(v)
  2392  			goto bad;
  2393  		*ctxt->andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
  2394  		ctxt->rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
  2395  		return;
  2396  	}
  2397  	
        	// Case 3: memory operand without an index register. Symbolic operand
        	// types are mapped to a base (D_NONE or D_SP); indirect types have
        	// D_INDIR stripped to obtain the base register.
  2398  	scale = a->scale;
  2399  	if(t < D_INDIR) {
  2400  		switch(a->type) {
  2401  		default:
  2402  			goto bad;
  2403  		case D_STATIC:
  2404  		case D_EXTERN:
  2405  			t = D_NONE;
  2406  			v = vaddr(ctxt, a, &rel);
  2407  			break;
  2408  		case D_AUTO:
  2409  		case D_PARAM:
  2410  			t = D_SP;
  2411  			break;
  2412  		}
  2413  		scale = 1;
  2414  	} else
  2415  		t -= D_INDIR;
  2416  	if(t == D_TLS)
  2417  		v = vaddr(ctxt, a, &rel);
  2418  
  2419  	ctxt->rexflag |= (regrex[t] & Rxb) | rex;
        	// No general base register (none, a segment register, or TLS):
        	// only a 4-byte displacement is emitted. The condition below parses
        	// as (A && B && C) || ctxt->asmode != 64; that branch emits mod=0,
        	// rm=5, the other emits rm=4 followed by a fixed second byte.
  2420  	if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) {
  2421  		if((a->sym == nil || !isextern(a->sym)) && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || ctxt->asmode != 64) {
  2422  			*ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3);
  2423  			goto putrelv;
  2424  		}
  2425  		/* temporary */
  2426  		*ctxt->andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
  2427  		*ctxt->andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
  2428  		goto putrelv;
  2429  	}
        	// Bases D_SP and D_R12 always get the extra asmidx byte (with index
        	// D_NONE), in the no-displacement, 1-byte and 4-byte forms.
  2430  	if(t == D_SP || t == D_R12) {
  2431  		if(v == 0) {
  2432  			*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
  2433  			asmidx(ctxt, scale, D_NONE, t);
  2434  			return;
  2435  		}
  2436  		if(v >= -128 && v < 128) {
  2437  			*ctxt->andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
  2438  			asmidx(ctxt, scale, D_NONE, t);
  2439  			*ctxt->andptr++ = v;
  2440  			return;
  2441  		}
  2442  		*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
  2443  		asmidx(ctxt, scale, D_NONE, t);
  2444  		goto putrelv;
  2445  	}
        	// Any other general register base: a single first byte plus an
        	// optional displacement.
  2446  	if(t >= D_AX && t <= D_R15) {
        		// Index D_TLS: the offset is carried by an R_TLS_IE relocation,
        		// which forces the 4-byte displacement form below.
  2447  		if(a->index == D_TLS) {
  2448  			memset(&rel, 0, sizeof rel);
  2449  			rel.type = R_TLS_IE;
  2450  			rel.siz = 4;
  2451  			rel.sym = nil;
  2452  			rel.add = v;
  2453  			v = 0;
  2454  		}
        		// As above, D_BP and D_R13 never use the no-displacement form.
  2455  		if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
  2456  			*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
  2457  			return;
  2458  		}
  2459  		if(v >= -128 && v < 128 && rel.siz == 0) {
  2460  			ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
  2461  			ctxt->andptr[1] = v;
  2462  			ctxt->andptr += 2;
  2463  			return;
  2464  		}
  2465  		*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
  2466  		goto putrelv;
  2467  	}
  2468  	goto bad;
  2469  	
        	// Shared tail: record the pending relocation (if any) at the current
        	// output position, then emit the 4-byte displacement.
  2470  putrelv:
  2471  	if(rel.siz != 0) {
        		// NOTE: this local r shadows the int parameter r inside this block.
  2472  		Reloc *r;
  2473  
  2474  		if(rel.siz != 4) {
  2475  			ctxt->diag("bad rel");
  2476  			goto bad;
  2477  		}
  2478  		r = addrel(ctxt->cursym);
  2479  		*r = rel;
  2480  		r->off = ctxt->curp->pc + ctxt->andptr - ctxt->and;
  2481  	}
  2482  		
  2483  	put4(ctxt, v);
  2484  	return;
  2485  
  2486  bad:
  2487  	ctxt->diag("asmand: bad address %D", a);
  2488  	return;
  2489  }
  2490  
  2491  static void
  2492  asmand(Link *ctxt, Addr *a, Addr *ra)
  2493  {
  2494  	asmandsz(ctxt, a, reg[ra->type], regrex[ra->type], 0);
  2495  }
  2496  
  2497  static void
  2498  asmando(Link *ctxt, Addr *a, int o)
  2499  {
  2500  	asmandsz(ctxt, a, o, 0, 0);
  2501  }
  2502  
  2503  static void
  2504  bytereg(Addr *a, uint8 *t)
  2505  {
  2506  	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15)) {
  2507  		a->type = D_AL + (a->type-D_AX);
  2508  		*t = 0;
  2509  	}
  2510  }
  2511  
  2512  enum {
  2513  	E = 0xff,
  2514  };
        /*
         * ymovtab lists special-case encodings for PUSH/POP/MOV/double-shift
         * forms, one row per (instruction, from-class, to-class) combination.
         * Each row is { instruction, from class, to class, code, {four bytes} }
         * and the table ends with an all-zero row.
         *
         * NOTE(review): the Movtab declaration and the code that interprets the
         * "code" column and the E (0xff) marker are outside this view; the
         * column meanings above are inferred from the initializers — confirm
         * against the Movtab definition and the consumer in doasm.
         */
  2515  static Movtab	ymovtab[] =
  2516  {
  2517  /* push */
  2518  	{APUSHL,	Ycs,	Ynone,	0,	{0x0e,E,0,0}},
  2519  	{APUSHL,	Yss,	Ynone,	0,	{0x16,E,0,0}},
  2520  	{APUSHL,	Yds,	Ynone,	0,	{0x1e,E,0,0}},
  2521  	{APUSHL,	Yes,	Ynone,	0,	{0x06,E,0,0}},
  2522  	{APUSHL,	Yfs,	Ynone,	0,	{0x0f,0xa0,E,0}},
  2523  	{APUSHL,	Ygs,	Ynone,	0,	{0x0f,0xa8,E,0}},
  2524  	{APUSHQ,	Yfs,	Ynone,	0,	{0x0f,0xa0,E,0}},
  2525  	{APUSHQ,	Ygs,	Ynone,	0,	{0x0f,0xa8,E,0}},
  2526  
  2527  	{APUSHW,	Ycs,	Ynone,	0,	{Pe,0x0e,E,0}},
  2528  	{APUSHW,	Yss,	Ynone,	0,	{Pe,0x16,E,0}},
  2529  	{APUSHW,	Yds,	Ynone,	0,	{Pe,0x1e,E,0}},
  2530  	{APUSHW,	Yes,	Ynone,	0,	{Pe,0x06,E,0}},
  2531  	{APUSHW,	Yfs,	Ynone,	0,	{Pe,0x0f,0xa0,E}},
  2532  	{APUSHW,	Ygs,	Ynone,	0,	{Pe,0x0f,0xa8,E}},
  2533  
  2534  /* pop */
  2535  	{APOPL,	Ynone,	Yds,	0,	{0x1f,E,0,0}},
  2536  	{APOPL,	Ynone,	Yes,	0,	{0x07,E,0,0}},
  2537  	{APOPL,	Ynone,	Yss,	0,	{0x17,E,0,0}},
  2538  	{APOPL,	Ynone,	Yfs,	0,	{0x0f,0xa1,E,0}},
  2539  	{APOPL,	Ynone,	Ygs,	0,	{0x0f,0xa9,E,0}},
  2540  	{APOPQ,	Ynone,	Yfs,	0,	{0x0f,0xa1,E,0}},
  2541  	{APOPQ,	Ynone,	Ygs,	0,	{0x0f,0xa9,E,0}},
  2542  
  2543  	{APOPW,	Ynone,	Yds,	0,	{Pe,0x1f,E,0}},
  2544  	{APOPW,	Ynone,	Yes,	0,	{Pe,0x07,E,0}},
  2545  	{APOPW,	Ynone,	Yss,	0,	{Pe,0x17,E,0}},
  2546  	{APOPW,	Ynone,	Yfs,	0,	{Pe,0x0f,0xa1,E}},
  2547  	{APOPW,	Ynone,	Ygs,	0,	{Pe,0x0f,0xa9,E}},
  2548  
  2549  /* mov seg */
  2550  	{AMOVW,	Yes,	Yml,	1,	{0x8c,0,0,0}},
  2551  	{AMOVW,	Ycs,	Yml,	1,	{0x8c,1,0,0}},
  2552  	{AMOVW,	Yss,	Yml,	1,	{0x8c,2,0,0}},
  2553  	{AMOVW,	Yds,	Yml,	1,	{0x8c,3,0,0}},
  2554  	{AMOVW,	Yfs,	Yml,	1,	{0x8c,4,0,0}},
  2555  	{AMOVW,	Ygs,	Yml,	1,	{0x8c,5,0,0}},
  2556  
  2557  	{AMOVW,	Yml,	Yes,	2,	{0x8e,0,0,0}},
  2558  	{AMOVW,	Yml,	Ycs,	2,	{0x8e,1,0,0}},
  2559  	{AMOVW,	Yml,	Yss,	2,	{0x8e,2,0,0}},
  2560  	{AMOVW,	Yml,	Yds,	2,	{0x8e,3,0,0}},
  2561  	{AMOVW,	Yml,	Yfs,	2,	{0x8e,4,0,0}},
  2562  	{AMOVW,	Yml,	Ygs,	2,	{0x8e,5,0,0}},
  2563  
  2564  /* mov cr */
  2565  	{AMOVL,	Ycr0,	Yml,	3,	{0x0f,0x20,0,0}},
  2566  	{AMOVL,	Ycr2,	Yml,	3,	{0x0f,0x20,2,0}},
  2567  	{AMOVL,	Ycr3,	Yml,	3,	{0x0f,0x20,3,0}},
  2568  	{AMOVL,	Ycr4,	Yml,	3,	{0x0f,0x20,4,0}},
  2569  	{AMOVL,	Ycr8,	Yml,	3,	{0x0f,0x20,8,0}},
  2570  	{AMOVQ,	Ycr0,	Yml,	3,	{0x0f,0x20,0,0}},
  2571  	{AMOVQ,	Ycr2,	Yml,	3,	{0x0f,0x20,2,0}},
  2572  	{AMOVQ,	Ycr3,	Yml,	3,	{0x0f,0x20,3,0}},
  2573  	{AMOVQ,	Ycr4,	Yml,	3,	{0x0f,0x20,4,0}},
  2574  	{AMOVQ,	Ycr8,	Yml,	3,	{0x0f,0x20,8,0}},
  2575  
  2576  	{AMOVL,	Yml,	Ycr0,	4,	{0x0f,0x22,0,0}},
  2577  	{AMOVL,	Yml,	Ycr2,	4,	{0x0f,0x22,2,0}},
  2578  	{AMOVL,	Yml,	Ycr3,	4,	{0x0f,0x22,3,0}},
  2579  	{AMOVL,	Yml,	Ycr4,	4,	{0x0f,0x22,4,0}},
  2580  	{AMOVL,	Yml,	Ycr8,	4,	{0x0f,0x22,8,0}},
  2581  	{AMOVQ,	Yml,	Ycr0,	4,	{0x0f,0x22,0,0}},
  2582  	{AMOVQ,	Yml,	Ycr2,	4,	{0x0f,0x22,2,0}},
  2583  	{AMOVQ,	Yml,	Ycr3,	4,	{0x0f,0x22,3,0}},
  2584  	{AMOVQ,	Yml,	Ycr4,	4,	{0x0f,0x22,4,0}},
  2585  	{AMOVQ,	Yml,	Ycr8,	4,	{0x0f,0x22,8,0}},
  2586  
  2587  /* mov dr */
  2588  	{AMOVL,	Ydr0,	Yml,	3,	{0x0f,0x21,0,0}},
  2589  	{AMOVL,	Ydr6,	Yml,	3,	{0x0f,0x21,6,0}},
  2590  	{AMOVL,	Ydr7,	Yml,	3,	{0x0f,0x21,7,0}},
  2591  	{AMOVQ,	Ydr0,	Yml,	3,	{0x0f,0x21,0,0}},
  2592  	{AMOVQ,	Ydr6,	Yml,	3,	{0x0f,0x21,6,0}},
  2593  	{AMOVQ,	Ydr7,	Yml,	3,	{0x0f,0x21,7,0}},
  2594  
  2595  	{AMOVL,	Yml,	Ydr0,	4,	{0x0f,0x23,0,0}},
  2596  	{AMOVL,	Yml,	Ydr6,	4,	{0x0f,0x23,6,0}},
  2597  	{AMOVL,	Yml,	Ydr7,	4,	{0x0f,0x23,7,0}},
  2598  	{AMOVQ,	Yml,	Ydr0,	4,	{0x0f,0x23,0,0}},
  2599  	{AMOVQ,	Yml,	Ydr6,	4,	{0x0f,0x23,6,0}},
  2600  	{AMOVQ,	Yml,	Ydr7,	4,	{0x0f,0x23,7,0}},
  2601  
  2602  /* mov tr */
  2603  	{AMOVL,	Ytr6,	Yml,	3,	{0x0f,0x24,6,0}},
  2604  	{AMOVL,	Ytr7,	Yml,	3,	{0x0f,0x24,7,0}},
  2605  
        /* NOTE(review): these two rows end in E where the sibling rows end in 0; confirm whether that is intended. */
  2606  	{AMOVL,	Yml,	Ytr6,	4,	{0x0f,0x26,6,E}},
  2607  	{AMOVL,	Yml,	Ytr7,	4,	{0x0f,0x26,7,E}},
  2608  
  2609  /* lgdt, sgdt, lidt, sidt */
  2610  	{AMOVL,	Ym,	Ygdtr,	4,	{0x0f,0x01,2,0}},
  2611  	{AMOVL,	Ygdtr,	Ym,	3,	{0x0f,0x01,0,0}},
  2612  	{AMOVL,	Ym,	Yidtr,	4,	{0x0f,0x01,3,0}},
  2613  	{AMOVL,	Yidtr,	Ym,	3,	{0x0f,0x01,1,0}},
  2614  	{AMOVQ,	Ym,	Ygdtr,	4,	{0x0f,0x01,2,0}},
  2615  	{AMOVQ,	Ygdtr,	Ym,	3,	{0x0f,0x01,0,0}},
  2616  	{AMOVQ,	Ym,	Yidtr,	4,	{0x0f,0x01,3,0}},
  2617  	{AMOVQ,	Yidtr,	Ym,	3,	{0x0f,0x01,1,0}},
  2618  
  2619  /* lldt, sldt */
  2620  	{AMOVW,	Yml,	Yldtr,	4,	{0x0f,0x00,2,0}},
  2621  	{AMOVW,	Yldtr,	Yml,	3,	{0x0f,0x00,0,0}},
  2622  
  2623  /* lmsw, smsw */
  2624  	{AMOVW,	Yml,	Ymsw,	4,	{0x0f,0x01,6,0}},
  2625  	{AMOVW,	Ymsw,	Yml,	3,	{0x0f,0x01,4,0}},
  2626  
  2627  /* ltr, str */
  2628  	{AMOVW,	Yml,	Ytask,	4,	{0x0f,0x00,3,0}},
  2629  	{AMOVW,	Ytask,	Yml,	3,	{0x0f,0x00,1,0}},
  2630  
  2631  /* load full pointer */
  2632  	{AMOVL,	Yml,	Ycol,	5,	{0,0,0,0}},
  2633  	{AMOVW,	Yml,	Ycol,	5,	{Pe,0,0,0}},
  2634  
  2635  /* double shift */
  2636  	{ASHLL,	Ycol,	Yml,	6,	{0xa4,0xa5,0,0}},
  2637  	{ASHRL,	Ycol,	Yml,	6,	{0xac,0xad,0,0}},
  2638  	{ASHLQ,	Ycol,	Yml,	6,	{Pw,0xa4,0xa5,0}},
  2639  	{ASHRQ,	Ycol,	Yml,	6,	{Pw,0xac,0xad,0}},
  2640  	{ASHLW,	Ycol,	Yml,	6,	{Pe,0xa4,0xa5,0}},
  2641  	{ASHRW,	Ycol,	Yml,	6,	{Pe,0xac,0xad,0}},
  2642  
  2643  /* load TLS base */
  2644  	{AMOVQ,	Ytls,	Yrl,	7,	{0,0,0,0}},
  2645  
  2646  	{0}
  2647  };
  2648  
  2649  static int
  2650  isax(Addr *a)
  2651  {
  2652  
  2653  	switch(a->type) {
  2654  	case D_AX:
  2655  	case D_AL:
  2656  	case D_AH:
  2657  	case D_INDIR+D_AX:
  2658  		return 1;
  2659  	}
  2660  	if(a->index == D_AX)
  2661  		return 1;
  2662  	return 0;
  2663  }
  2664  
  2665  static void
  2666  subreg(Prog *p, int from, int to)
  2667  {
  2668  
  2669  	if(0 /*debug['Q']*/)
  2670  		print("\n%P	s/%R/%R/\n", p, from, to);
  2671  
  2672  	if(p->from.type == from)
  2673  		p->from.type = to;
  2674  	if(p->to.type == from)
  2675  		p->to.type = to;
  2676  
  2677  	if(p->from.index == from)
  2678  		p->from.index = to;
  2679  	if(p->to.index == from)
  2680  		p->to.index = to;
  2681  
  2682  	from += D_INDIR;
  2683  	if(p->from.type == from)
  2684  		p->from.type = to+D_INDIR;
  2685  	if(p->to.type == from)
  2686  		p->to.type = to+D_INDIR;
  2687  
  2688  	if(0 /*debug['Q']*/)
  2689  		print("%P\n", p);
  2690  }
  2691  
  2692  static int
  2693  mediaop(Link *ctxt, Optab *o, int op, int osize, int z)
  2694  {
  2695  	switch(op){
  2696  	case Pm:
  2697  	case Pe:
  2698  	case Pf2:
  2699  	case Pf3:
  2700  		if(osize != 1){
  2701  			if(op != Pm)
  2702  				*ctxt->andptr++ = op;
  2703  			*ctxt->andptr++ = Pm;
  2704  			op = o->op[++z];
  2705  			break;
  2706  		}
  2707  	default:
  2708  		if(ctxt->andptr == ctxt->and || ctxt->and[ctxt->andptr - ctxt->and - 1] != Pm)
  2709  			*ctxt->andptr++ = Pm;
  2710  		break;
  2711  	}
  2712  	*ctxt->andptr++ = op;
  2713  	return z;
  2714  }
  2715  
  2716  static void
  2717  doasm(Link *ctxt, Prog *p)
  2718  {
  2719  	Optab *o;
  2720  	Prog *q, pp;
  2721  	uchar *t;
  2722  	Movtab *mo;
  2723  	int z, op, ft, tt, xo, l, pre;
  2724  	vlong v;
  2725  	Reloc rel, *r;
  2726  	Addr *a;
  2727  	
  2728  	ctxt->curp = p;	// TODO
  2729  
  2730  	o = opindex[p->as];
  2731  	if(o == nil) {
  2732  		ctxt->diag("asmins: missing op %P", p);
  2733  		return;
  2734  	}
  2735  	
  2736  	pre = prefixof(ctxt, &p->from);
  2737  	if(pre)
  2738  		*ctxt->andptr++ = pre;
  2739  	pre = prefixof(ctxt, &p->to);
  2740  	if(pre)
  2741  		*ctxt->andptr++ = pre;
  2742  
  2743  	if(p->ft == 0)
  2744  		p->ft = oclass(ctxt, &p->from);
  2745  	if(p->tt == 0)
  2746  		p->tt = oclass(ctxt, &p->to);
  2747  
  2748  	ft = p->ft * Ymax;
  2749  	tt = p->tt * Ymax;
  2750  
  2751  	t = o->ytab;
  2752  	if(t == 0) {
  2753  		ctxt->diag("asmins: noproto %P", p);
  2754  		return;
  2755  	}
  2756  	xo = o->op[0] == 0x0f;
  2757  	for(z=0; *t; z+=t[3]+xo,t+=4)
  2758  		if(ycover[ft+t[0]])
  2759  		if(ycover[tt+t[1]])
  2760  			goto found;
  2761  	goto domov;
  2762  
  2763  found:
  2764  	switch(o->prefix) {
  2765  	case Pq:	/* 16 bit escape and opcode escape */
  2766  		*ctxt->andptr++ = Pe;
  2767  		*ctxt->andptr++ = Pm;
  2768  		break;
  2769  	case Pq3:	/* 16 bit escape, Rex.w, and opcode escape */
  2770  		*ctxt->andptr++ = Pe;
  2771  		*ctxt->andptr++ = Pw;
  2772  		*ctxt->andptr++ = Pm;
  2773  		break;
  2774  
  2775  	case Pf2:	/* xmm opcode escape */
  2776  	case Pf3:
  2777  		*ctxt->andptr++ = o->prefix;
  2778  		*ctxt->andptr++ = Pm;
  2779  		break;
  2780  
  2781  	case Pm:	/* opcode escape */
  2782  		*ctxt->andptr++ = Pm;
  2783  		break;
  2784  
  2785  	case Pe:	/* 16 bit escape */
  2786  		*ctxt->andptr++ = Pe;
  2787  		break;
  2788  
  2789  	case Pw:	/* 64-bit escape */
  2790  		if(p->mode != 64)
  2791  			ctxt->diag("asmins: illegal 64: %P", p);
  2792  		ctxt->rexflag |= Pw;
  2793  		break;
  2794  
  2795  	case Pb:	/* botch */
  2796  		bytereg(&p->from, &p->ft);
  2797  		bytereg(&p->to, &p->tt);
  2798  		break;
  2799  
  2800  	case P32:	/* 32 bit but illegal if 64-bit mode */
  2801  		if(p->mode == 64)
  2802  			ctxt->diag("asmins: illegal in 64-bit mode: %P", p);
  2803  		break;
  2804  
  2805  	case Py:	/* 64-bit only, no prefix */
  2806  		if(p->mode != 64)
  2807  			ctxt->diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
  2808  		break;
  2809  	}
  2810  
  2811  	if(z >= nelem(o->op))
  2812  		sysfatal("asmins bad table %P", p);
  2813  	op = o->op[z];
  2814  	if(op == 0x0f) {
  2815  		*ctxt->andptr++ = op;
  2816  		op = o->op[++z];
  2817  	}
  2818  	switch(t[2]) {
  2819  	default:
  2820  		ctxt->diag("asmins: unknown z %d %P", t[2], p);
  2821  		return;
  2822  
  2823  	case Zpseudo:
  2824  		break;
  2825  
  2826  	case Zlit:
  2827  		for(; op = o->op[z]; z++)
  2828  			*ctxt->andptr++ = op;
  2829  		break;
  2830  
  2831  	case Zlitm_r:
  2832  		for(; op = o->op[z]; z++)
  2833  			*ctxt->andptr++ = op;
  2834  		asmand(ctxt, &p->from, &p->to);
  2835  		break;
  2836  
  2837  	case Zmb_r:
  2838  		bytereg(&p->from, &p->ft);
  2839  		/* fall through */
  2840  	case Zm_r:
  2841  		*ctxt->andptr++ = op;
  2842  		asmand(ctxt, &p->from, &p->to);
  2843  		break;
  2844  	case Zm2_r:
  2845  		*ctxt->andptr++ = op;
  2846  		*ctxt->andptr++ = o->op[z+1];
  2847  		asmand(ctxt, &p->from, &p->to);
  2848  		break;
  2849  
  2850  	case Zm_r_xm:
  2851  		mediaop(ctxt, o, op, t[3], z);
  2852  		asmand(ctxt, &p->from, &p->to);
  2853  		break;
  2854  
  2855  	case Zm_r_xm_nr:
  2856  		ctxt->rexflag = 0;
  2857  		mediaop(ctxt, o, op, t[3], z);
  2858  		asmand(ctxt, &p->from, &p->to);
  2859  		break;
  2860  
  2861  	case Zm_r_i_xm:
  2862  		mediaop(ctxt, o, op, t[3], z);
  2863  		asmand(ctxt, &p->from, &p->to);
  2864  		*ctxt->andptr++ = p->to.offset;
  2865  		break;
  2866  
  2867  	case Zm_r_3d:
  2868  		*ctxt->andptr++ = 0x0f;
  2869  		*ctxt->andptr++ = 0x0f;
  2870  		asmand(ctxt, &p->from, &p->to);
  2871  		*ctxt->andptr++ = op;
  2872  		break;
  2873  
  2874  	case Zibm_r:
  2875  		while ((op = o->op[z++]) != 0)
  2876  			*ctxt->andptr++ = op;
  2877  		asmand(ctxt, &p->from, &p->to);
  2878  		*ctxt->andptr++ = p->to.offset;
  2879  		break;
  2880  
  2881  	case Zaut_r:
  2882  		*ctxt->andptr++ = 0x8d;	/* leal */
  2883  		if(p->from.type != D_ADDR)
  2884  			ctxt->diag("asmins: Zaut sb type ADDR");
  2885  		p->from.type = p->from.index;
  2886  		p->from.index = D_NONE;
  2887  		asmand(ctxt, &p->from, &p->to);
  2888  		p->from.index = p->from.type;
  2889  		p->from.type = D_ADDR;
  2890  		break;
  2891  
  2892  	case Zm_o:
  2893  		*ctxt->andptr++ = op;
  2894  		asmando(ctxt, &p->from, o->op[z+1]);
  2895  		break;
  2896  
  2897  	case Zr_m:
  2898  		*ctxt->andptr++ = op;
  2899  		asmand(ctxt, &p->to, &p->from);
  2900  		break;
  2901  
  2902  	case Zr_m_xm:
  2903  		mediaop(ctxt, o, op, t[3], z);
  2904  		asmand(ctxt, &p->to, &p->from);
  2905  		break;
  2906  
  2907  	case Zr_m_xm_nr:
  2908  		ctxt->rexflag = 0;
  2909  		mediaop(ctxt, o, op, t[3], z);
  2910  		asmand(ctxt, &p->to, &p->from);
  2911  		break;
  2912  
  2913  	case Zr_m_i_xm:
  2914  		mediaop(ctxt, o, op, t[3], z);
  2915  		asmand(ctxt, &p->to, &p->from);
  2916  		*ctxt->andptr++ = p->from.offset;
  2917  		break;
  2918  
  2919  	case Zo_m:
  2920  		*ctxt->andptr++ = op;
  2921  		asmando(ctxt, &p->to, o->op[z+1]);
  2922  		break;
  2923  
  2924  	case Zcallindreg:
  2925  		r = addrel(ctxt->cursym);
  2926  		r->off = p->pc;
  2927  		r->type = R_CALLIND;
  2928  		r->siz = 0;
  2929  		// fallthrough
  2930  	case Zo_m64:
  2931  		*ctxt->andptr++ = op;
  2932  		asmandsz(ctxt, &p->to, o->op[z+1], 0, 1);
  2933  		break;
  2934  
  2935  	case Zm_ibo:
  2936  		*ctxt->andptr++ = op;
  2937  		asmando(ctxt, &p->from, o->op[z+1]);
  2938  		*ctxt->andptr++ = vaddr(ctxt, &p->to, nil);
  2939  		break;
  2940  
  2941  	case Zibo_m:
  2942  		*ctxt->andptr++ = op;
  2943  		asmando(ctxt, &p->to, o->op[z+1]);
  2944  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2945  		break;
  2946  
  2947  	case Zibo_m_xm:
  2948  		z = mediaop(ctxt, o, op, t[3], z);
  2949  		asmando(ctxt, &p->to, o->op[z+1]);
  2950  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2951  		break;
  2952  
  2953  	case Z_ib:
  2954  	case Zib_:
  2955  		if(t[2] == Zib_)
  2956  			a = &p->from;
  2957  		else
  2958  			a = &p->to;
  2959  		*ctxt->andptr++ = op;
  2960  		*ctxt->andptr++ = vaddr(ctxt, a, nil);
  2961  		break;
  2962  
  2963  	case Zib_rp:
  2964  		ctxt->rexflag |= regrex[p->to.type] & (Rxb|0x40);
  2965  		*ctxt->andptr++ = op + reg[p->to.type];
  2966  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2967  		break;
  2968  
  2969  	case Zil_rp:
  2970  		ctxt->rexflag |= regrex[p->to.type] & Rxb;
  2971  		*ctxt->andptr++ = op + reg[p->to.type];
  2972  		if(o->prefix == Pe) {
  2973  			v = vaddr(ctxt, &p->from, nil);
  2974  			*ctxt->andptr++ = v;
  2975  			*ctxt->andptr++ = v>>8;
  2976  		}
  2977  		else
  2978  			relput4(ctxt, p, &p->from);
  2979  		break;
  2980  
  2981  	case Zo_iw:
  2982  		*ctxt->andptr++ = op;
  2983  		if(p->from.type != D_NONE){
  2984  			v = vaddr(ctxt, &p->from, nil);
  2985  			*ctxt->andptr++ = v;
  2986  			*ctxt->andptr++ = v>>8;
  2987  		}
  2988  		break;
  2989  
  2990  	case Ziq_rp:
  2991  		v = vaddr(ctxt, &p->from, &rel);
  2992  		l = v>>32;
  2993  		if(l == 0 && rel.siz != 8){
  2994  			//p->mark |= 0100;
  2995  			//print("zero: %llux %P\n", v, p);
  2996  			ctxt->rexflag &= ~(0x40|Rxw);
  2997  			ctxt->rexflag |= regrex[p->to.type] & Rxb;
  2998  			*ctxt->andptr++ = 0xb8 + reg[p->to.type];
  2999  			if(rel.type != 0) {
  3000  				r = addrel(ctxt->cursym);
  3001  				*r = rel;
  3002  				r->off = p->pc + ctxt->andptr - ctxt->and;
  3003  			}
  3004  			put4(ctxt, v);
  3005  		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
  3006  			//p->mark |= 0100;
  3007  			//print("sign: %llux %P\n", v, p);
  3008  			*ctxt->andptr ++ = 0xc7;
  3009  			asmando(ctxt, &p->to, 0);
  3010  			put4(ctxt, v);
  3011  		}else{	/* need all 8 */
  3012  			//print("all: %llux %P\n", v, p);
  3013  			ctxt->rexflag |= regrex[p->to.type] & Rxb;
  3014  			*ctxt->andptr++ = op + reg[p->to.type];
  3015  			if(rel.type != 0) {
  3016  				r = addrel(ctxt->cursym);
  3017  				*r = rel;
  3018  				r->off = p->pc + ctxt->andptr - ctxt->and;
  3019  			}
  3020  			put8(ctxt, v);
  3021  		}
  3022  		break;
  3023  
  3024  	case Zib_rr:
  3025  		*ctxt->andptr++ = op;
  3026  		asmand(ctxt, &p->to, &p->to);
  3027  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  3028  		break;
  3029  
  3030  	case Z_il:
  3031  	case Zil_:
  3032  		if(t[2] == Zil_)
  3033  			a = &p->from;
  3034  		else
  3035  			a = &p->to;
  3036  		*ctxt->andptr++ = op;
  3037  		if(o->prefix == Pe) {
  3038  			v = vaddr(ctxt, a, nil);
  3039  			*ctxt->andptr++ = v;
  3040  			*ctxt->andptr++ = v>>8;
  3041  		}
  3042  		else
  3043  			relput4(ctxt, p, a);
  3044  		break;
  3045  
  3046  	case Zm_ilo:
  3047  	case Zilo_m:
  3048  		*ctxt->andptr++ = op;
  3049  		if(t[2] == Zilo_m) {
  3050  			a = &p->from;
  3051  			asmando(ctxt, &p->to, o->op[z+1]);
  3052  		} else {
  3053  			a = &p->to;
  3054  			asmando(ctxt, &p->from, o->op[z+1]);
  3055  		}
  3056  		if(o->prefix == Pe) {
  3057  			v = vaddr(ctxt, a, nil);
  3058  			*ctxt->andptr++ = v;
  3059  			*ctxt->andptr++ = v>>8;
  3060  		}
  3061  		else
  3062  			relput4(ctxt, p, a);
  3063  		break;
  3064  
  3065  	case Zil_rr:
  3066  		*ctxt->andptr++ = op;
  3067  		asmand(ctxt, &p->to, &p->to);
  3068  		if(o->prefix == Pe) {
  3069  			v = vaddr(ctxt, &p->from, nil);
  3070  			*ctxt->andptr++ = v;
  3071  			*ctxt->andptr++ = v>>8;
  3072  		}
  3073  		else
  3074  			relput4(ctxt, p, &p->from);
  3075  		break;
  3076  
  3077  	case Z_rp:
  3078  		ctxt->rexflag |= regrex[p->to.type] & (Rxb|0x40);
  3079  		*ctxt->andptr++ = op + reg[p->to.type];
  3080  		break;
  3081  
  3082  	case Zrp_:
  3083  		ctxt->rexflag |= regrex[p->from.type] & (Rxb|0x40);
  3084  		*ctxt->andptr++ = op + reg[p->from.type];
  3085  		break;
  3086  
  3087  	case Zclr:
  3088  		ctxt->rexflag &= ~Pw;
  3089  		*ctxt->andptr++ = op;
  3090  		asmand(ctxt, &p->to, &p->to);
  3091  		break;
  3092  
  3093  	case Zcall:
  3094  		if(p->to.sym == nil) {
  3095  			ctxt->diag("call without target");
  3096  			sysfatal("bad code");
  3097  		}
  3098  		*ctxt->andptr++ = op;
  3099  		r = addrel(ctxt->cursym);
  3100  		r->off = p->pc + ctxt->andptr - ctxt->and;
  3101  		r->sym = p->to.sym;
  3102  		r->add = p->to.offset;
  3103  		r->type = R_CALL;
  3104  		r->siz = 4;
  3105  		put4(ctxt, 0);
  3106  		break;
  3107  
  3108  	case Zbr:
  3109  	case Zjmp:
  3110  	case Zloop:
  3111  		// TODO: jump across functions needs reloc
  3112  		if(p->to.sym != nil) {
  3113  			if(t[2] != Zjmp) {
  3114  				ctxt->diag("branch to ATEXT");
  3115  				sysfatal("bad code");
  3116  			}
  3117  			*ctxt->andptr++ = o->op[z+1];
  3118  			r = addrel(ctxt->cursym);
  3119  			r->off = p->pc + ctxt->andptr - ctxt->and;
  3120  			r->sym = p->to.sym;
  3121  			r->type = R_PCREL;
  3122  			r->siz = 4;
  3123  			put4(ctxt, 0);
  3124  			break;
  3125  		}
  3126  		// Assumes q is in this function.
  3127  		// TODO: Check in input, preserve in brchain.
  3128  
  3129  		// Fill in backward jump now.
  3130  		q = p->pcond;
  3131  		if(q == nil) {
  3132  			ctxt->diag("jmp/branch/loop without target");
  3133  			sysfatal("bad code");
  3134  		}
  3135  		if(p->back & 1) {
  3136  			v = q->pc - (p->pc + 2);
  3137  			if(v >= -128) {
  3138  				if(p->as == AJCXZL)
  3139  					*ctxt->andptr++ = 0x67;
  3140  				*ctxt->andptr++ = op;
  3141  				*ctxt->andptr++ = v;
  3142  			} else if(t[2] == Zloop) {
  3143  				ctxt->diag("loop too far: %P", p);
  3144  			} else {
  3145  				v -= 5-2;
  3146  				if(t[2] == Zbr) {
  3147  					*ctxt->andptr++ = 0x0f;
  3148  					v--;
  3149  				}
  3150  				*ctxt->andptr++ = o->op[z+1];
  3151  				*ctxt->andptr++ = v;
  3152  				*ctxt->andptr++ = v>>8;
  3153  				*ctxt->andptr++ = v>>16;
  3154  				*ctxt->andptr++ = v>>24;
  3155  			}
  3156  			break;
  3157  		}
  3158  		
  3159  		// Annotate target; will fill in later.
  3160  		p->forwd = q->comefrom;
  3161  		q->comefrom = p;
  3162  		if(p->back & 2)	{ // short
  3163  			if(p->as == AJCXZL)
  3164  				*ctxt->andptr++ = 0x67;
  3165  			*ctxt->andptr++ = op;
  3166  			*ctxt->andptr++ = 0;
  3167  		} else if(t[2] == Zloop) {
  3168  			ctxt->diag("loop too far: %P", p);
  3169  		} else {
  3170  			if(t[2] == Zbr)
  3171  				*ctxt->andptr++ = 0x0f;
  3172  			*ctxt->andptr++ = o->op[z+1];
  3173  			*ctxt->andptr++ = 0;
  3174  			*ctxt->andptr++ = 0;
  3175  			*ctxt->andptr++ = 0;
  3176  			*ctxt->andptr++ = 0;
  3177  		}
  3178  		break;
  3179  				
  3180  /*
  3181  		v = q->pc - p->pc - 2;
  3182  		if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3183  			*ctxt->andptr++ = op;
  3184  			*ctxt->andptr++ = v;
  3185  		} else {
  3186  			v -= 5-2;
  3187  			if(t[2] == Zbr) {
  3188  				*ctxt->andptr++ = 0x0f;
  3189  				v--;
  3190  			}
  3191  			*ctxt->andptr++ = o->op[z+1];
  3192  			*ctxt->andptr++ = v;
  3193  			*ctxt->andptr++ = v>>8;
  3194  			*ctxt->andptr++ = v>>16;
  3195  			*ctxt->andptr++ = v>>24;
  3196  		}
  3197  */
  3198  		break;
  3199  
  3200  	case Zbyte:
  3201  		v = vaddr(ctxt, &p->from, &rel);
  3202  		if(rel.siz != 0) {
  3203  			rel.siz = op;
  3204  			r = addrel(ctxt->cursym);
  3205  			*r = rel;
  3206  			r->off = p->pc + ctxt->andptr - ctxt->and;
  3207  		}
  3208  		*ctxt->andptr++ = v;
  3209  		if(op > 1) {
  3210  			*ctxt->andptr++ = v>>8;
  3211  			if(op > 2) {
  3212  				*ctxt->andptr++ = v>>16;
  3213  				*ctxt->andptr++ = v>>24;
  3214  				if(op > 4) {
  3215  					*ctxt->andptr++ = v>>32;
  3216  					*ctxt->andptr++ = v>>40;
  3217  					*ctxt->andptr++ = v>>48;
  3218  					*ctxt->andptr++ = v>>56;
  3219  				}
  3220  			}
  3221  		}
  3222  		break;
  3223  	}
  3224  	return;
  3225  
  3226  domov:
  3227  	for(mo=ymovtab; mo->as; mo++)
  3228  		if(p->as == mo->as)
  3229  		if(ycover[ft+mo->ft])
  3230  		if(ycover[tt+mo->tt]){
  3231  			t = mo->op;
  3232  			goto mfound;
  3233  		}
  3234  bad:
  3235  	if(p->mode != 64){
  3236  		/*
  3237  		 * here, the assembly has failed.
  3238  		 * if its a byte instruction that has
  3239  		 * unaddressable registers, try to
  3240  		 * exchange registers and reissue the
  3241  		 * instruction with the operands renamed.
  3242  		 */
  3243  		pp = *p;
  3244  		z = p->from.type;
  3245  		if(z >= D_BP && z <= D_DI) {
  3246  			if(isax(&p->to) || p->to.type == D_NONE) {
  3247  				// We certainly don't want to exchange
  3248  				// with AX if the op is MUL or DIV.
  3249  				*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  3250  				asmando(ctxt, &p->from, reg[D_BX]);
  3251  				subreg(&pp, z, D_BX);
  3252  				doasm(ctxt, &pp);
  3253  				*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  3254  				asmando(ctxt, &p->from, reg[D_BX]);
  3255  			} else {
  3256  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  3257  				subreg(&pp, z, D_AX);
  3258  				doasm(ctxt, &pp);
  3259  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  3260  			}
  3261  			return;
  3262  		}
  3263  		z = p->to.type;
  3264  		if(z >= D_BP && z <= D_DI) {
  3265  			if(isax(&p->from)) {
  3266  				*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  3267  				asmando(ctxt, &p->to, reg[D_BX]);
  3268  				subreg(&pp, z, D_BX);
  3269  				doasm(ctxt, &pp);
  3270  				*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  3271  				asmando(ctxt, &p->to, reg[D_BX]);
  3272  			} else {
  3273  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  3274  				subreg(&pp, z, D_AX);
  3275  				doasm(ctxt, &pp);
  3276  				*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  3277  			}
  3278  			return;
  3279  		}
  3280  	}
  3281  	ctxt->diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
  3282  	return;
  3283  
  3284  mfound:
  3285  	switch(mo->code) {
  3286  	default:
  3287  		ctxt->diag("asmins: unknown mov %d %P", mo->code, p);
  3288  		break;
  3289  
  3290  	case 0:	/* lit */
  3291  		for(z=0; t[z]!=E; z++)
  3292  			*ctxt->andptr++ = t[z];
  3293  		break;
  3294  
  3295  	case 1:	/* r,m */
  3296  		*ctxt->andptr++ = t[0];
  3297  		asmando(ctxt, &p->to, t[1]);
  3298  		break;
  3299  
  3300  	case 2:	/* m,r */
  3301  		*ctxt->andptr++ = t[0];
  3302  		asmando(ctxt, &p->from, t[1]);
  3303  		break;
  3304  
  3305  	case 3:	/* r,m - 2op */
  3306  		*ctxt->andptr++ = t[0];
  3307  		*ctxt->andptr++ = t[1];
  3308  		asmando(ctxt, &p->to, t[2]);
  3309  		ctxt->rexflag |= regrex[p->from.type] & (Rxr|0x40);
  3310  		break;
  3311  
  3312  	case 4:	/* m,r - 2op */
  3313  		*ctxt->andptr++ = t[0];
  3314  		*ctxt->andptr++ = t[1];
  3315  		asmando(ctxt, &p->from, t[2]);
  3316  		ctxt->rexflag |= regrex[p->to.type] & (Rxr|0x40);
  3317  		break;
  3318  
  3319  	case 5:	/* load full pointer, trash heap */
  3320  		if(t[0])
  3321  			*ctxt->andptr++ = t[0];
  3322  		switch(p->to.index) {
  3323  		default:
  3324  			goto bad;
  3325  		case D_DS:
  3326  			*ctxt->andptr++ = 0xc5;
  3327  			break;
  3328  		case D_SS:
  3329  			*ctxt->andptr++ = 0x0f;
  3330  			*ctxt->andptr++ = 0xb2;
  3331  			break;
  3332  		case D_ES:
  3333  			*ctxt->andptr++ = 0xc4;
  3334  			break;
  3335  		case D_FS:
  3336  			*ctxt->andptr++ = 0x0f;
  3337  			*ctxt->andptr++ = 0xb4;
  3338  			break;
  3339  		case D_GS:
  3340  			*ctxt->andptr++ = 0x0f;
  3341  			*ctxt->andptr++ = 0xb5;
  3342  			break;
  3343  		}
  3344  		asmand(ctxt, &p->from, &p->to);
  3345  		break;
  3346  
  3347  	case 6:	/* double shift */
  3348  		if(t[0] == Pw){
  3349  			if(p->mode != 64)
  3350  				ctxt->diag("asmins: illegal 64: %P", p);
  3351  			ctxt->rexflag |= Pw;
  3352  			t++;
  3353  		}else if(t[0] == Pe){
  3354  			*ctxt->andptr++ = Pe;
  3355  			t++;
  3356  		}
  3357  		z = p->from.type;
  3358  		switch(z) {
  3359  		default:
  3360  			goto bad;
  3361  		case D_CONST:
  3362  			*ctxt->andptr++ = 0x0f;
  3363  			*ctxt->andptr++ = t[0];
  3364  			asmandsz(ctxt, &p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  3365  			*ctxt->andptr++ = p->from.offset;
  3366  			break;
  3367  		case D_CL:
  3368  		case D_CX:
  3369  			*ctxt->andptr++ = 0x0f;
  3370  			*ctxt->andptr++ = t[1];
  3371  			asmandsz(ctxt, &p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  3372  			break;
  3373  		}
  3374  		break;
  3375  	
  3376  	case 7:	/* mov tls, r */
  3377  		// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3378  		// where you load the TLS base register into a register and then index off that
  3379  		// register to access the actual TLS variables. Systems that allow direct TLS access
  3380  		// are handled in prefixof above and should not be listed here.
  3381  		switch(ctxt->headtype) {
  3382  		default:
  3383  			sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype));
  3384  
  3385  		case Hplan9:
  3386  			if(ctxt->plan9privates == nil)
  3387  				ctxt->plan9privates = linklookup(ctxt, "_privates", 0);
  3388  			memset(&pp.from, 0, sizeof pp.from);
  3389  			pp.from.type = D_EXTERN;
  3390  			pp.from.sym = ctxt->plan9privates;
  3391  			pp.from.offset = 0;
  3392  			pp.from.index = D_NONE;
  3393  			ctxt->rexflag |= Pw;
  3394  			*ctxt->andptr++ = 0x8B;
  3395  			asmand(ctxt, &pp.from, &p->to);
  3396  			break;
  3397  
  3398  		case Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  3399  			// TLS base is 0(FS).
  3400  			pp.from = p->from;
  3401  			pp.from.type = D_INDIR+D_NONE;
  3402  			pp.from.offset = 0;
  3403  			pp.from.index = D_NONE;
  3404  			pp.from.scale = 0;
  3405  			ctxt->rexflag |= Pw;
  3406  			*ctxt->andptr++ = 0x64; // FS
  3407  			*ctxt->andptr++ = 0x8B;
  3408  			asmand(ctxt, &pp.from, &p->to);
  3409  			break;
  3410  		
  3411  		case Hwindows:
  3412  			// Windows TLS base is always 0x28(GS).
  3413  			pp.from = p->from;
  3414  			pp.from.type = D_INDIR+D_GS;
  3415  			pp.from.offset = 0x28;
  3416  			pp.from.index = D_NONE;
  3417  			pp.from.scale = 0;
  3418  			ctxt->rexflag |= Pw;
  3419  			*ctxt->andptr++ = 0x65; // GS
  3420  			*ctxt->andptr++ = 0x8B;
  3421  			asmand(ctxt, &pp.from, &p->to);
  3422  			break;
  3423  		
  3424  		case Hhaiku:
  3425  			// Haiku TLS base is 0(FS).
  3426  			pp.from = p->from;
  3427  			pp.from.type = D_INDIR+D_GS;
  3428  			pp.from.offset = 0;
  3429  			pp.from.index = D_NONE;
  3430  			pp.from.scale = 0;
  3431  			ctxt->rexflag |= Pw;
  3432  			*ctxt->andptr++ = 0x64; // FS
  3433  			*ctxt->andptr++ = 0x8B;
  3434  			asmand(ctxt, &pp.from, &p->to);
  3435  			break;
  3436  		}
  3437  		break;
  3438  	}
  3439  }
  3440  
  3441  static uchar naclret[] = {
  3442  	0x5e, // POPL SI
  3443  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  3444  	0x83, 0xe6, 0xe0,	// ANDL $~31, SI
  3445  	0x4c, 0x01, 0xfe,	// ADDQ R15, SI
  3446  	0xff, 0xe6, // JMP SI
  3447  };
  3448  
  3449  static uchar naclspfix[] = {
  3450  	0x4c, 0x01, 0xfc, // ADDQ R15, SP
  3451  };
  3452  
  3453  static uchar naclbpfix[] = {
  3454  	0x4c, 0x01, 0xfd, // ADDQ R15, BP
  3455  };
  3456  
  3457  static uchar naclmovs[] = {
  3458  	0x89, 0xf6,	// MOVL SI, SI
  3459  	0x49, 0x8d, 0x34, 0x37,	// LEAQ (R15)(SI*1), SI
  3460  	0x89, 0xff,	// MOVL DI, DI
  3461  	0x49, 0x8d, 0x3c, 0x3f,	// LEAQ (R15)(DI*1), DI
  3462  };
  3463  
  3464  static uchar naclstos[] = {
  3465  	0x89, 0xff,	// MOVL DI, DI
  3466  	0x49, 0x8d, 0x3c, 0x3f,	// LEAQ (R15)(DI*1), DI
  3467  };
  3468  
  3469  static void
  3470  nacltrunc(Link *ctxt, int reg)
  3471  {	
  3472  	if(reg >= D_R8)
  3473  		*ctxt->andptr++ = 0x45;
  3474  	reg = (reg - D_AX) & 7;
  3475  	*ctxt->andptr++ = 0x89;
  3476  	*ctxt->andptr++ = (3<<6) | (reg<<3) | reg;
  3477  }
  3478  
  3479  static void
  3480  asmins(Link *ctxt, Prog *p)
  3481  {
  3482  	int i, n, np, c;
  3483  	uchar *and0;
  3484  	Reloc *r;
  3485  	
  3486  	ctxt->andptr = ctxt->and;
  3487  	ctxt->asmode = p->mode;
  3488  	
  3489  	if(p->as == AUSEFIELD) {
  3490  		r = addrel(ctxt->cursym);
  3491  		r->off = 0;
  3492  		r->siz = 0;
  3493  		r->sym = p->from.sym;
  3494  		r->type = R_USEFIELD;
  3495  		return;
  3496  	}
  3497  	
  3498  	if(ctxt->headtype == Hnacl) {
  3499  		if(p->as == AREP) {
  3500  			ctxt->rep++;
  3501  			return;
  3502  		}
  3503  		if(p->as == AREPN) {
  3504  			ctxt->repn++;
  3505  			return;
  3506  		}
  3507  		if(p->as == ALOCK) {
  3508  			ctxt->lock++;
  3509  			return;
  3510  		}
  3511  		if(p->as != ALEAQ && p->as != ALEAL) {
  3512  			if(p->from.index != D_NONE && p->from.scale > 0)
  3513  				nacltrunc(ctxt, p->from.index);
  3514  			if(p->to.index != D_NONE && p->to.scale > 0)
  3515  				nacltrunc(ctxt, p->to.index);
  3516  		}
  3517  		switch(p->as) {
  3518  		case ARET:
  3519  			memmove(ctxt->andptr, naclret, sizeof naclret);
  3520  			ctxt->andptr += sizeof naclret;
  3521  			return;
  3522  		case ACALL:
  3523  		case AJMP:
  3524  			if(D_AX <= p->to.type && p->to.type <= D_DI) {
  3525  				// ANDL $~31, reg
  3526  				*ctxt->andptr++ = 0x83;
  3527  				*ctxt->andptr++ = 0xe0 | (p->to.type - D_AX);
  3528  				*ctxt->andptr++ = 0xe0;
  3529  				// ADDQ R15, reg
  3530  				*ctxt->andptr++ = 0x4c;
  3531  				*ctxt->andptr++ = 0x01;
  3532  				*ctxt->andptr++ = 0xf8 | (p->to.type - D_AX);
  3533  			}
  3534  			if(D_R8 <= p->to.type && p->to.type <= D_R15) {
  3535  				// ANDL $~31, reg
  3536  				*ctxt->andptr++ = 0x41;
  3537  				*ctxt->andptr++ = 0x83;
  3538  				*ctxt->andptr++ = 0xe0 | (p->to.type - D_R8);
  3539  				*ctxt->andptr++ = 0xe0;
  3540  				// ADDQ R15, reg
  3541  				*ctxt->andptr++ = 0x4d;
  3542  				*ctxt->andptr++ = 0x01;
  3543  				*ctxt->andptr++ = 0xf8 | (p->to.type - D_R8);
  3544  			}
  3545  			break;
  3546  		case AINT:
  3547  			*ctxt->andptr++ = 0xf4;
  3548  			return;
  3549  		case ASCASB:
  3550  		case ASCASW:
  3551  		case ASCASL:
  3552  		case ASCASQ:
  3553  		case ASTOSB:
  3554  		case ASTOSW:
  3555  		case ASTOSL:
  3556  		case ASTOSQ:
  3557  			memmove(ctxt->andptr, naclstos, sizeof naclstos);
  3558  			ctxt->andptr += sizeof naclstos;
  3559  			break;
  3560  		case AMOVSB:
  3561  		case AMOVSW:
  3562  		case AMOVSL:
  3563  		case AMOVSQ:
  3564  			memmove(ctxt->andptr, naclmovs, sizeof naclmovs);
  3565  			ctxt->andptr += sizeof naclmovs;
  3566  			break;
  3567  		}
  3568  		if(ctxt->rep) {
  3569  			*ctxt->andptr++ = 0xf3;
  3570  			ctxt->rep = 0;
  3571  		}
  3572  		if(ctxt->repn) {
  3573  			*ctxt->andptr++ = 0xf2;
  3574  			ctxt->repn = 0;
  3575  		}
  3576  		if(ctxt->lock) {
  3577  			*ctxt->andptr++ = 0xf0;
  3578  			ctxt->lock = 0;
  3579  		}
  3580  	}		
  3581  
  3582  	ctxt->rexflag = 0;
  3583  	and0 = ctxt->andptr;
  3584  	ctxt->asmode = p->mode;
  3585  	doasm(ctxt, p);
  3586  	if(ctxt->rexflag){
  3587  		/*
  3588  		 * as befits the whole approach of the architecture,
  3589  		 * the rex prefix must appear before the first opcode byte
  3590  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  3591  		 * before the 0f opcode escape!), or it might be ignored.
  3592  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  3593  		 */
  3594  		if(p->mode != 64)
  3595  			ctxt->diag("asmins: illegal in mode %d: %P", p->mode, p);
  3596  		n = ctxt->andptr - and0;
  3597  		for(np = 0; np < n; np++) {
  3598  			c = and0[np];
  3599  			if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
  3600  				break;
  3601  		}
  3602  		memmove(and0+np+1, and0+np, n-np);
  3603  		and0[np] = 0x40 | ctxt->rexflag;
  3604  		ctxt->andptr++;
  3605  	}
  3606  	n = ctxt->andptr - ctxt->and;
  3607  	for(i=ctxt->cursym->nr-1; i>=0; i--) {
  3608  		r = ctxt->cursym->r+i;
  3609  		if(r->off < p->pc)
  3610  			break;
  3611  		if(ctxt->rexflag)
  3612  			r->off++;
  3613  		if(r->type == R_PCREL || r->type == R_CALL)
  3614  			r->add -= p->pc + n - (r->off + r->siz);
  3615  	}
  3616  
  3617  	if(ctxt->headtype == Hnacl && p->as != ACMPL && p->as != ACMPQ) {
  3618  		switch(p->to.type) {
  3619  		case D_SP:
  3620  			memmove(ctxt->andptr, naclspfix, sizeof naclspfix);
  3621  			ctxt->andptr += sizeof naclspfix;
  3622  			break;
  3623  		case D_BP:
  3624  			memmove(ctxt->andptr, naclbpfix, sizeof naclbpfix);
  3625  			ctxt->andptr += sizeof naclbpfix;
  3626  			break;
  3627  		}
  3628  	}
  3629  }