github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/liblink/asm8.c (about)

     1  // Inferno utils/8l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/8l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // Instruction layout.
    32  
    33  #include <u.h>
    34  #include <libc.h>
    35  #include <bio.h>
    36  #include <link.h>
    37  #include "../cmd/8l/8.out.h"
    38  #include "../runtime/stack.h"
    39  
    40  enum	/* alignment constants */
    41  {
    42  	MaxAlign = 32,	// max data alignment
    43  	FuncAlign = 16	/* NOTE(review): presumably the alignment applied to function starts -- confirm at the use site */
    44  };
    45  
    46  typedef	struct	Optab	Optab;
    47  
    48  struct	Optab	/* one row of optab[]: an instruction code plus the data describing its encodings */
    49  {
    50  	short	as;	/* instruction code; optab[] rows are initialized with the A* constants */
    51  	uchar*	ytab;	/* one of the y* tables: 4-byte rows (Y*, Y*, Z*, count) ended by a single 0 byte */
    52  	uchar	prefix;	/* one of the P* enum values (Px, Pe, Pm, Pq, Pb, Pf2, Pf3) */
    53  	uchar	op[13];	/* opcode bytes; NOTE(review): the count column of the ytab rows appears to say how many of these each row consumes -- confirm against the encoder */
    54  };
    55  
    56  enum	/* three constant families share this enum: Y* (operand classes), Z* (encoding kinds), P* (prefix selectors) */
    57  {
    58  	Yxxx		= 0,	/* Y*: values used in the first two columns of every y* table row */
    59  	Ynone,
    60  	Yi0,
    61  	Yi1,
    62  	Yi8,
    63  	Yi32,
    64  	Yiauto,
    65  	Yal,
    66  	Ycl,
    67  	Yax,
    68  	Ycx,
    69  	Yrb,
    70  	Yrl,
    71  	Yrf,
    72  	Yf0,
    73  	Yrx,
    74  	Ymb,
    75  	Yml,
    76  	Ym,
    77  	Ybr,
    78  	Ycol,
    79  	Ytls,
    80  
    81  	Ycs,	Yss,	Yds,	Yes,	Yfs,	Ygs,
    82  	Ygdtr,	Yidtr,	Yldtr,	Ymsw,	Ytask,
    83  	Ycr0,	Ycr1,	Ycr2,	Ycr3,	Ycr4,	Ycr5,	Ycr6,	Ycr7,
    84  	Ydr0,	Ydr1,	Ydr2,	Ydr3,	Ydr4,	Ydr5,	Ydr6,	Ydr7,
    85  	Ytr0,	Ytr1,	Ytr2,	Ytr3,	Ytr4,	Ytr5,	Ytr6,	Ytr7,
    86  	Ymr, Ymm,
    87  	Yxr, Yxm,
    88  	Ymax,	/* count of Y* values; ycover[] is dimensioned Ymax*Ymax */
    89  
    90  	Zxxx		= 0,	/* Z*: values used in the third column of a y* row; numbering restarts at 0, so Z* and Y* values overlap */
    91  
    92  	Zlit,
    93  	Zlitm_r,
    94  	Z_rp,
    95  	Zbr,
    96  	Zcall,
    97  	Zcallcon,
    98  	Zcallind,
    99  	Zcallindreg,
   100  	Zib_,
   101  	Zib_rp,
   102  	Zibo_m,
   103  	Zil_,
   104  	Zil_rp,
   105  	Zilo_m,
   106  	Zjmp,
   107  	Zjmpcon,
   108  	Zloop,
   109  	Zm_o,
   110  	Zm_r,
   111  	Zm2_r,
   112  	Zm_r_xm,
   113  	Zm_r_i_xm,
   114  	Zaut_r,
   115  	Zo_m,
   116  	Zpseudo,
   117  	Zr_m,
   118  	Zr_m_xm,
   119  	Zr_m_i_xm,
   120  	Zrp_,
   121  	Z_ib,
   122  	Z_il,
   123  	Zm_ibo,
   124  	Zm_ilo,
   125  	Zib_rr,
   126  	Zil_rr,
   127  	Zclr,
   128  	Zibm_r,	/* mmx1,mmx2/mem64,imm8 */
   129  	Zbyte,
   130  	Zmov,
   131  	Zmax,	/* count of Z* values */
   132  
   133  	Px		= 0,	/* P*: values stored in Optab.prefix (some optab rows also place Pe/Pf2/Pf3 inside op[]); NOTE(review): Px presumably means "no prefix" -- confirm */
   134  	Pe		= 0x66,	/* operand escape */
   135  	Pm		= 0x0f,	/* 2byte opcode escape */
   136  	Pq		= 0xff,	/* both escape */
   137  	Pb		= 0xfe,	/* byte operands */
   138  	Pf2		= 0xf2,	/* xmm escape 1 */
   139  	Pf3		= 0xf3,	/* xmm escape 2 */
   140  };
   141  
   142  static	uchar	ycover[Ymax*Ymax];	/* Ymax x Ymax byte matrix over the Y* classes; it is filled and read outside this excerpt */
   143  static	int	reg[D_NONE];	/* D_NONE ints; NOTE(review): presumably register numbers indexed by D_* operand type -- confirm */
   144  static	void	asmins(Link *ctxt, Prog *p);	/* forward declaration; the definition is not in this excerpt */
   145  
   146  static uchar	ynone[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; this one matches (Ynone, Ynone) as Zlit and is shared by operand-less optab rows such as AAAA, ACLC, ARET */
   147  {
   148  	Ynone,	Ynone,	Zlit,	1,
   149  	0
   150  };
   151  static uchar	ytext[] =	/* referenced by the ATEXT optab row; its only row is Zpseudo */
   152  {
   153  	Ymb,	Yi32,	Zpseudo,1,
   154  	0
   155  };
   156  static uchar	ynop[] =	/* referenced by the ANOP optab row; every accepted operand shape is Zpseudo */
   157  {
   158  	Ynone,	Ynone,	Zpseudo,0,
   159  	Ynone,	Yiauto,	Zpseudo,0,
   160  	Ynone,	Yml,	Zpseudo,0,
   161  	Ynone,	Yrf,	Zpseudo,0,
   162  	Yiauto,	Ynone,	Zpseudo,0,
   163  	Ynone,	Yxr,	Zpseudo,0,
   164  	Yml,	Ynone,	Zpseudo,0,
   165  	Yrf,	Ynone,	Zpseudo,0,
   166  	Yxr,	Ynone,	Zpseudo,1,	/* NOTE(review): count is 1 here but 0 in the rows above -- confirm whether that is intentional */
   167  	0
   168  };
   169  static uchar	yfuncdata[] =	/* no user among the optab rows visible here; NOTE(review): presumably AFUNCDATA -- confirm */
   170  {
   171  	Yi32,	Ym,	Zpseudo,	0,
   172  	0
   173  };
   174  static uchar	ypcdata[] =	/* no user among the optab rows visible here; NOTE(review): presumably APCDATA -- confirm */
   175  {
   176  	Yi32,	Yi32,	Zpseudo,	0,
   177  	0,
   178  };
   179  static uchar	yxorb[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by AADCB, AADDB, AANDB, AORB, ASBBB, ASUBB, AXORB */
   180  {
   181  	Yi32,	Yal,	Zib_,	1,
   182  	Yi32,	Ymb,	Zibo_m,	2,
   183  	Yrb,	Ymb,	Zr_m,	1,
   184  	Ymb,	Yrb,	Zm_r,	1,
   185  	0
   186  };
   187  static uchar	yxorl[] =	/* referenced by the L and W rows of ADC, AND, OR, SBB and XOR; the Yi8 row comes before the Yi32 rows */
   188  {
   189  	Yi8,	Yml,	Zibo_m,	2,
   190  	Yi32,	Yax,	Zil_,	1,
   191  	Yi32,	Yml,	Zilo_m,	2,
   192  	Yrl,	Yml,	Zr_m,	1,
   193  	Yml,	Yrl,	Zm_r,	1,
   194  	0
   195  };
   196  static uchar	yaddl[] =	/* row-for-row identical to yxorl; referenced by AADDL/AADDW and ASUBL/ASUBW */
   197  {
   198  	Yi8,	Yml,	Zibo_m,	2,
   199  	Yi32,	Yax,	Zil_,	1,
   200  	Yi32,	Yml,	Zilo_m,	2,
   201  	Yrl,	Yml,	Zr_m,	1,
   202  	Yml,	Yrl,	Zm_r,	1,
   203  	0
   204  };
   205  static uchar	yincb[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by ADECB and AINCB */
   206  {
   207  	Ynone,	Ymb,	Zo_m,	2,
   208  	0
   209  };
   210  static uchar	yincl[] =	/* referenced by ADECL/ADECW and AINCL/AINCW; the Yrl row (Z_rp) precedes the Yml row (Zo_m) */
   211  {
   212  	Ynone,	Yrl,	Z_rp,	1,
   213  	Ynone,	Yml,	Zo_m,	2,
   214  	0
   215  };
   216  static uchar	ycmpb[] =	/* referenced by ACMPB; same shapes as yxorb with the two operand columns exchanged */
   217  {
   218  	Yal,	Yi32,	Z_ib,	1,
   219  	Ymb,	Yi32,	Zm_ibo,	2,
   220  	Ymb,	Yrb,	Zm_r,	1,
   221  	Yrb,	Ymb,	Zr_m,	1,
   222  	0
   223  };
   224  static uchar	ycmpl[] =	/* referenced by ACMPL and ACMPW; same shapes as yxorl with the two operand columns exchanged */
   225  {
   226  	Yml,	Yi8,	Zm_ibo,	2,
   227  	Yax,	Yi32,	Z_il,	1,
   228  	Yml,	Yi32,	Zm_ilo,	2,
   229  	Yml,	Yrl,	Zm_r,	1,
   230  	Yrl,	Yml,	Zr_m,	1,
   231  	0
   232  };
   233  static uchar	yshb[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by the byte rows of RCL, RCR, ROL, ROR, SAL, SAR, SHL, SHR; unlike yshl there is no Ycl row */
   234  {
   235  	Yi1,	Ymb,	Zo_m,	2,
   236  	Yi32,	Ymb,	Zibo_m,	2,
   237  	Ycx,	Ymb,	Zo_m,	2,
   238  	0
   239  };
   240  static uchar	yshl[] =	/* referenced by the L and W rows of RCL, RCR, ROL, ROR, SAL, SAR, SHL, SHR */
   241  {
   242  	Yi1,	Yml,	Zo_m,	2,
   243  	Yi32,	Yml,	Zibo_m,	2,
   244  	Ycl,	Yml,	Zo_m,	2,
   245  	Ycx,	Yml,	Zo_m,	2,
   246  	0
   247  };
   248  static uchar	ytestb[] =	/* referenced by ATESTB; row-for-row identical to yxorb */
   249  {
   250  	Yi32,	Yal,	Zib_,	1,
   251  	Yi32,	Ymb,	Zibo_m,	2,
   252  	Yrb,	Ymb,	Zr_m,	1,
   253  	Ymb,	Yrb,	Zm_r,	1,
   254  	0
   255  };
   256  static uchar	ytestl[] =	/* referenced by ATESTL and ATESTW; like yxorl but without the Yi8 row */
   257  {
   258  	Yi32,	Yax,	Zil_,	1,
   259  	Yi32,	Yml,	Zilo_m,	2,
   260  	Yrl,	Yml,	Zr_m,	1,
   261  	Yml,	Yrl,	Zm_r,	1,
   262  	0
   263  };
   264  static uchar	ymovb[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by AMOVB */
   265  {
   266  	Yrb,	Ymb,	Zr_m,	1,
   267  	Ymb,	Yrb,	Zm_r,	1,
   268  	Yi32,	Yrb,	Zib_rp,	1,
   269  	Yi32,	Ymb,	Zibo_m,	2,
   270  	0
   271  };
   272  static uchar	ymovw[] =	/* referenced by AMOVW; the counts add up to 9, matching the 9 op[] bytes of that optab row */
   273  {
   274  	Yrl,	Yml,	Zr_m,	1,
   275  	Yml,	Yrl,	Zm_r,	1,
   276  	Yi0,	Yrl,	Zclr,	1+2,
   277  //	Yi0,	Yml,	Zibo_m,	2,	// shorter but slower AND $0,dst
   278  	Yi32,	Yrl,	Zil_rp,	1,
   279  	Yi32,	Yml,	Zilo_m,	2,
   280  	Yiauto,	Yrl,	Zaut_r,	1,
   281  	0
   282  };
   283  static uchar	ymovl[] =	/* referenced by AMOVL; ymovw plus two Yxr rows, counts add up to 13, which fills op[13] exactly */
   284  {
   285  	Yrl,	Yml,	Zr_m,	1,
   286  	Yml,	Yrl,	Zm_r,	1,
   287  	Yi0,	Yrl,	Zclr,	1+2,
   288  //	Yi0,	Yml,	Zibo_m,	2,	// shorter but slower AND $0,dst
   289  	Yi32,	Yrl,	Zil_rp,	1,
   290  	Yi32,	Yml,	Zilo_m,	2,
   291  	Yml,	Yxr,	Zm_r_xm,	2,	// XMM MOVD (32 bit)
   292  	Yxr,	Yml,	Zr_m_xm,	2,	// XMM MOVD (32 bit)
   293  	Yiauto,	Yrl,	Zaut_r,	1,
   294  	0
   295  };
   296  static uchar	ymovq[] =	/* referenced by AMOVQ; only the (Yml, Yxr) direction is listed */
   297  {
   298  	Yml,	Yxr,	Zm_r_xm,	2,
   299  	0
   300  };
   301  static uchar	ym_rl[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by ALEAL and ALEAW */
   302  {
   303  	Ym,	Yrl,	Zm_r,	1,
   304  	0
   305  };
   306  static uchar	yrl_m[] =	/* referenced by ABOUNDL and ABOUNDW */
   307  {
   308  	Yrl,	Ym,	Zr_m,	1,
   309  	0
   310  };
   311  static uchar	ymb_rl[] =	/* referenced by AMOVBLSX, AMOVBLZX, AMOVBWSX, AMOVBWZX */
   312  {
   313  	Ymb,	Yrl,	Zm_r,	1,
   314  	0
   315  };
   316  static uchar	yml_rl[] =	/* referenced by the BSF/BSR/BT* rows, LAR, LSL, MOVWLSX/MOVWLZX and the CMOVL*/CMOVW* rows */
   317  {
   318  	Yml,	Yrl,	Zm_r,	1,
   319  	0
   320  };
   321  static uchar	yrb_mb[] =	/* referenced by ACMPXCHGB and AXADDB */
   322  {
   323  	Yrb,	Ymb,	Zr_m,	1,
   324  	0
   325  };
   326  static uchar	yrl_ml[] =	/* referenced by AARPL, ACMPXCHGL/ACMPXCHGW and AXADDL/AXADDW */
   327  {
   328  	Yrl,	Yml,	Zr_m,	1,
   329  	0
   330  };
   331  static uchar	yml_mb[] =	/* referenced by AXCHGB; despite the name, both rows pair Yrb with Ymb */
   332  {
   333  	Yrb,	Ymb,	Zr_m,	1,
   334  	Ymb,	Yrb,	Zm_r,	1,
   335  	0
   336  };
   337  static uchar	yxchg[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by AXCHGL and AXCHGW; the Yax rows come before the general Yrl/Yml rows */
   338  {
   339  	Yax,	Yrl,	Z_rp,	1,
   340  	Yrl,	Yax,	Zrp_,	1,
   341  	Yrl,	Yml,	Zr_m,	1,
   342  	Yml,	Yrl,	Zm_r,	1,
   343  	0
   344  };
   345  static uchar	ydivl[] =	/* referenced by the L and W rows of DIV, IDIV and MUL, and by AVERR and AVERW */
   346  {
   347  	Yml,	Ynone,	Zm_o,	2,
   348  	0
   349  };
   350  static uchar	ydivb[] =	/* referenced by ADIVB, AIDIVB, AIMULB and AMULB */
   351  {
   352  	Ymb,	Ynone,	Zm_o,	2,
   353  	0
   354  };
   355  static uchar	yimul[] =	/* referenced by AIMULL and AIMULW; the ydivl row plus two immediate rows */
   356  {
   357  	Yml,	Ynone,	Zm_o,	2,
   358  	Yi8,	Yrl,	Zib_rr,	1,
   359  	Yi32,	Yrl,	Zil_rr,	1,
   360  	0
   361  };
   362  static uchar	ybyte[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by ABYTE, AWORD and ALONG, whose op[0] is 1, 2 and 4 respectively */
   363  {
   364  	Yi32,	Ynone,	Zbyte,	1,
   365  	0
   366  };
   367  static uchar	yin[] =	/* referenced by the B/L/W rows of IN and OUT: an immediate form and an operand-less form */
   368  {
   369  	Yi32,	Ynone,	Zib_,	1,
   370  	Ynone,	Ynone,	Zlit,	1,
   371  	0
   372  };
   373  static uchar	yint[] =	/* referenced by AINT */
   374  {
   375  	Yi32,	Ynone,	Zib_,	1,
   376  	0
   377  };
   378  static uchar	ypushl[] =	/* referenced by APUSHL and APUSHW; the Yi8 row comes before the Yi32 row */
   379  {
   380  	Yrl,	Ynone,	Zrp_,	1,
   381  	Ym,	Ynone,	Zm_o,	2,
   382  	Yi8,	Ynone,	Zib_,	1,
   383  	Yi32,	Ynone,	Zil_,	1,
   384  	0
   385  };
   386  static uchar	ypopl[] =	/* referenced by APOPL and APOPW */
   387  {
   388  	Ynone,	Yrl,	Z_rp,	1,
   389  	Ynone,	Ym,	Zo_m,	2,
   390  	0
   391  };
   392  static uchar	ybswap[] =	/* referenced by ABSWAPL */
   393  {
   394  	Ynone,	Yrl,	Z_rp,	1,
   395  	0,
   396  };
   397  static uchar	yscond[] =	/* referenced by the SETcc rows and also by the NEG/NOT rows and ACMPXCHG8B */
   398  {
   399  	Ynone,	Ymb,	Zo_m,	2,
   400  	0
   401  };
   402  static uchar	yjcond[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by the conditional-jump rows (AJCC ... AJPS); NOTE(review): counts here do not add up to the op[] lengths, so Zbr presumably picks opcode bytes itself -- confirm */
   403  {
   404  	Ynone,	Ybr,	Zbr,	0,
   405  	Yi0,	Ybr,	Zbr,	0,
   406  	Yi1,	Ybr,	Zbr,	1,
   407  	0
   408  };
   409  static uchar	yloop[] =	/* referenced by AJCXZL, AJCXZW, ALOOP, ALOOPEQ and ALOOPNE */
   410  {
   411  	Ynone,	Ybr,	Zloop,	1,
   412  	0
   413  };
   414  static uchar	ycall[] =	/* referenced by ACALL; counts add up to 5, matching the 5 op[] bytes of that optab row */
   415  {
   416  	Ynone,	Yml,	Zcallindreg,	0,
   417  	Yrx,	Yrx,	Zcallindreg,	2,
   418  	Ynone,	Ycol,	Zcallind,	2,
   419  	Ynone,	Ybr,	Zcall,	0,
   420  	Ynone,	Yi32,	Zcallcon,	1,
   421  	0
   422  };
   423  static uchar	yduff[] =	/* no user among the optab rows visible here; NOTE(review): presumably the DUFFCOPY/DUFFZERO rows -- confirm */
   424  {
   425  	Ynone,	Yi32,	Zcall,	1,
   426  	0
   427  };
   428  static uchar	yjmp[] =	/* referenced by AJMP */
   429  {
   430  	Ynone,	Yml,	Zo_m,	2,
   431  	Ynone,	Ybr,	Zjmp,	0,
   432  	Ynone,	Yi32,	Zjmpcon,	1,
   433  	0
   434  };
   435  
   436  static uchar	yfmvd[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by AFMOVD; covers Ym<->Yf0 and Yrf<->Yf0 in both directions */
   437  {
   438  	Ym,	Yf0,	Zm_o,	2,
   439  	Yf0,	Ym,	Zo_m,	2,
   440  	Yrf,	Yf0,	Zm_o,	2,
   441  	Yf0,	Yrf,	Zo_m,	2,
   442  	0
   443  };
   444  static uchar	yfmvdp[] =	/* referenced by AFMOVDP; only the Yf0-as-first-operand rows */
   445  {
   446  	Yf0,	Ym,	Zo_m,	2,
   447  	Yf0,	Yrf,	Zo_m,	2,
   448  	0
   449  };
   450  static uchar	yfmvf[] =	/* referenced by AFMOVF, AFMOVL and AFMOVW */
   451  {
   452  	Ym,	Yf0,	Zm_o,	2,
   453  	Yf0,	Ym,	Zo_m,	2,
   454  	0
   455  };
   456  static uchar	yfmvx[] =	/* referenced by AFMOVB/AFMOVV/AFMOVX, the AFCOM{F,I,L,W}[P] rows and the W/L/F rows of FADD, FMUL, FSUB[R], FDIV[R] */
   457  {
   458  	Ym,	Yf0,	Zm_o,	2,
   459  	0
   460  };
   461  static uchar	yfmvp[] =	/* referenced by AFMOVBP, AFMOVFP, AFMOVLP, AFMOVVP, AFMOVWP and AFMOVXP */
   462  {
   463  	Yf0,	Ym,	Zo_m,	2,
   464  	0
   465  };
   466  static uchar	yfcmv[] =	/* referenced by the AFCMOV* rows */
   467  {
   468  	Yrf,	Yf0,	Zm_o,	2,
   469  	0
   470  };
   471  static uchar	yfadd[] =	/* referenced by AFCOMD/AFCOMDP and the D rows of FADD, FMUL, FSUB[R], FDIV[R] */
   472  {
   473  	Ym,	Yf0,	Zm_o,	2,
   474  	Yrf,	Yf0,	Zm_o,	2,
   475  	Yf0,	Yrf,	Zo_m,	2,
   476  	0
   477  };
   478  static uchar	yfaddp[] =	/* referenced by the DP rows of FADD, FMUL, FSUB[R], FDIV[R] */
   479  {
   480  	Yf0,	Yrf,	Zo_m,	2,
   481  	0
   482  };
   483  static uchar	yfxch[] =	/* referenced by AFXCHD; accepts the two operands in either order */
   484  {
   485  	Yf0,	Yrf,	Zo_m,	2,
   486  	Yrf,	Yf0,	Zm_o,	2,
   487  	0
   488  };
   489  static uchar	ycompp[] =	/* referenced by AFCOMDPP and the AFUCOM* rows */
   490  {
   491  	Yf0,	Yrf,	Zo_m,	2,	/* botch is really f0,f1 */
   492  	0
   493  };
   494  static uchar	ystsw[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; referenced by AFSTSW: a Ym destination (Zo_m) or a Yax destination (Zlit) */
   495  {
   496  	Ynone,	Ym,	Zo_m,	2,
   497  	Ynone,	Yax,	Zlit,	1,
   498  	0
   499  };
   500  static uchar	ystcw[] =	/* referenced by AFLDCW, AFLDENV, AFSTCW and AFSTENV; the Ym operand may be in either position */
   501  {
   502  	Ynone,	Ym,	Zo_m,	2,
   503  	Ym,	Ynone,	Zm_o,	2,
   504  	0
   505  };
   506  static uchar	ysvrs[] =	/* referenced by AFRSTOR and AFSAVE; row-for-row identical to ystcw */
   507  {
   508  	Ynone,	Ym,	Zo_m,	2,
   509  	Ym,	Ynone,	Zm_o,	2,
   510  	0
   511  };
   512  static uchar	ymskb[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; no user among the optab rows visible here */
   513  {
   514  	Yxr,	Yrl,	Zm_r_xm,	2,
   515  	Ymr,	Yrl,	Zm_r_xm,	1,
   516  	0
   517  };
   518  static uchar	yxm[] = 	/* referenced by the ADDPD/PS/SD/SS, ANDNPD/PS and ANDPD/PS rows */
   519  {
   520  	Yxm,	Yxr,	Zm_r_xm,	1,
   521  	0
   522  };
   523  static uchar	yxcvm1[] = 	/* referenced by ACVTPD2PL; destination is Yxr or Ymr */
   524  {
   525  	Yxm,	Yxr,	Zm_r_xm,	2,
   526  	Yxm,	Ymr,	Zm_r_xm,	2,
   527  	0
   528  };
   529  static uchar	yxcvm2[] =	/* referenced by ACVTPL2PD and ACVTPL2PS; first operand is Yxm or Ymm */
   530  {
   531  	Yxm,	Yxr,	Zm_r_xm,	2,
   532  	Ymm,	Yxr,	Zm_r_xm,	2,
   533  	0
   534  };
   535  static uchar	yxmq[] = 	/* same single row as yxm but with count 2; no user among the optab rows visible here */
   536  {
   537  	Yxm,	Yxr,	Zm_r_xm,	2,
   538  	0
   539  };
   540  static uchar	yxr[] = 	/* Yxr to Yxr only; no user among the optab rows visible here */
   541  {
   542  	Yxr,	Yxr,	Zm_r_xm,	1,
   543  	0
   544  };
   545  static uchar	yxr_ml[] =	/* Yxr to Yml; no user among the optab rows visible here */
   546  {
   547  	Yxr,	Yml,	Zr_m_xm,	1,
   548  	0
   549  };
   550  static uchar	yxcmp[] =	/* referenced by ACOMISD and ACOMISS; same row as yxm */
   551  {
   552  	Yxm,	Yxr, Zm_r_xm,	1,
   553  	0
   554  };
   555  static uchar	yxcmpi[] =	/* referenced by ACMPPD, ACMPPS, ACMPSD and ACMPSS; uses the Zm_r_i_xm kind */
   556  {
   557  	Yxm,	Yxr, Zm_r_i_xm,	2,
   558  	0
   559  };
   560  static uchar	yxmov[] =	/* Yxm<->Yxr in both directions; no user among the optab rows visible here */
   561  {
   562  	Yxm,	Yxr,	Zm_r_xm,	1,
   563  	Yxr,	Yxm,	Zr_m_xm,	1,
   564  	0
   565  };
   566  static uchar	yxcvfl[] = 	/* Yxm to Yrl; no user among the optab rows visible here */
   567  {
   568  	Yxm,	Yrl,	Zm_r_xm,	1,
   569  	0
   570  };
   571  static uchar	yxcvlf[] =	/* Yml to Yxr; no user among the optab rows visible here */
   572  {
   573  	Yml,	Yxr,	Zm_r_xm,	1,
   574  	0
   575  };
   576  /*
   577  static uchar	yxcvfq[] = 
   578  {
   579  	Yxm,	Yrl,	Zm_r_xm,	2,
   580  	0
   581  };
   582  static uchar	yxcvqf[] =
   583  {
   584  	Yml,	Yxr,	Zm_r_xm,	2,
   585  	0
   586  };
   587  */
   588  static uchar	yxrrl[] =	/* rows are (Y*, Y*, Z*, count), list ends with 0; Yxr to Yrl using the plain Zm_r kind; no user among the optab rows visible here */
   589  {
   590  	Yxr,	Yrl,	Zm_r,	1,
   591  	0
   592  };
   593  static uchar	yprefetch[] =	/* referenced by APREFETCHT0, APREFETCHT1, APREFETCHT2 and APREFETCHNTA */
   594  {
   595  	Ym,	Ynone,	Zm_o,	2,
   596  	0,
   597  };
   598  static uchar	yaes[] =	/* uses the Zlitm_r kind; no user among the optab rows visible here */
   599  {
   600  	Yxm,	Yxr,	Zlitm_r,	2,
   601  	0
   602  };
   603  static uchar	yinsrd[] =	/* uses the Zibm_r kind; no user among the optab rows visible here */
   604  {
   605  	Yml,	Yxr,	Zibm_r,	2,
   606  	0
   607  };
   608  static uchar	ymshufb[] =	/* uses the Zm2_r kind; no user among the optab rows visible here */
   609  {
   610  	Yxm,	Yxr,	Zm2_r,	2,
   611  	0
   612  };
   613  
   614  static Optab optab[] =
   615  /*	as, ytab, prefix, op[] */
   616  {
   617  	{ AXXX },
   618  	{ AAAA,		ynone,	Px, {0x37} },
   619  	{ AAAD,		ynone,	Px, {0xd5,0x0a} },
   620  	{ AAAM,		ynone,	Px, {0xd4,0x0a} },
   621  	{ AAAS,		ynone,	Px, {0x3f} },
   622  	{ AADCB,	yxorb,	Pb, {0x14,0x80,(02),0x10,0x10} },
   623  	{ AADCL,	yxorl,	Px, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   624  	{ AADCW,	yxorl,	Pe, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   625  	{ AADDB,	yxorb,	Px, {0x04,0x80,(00),0x00,0x02} },
   626  	{ AADDL,	yaddl,	Px, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   627  	{ AADDW,	yaddl,	Pe, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   628  	{ AADJSP },
   629  	{ AANDB,	yxorb,	Pb, {0x24,0x80,(04),0x20,0x22} },
   630  	{ AANDL,	yxorl,	Px, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   631  	{ AANDW,	yxorl,	Pe, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   632  	{ AARPL,	yrl_ml,	Px, {0x63} },
   633  	{ ABOUNDL,	yrl_m,	Px, {0x62} },
   634  	{ ABOUNDW,	yrl_m,	Pe, {0x62} },
   635  	{ ABSFL,	yml_rl,	Pm, {0xbc} },
   636  	{ ABSFW,	yml_rl,	Pq, {0xbc} },
   637  	{ ABSRL,	yml_rl,	Pm, {0xbd} },
   638  	{ ABSRW,	yml_rl,	Pq, {0xbd} },
   639  	{ ABTL,		yml_rl,	Pm, {0xa3} },
   640  	{ ABTW,		yml_rl,	Pq, {0xa3} },
   641  	{ ABTCL,	yml_rl,	Pm, {0xbb} },
   642  	{ ABTCW,	yml_rl,	Pq, {0xbb} },
   643  	{ ABTRL,	yml_rl,	Pm, {0xb3} },
   644  	{ ABTRW,	yml_rl,	Pq, {0xb3} },
   645  	{ ABTSL,	yml_rl,	Pm, {0xab} },
   646  	{ ABTSW,	yml_rl,	Pq, {0xab} },
   647  	{ ABYTE,	ybyte,	Px, {1} },
   648  	{ ACALL,	ycall,	Px, {0xff,(02),0xff,(0x15),0xe8} },
   649  	{ ACLC,		ynone,	Px, {0xf8} },
   650  	{ ACLD,		ynone,	Px, {0xfc} },
   651  	{ ACLI,		ynone,	Px, {0xfa} },
   652  	{ ACLTS,	ynone,	Pm, {0x06} },
   653  	{ ACMC,		ynone,	Px, {0xf5} },
   654  	{ ACMPB,	ycmpb,	Pb, {0x3c,0x80,(07),0x38,0x3a} },
   655  	{ ACMPL,	ycmpl,	Px, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   656  	{ ACMPW,	ycmpl,	Pe, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   657  	{ ACMPSB,	ynone,	Pb, {0xa6} },
   658  	{ ACMPSL,	ynone,	Px, {0xa7} },
   659  	{ ACMPSW,	ynone,	Pe, {0xa7} },
   660  	{ ADAA,		ynone,	Px, {0x27} },
   661  	{ ADAS,		ynone,	Px, {0x2f} },
   662  	{ ADATA },
   663  	{ ADECB,	yincb,	Pb, {0xfe,(01)} },
   664  	{ ADECL,	yincl,	Px, {0x48,0xff,(01)} },
   665  	{ ADECW,	yincl,	Pe, {0x48,0xff,(01)} },
   666  	{ ADIVB,	ydivb,	Pb, {0xf6,(06)} },
   667  	{ ADIVL,	ydivl,	Px, {0xf7,(06)} },
   668  	{ ADIVW,	ydivl,	Pe, {0xf7,(06)} },
   669  	{ AENTER },				/* botch */
   670  	{ AGLOBL },
   671  	{ AGOK },
   672  	{ AHISTORY },
   673  	{ AHLT,		ynone,	Px, {0xf4} },
   674  	{ AIDIVB,	ydivb,	Pb, {0xf6,(07)} },
   675  	{ AIDIVL,	ydivl,	Px, {0xf7,(07)} },
   676  	{ AIDIVW,	ydivl,	Pe, {0xf7,(07)} },
   677  	{ AIMULB,	ydivb,	Pb, {0xf6,(05)} },
   678  	{ AIMULL,	yimul,	Px, {0xf7,(05),0x6b,0x69} },
   679  	{ AIMULW,	yimul,	Pe, {0xf7,(05),0x6b,0x69} },
   680  	{ AINB,		yin,	Pb, {0xe4,0xec} },
   681  	{ AINL,		yin,	Px, {0xe5,0xed} },
   682  	{ AINW,		yin,	Pe, {0xe5,0xed} },
   683  	{ AINCB,	yincb,	Pb, {0xfe,(00)} },
   684  	{ AINCL,	yincl,	Px, {0x40,0xff,(00)} },
   685  	{ AINCW,	yincl,	Pe, {0x40,0xff,(00)} },
   686  	{ AINSB,	ynone,	Pb, {0x6c} },
   687  	{ AINSL,	ynone,	Px, {0x6d} },
   688  	{ AINSW,	ynone,	Pe, {0x6d} },
   689  	{ AINT,		yint,	Px, {0xcd} },
   690  	{ AINTO,	ynone,	Px, {0xce} },
   691  	{ AIRETL,	ynone,	Px, {0xcf} },
   692  	{ AIRETW,	ynone,	Pe, {0xcf} },
   693  	{ AJCC,		yjcond,	Px, {0x73,0x83,(00)} },
   694  	{ AJCS,		yjcond,	Px, {0x72,0x82} },
   695  	{ AJCXZL,	yloop,	Px, {0xe3} },
   696  	{ AJCXZW,	yloop,	Px, {0xe3} },
   697  	{ AJEQ,		yjcond,	Px, {0x74,0x84} },
   698  	{ AJGE,		yjcond,	Px, {0x7d,0x8d} },
   699  	{ AJGT,		yjcond,	Px, {0x7f,0x8f} },
   700  	{ AJHI,		yjcond,	Px, {0x77,0x87} },
   701  	{ AJLE,		yjcond,	Px, {0x7e,0x8e} },
   702  	{ AJLS,		yjcond,	Px, {0x76,0x86} },
   703  	{ AJLT,		yjcond,	Px, {0x7c,0x8c} },
   704  	{ AJMI,		yjcond,	Px, {0x78,0x88} },
   705  	{ AJMP,		yjmp,	Px, {0xff,(04),0xeb,0xe9} },
   706  	{ AJNE,		yjcond,	Px, {0x75,0x85} },
   707  	{ AJOC,		yjcond,	Px, {0x71,0x81,(00)} },
   708  	{ AJOS,		yjcond,	Px, {0x70,0x80,(00)} },
   709  	{ AJPC,		yjcond,	Px, {0x7b,0x8b} },
   710  	{ AJPL,		yjcond,	Px, {0x79,0x89} },
   711  	{ AJPS,		yjcond,	Px, {0x7a,0x8a} },
   712  	{ ALAHF,	ynone,	Px, {0x9f} },
   713  	{ ALARL,	yml_rl,	Pm, {0x02} },
   714  	{ ALARW,	yml_rl,	Pq, {0x02} },
   715  	{ ALEAL,	ym_rl,	Px, {0x8d} },
   716  	{ ALEAW,	ym_rl,	Pe, {0x8d} },
   717  	{ ALEAVEL,	ynone,	Px, {0xc9} },
   718  	{ ALEAVEW,	ynone,	Pe, {0xc9} },
   719  	{ ALOCK,	ynone,	Px, {0xf0} },
   720  	{ ALODSB,	ynone,	Pb, {0xac} },
   721  	{ ALODSL,	ynone,	Px, {0xad} },
   722  	{ ALODSW,	ynone,	Pe, {0xad} },
   723  	{ ALONG,	ybyte,	Px, {4} },
   724  	{ ALOOP,	yloop,	Px, {0xe2} },
   725  	{ ALOOPEQ,	yloop,	Px, {0xe1} },
   726  	{ ALOOPNE,	yloop,	Px, {0xe0} },
   727  	{ ALSLL,	yml_rl,	Pm, {0x03 } },
   728  	{ ALSLW,	yml_rl,	Pq, {0x03 } },
   729  	{ AMOVB,	ymovb,	Pb, {0x88,0x8a,0xb0,0xc6,(00)} },
   730  	{ AMOVL,	ymovl,	Px, {0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00),Pe,0x6e,Pe,0x7e,0} },
   731  	{ AMOVW,	ymovw,	Pe, {0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00),0} },
   732  	{ AMOVQ,	ymovq,	Pf3, {0x7e} },
   733  	{ AMOVBLSX,	ymb_rl,	Pm, {0xbe} },
   734  	{ AMOVBLZX,	ymb_rl,	Pm, {0xb6} },
   735  	{ AMOVBWSX,	ymb_rl,	Pq, {0xbe} },
   736  	{ AMOVBWZX,	ymb_rl,	Pq, {0xb6} },
   737  	{ AMOVWLSX,	yml_rl,	Pm, {0xbf} },
   738  	{ AMOVWLZX,	yml_rl,	Pm, {0xb7} },
   739  	{ AMOVSB,	ynone,	Pb, {0xa4} },
   740  	{ AMOVSL,	ynone,	Px, {0xa5} },
   741  	{ AMOVSW,	ynone,	Pe, {0xa5} },
   742  	{ AMULB,	ydivb,	Pb, {0xf6,(04)} },
   743  	{ AMULL,	ydivl,	Px, {0xf7,(04)} },
   744  	{ AMULW,	ydivl,	Pe, {0xf7,(04)} },
   745  	{ ANAME },
   746  	{ ANEGB,	yscond,	Px, {0xf6,(03)} },
   747  	{ ANEGL,	yscond,	Px, {0xf7,(03)} },
   748  	{ ANEGW,	yscond,	Pe, {0xf7,(03)} },
   749  	{ ANOP,		ynop,	Px, {0,0} },
   750  	{ ANOTB,	yscond,	Px, {0xf6,(02)} },
   751  	{ ANOTL,	yscond,	Px, {0xf7,(02)} },
   752  	{ ANOTW,	yscond,	Pe, {0xf7,(02)} },
   753  	{ AORB,		yxorb,	Pb, {0x0c,0x80,(01),0x08,0x0a} },
   754  	{ AORL,		yxorl,	Px, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
   755  	{ AORW,		yxorl,	Pe, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
   756  	{ AOUTB,	yin,	Pb, {0xe6,0xee} },
   757  	{ AOUTL,	yin,	Px, {0xe7,0xef} },
   758  	{ AOUTW,	yin,	Pe, {0xe7,0xef} },
   759  	{ AOUTSB,	ynone,	Pb, {0x6e} },
   760  	{ AOUTSL,	ynone,	Px, {0x6f} },
   761  	{ AOUTSW,	ynone,	Pe, {0x6f} },
   762  	{ APAUSE,	ynone,	Px, {0xf3,0x90} },
   763  	{ APOPAL,	ynone,	Px, {0x61} },
   764  	{ APOPAW,	ynone,	Pe, {0x61} },
   765  	{ APOPFL,	ynone,	Px, {0x9d} },
   766  	{ APOPFW,	ynone,	Pe, {0x9d} },
   767  	{ APOPL,	ypopl,	Px, {0x58,0x8f,(00)} },
   768  	{ APOPW,	ypopl,	Pe, {0x58,0x8f,(00)} },
   769  	{ APUSHAL,	ynone,	Px, {0x60} },
   770  	{ APUSHAW,	ynone,	Pe, {0x60} },
   771  	{ APUSHFL,	ynone,	Px, {0x9c} },
   772  	{ APUSHFW,	ynone,	Pe, {0x9c} },
   773  	{ APUSHL,	ypushl,	Px, {0x50,0xff,(06),0x6a,0x68} },
   774  	{ APUSHW,	ypushl,	Pe, {0x50,0xff,(06),0x6a,0x68} },
   775  	{ ARCLB,	yshb,	Pb, {0xd0,(02),0xc0,(02),0xd2,(02)} },
   776  	{ ARCLL,	yshl,	Px, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
   777  	{ ARCLW,	yshl,	Pe, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
   778  	{ ARCRB,	yshb,	Pb, {0xd0,(03),0xc0,(03),0xd2,(03)} },
   779  	{ ARCRL,	yshl,	Px, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
   780  	{ ARCRW,	yshl,	Pe, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
   781  	{ AREP,		ynone,	Px, {0xf3} },
   782  	{ AREPN,	ynone,	Px, {0xf2} },
   783  	{ ARET,		ynone,	Px, {0xc3} },
   784  	{ AROLB,	yshb,	Pb, {0xd0,(00),0xc0,(00),0xd2,(00)} },
   785  	{ AROLL,	yshl,	Px, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
   786  	{ AROLW,	yshl,	Pe, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
   787  	{ ARORB,	yshb,	Pb, {0xd0,(01),0xc0,(01),0xd2,(01)} },
   788  	{ ARORL,	yshl,	Px, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
   789  	{ ARORW,	yshl,	Pe, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
   790  	{ ASAHF,	ynone,	Px, {0x9e} },
   791  	{ ASALB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
   792  	{ ASALL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
   793  	{ ASALW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
   794  	{ ASARB,	yshb,	Pb, {0xd0,(07),0xc0,(07),0xd2,(07)} },
   795  	{ ASARL,	yshl,	Px, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
   796  	{ ASARW,	yshl,	Pe, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
   797  	{ ASBBB,	yxorb,	Pb, {0x1c,0x80,(03),0x18,0x1a} },
   798  	{ ASBBL,	yxorl,	Px, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
   799  	{ ASBBW,	yxorl,	Pe, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
   800  	{ ASCASB,	ynone,	Pb, {0xae} },
   801  	{ ASCASL,	ynone,	Px, {0xaf} },
   802  	{ ASCASW,	ynone,	Pe, {0xaf} },
   803  	{ ASETCC,	yscond,	Pm, {0x93,(00)} },
   804  	{ ASETCS,	yscond,	Pm, {0x92,(00)} },
   805  	{ ASETEQ,	yscond,	Pm, {0x94,(00)} },
   806  	{ ASETGE,	yscond,	Pm, {0x9d,(00)} },
   807  	{ ASETGT,	yscond,	Pm, {0x9f,(00)} },
   808  	{ ASETHI,	yscond,	Pm, {0x97,(00)} },
   809  	{ ASETLE,	yscond,	Pm, {0x9e,(00)} },
   810  	{ ASETLS,	yscond,	Pm, {0x96,(00)} },
   811  	{ ASETLT,	yscond,	Pm, {0x9c,(00)} },
   812  	{ ASETMI,	yscond,	Pm, {0x98,(00)} },
   813  	{ ASETNE,	yscond,	Pm, {0x95,(00)} },
   814  	{ ASETOC,	yscond,	Pm, {0x91,(00)} },
   815  	{ ASETOS,	yscond,	Pm, {0x90,(00)} },
   816  	{ ASETPC,	yscond,	Pm, {0x9b,(00)} },
   817  	{ ASETPL,	yscond,	Pm, {0x99,(00)} },
   818  	{ ASETPS,	yscond,	Pm, {0x9a,(00)} },
   819  	{ ACDQ,		ynone,	Px, {0x99} },
   820  	{ ACWD,		ynone,	Pe, {0x99} },
   821  	{ ASHLB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
   822  	{ ASHLL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
   823  	{ ASHLW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
   824  	{ ASHRB,	yshb,	Pb, {0xd0,(05),0xc0,(05),0xd2,(05)} },
   825  	{ ASHRL,	yshl,	Px, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
   826  	{ ASHRW,	yshl,	Pe, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
   827  	{ ASTC,		ynone,	Px, {0xf9} },
   828  	{ ASTD,		ynone,	Px, {0xfd} },
   829  	{ ASTI,		ynone,	Px, {0xfb} },
   830  	{ ASTOSB,	ynone,	Pb, {0xaa} },
   831  	{ ASTOSL,	ynone,	Px, {0xab} },
   832  	{ ASTOSW,	ynone,	Pe, {0xab} },
   833  	{ ASUBB,	yxorb,	Pb, {0x2c,0x80,(05),0x28,0x2a} },
   834  	{ ASUBL,	yaddl,	Px, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
   835  	{ ASUBW,	yaddl,	Pe, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
   836  	{ ASYSCALL,	ynone,	Px, {0xcd,100} },
   837  	{ ATESTB,	ytestb,	Pb, {0xa8,0xf6,(00),0x84,0x84} },
   838  	{ ATESTL,	ytestl,	Px, {0xa9,0xf7,(00),0x85,0x85} },
   839  	{ ATESTW,	ytestl,	Pe, {0xa9,0xf7,(00),0x85,0x85} },
   840  	{ ATEXT,	ytext,	Px },
   841  	{ AVERR,	ydivl,	Pm, {0x00,(04)} },
   842  	{ AVERW,	ydivl,	Pm, {0x00,(05)} },
   843  	{ AWAIT,	ynone,	Px, {0x9b} },
   844  	{ AWORD,	ybyte,	Px, {2} },
   845  	{ AXCHGB,	yml_mb,	Pb, {0x86,0x86} },
   846  	{ AXCHGL,	yxchg,	Px, {0x90,0x90,0x87,0x87} },
   847  	{ AXCHGW,	yxchg,	Pe, {0x90,0x90,0x87,0x87} },
   848  	{ AXLAT,	ynone,	Px, {0xd7} },
   849  	{ AXORB,	yxorb,	Pb, {0x34,0x80,(06),0x30,0x32} },
   850  	{ AXORL,	yxorl,	Px, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
   851  	{ AXORW,	yxorl,	Pe, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
   852  
   853  	{ AFMOVB,	yfmvx,	Px, {0xdf,(04)} },
   854  	{ AFMOVBP,	yfmvp,	Px, {0xdf,(06)} },
   855  	{ AFMOVD,	yfmvd,	Px, {0xdd,(00),0xdd,(02),0xd9,(00),0xdd,(02)} },
   856  	{ AFMOVDP,	yfmvdp,	Px, {0xdd,(03),0xdd,(03)} },
   857  	{ AFMOVF,	yfmvf,	Px, {0xd9,(00),0xd9,(02)} },
   858  	{ AFMOVFP,	yfmvp,	Px, {0xd9,(03)} },
   859  	{ AFMOVL,	yfmvf,	Px, {0xdb,(00),0xdb,(02)} },
   860  	{ AFMOVLP,	yfmvp,	Px, {0xdb,(03)} },
   861  	{ AFMOVV,	yfmvx,	Px, {0xdf,(05)} },
   862  	{ AFMOVVP,	yfmvp,	Px, {0xdf,(07)} },
   863  	{ AFMOVW,	yfmvf,	Px, {0xdf,(00),0xdf,(02)} },
   864  	{ AFMOVWP,	yfmvp,	Px, {0xdf,(03)} },
   865  	{ AFMOVX,	yfmvx,	Px, {0xdb,(05)} },
   866  	{ AFMOVXP,	yfmvp,	Px, {0xdb,(07)} },
   867  
   868  	{ AFCOMB },
   869  	{ AFCOMBP },
   870  	{ AFCOMD,	yfadd,	Px, {0xdc,(02),0xd8,(02),0xdc,(02)} },	/* botch */
   871  	{ AFCOMDP,	yfadd,	Px, {0xdc,(03),0xd8,(03),0xdc,(03)} },	/* botch */
   872  	{ AFCOMDPP,	ycompp,	Px, {0xde,(03)} },
   873  	{ AFCOMF,	yfmvx,	Px, {0xd8,(02)} },
   874  	{ AFCOMFP,	yfmvx,	Px, {0xd8,(03)} },
   875  	{ AFCOMI,	yfmvx,	Px, {0xdb,(06)} },
   876  	{ AFCOMIP,	yfmvx,	Px, {0xdf,(06)} },
   877  	{ AFCOML,	yfmvx,	Px, {0xda,(02)} },
   878  	{ AFCOMLP,	yfmvx,	Px, {0xda,(03)} },
   879  	{ AFCOMW,	yfmvx,	Px, {0xde,(02)} },
   880  	{ AFCOMWP,	yfmvx,	Px, {0xde,(03)} },
   881  
   882  	{ AFUCOM,	ycompp,	Px, {0xdd,(04)} },
   883  	{ AFUCOMI,	ycompp,	Px, {0xdb,(05)} },
   884  	{ AFUCOMIP,	ycompp,	Px, {0xdf,(05)} },
   885  	{ AFUCOMP,	ycompp,	Px, {0xdd,(05)} },
   886  	{ AFUCOMPP,	ycompp,	Px, {0xda,(13)} },
   887  
   888  	{ AFADDDP,	yfaddp,	Px, {0xde,(00)} },
   889  	{ AFADDW,	yfmvx,	Px, {0xde,(00)} },
   890  	{ AFADDL,	yfmvx,	Px, {0xda,(00)} },
   891  	{ AFADDF,	yfmvx,	Px, {0xd8,(00)} },
   892  	{ AFADDD,	yfadd,	Px, {0xdc,(00),0xd8,(00),0xdc,(00)} },
   893  
   894  	{ AFMULDP,	yfaddp,	Px, {0xde,(01)} },
   895  	{ AFMULW,	yfmvx,	Px, {0xde,(01)} },
   896  	{ AFMULL,	yfmvx,	Px, {0xda,(01)} },
   897  	{ AFMULF,	yfmvx,	Px, {0xd8,(01)} },
   898  	{ AFMULD,	yfadd,	Px, {0xdc,(01),0xd8,(01),0xdc,(01)} },
   899  
   900  	{ AFSUBDP,	yfaddp,	Px, {0xde,(05)} },
   901  	{ AFSUBW,	yfmvx,	Px, {0xde,(04)} },
   902  	{ AFSUBL,	yfmvx,	Px, {0xda,(04)} },
   903  	{ AFSUBF,	yfmvx,	Px, {0xd8,(04)} },
   904  	{ AFSUBD,	yfadd,	Px, {0xdc,(04),0xd8,(04),0xdc,(05)} },
   905  
   906  	{ AFSUBRDP,	yfaddp,	Px, {0xde,(04)} },
   907  	{ AFSUBRW,	yfmvx,	Px, {0xde,(05)} },
   908  	{ AFSUBRL,	yfmvx,	Px, {0xda,(05)} },
   909  	{ AFSUBRF,	yfmvx,	Px, {0xd8,(05)} },
   910  	{ AFSUBRD,	yfadd,	Px, {0xdc,(05),0xd8,(05),0xdc,(04)} },
   911  
   912  	{ AFDIVDP,	yfaddp,	Px, {0xde,(07)} },
   913  	{ AFDIVW,	yfmvx,	Px, {0xde,(06)} },
   914  	{ AFDIVL,	yfmvx,	Px, {0xda,(06)} },
   915  	{ AFDIVF,	yfmvx,	Px, {0xd8,(06)} },
   916  	{ AFDIVD,	yfadd,	Px, {0xdc,(06),0xd8,(06),0xdc,(07)} },
   917  
   918  	{ AFDIVRDP,	yfaddp,	Px, {0xde,(06)} },
   919  	{ AFDIVRW,	yfmvx,	Px, {0xde,(07)} },
   920  	{ AFDIVRL,	yfmvx,	Px, {0xda,(07)} },
   921  	{ AFDIVRF,	yfmvx,	Px, {0xd8,(07)} },
   922  	{ AFDIVRD,	yfadd,	Px, {0xdc,(07),0xd8,(07),0xdc,(06)} },
   923  
   924  	{ AFXCHD,	yfxch,	Px, {0xd9,(01),0xd9,(01)} },
   925  	{ AFFREE },
   926  	{ AFLDCW,	ystcw,	Px, {0xd9,(05),0xd9,(05)} },
   927  	{ AFLDENV,	ystcw,	Px, {0xd9,(04),0xd9,(04)} },
   928  	{ AFRSTOR,	ysvrs,	Px, {0xdd,(04),0xdd,(04)} },
   929  	{ AFSAVE,	ysvrs,	Px, {0xdd,(06),0xdd,(06)} },
   930  	{ AFSTCW,	ystcw,	Px, {0xd9,(07),0xd9,(07)} },
   931  	{ AFSTENV,	ystcw,	Px, {0xd9,(06),0xd9,(06)} },
   932  	{ AFSTSW,	ystsw,	Px, {0xdd,(07),0xdf,0xe0} },
   933  	{ AF2XM1,	ynone,	Px, {0xd9, 0xf0} },
   934  	{ AFABS,	ynone,	Px, {0xd9, 0xe1} },
   935  	{ AFCHS,	ynone,	Px, {0xd9, 0xe0} },
   936  	{ AFCLEX,	ynone,	Px, {0xdb, 0xe2} },
   937  	{ AFCOS,	ynone,	Px, {0xd9, 0xff} },
   938  	{ AFDECSTP,	ynone,	Px, {0xd9, 0xf6} },
   939  	{ AFINCSTP,	ynone,	Px, {0xd9, 0xf7} },
   940  	{ AFINIT,	ynone,	Px, {0xdb, 0xe3} },
   941  	{ AFLD1,	ynone,	Px, {0xd9, 0xe8} },
   942  	{ AFLDL2E,	ynone,	Px, {0xd9, 0xea} },
   943  	{ AFLDL2T,	ynone,	Px, {0xd9, 0xe9} },
   944  	{ AFLDLG2,	ynone,	Px, {0xd9, 0xec} },
   945  	{ AFLDLN2,	ynone,	Px, {0xd9, 0xed} },
   946  	{ AFLDPI,	ynone,	Px, {0xd9, 0xeb} },
   947  	{ AFLDZ,	ynone,	Px, {0xd9, 0xee} },
   948  	{ AFNOP,	ynone,	Px, {0xd9, 0xd0} },
   949  	{ AFPATAN,	ynone,	Px, {0xd9, 0xf3} },
   950  	{ AFPREM,	ynone,	Px, {0xd9, 0xf8} },
   951  	{ AFPREM1,	ynone,	Px, {0xd9, 0xf5} },
   952  	{ AFPTAN,	ynone,	Px, {0xd9, 0xf2} },
   953  	{ AFRNDINT,	ynone,	Px, {0xd9, 0xfc} },
   954  	{ AFSCALE,	ynone,	Px, {0xd9, 0xfd} },
   955  	{ AFSIN,	ynone,	Px, {0xd9, 0xfe} },
   956  	{ AFSINCOS,	ynone,	Px, {0xd9, 0xfb} },
   957  	{ AFSQRT,	ynone,	Px, {0xd9, 0xfa} },
   958  	{ AFTST,	ynone,	Px, {0xd9, 0xe4} },
   959  	{ AFXAM,	ynone,	Px, {0xd9, 0xe5} },
   960  	{ AFXTRACT,	ynone,	Px, {0xd9, 0xf4} },
   961  	{ AFYL2X,	ynone,	Px, {0xd9, 0xf1} },
   962  	{ AFYL2XP1,	ynone,	Px, {0xd9, 0xf9} },
   963  	{ AEND },
   964  	{ ADYNT_ },
   965  	{ AINIT_ },
   966  	{ ASIGNAME },
   967  	{ ACMPXCHGB,	yrb_mb,	Pm, {0xb0} },
   968  	{ ACMPXCHGL,	yrl_ml,	Pm, {0xb1} },
   969  	{ ACMPXCHGW,	yrl_ml,	Pm, {0xb1} },
   970  	{ ACMPXCHG8B,	yscond,	Pm, {0xc7,(01)} },
   971  
   972  	{ ACPUID,	ynone,	Pm, {0xa2} },
   973  	{ ARDTSC,	ynone,	Pm, {0x31} },
   974  
   975  	{ AXADDB,	yrb_mb,	Pb, {0x0f,0xc0} },
   976  	{ AXADDL,	yrl_ml,	Pm, {0xc1} },
   977  	{ AXADDW,	yrl_ml,	Pe, {0x0f,0xc1} },
   978  
   979  	{ ACMOVLCC,	yml_rl,	Pm, {0x43} },
   980  	{ ACMOVLCS,	yml_rl,	Pm, {0x42} },
   981  	{ ACMOVLEQ,	yml_rl,	Pm, {0x44} },
   982  	{ ACMOVLGE,	yml_rl,	Pm, {0x4d} },
   983  	{ ACMOVLGT,	yml_rl,	Pm, {0x4f} },
   984  	{ ACMOVLHI,	yml_rl,	Pm, {0x47} },
   985  	{ ACMOVLLE,	yml_rl,	Pm, {0x4e} },
   986  	{ ACMOVLLS,	yml_rl,	Pm, {0x46} },
   987  	{ ACMOVLLT,	yml_rl,	Pm, {0x4c} },
   988  	{ ACMOVLMI,	yml_rl,	Pm, {0x48} },
   989  	{ ACMOVLNE,	yml_rl,	Pm, {0x45} },
   990  	{ ACMOVLOC,	yml_rl,	Pm, {0x41} },
   991  	{ ACMOVLOS,	yml_rl,	Pm, {0x40} },
   992  	{ ACMOVLPC,	yml_rl,	Pm, {0x4b} },
   993  	{ ACMOVLPL,	yml_rl,	Pm, {0x49} },
   994  	{ ACMOVLPS,	yml_rl,	Pm, {0x4a} },
   995  	{ ACMOVWCC,	yml_rl,	Pq, {0x43} },
   996  	{ ACMOVWCS,	yml_rl,	Pq, {0x42} },
   997  	{ ACMOVWEQ,	yml_rl,	Pq, {0x44} },
   998  	{ ACMOVWGE,	yml_rl,	Pq, {0x4d} },
   999  	{ ACMOVWGT,	yml_rl,	Pq, {0x4f} },
  1000  	{ ACMOVWHI,	yml_rl,	Pq, {0x47} },
  1001  	{ ACMOVWLE,	yml_rl,	Pq, {0x4e} },
  1002  	{ ACMOVWLS,	yml_rl,	Pq, {0x46} },
  1003  	{ ACMOVWLT,	yml_rl,	Pq, {0x4c} },
  1004  	{ ACMOVWMI,	yml_rl,	Pq, {0x48} },
  1005  	{ ACMOVWNE,	yml_rl,	Pq, {0x45} },
  1006  	{ ACMOVWOC,	yml_rl,	Pq, {0x41} },
  1007  	{ ACMOVWOS,	yml_rl,	Pq, {0x40} },
  1008  	{ ACMOVWPC,	yml_rl,	Pq, {0x4b} },
  1009  	{ ACMOVWPL,	yml_rl,	Pq, {0x49} },
  1010  	{ ACMOVWPS,	yml_rl,	Pq, {0x4a} },
  1011  
  1012  	{ AFCMOVCC,	yfcmv,	Px, {0xdb,(00)} },
  1013  	{ AFCMOVCS,	yfcmv,	Px, {0xda,(00)} },
  1014  	{ AFCMOVEQ,	yfcmv,	Px, {0xda,(01)} },
  1015  	{ AFCMOVHI,	yfcmv,	Px, {0xdb,(02)} },
  1016  	{ AFCMOVLS,	yfcmv,	Px, {0xda,(02)} },
  1017  	{ AFCMOVNE,	yfcmv,	Px, {0xdb,(01)} },
  1018  	{ AFCMOVNU,	yfcmv,	Px, {0xdb,(03)} },
  1019  	{ AFCMOVUN,	yfcmv,	Px, {0xda,(03)} },
  1020  
  1021  	{ ALFENCE, ynone, Pm, {0xae,0xe8} },
  1022  	{ AMFENCE, ynone, Pm, {0xae,0xf0} },
  1023  	{ ASFENCE, ynone, Pm, {0xae,0xf8} },
  1024  
  1025  	{ AEMMS, ynone, Pm, {0x77} },
  1026  
  1027  	{ APREFETCHT0,	yprefetch,	Pm,	{0x18,(01)} },
  1028  	{ APREFETCHT1,	yprefetch,	Pm,	{0x18,(02)} },
  1029  	{ APREFETCHT2,	yprefetch,	Pm,	{0x18,(03)} },
  1030  	{ APREFETCHNTA,	yprefetch,	Pm,	{0x18,(00)} },
  1031  
  1032  	{ ABSWAPL,	ybswap,	Pm,	{0xc8} },
  1033  	
  1034  	{ AUNDEF,		ynone,	Px,	{0x0f, 0x0b} },
  1035  
  1036  	{ AADDPD,	yxm,	Pq, {0x58} },
  1037  	{ AADDPS,	yxm,	Pm, {0x58} },
  1038  	{ AADDSD,	yxm,	Pf2, {0x58} },
  1039  	{ AADDSS,	yxm,	Pf3, {0x58} },
  1040  	{ AANDNPD,	yxm,	Pq, {0x55} },
  1041  	{ AANDNPS,	yxm,	Pm, {0x55} },
  1042  	{ AANDPD,	yxm,	Pq, {0x54} },
  1043  	{ AANDPS,	yxm,	Pq, {0x54} },
  1044  	{ ACMPPD,	yxcmpi,	Px, {Pe,0xc2} },
  1045  	{ ACMPPS,	yxcmpi,	Pm, {0xc2,0} },
  1046  	{ ACMPSD,	yxcmpi,	Px, {Pf2,0xc2} },
  1047  	{ ACMPSS,	yxcmpi,	Px, {Pf3,0xc2} },
  1048  	{ ACOMISD,	yxcmp,	Pe, {0x2f} },
  1049  	{ ACOMISS,	yxcmp,	Pm, {0x2f} },
  1050  	{ ACVTPL2PD,	yxcvm2,	Px, {Pf3,0xe6,Pe,0x2a} },
  1051  	{ ACVTPL2PS,	yxcvm2,	Pm, {0x5b,0,0x2a,0,} },
  1052  	{ ACVTPD2PL,	yxcvm1,	Px, {Pf2,0xe6,Pe,0x2d} },
  1053  	{ ACVTPD2PS,	yxm,	Pe, {0x5a} },
  1054  	{ ACVTPS2PL,	yxcvm1, Px, {Pe,0x5b,Pm,0x2d} },
  1055  	{ ACVTPS2PD,	yxm,	Pm, {0x5a} },
  1056  	{ ACVTSD2SL,	yxcvfl, Pf2, {0x2d} },
  1057   	{ ACVTSD2SS,	yxm,	Pf2, {0x5a} },
  1058  	{ ACVTSL2SD,	yxcvlf, Pf2, {0x2a} },
  1059  	{ ACVTSL2SS,	yxcvlf, Pf3, {0x2a} },
  1060  	{ ACVTSS2SD,	yxm,	Pf3, {0x5a} },
  1061  	{ ACVTSS2SL,	yxcvfl, Pf3, {0x2d} },
  1062  	{ ACVTTPD2PL,	yxcvm1,	Px, {Pe,0xe6,Pe,0x2c} },
  1063  	{ ACVTTPS2PL,	yxcvm1,	Px, {Pf3,0x5b,Pm,0x2c} },
  1064  	{ ACVTTSD2SL,	yxcvfl, Pf2, {0x2c} },
  1065  	{ ACVTTSS2SL,	yxcvfl,	Pf3, {0x2c} },
  1066  	{ ADIVPD,	yxm,	Pe, {0x5e} },
  1067  	{ ADIVPS,	yxm,	Pm, {0x5e} },
  1068  	{ ADIVSD,	yxm,	Pf2, {0x5e} },
  1069  	{ ADIVSS,	yxm,	Pf3, {0x5e} },
  1070  	{ AMASKMOVOU,	yxr,	Pe, {0xf7} },
  1071  	{ AMAXPD,	yxm,	Pe, {0x5f} },
  1072  	{ AMAXPS,	yxm,	Pm, {0x5f} },
  1073  	{ AMAXSD,	yxm,	Pf2, {0x5f} },
  1074  	{ AMAXSS,	yxm,	Pf3, {0x5f} },
  1075  	{ AMINPD,	yxm,	Pe, {0x5d} },
  1076  	{ AMINPS,	yxm,	Pm, {0x5d} },
  1077  	{ AMINSD,	yxm,	Pf2, {0x5d} },
  1078  	{ AMINSS,	yxm,	Pf3, {0x5d} },
  1079  	{ AMOVAPD,	yxmov,	Pe, {0x28,0x29} },
  1080  	{ AMOVAPS,	yxmov,	Pm, {0x28,0x29} },
  1081  	{ AMOVO,	yxmov,	Pe, {0x6f,0x7f} },
  1082  	{ AMOVOU,	yxmov,	Pf3, {0x6f,0x7f} },
  1083  	{ AMOVHLPS,	yxr,	Pm, {0x12} },
  1084  	{ AMOVHPD,	yxmov,	Pe, {0x16,0x17} },
  1085  	{ AMOVHPS,	yxmov,	Pm, {0x16,0x17} },
  1086  	{ AMOVLHPS,	yxr,	Pm, {0x16} },
  1087  	{ AMOVLPD,	yxmov,	Pe, {0x12,0x13} },
  1088  	{ AMOVLPS,	yxmov,	Pm, {0x12,0x13} },
  1089  	{ AMOVMSKPD,	yxrrl,	Pq, {0x50} },
  1090  	{ AMOVMSKPS,	yxrrl,	Pm, {0x50} },
  1091  	{ AMOVNTO,	yxr_ml,	Pe, {0xe7} },
  1092  	{ AMOVNTPD,	yxr_ml,	Pe, {0x2b} },
  1093  	{ AMOVNTPS,	yxr_ml,	Pm, {0x2b} },
  1094  	{ AMOVSD,	yxmov,	Pf2, {0x10,0x11} },
  1095  	{ AMOVSS,	yxmov,	Pf3, {0x10,0x11} },
  1096  	{ AMOVUPD,	yxmov,	Pe, {0x10,0x11} },
  1097  	{ AMOVUPS,	yxmov,	Pm, {0x10,0x11} },
  1098  	{ AMULPD,	yxm,	Pe, {0x59} },
  1099  	{ AMULPS,	yxm,	Ym, {0x59} },
  1100  	{ AMULSD,	yxm,	Pf2, {0x59} },
  1101  	{ AMULSS,	yxm,	Pf3, {0x59} },
  1102  	{ AORPD,	yxm,	Pq, {0x56} },
  1103  	{ AORPS,	yxm,	Pm, {0x56} },
  1104  	{ APADDQ,	yxm,	Pe, {0xd4} },
  1105  	{ APAND,	yxm,	Pe, {0xdb} },
  1106  	{ APCMPEQB,	yxmq,	Pe, {0x74} },
  1107  	{ APMAXSW,	yxm,	Pe, {0xee} },
  1108  	{ APMAXUB,	yxm,	Pe, {0xde} },
  1109  	{ APMINSW,	yxm,	Pe, {0xea} },
  1110  	{ APMINUB,	yxm,	Pe, {0xda} },
  1111  	{ APMOVMSKB,	ymskb,	Px, {Pe,0xd7,0xd7} },
  1112  	{ APSADBW,	yxm,	Pq, {0xf6} },
  1113  	{ APSUBB,	yxm,	Pe, {0xf8} },
  1114  	{ APSUBL,	yxm,	Pe, {0xfa} },
  1115  	{ APSUBQ,	yxm,	Pe, {0xfb} },
  1116  	{ APSUBSB,	yxm,	Pe, {0xe8} },
  1117  	{ APSUBSW,	yxm,	Pe, {0xe9} },
  1118  	{ APSUBUSB,	yxm,	Pe, {0xd8} },
  1119  	{ APSUBUSW,	yxm,	Pe, {0xd9} },
  1120  	{ APSUBW,	yxm,	Pe, {0xf9} },
  1121  	{ APUNPCKHQDQ,	yxm,	Pe, {0x6d} },
  1122  	{ APUNPCKLQDQ,	yxm,	Pe, {0x6c} },
  1123  	{ APXOR,	yxm,	Pe, {0xef} },
  1124  	{ ARCPPS,	yxm,	Pm, {0x53} },
  1125  	{ ARCPSS,	yxm,	Pf3, {0x53} },
  1126  	{ ARSQRTPS,	yxm,	Pm, {0x52} },
  1127  	{ ARSQRTSS,	yxm,	Pf3, {0x52} },
  1128  	{ ASQRTPD,	yxm,	Pe, {0x51} },
  1129  	{ ASQRTPS,	yxm,	Pm, {0x51} },
  1130  	{ ASQRTSD,	yxm,	Pf2, {0x51} },
  1131  	{ ASQRTSS,	yxm,	Pf3, {0x51} },
  1132  	{ ASUBPD,	yxm,	Pe, {0x5c} },
  1133  	{ ASUBPS,	yxm,	Pm, {0x5c} },
  1134  	{ ASUBSD,	yxm,	Pf2, {0x5c} },
  1135  	{ ASUBSS,	yxm,	Pf3, {0x5c} },
  1136  	{ AUCOMISD,	yxcmp,	Pe, {0x2e} },
  1137  	{ AUCOMISS,	yxcmp,	Pm, {0x2e} },
  1138  	{ AUNPCKHPD,	yxm,	Pe, {0x15} },
  1139  	{ AUNPCKHPS,	yxm,	Pm, {0x15} },
  1140  	{ AUNPCKLPD,	yxm,	Pe, {0x14} },
  1141  	{ AUNPCKLPS,	yxm,	Pm, {0x14} },
  1142  	{ AXORPD,	yxm,	Pe, {0x57} },
  1143  	{ AXORPS,	yxm,	Pm, {0x57} },
  1144  
  1145  	{ AAESENC,	yaes,	Pq, {0x38,0xdc,(0)} },
  1146  	{ APINSRD,	yinsrd,	Pq, {0x3a, 0x22, (00)} },
  1147  	{ APSHUFB,	ymshufb,Pq, {0x38, 0x00} },
  1148  
  1149  	{ AUSEFIELD,	ynop,	Px, {0,0} },
  1150  	{ ATYPE },
  1151  	{ AFUNCDATA,	yfuncdata,	Px, {0,0} },
  1152  	{ APCDATA,	ypcdata,	Px, {0,0} },
  1153  	{ ACHECKNIL },
  1154  	{ AVARDEF },
  1155  	{ AVARKILL },
  1156  	{ ADUFFCOPY,	yduff,	Px, {0xe8} },
  1157  	{ ADUFFZERO,	yduff,	Px, {0xe8} },
  1158  
  1159  	{0}
  1160  };
  1161  
  1162  static int32	vaddr(Link*, Addr*, Reloc*);
  1163  
// single-instruction no-ops of various lengths.
// constructed by hand and disassembled with gdb to verify.
// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
//
// Row i holds a no-op that is i+1 bytes long (1 through 9 bytes); the rest
// of each 16-byte row is zero padding. fillnop fetches an m-byte no-op as
// nop[m-1], so the rows must stay ordered by length with no gaps.
static uchar nop[][16] = {
	{0x90},
	{0x66, 0x90},
	{0x0F, 0x1F, 0x00},
	{0x0F, 0x1F, 0x40, 0x00},
	{0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	// Native Client rejects the repeated 0x66 prefix.
	// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
};
  1180  
  1181  static void
  1182  fillnop(uchar *p, int n)
  1183  {
  1184  	int m;
  1185  
  1186  	while(n > 0) {
  1187  		m = n;
  1188  		if(m > nelem(nop))
  1189  			m = nelem(nop);
  1190  		memmove(p, nop[m-1], m);
  1191  		p += m;
  1192  		n -= m;
  1193  	}
  1194  }
  1195  
  1196  static int32
  1197  naclpad(Link *ctxt, LSym *s, int32 c, int32 pad)
  1198  {
  1199  	symgrow(ctxt, s, c+pad);
  1200  	fillnop(s->p+c, pad);
  1201  	return c+pad;
  1202  }
  1203  
  1204  static void instinit(void);
  1205  
// span8 assigns a pc to every Prog of the text symbol s and assembles the
// instructions into s->p.
//
// It first normalizes the Prog list (branch targets, AADJSP), then lays the
// code out repeatedly: each pass assembles every instruction with asmins,
// copies the bytes into s->p and patches forward branches that target the
// instruction. A pass is repeated whenever an instruction's size changed or
// a short branch turned out not to reach; more than 20 passes is treated as
// a fatal error. On return s->size holds the final size, rounded up to
// FuncAlign.
void
span8(Link *ctxt, LSym *s)
{
	Prog *p, *q;
	int32 c, v, loop;
	uchar *bp;
	int n, m, i;

	ctxt->cursym = s;

	// nothing to lay out for an empty symbol or one with a single Prog.
	if(s->text == nil || s->text->link == nil)
		return;

	// instinit sets ycover[0] to 1, so zero means the tables are not built yet.
	if(ycover[0] == 0)
		instinit();

	// pass 1: make every branch have a target, reset p->back to 0 or 1,
	// and rewrite AADJSP into an ADDL/SUBL on SP (or a NOP when the
	// adjustment is zero).
	for(p = s->text; p != nil; p = p->link) {
		n = 0;
		if(p->to.type == D_BRANCH)
			if(p->pcond == nil)
				p->pcond = p;
		if((q = p->pcond) != nil)
			if(q->back != 2)
				n = 1;
		p->back = n;
		if(p->as == AADJSP) {
			p->to.type = D_SP;
			v = -p->from.offset;
			p->from.offset = v;
			p->as = AADDL;
			if(v < 0) {
				p->as = ASUBL;
				v = -v;
				p->from.offset = v;
			}
			if(v == 0)
				p->as = ANOP;
		}
	}

	// pass 2: mark every instruction as "short branch" (bit 2). A target
	// that already carries bit 2 was visited earlier in this loop, i.e. it
	// precedes p, so the jump is backward (bit 1).
	for(p = s->text; p != nil; p = p->link) {
		p->back = 2;	// use short branches first time through
		if((q = p->pcond) != nil && (q->back & 2))
			p->back |= 1;	// backward jump

		// NOTE(review): pass 1 above already rewrote every AADJSP in this
		// list, so this branch looks unreachable -- confirm before removing.
		if(p->as == AADJSP) {
			p->to.type = D_SP;
			v = -p->from.offset;
			p->from.offset = v;
			p->as = AADDL;
			if(v < 0) {
				p->as = ASUBL;
				v = -v;
				p->from.offset = v;
			}
			if(v == 0)
				p->as = ANOP;
		}
	}
	
	// layout loop: n counts passes, loop counts reasons to run another one.
	n = 0;
	do {
		loop = 0;
		// discard relocations and bytes produced by the previous pass.
		memset(s->r, 0, s->nr*sizeof s->r[0]);
		s->nr = 0;
		s->np = 0;
		c = 0;
		for(p = s->text; p != nil; p = p->link) {
			// p->isize is the size found on the previous pass, so the
			// padding below only applies from the second pass on.
			if(ctxt->headtype == Hnacl && p->isize > 0) {
				static LSym *deferreturn;
				
				if(deferreturn == nil)
					deferreturn = linklookup(ctxt, "runtime.deferreturn", 0);

				// pad everything to avoid crossing 32-byte boundary
				if((c>>5) != ((c+p->isize-1)>>5))
					c = naclpad(ctxt, s, c, -c&31);
				// pad call deferreturn to start at 32-byte boundary
				// so that subtracting 5 in jmpdefer will jump back
				// to that boundary and rerun the call.
				if(p->as == ACALL && p->to.sym == deferreturn)
					c = naclpad(ctxt, s, c, -c&31);
				// pad call to end at 32-byte boundary
				if(p->as == ACALL)
					c = naclpad(ctxt, s, c, -(c+p->isize)&31);
				
				// the linker treats REP and STOSQ as different instructions
				// but in fact the REP is a prefix on the STOSQ.
				// make sure REP has room for 2 more bytes, so that
				// padding will not be inserted before the next instruction.
				if(p->as == AREP && (c>>5) != ((c+3-1)>>5))
					c = naclpad(ctxt, s, c, -c&31);
				
				// same for LOCK.
				// various instructions follow; the longest is 4 bytes.
				// give ourselves 8 bytes so as to avoid surprises.
				if(p->as == ALOCK && (c>>5) != ((c+8-1)>>5))
					c = naclpad(ctxt, s, c, -c&31);
			}
			
			p->pc = c;

			// process forward jumps to p
			for(q = p->comefrom; q != nil; q = q->forwd) {
				// displacement measured from the end of the jump q
				// (q->mark is the length recorded when q was assembled).
				v = p->pc - (q->pc + q->mark);
				if(q->back & 2)	{	// short
					if(v > 127) {
						// does not fit in one byte: clear the short
						// bit and force another pass.
						loop++;
						q->back ^= 2;
					}
					// the 8-bit displacement is the 3rd byte of
					// AJCXZW and the 2nd byte of other short jumps.
					if(q->as == AJCXZW)
						s->p[q->pc+2] = v;
					else
						s->p[q->pc+1] = v;
				} else {
					// long form: little-endian 32-bit displacement in
					// the last four bytes of the jump.
					bp = s->p + q->pc + q->mark - 4;
					*bp++ = v;
					*bp++ = v>>8;
					*bp++ = v>>16;
					*bp = v>>24;
				}	
			}
			p->comefrom = nil;

			p->pc = c;
			// assemble p into ctxt->and; m is the number of bytes produced.
			asmins(ctxt, p);
			m = ctxt->andptr-ctxt->and;
			if(p->isize != m) {
				// size changed since the last pass: later pcs are stale.
				p->isize = m;
				loop++;
			}
			symgrow(ctxt, s, p->pc+m);
			memmove(s->p+p->pc, ctxt->and, m);
			p->mark = m;
			c += m;
		}
		if(++n > 20) {
			ctxt->diag("span must be looping");
			sysfatal("bad code");
		}
	} while(loop);
	
	// final padding: to a 32-byte boundary under NaCl (with real no-ops),
	// then round the reported size up to a multiple of FuncAlign.
	if(ctxt->headtype == Hnacl)
		c = naclpad(ctxt, s, c, -c&31);
	c += -c&(FuncAlign-1);
	s->size = c;

	// disabled debug dump of the generated bytes and relocations.
	if(0 /* debug['a'] > 1 */) {
		print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
		for(i=0; i<s->np; i++) {
			print(" %.2ux", s->p[i]);
			if(i%16 == 15)
				print("\n  %.6ux", i+1);
		}
		if(i%16)
			print("\n");
	
		for(i=0; i<s->nr; i++) {
			Reloc *r;
			
			r = &s->r[i];
			print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
		}
	}
}
  1371  
  1372  static void
  1373  instinit(void)
  1374  {
  1375  	int i;
  1376  
  1377  	for(i=1; optab[i].as; i++)
  1378  		if(i != optab[i].as)
  1379  			sysfatal("phase error in optab: at %A found %A", i, optab[i].as);
  1380  
  1381  	for(i=0; i<Ymax; i++)
  1382  		ycover[i*Ymax + i] = 1;
  1383  
  1384  	ycover[Yi0*Ymax + Yi8] = 1;
  1385  	ycover[Yi1*Ymax + Yi8] = 1;
  1386  
  1387  	ycover[Yi0*Ymax + Yi32] = 1;
  1388  	ycover[Yi1*Ymax + Yi32] = 1;
  1389  	ycover[Yi8*Ymax + Yi32] = 1;
  1390  
  1391  	ycover[Yal*Ymax + Yrb] = 1;
  1392  	ycover[Ycl*Ymax + Yrb] = 1;
  1393  	ycover[Yax*Ymax + Yrb] = 1;
  1394  	ycover[Ycx*Ymax + Yrb] = 1;
  1395  	ycover[Yrx*Ymax + Yrb] = 1;
  1396  
  1397  	ycover[Yax*Ymax + Yrx] = 1;
  1398  	ycover[Ycx*Ymax + Yrx] = 1;
  1399  
  1400  	ycover[Yax*Ymax + Yrl] = 1;
  1401  	ycover[Ycx*Ymax + Yrl] = 1;
  1402  	ycover[Yrx*Ymax + Yrl] = 1;
  1403  
  1404  	ycover[Yf0*Ymax + Yrf] = 1;
  1405  
  1406  	ycover[Yal*Ymax + Ymb] = 1;
  1407  	ycover[Ycl*Ymax + Ymb] = 1;
  1408  	ycover[Yax*Ymax + Ymb] = 1;
  1409  	ycover[Ycx*Ymax + Ymb] = 1;
  1410  	ycover[Yrx*Ymax + Ymb] = 1;
  1411  	ycover[Yrb*Ymax + Ymb] = 1;
  1412  	ycover[Ym*Ymax + Ymb] = 1;
  1413  
  1414  	ycover[Yax*Ymax + Yml] = 1;
  1415  	ycover[Ycx*Ymax + Yml] = 1;
  1416  	ycover[Yrx*Ymax + Yml] = 1;
  1417  	ycover[Yrl*Ymax + Yml] = 1;
  1418  	ycover[Ym*Ymax + Yml] = 1;
  1419  
  1420  	ycover[Yax*Ymax + Ymm] = 1;
  1421  	ycover[Ycx*Ymax + Ymm] = 1;
  1422  	ycover[Yrx*Ymax + Ymm] = 1;
  1423  	ycover[Yrl*Ymax + Ymm] = 1;
  1424  	ycover[Ym*Ymax + Ymm] = 1;
  1425  	ycover[Ymr*Ymax + Ymm] = 1;
  1426  
  1427  	ycover[Ym*Ymax + Yxm] = 1;
  1428  	ycover[Yxr*Ymax + Yxm] = 1;
  1429  
  1430  	for(i=0; i<D_NONE; i++) {
  1431  		reg[i] = -1;
  1432  		if(i >= D_AL && i <= D_BH)
  1433  			reg[i] = (i-D_AL) & 7;
  1434  		if(i >= D_AX && i <= D_DI)
  1435  			reg[i] = (i-D_AX) & 7;
  1436  		if(i >= D_F0 && i <= D_F0+7)
  1437  			reg[i] = (i-D_F0) & 7;
  1438  		if(i >= D_X0 && i <= D_X0+7)
  1439  			reg[i] = (i-D_X0) & 7;
  1440  	}
  1441  }
  1442  
  1443  static int
  1444  prefixof(Link *ctxt, Addr *a)
  1445  {
  1446  	switch(a->type) {
  1447  	case D_INDIR+D_CS:
  1448  		return 0x2e;
  1449  	case D_INDIR+D_DS:
  1450  		return 0x3e;
  1451  	case D_INDIR+D_ES:
  1452  		return 0x26;
  1453  	case D_INDIR+D_FS:
  1454  		return 0x64;
  1455  	case D_INDIR+D_GS:
  1456  		return 0x65;
  1457  	case D_INDIR+D_TLS:
  1458  		// NOTE: Systems listed here should be only systems that
  1459  		// support direct TLS references like 8(TLS) implemented as
  1460  		// direct references from FS or GS. Systems that require
  1461  		// the initial-exec model, where you load the TLS base into
  1462  		// a register and then index from that register, do not reach
  1463  		// this code and should not be listed.
  1464  		switch(ctxt->headtype) {
  1465  		default:
  1466  			sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype));
  1467  		case Hdarwin:
  1468  		case Hdragonfly:
  1469  		case Hfreebsd:
  1470  		case Hnetbsd:
  1471  		case Hopenbsd:
  1472  			return 0x65; // GS
  1473  		}
  1474  	}
  1475  	return 0;
  1476  }
  1477  
// oclass returns the operand class (a Y* constant) of the operand a.
// It returns Yxxx when the operand does not fall into any known class.
static int
oclass(Addr *a)
{
	int32 v;

	// indirect references (type in [D_INDIR, 2*D_INDIR)) and anything
	// that carries an index are handled before the plain register/constant
	// switch below.
	if((a->type >= D_INDIR && a->type < 2*D_INDIR) || a->index != D_NONE) {
		if(a->index != D_NONE && a->scale == 0) {
			// with D_ADDR the index field holds the kind of name whose
			// address is taken, not a register.
			if(a->type == D_ADDR) {
				switch(a->index) {
				case D_EXTERN:
				case D_STATIC:
					return Yi32;
				case D_AUTO:
				case D_PARAM:
					return Yiauto;
				}
				return Yxxx;
			}
			//if(a->type == D_INDIR+D_ADDR)
			//	print("*Ycol\n");
			return Ycol;
		}
		return Ym;
	}
	switch(a->type)
	{
	case D_AL:
		return Yal;

	case D_AX:
		return Yax;

	// remaining byte registers
	case D_CL:
	case D_DL:
	case D_BL:
	case D_AH:
	case D_CH:
	case D_DH:
	case D_BH:
		return Yrb;

	case D_CX:
		return Ycx;

	case D_DX:
	case D_BX:
		return Yrx;

	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		return Yrl;

	case D_F0+0:
		return	Yf0;

	case D_F0+1:
	case D_F0+2:
	case D_F0+3:
	case D_F0+4:
	case D_F0+5:
	case D_F0+6:
	case D_F0+7:
		return	Yrf;

	case D_X0+0:
	case D_X0+1:
	case D_X0+2:
	case D_X0+3:
	case D_X0+4:
	case D_X0+5:
	case D_X0+6:
	case D_X0+7:
		return	Yxr;

	case D_NONE:
		return Ynone;

	// segment registers and TLS each have their own class
	case D_CS:	return	Ycs;
	case D_SS:	return	Yss;
	case D_DS:	return	Yds;
	case D_ES:	return	Yes;
	case D_FS:	return	Yfs;
	case D_GS:	return	Ygs;
	case D_TLS:	return	Ytls;

	case D_GDTR:	return	Ygdtr;
	case D_IDTR:	return	Yidtr;
	case D_LDTR:	return	Yldtr;
	case D_MSW:	return	Ymsw;
	case D_TASK:	return	Ytask;

	case D_CR+0:	return	Ycr0;
	case D_CR+1:	return	Ycr1;
	case D_CR+2:	return	Ycr2;
	case D_CR+3:	return	Ycr3;
	case D_CR+4:	return	Ycr4;
	case D_CR+5:	return	Ycr5;
	case D_CR+6:	return	Ycr6;
	case D_CR+7:	return	Ycr7;

	case D_DR+0:	return	Ydr0;
	case D_DR+1:	return	Ydr1;
	case D_DR+2:	return	Ydr2;
	case D_DR+3:	return	Ydr3;
	case D_DR+4:	return	Ydr4;
	case D_DR+5:	return	Ydr5;
	case D_DR+6:	return	Ydr6;
	case D_DR+7:	return	Ydr7;

	case D_TR+0:	return	Ytr0;
	case D_TR+1:	return	Ytr1;
	case D_TR+2:	return	Ytr2;
	case D_TR+3:	return	Ytr3;
	case D_TR+4:	return	Ytr4;
	case D_TR+5:	return	Ytr5;
	case D_TR+6:	return	Ytr6;
	case D_TR+7:	return	Ytr7;

	// named memory without an index
	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return Ym;

	case D_CONST:
	case D_CONST2:
	case D_ADDR:
		// a constant with no symbol is classified by value: 0, 1,
		// signed 8-bit, otherwise 32-bit. Anything with a symbol is
		// always 32-bit.
		if(a->sym == nil) {
			v = a->offset;
			if(v == 0)
				return Yi0;
			if(v == 1)
				return Yi1;
			if(v >= -128 && v <= 127)
				return Yi8;
		}
		return Yi32;

	case D_BRANCH:
		return Ybr;
	}
	return Yxxx;
}
  1623  
  1624  static void
  1625  asmidx(Link *ctxt, int scale, int index, int base)
  1626  {
  1627  	int i;
  1628  
  1629  	switch(index) {
  1630  	default:
  1631  		goto bad;
  1632  
  1633  	case D_NONE:
  1634  		i = 4 << 3;
  1635  		goto bas;
  1636  
  1637  	case D_AX:
  1638  	case D_CX:
  1639  	case D_DX:
  1640  	case D_BX:
  1641  	case D_BP:
  1642  	case D_SI:
  1643  	case D_DI:
  1644  		i = reg[index] << 3;
  1645  		break;
  1646  	}
  1647  	switch(scale) {
  1648  	default:
  1649  		goto bad;
  1650  	case 1:
  1651  		break;
  1652  	case 2:
  1653  		i |= (1<<6);
  1654  		break;
  1655  	case 4:
  1656  		i |= (2<<6);
  1657  		break;
  1658  	case 8:
  1659  		i |= (3<<6);
  1660  		break;
  1661  	}
  1662  bas:
  1663  	switch(base) {
  1664  	default:
  1665  		goto bad;
  1666  	case D_NONE:	/* must be mod=00 */
  1667  		i |= 5;
  1668  		break;
  1669  	case D_AX:
  1670  	case D_CX:
  1671  	case D_DX:
  1672  	case D_BX:
  1673  	case D_SP:
  1674  	case D_BP:
  1675  	case D_SI:
  1676  	case D_DI:
  1677  		i |= reg[base];
  1678  		break;
  1679  	}
  1680  	*ctxt->andptr++ = i;
  1681  	return;
  1682  bad:
  1683  	ctxt->diag("asmidx: bad address %d,%d,%d", scale, index, base);
  1684  	*ctxt->andptr++ = 0;
  1685  	return;
  1686  }
  1687  
  1688  static void
  1689  put4(Link *ctxt, int32 v)
  1690  {
  1691  	ctxt->andptr[0] = v;
  1692  	ctxt->andptr[1] = v>>8;
  1693  	ctxt->andptr[2] = v>>16;
  1694  	ctxt->andptr[3] = v>>24;
  1695  	ctxt->andptr += 4;
  1696  }
  1697  
  1698  static void
  1699  relput4(Link *ctxt, Prog *p, Addr *a)
  1700  {
  1701  	vlong v;
  1702  	Reloc rel, *r;
  1703  	
  1704  	v = vaddr(ctxt, a, &rel);
  1705  	if(rel.siz != 0) {
  1706  		if(rel.siz != 4)
  1707  			ctxt->diag("bad reloc");
  1708  		r = addrel(ctxt->cursym);
  1709  		*r = rel;
  1710  		r->off = p->pc + ctxt->andptr - ctxt->and;
  1711  	}
  1712  	put4(ctxt, v);
  1713  }
  1714  
  1715  static int32
  1716  vaddr(Link *ctxt, Addr *a, Reloc *r)
  1717  {
  1718  	int t;
  1719  	int32 v;
  1720  	LSym *s;
  1721  	
  1722  	if(r != nil)
  1723  		memset(r, 0, sizeof *r);
  1724  
  1725  	t = a->type;
  1726  	v = a->offset;
  1727  	if(t == D_ADDR)
  1728  		t = a->index;
  1729  	switch(t) {
  1730  	case D_STATIC:
  1731  	case D_EXTERN:
  1732  		s = a->sym;
  1733  		if(s != nil) {
  1734  			if(r == nil) {
  1735  				ctxt->diag("need reloc for %D", a);
  1736  				sysfatal("bad code");
  1737  			}
  1738  			r->type = R_ADDR;
  1739  			r->siz = 4;
  1740  			r->off = -1;
  1741  			r->sym = s;
  1742  			r->add = v;
  1743  			v = 0;
  1744  		}
  1745  		break;
  1746  	
  1747  	case D_INDIR+D_TLS:
  1748  		if(r == nil) {
  1749  			ctxt->diag("need reloc for %D", a);
  1750  			sysfatal("bad code");
  1751  		}
  1752  		r->type = R_TLS_LE;
  1753  		r->siz = 4;
  1754  		r->off = -1; // caller must fill in
  1755  		r->add = v;
  1756  		v = 0;
  1757  		break;
  1758  	}
  1759  	return v;
  1760  }
  1761  
// asmand appends the addressing bytes for operand a to the instruction
// buffer at ctxt->andptr. The first byte has the layout
// (mod << 6) | (r << 3) | rm, with the caller-supplied r in bits 3-5.
// Depending on the operand it is followed by an index byte produced by
// asmidx (rm == 4), and by a 1-byte or 4-byte displacement.
// When the displacement needs a relocation, the relocation is added to
// ctxt->cursym at the position of the 4-byte displacement.
// An operand that cannot be encoded is reported through ctxt->diag as
// "asmand: bad address".
static void
asmand(Link *ctxt, Addr *a, int r)
{
	int32 v;
	int t, scale;
	Reloc rel;

	v = a->offset;
	t = a->type;
	rel.siz = 0;	// no relocation unless one of the cases below sets it
	// operand with a real index register: always uses rm=4 plus an index byte.
	if(a->index != D_NONE && a->index != D_TLS) {
		// reduce t to the base register (or D_NONE for an absolute address).
		if(t < D_INDIR || t >= 2*D_INDIR) {
			switch(t) {
			default:
				goto bad;
			case D_STATIC:
			case D_EXTERN:
				t = D_NONE;
				v = vaddr(ctxt, a, &rel);
				break;
			case D_AUTO:
			case D_PARAM:
				t = D_SP;
				break;
			}
		} else
			t -= D_INDIR;

		// no base: mod=0 with a 4-byte displacement.
		if(t == D_NONE) {
			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, a->scale, a->index, t);
			goto putrelv;
		}
		// zero displacement: mod=0, nothing after the index byte.
		// Not used with base BP (base bits 5 under mod=0 are what asmidx
		// emits for "no base").
		if(v == 0 && rel.siz == 0 && t != D_BP) {
			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, a->scale, a->index, t);
			return;
		}
		// displacement fits in a signed byte: mod=1.
		if(v >= -128 && v < 128 && rel.siz == 0) {
			*ctxt->andptr++ = (1 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, a->scale, a->index, t);
			*ctxt->andptr++ = v;
			return;
		}
		// otherwise mod=2 with a 4-byte displacement.
		*ctxt->andptr++ = (2 << 6) | (4 << 0) | (r << 3);
		asmidx(ctxt, a->scale, a->index, t);
		goto putrelv;
	}
	// register operand: mod=3, register number in the rm field.
	// An offset on a register operand is an error.
	if(t >= D_AL && t <= D_F7 || t >= D_X0 && t <= D_X7) {
		if(v)
			goto bad;
		*ctxt->andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
		return;
	}
	
	// memory operand without an index register.
	scale = a->scale;
	if(t < D_INDIR || t >= 2*D_INDIR) {
		switch(a->type) {
		default:
			goto bad;
		case D_STATIC:
		case D_EXTERN:
			t = D_NONE;
			v = vaddr(ctxt, a, &rel);
			break;
		case D_AUTO:
		case D_PARAM:
			t = D_SP;
			break;
		}
		scale = 1;
	} else
		t -= D_INDIR;
	if(t == D_TLS)
		v = vaddr(ctxt, a, &rel);

	// absolute, segment-relative or TLS-relative address:
	// mod=0, rm=5, followed by a 4-byte displacement.
	if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) {
		*ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3);
		goto putrelv;
	}
	// base SP is always encoded with rm=4 and an index byte that names
	// no index register.
	if(t == D_SP) {
		if(v == 0 && rel.siz == 0) {
			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, scale, D_NONE, t);
			return;
		}
		if(v >= -128 && v < 128 && rel.siz == 0) {
			*ctxt->andptr++ = (1 << 6) | (4 << 0) | (r << 3);
			asmidx(ctxt, scale, D_NONE, t);
			*ctxt->andptr++ = v;
			return;
		}
		*ctxt->andptr++ = (2 << 6) | (4 << 0) | (r << 3);
		asmidx(ctxt, scale, D_NONE, t);
		goto putrelv;
	}
	// any other general register as base.
	if(t >= D_AX && t <= D_DI) {
		// index == D_TLS marks a TLS access through this register:
		// the offset moves into an R_TLS_IE relocation, which forces
		// the 4-byte displacement form below.
		if(a->index == D_TLS) {
			memset(&rel, 0, sizeof rel);
			rel.type = R_TLS_IE;
			rel.siz = 4;
			rel.sym = nil;
			rel.add = v;
			v = 0;
		}
		// no displacement (not used with base BP).
		if(v == 0 && rel.siz == 0 && t != D_BP) {
			*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
			return;
		}
		// 1-byte displacement.
		if(v >= -128 && v < 128 && rel.siz == 0)  {
			ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
			ctxt->andptr[1] = v;
			ctxt->andptr += 2;
			return;
		}
		// 4-byte displacement.
		*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
		goto putrelv;
	}
	goto bad;

	// emit the 4-byte displacement, recording the relocation (if any)
	// at the offset where those bytes land.
putrelv:
	if(rel.siz != 0) {
		// note: this local r shadows the int parameter r within this block.
		Reloc *r;
		
		if(rel.siz != 4) {
			ctxt->diag("bad rel");
			goto bad;
		}
		r = addrel(ctxt->cursym);
		*r = rel;
		r->off = ctxt->curp->pc + ctxt->andptr - ctxt->and;
	}

	put4(ctxt, v);
	return;

bad:
	ctxt->diag("asmand: bad address %D", a);
	return;
}
  1902  
// E (0xff) is a marker byte used inside ymovtab rows, where it follows the
// last opcode byte of the push/pop entries.
// NOTE(review): presumably an end-of-opcode sentinel read by the ymovtab
// consumer -- confirm against doasm.
enum
{
	E = 0xff,
};
  1907  
// ymovtab lists special-case instruction forms as a flat array of 8-byte
// rows, terminated by a single 0:
//	row[0]    instruction (A* constant)
//	row[1]    class of the from operand (Y* constant)
//	row[2]    class of the to operand (Y* constant)
//	row[3]    small code (0 through 8 here) shared by rows of the same shape
//	row[4-7]  up to four bytes of prefix/opcode data; some rows place E
//	          after the last meaningful byte
// NOTE(review): the code that reads this table is outside this part of the
// file; presumably row[3] selects how row[4-7] are emitted -- confirm
// against doasm.
static uchar	ymovtab[] =
{
/* push */
	APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0,
	APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0,
	APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0,
	APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0,
	APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0,
	APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0,

	APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0,
	APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0,
	APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0,
	APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0,
	APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E,
	APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E,

/* pop */
	APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0,
	APOPL,	Ynone,	Yes,	0,	0x07,E,0,0,
	APOPL,	Ynone,	Yss,	0,	0x17,E,0,0,
	APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0,
	APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0,

	APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0,
	APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0,
	APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0,
	APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E,
	APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E,

/* mov seg */
	AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0,
	AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0,
	AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0,
	AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0,
	AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0,
	AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0,

	AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0,
	AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0,
	AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0,
	AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0,
	AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0,
	AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0,

/* mov cr */
	AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0,
	AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0,
	AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0,
	AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0,

	AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0,
	AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0,
	AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0,
	AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0,

/* mov dr */
	AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0,
	AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0,
	AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0,

	AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0,
	AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0,
	AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0,

/* mov tr */
	AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0,
	AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0,

	AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E,
	AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E,

/* lgdt, sgdt, lidt, sidt */
	AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0,
	AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0,
	AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0,
	AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0,

/* lldt, sldt */
	AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0,
	AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0,

/* lmsw, smsw */
	AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0,
	AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0,

/* ltr, str */
	AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0,
	AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0,

/* load full pointer */
	AMOVL,	Yml,	Ycol,	5,	0,0,0,0,
	AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0,

/* double shift */
	ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0,
	ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0,

/* extra imul */
	AIMULW,	Yml,	Yrl,	7,	Pq,0xaf,0,0,
	AIMULL,	Yml,	Yrl,	7,	Pm,0xaf,0,0,

/* load TLS base pointer */
	AMOVL,	Ytls,	Yrl,	8,	0,0,0,0,

	0
};
  2015  
  2016  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  2017  // which is not referenced in a->type.
  2018  // If a is empty, it returns BX to account for MULB-like instructions
  2019  // that might use DX and AX.
  2020  static int
  2021  byteswapreg(Link *ctxt, Addr *a)
  2022  {
  2023  	int cana, canb, canc, cand;
  2024  
  2025  	cana = canb = canc = cand = 1;
  2026  
  2027  	switch(a->type) {
  2028  	case D_NONE:
  2029  		cana = cand = 0;
  2030  		break;
  2031  	case D_AX:
  2032  	case D_AL:
  2033  	case D_AH:
  2034  	case D_INDIR+D_AX:
  2035  		cana = 0;
  2036  		break;
  2037  	case D_BX:
  2038  	case D_BL:
  2039  	case D_BH:
  2040  	case D_INDIR+D_BX:
  2041  		canb = 0;
  2042  		break;
  2043  	case D_CX:
  2044  	case D_CL:
  2045  	case D_CH:
  2046  	case D_INDIR+D_CX:
  2047  		canc = 0;
  2048  		break;
  2049  	case D_DX:
  2050  	case D_DL:
  2051  	case D_DH:
  2052  	case D_INDIR+D_DX:
  2053  		cand = 0;
  2054  		break;
  2055  	}
  2056  	switch(a->index) {
  2057  	case D_AX:
  2058  		cana = 0;
  2059  		break;
  2060  	case D_BX:
  2061  		canb = 0;
  2062  		break;
  2063  	case D_CX:
  2064  		canc = 0;
  2065  		break;
  2066  	case D_DX:
  2067  		cand = 0;
  2068  		break;
  2069  	}
  2070  	if(cana)
  2071  		return D_AX;
  2072  	if(canb)
  2073  		return D_BX;
  2074  	if(canc)
  2075  		return D_CX;
  2076  	if(cand)
  2077  		return D_DX;
  2078  
  2079  	ctxt->diag("impossible byte register");
  2080  	sysfatal("bad code");
  2081  	return 0;
  2082  }
  2083  
  2084  static void
  2085  subreg(Prog *p, int from, int to)
  2086  {
  2087  
  2088  	if(0 /* debug['Q'] */)
  2089  		print("\n%P	s/%R/%R/\n", p, from, to);
  2090  
  2091  	if(p->from.type == from) {
  2092  		p->from.type = to;
  2093  		p->ft = 0;
  2094  	}
  2095  	if(p->to.type == from) {
  2096  		p->to.type = to;
  2097  		p->tt = 0;
  2098  	}
  2099  
  2100  	if(p->from.index == from) {
  2101  		p->from.index = to;
  2102  		p->ft = 0;
  2103  	}
  2104  	if(p->to.index == from) {
  2105  		p->to.index = to;
  2106  		p->tt = 0;
  2107  	}
  2108  
  2109  	from += D_INDIR;
  2110  	if(p->from.type == from) {
  2111  		p->from.type = to+D_INDIR;
  2112  		p->ft = 0;
  2113  	}
  2114  	if(p->to.type == from) {
  2115  		p->to.type = to+D_INDIR;
  2116  		p->tt = 0;
  2117  	}
  2118  
  2119  	if(0 /* debug['Q'] */)
  2120  		print("%P\n", p);
  2121  }
  2122  
  2123  static int
  2124  mediaop(Link *ctxt, Optab *o, int op, int osize, int z)
  2125  {
  2126  	switch(op){
  2127  	case Pm:
  2128  	case Pe:
  2129  	case Pf2:
  2130  	case Pf3:
  2131  		if(osize != 1){
  2132  			if(op != Pm)
  2133  				*ctxt->andptr++ = op;
  2134  			*ctxt->andptr++ = Pm;
  2135  			op = o->op[++z];
  2136  			break;
  2137  		}
  2138  	default:
  2139  		if(ctxt->andptr == ctxt->and || ctxt->and[ctxt->andptr - ctxt->and - 1] != Pm)
  2140  			*ctxt->andptr++ = Pm;
  2141  		break;
  2142  	}
  2143  	*ctxt->andptr++ = op;
  2144  	return z;
  2145  }
  2146  
  2147  static void
  2148  doasm(Link *ctxt, Prog *p)
  2149  {
  2150  	Optab *o;
  2151  	Prog *q, pp;
  2152  	uchar *t;
  2153  	int z, op, ft, tt, breg;
  2154  	int32 v, pre;
  2155  	Reloc rel, *r;
  2156  	Addr *a;
  2157  	
  2158  	ctxt->curp = p;	// TODO
  2159  
  2160  	pre = prefixof(ctxt, &p->from);
  2161  	if(pre)
  2162  		*ctxt->andptr++ = pre;
  2163  	pre = prefixof(ctxt, &p->to);
  2164  	if(pre)
  2165  		*ctxt->andptr++ = pre;
  2166  
  2167  	if(p->ft == 0)
  2168  		p->ft = oclass(&p->from);
  2169  	if(p->tt == 0)
  2170  		p->tt = oclass(&p->to);
  2171  
  2172  	ft = p->ft * Ymax;
  2173  	tt = p->tt * Ymax;
  2174  	o = &optab[p->as];
  2175  	t = o->ytab;
  2176  	if(t == 0) {
  2177  		ctxt->diag("asmins: noproto %P", p);
  2178  		return;
  2179  	}
  2180  	for(z=0; *t; z+=t[3],t+=4)
  2181  		if(ycover[ft+t[0]])
  2182  		if(ycover[tt+t[1]])
  2183  			goto found;
  2184  	goto domov;
  2185  
  2186  found:
  2187  	switch(o->prefix) {
  2188  	case Pq:	/* 16 bit escape and opcode escape */
  2189  		*ctxt->andptr++ = Pe;
  2190  		*ctxt->andptr++ = Pm;
  2191  		break;
  2192  
  2193  	case Pf2:	/* xmm opcode escape */
  2194  	case Pf3:
  2195  		*ctxt->andptr++ = o->prefix;
  2196  		*ctxt->andptr++ = Pm;
  2197  		break;
  2198  
  2199  	case Pm:	/* opcode escape */
  2200  		*ctxt->andptr++ = Pm;
  2201  		break;
  2202  
  2203  	case Pe:	/* 16 bit escape */
  2204  		*ctxt->andptr++ = Pe;
  2205  		break;
  2206  
  2207  	case Pb:	/* botch */
  2208  		break;
  2209  	}
  2210  
  2211  	op = o->op[z];
  2212  	switch(t[2]) {
  2213  	default:
  2214  		ctxt->diag("asmins: unknown z %d %P", t[2], p);
  2215  		return;
  2216  
  2217  	case Zpseudo:
  2218  		break;
  2219  
  2220  	case Zlit:
  2221  		for(; op = o->op[z]; z++)
  2222  			*ctxt->andptr++ = op;
  2223  		break;
  2224  
  2225  	case Zlitm_r:
  2226  		for(; op = o->op[z]; z++)
  2227  			*ctxt->andptr++ = op;
  2228  		asmand(ctxt, &p->from, reg[p->to.type]);
  2229  		break;
  2230  
  2231  	case Zm_r:
  2232  		*ctxt->andptr++ = op;
  2233  		asmand(ctxt, &p->from, reg[p->to.type]);
  2234  		break;
  2235  
  2236  	case Zm2_r:
  2237  		*ctxt->andptr++ = op;
  2238  		*ctxt->andptr++ = o->op[z+1];
  2239  		asmand(ctxt, &p->from, reg[p->to.type]);
  2240  		break;
  2241  
  2242  	case Zm_r_xm:
  2243  		mediaop(ctxt, o, op, t[3], z);
  2244  		asmand(ctxt, &p->from, reg[p->to.type]);
  2245  		break;
  2246  
  2247  	case Zm_r_i_xm:
  2248  		mediaop(ctxt, o, op, t[3], z);
  2249  		asmand(ctxt, &p->from, reg[p->to.type]);
  2250  		*ctxt->andptr++ = p->to.offset;
  2251  		break;
  2252  
  2253  	case Zibm_r:
  2254  		while ((op = o->op[z++]) != 0)
  2255  			*ctxt->andptr++ = op;
  2256  		asmand(ctxt, &p->from, reg[p->to.type]);
  2257  		*ctxt->andptr++ = p->to.offset;
  2258  		break;
  2259  
  2260  	case Zaut_r:
  2261  		*ctxt->andptr++ = 0x8d;	/* leal */
  2262  		if(p->from.type != D_ADDR)
  2263  			ctxt->diag("asmins: Zaut sb type ADDR");
  2264  		p->from.type = p->from.index;
  2265  		p->from.index = D_NONE;
  2266  		p->ft = 0;
  2267  		asmand(ctxt, &p->from, reg[p->to.type]);
  2268  		p->from.index = p->from.type;
  2269  		p->from.type = D_ADDR;
  2270  		p->ft = 0;
  2271  		break;
  2272  
  2273  	case Zm_o:
  2274  		*ctxt->andptr++ = op;
  2275  		asmand(ctxt, &p->from, o->op[z+1]);
  2276  		break;
  2277  
  2278  	case Zr_m:
  2279  		*ctxt->andptr++ = op;
  2280  		asmand(ctxt, &p->to, reg[p->from.type]);
  2281  		break;
  2282  
  2283  	case Zr_m_xm:
  2284  		mediaop(ctxt, o, op, t[3], z);
  2285  		asmand(ctxt, &p->to, reg[p->from.type]);
  2286  		break;
  2287  
  2288  	case Zr_m_i_xm:
  2289  		mediaop(ctxt, o, op, t[3], z);
  2290  		asmand(ctxt, &p->to, reg[p->from.type]);
  2291  		*ctxt->andptr++ = p->from.offset;
  2292  		break;
  2293  
  2294  	case Zcallindreg:
  2295  		r = addrel(ctxt->cursym);
  2296  		r->off = p->pc;
  2297  		r->type = R_CALLIND;
  2298  		r->siz = 0;
  2299  		// fallthrough
  2300  	case Zo_m:
  2301  		*ctxt->andptr++ = op;
  2302  		asmand(ctxt, &p->to, o->op[z+1]);
  2303  		break;
  2304  
  2305  	case Zm_ibo:
  2306  		*ctxt->andptr++ = op;
  2307  		asmand(ctxt, &p->from, o->op[z+1]);
  2308  		*ctxt->andptr++ = vaddr(ctxt, &p->to, nil);
  2309  		break;
  2310  
  2311  	case Zibo_m:
  2312  		*ctxt->andptr++ = op;
  2313  		asmand(ctxt, &p->to, o->op[z+1]);
  2314  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2315  		break;
  2316  
  2317  	case Z_ib:
  2318  	case Zib_:
  2319  		if(t[2] == Zib_)
  2320  			a = &p->from;
  2321  		else
  2322  			a = &p->to;
  2323  		v = vaddr(ctxt, a, nil);
  2324  		*ctxt->andptr++ = op;
  2325  		*ctxt->andptr++ = v;
  2326  		break;
  2327  
  2328  	case Zib_rp:
  2329  		*ctxt->andptr++ = op + reg[p->to.type];
  2330  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2331  		break;
  2332  
  2333  	case Zil_rp:
  2334  		*ctxt->andptr++ = op + reg[p->to.type];
  2335  		if(o->prefix == Pe) {
  2336  			v = vaddr(ctxt, &p->from, nil);
  2337  			*ctxt->andptr++ = v;
  2338  			*ctxt->andptr++ = v>>8;
  2339  		}
  2340  		else
  2341  			relput4(ctxt, p, &p->from);
  2342  		break;
  2343  
  2344  	case Zib_rr:
  2345  		*ctxt->andptr++ = op;
  2346  		asmand(ctxt, &p->to, reg[p->to.type]);
  2347  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2348  		break;
  2349  
  2350  	case Z_il:
  2351  	case Zil_:
  2352  		if(t[2] == Zil_)
  2353  			a = &p->from;
  2354  		else
  2355  			a = &p->to;
  2356  		*ctxt->andptr++ = op;
  2357  		if(o->prefix == Pe) {
  2358  			v = vaddr(ctxt, a, nil);
  2359  			*ctxt->andptr++ = v;
  2360  			*ctxt->andptr++ = v>>8;
  2361  		}
  2362  		else
  2363  			relput4(ctxt, p, a);
  2364  		break;
  2365  
  2366  	case Zm_ilo:
  2367  	case Zilo_m:
  2368  		*ctxt->andptr++ = op;
  2369  		if(t[2] == Zilo_m) {
  2370  			a = &p->from;
  2371  			asmand(ctxt, &p->to, o->op[z+1]);
  2372  		} else {
  2373  			a = &p->to;
  2374  			asmand(ctxt, &p->from, o->op[z+1]);
  2375  		}
  2376  		if(o->prefix == Pe) {
  2377  			v = vaddr(ctxt, a, nil);
  2378  			*ctxt->andptr++ = v;
  2379  			*ctxt->andptr++ = v>>8;
  2380  		}
  2381  		else
  2382  			relput4(ctxt, p, a);
  2383  		break;
  2384  
  2385  	case Zil_rr:
  2386  		*ctxt->andptr++ = op;
  2387  		asmand(ctxt, &p->to, reg[p->to.type]);
  2388  		if(o->prefix == Pe) {
  2389  			v = vaddr(ctxt, &p->from, nil);
  2390  			*ctxt->andptr++ = v;
  2391  			*ctxt->andptr++ = v>>8;
  2392  		}
  2393  		else
  2394  			relput4(ctxt, p, &p->from);
  2395  		break;
  2396  
  2397  	case Z_rp:
  2398  		*ctxt->andptr++ = op + reg[p->to.type];
  2399  		break;
  2400  
  2401  	case Zrp_:
  2402  		*ctxt->andptr++ = op + reg[p->from.type];
  2403  		break;
  2404  
  2405  	case Zclr:
  2406  		*ctxt->andptr++ = op;
  2407  		asmand(ctxt, &p->to, reg[p->to.type]);
  2408  		break;
  2409  	
  2410  	case Zcall:
  2411  		if(p->to.sym == nil) {
  2412  			ctxt->diag("call without target");
  2413  			sysfatal("bad code");
  2414  		}
  2415  		*ctxt->andptr++ = op;
  2416  		r = addrel(ctxt->cursym);
  2417  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2418  		r->type = R_CALL;
  2419  		r->siz = 4;
  2420  		r->sym = p->to.sym;
  2421  		r->add = p->to.offset;
  2422  		put4(ctxt, 0);
  2423  		break;
  2424  
  2425  	case Zbr:
  2426  	case Zjmp:
  2427  	case Zloop:
  2428  		if(p->to.sym != nil) {
  2429  			if(t[2] != Zjmp) {
  2430  				ctxt->diag("branch to ATEXT");
  2431  				sysfatal("bad code");
  2432  			}
  2433  			*ctxt->andptr++ = o->op[z+1];
  2434  			r = addrel(ctxt->cursym);
  2435  			r->off = p->pc + ctxt->andptr - ctxt->and;
  2436  			r->sym = p->to.sym;
  2437  			r->type = R_PCREL;
  2438  			r->siz = 4;
  2439  			put4(ctxt, 0);
  2440  			break;
  2441  		}
  2442  
  2443  		// Assumes q is in this function.
  2444  		// Fill in backward jump now.
  2445  		q = p->pcond;
  2446  		if(q == nil) {
  2447  			ctxt->diag("jmp/branch/loop without target");
  2448  			sysfatal("bad code");
  2449  		}
  2450  		if(p->back & 1) {
  2451  			v = q->pc - (p->pc + 2);
  2452  			if(v >= -128) {
  2453  				if(p->as == AJCXZW)
  2454  					*ctxt->andptr++ = 0x67;
  2455  				*ctxt->andptr++ = op;
  2456  				*ctxt->andptr++ = v;
  2457  			} else if(t[2] == Zloop) {
  2458  				ctxt->diag("loop too far: %P", p);
  2459  			} else {
  2460  				v -= 5-2;
  2461  				if(t[2] == Zbr) {
  2462  					*ctxt->andptr++ = 0x0f;
  2463  					v--;
  2464  				}
  2465  				*ctxt->andptr++ = o->op[z+1];
  2466  				*ctxt->andptr++ = v;
  2467  				*ctxt->andptr++ = v>>8;
  2468  				*ctxt->andptr++ = v>>16;
  2469  				*ctxt->andptr++ = v>>24;
  2470  			}
  2471  			break;
  2472  		}
  2473  
  2474  		// Annotate target; will fill in later.
  2475  		p->forwd = q->comefrom;
  2476  		q->comefrom = p;
  2477  		if(p->back & 2)	{ // short
  2478  			if(p->as == AJCXZW)
  2479  				*ctxt->andptr++ = 0x67;
  2480  			*ctxt->andptr++ = op;
  2481  			*ctxt->andptr++ = 0;
  2482  		} else if(t[2] == Zloop) {
  2483  			ctxt->diag("loop too far: %P", p);
  2484  		} else {
  2485  			if(t[2] == Zbr)
  2486  				*ctxt->andptr++ = 0x0f;
  2487  			*ctxt->andptr++ = o->op[z+1];
  2488  			*ctxt->andptr++ = 0;
  2489  			*ctxt->andptr++ = 0;
  2490  			*ctxt->andptr++ = 0;
  2491  			*ctxt->andptr++ = 0;
  2492  		}
  2493  		break;
  2494  
  2495  	case Zcallcon:
  2496  	case Zjmpcon:
  2497  		if(t[2] == Zcallcon)
  2498  			*ctxt->andptr++ = op;
  2499  		else
  2500  			*ctxt->andptr++ = o->op[z+1];
  2501  		r = addrel(ctxt->cursym);
  2502  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2503  		r->type = R_PCREL;
  2504  		r->siz = 4;
  2505  		r->add = p->to.offset;
  2506  		put4(ctxt, 0);
  2507  		break;
  2508  	
  2509  	case Zcallind:
  2510  		*ctxt->andptr++ = op;
  2511  		*ctxt->andptr++ = o->op[z+1];
  2512  		r = addrel(ctxt->cursym);
  2513  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2514  		r->type = R_ADDR;
  2515  		r->siz = 4;
  2516  		r->add = p->to.offset;
  2517  		r->sym = p->to.sym;
  2518  		put4(ctxt, 0);
  2519  		break;
  2520  
  2521  	case Zbyte:
  2522  		v = vaddr(ctxt, &p->from, &rel);
  2523  		if(rel.siz != 0) {
  2524  			rel.siz = op;
  2525  			r = addrel(ctxt->cursym);
  2526  			*r = rel;
  2527  			r->off = p->pc + ctxt->andptr - ctxt->and;
  2528  		}
  2529  		*ctxt->andptr++ = v;
  2530  		if(op > 1) {
  2531  			*ctxt->andptr++ = v>>8;
  2532  			if(op > 2) {
  2533  				*ctxt->andptr++ = v>>16;
  2534  				*ctxt->andptr++ = v>>24;
  2535  			}
  2536  		}
  2537  		break;
  2538  
  2539  	case Zmov:
  2540  		goto domov;
  2541  	}
  2542  	return;
  2543  
  2544  domov:
  2545  	for(t=ymovtab; *t; t+=8)
  2546  		if(p->as == t[0])
  2547  		if(ycover[ft+t[1]])
  2548  		if(ycover[tt+t[2]])
  2549  			goto mfound;
  2550  bad:
  2551  	/*
  2552  	 * here, the assembly has failed.
  2553  	 * if its a byte instruction that has
  2554  	 * unaddressable registers, try to
  2555  	 * exchange registers and reissue the
  2556  	 * instruction with the operands renamed.
  2557  	 */
  2558  	pp = *p;
  2559  	z = p->from.type;
  2560  	if(z >= D_BP && z <= D_DI) {
  2561  		if((breg = byteswapreg(ctxt, &p->to)) != D_AX) {
  2562  			*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  2563  			asmand(ctxt, &p->from, reg[breg]);
  2564  			subreg(&pp, z, breg);
  2565  			doasm(ctxt, &pp);
  2566  			*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  2567  			asmand(ctxt, &p->from, reg[breg]);
  2568  		} else {
  2569  			*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  2570  			subreg(&pp, z, D_AX);
  2571  			doasm(ctxt, &pp);
  2572  			*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  2573  		}
  2574  		return;
  2575  	}
  2576  	z = p->to.type;
  2577  	if(z >= D_BP && z <= D_DI) {
  2578  		if((breg = byteswapreg(ctxt, &p->from)) != D_AX) {
  2579  			*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  2580  			asmand(ctxt, &p->to, reg[breg]);
  2581  			subreg(&pp, z, breg);
  2582  			doasm(ctxt, &pp);
  2583  			*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  2584  			asmand(ctxt, &p->to, reg[breg]);
  2585  		} else {
  2586  			*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  2587  			subreg(&pp, z, D_AX);
  2588  			doasm(ctxt, &pp);
  2589  			*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  2590  		}
  2591  		return;
  2592  	}
  2593  	ctxt->diag("doasm: notfound t2=%ux from=%ux to=%ux %P", t[2], p->from.type, p->to.type, p);
  2594  	return;
  2595  
  2596  mfound:
  2597  	switch(t[3]) {
  2598  	default:
  2599  		ctxt->diag("asmins: unknown mov %d %P", t[3], p);
  2600  		break;
  2601  
  2602  	case 0:	/* lit */
  2603  		for(z=4; t[z]!=E; z++)
  2604  			*ctxt->andptr++ = t[z];
  2605  		break;
  2606  
  2607  	case 1:	/* r,m */
  2608  		*ctxt->andptr++ = t[4];
  2609  		asmand(ctxt, &p->to, t[5]);
  2610  		break;
  2611  
  2612  	case 2:	/* m,r */
  2613  		*ctxt->andptr++ = t[4];
  2614  		asmand(ctxt, &p->from, t[5]);
  2615  		break;
  2616  
  2617  	case 3:	/* r,m - 2op */
  2618  		*ctxt->andptr++ = t[4];
  2619  		*ctxt->andptr++ = t[5];
  2620  		asmand(ctxt, &p->to, t[6]);
  2621  		break;
  2622  
  2623  	case 4:	/* m,r - 2op */
  2624  		*ctxt->andptr++ = t[4];
  2625  		*ctxt->andptr++ = t[5];
  2626  		asmand(ctxt, &p->from, t[6]);
  2627  		break;
  2628  
  2629  	case 5:	/* load full pointer, trash heap */
  2630  		if(t[4])
  2631  			*ctxt->andptr++ = t[4];
  2632  		switch(p->to.index) {
  2633  		default:
  2634  			goto bad;
  2635  		case D_DS:
  2636  			*ctxt->andptr++ = 0xc5;
  2637  			break;
  2638  		case D_SS:
  2639  			*ctxt->andptr++ = 0x0f;
  2640  			*ctxt->andptr++ = 0xb2;
  2641  			break;
  2642  		case D_ES:
  2643  			*ctxt->andptr++ = 0xc4;
  2644  			break;
  2645  		case D_FS:
  2646  			*ctxt->andptr++ = 0x0f;
  2647  			*ctxt->andptr++ = 0xb4;
  2648  			break;
  2649  		case D_GS:
  2650  			*ctxt->andptr++ = 0x0f;
  2651  			*ctxt->andptr++ = 0xb5;
  2652  			break;
  2653  		}
  2654  		asmand(ctxt, &p->from, reg[p->to.type]);
  2655  		break;
  2656  
  2657  	case 6:	/* double shift */
  2658  		z = p->from.type;
  2659  		switch(z) {
  2660  		default:
  2661  			goto bad;
  2662  		case D_CONST:
  2663  			*ctxt->andptr++ = 0x0f;
  2664  			*ctxt->andptr++ = t[4];
  2665  			asmand(ctxt, &p->to, reg[p->from.index]);
  2666  			*ctxt->andptr++ = p->from.offset;
  2667  			break;
  2668  		case D_CL:
  2669  		case D_CX:
  2670  			*ctxt->andptr++ = 0x0f;
  2671  			*ctxt->andptr++ = t[5];
  2672  			asmand(ctxt, &p->to, reg[p->from.index]);
  2673  			break;
  2674  		}
  2675  		break;
  2676  
  2677  	case 7: /* imul rm,r */
  2678  		if(t[4] == Pq) {
  2679  			*ctxt->andptr++ = Pe;
  2680  			*ctxt->andptr++ = Pm;
  2681  		} else
  2682  			*ctxt->andptr++ = t[4];
  2683  		*ctxt->andptr++ = t[5];
  2684  		asmand(ctxt, &p->from, reg[p->to.type]);
  2685  		break;
  2686  	
  2687  	case 8: /* mov tls, r */
  2688  		// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  2689  		// where you load the TLS base register into a register and then index off that
  2690  		// register to access the actual TLS variables. Systems that allow direct TLS access
  2691  		// are handled in prefixof above and should not be listed here.
  2692  		switch(ctxt->headtype) {
  2693  		default:
  2694  			sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype));
  2695  
  2696  		case Hlinux:
  2697  		case Hnacl:
  2698  			// ELF TLS base is 0(GS).
  2699  			pp.from = p->from;
  2700  			pp.from.type = D_INDIR+D_GS;
  2701  			pp.from.offset = 0;
  2702  			pp.from.index = D_NONE;
  2703  			pp.from.scale = 0;
  2704  			*ctxt->andptr++ = 0x65; // GS
  2705  			*ctxt->andptr++ = 0x8B;
  2706  			asmand(ctxt, &pp.from, reg[p->to.type]);
  2707  			break;
  2708  		
  2709  		case Hplan9:
  2710  			if(ctxt->plan9privates == nil)
  2711  				ctxt->plan9privates = linklookup(ctxt, "_privates", 0);
  2712  			memset(&pp.from, 0, sizeof pp.from);
  2713  			pp.from.type = D_EXTERN;
  2714  			pp.from.sym = ctxt->plan9privates;
  2715  			pp.from.offset = 0;
  2716  			pp.from.index = D_NONE;
  2717  			*ctxt->andptr++ = 0x8B;
  2718  			asmand(ctxt, &pp.from, reg[p->to.type]);
  2719  			break;
  2720  
  2721  		case Hwindows:
  2722  			// Windows TLS base is always 0x14(FS).
  2723  			pp.from = p->from;
  2724  			pp.from.type = D_INDIR+D_FS;
  2725  			pp.from.offset = 0x14;
  2726  			pp.from.index = D_NONE;
  2727  			pp.from.scale = 0;
  2728  			*ctxt->andptr++ = 0x64; // FS
  2729  			*ctxt->andptr++ = 0x8B;
  2730  			asmand(ctxt, &pp.from, reg[p->to.type]);
  2731  			break;
  2732  		}
  2733  		break;
  2734  	}
  2735  }
  2736  
  2737  static uchar naclret[] = {
  2738  	0x5d, // POPL BP
  2739  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  2740  	0x83, 0xe5, 0xe0,	// ANDL $~31, BP
  2741  	0xff, 0xe5, // JMP BP
  2742  };
  2743  
  2744  static void
  2745  asmins(Link *ctxt, Prog *p)
  2746  {
  2747  	Reloc *r;
  2748  
  2749  	ctxt->andptr = ctxt->and;
  2750  	
  2751  	if(p->as == AUSEFIELD) {
  2752  		r = addrel(ctxt->cursym);
  2753  		r->off = 0;
  2754  		r->sym = p->from.sym;
  2755  		r->type = R_USEFIELD;
  2756  		r->siz = 0;
  2757  		return;
  2758  	}
  2759  
  2760  	if(ctxt->headtype == Hnacl) {
  2761  		switch(p->as) {
  2762  		case ARET:
  2763  			memmove(ctxt->andptr, naclret, sizeof naclret);
  2764  			ctxt->andptr += sizeof naclret;
  2765  			return;
  2766  		case ACALL:
  2767  		case AJMP:
  2768  			if(D_AX <= p->to.type && p->to.type <= D_DI) {
  2769  				*ctxt->andptr++ = 0x83;
  2770  				*ctxt->andptr++ = 0xe0 | (p->to.type - D_AX);
  2771  				*ctxt->andptr++ = 0xe0;
  2772  			}
  2773  			break;
  2774  		case AINT:
  2775  			*ctxt->andptr++ = 0xf4;
  2776  			return;
  2777  		}
  2778  	}
  2779  
  2780  	doasm(ctxt, p);
  2781  	if(ctxt->andptr > ctxt->and+sizeof ctxt->and) {
  2782  		print("and[] is too short - %ld byte instruction\n", ctxt->andptr - ctxt->and);
  2783  		sysfatal("bad code");
  2784  	}
  2785  }