github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/liblink/asm8.c (about)

     1  // Inferno utils/8l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/8l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // Instruction layout.
    32  
    33  #include <u.h>
    34  #include <libc.h>
    35  #include <bio.h>
    36  #include <link.h>
    37  #include "../cmd/8l/8.out.h"
    38  #include "../runtime/stack.h"
    39  
    /*
     * Alignment constants.
     * MaxAlign is the maximum data alignment (see the inline note).
     * FuncAlign is 16.  NOTE(review): the name suggests the alignment given to
     * function starts, but no use of it appears in this part of the file --
     * confirm at the point of use.
     */
    40  enum
    41  {
    42  	MaxAlign = 32,	// max data alignment
    43  	FuncAlign = 16
    44  };
    45  
    /*
     * Optab is the row type of the instruction table optab[] defined later in
     * this file; that table's header comment names the columns
     * "as, ytab, andproto, opcode".  The rows are written as positional
     * initializers, so the field order below must not change.
     */
    46  typedef	struct	Optab	Optab;
    47  
    48  struct	Optab
    49  {
    /* instruction code; optab rows fill it with an A* constant */
    50  	short	as;
    /* one of the y* tables in this file; left unset on rows such as { AXXX } */
    51  	uchar*	ytab;
    /* one of the P* constants (Px, Pe, Pm, Pq, Pb, Pf2, Pf3) */
    52  	uchar	prefix;
    /* opcode bytes; the longest initializer visible in optab (AMOVL) uses all 13 */
    53  	uchar	op[13];
    54  };
    55  
    /*
     * One anonymous enum holding three independent groups of constants.  Each
     * group restarts at 0 (Yxxx, Zxxx and Px are all 0), so a value is only
     * meaningful together with the group it was taken from.  Apart from the
     * explicit "= ..." entries every value is implied by position: do not
     * reorder or insert in the middle of a group without checking all users.
     *
     *   Y*  fill the first two columns of each row in the y* tables, and Ymax
     *       (the last Y entry) sizes the ycover[] array as Ymax*Ymax.
     *       NOTE(review): presumably operand classes -- confirm.
     *   Z*  fill the third column of the y* table rows.
     *       NOTE(review): presumably the encoding form to emit -- confirm
     *       against the code that walks the tables.
     *   P*  are the values stored in Optab.prefix; some also appear inside
     *       Optab.op initializers (e.g. Pe in the AMOVL row).  See the inline
     *       notes for what each one stands for.
     */
    56  enum
    57  {
    58  	Yxxx		= 0,
    59  	Ynone,
    60  	Yi0,
    61  	Yi1,
    62  	Yi8,
    63  	Yi32,
    64  	Yiauto,
    65  	Yal,
    66  	Ycl,
    67  	Yax,
    68  	Ycx,
    69  	Yrb,
    70  	Yrl,
    71  	Yrf,
    72  	Yf0,
    73  	Yrx,
    74  	Ymb,
    75  	Yml,
    76  	Ym,
    77  	Ybr,
    78  	Ycol,
    79  	Ytls,
    80  
    81  	Ycs,	Yss,	Yds,	Yes,	Yfs,	Ygs,
    82  	Ygdtr,	Yidtr,	Yldtr,	Ymsw,	Ytask,
    83  	Ycr0,	Ycr1,	Ycr2,	Ycr3,	Ycr4,	Ycr5,	Ycr6,	Ycr7,
    84  	Ydr0,	Ydr1,	Ydr2,	Ydr3,	Ydr4,	Ydr5,	Ydr6,	Ydr7,
    85  	Ytr0,	Ytr1,	Ytr2,	Ytr3,	Ytr4,	Ytr5,	Ytr6,	Ytr7,
    86  	Ymr, Ymm,
    87  	Yxr, Yxm,
    88  	Ymax,
    89  
    /* Z group: restarts the numbering at 0. */
    90  	Zxxx		= 0,
    91  
    92  	Zlit,
    93  	Zlitm_r,
    94  	Z_rp,
    95  	Zbr,
    96  	Zcall,
    97  	Zcallcon,
    98  	Zcallind,
    99  	Zcallindreg,
   100  	Zib_,
   101  	Zib_rp,
   102  	Zibo_m,
   103  	Zil_,
   104  	Zil_rp,
   105  	Zilo_m,
   106  	Zjmp,
   107  	Zjmpcon,
   108  	Zloop,
   109  	Zm_o,
   110  	Zm_r,
   111  	Zm2_r,
   112  	Zm_r_xm,
   113  	Zm_r_i_xm,
   114  	Zaut_r,
   115  	Zo_m,
   116  	Zpseudo,
   117  	Zr_m,
   118  	Zr_m_xm,
   119  	Zr_m_i_xm,
   120  	Zrp_,
   121  	Z_ib,
   122  	Z_il,
   123  	Zm_ibo,
   124  	Zm_ilo,
   125  	Zib_rr,
   126  	Zil_rr,
   127  	Zclr,
   128  	Zibm_r,	/* mmx1,mmx2/mem64,imm8 */
   129  	Zbyte,
   130  	Zmov,
   131  	Zmax,
   132  
   /* P group: explicit values, used in Optab.prefix. */
   133  	Px		= 0,
   134  	Pe		= 0x66,	/* operand escape */
   135  	Pm		= 0x0f,	/* 2byte opcode escape */
   136  	Pq		= 0xff,	/* both escape */
   137  	Pb		= 0xfe,	/* byte operands */
   138  	Pf2		= 0xf2,	/* xmm escape 1 */
   139  	Pf3		= 0xf3,	/* xmm escape 2 */
   140  };
   141  
   /*
    * File-scope state and a forward declaration.
    *   ycover  Ymax*Ymax bytes, i.e. one byte per ordered pair of Y* values.
    *           NOTE(review): presumably a "class a is covered by class b"
    *           matrix filled in by initialization code further down -- confirm.
    *   reg     one int per value below D_NONE (D_NONE is not defined in this
    *           file; it comes from an included header).
    *   asmins  static prototype; the definition is not in this part of the
    *           file.
    */
   142  static	uchar	ycover[Ymax*Ymax];
   143  static	int	reg[D_NONE];
   144  static	void	asmins(Link *ctxt, Prog *p);
   145  
   /*
    * ytab tables for operand-less and pseudo instructions.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx) and is attached to instructions
    * through Optab.ytab.  NOTE(review): the Y* pair presumably names the
    * operand classes a row accepts, Z* the encoding form, and n the number
    * of Optab.op bytes that belong to the row -- confirm against the code
    * that walks these tables (not in this part of the file).
    */
   /* ynone: single row with Ynone in both operand columns; used by e.g. AAAA, ACLC, ARET. */
   146  static uchar	ynone[] =
   147  {
   148  	Ynone,	Ynone,	Zlit,	1,
   149  	0
   150  };
   /* ytext: used by the ATEXT row of optab. */
   151  static uchar	ytext[] =
   152  {
   153  	Ymb,	Yi32,	Zpseudo,1,
   154  	0
   155  };
   /*
    * ynop: used by the ANOP row of optab; every row is Zpseudo.
    * NOTE(review): the last row carries n = 1 where its siblings carry 0 --
    * confirm this is intentional.
    */
   156  static uchar	ynop[] =
   157  {
   158  	Ynone,	Ynone,	Zpseudo,0,
   159  	Ynone,	Yiauto,	Zpseudo,0,
   160  	Ynone,	Yml,	Zpseudo,0,
   161  	Ynone,	Yrf,	Zpseudo,0,
   162  	Yiauto,	Ynone,	Zpseudo,0,
   163  	Ynone,	Yxr,	Zpseudo,0,
   164  	Yml,	Ynone,	Zpseudo,0,
   165  	Yrf,	Ynone,	Zpseudo,0,
   166  	Yxr,	Ynone,	Zpseudo,1,
   167  	0
   168  };
   /* yfuncdata: NOTE(review): presumably for the FUNCDATA pseudo-op -- confirm in optab. */
   169  static uchar	yfuncdata[] =
   170  {
   171  	Yi32,	Ym,	Zpseudo,	0,
   172  	0
   173  };
   /* ypcdata: NOTE(review): presumably for the PCDATA pseudo-op -- confirm in optab. */
   174  static uchar	ypcdata[] =
   175  {
   176  	Yi32,	Yi32,	Zpseudo,	0,
   177  	0,
   178  };
   /*
    * ytab tables for the two-operand integer arithmetic/logic instructions.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row.  The n columns here do add up
    * to the op-byte counts of the optab rows using them (yxorb: 1+2+1+1 = 5,
    * matching the five op bytes of AADCB; yxorl: 2+1+2+1+1 = 7, matching
    * AADCL), but confirm against the code that walks the tables.
    */
   /* yxorb: used by AADCB, AADDB, AANDB, AORB, ASBBB, ASUBB, AXORB. */
   179  static uchar	yxorb[] =
   180  {
   181  	Yi32,	Yal,	Zib_,	1,
   182  	Yi32,	Ymb,	Zibo_m,	2,
   183  	Yrb,	Ymb,	Zr_m,	1,
   184  	Ymb,	Yrb,	Zm_r,	1,
   185  	0
   186  };
   /* yxorl: used by the L and W forms of ADC, AND, OR, SBB, XOR. */
   187  static uchar	yxorl[] =
   188  {
   189  	Yi8,	Yml,	Zibo_m,	2,
   190  	Yi32,	Yax,	Zil_,	1,
   191  	Yi32,	Yml,	Zilo_m,	2,
   192  	Yrl,	Yml,	Zr_m,	1,
   193  	Yml,	Yrl,	Zm_r,	1,
   194  	0
   195  };
   /* yaddl: row-for-row identical to yxorl; used by AADDL/AADDW and ASUBL/ASUBW. */
   196  static uchar	yaddl[] =
   197  {
   198  	Yi8,	Yml,	Zibo_m,	2,
   199  	Yi32,	Yax,	Zil_,	1,
   200  	Yi32,	Yml,	Zilo_m,	2,
   201  	Yrl,	Yml,	Zr_m,	1,
   202  	Yml,	Yrl,	Zm_r,	1,
   203  	0
   204  };
   /* yincb: used by ADECB and AINCB. */
   205  static uchar	yincb[] =
   206  {
   207  	Ynone,	Ymb,	Zo_m,	2,
   208  	0
   209  };
   /* yincl: used by ADECL/ADECW and AINCL/AINCW. */
   210  static uchar	yincl[] =
   211  {
   212  	Ynone,	Yrl,	Z_rp,	1,
   213  	Ynone,	Yml,	Zo_m,	2,
   214  	0
   215  };
   /*
    * ycmpb: used by ACMPB.  Same rows as yxorb with the two Y* columns swapped
    * and the Z* form mirrored (Zib_ -> Z_ib, Zibo_m -> Zm_ibo, Zr_m <-> Zm_r).
    */
   216  static uchar	ycmpb[] =
   217  {
   218  	Yal,	Yi32,	Z_ib,	1,
   219  	Ymb,	Yi32,	Zm_ibo,	2,
   220  	Ymb,	Yrb,	Zm_r,	1,
   221  	Yrb,	Ymb,	Zr_m,	1,
   222  	0
   223  };
   /* ycmpl: used by ACMPL/ACMPW; mirrors yxorl the same way ycmpb mirrors yxorb. */
   224  static uchar	ycmpl[] =
   225  {
   226  	Yml,	Yi8,	Zm_ibo,	2,
   227  	Yax,	Yi32,	Z_il,	1,
   228  	Yml,	Yi32,	Zm_ilo,	2,
   229  	Yml,	Yrl,	Zm_r,	1,
   230  	Yrl,	Yml,	Zr_m,	1,
   231  	0
   232  };
   /*
    * ytab tables for shifts/rotates and TEST.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).
    */
   /*
    * yshb: used by the byte shifts/rotates (ARCLB, ARCRB, AROLB, ARORB, ASALB,
    * ASARB, ASHLB, ASHRB).  Three rows of n = 2, matching the six op bytes on
    * those optab rows.  Unlike yshl there is no Ycl row.
    */
   233  static uchar	yshb[] =
   234  {
   235  	Yi1,	Ymb,	Zo_m,	2,
   236  	Yi32,	Ymb,	Zibo_m,	2,
   237  	Ycx,	Ymb,	Zo_m,	2,
   238  	0
   239  };
   /*
    * yshl: used by the L and W shifts/rotates.  Four rows of n = 2, matching
    * the eight op bytes on those optab rows; the Ycl and Ycx rows map to the
    * same opcode pair there (e.g. 0xd3,(02) twice for ARCLL).
    */
   240  static uchar	yshl[] =
   241  {
   242  	Yi1,	Yml,	Zo_m,	2,
   243  	Yi32,	Yml,	Zibo_m,	2,
   244  	Ycl,	Yml,	Zo_m,	2,
   245  	Ycx,	Yml,	Zo_m,	2,
   246  	0
   247  };
   /* ytestb: used by ATESTB; row-for-row identical to yxorb. */
   248  static uchar	ytestb[] =
   249  {
   250  	Yi32,	Yal,	Zib_,	1,
   251  	Yi32,	Ymb,	Zibo_m,	2,
   252  	Yrb,	Ymb,	Zr_m,	1,
   253  	Ymb,	Yrb,	Zm_r,	1,
   254  	0
   255  };
   /* ytestl: used by ATESTL/ATESTW; the same rows as yxorl minus its leading Yi8 row. */
   256  static uchar	ytestl[] =
   257  {
   258  	Yi32,	Yax,	Zil_,	1,
   259  	Yi32,	Yml,	Zilo_m,	2,
   260  	Yrl,	Yml,	Zr_m,	1,
   261  	Yml,	Yrl,	Zm_r,	1,
   262  	0
   263  };
   /*
    * ytab tables for the MOV family.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).
    */
   /* ymovb: used by AMOVB (n column 1+1+1+2 = 5 = AMOVB's five op bytes). */
   264  static uchar	ymovb[] =
   265  {
   266  	Yrb,	Ymb,	Zr_m,	1,
   267  	Ymb,	Yrb,	Zm_r,	1,
   268  	Yi32,	Yrb,	Zib_rp,	1,
   269  	Yi32,	Ymb,	Zibo_m,	2,
   270  	0
   271  };
   /*
    * ymovw: used by AMOVW.  The Zclr row is written 1+2 (= 3); in the AMOVW
    * optab row the three bytes at that position are 0x31,0x83,(04).
    */
   272  static uchar	ymovw[] =
   273  {
   274  	Yrl,	Yml,	Zr_m,	1,
   275  	Yml,	Yrl,	Zm_r,	1,
   276  	Yi0,	Yrl,	Zclr,	1+2,
   277  //	Yi0,	Yml,	Zibo_m,	2,	// shorter but slower AND $0,dst
   278  	Yi32,	Yrl,	Zil_rp,	1,
   279  	Yi32,	Yml,	Zilo_m,	2,
   280  	Yiauto,	Yrl,	Zaut_r,	1,
   281  	0
   282  };
   /*
    * ymovl: used by AMOVL.  Same rows as ymovw plus the two XMM MOVD rows; the
    * n column adds up to 13, which is exactly the size of Optab.op and the
    * length of AMOVL's op initializer.
    */
   283  static uchar	ymovl[] =
   284  {
   285  	Yrl,	Yml,	Zr_m,	1,
   286  	Yml,	Yrl,	Zm_r,	1,
   287  	Yi0,	Yrl,	Zclr,	1+2,
   288  //	Yi0,	Yml,	Zibo_m,	2,	// shorter but slower AND $0,dst
   289  	Yi32,	Yrl,	Zil_rp,	1,
   290  	Yi32,	Yml,	Zilo_m,	2,
   291  	Yml,	Yxr,	Zm_r_xm,	2,	// XMM MOVD (32 bit)
   292  	Yxr,	Yml,	Zr_m_xm,	2,	// XMM MOVD (32 bit)
   293  	Yiauto,	Yrl,	Zaut_r,	1,
   294  	0
   295  };
   /* ymovq: used by AMOVQ. */
   296  static uchar	ymovq[] =
   297  {
   298  	Yml,	Yxr,	Zm_r_xm,	2,
   299  	0
   300  };
   /*
    * Small ytab tables with one or two register/memory rows.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).
    */
   /* ym_rl: used by ALEAL and ALEAW. */
   301  static uchar	ym_rl[] =
   302  {
   303  	Ym,	Yrl,	Zm_r,	1,
   304  	0
   305  };
   /* yrl_m: used by ABOUNDL and ABOUNDW. */
   306  static uchar	yrl_m[] =
   307  {
   308  	Yrl,	Ym,	Zr_m,	1,
   309  	0
   310  };
   /* ymb_rl: used by AMOVBLSX, AMOVBLZX, AMOVBWSX, AMOVBWZX. */
   311  static uchar	ymb_rl[] =
   312  {
   313  	Ymb,	Yrl,	Zm_r,	1,
   314  	0
   315  };
   /* yml_rl: used by e.g. the BSF/BSR/BT* rows, ALARL, ALSLL, AMOVWLSX and the CMOV rows. */
   316  static uchar	yml_rl[] =
   317  {
   318  	Yml,	Yrl,	Zm_r,	1,
   319  	0
   320  };
   /* yrb_mb: used by ACMPXCHGB and AXADDB. */
   321  static uchar	yrb_mb[] =
   322  {
   323  	Yrb,	Ymb,	Zr_m,	1,
   324  	0
   325  };
   /* yrl_ml: used by AARPL, ACMPXCHGL/ACMPXCHGW and AXADDL/AXADDW. */
   326  static uchar	yrl_ml[] =
   327  {
   328  	Yrl,	Yml,	Zr_m,	1,
   329  	0
   330  };
   /* yml_mb: used by AXCHGB; despite the name both rows use the byte classes Yrb/Ymb. */
   331  static uchar	yml_mb[] =
   332  {
   333  	Yrb,	Ymb,	Zr_m,	1,
   334  	Ymb,	Yrb,	Zm_r,	1,
   335  	0
   336  };
   /*
    * ytab tables for XCHG, multiply/divide, raw data and port I/O.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).
    */
   /* yxchg: used by AXCHGL and AXCHGW (four rows of n = 1, four op bytes). */
   337  static uchar	yxchg[] =
   338  {
   339  	Yax,	Yrl,	Z_rp,	1,
   340  	Yrl,	Yax,	Zrp_,	1,
   341  	Yrl,	Yml,	Zr_m,	1,
   342  	Yml,	Yrl,	Zm_r,	1,
   343  	0
   344  };
   /* ydivl: used by the L/W forms of DIV, IDIV and MUL, and by AVERR/AVERW. */
   345  static uchar	ydivl[] =
   346  {
   347  	Yml,	Ynone,	Zm_o,	2,
   348  	0
   349  };
   /* ydivb: used by ADIVB, AIDIVB, AIMULB and AMULB. */
   350  static uchar	ydivb[] =
   351  {
   352  	Ymb,	Ynone,	Zm_o,	2,
   353  	0
   354  };
   /* yimul: used by AIMULL and AIMULW (n column 2+1+1 = 4 op bytes). */
   355  static uchar	yimul[] =
   356  {
   357  	Yml,	Ynone,	Zm_o,	2,
   358  	Yi8,	Yrl,	Zib_rr,	1,
   359  	Yi32,	Yrl,	Zil_rr,	1,
   360  	0
   361  };
   /*
    * ybyte: used by ABYTE, ALONG and AWORD, whose single op bytes are 1, 4
    * and 2.  NOTE(review): those look like sizes in bytes rather than
    * opcodes -- confirm in the Zbyte handling.
    */
   362  static uchar	ybyte[] =
   363  {
   364  	Yi32,	Ynone,	Zbyte,	1,
   365  	0
   366  };
   /* yin: used by the IN and OUT rows (AINB/L/W, AOUTB/L/W), each with two op bytes. */
   367  static uchar	yin[] =
   368  {
   369  	Yi32,	Ynone,	Zib_,	1,
   370  	Ynone,	Ynone,	Zlit,	1,
   371  	0
   372  };
   /* yint: used by AINT. */
   373  static uchar	yint[] =
   374  {
   375  	Yi32,	Ynone,	Zib_,	1,
   376  	0
   377  };
   /*
    * ytab tables for PUSH/POP, BSWAP and the single-operand byte forms.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).
    */
   /* ypushl: used by APUSHL and APUSHW (n column 1+2+1+1 = 5 op bytes). */
   378  static uchar	ypushl[] =
   379  {
   380  	Yrl,	Ynone,	Zrp_,	1,
   381  	Ym,	Ynone,	Zm_o,	2,
   382  	Yi8,	Ynone,	Zib_,	1,
   383  	Yi32,	Ynone,	Zil_,	1,
   384  	0
   385  };
   /* ypopl: used by APOPL and APOPW (n column 1+2 = 3 op bytes). */
   386  static uchar	ypopl[] =
   387  {
   388  	Ynone,	Yrl,	Z_rp,	1,
   389  	Ynone,	Ym,	Zo_m,	2,
   390  	0
   391  };
   /* ybswap: used by ABSWAPL. */
   392  static uchar	ybswap[] =
   393  {
   394  	Ynone,	Yrl,	Z_rp,	1,
   395  	0,
   396  };
   /*
    * yscond: used by the ASETxx rows and also by ANEGB/L/W, ANOTB/L/W and
    * ACMPXCHG8B.  Identical to yincb.
    */
   397  static uchar	yscond[] =
   398  {
   399  	Ynone,	Ymb,	Zo_m,	2,
   400  	0
   401  };
   /*
    * ytab tables for branches, loops and calls.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes the matcher steps over for the row.  Several
    * rows here carry n = 0, so consecutive rows would start at the same op
    * byte -- confirm against the code that walks the tables (not in this
    * part of the file) before changing any count.
    */
   /* yjcond: used by the conditional jump rows (AJCC ... AJPS), each with at least two op bytes. */
   402  static uchar	yjcond[] =
   403  {
   404  	Ynone,	Ybr,	Zbr,	0,
   405  	Yi0,	Ybr,	Zbr,	0,
   406  	Yi1,	Ybr,	Zbr,	1,
   407  	0
   408  };
   /* yloop: used by AJCXZL/AJCXZW, ALOOP, ALOOPEQ and ALOOPNE. */
   409  static uchar	yloop[] =
   410  {
   411  	Ynone,	Ybr,	Zloop,	1,
   412  	0
   413  };
   /* ycall: used by ACALL, whose op bytes are 0xff,(02),0xff,(0x15),0xe8. */
   414  static uchar	ycall[] =
   415  {
   416  	Ynone,	Yml,	Zcallindreg,	0,
   417  	Yrx,	Yrx,	Zcallindreg,	2,
   418  	Ynone,	Ycol,	Zcallind,	2,
   419  	Ynone,	Ybr,	Zcall,	0,
   420  	Ynone,	Yi32,	Zcallcon,	1,
   421  	0
   422  };
   /* yduff: NOTE(review): presumably for the Duff's-device call pseudo-ops -- confirm in optab. */
   423  static uchar	yduff[] =
   424  {
   425  	Ynone,	Yi32,	Zcall,	1,
   426  	0
   427  };
   /* yjmp: used by AJMP, whose op bytes are 0xff,(04),0xeb,0xe9. */
   428  static uchar	yjmp[] =
   429  {
   430  	Ynone,	Yml,	Zo_m,	2,
   431  	Ynone,	Ybr,	Zjmp,	0,
   432  	Ynone,	Yi32,	Zjmpcon,	1,
   433  	0
   434  };
   435  
   /*
    * ytab tables for the floating-point move rows of optab (the AF* entries).
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).  Every row here
    * has n = 2, matching the (opcode, parenthesized value) pairs in the op
    * initializers of the rows that use these tables.
    */
   /* yfmvd: used by AFMOVD (four rows, eight op bytes). */
   436  static uchar	yfmvd[] =
   437  {
   438  	Ym,	Yf0,	Zm_o,	2,
   439  	Yf0,	Ym,	Zo_m,	2,
   440  	Yrf,	Yf0,	Zm_o,	2,
   441  	Yf0,	Yrf,	Zo_m,	2,
   442  	0
   443  };
   /* yfmvdp: used by AFMOVDP. */
   444  static uchar	yfmvdp[] =
   445  {
   446  	Yf0,	Ym,	Zo_m,	2,
   447  	Yf0,	Yrf,	Zo_m,	2,
   448  	0
   449  };
   /* yfmvf: used by AFMOVF, AFMOVL and AFMOVW. */
   450  static uchar	yfmvf[] =
   451  {
   452  	Ym,	Yf0,	Zm_o,	2,
   453  	Yf0,	Ym,	Zo_m,	2,
   454  	0
   455  };
   /*
    * yfmvx: used by AFMOVB, AFMOVV, AFMOVX and by the W/L/F forms of the
    * FCOM, FADD, FMUL, FSUB(R) and FDIV(R) rows.
    */
   456  static uchar	yfmvx[] =
   457  {
   458  	Ym,	Yf0,	Zm_o,	2,
   459  	0
   460  };
   /* yfmvp: used by AFMOVBP, AFMOVFP, AFMOVLP, AFMOVVP, AFMOVWP and AFMOVXP. */
   461  static uchar	yfmvp[] =
   462  {
   463  	Yf0,	Ym,	Zo_m,	2,
   464  	0
   465  };
   /* yfcmv: used by the AFCMOVxx rows. */
   466  static uchar	yfcmv[] =
   467  {
   468  	Yrf,	Yf0,	Zm_o,	2,
   469  	0
   470  };
   /*
    * ytab tables for floating-point arithmetic, compare and control rows.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).
    */
   /* yfadd: used by the D forms AFADDD, AFMULD, AFSUB(R)D, AFDIV(R)D and by AFCOMD/AFCOMDP. */
   471  static uchar	yfadd[] =
   472  {
   473  	Ym,	Yf0,	Zm_o,	2,
   474  	Yrf,	Yf0,	Zm_o,	2,
   475  	Yf0,	Yrf,	Zo_m,	2,
   476  	0
   477  };
   /* yfaddp: used by the DP forms AFADDDP, AFMULDP, AFSUB(R)DP and AFDIV(R)DP. */
   478  static uchar	yfaddp[] =
   479  {
   480  	Yf0,	Yrf,	Zo_m,	2,
   481  	0
   482  };
   /* yfxch: used by AFXCHD. */
   483  static uchar	yfxch[] =
   484  {
   485  	Yf0,	Yrf,	Zo_m,	2,
   486  	Yrf,	Yf0,	Zm_o,	2,
   487  	0
   488  };
   /* ycompp: used by AFCOMDPP and the AFUCOM* rows; identical to yfaddp. */
   489  static uchar	ycompp[] =
   490  {
   491  	Yf0,	Yrf,	Zo_m,	2,	/* botch is really f0,f1 */
   492  	0
   493  };
   /* ystsw: used by AFSTSW, whose op bytes are 0xdd,(07) followed by 0xdf,0xe0. */
   494  static uchar	ystsw[] =
   495  {
   496  	Ynone,	Ym,	Zo_m,	2,
   497  	Ynone,	Yax,	Zlit,	1,
   498  	0
   499  };
   /* ystcw: used by AFLDCW, AFLDENV, AFSTCW and AFSTENV. */
   500  static uchar	ystcw[] =
   501  {
   502  	Ynone,	Ym,	Zo_m,	2,
   503  	Ym,	Ynone,	Zm_o,	2,
   504  	0
   505  };
   /* ysvrs: used by AFRSTOR and AFSAVE; row-for-row identical to ystcw. */
   506  static uchar	ysvrs[] =
   507  {
   508  	Ynone,	Ym,	Zo_m,	2,
   509  	Ym,	Ynone,	Zm_o,	2,
   510  	0
   511  };
   /*
    * ytab tables built on the Yxr/Yxm/Ymr/Ymm classes (first part).
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).  Tables without a
    * "used by" note have no user among the first optab rows; look them up
    * further down optab.
    */
   512  static uchar	ymskb[] =
   513  {
   514  	Yxr,	Yrl,	Zm_r_xm,	2,
   515  	Ymr,	Yrl,	Zm_r_xm,	1,
   516  	0
   517  };
   /* yxm: used by e.g. AADDPD, AADDPS, AADDSD, AADDSS and the AANDxx rows. */
   518  static uchar	yxm[] = 
   519  {
   520  	Yxm,	Yxr,	Zm_r_xm,	1,
   521  	0
   522  };
   523  static uchar	yxcvm1[] = 
   524  {
   525  	Yxm,	Yxr,	Zm_r_xm,	2,
   526  	Yxm,	Ymr,	Zm_r_xm,	2,
   527  	0
   528  };
   /* yxcvm2: used by ACVTPL2PD, whose four op bytes are two (prefix, opcode) pairs. */
   529  static uchar	yxcvm2[] =
   530  {
   531  	Yxm,	Yxr,	Zm_r_xm,	2,
   532  	Ymm,	Yxr,	Zm_r_xm,	2,
   533  	0
   534  };
   /* yxmq: same single row as yxm but with n = 2. */
   535  static uchar	yxmq[] = 
   536  {
   537  	Yxm,	Yxr,	Zm_r_xm,	2,
   538  	0
   539  };
   540  static uchar	yxr[] = 
   541  {
   542  	Yxr,	Yxr,	Zm_r_xm,	1,
   543  	0
   544  };
   /*
    * ytab tables built on the Yxr/Yxm classes (second part).
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).  Tables without a
    * "used by" note have no user among the first optab rows; look them up
    * further down optab.
    */
   545  static uchar	yxr_ml[] =
   546  {
   547  	Yxr,	Yml,	Zr_m_xm,	1,
   548  	0
   549  };
   /* yxcmp: used by ACOMISD and ACOMISS; same row values as yxm. */
   550  static uchar	yxcmp[] =
   551  {
   552  	Yxm,	Yxr, Zm_r_xm,	1,
   553  	0
   554  };
   /* yxcmpi: used by ACMPPD, ACMPPS, ACMPSD and ACMPSS, each with two op bytes. */
   555  static uchar	yxcmpi[] =
   556  {
   557  	Yxm,	Yxr, Zm_r_i_xm,	2,
   558  	0
   559  };
   560  static uchar	yxmov[] =
   561  {
   562  	Yxm,	Yxr,	Zm_r_xm,	1,
   563  	Yxr,	Yxm,	Zr_m_xm,	1,
   564  	0
   565  };
   566  static uchar	yxcvfl[] = 
   567  {
   568  	Yxm,	Yrl,	Zm_r_xm,	1,
   569  	0
   570  };
   571  static uchar	yxcvlf[] =
   572  {
   573  	Yml,	Yxr,	Zm_r_xm,	1,
   574  	0
   575  };
   576  /*
   577  static uchar	yxcvfq[] = 
   578  {
   579  	Yxm,	Yrl,	Zm_r_xm,	2,
   580  	0
   581  };
   582  static uchar	yxcvqf[] =
   583  {
   584  	Yml,	Yxr,	Zm_r_xm,	2,
   585  	0
   586  };
   587  */
   /*
    * Remaining ytab tables before optab.
    *
    * Every y* table is a list of 4-byte rows { Y*, Y*, Z*, n } closed by a
    * single 0 (the same value as Yxxx).  NOTE(review): the Y* pair presumably
    * names the accepted operand classes, Z* the encoding form, and n the
    * number of Optab.op bytes owned by the row -- confirm against the code
    * that walks the tables (not in this part of the file).  Tables without a
    * "used by" note have no user among the first optab rows; look them up
    * further down optab.
    */
   588  static uchar	yxrrl[] =
   589  {
   590  	Yxr,	Yrl,	Zm_r,	1,
   591  	0
   592  };
   /* yprefetch: used by APREFETCHT0, APREFETCHT1, APREFETCHT2 and APREFETCHNTA. */
   593  static uchar	yprefetch[] =
   594  {
   595  	Ym,	Ynone,	Zm_o,	2,
   596  	0,
   597  };
   598  static uchar	yaes[] =
   599  {
   600  	Yxm,	Yxr,	Zlitm_r,	2,
   601  	0
   602  };
   /* yinsrd: uses Zibm_r, which the enum annotates as "mmx1,mmx2/mem64,imm8". */
   603  static uchar	yinsrd[] =
   604  {
   605  	Yml,	Yxr,	Zibm_r,	2,
   606  	0
   607  };
   608  static uchar	ymshufb[] =
   609  {
   610  	Yxm,	Yxr,	Zm2_r,	2,
   611  	0
   612  };
   613  
   /* yxshuf: same Zibm_r form as yinsrd, with Yxm as the first operand class. */
   614  static uchar   yxshuf[] =
   615  {
   616  	Yxm,    Yxr,    Zibm_r, 2,
   617  	0
   618  };
   619  
   620  static Optab optab[] =
   621  /*	as, ytab, andproto, opcode */
   622  {
   623  	{ AXXX },
   624  	{ AAAA,		ynone,	Px, {0x37} },
   625  	{ AAAD,		ynone,	Px, {0xd5,0x0a} },
   626  	{ AAAM,		ynone,	Px, {0xd4,0x0a} },
   627  	{ AAAS,		ynone,	Px, {0x3f} },
   628  	{ AADCB,	yxorb,	Pb, {0x14,0x80,(02),0x10,0x10} },
   629  	{ AADCL,	yxorl,	Px, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   630  	{ AADCW,	yxorl,	Pe, {0x83,(02),0x15,0x81,(02),0x11,0x13} },
   631  	{ AADDB,	yxorb,	Px, {0x04,0x80,(00),0x00,0x02} },
   632  	{ AADDL,	yaddl,	Px, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   633  	{ AADDW,	yaddl,	Pe, {0x83,(00),0x05,0x81,(00),0x01,0x03} },
   634  	{ AADJSP },
   635  	{ AANDB,	yxorb,	Pb, {0x24,0x80,(04),0x20,0x22} },
   636  	{ AANDL,	yxorl,	Px, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   637  	{ AANDW,	yxorl,	Pe, {0x83,(04),0x25,0x81,(04),0x21,0x23} },
   638  	{ AARPL,	yrl_ml,	Px, {0x63} },
   639  	{ ABOUNDL,	yrl_m,	Px, {0x62} },
   640  	{ ABOUNDW,	yrl_m,	Pe, {0x62} },
   641  	{ ABSFL,	yml_rl,	Pm, {0xbc} },
   642  	{ ABSFW,	yml_rl,	Pq, {0xbc} },
   643  	{ ABSRL,	yml_rl,	Pm, {0xbd} },
   644  	{ ABSRW,	yml_rl,	Pq, {0xbd} },
   645  	{ ABTL,		yml_rl,	Pm, {0xa3} },
   646  	{ ABTW,		yml_rl,	Pq, {0xa3} },
   647  	{ ABTCL,	yml_rl,	Pm, {0xbb} },
   648  	{ ABTCW,	yml_rl,	Pq, {0xbb} },
   649  	{ ABTRL,	yml_rl,	Pm, {0xb3} },
   650  	{ ABTRW,	yml_rl,	Pq, {0xb3} },
   651  	{ ABTSL,	yml_rl,	Pm, {0xab} },
   652  	{ ABTSW,	yml_rl,	Pq, {0xab} },
   653  	{ ABYTE,	ybyte,	Px, {1} },
   654  	{ ACALL,	ycall,	Px, {0xff,(02),0xff,(0x15),0xe8} },
   655  	{ ACLC,		ynone,	Px, {0xf8} },
   656  	{ ACLD,		ynone,	Px, {0xfc} },
   657  	{ ACLI,		ynone,	Px, {0xfa} },
   658  	{ ACLTS,	ynone,	Pm, {0x06} },
   659  	{ ACMC,		ynone,	Px, {0xf5} },
   660  	{ ACMPB,	ycmpb,	Pb, {0x3c,0x80,(07),0x38,0x3a} },
   661  	{ ACMPL,	ycmpl,	Px, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   662  	{ ACMPW,	ycmpl,	Pe, {0x83,(07),0x3d,0x81,(07),0x39,0x3b} },
   663  	{ ACMPSB,	ynone,	Pb, {0xa6} },
   664  	{ ACMPSL,	ynone,	Px, {0xa7} },
   665  	{ ACMPSW,	ynone,	Pe, {0xa7} },
   666  	{ ADAA,		ynone,	Px, {0x27} },
   667  	{ ADAS,		ynone,	Px, {0x2f} },
   668  	{ ADATA },
   669  	{ ADECB,	yincb,	Pb, {0xfe,(01)} },
   670  	{ ADECL,	yincl,	Px, {0x48,0xff,(01)} },
   671  	{ ADECW,	yincl,	Pe, {0x48,0xff,(01)} },
   672  	{ ADIVB,	ydivb,	Pb, {0xf6,(06)} },
   673  	{ ADIVL,	ydivl,	Px, {0xf7,(06)} },
   674  	{ ADIVW,	ydivl,	Pe, {0xf7,(06)} },
   675  	{ AENTER },				/* botch */
   676  	{ AGLOBL },
   677  	{ AGOK },
   678  	{ AHISTORY },
   679  	{ AHLT,		ynone,	Px, {0xf4} },
   680  	{ AIDIVB,	ydivb,	Pb, {0xf6,(07)} },
   681  	{ AIDIVL,	ydivl,	Px, {0xf7,(07)} },
   682  	{ AIDIVW,	ydivl,	Pe, {0xf7,(07)} },
   683  	{ AIMULB,	ydivb,	Pb, {0xf6,(05)} },
   684  	{ AIMULL,	yimul,	Px, {0xf7,(05),0x6b,0x69} },
   685  	{ AIMULW,	yimul,	Pe, {0xf7,(05),0x6b,0x69} },
   686  	{ AINB,		yin,	Pb, {0xe4,0xec} },
   687  	{ AINL,		yin,	Px, {0xe5,0xed} },
   688  	{ AINW,		yin,	Pe, {0xe5,0xed} },
   689  	{ AINCB,	yincb,	Pb, {0xfe,(00)} },
   690  	{ AINCL,	yincl,	Px, {0x40,0xff,(00)} },
   691  	{ AINCW,	yincl,	Pe, {0x40,0xff,(00)} },
   692  	{ AINSB,	ynone,	Pb, {0x6c} },
   693  	{ AINSL,	ynone,	Px, {0x6d} },
   694  	{ AINSW,	ynone,	Pe, {0x6d} },
   695  	{ AINT,		yint,	Px, {0xcd} },
   696  	{ AINTO,	ynone,	Px, {0xce} },
   697  	{ AIRETL,	ynone,	Px, {0xcf} },
   698  	{ AIRETW,	ynone,	Pe, {0xcf} },
   699  	{ AJCC,		yjcond,	Px, {0x73,0x83,(00)} },
   700  	{ AJCS,		yjcond,	Px, {0x72,0x82} },
   701  	{ AJCXZL,	yloop,	Px, {0xe3} },
   702  	{ AJCXZW,	yloop,	Px, {0xe3} },
   703  	{ AJEQ,		yjcond,	Px, {0x74,0x84} },
   704  	{ AJGE,		yjcond,	Px, {0x7d,0x8d} },
   705  	{ AJGT,		yjcond,	Px, {0x7f,0x8f} },
   706  	{ AJHI,		yjcond,	Px, {0x77,0x87} },
   707  	{ AJLE,		yjcond,	Px, {0x7e,0x8e} },
   708  	{ AJLS,		yjcond,	Px, {0x76,0x86} },
   709  	{ AJLT,		yjcond,	Px, {0x7c,0x8c} },
   710  	{ AJMI,		yjcond,	Px, {0x78,0x88} },
   711  	{ AJMP,		yjmp,	Px, {0xff,(04),0xeb,0xe9} },
   712  	{ AJNE,		yjcond,	Px, {0x75,0x85} },
   713  	{ AJOC,		yjcond,	Px, {0x71,0x81,(00)} },
   714  	{ AJOS,		yjcond,	Px, {0x70,0x80,(00)} },
   715  	{ AJPC,		yjcond,	Px, {0x7b,0x8b} },
   716  	{ AJPL,		yjcond,	Px, {0x79,0x89} },
   717  	{ AJPS,		yjcond,	Px, {0x7a,0x8a} },
   718  	{ ALAHF,	ynone,	Px, {0x9f} },
   719  	{ ALARL,	yml_rl,	Pm, {0x02} },
   720  	{ ALARW,	yml_rl,	Pq, {0x02} },
   721  	{ ALEAL,	ym_rl,	Px, {0x8d} },
   722  	{ ALEAW,	ym_rl,	Pe, {0x8d} },
   723  	{ ALEAVEL,	ynone,	Px, {0xc9} },
   724  	{ ALEAVEW,	ynone,	Pe, {0xc9} },
   725  	{ ALOCK,	ynone,	Px, {0xf0} },
   726  	{ ALODSB,	ynone,	Pb, {0xac} },
   727  	{ ALODSL,	ynone,	Px, {0xad} },
   728  	{ ALODSW,	ynone,	Pe, {0xad} },
   729  	{ ALONG,	ybyte,	Px, {4} },
   730  	{ ALOOP,	yloop,	Px, {0xe2} },
   731  	{ ALOOPEQ,	yloop,	Px, {0xe1} },
   732  	{ ALOOPNE,	yloop,	Px, {0xe0} },
   733  	{ ALSLL,	yml_rl,	Pm, {0x03 } },
   734  	{ ALSLW,	yml_rl,	Pq, {0x03 } },
   735  	{ AMOVB,	ymovb,	Pb, {0x88,0x8a,0xb0,0xc6,(00)} },
   736  	{ AMOVL,	ymovl,	Px, {0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00),Pe,0x6e,Pe,0x7e,0} },
   737  	{ AMOVW,	ymovw,	Pe, {0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00),0} },
   738  	{ AMOVQ,	ymovq,	Pf3, {0x7e} },
   739  	{ AMOVBLSX,	ymb_rl,	Pm, {0xbe} },
   740  	{ AMOVBLZX,	ymb_rl,	Pm, {0xb6} },
   741  	{ AMOVBWSX,	ymb_rl,	Pq, {0xbe} },
   742  	{ AMOVBWZX,	ymb_rl,	Pq, {0xb6} },
   743  	{ AMOVWLSX,	yml_rl,	Pm, {0xbf} },
   744  	{ AMOVWLZX,	yml_rl,	Pm, {0xb7} },
   745  	{ AMOVSB,	ynone,	Pb, {0xa4} },
   746  	{ AMOVSL,	ynone,	Px, {0xa5} },
   747  	{ AMOVSW,	ynone,	Pe, {0xa5} },
   748  	{ AMULB,	ydivb,	Pb, {0xf6,(04)} },
   749  	{ AMULL,	ydivl,	Px, {0xf7,(04)} },
   750  	{ AMULW,	ydivl,	Pe, {0xf7,(04)} },
   751  	{ ANAME },
   752  	{ ANEGB,	yscond,	Px, {0xf6,(03)} },
   753  	{ ANEGL,	yscond,	Px, {0xf7,(03)} },
   754  	{ ANEGW,	yscond,	Pe, {0xf7,(03)} },
   755  	{ ANOP,		ynop,	Px, {0,0} },
   756  	{ ANOTB,	yscond,	Px, {0xf6,(02)} },
   757  	{ ANOTL,	yscond,	Px, {0xf7,(02)} },
   758  	{ ANOTW,	yscond,	Pe, {0xf7,(02)} },
   759  	{ AORB,		yxorb,	Pb, {0x0c,0x80,(01),0x08,0x0a} },
   760  	{ AORL,		yxorl,	Px, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
   761  	{ AORW,		yxorl,	Pe, {0x83,(01),0x0d,0x81,(01),0x09,0x0b} },
   762  	{ AOUTB,	yin,	Pb, {0xe6,0xee} },
   763  	{ AOUTL,	yin,	Px, {0xe7,0xef} },
   764  	{ AOUTW,	yin,	Pe, {0xe7,0xef} },
   765  	{ AOUTSB,	ynone,	Pb, {0x6e} },
   766  	{ AOUTSL,	ynone,	Px, {0x6f} },
   767  	{ AOUTSW,	ynone,	Pe, {0x6f} },
   768  	{ APAUSE,	ynone,	Px, {0xf3,0x90} },
   769  	{ APOPAL,	ynone,	Px, {0x61} },
   770  	{ APOPAW,	ynone,	Pe, {0x61} },
   771  	{ APOPFL,	ynone,	Px, {0x9d} },
   772  	{ APOPFW,	ynone,	Pe, {0x9d} },
   773  	{ APOPL,	ypopl,	Px, {0x58,0x8f,(00)} },
   774  	{ APOPW,	ypopl,	Pe, {0x58,0x8f,(00)} },
   775  	{ APUSHAL,	ynone,	Px, {0x60} },
   776  	{ APUSHAW,	ynone,	Pe, {0x60} },
   777  	{ APUSHFL,	ynone,	Px, {0x9c} },
   778  	{ APUSHFW,	ynone,	Pe, {0x9c} },
   779  	{ APUSHL,	ypushl,	Px, {0x50,0xff,(06),0x6a,0x68} },
   780  	{ APUSHW,	ypushl,	Pe, {0x50,0xff,(06),0x6a,0x68} },
   781  	{ ARCLB,	yshb,	Pb, {0xd0,(02),0xc0,(02),0xd2,(02)} },
   782  	{ ARCLL,	yshl,	Px, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
   783  	{ ARCLW,	yshl,	Pe, {0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02)} },
   784  	{ ARCRB,	yshb,	Pb, {0xd0,(03),0xc0,(03),0xd2,(03)} },
   785  	{ ARCRL,	yshl,	Px, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
   786  	{ ARCRW,	yshl,	Pe, {0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03)} },
   787  	{ AREP,		ynone,	Px, {0xf3} },
   788  	{ AREPN,	ynone,	Px, {0xf2} },
   789  	{ ARET,		ynone,	Px, {0xc3} },
   790  	{ AROLB,	yshb,	Pb, {0xd0,(00),0xc0,(00),0xd2,(00)} },
   791  	{ AROLL,	yshl,	Px, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
   792  	{ AROLW,	yshl,	Pe, {0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00)} },
   793  	{ ARORB,	yshb,	Pb, {0xd0,(01),0xc0,(01),0xd2,(01)} },
   794  	{ ARORL,	yshl,	Px, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
   795  	{ ARORW,	yshl,	Pe, {0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01)} },
   796  	{ ASAHF,	ynone,	Px, {0x9e} },
   797  	{ ASALB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
   798  	{ ASALL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
   799  	{ ASALW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
   800  	{ ASARB,	yshb,	Pb, {0xd0,(07),0xc0,(07),0xd2,(07)} },
   801  	{ ASARL,	yshl,	Px, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
   802  	{ ASARW,	yshl,	Pe, {0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07)} },
   803  	{ ASBBB,	yxorb,	Pb, {0x1c,0x80,(03),0x18,0x1a} },
   804  	{ ASBBL,	yxorl,	Px, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
   805  	{ ASBBW,	yxorl,	Pe, {0x83,(03),0x1d,0x81,(03),0x19,0x1b} },
   806  	{ ASCASB,	ynone,	Pb, {0xae} },
   807  	{ ASCASL,	ynone,	Px, {0xaf} },
   808  	{ ASCASW,	ynone,	Pe, {0xaf} },
   809  	{ ASETCC,	yscond,	Pm, {0x93,(00)} },
   810  	{ ASETCS,	yscond,	Pm, {0x92,(00)} },
   811  	{ ASETEQ,	yscond,	Pm, {0x94,(00)} },
   812  	{ ASETGE,	yscond,	Pm, {0x9d,(00)} },
   813  	{ ASETGT,	yscond,	Pm, {0x9f,(00)} },
   814  	{ ASETHI,	yscond,	Pm, {0x97,(00)} },
   815  	{ ASETLE,	yscond,	Pm, {0x9e,(00)} },
   816  	{ ASETLS,	yscond,	Pm, {0x96,(00)} },
   817  	{ ASETLT,	yscond,	Pm, {0x9c,(00)} },
   818  	{ ASETMI,	yscond,	Pm, {0x98,(00)} },
   819  	{ ASETNE,	yscond,	Pm, {0x95,(00)} },
   820  	{ ASETOC,	yscond,	Pm, {0x91,(00)} },
   821  	{ ASETOS,	yscond,	Pm, {0x90,(00)} },
   822  	{ ASETPC,	yscond,	Pm, {0x9b,(00)} },
   823  	{ ASETPL,	yscond,	Pm, {0x99,(00)} },
   824  	{ ASETPS,	yscond,	Pm, {0x9a,(00)} },
   825  	{ ACDQ,		ynone,	Px, {0x99} },
   826  	{ ACWD,		ynone,	Pe, {0x99} },
   827  	{ ASHLB,	yshb,	Pb, {0xd0,(04),0xc0,(04),0xd2,(04)} },
   828  	{ ASHLL,	yshl,	Px, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
   829  	{ ASHLW,	yshl,	Pe, {0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04)} },
   830  	{ ASHRB,	yshb,	Pb, {0xd0,(05),0xc0,(05),0xd2,(05)} },
   831  	{ ASHRL,	yshl,	Px, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
   832  	{ ASHRW,	yshl,	Pe, {0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05)} },
   833  	{ ASTC,		ynone,	Px, {0xf9} },
   834  	{ ASTD,		ynone,	Px, {0xfd} },
   835  	{ ASTI,		ynone,	Px, {0xfb} },
   836  	{ ASTOSB,	ynone,	Pb, {0xaa} },
   837  	{ ASTOSL,	ynone,	Px, {0xab} },
   838  	{ ASTOSW,	ynone,	Pe, {0xab} },
   839  	{ ASUBB,	yxorb,	Pb, {0x2c,0x80,(05),0x28,0x2a} },
   840  	{ ASUBL,	yaddl,	Px, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
   841  	{ ASUBW,	yaddl,	Pe, {0x83,(05),0x2d,0x81,(05),0x29,0x2b} },
   842  	{ ASYSCALL,	ynone,	Px, {0xcd,100} },
   843  	{ ATESTB,	ytestb,	Pb, {0xa8,0xf6,(00),0x84,0x84} },
   844  	{ ATESTL,	ytestl,	Px, {0xa9,0xf7,(00),0x85,0x85} },
   845  	{ ATESTW,	ytestl,	Pe, {0xa9,0xf7,(00),0x85,0x85} },
   846  	{ ATEXT,	ytext,	Px },
   847  	{ AVERR,	ydivl,	Pm, {0x00,(04)} },
   848  	{ AVERW,	ydivl,	Pm, {0x00,(05)} },
   849  	{ AWAIT,	ynone,	Px, {0x9b} },
   850  	{ AWORD,	ybyte,	Px, {2} },
   851  	{ AXCHGB,	yml_mb,	Pb, {0x86,0x86} },
   852  	{ AXCHGL,	yxchg,	Px, {0x90,0x90,0x87,0x87} },
   853  	{ AXCHGW,	yxchg,	Pe, {0x90,0x90,0x87,0x87} },
   854  	{ AXLAT,	ynone,	Px, {0xd7} },
   855  	{ AXORB,	yxorb,	Pb, {0x34,0x80,(06),0x30,0x32} },
   856  	{ AXORL,	yxorl,	Px, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
   857  	{ AXORW,	yxorl,	Pe, {0x83,(06),0x35,0x81,(06),0x31,0x33} },
   858  
   859  	{ AFMOVB,	yfmvx,	Px, {0xdf,(04)} },
   860  	{ AFMOVBP,	yfmvp,	Px, {0xdf,(06)} },
   861  	{ AFMOVD,	yfmvd,	Px, {0xdd,(00),0xdd,(02),0xd9,(00),0xdd,(02)} },
   862  	{ AFMOVDP,	yfmvdp,	Px, {0xdd,(03),0xdd,(03)} },
   863  	{ AFMOVF,	yfmvf,	Px, {0xd9,(00),0xd9,(02)} },
   864  	{ AFMOVFP,	yfmvp,	Px, {0xd9,(03)} },
   865  	{ AFMOVL,	yfmvf,	Px, {0xdb,(00),0xdb,(02)} },
   866  	{ AFMOVLP,	yfmvp,	Px, {0xdb,(03)} },
   867  	{ AFMOVV,	yfmvx,	Px, {0xdf,(05)} },
   868  	{ AFMOVVP,	yfmvp,	Px, {0xdf,(07)} },
   869  	{ AFMOVW,	yfmvf,	Px, {0xdf,(00),0xdf,(02)} },
   870  	{ AFMOVWP,	yfmvp,	Px, {0xdf,(03)} },
   871  	{ AFMOVX,	yfmvx,	Px, {0xdb,(05)} },
   872  	{ AFMOVXP,	yfmvp,	Px, {0xdb,(07)} },
   873  
   874  	{ AFCOMB },
   875  	{ AFCOMBP },
   876  	{ AFCOMD,	yfadd,	Px, {0xdc,(02),0xd8,(02),0xdc,(02)} },	/* botch */
   877  	{ AFCOMDP,	yfadd,	Px, {0xdc,(03),0xd8,(03),0xdc,(03)} },	/* botch */
   878  	{ AFCOMDPP,	ycompp,	Px, {0xde,(03)} },
   879  	{ AFCOMF,	yfmvx,	Px, {0xd8,(02)} },
   880  	{ AFCOMFP,	yfmvx,	Px, {0xd8,(03)} },
   881  	{ AFCOMI,	yfmvx,	Px, {0xdb,(06)} },
   882  	{ AFCOMIP,	yfmvx,	Px, {0xdf,(06)} },
   883  	{ AFCOML,	yfmvx,	Px, {0xda,(02)} },
   884  	{ AFCOMLP,	yfmvx,	Px, {0xda,(03)} },
   885  	{ AFCOMW,	yfmvx,	Px, {0xde,(02)} },
   886  	{ AFCOMWP,	yfmvx,	Px, {0xde,(03)} },
   887  
   888  	{ AFUCOM,	ycompp,	Px, {0xdd,(04)} },
   889  	{ AFUCOMI,	ycompp,	Px, {0xdb,(05)} },
   890  	{ AFUCOMIP,	ycompp,	Px, {0xdf,(05)} },
   891  	{ AFUCOMP,	ycompp,	Px, {0xdd,(05)} },
   892  	{ AFUCOMPP,	ycompp,	Px, {0xda,(13)} },
   893  
   894  	{ AFADDDP,	yfaddp,	Px, {0xde,(00)} },
   895  	{ AFADDW,	yfmvx,	Px, {0xde,(00)} },
   896  	{ AFADDL,	yfmvx,	Px, {0xda,(00)} },
   897  	{ AFADDF,	yfmvx,	Px, {0xd8,(00)} },
   898  	{ AFADDD,	yfadd,	Px, {0xdc,(00),0xd8,(00),0xdc,(00)} },
   899  
   900  	{ AFMULDP,	yfaddp,	Px, {0xde,(01)} },
   901  	{ AFMULW,	yfmvx,	Px, {0xde,(01)} },
   902  	{ AFMULL,	yfmvx,	Px, {0xda,(01)} },
   903  	{ AFMULF,	yfmvx,	Px, {0xd8,(01)} },
   904  	{ AFMULD,	yfadd,	Px, {0xdc,(01),0xd8,(01),0xdc,(01)} },
   905  
   906  	{ AFSUBDP,	yfaddp,	Px, {0xde,(05)} },
   907  	{ AFSUBW,	yfmvx,	Px, {0xde,(04)} },
   908  	{ AFSUBL,	yfmvx,	Px, {0xda,(04)} },
   909  	{ AFSUBF,	yfmvx,	Px, {0xd8,(04)} },
   910  	{ AFSUBD,	yfadd,	Px, {0xdc,(04),0xd8,(04),0xdc,(05)} },
   911  
   912  	{ AFSUBRDP,	yfaddp,	Px, {0xde,(04)} },
   913  	{ AFSUBRW,	yfmvx,	Px, {0xde,(05)} },
   914  	{ AFSUBRL,	yfmvx,	Px, {0xda,(05)} },
   915  	{ AFSUBRF,	yfmvx,	Px, {0xd8,(05)} },
   916  	{ AFSUBRD,	yfadd,	Px, {0xdc,(05),0xd8,(05),0xdc,(04)} },
   917  
   918  	{ AFDIVDP,	yfaddp,	Px, {0xde,(07)} },
   919  	{ AFDIVW,	yfmvx,	Px, {0xde,(06)} },
   920  	{ AFDIVL,	yfmvx,	Px, {0xda,(06)} },
   921  	{ AFDIVF,	yfmvx,	Px, {0xd8,(06)} },
   922  	{ AFDIVD,	yfadd,	Px, {0xdc,(06),0xd8,(06),0xdc,(07)} },
   923  
   924  	{ AFDIVRDP,	yfaddp,	Px, {0xde,(06)} },
   925  	{ AFDIVRW,	yfmvx,	Px, {0xde,(07)} },
   926  	{ AFDIVRL,	yfmvx,	Px, {0xda,(07)} },
   927  	{ AFDIVRF,	yfmvx,	Px, {0xd8,(07)} },
   928  	{ AFDIVRD,	yfadd,	Px, {0xdc,(07),0xd8,(07),0xdc,(06)} },
   929  
   930  	{ AFXCHD,	yfxch,	Px, {0xd9,(01),0xd9,(01)} },
   931  	{ AFFREE },
   932  	{ AFLDCW,	ystcw,	Px, {0xd9,(05),0xd9,(05)} },
   933  	{ AFLDENV,	ystcw,	Px, {0xd9,(04),0xd9,(04)} },
   934  	{ AFRSTOR,	ysvrs,	Px, {0xdd,(04),0xdd,(04)} },
   935  	{ AFSAVE,	ysvrs,	Px, {0xdd,(06),0xdd,(06)} },
   936  	{ AFSTCW,	ystcw,	Px, {0xd9,(07),0xd9,(07)} },
   937  	{ AFSTENV,	ystcw,	Px, {0xd9,(06),0xd9,(06)} },
   938  	{ AFSTSW,	ystsw,	Px, {0xdd,(07),0xdf,0xe0} },
   939  	{ AF2XM1,	ynone,	Px, {0xd9, 0xf0} },
   940  	{ AFABS,	ynone,	Px, {0xd9, 0xe1} },
   941  	{ AFCHS,	ynone,	Px, {0xd9, 0xe0} },
   942  	{ AFCLEX,	ynone,	Px, {0xdb, 0xe2} },
   943  	{ AFCOS,	ynone,	Px, {0xd9, 0xff} },
   944  	{ AFDECSTP,	ynone,	Px, {0xd9, 0xf6} },
   945  	{ AFINCSTP,	ynone,	Px, {0xd9, 0xf7} },
   946  	{ AFINIT,	ynone,	Px, {0xdb, 0xe3} },
   947  	{ AFLD1,	ynone,	Px, {0xd9, 0xe8} },
   948  	{ AFLDL2E,	ynone,	Px, {0xd9, 0xea} },
   949  	{ AFLDL2T,	ynone,	Px, {0xd9, 0xe9} },
   950  	{ AFLDLG2,	ynone,	Px, {0xd9, 0xec} },
   951  	{ AFLDLN2,	ynone,	Px, {0xd9, 0xed} },
   952  	{ AFLDPI,	ynone,	Px, {0xd9, 0xeb} },
   953  	{ AFLDZ,	ynone,	Px, {0xd9, 0xee} },
   954  	{ AFNOP,	ynone,	Px, {0xd9, 0xd0} },
   955  	{ AFPATAN,	ynone,	Px, {0xd9, 0xf3} },
   956  	{ AFPREM,	ynone,	Px, {0xd9, 0xf8} },
   957  	{ AFPREM1,	ynone,	Px, {0xd9, 0xf5} },
   958  	{ AFPTAN,	ynone,	Px, {0xd9, 0xf2} },
   959  	{ AFRNDINT,	ynone,	Px, {0xd9, 0xfc} },
   960  	{ AFSCALE,	ynone,	Px, {0xd9, 0xfd} },
   961  	{ AFSIN,	ynone,	Px, {0xd9, 0xfe} },
   962  	{ AFSINCOS,	ynone,	Px, {0xd9, 0xfb} },
   963  	{ AFSQRT,	ynone,	Px, {0xd9, 0xfa} },
   964  	{ AFTST,	ynone,	Px, {0xd9, 0xe4} },
   965  	{ AFXAM,	ynone,	Px, {0xd9, 0xe5} },
   966  	{ AFXTRACT,	ynone,	Px, {0xd9, 0xf4} },
   967  	{ AFYL2X,	ynone,	Px, {0xd9, 0xf1} },
   968  	{ AFYL2XP1,	ynone,	Px, {0xd9, 0xf9} },
   969  	{ AEND },
   970  	{ ADYNT_ },
   971  	{ AINIT_ },
   972  	{ ASIGNAME },
   973  	{ ACMPXCHGB,	yrb_mb,	Pm, {0xb0} },
   974  	{ ACMPXCHGL,	yrl_ml,	Pm, {0xb1} },
   975  	{ ACMPXCHGW,	yrl_ml,	Pm, {0xb1} },
   976  	{ ACMPXCHG8B,	yscond,	Pm, {0xc7,(01)} },
   977  
   978  	{ ACPUID,	ynone,	Pm, {0xa2} },
   979  	{ ARDTSC,	ynone,	Pm, {0x31} },
   980  
   981  	{ AXADDB,	yrb_mb,	Pb, {0x0f,0xc0} },
   982  	{ AXADDL,	yrl_ml,	Pm, {0xc1} },
   983  	{ AXADDW,	yrl_ml,	Pe, {0x0f,0xc1} },
   984  
   985  	{ ACMOVLCC,	yml_rl,	Pm, {0x43} },
   986  	{ ACMOVLCS,	yml_rl,	Pm, {0x42} },
   987  	{ ACMOVLEQ,	yml_rl,	Pm, {0x44} },
   988  	{ ACMOVLGE,	yml_rl,	Pm, {0x4d} },
   989  	{ ACMOVLGT,	yml_rl,	Pm, {0x4f} },
   990  	{ ACMOVLHI,	yml_rl,	Pm, {0x47} },
   991  	{ ACMOVLLE,	yml_rl,	Pm, {0x4e} },
   992  	{ ACMOVLLS,	yml_rl,	Pm, {0x46} },
   993  	{ ACMOVLLT,	yml_rl,	Pm, {0x4c} },
   994  	{ ACMOVLMI,	yml_rl,	Pm, {0x48} },
   995  	{ ACMOVLNE,	yml_rl,	Pm, {0x45} },
   996  	{ ACMOVLOC,	yml_rl,	Pm, {0x41} },
   997  	{ ACMOVLOS,	yml_rl,	Pm, {0x40} },
   998  	{ ACMOVLPC,	yml_rl,	Pm, {0x4b} },
   999  	{ ACMOVLPL,	yml_rl,	Pm, {0x49} },
  1000  	{ ACMOVLPS,	yml_rl,	Pm, {0x4a} },
  1001  	{ ACMOVWCC,	yml_rl,	Pq, {0x43} },
  1002  	{ ACMOVWCS,	yml_rl,	Pq, {0x42} },
  1003  	{ ACMOVWEQ,	yml_rl,	Pq, {0x44} },
  1004  	{ ACMOVWGE,	yml_rl,	Pq, {0x4d} },
  1005  	{ ACMOVWGT,	yml_rl,	Pq, {0x4f} },
  1006  	{ ACMOVWHI,	yml_rl,	Pq, {0x47} },
  1007  	{ ACMOVWLE,	yml_rl,	Pq, {0x4e} },
  1008  	{ ACMOVWLS,	yml_rl,	Pq, {0x46} },
  1009  	{ ACMOVWLT,	yml_rl,	Pq, {0x4c} },
  1010  	{ ACMOVWMI,	yml_rl,	Pq, {0x48} },
  1011  	{ ACMOVWNE,	yml_rl,	Pq, {0x45} },
  1012  	{ ACMOVWOC,	yml_rl,	Pq, {0x41} },
  1013  	{ ACMOVWOS,	yml_rl,	Pq, {0x40} },
  1014  	{ ACMOVWPC,	yml_rl,	Pq, {0x4b} },
  1015  	{ ACMOVWPL,	yml_rl,	Pq, {0x49} },
  1016  	{ ACMOVWPS,	yml_rl,	Pq, {0x4a} },
  1017  
  1018  	{ AFCMOVCC,	yfcmv,	Px, {0xdb,(00)} },
  1019  	{ AFCMOVCS,	yfcmv,	Px, {0xda,(00)} },
  1020  	{ AFCMOVEQ,	yfcmv,	Px, {0xda,(01)} },
  1021  	{ AFCMOVHI,	yfcmv,	Px, {0xdb,(02)} },
  1022  	{ AFCMOVLS,	yfcmv,	Px, {0xda,(02)} },
  1023  	{ AFCMOVNE,	yfcmv,	Px, {0xdb,(01)} },
  1024  	{ AFCMOVNU,	yfcmv,	Px, {0xdb,(03)} },
  1025  	{ AFCMOVUN,	yfcmv,	Px, {0xda,(03)} },
  1026  
  1027  	{ ALFENCE, ynone, Pm, {0xae,0xe8} },
  1028  	{ AMFENCE, ynone, Pm, {0xae,0xf0} },
  1029  	{ ASFENCE, ynone, Pm, {0xae,0xf8} },
  1030  
  1031  	{ AEMMS, ynone, Pm, {0x77} },
  1032  
  1033  	{ APREFETCHT0,	yprefetch,	Pm,	{0x18,(01)} },
  1034  	{ APREFETCHT1,	yprefetch,	Pm,	{0x18,(02)} },
  1035  	{ APREFETCHT2,	yprefetch,	Pm,	{0x18,(03)} },
  1036  	{ APREFETCHNTA,	yprefetch,	Pm,	{0x18,(00)} },
  1037  
  1038  	{ ABSWAPL,	ybswap,	Pm,	{0xc8} },
  1039  	
  1040  	{ AUNDEF,		ynone,	Px,	{0x0f, 0x0b} },
  1041  
  1042  	{ AADDPD,	yxm,	Pq, {0x58} },
  1043  	{ AADDPS,	yxm,	Pm, {0x58} },
  1044  	{ AADDSD,	yxm,	Pf2, {0x58} },
  1045  	{ AADDSS,	yxm,	Pf3, {0x58} },
  1046  	{ AANDNPD,	yxm,	Pq, {0x55} },
  1047  	{ AANDNPS,	yxm,	Pm, {0x55} },
  1048  	{ AANDPD,	yxm,	Pq, {0x54} },
  1049  	{ AANDPS,	yxm,	Pq, {0x54} },
  1050  	{ ACMPPD,	yxcmpi,	Px, {Pe,0xc2} },
  1051  	{ ACMPPS,	yxcmpi,	Pm, {0xc2,0} },
  1052  	{ ACMPSD,	yxcmpi,	Px, {Pf2,0xc2} },
  1053  	{ ACMPSS,	yxcmpi,	Px, {Pf3,0xc2} },
  1054  	{ ACOMISD,	yxcmp,	Pe, {0x2f} },
  1055  	{ ACOMISS,	yxcmp,	Pm, {0x2f} },
  1056  	{ ACVTPL2PD,	yxcvm2,	Px, {Pf3,0xe6,Pe,0x2a} },
  1057  	{ ACVTPL2PS,	yxcvm2,	Pm, {0x5b,0,0x2a,0,} },
  1058  	{ ACVTPD2PL,	yxcvm1,	Px, {Pf2,0xe6,Pe,0x2d} },
  1059  	{ ACVTPD2PS,	yxm,	Pe, {0x5a} },
  1060  	{ ACVTPS2PL,	yxcvm1, Px, {Pe,0x5b,Pm,0x2d} },
  1061  	{ ACVTPS2PD,	yxm,	Pm, {0x5a} },
  1062  	{ ACVTSD2SL,	yxcvfl, Pf2, {0x2d} },
  1063   	{ ACVTSD2SS,	yxm,	Pf2, {0x5a} },
  1064  	{ ACVTSL2SD,	yxcvlf, Pf2, {0x2a} },
  1065  	{ ACVTSL2SS,	yxcvlf, Pf3, {0x2a} },
  1066  	{ ACVTSS2SD,	yxm,	Pf3, {0x5a} },
  1067  	{ ACVTSS2SL,	yxcvfl, Pf3, {0x2d} },
  1068  	{ ACVTTPD2PL,	yxcvm1,	Px, {Pe,0xe6,Pe,0x2c} },
  1069  	{ ACVTTPS2PL,	yxcvm1,	Px, {Pf3,0x5b,Pm,0x2c} },
  1070  	{ ACVTTSD2SL,	yxcvfl, Pf2, {0x2c} },
  1071  	{ ACVTTSS2SL,	yxcvfl,	Pf3, {0x2c} },
  1072  	{ ADIVPD,	yxm,	Pe, {0x5e} },
  1073  	{ ADIVPS,	yxm,	Pm, {0x5e} },
  1074  	{ ADIVSD,	yxm,	Pf2, {0x5e} },
  1075  	{ ADIVSS,	yxm,	Pf3, {0x5e} },
  1076  	{ AMASKMOVOU,	yxr,	Pe, {0xf7} },
  1077  	{ AMAXPD,	yxm,	Pe, {0x5f} },
  1078  	{ AMAXPS,	yxm,	Pm, {0x5f} },
  1079  	{ AMAXSD,	yxm,	Pf2, {0x5f} },
  1080  	{ AMAXSS,	yxm,	Pf3, {0x5f} },
  1081  	{ AMINPD,	yxm,	Pe, {0x5d} },
  1082  	{ AMINPS,	yxm,	Pm, {0x5d} },
  1083  	{ AMINSD,	yxm,	Pf2, {0x5d} },
  1084  	{ AMINSS,	yxm,	Pf3, {0x5d} },
  1085  	{ AMOVAPD,	yxmov,	Pe, {0x28,0x29} },
  1086  	{ AMOVAPS,	yxmov,	Pm, {0x28,0x29} },
  1087  	{ AMOVO,	yxmov,	Pe, {0x6f,0x7f} },
  1088  	{ AMOVOU,	yxmov,	Pf3, {0x6f,0x7f} },
  1089  	{ AMOVHLPS,	yxr,	Pm, {0x12} },
  1090  	{ AMOVHPD,	yxmov,	Pe, {0x16,0x17} },
  1091  	{ AMOVHPS,	yxmov,	Pm, {0x16,0x17} },
  1092  	{ AMOVLHPS,	yxr,	Pm, {0x16} },
  1093  	{ AMOVLPD,	yxmov,	Pe, {0x12,0x13} },
  1094  	{ AMOVLPS,	yxmov,	Pm, {0x12,0x13} },
  1095  	{ AMOVMSKPD,	yxrrl,	Pq, {0x50} },
  1096  	{ AMOVMSKPS,	yxrrl,	Pm, {0x50} },
  1097  	{ AMOVNTO,	yxr_ml,	Pe, {0xe7} },
  1098  	{ AMOVNTPD,	yxr_ml,	Pe, {0x2b} },
  1099  	{ AMOVNTPS,	yxr_ml,	Pm, {0x2b} },
  1100  	{ AMOVSD,	yxmov,	Pf2, {0x10,0x11} },
  1101  	{ AMOVSS,	yxmov,	Pf3, {0x10,0x11} },
  1102  	{ AMOVUPD,	yxmov,	Pe, {0x10,0x11} },
  1103  	{ AMOVUPS,	yxmov,	Pm, {0x10,0x11} },
  1104  	{ AMULPD,	yxm,	Pe, {0x59} },
  1105  	{ AMULPS,	yxm,	Ym, {0x59} },
  1106  	{ AMULSD,	yxm,	Pf2, {0x59} },
  1107  	{ AMULSS,	yxm,	Pf3, {0x59} },
  1108  	{ AORPD,	yxm,	Pq, {0x56} },
  1109  	{ AORPS,	yxm,	Pm, {0x56} },
  1110  	{ APADDQ,	yxm,	Pe, {0xd4} },
  1111  	{ APAND,	yxm,	Pe, {0xdb} },
  1112  	{ APCMPEQB,	yxmq,	Pe, {0x74} },
  1113  	{ APMAXSW,	yxm,	Pe, {0xee} },
  1114  	{ APMAXUB,	yxm,	Pe, {0xde} },
  1115  	{ APMINSW,	yxm,	Pe, {0xea} },
  1116  	{ APMINUB,	yxm,	Pe, {0xda} },
  1117  	{ APMOVMSKB,	ymskb,	Px, {Pe,0xd7,0xd7} },
  1118  	{ APSADBW,	yxm,	Pq, {0xf6} },
  1119  	{ APSUBB,	yxm,	Pe, {0xf8} },
  1120  	{ APSUBL,	yxm,	Pe, {0xfa} },
  1121  	{ APSUBQ,	yxm,	Pe, {0xfb} },
  1122  	{ APSUBSB,	yxm,	Pe, {0xe8} },
  1123  	{ APSUBSW,	yxm,	Pe, {0xe9} },
  1124  	{ APSUBUSB,	yxm,	Pe, {0xd8} },
  1125  	{ APSUBUSW,	yxm,	Pe, {0xd9} },
  1126  	{ APSUBW,	yxm,	Pe, {0xf9} },
  1127  	{ APUNPCKHQDQ,	yxm,	Pe, {0x6d} },
  1128  	{ APUNPCKLQDQ,	yxm,	Pe, {0x6c} },
  1129  	{ APXOR,	yxm,	Pe, {0xef} },
  1130  	{ ARCPPS,	yxm,	Pm, {0x53} },
  1131  	{ ARCPSS,	yxm,	Pf3, {0x53} },
  1132  	{ ARSQRTPS,	yxm,	Pm, {0x52} },
  1133  	{ ARSQRTSS,	yxm,	Pf3, {0x52} },
  1134  	{ ASQRTPD,	yxm,	Pe, {0x51} },
  1135  	{ ASQRTPS,	yxm,	Pm, {0x51} },
  1136  	{ ASQRTSD,	yxm,	Pf2, {0x51} },
  1137  	{ ASQRTSS,	yxm,	Pf3, {0x51} },
  1138  	{ ASUBPD,	yxm,	Pe, {0x5c} },
  1139  	{ ASUBPS,	yxm,	Pm, {0x5c} },
  1140  	{ ASUBSD,	yxm,	Pf2, {0x5c} },
  1141  	{ ASUBSS,	yxm,	Pf3, {0x5c} },
  1142  	{ AUCOMISD,	yxcmp,	Pe, {0x2e} },
  1143  	{ AUCOMISS,	yxcmp,	Pm, {0x2e} },
  1144  	{ AUNPCKHPD,	yxm,	Pe, {0x15} },
  1145  	{ AUNPCKHPS,	yxm,	Pm, {0x15} },
  1146  	{ AUNPCKLPD,	yxm,	Pe, {0x14} },
  1147  	{ AUNPCKLPS,	yxm,	Pm, {0x14} },
  1148  	{ AXORPD,	yxm,	Pe, {0x57} },
  1149  	{ AXORPS,	yxm,	Pm, {0x57} },
  1150  	{ APSHUFHW,	yxshuf,	Pf3, {0x70,(00)} },
  1151  	{ APSHUFL,	yxshuf,	Pq, {0x70,(00)} },
  1152  	{ APSHUFLW,	yxshuf,	Pf2, {0x70,(00)} },
  1153  
  1154  
  1155  	{ AAESENC,	yaes,	Pq, {0x38,0xdc,(0)} },
  1156  	{ APINSRD,	yinsrd,	Pq, {0x3a, 0x22, (00)} },
  1157  	{ APSHUFB,	ymshufb,Pq, {0x38, 0x00} },
  1158  
  1159  	{ AUSEFIELD,	ynop,	Px, {0,0} },
  1160  	{ ATYPE },
  1161  	{ AFUNCDATA,	yfuncdata,	Px, {0,0} },
  1162  	{ APCDATA,	ypcdata,	Px, {0,0} },
  1163  	{ ACHECKNIL },
  1164  	{ AVARDEF },
  1165  	{ AVARKILL },
  1166  	{ ADUFFCOPY,	yduff,	Px, {0xe8} },
  1167  	{ ADUFFZERO,	yduff,	Px, {0xe8} },
  1168  
  1169  	{0}
  1170  };
  1171  
  1172  static int32	vaddr(Link*, Addr*, Reloc*);
  1173  
// single-instruction no-ops of various lengths.
// constructed by hand and disassembled with gdb to verify.
// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
//
// Row i holds an (i+1)-byte sequence, so a pad of m bytes is nop[m-1].
// Each row is declared 16 bytes wide; bytes past the listed ones are
// zero-filled by the initializer and are not part of the sequence.
static uchar nop[][16] = {
	{0x90},	// 1 byte
	{0x66, 0x90},	// 2 bytes
	{0x0F, 0x1F, 0x00},	// 3 bytes
	{0x0F, 0x1F, 0x40, 0x00},	// 4 bytes
	{0x0F, 0x1F, 0x44, 0x00, 0x00},	// 5 bytes
	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},	// 6 bytes
	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},	// 7 bytes
	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},	// 8 bytes
	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},	// 9 bytes
	// Native Client rejects the repeated 0x66 prefix.
	// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
};
  1190  
  1191  static void
  1192  fillnop(uchar *p, int n)
  1193  {
  1194  	int m;
  1195  
  1196  	while(n > 0) {
  1197  		m = n;
  1198  		if(m > nelem(nop))
  1199  			m = nelem(nop);
  1200  		memmove(p, nop[m-1], m);
  1201  		p += m;
  1202  		n -= m;
  1203  	}
  1204  }
  1205  
  1206  static int32
  1207  naclpad(Link *ctxt, LSym *s, int32 c, int32 pad)
  1208  {
  1209  	symgrow(ctxt, s, c+pad);
  1210  	fillnop(s->p+c, pad);
  1211  	return c+pad;
  1212  }
  1213  
  1214  static void instinit(void);
  1215  
// span8 lays out the code for symbol s: it assigns a pc to every Prog in
// s->text, assembles each one with asmins, and copies the bytes into s->p.
// Layout is repeated until no instruction changes size and no short
// forward branch overflows; more than 20 passes is treated as fatal.
// On return s->size holds the final, aligned size.
void
span8(Link *ctxt, LSym *s)
{
	Prog *p, *q;
	int32 c, v, loop;
	uchar *bp;
	int n, m, i;

	ctxt->cursym = s;

	// nothing to lay out for a missing or single-Prog text list
	if(s->text == nil || s->text->link == nil)
		return;

	// ycover[0] is set by instinit, so zero means the tables are not built yet
	if(ycover[0] == 0)
		instinit();

	// First pass: give unresolved branches a self target, reset back to
	// 0 or 1 (so bit 2 is clear everywhere), and lower AADJSP.
	for(p = s->text; p != nil; p = p->link) {
		n = 0;
		if(p->to.type == D_BRANCH)
			if(p->pcond == nil)
				p->pcond = p;
		if((q = p->pcond) != nil)
			if(q->back != 2)
				n = 1;
		p->back = n;
		// AADJSP becomes ADDL/SUBL of a non-negative constant on SP,
		// or ANOP when the adjustment is zero.
		if(p->as == AADJSP) {
			p->to.type = D_SP;
			v = -p->from.offset;
			p->from.offset = v;
			p->as = AADDL;
			if(v < 0) {
				p->as = ASUBL;
				v = -v;
				p->from.offset = v;
			}
			if(v == 0)
				p->as = ANOP;
		}
	}

	// Second pass: mark every Prog as a short-branch candidate (bit 2).
	// A target that already has bit 2 set was visited earlier in this
	// pass, i.e. it precedes p, so the jump is backward (bit 1).
	for(p = s->text; p != nil; p = p->link) {
		p->back = 2;	// use short branches first time through
		if((q = p->pcond) != nil && (q->back & 2))
			p->back |= 1;	// backward jump

		// NOTE(review): the first pass already rewrote every AADJSP, so
		// this branch looks unreachable — confirm before removing.
		if(p->as == AADJSP) {
			p->to.type = D_SP;
			v = -p->from.offset;
			p->from.offset = v;
			p->as = AADDL;
			if(v < 0) {
				p->as = ASUBL;
				v = -v;
				p->from.offset = v;
			}
			if(v == 0)
				p->as = ANOP;
		}
	}
	
	// n counts layout passes; loop is nonzero when another pass is needed
	n = 0;
	do {
		loop = 0;
		// discard relocations and bytes produced by the previous pass
		memset(s->r, 0, s->nr*sizeof s->r[0]);
		s->nr = 0;
		s->np = 0;
		c = 0;
		for(p = s->text; p != nil; p = p->link) {
			// isize is the size measured on an earlier pass, so this
			// padding only takes effect from the second pass onward
			if(ctxt->headtype == Hnacl && p->isize > 0) {
				static LSym *deferreturn;
				
				if(deferreturn == nil)
					deferreturn = linklookup(ctxt, "runtime.deferreturn", 0);

				// pad everything to avoid crossing 32-byte boundary
				if((c>>5) != ((c+p->isize-1)>>5))
					c = naclpad(ctxt, s, c, -c&31);
				// pad call deferreturn to start at 32-byte boundary
				// so that subtracting 5 in jmpdefer will jump back
				// to that boundary and rerun the call.
				if(p->as == ACALL && p->to.sym == deferreturn)
					c = naclpad(ctxt, s, c, -c&31);
				// pad call to end at 32-byte boundary
				if(p->as == ACALL)
					c = naclpad(ctxt, s, c, -(c+p->isize)&31);
				
				// the linker treats REP and STOSQ as different instructions
				// but in fact the REP is a prefix on the STOSQ.
				// make sure REP has room for 2 more bytes, so that
				// padding will not be inserted before the next instruction.
				if(p->as == AREP && (c>>5) != ((c+3-1)>>5))
					c = naclpad(ctxt, s, c, -c&31);
				
				// same for LOCK.
				// various instructions follow; the longest is 4 bytes.
				// give ourselves 8 bytes so as to avoid surprises.
				if(p->as == ALOCK && (c>>5) != ((c+8-1)>>5))
					c = naclpad(ctxt, s, c, -c&31);
			}
			
			p->pc = c;

			// process forward jumps to p
			for(q = p->comefrom; q != nil; q = q->forwd) {
				// displacement from the end of q (pc + encoded length) to p
				v = p->pc - (q->pc + q->mark);
				if(q->back & 2)	{	// short
					// does not fit in a signed byte: switch q to the
					// long form and request another pass
					if(v > 127) {
						loop++;
						q->back ^= 2;
					}
					// patch the 1-byte displacement; for AJCXZW it
					// sits one byte later than for other branches
					if(q->as == AJCXZW)
						s->p[q->pc+2] = v;
					else
						s->p[q->pc+1] = v;
				} else {
					// long form: the last 4 bytes of q hold the
					// displacement, stored least significant byte first
					bp = s->p + q->pc + q->mark - 4;
					*bp++ = v;
					*bp++ = v>>8;
					*bp++ = v>>16;
					*bp = v>>24;
				}	
			}
			p->comefrom = nil;

			p->pc = c;
			asmins(ctxt, p);
			// number of bytes asmins left in ctxt->and
			m = ctxt->andptr-ctxt->and;
			// a size change invalidates later pcs, so lay out again
			if(p->isize != m) {
				p->isize = m;
				loop++;
			}
			symgrow(ctxt, s, p->pc+m);
			memmove(s->p+p->pc, ctxt->and, m);
			p->mark = m;
			c += m;
		}
		if(++n > 20) {
			ctxt->diag("span must be looping");
			sysfatal("bad code");
		}
	} while(loop);
	
	// NaCl: pad the end of the function to a 32-byte boundary with no-ops
	if(ctxt->headtype == Hnacl)
		c = naclpad(ctxt, s, c, -c&31);
	// round the recorded size up to a multiple of FuncAlign
	c += -c&(FuncAlign-1);
	s->size = c;

	// disabled debugging dump of the generated bytes and relocations
	if(0 /* debug['a'] > 1 */) {
		print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
		for(i=0; i<s->np; i++) {
			print(" %.2ux", s->p[i]);
			if(i%16 == 15)
				print("\n  %.6ux", i+1);
		}
		if(i%16)
			print("\n");
	
		for(i=0; i<s->nr; i++) {
			Reloc *r;
			
			r = &s->r[i];
			print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
		}
	}
}
  1381  
  1382  static void
  1383  instinit(void)
  1384  {
  1385  	int i;
  1386  
  1387  	for(i=1; optab[i].as; i++)
  1388  		if(i != optab[i].as)
  1389  			sysfatal("phase error in optab: at %A found %A", i, optab[i].as);
  1390  
  1391  	for(i=0; i<Ymax; i++)
  1392  		ycover[i*Ymax + i] = 1;
  1393  
  1394  	ycover[Yi0*Ymax + Yi8] = 1;
  1395  	ycover[Yi1*Ymax + Yi8] = 1;
  1396  
  1397  	ycover[Yi0*Ymax + Yi32] = 1;
  1398  	ycover[Yi1*Ymax + Yi32] = 1;
  1399  	ycover[Yi8*Ymax + Yi32] = 1;
  1400  
  1401  	ycover[Yal*Ymax + Yrb] = 1;
  1402  	ycover[Ycl*Ymax + Yrb] = 1;
  1403  	ycover[Yax*Ymax + Yrb] = 1;
  1404  	ycover[Ycx*Ymax + Yrb] = 1;
  1405  	ycover[Yrx*Ymax + Yrb] = 1;
  1406  
  1407  	ycover[Yax*Ymax + Yrx] = 1;
  1408  	ycover[Ycx*Ymax + Yrx] = 1;
  1409  
  1410  	ycover[Yax*Ymax + Yrl] = 1;
  1411  	ycover[Ycx*Ymax + Yrl] = 1;
  1412  	ycover[Yrx*Ymax + Yrl] = 1;
  1413  
  1414  	ycover[Yf0*Ymax + Yrf] = 1;
  1415  
  1416  	ycover[Yal*Ymax + Ymb] = 1;
  1417  	ycover[Ycl*Ymax + Ymb] = 1;
  1418  	ycover[Yax*Ymax + Ymb] = 1;
  1419  	ycover[Ycx*Ymax + Ymb] = 1;
  1420  	ycover[Yrx*Ymax + Ymb] = 1;
  1421  	ycover[Yrb*Ymax + Ymb] = 1;
  1422  	ycover[Ym*Ymax + Ymb] = 1;
  1423  
  1424  	ycover[Yax*Ymax + Yml] = 1;
  1425  	ycover[Ycx*Ymax + Yml] = 1;
  1426  	ycover[Yrx*Ymax + Yml] = 1;
  1427  	ycover[Yrl*Ymax + Yml] = 1;
  1428  	ycover[Ym*Ymax + Yml] = 1;
  1429  
  1430  	ycover[Yax*Ymax + Ymm] = 1;
  1431  	ycover[Ycx*Ymax + Ymm] = 1;
  1432  	ycover[Yrx*Ymax + Ymm] = 1;
  1433  	ycover[Yrl*Ymax + Ymm] = 1;
  1434  	ycover[Ym*Ymax + Ymm] = 1;
  1435  	ycover[Ymr*Ymax + Ymm] = 1;
  1436  
  1437  	ycover[Ym*Ymax + Yxm] = 1;
  1438  	ycover[Yxr*Ymax + Yxm] = 1;
  1439  
  1440  	for(i=0; i<D_NONE; i++) {
  1441  		reg[i] = -1;
  1442  		if(i >= D_AL && i <= D_BH)
  1443  			reg[i] = (i-D_AL) & 7;
  1444  		if(i >= D_AX && i <= D_DI)
  1445  			reg[i] = (i-D_AX) & 7;
  1446  		if(i >= D_F0 && i <= D_F0+7)
  1447  			reg[i] = (i-D_F0) & 7;
  1448  		if(i >= D_X0 && i <= D_X0+7)
  1449  			reg[i] = (i-D_X0) & 7;
  1450  	}
  1451  }
  1452  
  1453  static int
  1454  prefixof(Link *ctxt, Addr *a)
  1455  {
  1456  	switch(a->type) {
  1457  	case D_INDIR+D_CS:
  1458  		return 0x2e;
  1459  	case D_INDIR+D_DS:
  1460  		return 0x3e;
  1461  	case D_INDIR+D_ES:
  1462  		return 0x26;
  1463  	case D_INDIR+D_FS:
  1464  		return 0x64;
  1465  	case D_INDIR+D_GS:
  1466  		return 0x65;
  1467  	case D_INDIR+D_TLS:
  1468  		// NOTE: Systems listed here should be only systems that
  1469  		// support direct TLS references like 8(TLS) implemented as
  1470  		// direct references from FS or GS. Systems that require
  1471  		// the initial-exec model, where you load the TLS base into
  1472  		// a register and then index from that register, do not reach
  1473  		// this code and should not be listed.
  1474  		switch(ctxt->headtype) {
  1475  		default:
  1476  			sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype));
  1477  		case Hdarwin:
  1478  		case Hdragonfly:
  1479  		case Hfreebsd:
  1480  		case Hnetbsd:
  1481  		case Hopenbsd:
  1482  			return 0x65; // GS
  1483  		}
  1484  	}
  1485  	return 0;
  1486  }
  1487  
  1488  static int
  1489  oclass(Addr *a)
  1490  {
  1491  	int32 v;
  1492  
  1493  	if((a->type >= D_INDIR && a->type < 2*D_INDIR) || a->index != D_NONE) {
  1494  		if(a->index != D_NONE && a->scale == 0) {
  1495  			if(a->type == D_ADDR) {
  1496  				switch(a->index) {
  1497  				case D_EXTERN:
  1498  				case D_STATIC:
  1499  					return Yi32;
  1500  				case D_AUTO:
  1501  				case D_PARAM:
  1502  					return Yiauto;
  1503  				}
  1504  				return Yxxx;
  1505  			}
  1506  			//if(a->type == D_INDIR+D_ADDR)
  1507  			//	print("*Ycol\n");
  1508  			return Ycol;
  1509  		}
  1510  		return Ym;
  1511  	}
  1512  	switch(a->type)
  1513  	{
  1514  	case D_AL:
  1515  		return Yal;
  1516  
  1517  	case D_AX:
  1518  		return Yax;
  1519  
  1520  	case D_CL:
  1521  	case D_DL:
  1522  	case D_BL:
  1523  	case D_AH:
  1524  	case D_CH:
  1525  	case D_DH:
  1526  	case D_BH:
  1527  		return Yrb;
  1528  
  1529  	case D_CX:
  1530  		return Ycx;
  1531  
  1532  	case D_DX:
  1533  	case D_BX:
  1534  		return Yrx;
  1535  
  1536  	case D_SP:
  1537  	case D_BP:
  1538  	case D_SI:
  1539  	case D_DI:
  1540  		return Yrl;
  1541  
  1542  	case D_F0+0:
  1543  		return	Yf0;
  1544  
  1545  	case D_F0+1:
  1546  	case D_F0+2:
  1547  	case D_F0+3:
  1548  	case D_F0+4:
  1549  	case D_F0+5:
  1550  	case D_F0+6:
  1551  	case D_F0+7:
  1552  		return	Yrf;
  1553  
  1554  	case D_X0+0:
  1555  	case D_X0+1:
  1556  	case D_X0+2:
  1557  	case D_X0+3:
  1558  	case D_X0+4:
  1559  	case D_X0+5:
  1560  	case D_X0+6:
  1561  	case D_X0+7:
  1562  		return	Yxr;
  1563  
  1564  	case D_NONE:
  1565  		return Ynone;
  1566  
  1567  	case D_CS:	return	Ycs;
  1568  	case D_SS:	return	Yss;
  1569  	case D_DS:	return	Yds;
  1570  	case D_ES:	return	Yes;
  1571  	case D_FS:	return	Yfs;
  1572  	case D_GS:	return	Ygs;
  1573  	case D_TLS:	return	Ytls;
  1574  
  1575  	case D_GDTR:	return	Ygdtr;
  1576  	case D_IDTR:	return	Yidtr;
  1577  	case D_LDTR:	return	Yldtr;
  1578  	case D_MSW:	return	Ymsw;
  1579  	case D_TASK:	return	Ytask;
  1580  
  1581  	case D_CR+0:	return	Ycr0;
  1582  	case D_CR+1:	return	Ycr1;
  1583  	case D_CR+2:	return	Ycr2;
  1584  	case D_CR+3:	return	Ycr3;
  1585  	case D_CR+4:	return	Ycr4;
  1586  	case D_CR+5:	return	Ycr5;
  1587  	case D_CR+6:	return	Ycr6;
  1588  	case D_CR+7:	return	Ycr7;
  1589  
  1590  	case D_DR+0:	return	Ydr0;
  1591  	case D_DR+1:	return	Ydr1;
  1592  	case D_DR+2:	return	Ydr2;
  1593  	case D_DR+3:	return	Ydr3;
  1594  	case D_DR+4:	return	Ydr4;
  1595  	case D_DR+5:	return	Ydr5;
  1596  	case D_DR+6:	return	Ydr6;
  1597  	case D_DR+7:	return	Ydr7;
  1598  
  1599  	case D_TR+0:	return	Ytr0;
  1600  	case D_TR+1:	return	Ytr1;
  1601  	case D_TR+2:	return	Ytr2;
  1602  	case D_TR+3:	return	Ytr3;
  1603  	case D_TR+4:	return	Ytr4;
  1604  	case D_TR+5:	return	Ytr5;
  1605  	case D_TR+6:	return	Ytr6;
  1606  	case D_TR+7:	return	Ytr7;
  1607  
  1608  	case D_EXTERN:
  1609  	case D_STATIC:
  1610  	case D_AUTO:
  1611  	case D_PARAM:
  1612  		return Ym;
  1613  
  1614  	case D_CONST:
  1615  	case D_CONST2:
  1616  	case D_ADDR:
  1617  		if(a->sym == nil) {
  1618  			v = a->offset;
  1619  			if(v == 0)
  1620  				return Yi0;
  1621  			if(v == 1)
  1622  				return Yi1;
  1623  			if(v >= -128 && v <= 127)
  1624  				return Yi8;
  1625  		}
  1626  		return Yi32;
  1627  
  1628  	case D_BRANCH:
  1629  		return Ybr;
  1630  	}
  1631  	return Yxxx;
  1632  }
  1633  
  1634  static void
  1635  asmidx(Link *ctxt, int scale, int index, int base)
  1636  {
  1637  	int i;
  1638  
  1639  	switch(index) {
  1640  	default:
  1641  		goto bad;
  1642  
  1643  	case D_NONE:
  1644  		i = 4 << 3;
  1645  		goto bas;
  1646  
  1647  	case D_AX:
  1648  	case D_CX:
  1649  	case D_DX:
  1650  	case D_BX:
  1651  	case D_BP:
  1652  	case D_SI:
  1653  	case D_DI:
  1654  		i = reg[index] << 3;
  1655  		break;
  1656  	}
  1657  	switch(scale) {
  1658  	default:
  1659  		goto bad;
  1660  	case 1:
  1661  		break;
  1662  	case 2:
  1663  		i |= (1<<6);
  1664  		break;
  1665  	case 4:
  1666  		i |= (2<<6);
  1667  		break;
  1668  	case 8:
  1669  		i |= (3<<6);
  1670  		break;
  1671  	}
  1672  bas:
  1673  	switch(base) {
  1674  	default:
  1675  		goto bad;
  1676  	case D_NONE:	/* must be mod=00 */
  1677  		i |= 5;
  1678  		break;
  1679  	case D_AX:
  1680  	case D_CX:
  1681  	case D_DX:
  1682  	case D_BX:
  1683  	case D_SP:
  1684  	case D_BP:
  1685  	case D_SI:
  1686  	case D_DI:
  1687  		i |= reg[base];
  1688  		break;
  1689  	}
  1690  	*ctxt->andptr++ = i;
  1691  	return;
  1692  bad:
  1693  	ctxt->diag("asmidx: bad address %d,%d,%d", scale, index, base);
  1694  	*ctxt->andptr++ = 0;
  1695  	return;
  1696  }
  1697  
  1698  static void
  1699  put4(Link *ctxt, int32 v)
  1700  {
  1701  	ctxt->andptr[0] = v;
  1702  	ctxt->andptr[1] = v>>8;
  1703  	ctxt->andptr[2] = v>>16;
  1704  	ctxt->andptr[3] = v>>24;
  1705  	ctxt->andptr += 4;
  1706  }
  1707  
  1708  static void
  1709  relput4(Link *ctxt, Prog *p, Addr *a)
  1710  {
  1711  	vlong v;
  1712  	Reloc rel, *r;
  1713  	
  1714  	v = vaddr(ctxt, a, &rel);
  1715  	if(rel.siz != 0) {
  1716  		if(rel.siz != 4)
  1717  			ctxt->diag("bad reloc");
  1718  		r = addrel(ctxt->cursym);
  1719  		*r = rel;
  1720  		r->off = p->pc + ctxt->andptr - ctxt->and;
  1721  	}
  1722  	put4(ctxt, v);
  1723  }
  1724  
  1725  static int32
  1726  vaddr(Link *ctxt, Addr *a, Reloc *r)
  1727  {
  1728  	int t;
  1729  	int32 v;
  1730  	LSym *s;
  1731  	
  1732  	if(r != nil)
  1733  		memset(r, 0, sizeof *r);
  1734  
  1735  	t = a->type;
  1736  	v = a->offset;
  1737  	if(t == D_ADDR)
  1738  		t = a->index;
  1739  	switch(t) {
  1740  	case D_STATIC:
  1741  	case D_EXTERN:
  1742  		s = a->sym;
  1743  		if(s != nil) {
  1744  			if(r == nil) {
  1745  				ctxt->diag("need reloc for %D", a);
  1746  				sysfatal("bad code");
  1747  			}
  1748  			r->type = R_ADDR;
  1749  			r->siz = 4;
  1750  			r->off = -1;
  1751  			r->sym = s;
  1752  			r->add = v;
  1753  			v = 0;
  1754  		}
  1755  		break;
  1756  	
  1757  	case D_INDIR+D_TLS:
  1758  		if(r == nil) {
  1759  			ctxt->diag("need reloc for %D", a);
  1760  			sysfatal("bad code");
  1761  		}
  1762  		r->type = R_TLS_LE;
  1763  		r->siz = 4;
  1764  		r->off = -1; // caller must fill in
  1765  		r->add = v;
  1766  		v = 0;
  1767  		break;
  1768  	}
  1769  	return v;
  1770  }
  1771  
  1772  static void
  1773  asmand(Link *ctxt, Addr *a, int r)
  1774  {
  1775  	int32 v;
  1776  	int t, scale;
  1777  	Reloc rel;
  1778  
  1779  	v = a->offset;
  1780  	t = a->type;
  1781  	rel.siz = 0;
  1782  	if(a->index != D_NONE && a->index != D_TLS) {
  1783  		if(t < D_INDIR || t >= 2*D_INDIR) {
  1784  			switch(t) {
  1785  			default:
  1786  				goto bad;
  1787  			case D_STATIC:
  1788  			case D_EXTERN:
  1789  				t = D_NONE;
  1790  				v = vaddr(ctxt, a, &rel);
  1791  				break;
  1792  			case D_AUTO:
  1793  			case D_PARAM:
  1794  				t = D_SP;
  1795  				break;
  1796  			}
  1797  		} else
  1798  			t -= D_INDIR;
  1799  
  1800  		if(t == D_NONE) {
  1801  			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
  1802  			asmidx(ctxt, a->scale, a->index, t);
  1803  			goto putrelv;
  1804  		}
  1805  		if(v == 0 && rel.siz == 0 && t != D_BP) {
  1806  			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
  1807  			asmidx(ctxt, a->scale, a->index, t);
  1808  			return;
  1809  		}
  1810  		if(v >= -128 && v < 128 && rel.siz == 0) {
  1811  			*ctxt->andptr++ = (1 << 6) | (4 << 0) | (r << 3);
  1812  			asmidx(ctxt, a->scale, a->index, t);
  1813  			*ctxt->andptr++ = v;
  1814  			return;
  1815  		}
  1816  		*ctxt->andptr++ = (2 << 6) | (4 << 0) | (r << 3);
  1817  		asmidx(ctxt, a->scale, a->index, t);
  1818  		goto putrelv;
  1819  	}
  1820  	if(t >= D_AL && t <= D_F7 || t >= D_X0 && t <= D_X7) {
  1821  		if(v)
  1822  			goto bad;
  1823  		*ctxt->andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
  1824  		return;
  1825  	}
  1826  	
  1827  	scale = a->scale;
  1828  	if(t < D_INDIR || t >= 2*D_INDIR) {
  1829  		switch(a->type) {
  1830  		default:
  1831  			goto bad;
  1832  		case D_STATIC:
  1833  		case D_EXTERN:
  1834  			t = D_NONE;
  1835  			v = vaddr(ctxt, a, &rel);
  1836  			break;
  1837  		case D_AUTO:
  1838  		case D_PARAM:
  1839  			t = D_SP;
  1840  			break;
  1841  		}
  1842  		scale = 1;
  1843  	} else
  1844  		t -= D_INDIR;
  1845  	if(t == D_TLS)
  1846  		v = vaddr(ctxt, a, &rel);
  1847  
  1848  	if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) {
  1849  		*ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3);
  1850  		goto putrelv;
  1851  	}
  1852  	if(t == D_SP) {
  1853  		if(v == 0 && rel.siz == 0) {
  1854  			*ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3);
  1855  			asmidx(ctxt, scale, D_NONE, t);
  1856  			return;
  1857  		}
  1858  		if(v >= -128 && v < 128 && rel.siz == 0) {
  1859  			*ctxt->andptr++ = (1 << 6) | (4 << 0) | (r << 3);
  1860  			asmidx(ctxt, scale, D_NONE, t);
  1861  			*ctxt->andptr++ = v;
  1862  			return;
  1863  		}
  1864  		*ctxt->andptr++ = (2 << 6) | (4 << 0) | (r << 3);
  1865  		asmidx(ctxt, scale, D_NONE, t);
  1866  		goto putrelv;
  1867  	}
  1868  	if(t >= D_AX && t <= D_DI) {
  1869  		if(a->index == D_TLS) {
  1870  			memset(&rel, 0, sizeof rel);
  1871  			rel.type = R_TLS_IE;
  1872  			rel.siz = 4;
  1873  			rel.sym = nil;
  1874  			rel.add = v;
  1875  			v = 0;
  1876  		}
  1877  		if(v == 0 && rel.siz == 0 && t != D_BP) {
  1878  			*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
  1879  			return;
  1880  		}
  1881  		if(v >= -128 && v < 128 && rel.siz == 0)  {
  1882  			ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
  1883  			ctxt->andptr[1] = v;
  1884  			ctxt->andptr += 2;
  1885  			return;
  1886  		}
  1887  		*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
  1888  		goto putrelv;
  1889  	}
  1890  	goto bad;
  1891  
  1892  putrelv:
  1893  	if(rel.siz != 0) {
  1894  		Reloc *r;
  1895  		
  1896  		if(rel.siz != 4) {
  1897  			ctxt->diag("bad rel");
  1898  			goto bad;
  1899  		}
  1900  		r = addrel(ctxt->cursym);
  1901  		*r = rel;
  1902  		r->off = ctxt->curp->pc + ctxt->andptr - ctxt->and;
  1903  	}
  1904  
  1905  	put4(ctxt, v);
  1906  	return;
  1907  
  1908  bad:
  1909  	ctxt->diag("asmand: bad address %D", a);
  1910  	return;
  1911  }
  1912  
enum
{
	// E appears in the opcode-byte columns of ymovtab rows.
	// NOTE(review): presumably an end-of-opcode-bytes marker; confirm
	// against the code that walks ymovtab.
	E = 0xff,
};
  1917  
// ymovtab is a flat table of special-case moves and related instructions.
// Each row is 8 bytes:
//	instruction, from-operand class, to-operand class, type code,
//	followed by four data bytes.
// The type code (0 through 8 here) groups rows that share a layout for the
// four data bytes; its exact handling lives in the code that consumes this
// table. NOTE(review): in type 0 rows the data bytes look like literal
// opcode bytes terminated by E — confirm against that code.
// The table is terminated by a single 0 byte.
static uchar	ymovtab[] =
{
/* push */
	APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0,
	APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0,
	APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0,
	APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0,
	APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0,
	APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0,

	APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0,
	APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0,
	APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0,
	APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0,
	APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E,
	APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E,

/* pop */
	APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0,
	APOPL,	Ynone,	Yes,	0,	0x07,E,0,0,
	APOPL,	Ynone,	Yss,	0,	0x17,E,0,0,
	APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0,
	APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0,

	APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0,
	APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0,
	APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0,
	APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E,
	APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E,

/* mov seg */
	AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0,
	AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0,
	AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0,
	AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0,
	AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0,
	AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0,

	AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0,
	AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0,
	AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0,
	AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0,
	AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0,
	AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0,

/* mov cr */
	AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0,
	AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0,
	AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0,
	AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0,

	AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0,
	AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0,
	AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0,
	AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0,

/* mov dr */
	AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0,
	AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0,
	AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0,

	AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0,
	AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0,
	AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0,

/* mov tr */
	AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0,
	AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0,

	AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E,
	AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E,

/* lgdt, sgdt, lidt, sidt */
	AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0,
	AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0,
	AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0,
	AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0,

/* lldt, sldt */
	AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0,
	AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0,

/* lmsw, smsw */
	AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0,
	AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0,

/* ltr, str */
	AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0,
	AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0,

/* load full pointer */
	AMOVL,	Yml,	Ycol,	5,	0,0,0,0,
	AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0,

/* double shift */
	ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0,
	ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0,

/* extra imul */
	AIMULW,	Yml,	Yrl,	7,	Pq,0xaf,0,0,
	AIMULL,	Yml,	Yrl,	7,	Pm,0xaf,0,0,

/* load TLS base pointer */
	AMOVL,	Ytls,	Yrl,	8,	0,0,0,0,

	0
};
  2025  
  2026  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  2027  // which is not referenced in a->type.
  2028  // If a is empty, it returns BX to account for MULB-like instructions
  2029  // that might use DX and AX.
  2030  static int
  2031  byteswapreg(Link *ctxt, Addr *a)
  2032  {
  2033  	int cana, canb, canc, cand;
  2034  
  2035  	cana = canb = canc = cand = 1;
  2036  
  2037  	switch(a->type) {
  2038  	case D_NONE:
  2039  		cana = cand = 0;
  2040  		break;
  2041  	case D_AX:
  2042  	case D_AL:
  2043  	case D_AH:
  2044  	case D_INDIR+D_AX:
  2045  		cana = 0;
  2046  		break;
  2047  	case D_BX:
  2048  	case D_BL:
  2049  	case D_BH:
  2050  	case D_INDIR+D_BX:
  2051  		canb = 0;
  2052  		break;
  2053  	case D_CX:
  2054  	case D_CL:
  2055  	case D_CH:
  2056  	case D_INDIR+D_CX:
  2057  		canc = 0;
  2058  		break;
  2059  	case D_DX:
  2060  	case D_DL:
  2061  	case D_DH:
  2062  	case D_INDIR+D_DX:
  2063  		cand = 0;
  2064  		break;
  2065  	}
  2066  	switch(a->index) {
  2067  	case D_AX:
  2068  		cana = 0;
  2069  		break;
  2070  	case D_BX:
  2071  		canb = 0;
  2072  		break;
  2073  	case D_CX:
  2074  		canc = 0;
  2075  		break;
  2076  	case D_DX:
  2077  		cand = 0;
  2078  		break;
  2079  	}
  2080  	if(cana)
  2081  		return D_AX;
  2082  	if(canb)
  2083  		return D_BX;
  2084  	if(canc)
  2085  		return D_CX;
  2086  	if(cand)
  2087  		return D_DX;
  2088  
  2089  	ctxt->diag("impossible byte register");
  2090  	sysfatal("bad code");
  2091  	return 0;
  2092  }
  2093  
  2094  static void
  2095  subreg(Prog *p, int from, int to)
  2096  {
  2097  
  2098  	if(0 /* debug['Q'] */)
  2099  		print("\n%P	s/%R/%R/\n", p, from, to);
  2100  
  2101  	if(p->from.type == from) {
  2102  		p->from.type = to;
  2103  		p->ft = 0;
  2104  	}
  2105  	if(p->to.type == from) {
  2106  		p->to.type = to;
  2107  		p->tt = 0;
  2108  	}
  2109  
  2110  	if(p->from.index == from) {
  2111  		p->from.index = to;
  2112  		p->ft = 0;
  2113  	}
  2114  	if(p->to.index == from) {
  2115  		p->to.index = to;
  2116  		p->tt = 0;
  2117  	}
  2118  
  2119  	from += D_INDIR;
  2120  	if(p->from.type == from) {
  2121  		p->from.type = to+D_INDIR;
  2122  		p->ft = 0;
  2123  	}
  2124  	if(p->to.type == from) {
  2125  		p->to.type = to+D_INDIR;
  2126  		p->tt = 0;
  2127  	}
  2128  
  2129  	if(0 /* debug['Q'] */)
  2130  		print("%P\n", p);
  2131  }
  2132  
  2133  static int
  2134  mediaop(Link *ctxt, Optab *o, int op, int osize, int z)
  2135  {
  2136  	switch(op){
  2137  	case Pm:
  2138  	case Pe:
  2139  	case Pf2:
  2140  	case Pf3:
  2141  		if(osize != 1){
  2142  			if(op != Pm)
  2143  				*ctxt->andptr++ = op;
  2144  			*ctxt->andptr++ = Pm;
  2145  			op = o->op[++z];
  2146  			break;
  2147  		}
  2148  	default:
  2149  		if(ctxt->andptr == ctxt->and || ctxt->and[ctxt->andptr - ctxt->and - 1] != Pm)
  2150  			*ctxt->andptr++ = Pm;
  2151  		break;
  2152  	}
  2153  	*ctxt->andptr++ = op;
  2154  	return z;
  2155  }
  2156  
  2157  static void
  2158  doasm(Link *ctxt, Prog *p)
  2159  {
  2160  	Optab *o;
  2161  	Prog *q, pp;
  2162  	uchar *t;
  2163  	int z, op, ft, tt, breg;
  2164  	int32 v, pre;
  2165  	Reloc rel, *r;
  2166  	Addr *a;
  2167  	
  2168  	ctxt->curp = p;	// TODO
  2169  
  2170  	pre = prefixof(ctxt, &p->from);
  2171  	if(pre)
  2172  		*ctxt->andptr++ = pre;
  2173  	pre = prefixof(ctxt, &p->to);
  2174  	if(pre)
  2175  		*ctxt->andptr++ = pre;
  2176  
  2177  	if(p->ft == 0)
  2178  		p->ft = oclass(&p->from);
  2179  	if(p->tt == 0)
  2180  		p->tt = oclass(&p->to);
  2181  
  2182  	ft = p->ft * Ymax;
  2183  	tt = p->tt * Ymax;
  2184  	o = &optab[p->as];
  2185  	t = o->ytab;
  2186  	if(t == 0) {
  2187  		ctxt->diag("asmins: noproto %P", p);
  2188  		return;
  2189  	}
  2190  	for(z=0; *t; z+=t[3],t+=4)
  2191  		if(ycover[ft+t[0]])
  2192  		if(ycover[tt+t[1]])
  2193  			goto found;
  2194  	goto domov;
  2195  
  2196  found:
  2197  	switch(o->prefix) {
  2198  	case Pq:	/* 16 bit escape and opcode escape */
  2199  		*ctxt->andptr++ = Pe;
  2200  		*ctxt->andptr++ = Pm;
  2201  		break;
  2202  
  2203  	case Pf2:	/* xmm opcode escape */
  2204  	case Pf3:
  2205  		*ctxt->andptr++ = o->prefix;
  2206  		*ctxt->andptr++ = Pm;
  2207  		break;
  2208  
  2209  	case Pm:	/* opcode escape */
  2210  		*ctxt->andptr++ = Pm;
  2211  		break;
  2212  
  2213  	case Pe:	/* 16 bit escape */
  2214  		*ctxt->andptr++ = Pe;
  2215  		break;
  2216  
  2217  	case Pb:	/* botch */
  2218  		break;
  2219  	}
  2220  
  2221  	op = o->op[z];
  2222  	switch(t[2]) {
  2223  	default:
  2224  		ctxt->diag("asmins: unknown z %d %P", t[2], p);
  2225  		return;
  2226  
  2227  	case Zpseudo:
  2228  		break;
  2229  
  2230  	case Zlit:
  2231  		for(; op = o->op[z]; z++)
  2232  			*ctxt->andptr++ = op;
  2233  		break;
  2234  
  2235  	case Zlitm_r:
  2236  		for(; op = o->op[z]; z++)
  2237  			*ctxt->andptr++ = op;
  2238  		asmand(ctxt, &p->from, reg[p->to.type]);
  2239  		break;
  2240  
  2241  	case Zm_r:
  2242  		*ctxt->andptr++ = op;
  2243  		asmand(ctxt, &p->from, reg[p->to.type]);
  2244  		break;
  2245  
  2246  	case Zm2_r:
  2247  		*ctxt->andptr++ = op;
  2248  		*ctxt->andptr++ = o->op[z+1];
  2249  		asmand(ctxt, &p->from, reg[p->to.type]);
  2250  		break;
  2251  
  2252  	case Zm_r_xm:
  2253  		mediaop(ctxt, o, op, t[3], z);
  2254  		asmand(ctxt, &p->from, reg[p->to.type]);
  2255  		break;
  2256  
  2257  	case Zm_r_i_xm:
  2258  		mediaop(ctxt, o, op, t[3], z);
  2259  		asmand(ctxt, &p->from, reg[p->to.type]);
  2260  		*ctxt->andptr++ = p->to.offset;
  2261  		break;
  2262  
  2263  	case Zibm_r:
  2264  		while ((op = o->op[z++]) != 0)
  2265  			*ctxt->andptr++ = op;
  2266  		asmand(ctxt, &p->from, reg[p->to.type]);
  2267  		*ctxt->andptr++ = p->to.offset;
  2268  		break;
  2269  
  2270  	case Zaut_r:
  2271  		*ctxt->andptr++ = 0x8d;	/* leal */
  2272  		if(p->from.type != D_ADDR)
  2273  			ctxt->diag("asmins: Zaut sb type ADDR");
  2274  		p->from.type = p->from.index;
  2275  		p->from.index = D_NONE;
  2276  		p->ft = 0;
  2277  		asmand(ctxt, &p->from, reg[p->to.type]);
  2278  		p->from.index = p->from.type;
  2279  		p->from.type = D_ADDR;
  2280  		p->ft = 0;
  2281  		break;
  2282  
  2283  	case Zm_o:
  2284  		*ctxt->andptr++ = op;
  2285  		asmand(ctxt, &p->from, o->op[z+1]);
  2286  		break;
  2287  
  2288  	case Zr_m:
  2289  		*ctxt->andptr++ = op;
  2290  		asmand(ctxt, &p->to, reg[p->from.type]);
  2291  		break;
  2292  
  2293  	case Zr_m_xm:
  2294  		mediaop(ctxt, o, op, t[3], z);
  2295  		asmand(ctxt, &p->to, reg[p->from.type]);
  2296  		break;
  2297  
  2298  	case Zr_m_i_xm:
  2299  		mediaop(ctxt, o, op, t[3], z);
  2300  		asmand(ctxt, &p->to, reg[p->from.type]);
  2301  		*ctxt->andptr++ = p->from.offset;
  2302  		break;
  2303  
  2304  	case Zcallindreg:
  2305  		r = addrel(ctxt->cursym);
  2306  		r->off = p->pc;
  2307  		r->type = R_CALLIND;
  2308  		r->siz = 0;
  2309  		// fallthrough
  2310  	case Zo_m:
  2311  		*ctxt->andptr++ = op;
  2312  		asmand(ctxt, &p->to, o->op[z+1]);
  2313  		break;
  2314  
  2315  	case Zm_ibo:
  2316  		*ctxt->andptr++ = op;
  2317  		asmand(ctxt, &p->from, o->op[z+1]);
  2318  		*ctxt->andptr++ = vaddr(ctxt, &p->to, nil);
  2319  		break;
  2320  
  2321  	case Zibo_m:
  2322  		*ctxt->andptr++ = op;
  2323  		asmand(ctxt, &p->to, o->op[z+1]);
  2324  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2325  		break;
  2326  
  2327  	case Z_ib:
  2328  	case Zib_:
  2329  		if(t[2] == Zib_)
  2330  			a = &p->from;
  2331  		else
  2332  			a = &p->to;
  2333  		v = vaddr(ctxt, a, nil);
  2334  		*ctxt->andptr++ = op;
  2335  		*ctxt->andptr++ = v;
  2336  		break;
  2337  
  2338  	case Zib_rp:
  2339  		*ctxt->andptr++ = op + reg[p->to.type];
  2340  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2341  		break;
  2342  
  2343  	case Zil_rp:
  2344  		*ctxt->andptr++ = op + reg[p->to.type];
  2345  		if(o->prefix == Pe) {
  2346  			v = vaddr(ctxt, &p->from, nil);
  2347  			*ctxt->andptr++ = v;
  2348  			*ctxt->andptr++ = v>>8;
  2349  		}
  2350  		else
  2351  			relput4(ctxt, p, &p->from);
  2352  		break;
  2353  
  2354  	case Zib_rr:
  2355  		*ctxt->andptr++ = op;
  2356  		asmand(ctxt, &p->to, reg[p->to.type]);
  2357  		*ctxt->andptr++ = vaddr(ctxt, &p->from, nil);
  2358  		break;
  2359  
  2360  	case Z_il:
  2361  	case Zil_:
  2362  		if(t[2] == Zil_)
  2363  			a = &p->from;
  2364  		else
  2365  			a = &p->to;
  2366  		*ctxt->andptr++ = op;
  2367  		if(o->prefix == Pe) {
  2368  			v = vaddr(ctxt, a, nil);
  2369  			*ctxt->andptr++ = v;
  2370  			*ctxt->andptr++ = v>>8;
  2371  		}
  2372  		else
  2373  			relput4(ctxt, p, a);
  2374  		break;
  2375  
  2376  	case Zm_ilo:
  2377  	case Zilo_m:
  2378  		*ctxt->andptr++ = op;
  2379  		if(t[2] == Zilo_m) {
  2380  			a = &p->from;
  2381  			asmand(ctxt, &p->to, o->op[z+1]);
  2382  		} else {
  2383  			a = &p->to;
  2384  			asmand(ctxt, &p->from, o->op[z+1]);
  2385  		}
  2386  		if(o->prefix == Pe) {
  2387  			v = vaddr(ctxt, a, nil);
  2388  			*ctxt->andptr++ = v;
  2389  			*ctxt->andptr++ = v>>8;
  2390  		}
  2391  		else
  2392  			relput4(ctxt, p, a);
  2393  		break;
  2394  
  2395  	case Zil_rr:
  2396  		*ctxt->andptr++ = op;
  2397  		asmand(ctxt, &p->to, reg[p->to.type]);
  2398  		if(o->prefix == Pe) {
  2399  			v = vaddr(ctxt, &p->from, nil);
  2400  			*ctxt->andptr++ = v;
  2401  			*ctxt->andptr++ = v>>8;
  2402  		}
  2403  		else
  2404  			relput4(ctxt, p, &p->from);
  2405  		break;
  2406  
  2407  	case Z_rp:
  2408  		*ctxt->andptr++ = op + reg[p->to.type];
  2409  		break;
  2410  
  2411  	case Zrp_:
  2412  		*ctxt->andptr++ = op + reg[p->from.type];
  2413  		break;
  2414  
  2415  	case Zclr:
  2416  		*ctxt->andptr++ = op;
  2417  		asmand(ctxt, &p->to, reg[p->to.type]);
  2418  		break;
  2419  	
  2420  	case Zcall:
  2421  		if(p->to.sym == nil) {
  2422  			ctxt->diag("call without target");
  2423  			sysfatal("bad code");
  2424  		}
  2425  		*ctxt->andptr++ = op;
  2426  		r = addrel(ctxt->cursym);
  2427  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2428  		r->type = R_CALL;
  2429  		r->siz = 4;
  2430  		r->sym = p->to.sym;
  2431  		r->add = p->to.offset;
  2432  		put4(ctxt, 0);
  2433  		break;
  2434  
  2435  	case Zbr:
  2436  	case Zjmp:
  2437  	case Zloop:
  2438  		if(p->to.sym != nil) {
  2439  			if(t[2] != Zjmp) {
  2440  				ctxt->diag("branch to ATEXT");
  2441  				sysfatal("bad code");
  2442  			}
  2443  			*ctxt->andptr++ = o->op[z+1];
  2444  			r = addrel(ctxt->cursym);
  2445  			r->off = p->pc + ctxt->andptr - ctxt->and;
  2446  			r->sym = p->to.sym;
  2447  			r->type = R_PCREL;
  2448  			r->siz = 4;
  2449  			put4(ctxt, 0);
  2450  			break;
  2451  		}
  2452  
  2453  		// Assumes q is in this function.
  2454  		// Fill in backward jump now.
  2455  		q = p->pcond;
  2456  		if(q == nil) {
  2457  			ctxt->diag("jmp/branch/loop without target");
  2458  			sysfatal("bad code");
  2459  		}
  2460  		if(p->back & 1) {
  2461  			v = q->pc - (p->pc + 2);
  2462  			if(v >= -128) {
  2463  				if(p->as == AJCXZW)
  2464  					*ctxt->andptr++ = 0x67;
  2465  				*ctxt->andptr++ = op;
  2466  				*ctxt->andptr++ = v;
  2467  			} else if(t[2] == Zloop) {
  2468  				ctxt->diag("loop too far: %P", p);
  2469  			} else {
  2470  				v -= 5-2;
  2471  				if(t[2] == Zbr) {
  2472  					*ctxt->andptr++ = 0x0f;
  2473  					v--;
  2474  				}
  2475  				*ctxt->andptr++ = o->op[z+1];
  2476  				*ctxt->andptr++ = v;
  2477  				*ctxt->andptr++ = v>>8;
  2478  				*ctxt->andptr++ = v>>16;
  2479  				*ctxt->andptr++ = v>>24;
  2480  			}
  2481  			break;
  2482  		}
  2483  
  2484  		// Annotate target; will fill in later.
  2485  		p->forwd = q->comefrom;
  2486  		q->comefrom = p;
  2487  		if(p->back & 2)	{ // short
  2488  			if(p->as == AJCXZW)
  2489  				*ctxt->andptr++ = 0x67;
  2490  			*ctxt->andptr++ = op;
  2491  			*ctxt->andptr++ = 0;
  2492  		} else if(t[2] == Zloop) {
  2493  			ctxt->diag("loop too far: %P", p);
  2494  		} else {
  2495  			if(t[2] == Zbr)
  2496  				*ctxt->andptr++ = 0x0f;
  2497  			*ctxt->andptr++ = o->op[z+1];
  2498  			*ctxt->andptr++ = 0;
  2499  			*ctxt->andptr++ = 0;
  2500  			*ctxt->andptr++ = 0;
  2501  			*ctxt->andptr++ = 0;
  2502  		}
  2503  		break;
  2504  
  2505  	case Zcallcon:
  2506  	case Zjmpcon:
  2507  		if(t[2] == Zcallcon)
  2508  			*ctxt->andptr++ = op;
  2509  		else
  2510  			*ctxt->andptr++ = o->op[z+1];
  2511  		r = addrel(ctxt->cursym);
  2512  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2513  		r->type = R_PCREL;
  2514  		r->siz = 4;
  2515  		r->add = p->to.offset;
  2516  		put4(ctxt, 0);
  2517  		break;
  2518  	
  2519  	case Zcallind:
  2520  		*ctxt->andptr++ = op;
  2521  		*ctxt->andptr++ = o->op[z+1];
  2522  		r = addrel(ctxt->cursym);
  2523  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2524  		r->type = R_ADDR;
  2525  		r->siz = 4;
  2526  		r->add = p->to.offset;
  2527  		r->sym = p->to.sym;
  2528  		put4(ctxt, 0);
  2529  		break;
  2530  
  2531  	case Zbyte:
  2532  		v = vaddr(ctxt, &p->from, &rel);
  2533  		if(rel.siz != 0) {
  2534  			rel.siz = op;
  2535  			r = addrel(ctxt->cursym);
  2536  			*r = rel;
  2537  			r->off = p->pc + ctxt->andptr - ctxt->and;
  2538  		}
  2539  		*ctxt->andptr++ = v;
  2540  		if(op > 1) {
  2541  			*ctxt->andptr++ = v>>8;
  2542  			if(op > 2) {
  2543  				*ctxt->andptr++ = v>>16;
  2544  				*ctxt->andptr++ = v>>24;
  2545  			}
  2546  		}
  2547  		break;
  2548  
  2549  	case Zmov:
  2550  		goto domov;
  2551  	}
  2552  	return;
  2553  
  2554  domov:
  2555  	for(t=ymovtab; *t; t+=8)
  2556  		if(p->as == t[0])
  2557  		if(ycover[ft+t[1]])
  2558  		if(ycover[tt+t[2]])
  2559  			goto mfound;
  2560  bad:
  2561  	/*
  2562  	 * here, the assembly has failed.
  2563  	 * if its a byte instruction that has
  2564  	 * unaddressable registers, try to
  2565  	 * exchange registers and reissue the
  2566  	 * instruction with the operands renamed.
  2567  	 */
  2568  	pp = *p;
  2569  	z = p->from.type;
  2570  	if(z >= D_BP && z <= D_DI) {
  2571  		if((breg = byteswapreg(ctxt, &p->to)) != D_AX) {
  2572  			*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  2573  			asmand(ctxt, &p->from, reg[breg]);
  2574  			subreg(&pp, z, breg);
  2575  			doasm(ctxt, &pp);
  2576  			*ctxt->andptr++ = 0x87;			/* xchg lhs,bx */
  2577  			asmand(ctxt, &p->from, reg[breg]);
  2578  		} else {
  2579  			*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  2580  			subreg(&pp, z, D_AX);
  2581  			doasm(ctxt, &pp);
  2582  			*ctxt->andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  2583  		}
  2584  		return;
  2585  	}
  2586  	z = p->to.type;
  2587  	if(z >= D_BP && z <= D_DI) {
  2588  		if((breg = byteswapreg(ctxt, &p->from)) != D_AX) {
  2589  			*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  2590  			asmand(ctxt, &p->to, reg[breg]);
  2591  			subreg(&pp, z, breg);
  2592  			doasm(ctxt, &pp);
  2593  			*ctxt->andptr++ = 0x87;			/* xchg rhs,bx */
  2594  			asmand(ctxt, &p->to, reg[breg]);
  2595  		} else {
  2596  			*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  2597  			subreg(&pp, z, D_AX);
  2598  			doasm(ctxt, &pp);
  2599  			*ctxt->andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  2600  		}
  2601  		return;
  2602  	}
  2603  	ctxt->diag("doasm: notfound t2=%ux from=%ux to=%ux %P", t[2], p->from.type, p->to.type, p);
  2604  	return;
  2605  
  2606  mfound:
  2607  	switch(t[3]) {
  2608  	default:
  2609  		ctxt->diag("asmins: unknown mov %d %P", t[3], p);
  2610  		break;
  2611  
  2612  	case 0:	/* lit */
  2613  		for(z=4; t[z]!=E; z++)
  2614  			*ctxt->andptr++ = t[z];
  2615  		break;
  2616  
  2617  	case 1:	/* r,m */
  2618  		*ctxt->andptr++ = t[4];
  2619  		asmand(ctxt, &p->to, t[5]);
  2620  		break;
  2621  
  2622  	case 2:	/* m,r */
  2623  		*ctxt->andptr++ = t[4];
  2624  		asmand(ctxt, &p->from, t[5]);
  2625  		break;
  2626  
  2627  	case 3:	/* r,m - 2op */
  2628  		*ctxt->andptr++ = t[4];
  2629  		*ctxt->andptr++ = t[5];
  2630  		asmand(ctxt, &p->to, t[6]);
  2631  		break;
  2632  
  2633  	case 4:	/* m,r - 2op */
  2634  		*ctxt->andptr++ = t[4];
  2635  		*ctxt->andptr++ = t[5];
  2636  		asmand(ctxt, &p->from, t[6]);
  2637  		break;
  2638  
  2639  	case 5:	/* load full pointer, trash heap */
  2640  		if(t[4])
  2641  			*ctxt->andptr++ = t[4];
  2642  		switch(p->to.index) {
  2643  		default:
  2644  			goto bad;
  2645  		case D_DS:
  2646  			*ctxt->andptr++ = 0xc5;
  2647  			break;
  2648  		case D_SS:
  2649  			*ctxt->andptr++ = 0x0f;
  2650  			*ctxt->andptr++ = 0xb2;
  2651  			break;
  2652  		case D_ES:
  2653  			*ctxt->andptr++ = 0xc4;
  2654  			break;
  2655  		case D_FS:
  2656  			*ctxt->andptr++ = 0x0f;
  2657  			*ctxt->andptr++ = 0xb4;
  2658  			break;
  2659  		case D_GS:
  2660  			*ctxt->andptr++ = 0x0f;
  2661  			*ctxt->andptr++ = 0xb5;
  2662  			break;
  2663  		}
  2664  		asmand(ctxt, &p->from, reg[p->to.type]);
  2665  		break;
  2666  
  2667  	case 6:	/* double shift */
  2668  		z = p->from.type;
  2669  		switch(z) {
  2670  		default:
  2671  			goto bad;
  2672  		case D_CONST:
  2673  			*ctxt->andptr++ = 0x0f;
  2674  			*ctxt->andptr++ = t[4];
  2675  			asmand(ctxt, &p->to, reg[p->from.index]);
  2676  			*ctxt->andptr++ = p->from.offset;
  2677  			break;
  2678  		case D_CL:
  2679  		case D_CX:
  2680  			*ctxt->andptr++ = 0x0f;
  2681  			*ctxt->andptr++ = t[5];
  2682  			asmand(ctxt, &p->to, reg[p->from.index]);
  2683  			break;
  2684  		}
  2685  		break;
  2686  
  2687  	case 7: /* imul rm,r */
  2688  		if(t[4] == Pq) {
  2689  			*ctxt->andptr++ = Pe;
  2690  			*ctxt->andptr++ = Pm;
  2691  		} else
  2692  			*ctxt->andptr++ = t[4];
  2693  		*ctxt->andptr++ = t[5];
  2694  		asmand(ctxt, &p->from, reg[p->to.type]);
  2695  		break;
  2696  	
  2697  	case 8: /* mov tls, r */
  2698  		// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  2699  		// where you load the TLS base register into a register and then index off that
  2700  		// register to access the actual TLS variables. Systems that allow direct TLS access
  2701  		// are handled in prefixof above and should not be listed here.
  2702  		switch(ctxt->headtype) {
  2703  		default:
  2704  			sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype));
  2705  
  2706  		case Hlinux:
  2707  		case Hnacl:
  2708  			// ELF TLS base is 0(GS).
  2709  			pp.from = p->from;
  2710  			pp.from.type = D_INDIR+D_GS;
  2711  			pp.from.offset = 0;
  2712  			pp.from.index = D_NONE;
  2713  			pp.from.scale = 0;
  2714  			*ctxt->andptr++ = 0x65; // GS
  2715  			*ctxt->andptr++ = 0x8B;
  2716  			asmand(ctxt, &pp.from, reg[p->to.type]);
  2717  			break;
  2718  		
  2719  		case Hplan9:
  2720  			if(ctxt->plan9privates == nil)
  2721  				ctxt->plan9privates = linklookup(ctxt, "_privates", 0);
  2722  			memset(&pp.from, 0, sizeof pp.from);
  2723  			pp.from.type = D_EXTERN;
  2724  			pp.from.sym = ctxt->plan9privates;
  2725  			pp.from.offset = 0;
  2726  			pp.from.index = D_NONE;
  2727  			*ctxt->andptr++ = 0x8B;
  2728  			asmand(ctxt, &pp.from, reg[p->to.type]);
  2729  			break;
  2730  
  2731  		case Hwindows:
  2732  			// Windows TLS base is always 0x14(FS).
  2733  			pp.from = p->from;
  2734  			pp.from.type = D_INDIR+D_FS;
  2735  			pp.from.offset = 0x14;
  2736  			pp.from.index = D_NONE;
  2737  			pp.from.scale = 0;
  2738  			*ctxt->andptr++ = 0x64; // FS
  2739  			*ctxt->andptr++ = 0x8B;
  2740  			asmand(ctxt, &pp.from, reg[p->to.type]);
  2741  			break;
  2742  		}
  2743  		break;
  2744  	}
  2745  }
  2746  
  2747  static uchar naclret[] = {
  2748  	0x5d, // POPL BP
  2749  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  2750  	0x83, 0xe5, 0xe0,	// ANDL $~31, BP
  2751  	0xff, 0xe5, // JMP BP
  2752  };
  2753  
  2754  static void
  2755  asmins(Link *ctxt, Prog *p)
  2756  {
  2757  	Reloc *r;
  2758  
  2759  	ctxt->andptr = ctxt->and;
  2760  	
  2761  	if(p->as == AUSEFIELD) {
  2762  		r = addrel(ctxt->cursym);
  2763  		r->off = 0;
  2764  		r->sym = p->from.sym;
  2765  		r->type = R_USEFIELD;
  2766  		r->siz = 0;
  2767  		return;
  2768  	}
  2769  
  2770  	if(ctxt->headtype == Hnacl) {
  2771  		switch(p->as) {
  2772  		case ARET:
  2773  			memmove(ctxt->andptr, naclret, sizeof naclret);
  2774  			ctxt->andptr += sizeof naclret;
  2775  			return;
  2776  		case ACALL:
  2777  		case AJMP:
  2778  			if(D_AX <= p->to.type && p->to.type <= D_DI) {
  2779  				*ctxt->andptr++ = 0x83;
  2780  				*ctxt->andptr++ = 0xe0 | (p->to.type - D_AX);
  2781  				*ctxt->andptr++ = 0xe0;
  2782  			}
  2783  			break;
  2784  		case AINT:
  2785  			*ctxt->andptr++ = 0xf4;
  2786  			return;
  2787  		}
  2788  	}
  2789  
  2790  	doasm(ctxt, p);
  2791  	if(ctxt->andptr > ctxt->and+sizeof ctxt->and) {
  2792  		print("and[] is too short - %ld byte instruction\n", ctxt->andptr - ctxt->and);
  2793  		sysfatal("bad code");
  2794  	}
  2795  }