github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/6l/span.c (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // Instruction layout.
    32  
    33  #include	"l.h"
    34  #include	"../ld/lib.h"
    35  #include	"../ld/elf.h"
    36  
// rexflag accumulates prefix bits (taken from regrex[] and Pw) while an
// instruction's operands are being encoded; see asmandsz and doasm.
static int	rexflag;
// asmode is compared against 64 by oclass and asmidx to decide whether
// the R8-R15 family of registers is acceptable.
static int	asmode;
// Forward declaration: vaddr is defined further down in this file.
static vlong	vaddr(Adr*, Reloc*);
    40  
// single-instruction no-ops of various lengths.
// constructed by hand and disassembled with gdb to verify.
// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
//
// Row i holds an encoding that is i+1 bytes long (1 through 10 bytes);
// fillnop depends on this when it copies the first m bytes of nop[m-1].
static uchar nop[][16] = {
	{0x90},
	{0x66, 0x90},
	{0x0F, 0x1F, 0x00},
	{0x0F, 0x1F, 0x40, 0x00},
	{0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
};
    56  
    57  static void
    58  fillnop(uchar *p, int n)
    59  {
    60  	int m;
    61  
    62  	while(n > 0) {
    63  		m = n;
    64  		if(m > nelem(nop))
    65  			m = nelem(nop);
    66  		memmove(p, nop[m-1], m);
    67  		p += m;
    68  		n -= m;
    69  	}
    70  }
    71  
    72  void
    73  span1(Sym *s)
    74  {
    75  	Prog *p, *q;
    76  	int32 c, v, loop;
    77  	uchar *bp;
    78  	int n, m, i;
    79  
    80  	cursym = s;
    81  	
    82  	if(s->p != nil)
    83  		return;
    84  
    85  	for(p = s->text; p != P; p = p->link) {
    86  		p->back = 2;	// use short branches first time through
    87  		if((q = p->pcond) != P && (q->back & 2)) {
    88  			p->back |= 1;	// backward jump
    89  			q->back |= 4;   // loop head
    90  		}
    91  
    92  		if(p->as == AADJSP) {
    93  			p->to.type = D_SP;
    94  			v = -p->from.offset;
    95  			p->from.offset = v;
    96  			p->as = p->mode != 64? AADDL: AADDQ;
    97  			if(v < 0) {
    98  				p->as = p->mode != 64? ASUBL: ASUBQ;
    99  				v = -v;
   100  				p->from.offset = v;
   101  			}
   102  			if(v == 0)
   103  				p->as = ANOP;
   104  		}
   105  	}
   106  	
   107  	n = 0;
   108  	do {
   109  		loop = 0;
   110  		memset(s->r, 0, s->nr*sizeof s->r[0]);
   111  		s->nr = 0;
   112  		s->np = 0;
   113  		c = 0;
   114  		for(p = s->text; p != P; p = p->link) {
   115  			if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
   116  				// pad with NOPs
   117  				v = -c&(LoopAlign-1);
   118  				if(v <= MaxLoopPad) {
   119  					symgrow(s, c+v);
   120  					fillnop(s->p+c, v);
   121  					c += v;
   122  				}
   123  			}
   124  
   125  			p->pc = c;
   126  
   127  			// process forward jumps to p
   128  			for(q = p->comefrom; q != P; q = q->forwd) {
   129  				v = p->pc - (q->pc + q->mark);
   130  				if(q->back & 2)	{	// short
   131  					if(v > 127) {
   132  						loop++;
   133  						q->back ^= 2;
   134  					}
   135  					if(q->as == AJCXZL)
   136  						s->p[q->pc+2] = v;
   137  					else
   138  						s->p[q->pc+1] = v;
   139  				} else {
   140  					bp = s->p + q->pc + q->mark - 4;
   141  					*bp++ = v;
   142  					*bp++ = v>>8;
   143  					*bp++ = v>>16;
   144  					*bp = v>>24;
   145  				}	
   146  			}
   147  			p->comefrom = P;
   148  
   149  			asmins(p);
   150  			p->pc = c;
   151  			m = andptr-and;
   152  			symgrow(s, p->pc+m);
   153  			memmove(s->p+p->pc, and, m);
   154  			p->mark = m;
   155  			c += m;
   156  		}
   157  		if(++n > 20) {
   158  			diag("span must be looping");
   159  			errorexit();
   160  		}
   161  	} while(loop);
   162  	s->size = c;
   163  
   164  	if(debug['a'] > 1) {
   165  		print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
   166  		for(i=0; i<s->np; i++) {
   167  			print(" %.2ux", s->p[i]);
   168  			if(i%16 == 15)
   169  				print("\n  %.6ux", i+1);
   170  		}
   171  		if(i%16)
   172  			print("\n");
   173  	
   174  		for(i=0; i<s->nr; i++) {
   175  			Reloc *r;
   176  			
   177  			r = &s->r[i];
   178  			print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
   179  		}
   180  	}
   181  }
   182  
   183  void
   184  span(void)
   185  {
   186  	Prog *p, *q;
   187  	int32 v;
   188  	int n;
   189  
   190  	if(debug['v'])
   191  		Bprint(&bso, "%5.2f span\n", cputime());
   192  
   193  	// NOTE(rsc): If we get rid of the globals we should
   194  	// be able to parallelize these iterations.
   195  	for(cursym = textp; cursym != nil; cursym = cursym->next) {
   196  		if(cursym->p != nil)
   197  			continue;
   198  		// TODO: move into span1
   199  		for(p = cursym->text; p != P; p = p->link) {
   200  			n = 0;
   201  			if(p->to.type == D_BRANCH)
   202  				if(p->pcond == P)
   203  					p->pcond = p;
   204  			if((q = p->pcond) != P)
   205  				if(q->back != 2)
   206  					n = 1;
   207  			p->back = n;
   208  			if(p->as == AADJSP) {
   209  				p->to.type = D_SP;
   210  				v = -p->from.offset;
   211  				p->from.offset = v;
   212  				p->as = p->mode != 64? AADDL: AADDQ;
   213  				if(v < 0) {
   214  					p->as = p->mode != 64? ASUBL: ASUBQ;
   215  					v = -v;
   216  					p->from.offset = v;
   217  				}
   218  				if(v == 0)
   219  					p->as = ANOP;
   220  			}
   221  		}
   222  		span1(cursym);
   223  	}
   224  }
   225  
   226  void
   227  xdefine(char *p, int t, vlong v)
   228  {
   229  	Sym *s;
   230  
   231  	s = lookup(p, 0);
   232  	s->type = t;
   233  	s->value = v;
   234  	s->reachable = 1;
   235  	s->special = 1;
   236  }
   237  
   238  void
   239  instinit(void)
   240  {
   241  	int c, i;
   242  
   243  	for(i=1; optab[i].as; i++) {
   244  		c = optab[i].as;
   245  		if(opindex[c] != nil) {
   246  			diag("phase error in optab: %d (%A)", i, c);
   247  			errorexit();
   248  		}
   249  		opindex[c] = &optab[i];
   250  	}
   251  
   252  	for(i=0; i<Ymax; i++)
   253  		ycover[i*Ymax + i] = 1;
   254  
   255  	ycover[Yi0*Ymax + Yi8] = 1;
   256  	ycover[Yi1*Ymax + Yi8] = 1;
   257  
   258  	ycover[Yi0*Ymax + Ys32] = 1;
   259  	ycover[Yi1*Ymax + Ys32] = 1;
   260  	ycover[Yi8*Ymax + Ys32] = 1;
   261  
   262  	ycover[Yi0*Ymax + Yi32] = 1;
   263  	ycover[Yi1*Ymax + Yi32] = 1;
   264  	ycover[Yi8*Ymax + Yi32] = 1;
   265  	ycover[Ys32*Ymax + Yi32] = 1;
   266  
   267  	ycover[Yi0*Ymax + Yi64] = 1;
   268  	ycover[Yi1*Ymax + Yi64] = 1;
   269  	ycover[Yi8*Ymax + Yi64] = 1;
   270  	ycover[Ys32*Ymax + Yi64] = 1;
   271  	ycover[Yi32*Ymax + Yi64] = 1;
   272  
   273  	ycover[Yal*Ymax + Yrb] = 1;
   274  	ycover[Ycl*Ymax + Yrb] = 1;
   275  	ycover[Yax*Ymax + Yrb] = 1;
   276  	ycover[Ycx*Ymax + Yrb] = 1;
   277  	ycover[Yrx*Ymax + Yrb] = 1;
   278  	ycover[Yrl*Ymax + Yrb] = 1;
   279  
   280  	ycover[Ycl*Ymax + Ycx] = 1;
   281  
   282  	ycover[Yax*Ymax + Yrx] = 1;
   283  	ycover[Ycx*Ymax + Yrx] = 1;
   284  
   285  	ycover[Yax*Ymax + Yrl] = 1;
   286  	ycover[Ycx*Ymax + Yrl] = 1;
   287  	ycover[Yrx*Ymax + Yrl] = 1;
   288  
   289  	ycover[Yf0*Ymax + Yrf] = 1;
   290  
   291  	ycover[Yal*Ymax + Ymb] = 1;
   292  	ycover[Ycl*Ymax + Ymb] = 1;
   293  	ycover[Yax*Ymax + Ymb] = 1;
   294  	ycover[Ycx*Ymax + Ymb] = 1;
   295  	ycover[Yrx*Ymax + Ymb] = 1;
   296  	ycover[Yrb*Ymax + Ymb] = 1;
   297  	ycover[Yrl*Ymax + Ymb] = 1;
   298  	ycover[Ym*Ymax + Ymb] = 1;
   299  
   300  	ycover[Yax*Ymax + Yml] = 1;
   301  	ycover[Ycx*Ymax + Yml] = 1;
   302  	ycover[Yrx*Ymax + Yml] = 1;
   303  	ycover[Yrl*Ymax + Yml] = 1;
   304  	ycover[Ym*Ymax + Yml] = 1;
   305  
   306  	ycover[Yax*Ymax + Ymm] = 1;
   307  	ycover[Ycx*Ymax + Ymm] = 1;
   308  	ycover[Yrx*Ymax + Ymm] = 1;
   309  	ycover[Yrl*Ymax + Ymm] = 1;
   310  	ycover[Ym*Ymax + Ymm] = 1;
   311  	ycover[Ymr*Ymax + Ymm] = 1;
   312  
   313  	ycover[Ym*Ymax + Yxm] = 1;
   314  	ycover[Yxr*Ymax + Yxm] = 1;
   315  
   316  	for(i=0; i<D_NONE; i++) {
   317  		reg[i] = -1;
   318  		if(i >= D_AL && i <= D_R15B) {
   319  			reg[i] = (i-D_AL) & 7;
   320  			if(i >= D_SPB && i <= D_DIB)
   321  				regrex[i] = 0x40;
   322  			if(i >= D_R8B && i <= D_R15B)
   323  				regrex[i] = Rxr | Rxx | Rxb;
   324  		}
   325  		if(i >= D_AH && i<= D_BH)
   326  			reg[i] = 4 + ((i-D_AH) & 7);
   327  		if(i >= D_AX && i <= D_R15) {
   328  			reg[i] = (i-D_AX) & 7;
   329  			if(i >= D_R8)
   330  				regrex[i] = Rxr | Rxx | Rxb;
   331  		}
   332  		if(i >= D_F0 && i <= D_F0+7)
   333  			reg[i] = (i-D_F0) & 7;
   334  		if(i >= D_M0 && i <= D_M0+7)
   335  			reg[i] = (i-D_M0) & 7;
   336  		if(i >= D_X0 && i <= D_X0+15) {
   337  			reg[i] = (i-D_X0) & 7;
   338  			if(i >= D_X0+8)
   339  				regrex[i] = Rxr | Rxx | Rxb;
   340  		}
   341  		if(i >= D_CR+8 && i <= D_CR+15)
   342  			regrex[i] = Rxr;
   343  	}
   344  }
   345  
   346  int
   347  prefixof(Adr *a)
   348  {
   349  	switch(a->type) {
   350  	case D_INDIR+D_CS:
   351  		return 0x2e;
   352  	case D_INDIR+D_DS:
   353  		return 0x3e;
   354  	case D_INDIR+D_ES:
   355  		return 0x26;
   356  	case D_INDIR+D_FS:
   357  		return 0x64;
   358  	case D_INDIR+D_GS:
   359  		return 0x65;
   360  	}
   361  	switch(a->index) {
   362  	case D_CS:
   363  		return 0x2e;
   364  	case D_DS:
   365  		return 0x3e;
   366  	case D_ES:
   367  		return 0x26;
   368  	case D_FS:
   369  		return 0x64;
   370  	case D_GS:
   371  		return 0x65;
   372  	}
   373  	return 0;
   374  }
   375  
   376  int
   377  oclass(Adr *a)
   378  {
   379  	vlong v;
   380  	int32 l;
   381  
   382  	if(a->type >= D_INDIR || a->index != D_NONE) {
   383  		if(a->index != D_NONE && a->scale == 0) {
   384  			if(a->type == D_ADDR) {
   385  				switch(a->index) {
   386  				case D_EXTERN:
   387  				case D_STATIC:
   388  					if(flag_shared)
   389  						return Yiauto;
   390  					else
   391  						return Yi32;	/* TO DO: Yi64 */
   392  				case D_AUTO:
   393  				case D_PARAM:
   394  					return Yiauto;
   395  				}
   396  				return Yxxx;
   397  			}
   398  			return Ycol;
   399  		}
   400  		return Ym;
   401  	}
   402  	switch(a->type)
   403  	{
   404  	case D_AL:
   405  		return Yal;
   406  
   407  	case D_AX:
   408  		return Yax;
   409  
   410  /*
   411  	case D_SPB:
   412  */
   413  	case D_BPB:
   414  	case D_SIB:
   415  	case D_DIB:
   416  	case D_R8B:
   417  	case D_R9B:
   418  	case D_R10B:
   419  	case D_R11B:
   420  	case D_R12B:
   421  	case D_R13B:
   422  	case D_R14B:
   423  	case D_R15B:
   424  		if(asmode != 64)
   425  			return Yxxx;
   426  	case D_DL:
   427  	case D_BL:
   428  	case D_AH:
   429  	case D_CH:
   430  	case D_DH:
   431  	case D_BH:
   432  		return Yrb;
   433  
   434  	case D_CL:
   435  		return Ycl;
   436  
   437  	case D_CX:
   438  		return Ycx;
   439  
   440  	case D_DX:
   441  	case D_BX:
   442  		return Yrx;
   443  
   444  	case D_R8:	/* not really Yrl */
   445  	case D_R9:
   446  	case D_R10:
   447  	case D_R11:
   448  	case D_R12:
   449  	case D_R13:
   450  	case D_R14:
   451  	case D_R15:
   452  		if(asmode != 64)
   453  			return Yxxx;
   454  	case D_SP:
   455  	case D_BP:
   456  	case D_SI:
   457  	case D_DI:
   458  		return Yrl;
   459  
   460  	case D_F0+0:
   461  		return	Yf0;
   462  
   463  	case D_F0+1:
   464  	case D_F0+2:
   465  	case D_F0+3:
   466  	case D_F0+4:
   467  	case D_F0+5:
   468  	case D_F0+6:
   469  	case D_F0+7:
   470  		return	Yrf;
   471  
   472  	case D_M0+0:
   473  	case D_M0+1:
   474  	case D_M0+2:
   475  	case D_M0+3:
   476  	case D_M0+4:
   477  	case D_M0+5:
   478  	case D_M0+6:
   479  	case D_M0+7:
   480  		return	Ymr;
   481  
   482  	case D_X0+0:
   483  	case D_X0+1:
   484  	case D_X0+2:
   485  	case D_X0+3:
   486  	case D_X0+4:
   487  	case D_X0+5:
   488  	case D_X0+6:
   489  	case D_X0+7:
   490  	case D_X0+8:
   491  	case D_X0+9:
   492  	case D_X0+10:
   493  	case D_X0+11:
   494  	case D_X0+12:
   495  	case D_X0+13:
   496  	case D_X0+14:
   497  	case D_X0+15:
   498  		return	Yxr;
   499  
   500  	case D_NONE:
   501  		return Ynone;
   502  
   503  	case D_CS:	return	Ycs;
   504  	case D_SS:	return	Yss;
   505  	case D_DS:	return	Yds;
   506  	case D_ES:	return	Yes;
   507  	case D_FS:	return	Yfs;
   508  	case D_GS:	return	Ygs;
   509  
   510  	case D_GDTR:	return	Ygdtr;
   511  	case D_IDTR:	return	Yidtr;
   512  	case D_LDTR:	return	Yldtr;
   513  	case D_MSW:	return	Ymsw;
   514  	case D_TASK:	return	Ytask;
   515  
   516  	case D_CR+0:	return	Ycr0;
   517  	case D_CR+1:	return	Ycr1;
   518  	case D_CR+2:	return	Ycr2;
   519  	case D_CR+3:	return	Ycr3;
   520  	case D_CR+4:	return	Ycr4;
   521  	case D_CR+5:	return	Ycr5;
   522  	case D_CR+6:	return	Ycr6;
   523  	case D_CR+7:	return	Ycr7;
   524  	case D_CR+8:	return	Ycr8;
   525  
   526  	case D_DR+0:	return	Ydr0;
   527  	case D_DR+1:	return	Ydr1;
   528  	case D_DR+2:	return	Ydr2;
   529  	case D_DR+3:	return	Ydr3;
   530  	case D_DR+4:	return	Ydr4;
   531  	case D_DR+5:	return	Ydr5;
   532  	case D_DR+6:	return	Ydr6;
   533  	case D_DR+7:	return	Ydr7;
   534  
   535  	case D_TR+0:	return	Ytr0;
   536  	case D_TR+1:	return	Ytr1;
   537  	case D_TR+2:	return	Ytr2;
   538  	case D_TR+3:	return	Ytr3;
   539  	case D_TR+4:	return	Ytr4;
   540  	case D_TR+5:	return	Ytr5;
   541  	case D_TR+6:	return	Ytr6;
   542  	case D_TR+7:	return	Ytr7;
   543  
   544  	case D_EXTERN:
   545  	case D_STATIC:
   546  	case D_AUTO:
   547  	case D_PARAM:
   548  		return Ym;
   549  
   550  	case D_CONST:
   551  	case D_ADDR:
   552  		if(a->sym == S) {
   553  			v = a->offset;
   554  			if(v == 0)
   555  				return Yi0;
   556  			if(v == 1)
   557  				return Yi1;
   558  			if(v >= -128 && v <= 127)
   559  				return Yi8;
   560  			l = v;
   561  			if((vlong)l == v)
   562  				return Ys32;	/* can sign extend */
   563  			if((v>>32) == 0)
   564  				return Yi32;	/* unsigned */
   565  			return Yi64;
   566  		}
   567  		return Yi32;	/* TO DO: D_ADDR as Yi64 */
   568  
   569  	case D_BRANCH:
   570  		return Ybr;
   571  	}
   572  	return Yxxx;
   573  }
   574  
   575  void
   576  asmidx(int scale, int index, int base)
   577  {
   578  	int i;
   579  
   580  	switch(index) {
   581  	default:
   582  		goto bad;
   583  
   584  	case D_NONE:
   585  		i = 4 << 3;
   586  		goto bas;
   587  
   588  	case D_R8:
   589  	case D_R9:
   590  	case D_R10:
   591  	case D_R11:
   592  	case D_R12:
   593  	case D_R13:
   594  	case D_R14:
   595  	case D_R15:
   596  		if(asmode != 64)
   597  			goto bad;
   598  	case D_AX:
   599  	case D_CX:
   600  	case D_DX:
   601  	case D_BX:
   602  	case D_BP:
   603  	case D_SI:
   604  	case D_DI:
   605  		i = reg[index] << 3;
   606  		break;
   607  	}
   608  	switch(scale) {
   609  	default:
   610  		goto bad;
   611  	case 1:
   612  		break;
   613  	case 2:
   614  		i |= (1<<6);
   615  		break;
   616  	case 4:
   617  		i |= (2<<6);
   618  		break;
   619  	case 8:
   620  		i |= (3<<6);
   621  		break;
   622  	}
   623  bas:
   624  	switch(base) {
   625  	default:
   626  		goto bad;
   627  	case D_NONE:	/* must be mod=00 */
   628  		i |= 5;
   629  		break;
   630  	case D_R8:
   631  	case D_R9:
   632  	case D_R10:
   633  	case D_R11:
   634  	case D_R12:
   635  	case D_R13:
   636  	case D_R14:
   637  	case D_R15:
   638  		if(asmode != 64)
   639  			goto bad;
   640  	case D_AX:
   641  	case D_CX:
   642  	case D_DX:
   643  	case D_BX:
   644  	case D_SP:
   645  	case D_BP:
   646  	case D_SI:
   647  	case D_DI:
   648  		i |= reg[base];
   649  		break;
   650  	}
   651  	*andptr++ = i;
   652  	return;
   653  bad:
   654  	diag("asmidx: bad address %d/%d/%d", scale, index, base);
   655  	*andptr++ = 0;
   656  	return;
   657  }
   658  
   659  static void
   660  put4(int32 v)
   661  {
   662  	andptr[0] = v;
   663  	andptr[1] = v>>8;
   664  	andptr[2] = v>>16;
   665  	andptr[3] = v>>24;
   666  	andptr += 4;
   667  }
   668  
   669  static void
   670  relput4(Prog *p, Adr *a)
   671  {
   672  	vlong v;
   673  	Reloc rel, *r;
   674  	
   675  	v = vaddr(a, &rel);
   676  	if(rel.siz != 0) {
   677  		if(rel.siz != 4)
   678  			diag("bad reloc");
   679  		r = addrel(cursym);
   680  		*r = rel;
   681  		r->off = p->pc + andptr - and;
   682  	}
   683  	put4(v);
   684  }
   685  
   686  static void
   687  put8(vlong v)
   688  {
   689  	andptr[0] = v;
   690  	andptr[1] = v>>8;
   691  	andptr[2] = v>>16;
   692  	andptr[3] = v>>24;
   693  	andptr[4] = v>>32;
   694  	andptr[5] = v>>40;
   695  	andptr[6] = v>>48;
   696  	andptr[7] = v>>56;
   697  	andptr += 8;
   698  }
   699  
   700  /*
   701  static void
   702  relput8(Prog *p, Adr *a)
   703  {
   704  	vlong v;
   705  	Reloc rel, *r;
   706  	
   707  	v = vaddr(a, &rel);
   708  	if(rel.siz != 0) {
   709  		r = addrel(cursym);
   710  		*r = rel;
   711  		r->siz = 8;
   712  		r->off = p->pc + andptr - and;
   713  	}
   714  	put8(v);
   715  }
   716  */
   717  
   718  vlong
   719  symaddr(Sym *s)
   720  {
   721  	if(!s->reachable)
   722  		diag("unreachable symbol in symaddr - %s", s->name);
   723  	return s->value;
   724  }
   725  
   726  static vlong
   727  vaddr(Adr *a, Reloc *r)
   728  {
   729  	int t;
   730  	vlong v;
   731  	Sym *s;
   732  	
   733  	if(r != nil)
   734  		memset(r, 0, sizeof *r);
   735  
   736  	t = a->type;
   737  	v = a->offset;
   738  	if(t == D_ADDR)
   739  		t = a->index;
   740  	switch(t) {
   741  	case D_STATIC:
   742  	case D_EXTERN:
   743  		s = a->sym;
   744  		if(!s->reachable)
   745  			diag("unreachable symbol in vaddr - %s", s->name);
   746  		if(r == nil) {
   747  			diag("need reloc for %D", a);
   748  			errorexit();
   749  		}
   750  		r->siz = 4;	// TODO: 8 for external symbols
   751  		r->off = -1;	// caller must fill in
   752  		r->sym = s;
   753  		r->add = v;
   754  		v = 0;
   755  		if(flag_shared) {
   756  			if(s->type == STLSBSS) {
   757  				r->xadd = r->add - r->siz;
   758  				r->type = D_TLS;
   759  				r->xsym = s;
   760  			} else
   761  				r->type = D_PCREL;
   762  		} else
   763  			r->type = D_ADDR;
   764  	}
   765  	return v;
   766  }
   767  
   768  static void
   769  asmandsz(Adr *a, int r, int rex, int m64)
   770  {
   771  	int32 v;
   772  	int t, scale;
   773  	Reloc rel;
   774  
   775  	USED(m64);
   776  	rex &= (0x40 | Rxr);
   777  	v = a->offset;
   778  	t = a->type;
   779  	rel.siz = 0;
   780  	if(a->index != D_NONE && a->index != D_FS && a->index != D_GS) {
   781  		if(t < D_INDIR) { 
   782  			switch(t) {
   783  			default:
   784  				goto bad;
   785  			case D_STATIC:
   786  			case D_EXTERN:
   787  				if(flag_shared)
   788  					goto bad;
   789  				t = D_NONE;
   790  				v = vaddr(a, &rel);
   791  				break;
   792  			case D_AUTO:
   793  			case D_PARAM:
   794  				t = D_SP;
   795  				break;
   796  			}
   797  		} else
   798  			t -= D_INDIR;
   799  		rexflag |= (regrex[(int)a->index] & Rxx) | (regrex[t] & Rxb) | rex;
   800  		if(t == D_NONE) {
   801  			*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
   802  			asmidx(a->scale, a->index, t);
   803  			goto putrelv;
   804  		}
   805  		if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
   806  			*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
   807  			asmidx(a->scale, a->index, t);
   808  			return;
   809  		}
   810  		if(v >= -128 && v < 128 && rel.siz == 0) {
   811  			*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
   812  			asmidx(a->scale, a->index, t);
   813  			*andptr++ = v;
   814  			return;
   815  		}
   816  		*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
   817  		asmidx(a->scale, a->index, t);
   818  		goto putrelv;
   819  	}
   820  	if(t >= D_AL && t <= D_X0+15) {
   821  		if(v)
   822  			goto bad;
   823  		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
   824  		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
   825  		return;
   826  	}
   827  	
   828  	scale = a->scale;
   829  	if(t < D_INDIR) {
   830  		switch(a->type) {
   831  		default:
   832  			goto bad;
   833  		case D_STATIC:
   834  		case D_EXTERN:
   835  			t = D_NONE;
   836  			v = vaddr(a, &rel);
   837  			break;
   838  		case D_AUTO:
   839  		case D_PARAM:
   840  			t = D_SP;
   841  			break;
   842  		}
   843  		scale = 1;
   844  	} else
   845  		t -= D_INDIR;
   846  
   847  	rexflag |= (regrex[t] & Rxb) | rex;
   848  	if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
   849  		if(flag_shared && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || asmode != 64) {
   850  			*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
   851  			goto putrelv;
   852  		}
   853  		/* temporary */
   854  		*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
   855  		*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
   856  		goto putrelv;
   857  	}
   858  	if(t == D_SP || t == D_R12) {
   859  		if(v == 0) {
   860  			*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
   861  			asmidx(scale, D_NONE, t);
   862  			return;
   863  		}
   864  		if(v >= -128 && v < 128) {
   865  			*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
   866  			asmidx(scale, D_NONE, t);
   867  			*andptr++ = v;
   868  			return;
   869  		}
   870  		*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
   871  		asmidx(scale, D_NONE, t);
   872  		goto putrelv;
   873  	}
   874  	if(t >= D_AX && t <= D_R15) {
   875  		if(v == 0 && t != D_BP && t != D_R13) {
   876  			*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
   877  			return;
   878  		}
   879  		if(v >= -128 && v < 128) {
   880  			andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
   881  			andptr[1] = v;
   882  			andptr += 2;
   883  			return;
   884  		}
   885  		*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
   886  		goto putrelv;
   887  	}
   888  	goto bad;
   889  	
   890  putrelv:
   891  	if(rel.siz != 0) {
   892  		Reloc *r;
   893  
   894  		if(rel.siz != 4) {
   895  			diag("bad rel");
   896  			goto bad;
   897  		}
   898  		r = addrel(cursym);
   899  		*r = rel;
   900  		r->off = curp->pc + andptr - and;
   901  	} else if(iself && linkmode == LinkExternal && a->type == D_INDIR+D_FS
   902  		&& HEADTYPE != Hopenbsd) {
   903  		Reloc *r;
   904  		Sym *s;
   905  		
   906  		r = addrel(cursym);
   907  		r->off = curp->pc + andptr - and;
   908  		r->add = a->offset-tlsoffset;
   909  		r->xadd = r->add;
   910  		r->siz = 4;
   911  		r->type = D_TLS;
   912  		s = lookup("runtime.tlsgm", 0);
   913  		r->sym = s;
   914  		r->xsym = s;
   915  		v = 0;
   916  	}
   917  		
   918  	put4(v);
   919  	return;
   920  
   921  bad:
   922  	diag("asmand: bad address %D", a);
   923  	return;
   924  }
   925  
   926  void
   927  asmand(Adr *a, Adr *ra)
   928  {
   929  	asmandsz(a, reg[ra->type], regrex[ra->type], 0);
   930  }
   931  
   932  void
   933  asmando(Adr *a, int o)
   934  {
   935  	asmandsz(a, o, 0, 0);
   936  }
   937  
   938  static void
   939  bytereg(Adr *a, char *t)
   940  {
   941  	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15)) {
   942  		a->type = D_AL + (a->type-D_AX);
   943  		*t = 0;
   944  	}
   945  }
   946  
// E (0xff) is a marker byte used inside the opcode columns of ymovtab.
// NOTE(review): presumably it ends the opcode bytes for the entries whose
// fourth column is 0 -- confirm against the code that consumes ymovtab.
#define	E	0xff
// ymovtab lists special-case encodings for push/pop/mov and the double
// shifts, grouped by the comments below.  Each entry gives an opcode,
// two operand classes, a small integer code and four bytes.
// NOTE(review): the columns look like (as, from class, to class, case
// code, opcode bytes); the Movtab declaration is not in this file --
// confirm against l.h.  The table ends with a zero entry.
Movtab	ymovtab[] =
{
/* push */
	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},

	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},

/* pop */
	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},

	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},

/* mov seg */
	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},

	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},

/* mov cr */
	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},

	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},

/* mov dr */
	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},

	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},

/* mov tr */
	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},

	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},

/* lgdt, sgdt, lidt, sidt */
	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},

/* lldt, sldt */
	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},

/* lmsw, smsw */
	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},

/* ltr, str */
	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},

/* load full pointer */
	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},

/* double shift */
	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
	0
};
  1077  
  1078  int
  1079  isax(Adr *a)
  1080  {
  1081  
  1082  	switch(a->type) {
  1083  	case D_AX:
  1084  	case D_AL:
  1085  	case D_AH:
  1086  	case D_INDIR+D_AX:
  1087  		return 1;
  1088  	}
  1089  	if(a->index == D_AX)
  1090  		return 1;
  1091  	return 0;
  1092  }
  1093  
  1094  void
  1095  subreg(Prog *p, int from, int to)
  1096  {
  1097  
  1098  	if(debug['Q'])
  1099  		print("\n%P	s/%R/%R/\n", p, from, to);
  1100  
  1101  	if(p->from.type == from)
  1102  		p->from.type = to;
  1103  	if(p->to.type == from)
  1104  		p->to.type = to;
  1105  
  1106  	if(p->from.index == from)
  1107  		p->from.index = to;
  1108  	if(p->to.index == from)
  1109  		p->to.index = to;
  1110  
  1111  	from += D_INDIR;
  1112  	if(p->from.type == from)
  1113  		p->from.type = to+D_INDIR;
  1114  	if(p->to.type == from)
  1115  		p->to.type = to+D_INDIR;
  1116  
  1117  	if(debug['Q'])
  1118  		print("%P\n", p);
  1119  }
  1120  
  1121  static int
  1122  mediaop(Optab *o, int op, int osize, int z)
  1123  {
  1124  	switch(op){
  1125  	case Pm:
  1126  	case Pe:
  1127  	case Pf2:
  1128  	case Pf3:
  1129  		if(osize != 1){
  1130  			if(op != Pm)
  1131  				*andptr++ = op;
  1132  			*andptr++ = Pm;
  1133  			op = o->op[++z];
  1134  			break;
  1135  		}
  1136  	default:
  1137  		if(andptr == and || andptr[-1] != Pm)
  1138  			*andptr++ = Pm;
  1139  		break;
  1140  	}
  1141  	*andptr++ = op;
  1142  	return z;
  1143  }
  1144  
/*
 * doasm encodes the single instruction p, appending its bytes to
 * the instruction buffer through the global andptr.
 *
 * Outline:
 *  1. Look up the Optab entry for p->as in opindex.
 *  2. Emit any prefix byte prefixof reports for either operand.
 *  3. Classify both operands with oclass; the result is cached in
 *     p->ft / p->tt (0 means "not yet computed").
 *  4. Scan the entry's ytab.  Each ytab record is 4 bytes:
 *     t[0] = accepted `from' class, t[1] = accepted `to' class,
 *     t[2] = encoding kind (Z*), t[3] = number of opcode bytes the
 *     record consumes from o->op.  z tracks the index into o->op.
 *  5. On a match (found:), handle o->prefix and then emit bytes
 *     according to the encoding kind t[2].
 *  6. With no match (domov:), try the ymovtab table; its encodings
 *     are handled at mfound:.
 *  7. If that also fails (bad:) and the instruction is not in 64-bit
 *     mode, retry with an operand register exchanged into BX or AX;
 *     otherwise report the failure with diag.
 *
 * REX bits are accumulated in the global rexflag; this function does
 * not emit the REX byte itself.
 */
void
doasm(Prog *p)
{
	Optab *o;
	Prog *q, pp;
	uchar *t;
	Movtab *mo;
	int z, op, ft, tt, xo, l, pre;
	vlong v;
	Reloc rel, *r;
	Adr *a;
	
	curp = p;	// TODO

	o = opindex[p->as];
	if(o == nil) {
		diag("asmins: missing op %P", p);
		return;
	}
	
	/* prefix bytes requested by the operands, `from' first */
	pre = prefixof(&p->from);
	if(pre)
		*andptr++ = pre;
	pre = prefixof(&p->to);
	if(pre)
		*andptr++ = pre;

	/* operand classes are computed once and cached in the Prog */
	if(p->ft == 0)
		p->ft = oclass(&p->from);
	if(p->tt == 0)
		p->tt = oclass(&p->to);

	/* row offsets into ycover, which is indexed as ycover[row*Ymax + column] */
	ft = p->ft * Ymax;
	tt = p->tt * Ymax;

	t = o->ytab;
	if(t == 0) {
		diag("asmins: noproto %P", p);
		return;
	}
	/* when the opcode list starts with the 0x0f escape, each record uses one extra byte of o->op */
	xo = o->op[0] == 0x0f;
	/* first ytab record whose classes cover both operands wins; a zero byte ends the list */
	for(z=0; *t; z+=t[3]+xo,t+=4)
		if(ycover[ft+t[0]])
		if(ycover[tt+t[1]])
			goto found;
	goto domov;

found:
	switch(o->prefix) {
	case Pq:	/* 16 bit escape and opcode escape */
		*andptr++ = Pe;
		*andptr++ = Pm;
		break;
	case Pq3:	/* 16 bit escape, Rex.w, and opcode escape */
		*andptr++ = Pe;
		*andptr++ = Pw;
		*andptr++ = Pm;
		break;

	case Pf2:	/* xmm opcode escape */
	case Pf3:
		*andptr++ = o->prefix;
		*andptr++ = Pm;
		break;

	case Pm:	/* opcode escape */
		*andptr++ = Pm;
		break;

	case Pe:	/* 16 bit escape */
		*andptr++ = Pe;
		break;

	case Pw:	/* 64-bit escape */
		if(p->mode != 64)
			diag("asmins: illegal 64: %P", p);
		rexflag |= Pw;
		break;

	case Pb:	/* botch */
		bytereg(&p->from, &p->ft);
		bytereg(&p->to, &p->tt);
		break;

	case P32:	/* 32 bit but illegal if 64-bit mode */
		if(p->mode == 64)
			diag("asmins: illegal in 64-bit mode: %P", p);
		break;

	case Py:	/* 64-bit only, no prefix */
		if(p->mode != 64)
			diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
		break;
	}

	/* z must still index inside the opcode array of the table entry */
	if(z >= nelem(o->op))
		sysfatal("asmins bad table %P", p);
	op = o->op[z];
	if(op == 0x0f) {
		/* emit the escape now; the real opcode is the next byte */
		*andptr++ = op;
		op = o->op[++z];
	}
	/* dispatch on the encoding kind of the matched ytab record */
	switch(t[2]) {
	default:
		diag("asmins: unknown z %d %P", t[2], p);
		return;

	case Zpseudo:
		/* nothing to emit */
		break;

	case Zlit:
		/* copy opcode bytes verbatim up to the terminating 0 */
		for(; op = o->op[z]; z++)
			*andptr++ = op;
		break;

	case Zlitm_r:
		/* literal opcode bytes, then the operand bytes from asmand */
		for(; op = o->op[z]; z++)
			*andptr++ = op;
		asmand(&p->from, &p->to);
		break;

	case Zmb_r:
		bytereg(&p->from, &p->ft);
		/* fall through */
	case Zm_r:
		*andptr++ = op;
		asmand(&p->from, &p->to);
		break;
	case Zm2_r:
		/* two opcode bytes */
		*andptr++ = op;
		*andptr++ = o->op[z+1];
		asmand(&p->from, &p->to);
		break;

	case Zm_r_xm:
		mediaop(o, op, t[3], z);
		asmand(&p->from, &p->to);
		break;

	case Zm_r_xm_nr:
		/* discards any REX bits accumulated so far ("nr" presumably means no REX) */
		rexflag = 0;
		mediaop(o, op, t[3], z);
		asmand(&p->from, &p->to);
		break;

	case Zm_r_i_xm:
		/* as Zm_r_xm, followed by one immediate byte taken from p->to.offset */
		mediaop(o, op, t[3], z);
		asmand(&p->from, &p->to);
		*andptr++ = p->to.offset;
		break;

	case Zm_r_3d:
		/* 0x0f 0x0f, operand bytes, then the opcode as a trailing byte */
		*andptr++ = 0x0f;
		*andptr++ = 0x0f;
		asmand(&p->from, &p->to);
		*andptr++ = op;
		break;

	case Zibm_r:
		/* literal opcode bytes, operand bytes, then one immediate byte from p->to.offset */
		while ((op = o->op[z++]) != 0)
			*andptr++ = op;
		asmand(&p->from, &p->to);
		*andptr++ = p->to.offset;
		break;

	case Zaut_r:
		*andptr++ = 0x8d;	/* leal */
		if(p->from.type != D_ADDR)
			diag("asmins: Zaut sb type ADDR");
		/* temporarily present the D_ADDR operand as its underlying operand, then restore it */
		p->from.type = p->from.index;
		p->from.index = D_NONE;
		asmand(&p->from, &p->to);
		p->from.index = p->from.type;
		p->from.type = D_ADDR;
		break;

	case Zm_o:
		/* the byte after the opcode in o->op is passed to asmando as its second argument */
		*andptr++ = op;
		asmando(&p->from, o->op[z+1]);
		break;

	case Zr_m:
		/* same as Zm_r with the operand roles swapped */
		*andptr++ = op;
		asmand(&p->to, &p->from);
		break;

	case Zr_m_xm:
		mediaop(o, op, t[3], z);
		asmand(&p->to, &p->from);
		break;

	case Zr_m_xm_nr:
		/* discards any REX bits accumulated so far */
		rexflag = 0;
		mediaop(o, op, t[3], z);
		asmand(&p->to, &p->from);
		break;

	case Zr_m_i_xm:
		/* as Zr_m_xm, followed by one immediate byte taken from p->from.offset */
		mediaop(o, op, t[3], z);
		asmand(&p->to, &p->from);
		*andptr++ = p->from.offset;
		break;

	case Zo_m:
		*andptr++ = op;
		asmando(&p->to, o->op[z+1]);
		break;

	case Zo_m64:
		*andptr++ = op;
		asmandsz(&p->to, o->op[z+1], 0, 1);
		break;

	case Zm_ibo:
		/* opcode, operand bytes for `from', then one immediate byte from `to' */
		*andptr++ = op;
		asmando(&p->from, o->op[z+1]);
		*andptr++ = vaddr(&p->to, nil);
		break;

	case Zibo_m:
		/* opcode, operand bytes for `to', then one immediate byte from `from' */
		*andptr++ = op;
		asmando(&p->to, o->op[z+1]);
		*andptr++ = vaddr(&p->from, nil);
		break;

	case Zibo_m_xm:
		/* mediaop may advance z past a prefix byte; use its result to find the next table byte */
		z = mediaop(o, op, t[3], z);
		asmando(&p->to, o->op[z+1]);
		*andptr++ = vaddr(&p->from, nil);
		break;

	case Z_ib:
	case Zib_:
		/* opcode plus one immediate byte; Zib_ takes it from `from', Z_ib from `to' */
		if(t[2] == Zib_)
			a = &p->from;
		else
			a = &p->to;
		*andptr++ = op;
		*andptr++ = vaddr(a, nil);
		break;

	case Zib_rp:
		/* register number is added into the opcode byte; one immediate byte follows */
		rexflag |= regrex[p->to.type] & (Rxb|0x40);
		*andptr++ = op + reg[p->to.type];
		*andptr++ = vaddr(&p->from, nil);
		break;

	case Zil_rp:
		/* register number in the opcode byte; immediate is 2 bytes under Pe, else 4 bytes via relput4 */
		rexflag |= regrex[p->to.type] & Rxb;
		*andptr++ = op + reg[p->to.type];
		if(o->prefix == Pe) {
			v = vaddr(&p->from, nil);
			*andptr++ = v;
			*andptr++ = v>>8;
		}
		else
			relput4(p, &p->from);
		break;

	case Zo_iw:
		/* opcode, then an optional 2-byte immediate when a `from' operand is present */
		*andptr++ = op;
		if(p->from.type != D_NONE){
			v = vaddr(&p->from, nil);
			*andptr++ = v;
			*andptr++ = v>>8;
		}
		break;

	case Ziq_rp:
		/*
		 * 64-bit immediate to register.  Pick the shortest of three
		 * encodings based on the high 32 bits of the value (l) and
		 * on whether the relocation needs 8 bytes.
		 */
		v = vaddr(&p->from, &rel);
		l = v>>32;
		if(l == 0 && rel.siz != 8){
			/* high half is zero: 0xb8+reg with a 4-byte immediate, REX.W cleared */
			//p->mark |= 0100;
			//print("zero: %llux %P\n", v, p);
			rexflag &= ~(0x40|Rxw);
			rexflag |= regrex[p->to.type] & Rxb;
			*andptr++ = 0xb8 + reg[p->to.type];
			if(rel.type != 0) {
				r = addrel(cursym);
				*r = rel;
				r->off = p->pc + andptr - and;
			}
			put4(v);
		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
			/* high half is all ones and bit 31 is set: 0xc7 /0 with a 4-byte immediate */
			//p->mark |= 0100;
			//print("sign: %llux %P\n", v, p);
			*andptr ++ = 0xc7;
			asmando(&p->to, 0);
			put4(v);
		}else{	/* need all 8 */
			//print("all: %llux %P\n", v, p);
			rexflag |= regrex[p->to.type] & Rxb;
			*andptr++ = op + reg[p->to.type];
			if(rel.type != 0) {
				r = addrel(cursym);
				*r = rel;
				r->off = p->pc + andptr - and;
			}
			put8(v);
		}
		break;

	case Zib_rr:
		/* `to' is passed as both arguments of asmand; one immediate byte follows */
		*andptr++ = op;
		asmand(&p->to, &p->to);
		*andptr++ = vaddr(&p->from, nil);
		break;

	case Z_il:
	case Zil_:
		/* opcode plus immediate (2 bytes under Pe, else 4); Zil_ uses `from', Z_il uses `to' */
		if(t[2] == Zil_)
			a = &p->from;
		else
			a = &p->to;
		*andptr++ = op;
		if(o->prefix == Pe) {
			v = vaddr(a, nil);
			*andptr++ = v;
			*andptr++ = v>>8;
		}
		else
			relput4(p, a);
		break;

	case Zm_ilo:
	case Zilo_m:
		/* opcode, operand bytes for one operand, then an immediate (2 or 4 bytes) from the other */
		*andptr++ = op;
		if(t[2] == Zilo_m) {
			a = &p->from;
			asmando(&p->to, o->op[z+1]);
		} else {
			a = &p->to;
			asmando(&p->from, o->op[z+1]);
		}
		if(o->prefix == Pe) {
			v = vaddr(a, nil);
			*andptr++ = v;
			*andptr++ = v>>8;
		}
		else
			relput4(p, a);
		break;

	case Zil_rr:
		/* as Zib_rr but with a 2- or 4-byte immediate */
		*andptr++ = op;
		asmand(&p->to, &p->to);
		if(o->prefix == Pe) {
			v = vaddr(&p->from, nil);
			*andptr++ = v;
			*andptr++ = v>>8;
		}
		else
			relput4(p, &p->from);
		break;

	case Z_rp:
		/* single byte: opcode plus the number of the `to' register */
		rexflag |= regrex[p->to.type] & (Rxb|0x40);
		*andptr++ = op + reg[p->to.type];
		break;

	case Zrp_:
		/* single byte: opcode plus the number of the `from' register */
		rexflag |= regrex[p->from.type] & (Rxb|0x40);
		*andptr++ = op + reg[p->from.type];
		break;

	case Zclr:
		/* `to' is passed as both arguments of asmand */
		*andptr++ = op;
		asmand(&p->to, &p->to);
		break;

	case Zcall:
		q = p->pcond;
		if(q == nil) {
			diag("call without target");
			errorexit();
		}
		if(q->as != ATEXT) {
			// Could handle this case by making D_PCREL
			// record the Prog* instead of the Sym*, but let's
			// wait until the need arises.
			diag("call of non-TEXT %P", q);
			errorexit();
		}
		/* opcode, then a 4-byte zero placeholder covered by a D_PCREL relocation against the target symbol */
		*andptr++ = op;
		r = addrel(cursym);
		r->off = p->pc + andptr - and;
		r->sym = q->from.sym;
		r->type = D_PCREL;
		r->siz = 4;
		put4(0);
		break;

	case Zbr:
	case Zjmp:
	case Zloop:
		// TODO: jump across functions needs reloc
		q = p->pcond;
		if(q == nil) {
			diag("jmp/branch/loop without target");
			errorexit();
		}
		if(q->as == ATEXT) {
			/* target is a function entry: long form with a D_PCREL relocation (not allowed for Zbr) */
			if(t[2] == Zbr) {
				diag("branch to ATEXT");
				errorexit();
			}
			*andptr++ = o->op[z+1];
			r = addrel(cursym);
			r->off = p->pc + andptr - and;
			r->sym = q->from.sym;
			r->type = D_PCREL;
			r->siz = 4;
			put4(0);
			break;
		}
		// Assumes q is in this function.
		// TODO: Check in input, preserve in brchain.

		// Fill in backward jump now.
		if(p->back & 1) {
			/* displacement measured from the end of a 2-byte short instruction */
			v = q->pc - (p->pc + 2);
			if(v >= -128) {
				if(p->as == AJCXZL)
					*andptr++ = 0x67;
				*andptr++ = op;
				*andptr++ = v;
			} else if(t[2] == Zloop) {
				diag("loop too far: %P", p);
			} else {
				/* long form: opcode + 4 bytes is 5 bytes instead of 2; Zbr adds the 0x0f escape byte */
				v -= 5-2;
				if(t[2] == Zbr) {
					*andptr++ = 0x0f;
					v--;
				}
				*andptr++ = o->op[z+1];
				*andptr++ = v;
				*andptr++ = v>>8;
				*andptr++ = v>>16;
				*andptr++ = v>>24;
			}
			break;
		}
		
		// Annotate target; will fill in later.
		/* push p on the target's comefrom list (linked through forwd) and emit a zero displacement */
		p->forwd = q->comefrom;
		q->comefrom = p;
		if(p->back & 2)	{ // short
			if(p->as == AJCXZL)
				*andptr++ = 0x67;
			*andptr++ = op;
			*andptr++ = 0;
		} else if(t[2] == Zloop) {
			diag("loop too far: %P", p);
		} else {
			if(t[2] == Zbr)
				*andptr++ = 0x0f;
			*andptr++ = o->op[z+1];
			*andptr++ = 0;
			*andptr++ = 0;
			*andptr++ = 0;
			*andptr++ = 0;
		}
		break;
				
/*
 * The following is disabled code kept from an earlier version of
 * the branch encoder; it is not compiled.
 *
		v = q->pc - p->pc - 2;
		if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
			*andptr++ = op;
			*andptr++ = v;
		} else {
			v -= 5-2;
			if(t[2] == Zbr) {
				*andptr++ = 0x0f;
				v--;
			}
			*andptr++ = o->op[z+1];
			*andptr++ = v;
			*andptr++ = v>>8;
			*andptr++ = v>>16;
			*andptr++ = v>>24;
		}
*/
		/* unreachable: every path above has already left the switch */
		break;

	case Zbyte:
		/* raw data: op is the byte count (1, 2, 4 or 8); the value is emitted low byte first */
		v = vaddr(&p->from, &rel);
		if(rel.siz != 0) {
			rel.siz = op;
			r = addrel(cursym);
			*r = rel;
			r->off = p->pc + andptr - and;
		}
		*andptr++ = v;
		if(op > 1) {
			*andptr++ = v>>8;
			if(op > 2) {
				*andptr++ = v>>16;
				*andptr++ = v>>24;
				if(op > 4) {
					*andptr++ = v>>32;
					*andptr++ = v>>40;
					*andptr++ = v>>48;
					*andptr++ = v>>56;
				}
			}
		}
		break;
	}
	return;

domov:
	/* no ytab record matched: look for a ymovtab entry for this opcode whose classes cover both operands */
	for(mo=ymovtab; mo->as; mo++)
		if(p->as == mo->as)
		if(ycover[ft+mo->ft])
		if(ycover[tt+mo->tt]){
			t = mo->op;
			goto mfound;
		}
bad:
	if(p->mode != 64){
		/*
		 * here, the assembly has failed.
		 * if it's a byte instruction that has
		 * unaddressable registers, try to
		 * exchange registers and reissue the
		 * instruction with the operands renamed.
		 *
		 * NOTE(review): pp is a copy of *p and therefore carries
		 * p's cached ft/tt.  The recursive doasm only reclassifies
		 * the renamed operand if those cached values are 0 at that
		 * point; verify that they are invalidated.
		 */
		pp = *p;
		z = p->from.type;
		if(z >= D_BP && z <= D_DI) {
			if(isax(&p->to) || p->to.type == D_NONE) {
				// We certainly don't want to exchange
				// with AX if the op is MUL or DIV.
				*andptr++ = 0x87;			/* xchg lhs,bx */
				asmando(&p->from, reg[D_BX]);
				subreg(&pp, z, D_BX);
				doasm(&pp);
				*andptr++ = 0x87;			/* xchg lhs,bx */
				asmando(&p->from, reg[D_BX]);
			} else {
				*andptr++ = 0x90 + reg[z];		/* xchg lhs,ax */
				subreg(&pp, z, D_AX);
				doasm(&pp);
				*andptr++ = 0x90 + reg[z];		/* xchg lhs,ax */
			}
			return;
		}
		z = p->to.type;
		if(z >= D_BP && z <= D_DI) {
			if(isax(&p->from)) {
				*andptr++ = 0x87;			/* xchg rhs,bx */
				asmando(&p->to, reg[D_BX]);
				subreg(&pp, z, D_BX);
				doasm(&pp);
				*andptr++ = 0x87;			/* xchg rhs,bx */
				asmando(&p->to, reg[D_BX]);
			} else {
				*andptr++ = 0x90 + reg[z];		/* xchg rhs,ax */
				subreg(&pp, z, D_AX);
				doasm(&pp);
				*andptr++ = 0x90 + reg[z];		/* xchg rhs,ax */
			}
			return;
		}
	}
	diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
	return;

mfound:
	/* t now points at the ymovtab entry's op bytes; mo->code selects how they are used */
	switch(mo->code) {
	default:
		diag("asmins: unknown mov %d %P", mo->code, p);
		break;

	case 0:	/* lit */
		/* copy bytes verbatim up to the E terminator */
		for(z=0; t[z]!=E; z++)
			*andptr++ = t[z];
		break;

	case 1:	/* r,m */
		*andptr++ = t[0];
		asmando(&p->to, t[1]);
		break;

	case 2:	/* m,r */
		*andptr++ = t[0];
		asmando(&p->from, t[1]);
		break;

	case 3:	/* r,m - 2op */
		*andptr++ = t[0];
		*andptr++ = t[1];
		asmando(&p->to, t[2]);
		rexflag |= regrex[p->from.type] & (Rxr|0x40);
		break;

	case 4:	/* m,r - 2op */
		*andptr++ = t[0];
		*andptr++ = t[1];
		asmando(&p->from, t[2]);
		rexflag |= regrex[p->to.type] & (Rxr|0x40);
		break;

	case 5:	/* load full pointer, trash heap */
		/* optional leading byte from the table, then an opcode chosen by the segment register in p->to.index */
		if(t[0])
			*andptr++ = t[0];
		switch(p->to.index) {
		default:
			goto bad;
		case D_DS:
			*andptr++ = 0xc5;
			break;
		case D_SS:
			*andptr++ = 0x0f;
			*andptr++ = 0xb2;
			break;
		case D_ES:
			*andptr++ = 0xc4;
			break;
		case D_FS:
			*andptr++ = 0x0f;
			*andptr++ = 0xb4;
			break;
		case D_GS:
			*andptr++ = 0x0f;
			*andptr++ = 0xb5;
			break;
		}
		asmand(&p->from, &p->to);
		break;

	case 6:	/* double shift */
		/* an optional leading Pw or Pe in the table selects the 64-bit or 16-bit form */
		if(t[0] == Pw){
			if(p->mode != 64)
				diag("asmins: illegal 64: %P", p);
			rexflag |= Pw;
			t++;
		}else if(t[0] == Pe){
			*andptr++ = Pe;
			t++;
		}
		/* t[0] is the opcode for a constant shift count, t[1] for a count in CL/CX */
		z = p->from.type;
		switch(z) {
		default:
			goto bad;
		case D_CONST:
			*andptr++ = 0x0f;
			*andptr++ = t[0];
			asmandsz(&p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
			*andptr++ = p->from.offset;
			break;
		case D_CL:
		case D_CX:
			*andptr++ = 0x0f;
			*andptr++ = t[1];
			asmandsz(&p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
			break;
		}
		break;
	}
}
  1806  
  1807  void
  1808  asmins(Prog *p)
  1809  {
  1810  	int n, np, c;
  1811  	Reloc *r;
  1812  
  1813  	rexflag = 0;
  1814  	andptr = and;
  1815  	asmode = p->mode;
  1816  	doasm(p);
  1817  	if(rexflag){
  1818  		/*
  1819  		 * as befits the whole approach of the architecture,
  1820  		 * the rex prefix must appear before the first opcode byte
  1821  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  1822  		 * before the 0f opcode escape!), or it might be ignored.
  1823  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  1824  		 */
  1825  		if(p->mode != 64)
  1826  			diag("asmins: illegal in mode %d: %P", p->mode, p);
  1827  		n = andptr - and;
  1828  		for(np = 0; np < n; np++) {
  1829  			c = and[np];
  1830  			if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
  1831  				break;
  1832  		}
  1833  		memmove(and+np+1, and+np, n-np);
  1834  		and[np] = 0x40 | rexflag;
  1835  		andptr++;
  1836  	}
  1837  	n = andptr - and;
  1838  	for(r=cursym->r+cursym->nr; r-- > cursym->r; ) {
  1839  		if(r->off < p->pc)
  1840  			break;
  1841  		if(rexflag)
  1842  			r->off++;
  1843  		if(r->type == D_PCREL)
  1844  			r->add -= p->pc + n - (r->off + r->siz);
  1845  	}
  1846  }