github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/6l/span.c (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // Instruction layout.
    32  
    33  #include	"l.h"
    34  #include	"../ld/lib.h"
    35  #include	"../ld/elf.h"
    36  
// Prefix bits OR-ed together (from regrex[] entries and Pw) while the
// operands of the current instruction are being encoded.
static int	rexflag;
// Assembly mode; compared against 64 to decide whether the R8-R15 register
// families and the extra byte registers are acceptable.
static int	asmode;
// Forward declaration; defined further down, used by relput4 and asmandsz.
static vlong	vaddr(Adr*, Reloc*);
    40  
    41  // single-instruction no-ops of various lengths.
    42  // constructed by hand and disassembled with gdb to verify.
    43  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
    44  static uchar nop[][16] = {
    45  	{0x90},
    46  	{0x66, 0x90},
    47  	{0x0F, 0x1F, 0x00},
    48  	{0x0F, 0x1F, 0x40, 0x00},
    49  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
    50  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
    51  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
    52  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    53  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    54  	{0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    55  };
    56  
    57  static void
    58  fillnop(uchar *p, int n)
    59  {
    60  	int m;
    61  
    62  	while(n > 0) {
    63  		m = n;
    64  		if(m > nelem(nop))
    65  			m = nelem(nop);
    66  		memmove(p, nop[m-1], m);
    67  		p += m;
    68  		n -= m;
    69  	}
    70  }
    71  
    72  void
    73  span1(Sym *s)
    74  {
    75  	Prog *p, *q;
    76  	int32 c, v, loop;
    77  	uchar *bp;
    78  	int n, m, i;
    79  
    80  	cursym = s;
    81  	
    82  	if(s->p != nil)
    83  		return;
    84  
    85  	for(p = s->text; p != P; p = p->link) {
    86  		p->back = 2;	// use short branches first time through
    87  		if((q = p->pcond) != P && (q->back & 2)) {
    88  			p->back |= 1;	// backward jump
    89  			q->back |= 4;   // loop head
    90  		}
    91  
    92  		if(p->as == AADJSP) {
    93  			p->to.type = D_SP;
    94  			v = -p->from.offset;
    95  			p->from.offset = v;
    96  			p->as = p->mode != 64? AADDL: AADDQ;
    97  			if(v < 0) {
    98  				p->as = p->mode != 64? ASUBL: ASUBQ;
    99  				v = -v;
   100  				p->from.offset = v;
   101  			}
   102  			if(v == 0)
   103  				p->as = ANOP;
   104  		}
   105  	}
   106  	
   107  	n = 0;
   108  	do {
   109  		loop = 0;
   110  		memset(s->r, 0, s->nr*sizeof s->r[0]);
   111  		s->nr = 0;
   112  		s->np = 0;
   113  		c = 0;
   114  		for(p = s->text; p != P; p = p->link) {
   115  			if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
   116  				// pad with NOPs
   117  				v = -c&(LoopAlign-1);
   118  				if(v <= MaxLoopPad) {
   119  					symgrow(s, c+v);
   120  					fillnop(s->p+c, v);
   121  					c += v;
   122  				}
   123  			}
   124  
   125  			p->pc = c;
   126  
   127  			// process forward jumps to p
   128  			for(q = p->comefrom; q != P; q = q->forwd) {
   129  				v = p->pc - (q->pc + q->mark);
   130  				if(q->back & 2)	{	// short
   131  					if(v > 127) {
   132  						loop++;
   133  						q->back ^= 2;
   134  					}
   135  					if(q->as == AJCXZL)
   136  						s->p[q->pc+2] = v;
   137  					else
   138  						s->p[q->pc+1] = v;
   139  				} else {
   140  					bp = s->p + q->pc + q->mark - 4;
   141  					*bp++ = v;
   142  					*bp++ = v>>8;
   143  					*bp++ = v>>16;
   144  					*bp = v>>24;
   145  				}	
   146  			}
   147  			p->comefrom = P;
   148  
   149  			asmins(p);
   150  			p->pc = c;
   151  			m = andptr-and;
   152  			symgrow(s, p->pc+m);
   153  			memmove(s->p+p->pc, and, m);
   154  			p->mark = m;
   155  			c += m;
   156  		}
   157  		if(++n > 20) {
   158  			diag("span must be looping");
   159  			errorexit();
   160  		}
   161  	} while(loop);
   162  	s->size = c;
   163  
   164  	if(debug['a'] > 1) {
   165  		print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
   166  		for(i=0; i<s->np; i++) {
   167  			print(" %.2ux", s->p[i]);
   168  			if(i%16 == 15)
   169  				print("\n  %.6ux", i+1);
   170  		}
   171  		if(i%16)
   172  			print("\n");
   173  	
   174  		for(i=0; i<s->nr; i++) {
   175  			Reloc *r;
   176  			
   177  			r = &s->r[i];
   178  			print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
   179  		}
   180  	}
   181  }
   182  
   183  void
   184  span(void)
   185  {
   186  	Prog *p, *q;
   187  	int32 v;
   188  	int n;
   189  
   190  	if(debug['v'])
   191  		Bprint(&bso, "%5.2f span\n", cputime());
   192  
   193  	// NOTE(rsc): If we get rid of the globals we should
   194  	// be able to parallelize these iterations.
   195  	for(cursym = textp; cursym != nil; cursym = cursym->next) {
   196  		if(cursym->p != nil)
   197  			continue;
   198  		// TODO: move into span1
   199  		for(p = cursym->text; p != P; p = p->link) {
   200  			n = 0;
   201  			if(p->to.type == D_BRANCH)
   202  				if(p->pcond == P)
   203  					p->pcond = p;
   204  			if((q = p->pcond) != P)
   205  				if(q->back != 2)
   206  					n = 1;
   207  			p->back = n;
   208  			if(p->as == AADJSP) {
   209  				p->to.type = D_SP;
   210  				v = -p->from.offset;
   211  				p->from.offset = v;
   212  				p->as = p->mode != 64? AADDL: AADDQ;
   213  				if(v < 0) {
   214  					p->as = p->mode != 64? ASUBL: ASUBQ;
   215  					v = -v;
   216  					p->from.offset = v;
   217  				}
   218  				if(v == 0)
   219  					p->as = ANOP;
   220  			}
   221  		}
   222  		span1(cursym);
   223  	}
   224  }
   225  
   226  void
   227  xdefine(char *p, int t, vlong v)
   228  {
   229  	Sym *s;
   230  
   231  	s = lookup(p, 0);
   232  	s->type = t;
   233  	s->value = v;
   234  	s->reachable = 1;
   235  	s->special = 1;
   236  }
   237  
   238  void
   239  instinit(void)
   240  {
   241  	int c, i;
   242  
   243  	for(i=1; optab[i].as; i++) {
   244  		c = optab[i].as;
   245  		if(opindex[c] != nil) {
   246  			diag("phase error in optab: %d (%A)", i, c);
   247  			errorexit();
   248  		}
   249  		opindex[c] = &optab[i];
   250  	}
   251  
   252  	for(i=0; i<Ymax; i++)
   253  		ycover[i*Ymax + i] = 1;
   254  
   255  	ycover[Yi0*Ymax + Yi8] = 1;
   256  	ycover[Yi1*Ymax + Yi8] = 1;
   257  
   258  	ycover[Yi0*Ymax + Ys32] = 1;
   259  	ycover[Yi1*Ymax + Ys32] = 1;
   260  	ycover[Yi8*Ymax + Ys32] = 1;
   261  
   262  	ycover[Yi0*Ymax + Yi32] = 1;
   263  	ycover[Yi1*Ymax + Yi32] = 1;
   264  	ycover[Yi8*Ymax + Yi32] = 1;
   265  	ycover[Ys32*Ymax + Yi32] = 1;
   266  
   267  	ycover[Yi0*Ymax + Yi64] = 1;
   268  	ycover[Yi1*Ymax + Yi64] = 1;
   269  	ycover[Yi8*Ymax + Yi64] = 1;
   270  	ycover[Ys32*Ymax + Yi64] = 1;
   271  	ycover[Yi32*Ymax + Yi64] = 1;
   272  
   273  	ycover[Yal*Ymax + Yrb] = 1;
   274  	ycover[Ycl*Ymax + Yrb] = 1;
   275  	ycover[Yax*Ymax + Yrb] = 1;
   276  	ycover[Ycx*Ymax + Yrb] = 1;
   277  	ycover[Yrx*Ymax + Yrb] = 1;
   278  	ycover[Yrl*Ymax + Yrb] = 1;
   279  
   280  	ycover[Ycl*Ymax + Ycx] = 1;
   281  
   282  	ycover[Yax*Ymax + Yrx] = 1;
   283  	ycover[Ycx*Ymax + Yrx] = 1;
   284  
   285  	ycover[Yax*Ymax + Yrl] = 1;
   286  	ycover[Ycx*Ymax + Yrl] = 1;
   287  	ycover[Yrx*Ymax + Yrl] = 1;
   288  
   289  	ycover[Yf0*Ymax + Yrf] = 1;
   290  
   291  	ycover[Yal*Ymax + Ymb] = 1;
   292  	ycover[Ycl*Ymax + Ymb] = 1;
   293  	ycover[Yax*Ymax + Ymb] = 1;
   294  	ycover[Ycx*Ymax + Ymb] = 1;
   295  	ycover[Yrx*Ymax + Ymb] = 1;
   296  	ycover[Yrb*Ymax + Ymb] = 1;
   297  	ycover[Yrl*Ymax + Ymb] = 1;
   298  	ycover[Ym*Ymax + Ymb] = 1;
   299  
   300  	ycover[Yax*Ymax + Yml] = 1;
   301  	ycover[Ycx*Ymax + Yml] = 1;
   302  	ycover[Yrx*Ymax + Yml] = 1;
   303  	ycover[Yrl*Ymax + Yml] = 1;
   304  	ycover[Ym*Ymax + Yml] = 1;
   305  
   306  	ycover[Yax*Ymax + Ymm] = 1;
   307  	ycover[Ycx*Ymax + Ymm] = 1;
   308  	ycover[Yrx*Ymax + Ymm] = 1;
   309  	ycover[Yrl*Ymax + Ymm] = 1;
   310  	ycover[Ym*Ymax + Ymm] = 1;
   311  	ycover[Ymr*Ymax + Ymm] = 1;
   312  
   313  	ycover[Ym*Ymax + Yxm] = 1;
   314  	ycover[Yxr*Ymax + Yxm] = 1;
   315  
   316  	for(i=0; i<D_NONE; i++) {
   317  		reg[i] = -1;
   318  		if(i >= D_AL && i <= D_R15B) {
   319  			reg[i] = (i-D_AL) & 7;
   320  			if(i >= D_SPB && i <= D_DIB)
   321  				regrex[i] = 0x40;
   322  			if(i >= D_R8B && i <= D_R15B)
   323  				regrex[i] = Rxr | Rxx | Rxb;
   324  		}
   325  		if(i >= D_AH && i<= D_BH)
   326  			reg[i] = 4 + ((i-D_AH) & 7);
   327  		if(i >= D_AX && i <= D_R15) {
   328  			reg[i] = (i-D_AX) & 7;
   329  			if(i >= D_R8)
   330  				regrex[i] = Rxr | Rxx | Rxb;
   331  		}
   332  		if(i >= D_F0 && i <= D_F0+7)
   333  			reg[i] = (i-D_F0) & 7;
   334  		if(i >= D_M0 && i <= D_M0+7)
   335  			reg[i] = (i-D_M0) & 7;
   336  		if(i >= D_X0 && i <= D_X0+15) {
   337  			reg[i] = (i-D_X0) & 7;
   338  			if(i >= D_X0+8)
   339  				regrex[i] = Rxr | Rxx | Rxb;
   340  		}
   341  		if(i >= D_CR+8 && i <= D_CR+15)
   342  			regrex[i] = Rxr;
   343  	}
   344  }
   345  
   346  int
   347  prefixof(Adr *a)
   348  {
   349  	switch(a->type) {
   350  	case D_INDIR+D_CS:
   351  		return 0x2e;
   352  	case D_INDIR+D_DS:
   353  		return 0x3e;
   354  	case D_INDIR+D_ES:
   355  		return 0x26;
   356  	case D_INDIR+D_FS:
   357  		return 0x64;
   358  	case D_INDIR+D_GS:
   359  		return 0x65;
   360  	}
   361  	return 0;
   362  }
   363  
   364  int
   365  oclass(Adr *a)
   366  {
   367  	vlong v;
   368  	int32 l;
   369  
   370  	if(a->type >= D_INDIR || a->index != D_NONE) {
   371  		if(a->index != D_NONE && a->scale == 0) {
   372  			if(a->type == D_ADDR) {
   373  				switch(a->index) {
   374  				case D_EXTERN:
   375  				case D_STATIC:
   376  					if(flag_shared)
   377  						return Yiauto;
   378  					else
   379  						return Yi32;	/* TO DO: Yi64 */
   380  				case D_AUTO:
   381  				case D_PARAM:
   382  					return Yiauto;
   383  				}
   384  				return Yxxx;
   385  			}
   386  			return Ycol;
   387  		}
   388  		return Ym;
   389  	}
   390  	switch(a->type)
   391  	{
   392  	case D_AL:
   393  		return Yal;
   394  
   395  	case D_AX:
   396  		return Yax;
   397  
   398  /*
   399  	case D_SPB:
   400  */
   401  	case D_BPB:
   402  	case D_SIB:
   403  	case D_DIB:
   404  	case D_R8B:
   405  	case D_R9B:
   406  	case D_R10B:
   407  	case D_R11B:
   408  	case D_R12B:
   409  	case D_R13B:
   410  	case D_R14B:
   411  	case D_R15B:
   412  		if(asmode != 64)
   413  			return Yxxx;
   414  	case D_DL:
   415  	case D_BL:
   416  	case D_AH:
   417  	case D_CH:
   418  	case D_DH:
   419  	case D_BH:
   420  		return Yrb;
   421  
   422  	case D_CL:
   423  		return Ycl;
   424  
   425  	case D_CX:
   426  		return Ycx;
   427  
   428  	case D_DX:
   429  	case D_BX:
   430  		return Yrx;
   431  
   432  	case D_R8:	/* not really Yrl */
   433  	case D_R9:
   434  	case D_R10:
   435  	case D_R11:
   436  	case D_R12:
   437  	case D_R13:
   438  	case D_R14:
   439  	case D_R15:
   440  		if(asmode != 64)
   441  			return Yxxx;
   442  	case D_SP:
   443  	case D_BP:
   444  	case D_SI:
   445  	case D_DI:
   446  		return Yrl;
   447  
   448  	case D_F0+0:
   449  		return	Yf0;
   450  
   451  	case D_F0+1:
   452  	case D_F0+2:
   453  	case D_F0+3:
   454  	case D_F0+4:
   455  	case D_F0+5:
   456  	case D_F0+6:
   457  	case D_F0+7:
   458  		return	Yrf;
   459  
   460  	case D_M0+0:
   461  	case D_M0+1:
   462  	case D_M0+2:
   463  	case D_M0+3:
   464  	case D_M0+4:
   465  	case D_M0+5:
   466  	case D_M0+6:
   467  	case D_M0+7:
   468  		return	Ymr;
   469  
   470  	case D_X0+0:
   471  	case D_X0+1:
   472  	case D_X0+2:
   473  	case D_X0+3:
   474  	case D_X0+4:
   475  	case D_X0+5:
   476  	case D_X0+6:
   477  	case D_X0+7:
   478  	case D_X0+8:
   479  	case D_X0+9:
   480  	case D_X0+10:
   481  	case D_X0+11:
   482  	case D_X0+12:
   483  	case D_X0+13:
   484  	case D_X0+14:
   485  	case D_X0+15:
   486  		return	Yxr;
   487  
   488  	case D_NONE:
   489  		return Ynone;
   490  
   491  	case D_CS:	return	Ycs;
   492  	case D_SS:	return	Yss;
   493  	case D_DS:	return	Yds;
   494  	case D_ES:	return	Yes;
   495  	case D_FS:	return	Yfs;
   496  	case D_GS:	return	Ygs;
   497  
   498  	case D_GDTR:	return	Ygdtr;
   499  	case D_IDTR:	return	Yidtr;
   500  	case D_LDTR:	return	Yldtr;
   501  	case D_MSW:	return	Ymsw;
   502  	case D_TASK:	return	Ytask;
   503  
   504  	case D_CR+0:	return	Ycr0;
   505  	case D_CR+1:	return	Ycr1;
   506  	case D_CR+2:	return	Ycr2;
   507  	case D_CR+3:	return	Ycr3;
   508  	case D_CR+4:	return	Ycr4;
   509  	case D_CR+5:	return	Ycr5;
   510  	case D_CR+6:	return	Ycr6;
   511  	case D_CR+7:	return	Ycr7;
   512  	case D_CR+8:	return	Ycr8;
   513  
   514  	case D_DR+0:	return	Ydr0;
   515  	case D_DR+1:	return	Ydr1;
   516  	case D_DR+2:	return	Ydr2;
   517  	case D_DR+3:	return	Ydr3;
   518  	case D_DR+4:	return	Ydr4;
   519  	case D_DR+5:	return	Ydr5;
   520  	case D_DR+6:	return	Ydr6;
   521  	case D_DR+7:	return	Ydr7;
   522  
   523  	case D_TR+0:	return	Ytr0;
   524  	case D_TR+1:	return	Ytr1;
   525  	case D_TR+2:	return	Ytr2;
   526  	case D_TR+3:	return	Ytr3;
   527  	case D_TR+4:	return	Ytr4;
   528  	case D_TR+5:	return	Ytr5;
   529  	case D_TR+6:	return	Ytr6;
   530  	case D_TR+7:	return	Ytr7;
   531  
   532  	case D_EXTERN:
   533  	case D_STATIC:
   534  	case D_AUTO:
   535  	case D_PARAM:
   536  		return Ym;
   537  
   538  	case D_CONST:
   539  	case D_ADDR:
   540  		if(a->sym == S) {
   541  			v = a->offset;
   542  			if(v == 0)
   543  				return Yi0;
   544  			if(v == 1)
   545  				return Yi1;
   546  			if(v >= -128 && v <= 127)
   547  				return Yi8;
   548  			l = v;
   549  			if((vlong)l == v)
   550  				return Ys32;	/* can sign extend */
   551  			if((v>>32) == 0)
   552  				return Yi32;	/* unsigned */
   553  			return Yi64;
   554  		}
   555  		return Yi32;	/* TO DO: D_ADDR as Yi64 */
   556  
   557  	case D_BRANCH:
   558  		return Ybr;
   559  	}
   560  	return Yxxx;
   561  }
   562  
   563  void
   564  asmidx(int scale, int index, int base)
   565  {
   566  	int i;
   567  
   568  	switch(index) {
   569  	default:
   570  		goto bad;
   571  
   572  	case D_NONE:
   573  		i = 4 << 3;
   574  		goto bas;
   575  
   576  	case D_R8:
   577  	case D_R9:
   578  	case D_R10:
   579  	case D_R11:
   580  	case D_R12:
   581  	case D_R13:
   582  	case D_R14:
   583  	case D_R15:
   584  		if(asmode != 64)
   585  			goto bad;
   586  	case D_AX:
   587  	case D_CX:
   588  	case D_DX:
   589  	case D_BX:
   590  	case D_BP:
   591  	case D_SI:
   592  	case D_DI:
   593  		i = reg[index] << 3;
   594  		break;
   595  	}
   596  	switch(scale) {
   597  	default:
   598  		goto bad;
   599  	case 1:
   600  		break;
   601  	case 2:
   602  		i |= (1<<6);
   603  		break;
   604  	case 4:
   605  		i |= (2<<6);
   606  		break;
   607  	case 8:
   608  		i |= (3<<6);
   609  		break;
   610  	}
   611  bas:
   612  	switch(base) {
   613  	default:
   614  		goto bad;
   615  	case D_NONE:	/* must be mod=00 */
   616  		i |= 5;
   617  		break;
   618  	case D_R8:
   619  	case D_R9:
   620  	case D_R10:
   621  	case D_R11:
   622  	case D_R12:
   623  	case D_R13:
   624  	case D_R14:
   625  	case D_R15:
   626  		if(asmode != 64)
   627  			goto bad;
   628  	case D_AX:
   629  	case D_CX:
   630  	case D_DX:
   631  	case D_BX:
   632  	case D_SP:
   633  	case D_BP:
   634  	case D_SI:
   635  	case D_DI:
   636  		i |= reg[base];
   637  		break;
   638  	}
   639  	*andptr++ = i;
   640  	return;
   641  bad:
   642  	diag("asmidx: bad address %d/%d/%d", scale, index, base);
   643  	*andptr++ = 0;
   644  	return;
   645  }
   646  
   647  static void
   648  put4(int32 v)
   649  {
   650  	andptr[0] = v;
   651  	andptr[1] = v>>8;
   652  	andptr[2] = v>>16;
   653  	andptr[3] = v>>24;
   654  	andptr += 4;
   655  }
   656  
   657  static void
   658  relput4(Prog *p, Adr *a)
   659  {
   660  	vlong v;
   661  	Reloc rel, *r;
   662  	
   663  	v = vaddr(a, &rel);
   664  	if(rel.siz != 0) {
   665  		if(rel.siz != 4)
   666  			diag("bad reloc");
   667  		r = addrel(cursym);
   668  		*r = rel;
   669  		r->off = p->pc + andptr - and;
   670  	}
   671  	put4(v);
   672  }
   673  
   674  static void
   675  put8(vlong v)
   676  {
   677  	andptr[0] = v;
   678  	andptr[1] = v>>8;
   679  	andptr[2] = v>>16;
   680  	andptr[3] = v>>24;
   681  	andptr[4] = v>>32;
   682  	andptr[5] = v>>40;
   683  	andptr[6] = v>>48;
   684  	andptr[7] = v>>56;
   685  	andptr += 8;
   686  }
   687  
   688  /*
   689  static void
   690  relput8(Prog *p, Adr *a)
   691  {
   692  	vlong v;
   693  	Reloc rel, *r;
   694  	
   695  	v = vaddr(a, &rel);
   696  	if(rel.siz != 0) {
   697  		r = addrel(cursym);
   698  		*r = rel;
   699  		r->siz = 8;
   700  		r->off = p->pc + andptr - and;
   701  	}
   702  	put8(v);
   703  }
   704  */
   705  
   706  vlong
   707  symaddr(Sym *s)
   708  {
   709  	if(!s->reachable)
   710  		diag("unreachable symbol in symaddr - %s", s->name);
   711  	return s->value;
   712  }
   713  
   714  static vlong
   715  vaddr(Adr *a, Reloc *r)
   716  {
   717  	int t;
   718  	vlong v;
   719  	Sym *s;
   720  	
   721  	if(r != nil)
   722  		memset(r, 0, sizeof *r);
   723  
   724  	t = a->type;
   725  	v = a->offset;
   726  	if(t == D_ADDR)
   727  		t = a->index;
   728  	switch(t) {
   729  	case D_STATIC:
   730  	case D_EXTERN:
   731  		s = a->sym;
   732  		if(!s->reachable)
   733  			diag("unreachable symbol in vaddr - %s", s->name);
   734  		if(r == nil) {
   735  			diag("need reloc for %D", a);
   736  			errorexit();
   737  		}
   738  		if(flag_shared)
   739  			r->type = D_PCREL;
   740  		else
   741  			r->type = D_ADDR;
   742  		r->siz = 4;	// TODO: 8 for external symbols
   743  		r->off = -1;	// caller must fill in
   744  		r->sym = s;
   745  		r->add = v;
   746  		v = 0;
   747  	}
   748  	return v;
   749  }
   750  
   751  static void
   752  asmandsz(Adr *a, int r, int rex, int m64)
   753  {
   754  	int32 v;
   755  	int t, scale;
   756  	Reloc rel;
   757  
   758  	USED(m64);
   759  	rex &= (0x40 | Rxr);
   760  	v = a->offset;
   761  	t = a->type;
   762  	rel.siz = 0;
   763  	if(a->index != D_NONE) {
   764  		if(t < D_INDIR) { 
   765  			switch(t) {
   766  			default:
   767  				goto bad;
   768  			case D_STATIC:
   769  			case D_EXTERN:
   770  				if(flag_shared)
   771  					goto bad;
   772  				t = D_NONE;
   773  				v = vaddr(a, &rel);
   774  				break;
   775  			case D_AUTO:
   776  			case D_PARAM:
   777  				t = D_SP;
   778  				break;
   779  			}
   780  		} else
   781  			t -= D_INDIR;
   782  		rexflag |= (regrex[(int)a->index] & Rxx) | (regrex[t] & Rxb) | rex;
   783  		if(t == D_NONE) {
   784  			*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
   785  			asmidx(a->scale, a->index, t);
   786  			goto putrelv;
   787  		}
   788  		if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
   789  			*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
   790  			asmidx(a->scale, a->index, t);
   791  			return;
   792  		}
   793  		if(v >= -128 && v < 128 && rel.siz == 0) {
   794  			*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
   795  			asmidx(a->scale, a->index, t);
   796  			*andptr++ = v;
   797  			return;
   798  		}
   799  		*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
   800  		asmidx(a->scale, a->index, t);
   801  		goto putrelv;
   802  	}
   803  	if(t >= D_AL && t <= D_X0+15) {
   804  		if(v)
   805  			goto bad;
   806  		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
   807  		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
   808  		return;
   809  	}
   810  	
   811  	scale = a->scale;
   812  	if(t < D_INDIR) {
   813  		switch(a->type) {
   814  		default:
   815  			goto bad;
   816  		case D_STATIC:
   817  		case D_EXTERN:
   818  			t = D_NONE;
   819  			v = vaddr(a, &rel);
   820  			break;
   821  		case D_AUTO:
   822  		case D_PARAM:
   823  			t = D_SP;
   824  			break;
   825  		}
   826  		scale = 1;
   827  	} else
   828  		t -= D_INDIR;
   829  
   830  	rexflag |= (regrex[t] & Rxb) | rex;
   831  	if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
   832  		if(flag_shared && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || asmode != 64) {
   833  			*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
   834  			goto putrelv;
   835  		}
   836  		/* temporary */
   837  		*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
   838  		*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
   839  		goto putrelv;
   840  	}
   841  	if(t == D_SP || t == D_R12) {
   842  		if(v == 0) {
   843  			*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
   844  			asmidx(scale, D_NONE, t);
   845  			return;
   846  		}
   847  		if(v >= -128 && v < 128) {
   848  			*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
   849  			asmidx(scale, D_NONE, t);
   850  			*andptr++ = v;
   851  			return;
   852  		}
   853  		*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
   854  		asmidx(scale, D_NONE, t);
   855  		goto putrelv;
   856  	}
   857  	if(t >= D_AX && t <= D_R15) {
   858  		if(v == 0 && t != D_BP && t != D_R13) {
   859  			*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
   860  			return;
   861  		}
   862  		if(v >= -128 && v < 128) {
   863  			andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
   864  			andptr[1] = v;
   865  			andptr += 2;
   866  			return;
   867  		}
   868  		*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
   869  		goto putrelv;
   870  	}
   871  	goto bad;
   872  	
   873  putrelv:
   874  	if(rel.siz != 0) {
   875  		Reloc *r;
   876  
   877  		if(rel.siz != 4) {
   878  			diag("bad rel");
   879  			goto bad;
   880  		}
   881  		r = addrel(cursym);
   882  		*r = rel;
   883  		r->off = curp->pc + andptr - and;
   884  	} else if(iself && linkmode == LinkExternal && a->type == D_INDIR+D_FS
   885  		&& HEADTYPE != Hopenbsd) {
   886  		Reloc *r;
   887  		Sym *s;
   888  		
   889  		r = addrel(cursym);
   890  		r->off = curp->pc + andptr - and;
   891  		r->add = 0;
   892  		r->xadd = 0;
   893  		r->siz = 4;
   894  		r->type = D_TLS;
   895  		if(a->offset == tlsoffset+0)
   896  			s = lookup("runtime.g", 0);
   897  		else
   898  			s = lookup("runtime.m", 0);
   899  		s->type = STLSBSS;
   900  		s->reachable = 1;
   901  		s->size = PtrSize;
   902  		s->hide = 1;
   903  		r->sym = s;
   904  		r->xsym = s;
   905  		v = 0;
   906  	}
   907  		
   908  	put4(v);
   909  	return;
   910  
   911  bad:
   912  	diag("asmand: bad address %D", a);
   913  	return;
   914  }
   915  
   916  void
   917  asmand(Adr *a, Adr *ra)
   918  {
   919  	asmandsz(a, reg[ra->type], regrex[ra->type], 0);
   920  }
   921  
   922  void
   923  asmando(Adr *a, int o)
   924  {
   925  	asmandsz(a, o, 0, 0);
   926  }
   927  
   928  static void
   929  bytereg(Adr *a, char *t)
   930  {
   931  	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15)) {
   932  		a->type = D_AL + (a->type-D_AX);
   933  		*t = 0;
   934  	}
   935  }
   936  
   937  #define	E	0xff
   938  Movtab	ymovtab[] =
   939  {
   940  /* push */
   941  	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
   942  	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
   943  	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
   944  	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
   945  	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
   946  	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
   947  	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
   948  	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
   949  
   950  	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
   951  	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
   952  	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
   953  	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
   954  	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
   955  	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},
   956  
   957  /* pop */
   958  	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
   959  	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
   960  	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
   961  	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
   962  	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
   963  	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
   964  	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
   965  
   966  	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
   967  	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
   968  	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
   969  	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
   970  	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},
   971  
   972  /* mov seg */
   973  	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
   974  	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
   975  	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
   976  	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
   977  	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
   978  	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},
   979  
   980  	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
   981  	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
   982  	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
   983  	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
   984  	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
   985  	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},
   986  
   987  /* mov cr */
   988  	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
   989  	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
   990  	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
   991  	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
   992  	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
   993  	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
   994  	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
   995  	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
   996  	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
   997  	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
   998  
   999  	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
  1000  	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
  1001  	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
  1002  	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
  1003  	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
  1004  	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
  1005  	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
  1006  	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
  1007  	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
  1008  	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
  1009  
  1010  /* mov dr */
  1011  	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
  1012  	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
  1013  	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
  1014  	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
  1015  	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
  1016  	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
  1017  
  1018  	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
  1019  	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
  1020  	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
  1021  	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
  1022  	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
  1023  	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
  1024  
  1025  /* mov tr */
  1026  	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
  1027  	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},
  1028  
  1029  	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
  1030  	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},
  1031  
  1032  /* lgdt, sgdt, lidt, sidt */
  1033  	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
  1034  	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
  1035  	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
  1036  	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
  1037  	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
  1038  	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
  1039  	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
  1040  	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
  1041  
  1042  /* lldt, sldt */
  1043  	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
  1044  	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},
  1045  
  1046  /* lmsw, smsw */
  1047  	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
  1048  	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},
  1049  
  1050  /* ltr, str */
  1051  	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
  1052  	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},
  1053  
  1054  /* load full pointer */
  1055  	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
  1056  	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},
  1057  
  1058  /* double shift */
  1059  	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
  1060  	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
  1061  	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
  1062  	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
  1063  	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
  1064  	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
  1065  	0
  1066  };
  1067  
  1068  int
  1069  isax(Adr *a)
  1070  {
  1071  
  1072  	switch(a->type) {
  1073  	case D_AX:
  1074  	case D_AL:
  1075  	case D_AH:
  1076  	case D_INDIR+D_AX:
  1077  		return 1;
  1078  	}
  1079  	if(a->index == D_AX)
  1080  		return 1;
  1081  	return 0;
  1082  }
  1083  
  1084  void
  1085  subreg(Prog *p, int from, int to)
  1086  {
  1087  
  1088  	if(debug['Q'])
  1089  		print("\n%P	s/%R/%R/\n", p, from, to);
  1090  
  1091  	if(p->from.type == from)
  1092  		p->from.type = to;
  1093  	if(p->to.type == from)
  1094  		p->to.type = to;
  1095  
  1096  	if(p->from.index == from)
  1097  		p->from.index = to;
  1098  	if(p->to.index == from)
  1099  		p->to.index = to;
  1100  
  1101  	from += D_INDIR;
  1102  	if(p->from.type == from)
  1103  		p->from.type = to+D_INDIR;
  1104  	if(p->to.type == from)
  1105  		p->to.type = to+D_INDIR;
  1106  
  1107  	if(debug['Q'])
  1108  		print("%P\n", p);
  1109  }
  1110  
  1111  static int
  1112  mediaop(Optab *o, int op, int osize, int z)
  1113  {
  1114  	switch(op){
  1115  	case Pm:
  1116  	case Pe:
  1117  	case Pf2:
  1118  	case Pf3:
  1119  		if(osize != 1){
  1120  			if(op != Pm)
  1121  				*andptr++ = op;
  1122  			*andptr++ = Pm;
  1123  			op = o->op[++z];
  1124  			break;
  1125  		}
  1126  	default:
  1127  		if(andptr == and || andptr[-1] != Pm)
  1128  			*andptr++ = Pm;
  1129  		break;
  1130  	}
  1131  	*andptr++ = op;
  1132  	return z;
  1133  }
  1134  
        /*
         * doasm encodes the single instruction p by appending bytes at andptr.
         *
         * It records p in curp, looks up the Optab for p->as in opindex,
         * emits any prefix byte prefixof reports for the two operands, and
         * caches the operand classes in p->ft and p->tt.  It then scans the
         * optab's ytab for an entry whose operand classes cover those of p
         * (via ycover) and emits the layout chosen by that entry's Z* code.
         * When no ytab entry matches it falls back to ymovtab (label domov).
         * When that fails as well and p->mode is not 64, it tries to reissue
         * the instruction on a copy of p with a register exchanged (label
         * bad).  REX bits are accumulated in rexflag rather than emitted
         * here.  Problems are reported with diag; a few call errorexit.
         */
  1135  void
  1136  doasm(Prog *p)
  1137  {
  1138  	Optab *o;
  1139  	Prog *q, pp;
  1140  	uchar *t;
  1141  	Movtab *mo;
  1142  	int z, op, ft, tt, xo, l, pre;
  1143  	vlong v;
  1144  	Reloc rel, *r;
  1145  	Adr *a;
  1146  	
  1147  	curp = p;	// TODO
  1148  
  1149  	o = opindex[p->as];
  1150  	if(o == nil) {
  1151  		diag("asmins: missing op %P", p);
  1152  		return;
  1153  	}
  1154  	
        	// Emit whatever prefix byte prefixof reports for each operand.
  1155  	pre = prefixof(&p->from);
  1156  	if(pre)
  1157  		*andptr++ = pre;
  1158  	pre = prefixof(&p->to);
  1159  	if(pre)
  1160  		*andptr++ = pre;
  1161  
        	// Fill in the cached operand classes if they are not set yet.
  1162  	if(p->ft == 0)
  1163  		p->ft = oclass(&p->from);
  1164  	if(p->tt == 0)
  1165  		p->tt = oclass(&p->to);
  1166  
  1167  	ft = p->ft * Ymax;
  1168  	tt = p->tt * Ymax;
  1169  
  1170  	t = o->ytab;
  1171  	if(t == 0) {
  1172  		diag("asmins: noproto %P", p);
  1173  		return;
  1174  	}
        	// ytab entries are 4 bytes: from class, to class, Z* layout code,
        	// and a count.  z is the index into o->op for the current entry;
        	// skipping an entry advances z by its count, plus one more (xo)
        	// when o->op[0] is 0x0f.
  1175  	xo = o->op[0] == 0x0f;
  1176  	for(z=0; *t; z+=t[3]+xo,t+=4)
  1177  		if(ycover[ft+t[0]])
  1178  		if(ycover[tt+t[1]])
  1179  			goto found;
  1180  	goto domov;
  1181  
  1182  found:
        	// Apply the optab prefix: emit escape bytes, set rexflag,
        	// fix up byte registers, or just validate p->mode.
  1183  	switch(o->prefix) {
  1184  	case Pq:	/* 16 bit escape and opcode escape */
  1185  		*andptr++ = Pe;
  1186  		*andptr++ = Pm;
  1187  		break;
  1188  	case Pq3:	/* 16 bit escape, Rex.w, and opcode escape */
  1189  		*andptr++ = Pe;
  1190  		*andptr++ = Pw;
  1191  		*andptr++ = Pm;
  1192  		break;
  1193  
  1194  	case Pf2:	/* xmm opcode escape */
  1195  	case Pf3:
  1196  		*andptr++ = o->prefix;
  1197  		*andptr++ = Pm;
  1198  		break;
  1199  
  1200  	case Pm:	/* opcode escape */
  1201  		*andptr++ = Pm;
  1202  		break;
  1203  
  1204  	case Pe:	/* 16 bit escape */
  1205  		*andptr++ = Pe;
  1206  		break;
  1207  
  1208  	case Pw:	/* 64-bit escape */
  1209  		if(p->mode != 64)
  1210  			diag("asmins: illegal 64: %P", p);
  1211  		rexflag |= Pw;
  1212  		break;
  1213  
  1214  	case Pb:	/* botch */
  1215  		bytereg(&p->from, &p->ft);
  1216  		bytereg(&p->to, &p->tt);
  1217  		break;
  1218  
  1219  	case P32:	/* 32 bit but illegal if 64-bit mode */
  1220  		if(p->mode == 64)
  1221  			diag("asmins: illegal in 64-bit mode: %P", p);
  1222  		break;
  1223  
  1224  	case Py:	/* 64-bit only, no prefix */
  1225  		if(p->mode != 64)
  1226  			diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
  1227  		break;
  1228  	}
  1229  
        	// z indexes the first opcode byte of the matched entry.  A leading
        	// 0x0f is emitted right away and op moves on to the byte after it.
  1230  	op = o->op[z];
  1231  	if(op == 0x0f) {
  1232  		*andptr++ = op;
  1233  		op = o->op[++z];
  1234  	}
        	// t[2] selects how opcode bytes and operands are laid out.
  1235  	switch(t[2]) {
  1236  	default:
  1237  		diag("asmins: unknown z %d %P", t[2], p);
  1238  		return;
  1239  
  1240  	case Zpseudo:
  1241  		break;
  1242  
  1243  	case Zlit:
  1244  		for(; op = o->op[z]; z++)
  1245  			*andptr++ = op;
  1246  		break;
  1247  
  1248  	case Zlitm_r:
  1249  		for(; op = o->op[z]; z++)
  1250  			*andptr++ = op;
  1251  		asmand(&p->from, &p->to);
  1252  		break;
  1253  
  1254  	case Zmb_r:
  1255  		bytereg(&p->from, &p->ft);
  1256  		/* fall through */
  1257  	case Zm_r:
  1258  		*andptr++ = op;
  1259  		asmand(&p->from, &p->to);
  1260  		break;
  1261  	case Zm2_r:
  1262  		*andptr++ = op;
  1263  		*andptr++ = o->op[z+1];
  1264  		asmand(&p->from, &p->to);
  1265  		break;
  1266  
  1267  	case Zm_r_xm:
  1268  		mediaop(o, op, t[3], z);
  1269  		asmand(&p->from, &p->to);
  1270  		break;
  1271  
  1272  	case Zm_r_xm_nr:
  1273  		rexflag = 0;
  1274  		mediaop(o, op, t[3], z);
  1275  		asmand(&p->from, &p->to);
  1276  		break;
  1277  
  1278  	case Zm_r_i_xm:
  1279  		mediaop(o, op, t[3], z);
  1280  		asmand(&p->from, &p->to);
  1281  		*andptr++ = p->to.offset;
  1282  		break;
  1283  
  1284  	case Zm_r_3d:
  1285  		*andptr++ = 0x0f;
  1286  		*andptr++ = 0x0f;
  1287  		asmand(&p->from, &p->to);
  1288  		*andptr++ = op;
  1289  		break;
  1290  
  1291  	case Zibm_r:
  1292  		while ((op = o->op[z++]) != 0)
  1293  			*andptr++ = op;
  1294  		asmand(&p->from, &p->to);
  1295  		*andptr++ = p->to.offset;
  1296  		break;
  1297  
  1298  	case Zaut_r:
  1299  		*andptr++ = 0x8d;	/* leal */
  1300  		if(p->from.type != D_ADDR)
  1301  			diag("asmins: Zaut sb type ADDR");
        		// Temporarily present the operand with its index as the type
        		// (and no index) to asmand, then restore the D_ADDR form.
  1302  		p->from.type = p->from.index;
  1303  		p->from.index = D_NONE;
  1304  		asmand(&p->from, &p->to);
  1305  		p->from.index = p->from.type;
  1306  		p->from.type = D_ADDR;
  1307  		break;
  1308  
  1309  	case Zm_o:
  1310  		*andptr++ = op;
  1311  		asmando(&p->from, o->op[z+1]);
  1312  		break;
  1313  
  1314  	case Zr_m:
  1315  		*andptr++ = op;
  1316  		asmand(&p->to, &p->from);
  1317  		break;
  1318  
  1319  	case Zr_m_xm:
  1320  		mediaop(o, op, t[3], z);
  1321  		asmand(&p->to, &p->from);
  1322  		break;
  1323  
  1324  	case Zr_m_xm_nr:
  1325  		rexflag = 0;
  1326  		mediaop(o, op, t[3], z);
  1327  		asmand(&p->to, &p->from);
  1328  		break;
  1329  
  1330  	case Zr_m_i_xm:
  1331  		mediaop(o, op, t[3], z);
  1332  		asmand(&p->to, &p->from);
  1333  		*andptr++ = p->from.offset;
  1334  		break;
  1335  
  1336  	case Zo_m:
  1337  		*andptr++ = op;
  1338  		asmando(&p->to, o->op[z+1]);
  1339  		break;
  1340  
  1341  	case Zo_m64:
  1342  		*andptr++ = op;
  1343  		asmandsz(&p->to, o->op[z+1], 0, 1);
  1344  		break;
  1345  
  1346  	case Zm_ibo:
  1347  		*andptr++ = op;
  1348  		asmando(&p->from, o->op[z+1]);
  1349  		*andptr++ = vaddr(&p->to, nil);
  1350  		break;
  1351  
  1352  	case Zibo_m:
  1353  		*andptr++ = op;
  1354  		asmando(&p->to, o->op[z+1]);
  1355  		*andptr++ = vaddr(&p->from, nil);
  1356  		break;
  1357  
  1358  	case Zibo_m_xm:
  1359  		z = mediaop(o, op, t[3], z);
  1360  		asmando(&p->to, o->op[z+1]);
  1361  		*andptr++ = vaddr(&p->from, nil);
  1362  		break;
  1363  
  1364  	case Z_ib:
  1365  	case Zib_:
  1366  		if(t[2] == Zib_)
  1367  			a = &p->from;
  1368  		else
  1369  			a = &p->to;
  1370  		*andptr++ = op;
  1371  		*andptr++ = vaddr(a, nil);
  1372  		break;
  1373  
  1374  	case Zib_rp:
  1375  		rexflag |= regrex[p->to.type] & (Rxb|0x40);
  1376  		*andptr++ = op + reg[p->to.type];
  1377  		*andptr++ = vaddr(&p->from, nil);
  1378  		break;
  1379  
  1380  	case Zil_rp:
  1381  		rexflag |= regrex[p->to.type] & Rxb;
  1382  		*andptr++ = op + reg[p->to.type];
  1383  		if(o->prefix == Pe) {
  1384  			v = vaddr(&p->from, nil);
  1385  			*andptr++ = v;
  1386  			*andptr++ = v>>8;
  1387  		}
  1388  		else
  1389  			relput4(p, &p->from);
  1390  		break;
  1391  
  1392  	case Zo_iw:
  1393  		*andptr++ = op;
  1394  		if(p->from.type != D_NONE){
  1395  			v = vaddr(&p->from, nil);
  1396  			*andptr++ = v;
  1397  			*andptr++ = v>>8;
  1398  		}
  1399  		break;
  1400  
  1401  	case Ziq_rp:
        		// l is the upper 32 bits of the immediate; together with the
        		// relocation size it picks one of the three encodings below.
  1402  		v = vaddr(&p->from, &rel);
  1403  		l = v>>32;
  1404  		if(l == 0 && rel.siz != 8){
  1405  			//p->mark |= 0100;
  1406  			//print("zero: %llux %P\n", v, p);
  1407  			rexflag &= ~(0x40|Rxw);
  1408  			rexflag |= regrex[p->to.type] & Rxb;
  1409  			*andptr++ = 0xb8 + reg[p->to.type];
  1410  			if(rel.type != 0) {
  1411  				r = addrel(cursym);
  1412  				*r = rel;
  1413  				r->off = p->pc + andptr - and;
  1414  			}
  1415  			put4(v);
  1416  		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
  1417  			//p->mark |= 0100;
  1418  			//print("sign: %llux %P\n", v, p);
  1419  			*andptr ++ = 0xc7;
  1420  			asmando(&p->to, 0);
  1421  			put4(v);
  1422  		}else{	/* need all 8 */
  1423  			//print("all: %llux %P\n", v, p);
  1424  			rexflag |= regrex[p->to.type] & Rxb;
  1425  			*andptr++ = op + reg[p->to.type];
  1426  			if(rel.type != 0) {
  1427  				r = addrel(cursym);
  1428  				*r = rel;
  1429  				r->off = p->pc + andptr - and;
  1430  			}
  1431  			put8(v);
  1432  		}
  1433  		break;
  1434  
  1435  	case Zib_rr:
  1436  		*andptr++ = op;
  1437  		asmand(&p->to, &p->to);
  1438  		*andptr++ = vaddr(&p->from, nil);
  1439  		break;
  1440  
  1441  	case Z_il:
  1442  	case Zil_:
  1443  		if(t[2] == Zil_)
  1444  			a = &p->from;
  1445  		else
  1446  			a = &p->to;
  1447  		*andptr++ = op;
  1448  		if(o->prefix == Pe) {
  1449  			v = vaddr(a, nil);
  1450  			*andptr++ = v;
  1451  			*andptr++ = v>>8;
  1452  		}
  1453  		else
  1454  			relput4(p, a);
  1455  		break;
  1456  
  1457  	case Zm_ilo:
  1458  	case Zilo_m:
  1459  		*andptr++ = op;
  1460  		if(t[2] == Zilo_m) {
  1461  			a = &p->from;
  1462  			asmando(&p->to, o->op[z+1]);
  1463  		} else {
  1464  			a = &p->to;
  1465  			asmando(&p->from, o->op[z+1]);
  1466  		}
  1467  		if(o->prefix == Pe) {
  1468  			v = vaddr(a, nil);
  1469  			*andptr++ = v;
  1470  			*andptr++ = v>>8;
  1471  		}
  1472  		else
  1473  			relput4(p, a);
  1474  		break;
  1475  
  1476  	case Zil_rr:
  1477  		*andptr++ = op;
  1478  		asmand(&p->to, &p->to);
  1479  		if(o->prefix == Pe) {
  1480  			v = vaddr(&p->from, nil);
  1481  			*andptr++ = v;
  1482  			*andptr++ = v>>8;
  1483  		}
  1484  		else
  1485  			relput4(p, &p->from);
  1486  		break;
  1487  
  1488  	case Z_rp:
  1489  		rexflag |= regrex[p->to.type] & (Rxb|0x40);
  1490  		*andptr++ = op + reg[p->to.type];
  1491  		break;
  1492  
  1493  	case Zrp_:
  1494  		rexflag |= regrex[p->from.type] & (Rxb|0x40);
  1495  		*andptr++ = op + reg[p->from.type];
  1496  		break;
  1497  
  1498  	case Zclr:
  1499  		*andptr++ = op;
  1500  		asmand(&p->to, &p->to);
  1501  		break;
  1502  
  1503  	case Zcall:
  1504  		q = p->pcond;
  1505  		if(q == nil) {
  1506  			diag("call without target");
  1507  			errorexit();
  1508  		}
  1509  		if(q->as != ATEXT) {
  1510  			// Could handle this case by making D_PCREL
  1511  			// record the Prog* instead of the Sym*, but let's
  1512  			// wait until the need arises.
  1513  			diag("call of non-TEXT %P", q);
  1514  			errorexit();
  1515  		}
        		// Opcode, then a 4-byte zero placeholder covered by a D_PCREL
        		// relocation against the target's symbol.
  1516  		*andptr++ = op;
  1517  		r = addrel(cursym);
  1518  		r->off = p->pc + andptr - and;
  1519  		r->sym = q->from.sym;
  1520  		r->type = D_PCREL;
  1521  		r->siz = 4;
  1522  		put4(0);
  1523  		break;
  1524  
  1525  	case Zbr:
  1526  	case Zjmp:
  1527  	case Zloop:
  1528  		// TODO: jump across functions needs reloc
  1529  		q = p->pcond;
  1530  		if(q == nil) {
  1531  			diag("jmp/branch/loop without target");
  1532  			errorexit();
  1533  		}
  1534  		if(q->as == ATEXT) {
  1535  			if(t[2] == Zbr) {
  1536  				diag("branch to ATEXT");
  1537  				errorexit();
  1538  			}
        			// Target is a TEXT: emit o->op[z+1] followed by a
        			// relocated 4-byte placeholder, as for Zcall.
  1539  			*andptr++ = o->op[z+1];
  1540  			r = addrel(cursym);
  1541  			r->off = p->pc + andptr - and;
  1542  			r->sym = q->from.sym;
  1543  			r->type = D_PCREL;
  1544  			r->siz = 4;
  1545  			put4(0);
  1546  			break;
  1547  		}
  1548  		// Assumes q is in this function.
  1549  		// TODO: Check in input, preserve in brchain.
  1550  
  1551  		// Fill in backward jump now.
  1552  		if(p->back & 1) {
        			// v is the displacement measured from the end of a
        			// 2-byte (opcode + rel8) instruction.
        			// NOTE(review): the AJCXZL form below emits 3 bytes
        			// (0x67, op, v) while v still assumes 2 -- confirm the
        			// displacement is correct in that case.
  1553  			v = q->pc - (p->pc + 2);
  1554  			if(v >= -128) {
  1555  				if(p->as == AJCXZL)
  1556  					*andptr++ = 0x67;
  1557  				*andptr++ = op;
  1558  				*andptr++ = v;
  1559  			} else if(t[2] == Zloop) {
  1560  				diag("loop too far: %P", p);
  1561  			} else {
        				// Long form: rebase v from the 2-byte to the 5-byte
        				// instruction (opcode + rel32), and by one more byte
        				// when the 0x0f escape is emitted for Zbr.
  1562  				v -= 5-2;
  1563  				if(t[2] == Zbr) {
  1564  					*andptr++ = 0x0f;
  1565  					v--;
  1566  				}
  1567  				*andptr++ = o->op[z+1];
  1568  				*andptr++ = v;
  1569  				*andptr++ = v>>8;
  1570  				*andptr++ = v>>16;
  1571  				*andptr++ = v>>24;
  1572  			}
  1573  			break;
  1574  		}
  1575  		
  1576  		// Annotate target; will fill in later.
        		// p is pushed onto q's comefrom list (linked through forwd)
        		// and a zero displacement of the chosen width is emitted.
  1577  		p->forwd = q->comefrom;
  1578  		q->comefrom = p;
  1579  		if(p->back & 2)	{ // short
  1580  			if(p->as == AJCXZL)
  1581  				*andptr++ = 0x67;
  1582  			*andptr++ = op;
  1583  			*andptr++ = 0;
  1584  		} else if(t[2] == Zloop) {
  1585  			diag("loop too far: %P", p);
  1586  		} else {
  1587  			if(t[2] == Zbr)
  1588  				*andptr++ = 0x0f;
  1589  			*andptr++ = o->op[z+1];
  1590  			*andptr++ = 0;
  1591  			*andptr++ = 0;
  1592  			*andptr++ = 0;
  1593  			*andptr++ = 0;
  1594  		}
  1595  		break;
  1596  				
        		// NOTE(review): the disabled code below and the break after it
        		// follow an unconditional break, so they are never reached.
  1597  /*
  1598  		v = q->pc - p->pc - 2;
  1599  		if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  1600  			*andptr++ = op;
  1601  			*andptr++ = v;
  1602  		} else {
  1603  			v -= 5-2;
  1604  			if(t[2] == Zbr) {
  1605  				*andptr++ = 0x0f;
  1606  				v--;
  1607  			}
  1608  			*andptr++ = o->op[z+1];
  1609  			*andptr++ = v;
  1610  			*andptr++ = v>>8;
  1611  			*andptr++ = v>>16;
  1612  			*andptr++ = v>>24;
  1613  		}
  1614  */
  1615  		break;
  1616  
  1617  	case Zbyte:
        		// op is the number of bytes to emit: 1, 2 (op > 1),
        		// 4 (op > 2) or 8 (op > 4).  A relocation, if vaddr produced
        		// one, is given that size.
  1618  		v = vaddr(&p->from, &rel);
  1619  		if(rel.siz != 0) {
  1620  			rel.siz = op;
  1621  			r = addrel(cursym);
  1622  			*r = rel;
  1623  			r->off = p->pc + andptr - and;
  1624  		}
  1625  		*andptr++ = v;
  1626  		if(op > 1) {
  1627  			*andptr++ = v>>8;
  1628  			if(op > 2) {
  1629  				*andptr++ = v>>16;
  1630  				*andptr++ = v>>24;
  1631  				if(op > 4) {
  1632  					*andptr++ = v>>32;
  1633  					*andptr++ = v>>40;
  1634  					*andptr++ = v>>48;
  1635  					*andptr++ = v>>56;
  1636  				}
  1637  			}
  1638  		}
  1639  		break;
  1640  	}
  1641  	return;
  1642  
  1643  domov:
        	// No ytab entry matched: look in ymovtab for an entry with the
        	// same opcode whose operand classes cover those of p.
  1644  	for(mo=ymovtab; mo->as; mo++)
  1645  		if(p->as == mo->as)
  1646  		if(ycover[ft+mo->ft])
  1647  		if(ycover[tt+mo->tt]){
  1648  			t = mo->op;
  1649  			goto mfound;
  1650  		}
  1651  bad:
  1652  	if(p->mode != 64){
  1653  		/*
  1654  		 * here, the assembly has failed.
  1655  		 * if its a byte instruction that has
  1656  		 * unaddressable registers, try to
  1657  		 * exchange registers and reissue the
  1658  		 * instruction with the operands renamed.
  1659  		 */
        		// pp is a scratch copy of p; subreg renames the register in
        		// it and doasm is called again on the copy, bracketed by a
        		// pair of xchg instructions.
  1660  		pp = *p;
  1661  		z = p->from.type;
  1662  		if(z >= D_BP && z <= D_DI) {
  1663  			if(isax(&p->to) || p->to.type == D_NONE) {
  1664  				// We certainly don't want to exchange
  1665  				// with AX if the op is MUL or DIV.
  1666  				*andptr++ = 0x87;			/* xchg lhs,bx */
  1667  				asmando(&p->from, reg[D_BX]);
  1668  				subreg(&pp, z, D_BX);
  1669  				doasm(&pp);
  1670  				*andptr++ = 0x87;			/* xchg lhs,bx */
  1671  				asmando(&p->from, reg[D_BX]);
  1672  			} else {
  1673  				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  1674  				subreg(&pp, z, D_AX);
  1675  				doasm(&pp);
  1676  				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  1677  			}
  1678  			return;
  1679  		}
  1680  		z = p->to.type;
  1681  		if(z >= D_BP && z <= D_DI) {
  1682  			if(isax(&p->from)) {
  1683  				*andptr++ = 0x87;			/* xchg rhs,bx */
  1684  				asmando(&p->to, reg[D_BX]);
  1685  				subreg(&pp, z, D_BX);
  1686  				doasm(&pp);
  1687  				*andptr++ = 0x87;			/* xchg rhs,bx */
  1688  				asmando(&p->to, reg[D_BX]);
  1689  			} else {
  1690  				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  1691  				subreg(&pp, z, D_AX);
  1692  				doasm(&pp);
  1693  				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  1694  			}
  1695  			return;
  1696  		}
  1697  	}
  1698  	diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
  1699  	return;
  1700  
  1701  mfound:
        	// t now points at the matched Movtab entry's op bytes;
        	// mo->code selects how they are laid out.
  1702  	switch(mo->code) {
  1703  	default:
  1704  		diag("asmins: unknown mov %d %P", mo->code, p);
  1705  		break;
  1706  
  1707  	case 0:	/* lit */
  1708  		for(z=0; t[z]!=E; z++)
  1709  			*andptr++ = t[z];
  1710  		break;
  1711  
  1712  	case 1:	/* r,m */
  1713  		*andptr++ = t[0];
  1714  		asmando(&p->to, t[1]);
  1715  		break;
  1716  
  1717  	case 2:	/* m,r */
  1718  		*andptr++ = t[0];
  1719  		asmando(&p->from, t[1]);
  1720  		break;
  1721  
  1722  	case 3:	/* r,m - 2op */
  1723  		*andptr++ = t[0];
  1724  		*andptr++ = t[1];
  1725  		asmando(&p->to, t[2]);
  1726  		rexflag |= regrex[p->from.type] & (Rxr|0x40);
  1727  		break;
  1728  
  1729  	case 4:	/* m,r - 2op */
  1730  		*andptr++ = t[0];
  1731  		*andptr++ = t[1];
  1732  		asmando(&p->from, t[2]);
  1733  		rexflag |= regrex[p->to.type] & (Rxr|0x40);
  1734  		break;
  1735  
  1736  	case 5:	/* load full pointer, trash heap */
  1737  		if(t[0])
  1738  			*andptr++ = t[0];
  1739  		switch(p->to.index) {
  1740  		default:
  1741  			goto bad;
  1742  		case D_DS:
  1743  			*andptr++ = 0xc5;
  1744  			break;
  1745  		case D_SS:
  1746  			*andptr++ = 0x0f;
  1747  			*andptr++ = 0xb2;
  1748  			break;
  1749  		case D_ES:
  1750  			*andptr++ = 0xc4;
  1751  			break;
  1752  		case D_FS:
  1753  			*andptr++ = 0x0f;
  1754  			*andptr++ = 0xb4;
  1755  			break;
  1756  		case D_GS:
  1757  			*andptr++ = 0x0f;
  1758  			*andptr++ = 0xb5;
  1759  			break;
  1760  		}
  1761  		asmand(&p->from, &p->to);
  1762  		break;
  1763  
  1764  	case 6:	/* double shift */
        		// An optional leading Pw or Pe in the op bytes is consumed
        		// first; t[0]/t[1] are then the opcodes for the constant and
        		// CL/CX count forms respectively.
  1765  		if(t[0] == Pw){
  1766  			if(p->mode != 64)
  1767  				diag("asmins: illegal 64: %P", p);
  1768  			rexflag |= Pw;
  1769  			t++;
  1770  		}else if(t[0] == Pe){
  1771  			*andptr++ = Pe;
  1772  			t++;
  1773  		}
  1774  		z = p->from.type;
  1775  		switch(z) {
  1776  		default:
  1777  			goto bad;
  1778  		case D_CONST:
  1779  			*andptr++ = 0x0f;
  1780  			*andptr++ = t[0];
  1781  			asmandsz(&p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  1782  			*andptr++ = p->from.offset;
  1783  			break;
  1784  		case D_CL:
  1785  		case D_CX:
  1786  			*andptr++ = 0x0f;
  1787  			*andptr++ = t[1];
  1788  			asmandsz(&p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  1789  			break;
  1790  		}
  1791  		break;
  1792  	}
  1793  }
  1794  
  1795  void
  1796  asmins(Prog *p)
  1797  {
  1798  	int n, np, c;
  1799  	Reloc *r;
  1800  
  1801  	rexflag = 0;
  1802  	andptr = and;
  1803  	asmode = p->mode;
  1804  	doasm(p);
  1805  	if(rexflag){
  1806  		/*
  1807  		 * as befits the whole approach of the architecture,
  1808  		 * the rex prefix must appear before the first opcode byte
  1809  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  1810  		 * before the 0f opcode escape!), or it might be ignored.
  1811  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  1812  		 */
  1813  		if(p->mode != 64)
  1814  			diag("asmins: illegal in mode %d: %P", p->mode, p);
  1815  		n = andptr - and;
  1816  		for(np = 0; np < n; np++) {
  1817  			c = and[np];
  1818  			if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
  1819  				break;
  1820  		}
  1821  		memmove(and+np+1, and+np, n-np);
  1822  		and[np] = 0x40 | rexflag;
  1823  		andptr++;
  1824  	}
  1825  	n = andptr - and;
  1826  	for(r=cursym->r+cursym->nr; r-- > cursym->r; ) {
  1827  		if(r->off < p->pc)
  1828  			break;
  1829  		if(rexflag)
  1830  			r->off++;
  1831  		if(r->type == D_PCREL)
  1832  			r->add -= p->pc + n - (r->off + r->siz);
  1833  	}
  1834  }