github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/cmd/8g/ggen.c

github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/cmd/8g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  static Prog *appendpp(Prog*, int, int, vlong, int, vlong);
    13  static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
    14  
    15  void
    16  defframe(Prog *ptxt)
    17  {
    18  	uint32 frame, ax;
    19  	Prog *p;
    20  	vlong lo, hi;
    21  	NodeList *l;
    22  	Node *n;
    23  
    24  	// fill in argument size
    25  	ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
    26  
    27  	// fill in final stack size
    28  	frame = rnd(stksize+maxarg, widthptr);
    29  	ptxt->to.offset = frame;
    30  	
    31  	// insert code to zero ambiguously live variables
    32  	// so that the garbage collector only sees initialized values
    33  	// when it looks for pointers.
    34  	p = ptxt;
    35  	hi = 0;
    36  	lo = hi;
    37  	ax = 0;
    38  	for(l=curfn->dcl; l != nil; l = l->next) {
    39  		n = l->n;
    40  		if(!n->needzero)
    41  			continue;
    42  		if(n->class != PAUTO)
    43  			fatal("needzero class %d", n->class);
    44  		if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
    45  			fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
    46  		if(lo != hi && n->xoffset + n->type->width == lo - 2*widthptr) {
    47  			// merge with range we already have
    48  			lo = n->xoffset;
    49  			continue;
    50  		}
    51  		// zero old range
    52  		p = zerorange(p, frame, lo, hi, &ax);
    53  
    54  		// set new range
    55  		hi = n->xoffset + n->type->width;
    56  		lo = n->xoffset;
    57  	}
    58  	// zero final range
    59  	zerorange(p, frame, lo, hi, &ax);
    60  }
    61  
    62  static Prog*
    63  zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax)
    64  {
    65  	vlong cnt, i;
    66  
    67  	cnt = hi - lo;
    68  	if(cnt == 0)
    69  		return p;
    70  	if(*ax == 0) {
    71  		p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
    72  		*ax = 1;
    73  	}
    74  	if(cnt <= 4*widthreg) {
    75  		for(i = 0; i < cnt; i += widthreg) {
    76  			p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo+i);
    77  		}
    78  	} else if(!nacl && cnt <= 128*widthreg) {
    79  		p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0);
    80  		p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 1*(128-cnt/widthreg));
    81  		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
    82  	} else {
    83  		p = appendpp(p, AMOVL, D_CONST, cnt/widthreg, D_CX, 0);
    84  		p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0);
    85  		p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
    86  		p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
    87  	}
    88  	return p;
    89  }
    90  
    91  static Prog*	
    92  appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)	
    93  {
    94  	Prog *q;
    95  	q = mal(sizeof(*q));	
    96  	clearp(q);	
    97  	q->as = as;	
    98  	q->lineno = p->lineno;	
    99  	q->from.type = ftype;	
   100  	q->from.offset = foffset;	
   101  	q->to.type = ttype;	
   102  	q->to.offset = toffset;	
   103  	q->link = p->link;	
   104  	p->link = q;	
   105  	return q;	
   106  }
   107  
   108  // Sweep the prog list to mark any used nodes.
   109  void
   110  markautoused(Prog* p)
   111  {
   112  	for (; p; p = p->link) {
   113  		if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL)
   114  			continue;
   115  
   116  		if (p->from.node)
   117  			p->from.node->used = 1;
   118  
   119  		if (p->to.node)
   120  			p->to.node->used = 1;
   121  	}
   122  }
   123  
   124  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
   125  void
   126  fixautoused(Prog* p)
   127  {
   128  	Prog **lp;
   129  
   130  	for (lp=&p; (p=*lp) != P; ) {
   131  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
   132  			*lp = p->link;
   133  			continue;
   134  		}
   135  		if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) {
   136  			// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
   137  			// VARDEFs are interspersed with other code, and a jump might be using the
   138  			// VARDEF as a target. Replace with a no-op instead. A later pass will remove
   139  			// the no-ops.
   140  			p->to.type = D_NONE;
   141  			p->to.node = N;
   142  			p->as = ANOP;
   143  			continue;
   144  		}
   145  
   146  		if (p->from.type == D_AUTO && p->from.node)
   147  			p->from.offset += p->from.node->stkdelta;
   148  
   149  		if (p->to.type == D_AUTO && p->to.node)
   150  			p->to.offset += p->to.node->stkdelta;
   151  
   152  		lp = &p->link;
   153  	}
   154  }
   155  
   156  void
   157  clearfat(Node *nl)
   158  {
   159  	uint32 w, c, q;
   160  	Node n1, z;
   161  	Prog *p;
   162  
   163  	/* clear a fat object */
   164  	if(debug['g'])
   165  		dump("\nclearfat", nl);
   166  
   167  	w = nl->type->width;
   168  	// Avoid taking the address for simple enough types.
   169  	if(componentgen(N, nl))
   170  		return;
   171  
   172  	c = w % 4;	// bytes
   173  	q = w / 4;	// quads
   174  
   175  	if(q < 4) {
   176  		// Write sequence of MOV 0, off(base) instead of using STOSL.
   177  		// The hope is that although the code will be slightly longer,
   178  		// the MOVs will have no dependencies and pipeline better
   179  		// than the unrolled STOSL loop.
   180  		// NOTE: Must use agen, not igen, so that optimizer sees address
   181  		// being taken. We are not writing on field boundaries.
   182  		regalloc(&n1, types[tptr], N);
   183  		agen(nl, &n1);
   184  		n1.op = OINDREG;
   185  		nodconst(&z, types[TUINT64], 0);
   186  		while(q-- > 0) {
   187  			n1.type = z.type;
   188  			gins(AMOVL, &z, &n1);
   189  			n1.xoffset += 4;
   190  		}
   191  		nodconst(&z, types[TUINT8], 0);
   192  		while(c-- > 0) {
   193  			n1.type = z.type;
   194  			gins(AMOVB, &z, &n1);
   195  			n1.xoffset++;
   196  		}
   197  		regfree(&n1);
   198  		return;
   199  	}
   200  
   201  	nodreg(&n1, types[tptr], D_DI);
   202  	agen(nl, &n1);
   203  	gconreg(AMOVL, 0, D_AX);
   204  
   205  	if(q > 128 || (q >= 4 && nacl)) {
   206  		gconreg(AMOVL, q, D_CX);
   207  		gins(AREP, N, N);	// repeat
   208  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   209  	} else if(q >= 4) {
   210  		p = gins(ADUFFZERO, N, N);
   211  		p->to.type = D_ADDR;
   212  		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
   213  		// 1 and 128 = magic constants: see ../../runtime/asm_386.s
   214  		p->to.offset = 1*(128-q);
   215  	} else
   216  	while(q > 0) {
   217  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   218  		q--;
   219  	}
   220  
   221  	while(c > 0) {
   222  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   223  		c--;
   224  	}
   225  }
   226  
   227  /*
   228   * generate:
   229   *	call f
   230   *	proc=-1	normal call but no return
   231   *	proc=0	normal call
   232   *	proc=1	goroutine run in new proc
   233   *	proc=2	defer call save away stack
   234    *	proc=3	normal call to C pointer (not Go func value)
   235   */
   236  void
   237  ginscall(Node *f, int proc)
   238  {
   239  	Prog *p;
   240  	Node reg, r1, con, stk;
   241  	int32 extra;
   242  
   243  	if(f->type != T) {
   244  		extra = 0;
   245  		if(proc == 1 || proc == 2)
   246  			extra = 2 * widthptr;
   247  		setmaxarg(f->type, extra);
   248  	}
   249  
   250  	switch(proc) {
   251  	default:
   252  		fatal("ginscall: bad proc %d", proc);
   253  		break;
   254  
   255  	case 0:	// normal call
   256  	case -1:	// normal call but no return
   257  		if(f->op == ONAME && f->class == PFUNC) {
   258  			if(f == deferreturn) {
   259  				// Deferred calls will appear to be returning to
   260  				// the CALL deferreturn(SB) that we are about to emit.
   261  				// However, the stack trace code will show the line
   262  				// of the instruction byte before the return PC. 
   263  				// To avoid that being an unrelated instruction,
   264  				// insert an x86 NOP that we will have the right line number.
   265  				// x86 NOP 0x90 is really XCHG AX, AX; use that description
   266  				// because the NOP pseudo-instruction will be removed by
   267  				// the linker.
   268  				nodreg(&reg, types[TINT], D_AX);
   269  				gins(AXCHGL, &reg, &reg);
   270  			}
   271  			p = gins(ACALL, N, f);
   272  			afunclit(&p->to, f);
   273  			if(proc == -1 || noreturn(p))
   274  				gins(AUNDEF, N, N);
   275  			break;
   276  		}
   277  		nodreg(&reg, types[tptr], D_DX);
   278  		nodreg(&r1, types[tptr], D_BX);
   279  		gmove(f, &reg);
   280  		reg.op = OINDREG;
   281  		gmove(&reg, &r1);
   282  		reg.op = OREGISTER;
   283  		gins(ACALL, &reg, &r1);
   284  		break;
   285  	
   286  	case 3:	// normal call of c function pointer
   287  		gins(ACALL, N, f);
   288  		break;
   289  
   290  	case 1:	// call in new proc (go)
   291  	case 2:	// deferred call (defer)
   292  		memset(&stk, 0, sizeof(stk));
   293  		stk.op = OINDREG;
   294  		stk.val.u.reg = D_SP;
   295  		stk.xoffset = 0;
   296  
   297  		// size of arguments at 0(SP)
   298  		nodconst(&con, types[TINT32], argsize(f->type));
   299  		gins(AMOVL, &con, &stk);
   300  
   301  		// FuncVal* at 4(SP)
   302  		stk.xoffset = widthptr;
   303  		gins(AMOVL, f, &stk);
   304  
   305  		if(proc == 1)
   306  			ginscall(newproc, 0);
   307  		else
   308  			ginscall(deferproc, 0);
   309  		if(proc == 2) {
   310  			nodreg(&reg, types[TINT32], D_AX);
   311  			gins(ATESTL, &reg, &reg);
   312  			p = gbranch(AJEQ, T, +1);
   313  			cgen_ret(N);
   314  			patch(p, pc);
   315  		}
   316  		break;
   317  	}
   318  }
   319  
   320  /*
   321   * n is call to interface method.
   322   * generate res = n.
   323   */
   324  void
   325  cgen_callinter(Node *n, Node *res, int proc)
   326  {
   327  	Node *i, *f;
   328  	Node tmpi, nodi, nodo, nodr, nodsp;
   329  
   330  	i = n->left;
   331  	if(i->op != ODOTINTER)
   332  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   333  
   334  	f = i->right;		// field
   335  	if(f->op != ONAME)
   336  		fatal("cgen_callinter: not ONAME %O", f->op);
   337  
   338  	i = i->left;		// interface
   339  
   340  	if(!i->addable) {
   341  		tempname(&tmpi, i->type);
   342  		cgen(i, &tmpi);
   343  		i = &tmpi;
   344  	}
   345  
   346  	genlist(n->list);		// assign the args
   347  
   348  	// i is now addable, prepare an indirected
   349  	// register to hold its address.
   350  	igen(i, &nodi, res);		// REG = &inter
   351  
   352  	nodindreg(&nodsp, types[tptr], D_SP);
   353  	nodsp.xoffset = 0;
   354  	if(proc != 0)
   355  		nodsp.xoffset += 2 * widthptr; // leave room for size & fn
   356  	nodi.type = types[tptr];
   357  	nodi.xoffset += widthptr;
   358  	cgen(&nodi, &nodsp);	// {0 or 8}(SP) = 4(REG) -- i.data
   359  
   360  	regalloc(&nodo, types[tptr], res);
   361  	nodi.type = types[tptr];
   362  	nodi.xoffset -= widthptr;
   363  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   364  	regfree(&nodi);
   365  
   366  	regalloc(&nodr, types[tptr], &nodo);
   367  	if(n->left->xoffset == BADWIDTH)
   368  		fatal("cgen_callinter: badwidth");
   369  	cgen_checknil(&nodo);
   370  	nodo.op = OINDREG;
   371  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   372  	
   373  	if(proc == 0) {
   374  		// plain call: use direct c function pointer - more efficient
   375  		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
   376  		proc = 3;
   377  	} else {
   378  		// go/defer. generate go func value.
   379  		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f]
   380  	}
   381  
   382  	nodr.type = n->left->type;
   383  	ginscall(&nodr, proc);
   384  
   385  	regfree(&nodr);
   386  	regfree(&nodo);
   387  }
   388  
   389  /*
   390   * generate function call;
   391   *	proc=0	normal call
   392   *	proc=1	goroutine run in new proc
   393   *	proc=2	defer call save away stack
   394   */
   395  void
   396  cgen_call(Node *n, int proc)
   397  {
   398  	Type *t;
   399  	Node nod, afun;
   400  
   401  	if(n == N)
   402  		return;
   403  
   404  	if(n->left->ullman >= UINF) {
   405  		// if name involves a fn call
   406  		// precompute the address of the fn
   407  		tempname(&afun, types[tptr]);
   408  		cgen(n->left, &afun);
   409  	}
   410  
   411  	genlist(n->list);		// assign the args
   412  	t = n->left->type;
   413  
   414  	// call tempname pointer
   415  	if(n->left->ullman >= UINF) {
   416  		regalloc(&nod, types[tptr], N);
   417  		cgen_as(&nod, &afun);
   418  		nod.type = t;
   419  		ginscall(&nod, proc);
   420  		regfree(&nod);
   421  		return;
   422  	}
   423  
   424  	// call pointer
   425  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   426  		regalloc(&nod, types[tptr], N);
   427  		cgen_as(&nod, n->left);
   428  		nod.type = t;
   429  		ginscall(&nod, proc);
   430  		regfree(&nod);
   431  		return;
   432  	}
   433  
   434  	// call direct
   435  	n->left->method = 1;
   436  	ginscall(n->left, proc);
   437  }
   438  
   439  /*
   440   * call to n has already been generated.
   441   * generate:
   442   *	res = return value from call.
   443   */
   444  void
   445  cgen_callret(Node *n, Node *res)
   446  {
   447  	Node nod;
   448  	Type *fp, *t;
   449  	Iter flist;
   450  
   451  	t = n->left->type;
   452  	if(t->etype == TPTR32 || t->etype == TPTR64)
   453  		t = t->type;
   454  
   455  	fp = structfirst(&flist, getoutarg(t));
   456  	if(fp == T)
   457  		fatal("cgen_callret: nil");
   458  
   459  	memset(&nod, 0, sizeof(nod));
   460  	nod.op = OINDREG;
   461  	nod.val.u.reg = D_SP;
   462  	nod.addable = 1;
   463  
   464  	nod.xoffset = fp->width;
   465  	nod.type = fp->type;
   466  	cgen_as(res, &nod);
   467  }
   468  
   469  /*
   470   * call to n has already been generated.
   471   * generate:
   472   *	res = &return value from call.
   473   */
   474  void
   475  cgen_aret(Node *n, Node *res)
   476  {
   477  	Node nod1, nod2;
   478  	Type *fp, *t;
   479  	Iter flist;
   480  
   481  	t = n->left->type;
   482  	if(isptr[t->etype])
   483  		t = t->type;
   484  
   485  	fp = structfirst(&flist, getoutarg(t));
   486  	if(fp == T)
   487  		fatal("cgen_aret: nil");
   488  
   489  	memset(&nod1, 0, sizeof(nod1));
   490  	nod1.op = OINDREG;
   491  	nod1.val.u.reg = D_SP;
   492  	nod1.addable = 1;
   493  
   494  	nod1.xoffset = fp->width;
   495  	nod1.type = fp->type;
   496  
   497  	if(res->op != OREGISTER) {
   498  		regalloc(&nod2, types[tptr], res);
   499  		gins(ALEAL, &nod1, &nod2);
   500  		gins(AMOVL, &nod2, res);
   501  		regfree(&nod2);
   502  	} else
   503  		gins(ALEAL, &nod1, res);
   504  }
   505  
   506  /*
   507   * generate return.
   508   * n->left is assignments to return values.
   509   */
   510  void
   511  cgen_ret(Node *n)
   512  {
   513  	Prog *p;
   514  
   515  	if(n != N)
   516  		genlist(n->list);		// copy out args
   517  	if(hasdefer)
   518  		ginscall(deferreturn, 0);
   519  	genlist(curfn->exit);
   520  	p = gins(ARET, N, N);
   521  	if(n != N && n->op == ORETJMP) {
   522  		p->to.type = D_EXTERN;
   523  		p->to.sym = linksym(n->left->sym);
   524  	}
   525  }
   526  
   527  /*
   528   * generate += *= etc.
   529   */
   530  void
   531  cgen_asop(Node *n)
   532  {
   533  	Node n1, n2, n3, n4;
   534  	Node *nl, *nr;
   535  	Prog *p1;
   536  	Addr addr;
   537  	int a;
   538  
   539  	nl = n->left;
   540  	nr = n->right;
   541  
   542  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   543  		tempname(&n1, nr->type);
   544  		cgen(nr, &n1);
   545  		n2 = *n;
   546  		n2.right = &n1;
   547  		cgen_asop(&n2);
   548  		goto ret;
   549  	}
   550  
   551  	if(!isint[nl->type->etype])
   552  		goto hard;
   553  	if(!isint[nr->type->etype])
   554  		goto hard;
   555  	if(is64(nl->type) || is64(nr->type))
   556  		goto hard;
   557  
   558  	switch(n->etype) {
   559  	case OADD:
   560  		if(smallintconst(nr))
   561  		if(mpgetfix(nr->val.u.xval) == 1) {
   562  			a = optoas(OINC, nl->type);
   563  			if(nl->addable) {
   564  				gins(a, N, nl);
   565  				goto ret;
   566  			}
   567  			if(sudoaddable(a, nl, &addr)) {
   568  				p1 = gins(a, N, N);
   569  				p1->to = addr;
   570  				sudoclean();
   571  				goto ret;
   572  			}
   573  		}
   574  		break;
   575  
   576  	case OSUB:
   577  		if(smallintconst(nr))
   578  		if(mpgetfix(nr->val.u.xval) == 1) {
   579  			a = optoas(ODEC, nl->type);
   580  			if(nl->addable) {
   581  				gins(a, N, nl);
   582  				goto ret;
   583  			}
   584  			if(sudoaddable(a, nl, &addr)) {
   585  				p1 = gins(a, N, N);
   586  				p1->to = addr;
   587  				sudoclean();
   588  				goto ret;
   589  			}
   590  		}
   591  		break;
   592  	}
   593  
   594  	switch(n->etype) {
   595  	case OADD:
   596  	case OSUB:
   597  	case OXOR:
   598  	case OAND:
   599  	case OOR:
   600  		a = optoas(n->etype, nl->type);
   601  		if(nl->addable) {
   602  			if(smallintconst(nr)) {
   603  				gins(a, nr, nl);
   604  				goto ret;
   605  			}
   606  			regalloc(&n2, nr->type, N);
   607  			cgen(nr, &n2);
   608  			gins(a, &n2, nl);
   609  			regfree(&n2);
   610  			goto ret;
   611  		}
   612  		if(nr->ullman < UINF)
   613  		if(sudoaddable(a, nl, &addr)) {
   614  			if(smallintconst(nr)) {
   615  				p1 = gins(a, nr, N);
   616  				p1->to = addr;
   617  				sudoclean();
   618  				goto ret;
   619  			}
   620  			regalloc(&n2, nr->type, N);
   621  			cgen(nr, &n2);
   622  			p1 = gins(a, &n2, N);
   623  			p1->to = addr;
   624  			regfree(&n2);
   625  			sudoclean();
   626  			goto ret;
   627  		}
   628  	}
   629  
   630  hard:
   631  	n2.op = 0;
   632  	n1.op = 0;
   633  	if(nr->ullman >= nl->ullman || nl->addable) {
   634  		mgen(nr, &n2, N);
   635  		nr = &n2;
   636  	} else {
   637  		tempname(&n2, nr->type);
   638  		cgen(nr, &n2);
   639  		nr = &n2;
   640  	}
   641  	if(!nl->addable) {
   642  		igen(nl, &n1, N);
   643  		nl = &n1;
   644  	}
   645  
   646  	n3 = *n;
   647  	n3.left = nl;
   648  	n3.right = nr;
   649  	n3.op = n->etype;
   650  
   651  	mgen(&n3, &n4, N);
   652  	gmove(&n4, nl);
   653  
   654  	if(n1.op)
   655  		regfree(&n1);
   656  	mfree(&n2);
   657  	mfree(&n4);
   658  
   659  ret:
   660  	;
   661  }
   662  
   663  int
   664  samereg(Node *a, Node *b)
   665  {
   666  	if(a->op != OREGISTER)
   667  		return 0;
   668  	if(b->op != OREGISTER)
   669  		return 0;
   670  	if(a->val.u.reg != b->val.u.reg)
   671  		return 0;
   672  	return 1;
   673  }
   674  
   675  /*
   676   * generate division.
   677   * caller must set:
   678   *	ax = allocated AX register
   679   *	dx = allocated DX register
   680   * generates one of:
   681   *	res = nl / nr
   682   *	res = nl % nr
   683   * according to op.
   684   */
   685  void
   686  dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
   687  {
   688  	int check;
   689  	Node n1, t1, t2, t3, t4, n4, nz;
   690  	Type *t, *t0;
   691  	Prog *p1, *p2;
   692  
   693  	// Have to be careful about handling
   694  	// most negative int divided by -1 correctly.
   695  	// The hardware will trap.
   696  	// Also the byte divide instruction needs AH,
   697  	// which we otherwise don't have to deal with.
   698  	// Easiest way to avoid for int8, int16: use int32.
   699  	// For int32 and int64, use explicit test.
   700  	// Could use int64 hw for int32.
   701  	t = nl->type;
   702  	t0 = t;
   703  	check = 0;
   704  	if(issigned[t->etype]) {
   705  		check = 1;
   706  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
   707  			check = 0;
   708  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   709  			check = 0;
   710  	}
   711  	if(t->width < 4) {
   712  		if(issigned[t->etype])
   713  			t = types[TINT32];
   714  		else
   715  			t = types[TUINT32];
   716  		check = 0;
   717  	}
   718  
   719  	tempname(&t1, t);
   720  	tempname(&t2, t);
   721  	if(t0 != t) {
   722  		tempname(&t3, t0);
   723  		tempname(&t4, t0);
   724  		cgen(nl, &t3);
   725  		cgen(nr, &t4);
   726  		// Convert.
   727  		gmove(&t3, &t1);
   728  		gmove(&t4, &t2);
   729  	} else {
   730  		cgen(nl, &t1);
   731  		cgen(nr, &t2);
   732  	}
   733  
   734  	if(!samereg(ax, res) && !samereg(dx, res))
   735  		regalloc(&n1, t, res);
   736  	else
   737  		regalloc(&n1, t, N);
   738  	gmove(&t2, &n1);
   739  	gmove(&t1, ax);
   740  	p2 = P;
   741  	if(nacl) {
   742  		// Native Client does not relay the divide-by-zero trap
   743  		// to the executing program, so we must insert a check
   744  		// for ourselves.
   745  		nodconst(&n4, t, 0);
   746  		gins(optoas(OCMP, t), &n1, &n4);
   747  		p1 = gbranch(optoas(ONE, t), T, +1);
   748  		if(panicdiv == N)
   749  			panicdiv = sysfunc("panicdivide");
   750  		ginscall(panicdiv, -1);
   751  		patch(p1, pc);
   752  	}
   753  	if(check) {
   754  		nodconst(&n4, t, -1);
   755  		gins(optoas(OCMP, t), &n1, &n4);
   756  		p1 = gbranch(optoas(ONE, t), T, +1);
   757  		if(op == ODIV) {
   758  			// a / (-1) is -a.
   759  			gins(optoas(OMINUS, t), N, ax);
   760  			gmove(ax, res);
   761  		} else {
   762  			// a % (-1) is 0.
   763  			nodconst(&n4, t, 0);
   764  			gmove(&n4, res);
   765  		}
   766  		p2 = gbranch(AJMP, T, 0);
   767  		patch(p1, pc);
   768  	}
   769  	if(!issigned[t->etype]) {
   770  		nodconst(&nz, t, 0);
   771  		gmove(&nz, dx);
   772  	} else
   773  		gins(optoas(OEXTEND, t), N, N);
   774  	gins(optoas(op, t), &n1, N);
   775  	regfree(&n1);
   776  
   777  	if(op == ODIV)
   778  		gmove(ax, res);
   779  	else
   780  		gmove(dx, res);
   781  	if(check)
   782  		patch(p2, pc);
   783  }
   784  
   785  static void
   786  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   787  {
   788  	int r;
   789  
   790  	r = reg[dr];
   791  	nodreg(x, types[TINT32], dr);
   792  
   793  	// save current ax and dx if they are live
   794  	// and not the destination
   795  	memset(oldx, 0, sizeof *oldx);
   796  	if(r > 0 && !samereg(x, res)) {
   797  		tempname(oldx, types[TINT32]);
   798  		gmove(x, oldx);
   799  	}
   800  
   801  	regalloc(x, t, x);
   802  }
   803  
   804  static void
   805  restx(Node *x, Node *oldx)
   806  {
   807  	regfree(x);
   808  
   809  	if(oldx->op != 0) {
   810  		x->type = types[TINT32];
   811  		gmove(oldx, x);
   812  	}
   813  }
   814  
   815  /*
   816   * generate division according to op, one of:
   817   *	res = nl / nr
   818   *	res = nl % nr
   819   */
   820  void
   821  cgen_div(int op, Node *nl, Node *nr, Node *res)
   822  {
   823  	Node ax, dx, oldax, olddx;
   824  	Type *t;
   825  
   826  	if(is64(nl->type))
   827  		fatal("cgen_div %T", nl->type);
   828  
   829  	if(issigned[nl->type->etype])
   830  		t = types[TINT32];
   831  	else
   832  		t = types[TUINT32];
   833  	savex(D_AX, &ax, &oldax, res, t);
   834  	savex(D_DX, &dx, &olddx, res, t);
   835  	dodiv(op, nl, nr, res, &ax, &dx);
   836  	restx(&dx, &olddx);
   837  	restx(&ax, &oldax);
   838  }
   839  
   840  /*
   841   * generate shift according to op, one of:
   842   *	res = nl << nr
   843   *	res = nl >> nr
   844   */
   845  void
   846  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   847  {
   848  	Node n1, n2, nt, cx, oldcx, hi, lo;
   849  	int a, w;
   850  	Prog *p1, *p2;
   851  	uvlong sc;
   852  
   853  	if(nl->type->width > 4)
   854  		fatal("cgen_shift %T", nl->type);
   855  
   856  	w = nl->type->width * 8;
   857  
   858  	a = optoas(op, nl->type);
   859  
   860  	if(nr->op == OLITERAL) {
   861  		tempname(&n2, nl->type);
   862  		cgen(nl, &n2);
   863  		regalloc(&n1, nl->type, res);
   864  		gmove(&n2, &n1);
   865  		sc = mpgetfix(nr->val.u.xval);
   866  		if(sc >= nl->type->width*8) {
   867  			// large shift gets 2 shifts by width-1
   868  			gins(a, ncon(w-1), &n1);
   869  			gins(a, ncon(w-1), &n1);
   870  		} else
   871  			gins(a, nr, &n1);
   872  		gmove(&n1, res);
   873  		regfree(&n1);
   874  		return;
   875  	}
   876  
   877  	memset(&oldcx, 0, sizeof oldcx);
   878  	nodreg(&cx, types[TUINT32], D_CX);
   879  	if(reg[D_CX] > 1 && !samereg(&cx, res)) {
   880  		tempname(&oldcx, types[TUINT32]);
   881  		gmove(&cx, &oldcx);
   882  	}
   883  
   884  	if(nr->type->width > 4) {
   885  		tempname(&nt, nr->type);
   886  		n1 = nt;
   887  	} else {
   888  		nodreg(&n1, types[TUINT32], D_CX);
   889  		regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
   890  	}
   891  
   892  	if(samereg(&cx, res))
   893  		regalloc(&n2, nl->type, N);
   894  	else
   895  		regalloc(&n2, nl->type, res);
   896  	if(nl->ullman >= nr->ullman) {
   897  		cgen(nl, &n2);
   898  		cgen(nr, &n1);
   899  	} else {
   900  		cgen(nr, &n1);
   901  		cgen(nl, &n2);
   902  	}
   903  
   904  	// test and fix up large shifts
   905  	if(bounded) {
   906  		if(nr->type->width > 4) {
   907  			// delayed reg alloc
   908  			nodreg(&n1, types[TUINT32], D_CX);
   909  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   910  			split64(&nt, &lo, &hi);
   911  			gmove(&lo, &n1);
   912  			splitclean();
   913  		}
   914  	} else {
   915  		if(nr->type->width > 4) {
   916  			// delayed reg alloc
   917  			nodreg(&n1, types[TUINT32], D_CX);
   918  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   919  			split64(&nt, &lo, &hi);
   920  			gmove(&lo, &n1);
   921  			gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
   922  			p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
   923  			gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
   924  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   925  			splitclean();
   926  			patch(p2, pc);
   927  		} else {
   928  			gins(optoas(OCMP, nr->type), &n1, ncon(w));
   929  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   930  		}
   931  		if(op == ORSH && issigned[nl->type->etype]) {
   932  			gins(a, ncon(w-1), &n2);
   933  		} else {
   934  			gmove(ncon(0), &n2);
   935  		}
   936  		patch(p1, pc);
   937  	}
   938  	gins(a, &n1, &n2);
   939  
   940  	if(oldcx.op != 0)
   941  		gmove(&oldcx, &cx);
   942  
   943  	gmove(&n2, res);
   944  
   945  	regfree(&n1);
   946  	regfree(&n2);
   947  }
   948  
   949  /*
   950   * generate byte multiply:
   951   *	res = nl * nr
   952   * there is no 2-operand byte multiply instruction so
   953   * we do a full-width multiplication and truncate afterwards.
   954   */
   955  void
   956  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
   957  {
   958  	Node n1, n2, nt, *tmp;
   959  	Type *t;
   960  	int a;
   961  
   962  	// copy from byte to full registers
   963  	t = types[TUINT32];
   964  	if(issigned[nl->type->etype])
   965  		t = types[TINT32];
   966  
   967  	// largest ullman on left.
   968  	if(nl->ullman < nr->ullman) {
   969  		tmp = nl;
   970  		nl = nr;
   971  		nr = tmp;
   972  	}
   973  
   974  	tempname(&nt, nl->type);
   975  	cgen(nl, &nt);
   976  	regalloc(&n1, t, res);
   977  	cgen(nr, &n1);
   978  	regalloc(&n2, t, N);
   979  	gmove(&nt, &n2);
   980  	a = optoas(op, t);
   981  	gins(a, &n2, &n1);
   982  	regfree(&n2);
   983  	gmove(&n1, res);
   984  	regfree(&n1);
   985  }
   986  
   987  /*
   988   * generate high multiply:
   989   *   res = (nl*nr) >> width
   990   */
   991  void
   992  cgen_hmul(Node *nl, Node *nr, Node *res)
   993  {
   994  	Type *t;
   995  	int a;
   996  	Node n1, n2, ax, dx;
   997  
   998  	t = nl->type;
   999  	a = optoas(OHMUL, t);
  1000  	// gen nl in n1.
  1001  	tempname(&n1, t);
  1002  	cgen(nl, &n1);
  1003  	// gen nr in n2.
  1004  	regalloc(&n2, t, res);
  1005  	cgen(nr, &n2);
  1006  
  1007  	// multiply.
  1008  	nodreg(&ax, t, D_AX);
  1009  	gmove(&n2, &ax);
  1010  	gins(a, &n1, N);
  1011  	regfree(&n2);
  1012  
  1013  	if(t->width == 1) {
  1014  		// byte multiply behaves differently.
  1015  		nodreg(&ax, t, D_AH);
  1016  		nodreg(&dx, t, D_DX);
  1017  		gmove(&ax, &dx);
  1018  	}
  1019  	nodreg(&dx, t, D_DX);
  1020  	gmove(&dx, res);
  1021  }
  1022  
  1023  static void cgen_float387(Node *n, Node *res);
  1024  static void cgen_floatsse(Node *n, Node *res);
  1025  
  1026  /*
  1027   * generate floating-point operation.
  1028   */
  1029  void
  1030  cgen_float(Node *n, Node *res)
  1031  {
  1032  	Node *nl;
  1033  	Node n1, n2;
  1034  	Prog *p1, *p2, *p3;
  1035  
  1036  	nl = n->left;
  1037  	switch(n->op) {
  1038  	case OEQ:
  1039  	case ONE:
  1040  	case OLT:
  1041  	case OLE:
  1042  	case OGE:
  1043  		p1 = gbranch(AJMP, T, 0);
  1044  		p2 = pc;
  1045  		gmove(nodbool(1), res);
  1046  		p3 = gbranch(AJMP, T, 0);
  1047  		patch(p1, pc);
  1048  		bgen(n, 1, 0, p2);
  1049  		gmove(nodbool(0), res);
  1050  		patch(p3, pc);
  1051  		return;
  1052  
  1053  	case OPLUS:
  1054  		cgen(nl, res);
  1055  		return;
  1056  
  1057  	case OCONV:
  1058  		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
  1059  			cgen(nl, res);
  1060  			return;
  1061  		}
  1062  
  1063  		tempname(&n2, n->type);
  1064  		mgen(nl, &n1, res);
  1065  		gmove(&n1, &n2);
  1066  		gmove(&n2, res);
  1067  		mfree(&n1);
  1068  		return;
  1069  	}
  1070  
  1071  	if(use_sse)
  1072  		cgen_floatsse(n, res);
  1073  	else
  1074  		cgen_float387(n, res);
  1075  }
  1076  
  1077  // floating-point.  387 (not SSE2)
  1078  static void
  1079  cgen_float387(Node *n, Node *res)
  1080  {
  1081  	Node f0, f1;
  1082  	Node *nl, *nr;
  1083  
  1084  	nl = n->left;
  1085  	nr = n->right;
  1086  	nodreg(&f0, nl->type, D_F0);
  1087  	nodreg(&f1, n->type, D_F0+1);
  1088  	if(nr != N)
  1089  		goto flt2;
  1090  
  1091  	// unary
  1092  	cgen(nl, &f0);
  1093  	if(n->op != OCONV && n->op != OPLUS)
  1094  		gins(foptoas(n->op, n->type, 0), N, N);
  1095  	gmove(&f0, res);
  1096  	return;
  1097  
  1098  flt2:	// binary
  1099  	if(nl->ullman >= nr->ullman) {
  1100  		cgen(nl, &f0);
  1101  		if(nr->addable)
  1102  			gins(foptoas(n->op, n->type, 0), nr, &f0);
  1103  		else {
  1104  			cgen(nr, &f0);
  1105  			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
  1106  		}
  1107  	} else {
  1108  		cgen(nr, &f0);
  1109  		if(nl->addable)
  1110  			gins(foptoas(n->op, n->type, Frev), nl, &f0);
  1111  		else {
  1112  			cgen(nl, &f0);
  1113  			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
  1114  		}
  1115  	}
  1116  	gmove(&f0, res);
  1117  	return;
  1118  
  1119  }
  1120  
  1121  static void
  1122  cgen_floatsse(Node *n, Node *res)
  1123  {
  1124  	Node *nl, *nr, *r;
  1125  	Node n1, n2, nt;
  1126  	int a;
  1127  
  1128  	nl = n->left;
  1129  	nr = n->right;
  1130  	switch(n->op) {
  1131  	default:
  1132  		dump("cgen_floatsse", n);
  1133  		fatal("cgen_floatsse %O", n->op);
  1134  		return;
  1135  
  1136  	case OMINUS:
  1137  	case OCOM:
  1138  		nr = nodintconst(-1);
  1139  		convlit(&nr, n->type);
  1140  		a = foptoas(OMUL, nl->type, 0);
  1141  		goto sbop;
  1142  
  1143  	// symmetric binary
  1144  	case OADD:
  1145  	case OMUL:
  1146  		a = foptoas(n->op, nl->type, 0);
  1147  		goto sbop;
  1148  
  1149  	// asymmetric binary
  1150  	case OSUB:
  1151  	case OMOD:
  1152  	case ODIV:
  1153  		a = foptoas(n->op, nl->type, 0);
  1154  		goto abop;
  1155  	}
  1156  
  1157  sbop:	// symmetric binary
  1158  	if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
  1159  		r = nl;
  1160  		nl = nr;
  1161  		nr = r;
  1162  	}
  1163  
  1164  abop:	// asymmetric binary
  1165  	if(nl->ullman >= nr->ullman) {
  1166  		tempname(&nt, nl->type);
  1167  		cgen(nl, &nt);
  1168  		mgen(nr, &n2, N);
  1169  		regalloc(&n1, nl->type, res);
  1170  		gmove(&nt, &n1);
  1171  		gins(a, &n2, &n1);
  1172  		gmove(&n1, res);
  1173  		regfree(&n1);
  1174  		mfree(&n2);
  1175  	} else {
  1176  		regalloc(&n2, nr->type, res);
  1177  		cgen(nr, &n2);
  1178  		regalloc(&n1, nl->type, N);
  1179  		cgen(nl, &n1);
  1180  		gins(a, &n2, &n1);
  1181  		regfree(&n2);
  1182  		gmove(&n1, res);
  1183  		regfree(&n1);
  1184  	}
  1185  	return;
  1186  }
  1187  
  1188  void
  1189  bgen_float(Node *n, int true, int likely, Prog *to)
  1190  {
  1191  	int et, a;
  1192  	Node *nl, *nr, *r;
  1193  	Node n1, n2, n3, tmp, t1, t2, ax;
  1194  	Prog *p1, *p2;
  1195  
  1196  	nl = n->left;
  1197  	nr = n->right;
  1198  	a = n->op;
  1199  	if(!true) {
  1200  		// brcom is not valid on floats when NaN is involved.
  1201  		p1 = gbranch(AJMP, T, 0);
  1202  		p2 = gbranch(AJMP, T, 0);
  1203  		patch(p1, pc);
  1204  		// No need to avoid re-genning ninit.
  1205  		bgen_float(n, 1, -likely, p2);
  1206  		patch(gbranch(AJMP, T, 0), to);
  1207  		patch(p2, pc);
  1208  		return;
  1209  	}
  1210  
  1211  	if(use_sse)
  1212  		goto sse;
  1213  	else
  1214  		goto x87;
  1215  
  1216  x87:
  1217  	a = brrev(a);	// because the args are stacked
  1218  	if(a == OGE || a == OGT) {
  1219  		// only < and <= work right with NaN; reverse if needed
  1220  		r = nr;
  1221  		nr = nl;
  1222  		nl = r;
  1223  		a = brrev(a);
  1224  	}
  1225  
  1226  	nodreg(&tmp, nr->type, D_F0);
  1227  	nodreg(&n2, nr->type, D_F0 + 1);
  1228  	nodreg(&ax, types[TUINT16], D_AX);
  1229  	et = simsimtype(nr->type);
  1230  	if(et == TFLOAT64) {
  1231  		if(nl->ullman > nr->ullman) {
  1232  			cgen(nl, &tmp);
  1233  			cgen(nr, &tmp);
  1234  			gins(AFXCHD, &tmp, &n2);
  1235  		} else {
  1236  			cgen(nr, &tmp);
  1237  			cgen(nl, &tmp);
  1238  		}
  1239  		gins(AFUCOMIP, &tmp, &n2);
  1240  		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF
  1241  	} else {
  1242  		// TODO(rsc): The moves back and forth to memory
  1243  		// here are for truncating the value to 32 bits.
  1244  		// This handles 32-bit comparison but presumably
  1245  		// all the other ops have the same problem.
  1246  		// We need to figure out what the right general
  1247  		// solution is, besides telling people to use float64.
  1248  		tempname(&t1, types[TFLOAT32]);
  1249  		tempname(&t2, types[TFLOAT32]);
  1250  		cgen(nr, &t1);
  1251  		cgen(nl, &t2);
  1252  		gmove(&t2, &tmp);
  1253  		gins(AFCOMFP, &t1, &tmp);
  1254  		gins(AFSTSW, N, &ax);
  1255  		gins(ASAHF, N, N);
  1256  	}
  1257  
  1258  	goto ret;
  1259  
  1260  sse:
  1261  	if(!nl->addable) {
  1262  		tempname(&n1, nl->type);
  1263  		cgen(nl, &n1);
  1264  		nl = &n1;
  1265  	}
  1266  	if(!nr->addable) {
  1267  		tempname(&tmp, nr->type);
  1268  		cgen(nr, &tmp);
  1269  		nr = &tmp;
  1270  	}
  1271  	regalloc(&n2, nr->type, N);
  1272  	gmove(nr, &n2);
  1273  	nr = &n2;
  1274  
  1275  	if(nl->op != OREGISTER) {
  1276  		regalloc(&n3, nl->type, N);
  1277  		gmove(nl, &n3);
  1278  		nl = &n3;
  1279  	}
  1280  
  1281  	if(a == OGE || a == OGT) {
  1282  		// only < and <= work right with NaN; reverse if needed
  1283  		r = nr;
  1284  		nr = nl;
  1285  		nl = r;
  1286  		a = brrev(a);
  1287  	}
  1288  
  1289  	gins(foptoas(OCMP, nr->type, 0), nl, nr);
  1290  	if(nl->op == OREGISTER)
  1291  		regfree(nl);
  1292  	regfree(nr);
  1293  
  1294  ret:
  1295  	if(a == OEQ) {
  1296  		// neither NE nor P
  1297  		p1 = gbranch(AJNE, T, -likely);
  1298  		p2 = gbranch(AJPS, T, -likely);
  1299  		patch(gbranch(AJMP, T, 0), to);
  1300  		patch(p1, pc);
  1301  		patch(p2, pc);
  1302  	} else if(a == ONE) {
  1303  		// either NE or P
  1304  		patch(gbranch(AJNE, T, likely), to);
  1305  		patch(gbranch(AJPS, T, likely), to);
  1306  	} else
  1307  		patch(gbranch(optoas(a, nr->type), T, likely), to);
  1308  
  1309  }
  1310  
  1311  // Called after regopt and peep have run.
  1312  // Expand CHECKNIL pseudo-op into actual nil pointer check.
  1313  void
  1314  expandchecks(Prog *firstp)
  1315  {
  1316  	Prog *p, *p1, *p2;
  1317  
  1318  	for(p = firstp; p != P; p = p->link) {
  1319  		if(p->as != ACHECKNIL)
  1320  			continue;
  1321  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
  1322  			warnl(p->lineno, "generated nil check");
  1323  		// check is
  1324  		//	CMP arg, $0
  1325  		//	JNE 2(PC) (likely)
  1326  		//	MOV AX, 0
  1327  		p1 = mal(sizeof *p1);
  1328  		p2 = mal(sizeof *p2);
  1329  		clearp(p1);
  1330  		clearp(p2);
  1331  		p1->link = p2;
  1332  		p2->link = p->link;
  1333  		p->link = p1;
  1334  		p1->lineno = p->lineno;
  1335  		p2->lineno = p->lineno;
  1336  		p1->pc = 9999;
  1337  		p2->pc = 9999;
  1338  		p->as = ACMPL;
  1339  		p->to.type = D_CONST;
  1340  		p->to.offset = 0;
  1341  		p1->as = AJNE;
  1342  		p1->from.type = D_CONST;
  1343  		p1->from.offset = 1; // likely
  1344  		p1->to.type = D_BRANCH;
  1345  		p1->to.u.branch = p2->link;
  1346  		// crash by write to memory address 0.
  1347  		// if possible, since we know arg is 0, use 0(arg),
  1348  		// which will be shorter to encode than plain 0.
  1349  		p2->as = AMOVL;
  1350  		p2->from.type = D_AX;
  1351  		if(regtyp(&p->from))
  1352  			p2->to.type = p->from.type + D_INDIR;
  1353  		else
  1354  			p2->to.type = D_INDIR+D_NONE;
  1355  		p2->to.offset = 0;
  1356  	}
  1357  }