github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/8g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  static Prog *appendpp(Prog*, int, int, vlong, int, vlong);
    13  static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
    14  
    15  void
    16  defframe(Prog *ptxt)
    17  {
    18  	uint32 frame, ax;
    19  	Prog *p;
    20  	vlong lo, hi;
    21  	NodeList *l;
    22  	Node *n;
    23  
    24  	// fill in argument size
    25  	ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
    26  
    27  	// fill in final stack size
    28  	frame = rnd(stksize+maxarg, widthptr);
    29  	ptxt->to.offset = frame;
    30  	
    31  	// insert code to zero ambiguously live variables
    32  	// so that the garbage collector only sees initialized values
    33  	// when it looks for pointers.
    34  	p = ptxt;
    35  	hi = 0;
    36  	lo = hi;
    37  	ax = 0;
    38  	for(l=curfn->dcl; l != nil; l = l->next) {
    39  		n = l->n;
    40  		if(!n->needzero)
    41  			continue;
    42  		if(n->class != PAUTO)
    43  			fatal("needzero class %d", n->class);
    44  		if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
    45  			fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
    46  		if(lo != hi && n->xoffset + n->type->width == lo - 2*widthptr) {
    47  			// merge with range we already have
    48  			lo = n->xoffset;
    49  			continue;
    50  		}
    51  		// zero old range
    52  		p = zerorange(p, frame, lo, hi, &ax);
    53  
    54  		// set new range
    55  		hi = n->xoffset + n->type->width;
    56  		lo = n->xoffset;
    57  	}
    58  	// zero final range
    59  	zerorange(p, frame, lo, hi, &ax);
    60  }
    61  
    62  static Prog*
    63  zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax)
    64  {
    65  	vlong cnt, i;
    66  
    67  	cnt = hi - lo;
    68  	if(cnt == 0)
    69  		return p;
    70  	if(*ax == 0) {
    71  		p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
    72  		*ax = 1;
    73  	}
    74  	if(cnt <= 4*widthreg) {
    75  		for(i = 0; i < cnt; i += widthreg) {
    76  			p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo+i);
    77  		}
    78  	} else if(!nacl && cnt <= 128*widthreg) {
    79  		p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0);
    80  		p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 1*(128-cnt/widthreg));
    81  		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
    82  	} else {
    83  		p = appendpp(p, AMOVL, D_CONST, cnt/widthreg, D_CX, 0);
    84  		p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0);
    85  		p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
    86  		p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
    87  	}
    88  	return p;
    89  }
    90  
    91  static Prog*	
    92  appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)	
    93  {
    94  	Prog *q;
    95  	q = mal(sizeof(*q));	
    96  	clearp(q);	
    97  	q->as = as;	
    98  	q->lineno = p->lineno;	
    99  	q->from.type = ftype;	
   100  	q->from.offset = foffset;	
   101  	q->to.type = ttype;	
   102  	q->to.offset = toffset;	
   103  	q->link = p->link;	
   104  	p->link = q;	
   105  	return q;	
   106  }
   107  
   108  // Sweep the prog list to mark any used nodes.
   109  void
   110  markautoused(Prog* p)
   111  {
   112  	for (; p; p = p->link) {
   113  		if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL)
   114  			continue;
   115  
   116  		if (p->from.node)
   117  			p->from.node->used = 1;
   118  
   119  		if (p->to.node)
   120  			p->to.node->used = 1;
   121  	}
   122  }
   123  
   124  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
   125  void
   126  fixautoused(Prog* p)
   127  {
   128  	Prog **lp;
   129  
   130  	for (lp=&p; (p=*lp) != P; ) {
   131  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
   132  			*lp = p->link;
   133  			continue;
   134  		}
   135  		if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) {
   136  			// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
   137  			// VARDEFs are interspersed with other code, and a jump might be using the
   138  			// VARDEF as a target. Replace with a no-op instead. A later pass will remove
   139  			// the no-ops.
   140  			p->to.type = D_NONE;
   141  			p->to.node = N;
   142  			p->as = ANOP;
   143  			continue;
   144  		}
   145  
   146  		if (p->from.type == D_AUTO && p->from.node)
   147  			p->from.offset += p->from.node->stkdelta;
   148  
   149  		if (p->to.type == D_AUTO && p->to.node)
   150  			p->to.offset += p->to.node->stkdelta;
   151  
   152  		lp = &p->link;
   153  	}
   154  }
   155  
   156  void
   157  clearfat(Node *nl)
   158  {
   159  	uint32 w, c, q;
   160  	Node n1;
   161  	Prog *p;
   162  
   163  	/* clear a fat object */
   164  	if(debug['g'])
   165  		dump("\nclearfat", nl);
   166  
   167  	w = nl->type->width;
   168  	// Avoid taking the address for simple enough types.
   169  	if(componentgen(N, nl))
   170  		return;
   171  
   172  	c = w % 4;	// bytes
   173  	q = w / 4;	// quads
   174  
   175  	nodreg(&n1, types[tptr], D_DI);
   176  	agen(nl, &n1);
   177  	gconreg(AMOVL, 0, D_AX);
   178  
   179  	if(q > 128 || (q >= 4 && nacl)) {
   180  		gconreg(AMOVL, q, D_CX);
   181  		gins(AREP, N, N);	// repeat
   182  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   183  	} else if(q >= 4) {
   184  		p = gins(ADUFFZERO, N, N);
   185  		p->to.type = D_ADDR;
   186  		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
   187  		// 1 and 128 = magic constants: see ../../pkg/runtime/asm_386.s
   188  		p->to.offset = 1*(128-q);
   189  	} else
   190  	while(q > 0) {
   191  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   192  		q--;
   193  	}
   194  
   195  	while(c > 0) {
   196  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   197  		c--;
   198  	}
   199  }
   200  
   201  /*
   202   * generate:
   203   *	call f
   204   *	proc=-1	normal call but no return
   205   *	proc=0	normal call
   206   *	proc=1	goroutine run in new proc
   207   *	proc=2	defer call save away stack
   208    *	proc=3	normal call to C pointer (not Go func value)
   209   */
   210  void
   211  ginscall(Node *f, int proc)
   212  {
   213  	int32 arg;
   214  	Prog *p;
   215  	Node reg, r1, con;
   216  
   217  	if(f->type != T)
   218  		setmaxarg(f->type);
   219  
   220  	arg = -1;
   221  	// Most functions have a fixed-size argument block, so traceback uses that during unwind.
   222  	// Not all, though: there are some variadic functions in package runtime,
   223  	// and for those we emit call-specific metadata recorded by caller.
   224  	// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
   225  	// so we do this for all indirect calls as well.
   226  	if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
   227  		arg = f->type->argwid;
   228  		if(proc == 1 || proc == 2)
   229  			arg += 2*widthptr;
   230  	}
   231  
   232  	if(arg != -1)
   233  		gargsize(arg);
   234  
   235  	switch(proc) {
   236  	default:
   237  		fatal("ginscall: bad proc %d", proc);
   238  		break;
   239  
   240  	case 0:	// normal call
   241  	case -1:	// normal call but no return
   242  		if(f->op == ONAME && f->class == PFUNC) {
   243  			if(f == deferreturn) {
   244  				// Deferred calls will appear to be returning to
   245  				// the CALL deferreturn(SB) that we are about to emit.
   246  				// However, the stack trace code will show the line
   247  				// of the instruction byte before the return PC. 
   248  				// To avoid that being an unrelated instruction,
   249  				// insert an x86 NOP that we will have the right line number.
   250  				// x86 NOP 0x90 is really XCHG AX, AX; use that description
   251  				// because the NOP pseudo-instruction will be removed by
   252  				// the linker.
   253  				nodreg(&reg, types[TINT], D_AX);
   254  				gins(AXCHGL, &reg, &reg);
   255  			}
   256  			p = gins(ACALL, N, f);
   257  			afunclit(&p->to, f);
   258  			if(proc == -1 || noreturn(p))
   259  				gins(AUNDEF, N, N);
   260  			break;
   261  		}
   262  		nodreg(&reg, types[tptr], D_DX);
   263  		nodreg(&r1, types[tptr], D_BX);
   264  		gmove(f, &reg);
   265  		reg.op = OINDREG;
   266  		gmove(&reg, &r1);
   267  		reg.op = OREGISTER;
   268  		gins(ACALL, &reg, &r1);
   269  		break;
   270  	
   271  	case 3:	// normal call of c function pointer
   272  		gins(ACALL, N, f);
   273  		break;
   274  
   275  	case 1:	// call in new proc (go)
   276  	case 2:	// deferred call (defer)
   277  		nodreg(&reg, types[TINT32], D_CX);
   278  		gins(APUSHL, f, N);
   279  		nodconst(&con, types[TINT32], argsize(f->type));
   280  		gins(APUSHL, &con, N);
   281  		if(proc == 1)
   282  			ginscall(newproc, 0);
   283  		else
   284  			ginscall(deferproc, 0);
   285  		gins(APOPL, N, &reg);
   286  		gins(APOPL, N, &reg);
   287  		if(proc == 2) {
   288  			nodreg(&reg, types[TINT64], D_AX);
   289  			gins(ATESTL, &reg, &reg);
   290  			p = gbranch(AJEQ, T, +1);
   291  			cgen_ret(N);
   292  			patch(p, pc);
   293  		}
   294  		break;
   295  	}
   296  	
   297  	if(arg != -1)
   298  		gargsize(-1);
   299  }
   300  
   301  /*
   302   * n is call to interface method.
   303   * generate res = n.
   304   */
   305  void
   306  cgen_callinter(Node *n, Node *res, int proc)
   307  {
   308  	Node *i, *f;
   309  	Node tmpi, nodi, nodo, nodr, nodsp;
   310  
   311  	i = n->left;
   312  	if(i->op != ODOTINTER)
   313  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   314  
   315  	f = i->right;		// field
   316  	if(f->op != ONAME)
   317  		fatal("cgen_callinter: not ONAME %O", f->op);
   318  
   319  	i = i->left;		// interface
   320  
   321  	if(!i->addable) {
   322  		tempname(&tmpi, i->type);
   323  		cgen(i, &tmpi);
   324  		i = &tmpi;
   325  	}
   326  
   327  	genlist(n->list);		// assign the args
   328  
   329  	// i is now addable, prepare an indirected
   330  	// register to hold its address.
   331  	igen(i, &nodi, res);		// REG = &inter
   332  
   333  	nodindreg(&nodsp, types[tptr], D_SP);
   334  	nodi.type = types[tptr];
   335  	nodi.xoffset += widthptr;
   336  	cgen(&nodi, &nodsp);	// 0(SP) = 4(REG) -- i.data
   337  
   338  	regalloc(&nodo, types[tptr], res);
   339  	nodi.type = types[tptr];
   340  	nodi.xoffset -= widthptr;
   341  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   342  	regfree(&nodi);
   343  
   344  	regalloc(&nodr, types[tptr], &nodo);
   345  	if(n->left->xoffset == BADWIDTH)
   346  		fatal("cgen_callinter: badwidth");
   347  	cgen_checknil(&nodo);
   348  	nodo.op = OINDREG;
   349  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   350  	
   351  	if(proc == 0) {
   352  		// plain call: use direct c function pointer - more efficient
   353  		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
   354  		proc = 3;
   355  	} else {
   356  		// go/defer. generate go func value.
   357  		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f]
   358  	}
   359  
   360  	nodr.type = n->left->type;
   361  	ginscall(&nodr, proc);
   362  
   363  	regfree(&nodr);
   364  	regfree(&nodo);
   365  }
   366  
   367  /*
   368   * generate function call;
   369   *	proc=0	normal call
   370   *	proc=1	goroutine run in new proc
   371   *	proc=2	defer call save away stack
   372   */
   373  void
   374  cgen_call(Node *n, int proc)
   375  {
   376  	Type *t;
   377  	Node nod, afun;
   378  
   379  	if(n == N)
   380  		return;
   381  
   382  	if(n->left->ullman >= UINF) {
   383  		// if name involves a fn call
   384  		// precompute the address of the fn
   385  		tempname(&afun, types[tptr]);
   386  		cgen(n->left, &afun);
   387  	}
   388  
   389  	genlist(n->list);		// assign the args
   390  	t = n->left->type;
   391  
   392  	// call tempname pointer
   393  	if(n->left->ullman >= UINF) {
   394  		regalloc(&nod, types[tptr], N);
   395  		cgen_as(&nod, &afun);
   396  		nod.type = t;
   397  		ginscall(&nod, proc);
   398  		regfree(&nod);
   399  		return;
   400  	}
   401  
   402  	// call pointer
   403  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   404  		regalloc(&nod, types[tptr], N);
   405  		cgen_as(&nod, n->left);
   406  		nod.type = t;
   407  		ginscall(&nod, proc);
   408  		regfree(&nod);
   409  		return;
   410  	}
   411  
   412  	// call direct
   413  	n->left->method = 1;
   414  	ginscall(n->left, proc);
   415  }
   416  
   417  /*
   418   * call to n has already been generated.
   419   * generate:
   420   *	res = return value from call.
   421   */
   422  void
   423  cgen_callret(Node *n, Node *res)
   424  {
   425  	Node nod;
   426  	Type *fp, *t;
   427  	Iter flist;
   428  
   429  	t = n->left->type;
   430  	if(t->etype == TPTR32 || t->etype == TPTR64)
   431  		t = t->type;
   432  
   433  	fp = structfirst(&flist, getoutarg(t));
   434  	if(fp == T)
   435  		fatal("cgen_callret: nil");
   436  
   437  	memset(&nod, 0, sizeof(nod));
   438  	nod.op = OINDREG;
   439  	nod.val.u.reg = D_SP;
   440  	nod.addable = 1;
   441  
   442  	nod.xoffset = fp->width;
   443  	nod.type = fp->type;
   444  	cgen_as(res, &nod);
   445  }
   446  
   447  /*
   448   * call to n has already been generated.
   449   * generate:
   450   *	res = &return value from call.
   451   */
   452  void
   453  cgen_aret(Node *n, Node *res)
   454  {
   455  	Node nod1, nod2;
   456  	Type *fp, *t;
   457  	Iter flist;
   458  
   459  	t = n->left->type;
   460  	if(isptr[t->etype])
   461  		t = t->type;
   462  
   463  	fp = structfirst(&flist, getoutarg(t));
   464  	if(fp == T)
   465  		fatal("cgen_aret: nil");
   466  
   467  	memset(&nod1, 0, sizeof(nod1));
   468  	nod1.op = OINDREG;
   469  	nod1.val.u.reg = D_SP;
   470  	nod1.addable = 1;
   471  
   472  	nod1.xoffset = fp->width;
   473  	nod1.type = fp->type;
   474  
   475  	if(res->op != OREGISTER) {
   476  		regalloc(&nod2, types[tptr], res);
   477  		gins(ALEAL, &nod1, &nod2);
   478  		gins(AMOVL, &nod2, res);
   479  		regfree(&nod2);
   480  	} else
   481  		gins(ALEAL, &nod1, res);
   482  }
   483  
   484  /*
   485   * generate return.
   486   * n->left is assignments to return values.
   487   */
   488  void
   489  cgen_ret(Node *n)
   490  {
   491  	Prog *p;
   492  
   493  	if(n != N)
   494  		genlist(n->list);		// copy out args
   495  	if(hasdefer)
   496  		ginscall(deferreturn, 0);
   497  	genlist(curfn->exit);
   498  	p = gins(ARET, N, N);
   499  	if(n != N && n->op == ORETJMP) {
   500  		p->to.type = D_EXTERN;
   501  		p->to.sym = linksym(n->left->sym);
   502  	}
   503  }
   504  
   505  /*
   506   * generate += *= etc.
   507   */
   508  void
   509  cgen_asop(Node *n)
   510  {
   511  	Node n1, n2, n3, n4;
   512  	Node *nl, *nr;
   513  	Prog *p1;
   514  	Addr addr;
   515  	int a;
   516  
   517  	nl = n->left;
   518  	nr = n->right;
   519  
   520  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   521  		tempname(&n1, nr->type);
   522  		cgen(nr, &n1);
   523  		n2 = *n;
   524  		n2.right = &n1;
   525  		cgen_asop(&n2);
   526  		goto ret;
   527  	}
   528  
   529  	if(!isint[nl->type->etype])
   530  		goto hard;
   531  	if(!isint[nr->type->etype])
   532  		goto hard;
   533  	if(is64(nl->type) || is64(nr->type))
   534  		goto hard;
   535  
   536  	switch(n->etype) {
   537  	case OADD:
   538  		if(smallintconst(nr))
   539  		if(mpgetfix(nr->val.u.xval) == 1) {
   540  			a = optoas(OINC, nl->type);
   541  			if(nl->addable) {
   542  				gins(a, N, nl);
   543  				goto ret;
   544  			}
   545  			if(sudoaddable(a, nl, &addr)) {
   546  				p1 = gins(a, N, N);
   547  				p1->to = addr;
   548  				sudoclean();
   549  				goto ret;
   550  			}
   551  		}
   552  		break;
   553  
   554  	case OSUB:
   555  		if(smallintconst(nr))
   556  		if(mpgetfix(nr->val.u.xval) == 1) {
   557  			a = optoas(ODEC, nl->type);
   558  			if(nl->addable) {
   559  				gins(a, N, nl);
   560  				goto ret;
   561  			}
   562  			if(sudoaddable(a, nl, &addr)) {
   563  				p1 = gins(a, N, N);
   564  				p1->to = addr;
   565  				sudoclean();
   566  				goto ret;
   567  			}
   568  		}
   569  		break;
   570  	}
   571  
   572  	switch(n->etype) {
   573  	case OADD:
   574  	case OSUB:
   575  	case OXOR:
   576  	case OAND:
   577  	case OOR:
   578  		a = optoas(n->etype, nl->type);
   579  		if(nl->addable) {
   580  			if(smallintconst(nr)) {
   581  				gins(a, nr, nl);
   582  				goto ret;
   583  			}
   584  			regalloc(&n2, nr->type, N);
   585  			cgen(nr, &n2);
   586  			gins(a, &n2, nl);
   587  			regfree(&n2);
   588  			goto ret;
   589  		}
   590  		if(nr->ullman < UINF)
   591  		if(sudoaddable(a, nl, &addr)) {
   592  			if(smallintconst(nr)) {
   593  				p1 = gins(a, nr, N);
   594  				p1->to = addr;
   595  				sudoclean();
   596  				goto ret;
   597  			}
   598  			regalloc(&n2, nr->type, N);
   599  			cgen(nr, &n2);
   600  			p1 = gins(a, &n2, N);
   601  			p1->to = addr;
   602  			regfree(&n2);
   603  			sudoclean();
   604  			goto ret;
   605  		}
   606  	}
   607  
   608  hard:
   609  	n2.op = 0;
   610  	n1.op = 0;
   611  	if(nr->ullman >= nl->ullman || nl->addable) {
   612  		mgen(nr, &n2, N);
   613  		nr = &n2;
   614  	} else {
   615  		tempname(&n2, nr->type);
   616  		cgen(nr, &n2);
   617  		nr = &n2;
   618  	}
   619  	if(!nl->addable) {
   620  		igen(nl, &n1, N);
   621  		nl = &n1;
   622  	}
   623  
   624  	n3 = *n;
   625  	n3.left = nl;
   626  	n3.right = nr;
   627  	n3.op = n->etype;
   628  
   629  	mgen(&n3, &n4, N);
   630  	gmove(&n4, nl);
   631  
   632  	if(n1.op)
   633  		regfree(&n1);
   634  	mfree(&n2);
   635  	mfree(&n4);
   636  
   637  ret:
   638  	;
   639  }
   640  
   641  int
   642  samereg(Node *a, Node *b)
   643  {
   644  	if(a->op != OREGISTER)
   645  		return 0;
   646  	if(b->op != OREGISTER)
   647  		return 0;
   648  	if(a->val.u.reg != b->val.u.reg)
   649  		return 0;
   650  	return 1;
   651  }
   652  
   653  /*
   654   * generate division.
   655   * caller must set:
   656   *	ax = allocated AX register
   657   *	dx = allocated DX register
   658   * generates one of:
   659   *	res = nl / nr
   660   *	res = nl % nr
   661   * according to op.
   662   */
   663  void
   664  dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
   665  {
   666  	int check;
   667  	Node n1, t1, t2, t3, t4, n4, nz;
   668  	Type *t, *t0;
   669  	Prog *p1, *p2;
   670  
   671  	// Have to be careful about handling
   672  	// most negative int divided by -1 correctly.
   673  	// The hardware will trap.
   674  	// Also the byte divide instruction needs AH,
   675  	// which we otherwise don't have to deal with.
   676  	// Easiest way to avoid for int8, int16: use int32.
   677  	// For int32 and int64, use explicit test.
   678  	// Could use int64 hw for int32.
   679  	t = nl->type;
   680  	t0 = t;
   681  	check = 0;
   682  	if(issigned[t->etype]) {
   683  		check = 1;
   684  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
   685  			check = 0;
   686  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   687  			check = 0;
   688  	}
   689  	if(t->width < 4) {
   690  		if(issigned[t->etype])
   691  			t = types[TINT32];
   692  		else
   693  			t = types[TUINT32];
   694  		check = 0;
   695  	}
   696  
   697  	tempname(&t1, t);
   698  	tempname(&t2, t);
   699  	if(t0 != t) {
   700  		tempname(&t3, t0);
   701  		tempname(&t4, t0);
   702  		cgen(nl, &t3);
   703  		cgen(nr, &t4);
   704  		// Convert.
   705  		gmove(&t3, &t1);
   706  		gmove(&t4, &t2);
   707  	} else {
   708  		cgen(nl, &t1);
   709  		cgen(nr, &t2);
   710  	}
   711  
   712  	if(!samereg(ax, res) && !samereg(dx, res))
   713  		regalloc(&n1, t, res);
   714  	else
   715  		regalloc(&n1, t, N);
   716  	gmove(&t2, &n1);
   717  	gmove(&t1, ax);
   718  	p2 = P;
   719  	if(nacl) {
   720  		// Native Client does not relay the divide-by-zero trap
   721  		// to the executing program, so we must insert a check
   722  		// for ourselves.
   723  		nodconst(&n4, t, 0);
   724  		gins(optoas(OCMP, t), &n1, &n4);
   725  		p1 = gbranch(optoas(ONE, t), T, +1);
   726  		if(panicdiv == N)
   727  			panicdiv = sysfunc("panicdivide");
   728  		ginscall(panicdiv, -1);
   729  		patch(p1, pc);
   730  	}
   731  	if(check) {
   732  		nodconst(&n4, t, -1);
   733  		gins(optoas(OCMP, t), &n1, &n4);
   734  		p1 = gbranch(optoas(ONE, t), T, +1);
   735  		if(op == ODIV) {
   736  			// a / (-1) is -a.
   737  			gins(optoas(OMINUS, t), N, ax);
   738  			gmove(ax, res);
   739  		} else {
   740  			// a % (-1) is 0.
   741  			nodconst(&n4, t, 0);
   742  			gmove(&n4, res);
   743  		}
   744  		p2 = gbranch(AJMP, T, 0);
   745  		patch(p1, pc);
   746  	}
   747  	if(!issigned[t->etype]) {
   748  		nodconst(&nz, t, 0);
   749  		gmove(&nz, dx);
   750  	} else
   751  		gins(optoas(OEXTEND, t), N, N);
   752  	gins(optoas(op, t), &n1, N);
   753  	regfree(&n1);
   754  
   755  	if(op == ODIV)
   756  		gmove(ax, res);
   757  	else
   758  		gmove(dx, res);
   759  	if(check)
   760  		patch(p2, pc);
   761  }
   762  
   763  static void
   764  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   765  {
   766  	int r;
   767  
   768  	r = reg[dr];
   769  	nodreg(x, types[TINT32], dr);
   770  
   771  	// save current ax and dx if they are live
   772  	// and not the destination
   773  	memset(oldx, 0, sizeof *oldx);
   774  	if(r > 0 && !samereg(x, res)) {
   775  		tempname(oldx, types[TINT32]);
   776  		gmove(x, oldx);
   777  	}
   778  
   779  	regalloc(x, t, x);
   780  }
   781  
   782  static void
   783  restx(Node *x, Node *oldx)
   784  {
   785  	regfree(x);
   786  
   787  	if(oldx->op != 0) {
   788  		x->type = types[TINT32];
   789  		gmove(oldx, x);
   790  	}
   791  }
   792  
   793  /*
   794   * generate division according to op, one of:
   795   *	res = nl / nr
   796   *	res = nl % nr
   797   */
   798  void
   799  cgen_div(int op, Node *nl, Node *nr, Node *res)
   800  {
   801  	Node ax, dx, oldax, olddx;
   802  	Type *t;
   803  
   804  	if(is64(nl->type))
   805  		fatal("cgen_div %T", nl->type);
   806  
   807  	if(issigned[nl->type->etype])
   808  		t = types[TINT32];
   809  	else
   810  		t = types[TUINT32];
   811  	savex(D_AX, &ax, &oldax, res, t);
   812  	savex(D_DX, &dx, &olddx, res, t);
   813  	dodiv(op, nl, nr, res, &ax, &dx);
   814  	restx(&dx, &olddx);
   815  	restx(&ax, &oldax);
   816  }
   817  
   818  /*
   819   * generate shift according to op, one of:
   820   *	res = nl << nr
   821   *	res = nl >> nr
   822   */
   823  void
   824  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   825  {
   826  	Node n1, n2, nt, cx, oldcx, hi, lo;
   827  	int a, w;
   828  	Prog *p1, *p2;
   829  	uvlong sc;
   830  
   831  	if(nl->type->width > 4)
   832  		fatal("cgen_shift %T", nl->type);
   833  
   834  	w = nl->type->width * 8;
   835  
   836  	a = optoas(op, nl->type);
   837  
   838  	if(nr->op == OLITERAL) {
   839  		tempname(&n2, nl->type);
   840  		cgen(nl, &n2);
   841  		regalloc(&n1, nl->type, res);
   842  		gmove(&n2, &n1);
   843  		sc = mpgetfix(nr->val.u.xval);
   844  		if(sc >= nl->type->width*8) {
   845  			// large shift gets 2 shifts by width-1
   846  			gins(a, ncon(w-1), &n1);
   847  			gins(a, ncon(w-1), &n1);
   848  		} else
   849  			gins(a, nr, &n1);
   850  		gmove(&n1, res);
   851  		regfree(&n1);
   852  		return;
   853  	}
   854  
   855  	memset(&oldcx, 0, sizeof oldcx);
   856  	nodreg(&cx, types[TUINT32], D_CX);
   857  	if(reg[D_CX] > 1 && !samereg(&cx, res)) {
   858  		tempname(&oldcx, types[TUINT32]);
   859  		gmove(&cx, &oldcx);
   860  	}
   861  
   862  	if(nr->type->width > 4) {
   863  		tempname(&nt, nr->type);
   864  		n1 = nt;
   865  	} else {
   866  		nodreg(&n1, types[TUINT32], D_CX);
   867  		regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
   868  	}
   869  
   870  	if(samereg(&cx, res))
   871  		regalloc(&n2, nl->type, N);
   872  	else
   873  		regalloc(&n2, nl->type, res);
   874  	if(nl->ullman >= nr->ullman) {
   875  		cgen(nl, &n2);
   876  		cgen(nr, &n1);
   877  	} else {
   878  		cgen(nr, &n1);
   879  		cgen(nl, &n2);
   880  	}
   881  
   882  	// test and fix up large shifts
   883  	if(bounded) {
   884  		if(nr->type->width > 4) {
   885  			// delayed reg alloc
   886  			nodreg(&n1, types[TUINT32], D_CX);
   887  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   888  			split64(&nt, &lo, &hi);
   889  			gmove(&lo, &n1);
   890  			splitclean();
   891  		}
   892  	} else {
   893  		if(nr->type->width > 4) {
   894  			// delayed reg alloc
   895  			nodreg(&n1, types[TUINT32], D_CX);
   896  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   897  			split64(&nt, &lo, &hi);
   898  			gmove(&lo, &n1);
   899  			gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
   900  			p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
   901  			gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
   902  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   903  			splitclean();
   904  			patch(p2, pc);
   905  		} else {
   906  			gins(optoas(OCMP, nr->type), &n1, ncon(w));
   907  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   908  		}
   909  		if(op == ORSH && issigned[nl->type->etype]) {
   910  			gins(a, ncon(w-1), &n2);
   911  		} else {
   912  			gmove(ncon(0), &n2);
   913  		}
   914  		patch(p1, pc);
   915  	}
   916  	gins(a, &n1, &n2);
   917  
   918  	if(oldcx.op != 0)
   919  		gmove(&oldcx, &cx);
   920  
   921  	gmove(&n2, res);
   922  
   923  	regfree(&n1);
   924  	regfree(&n2);
   925  }
   926  
   927  /*
   928   * generate byte multiply:
   929   *	res = nl * nr
   930   * there is no 2-operand byte multiply instruction so
   931   * we do a full-width multiplication and truncate afterwards.
   932   */
   933  void
   934  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
   935  {
   936  	Node n1, n2, nt, *tmp;
   937  	Type *t;
   938  	int a;
   939  
   940  	// copy from byte to full registers
   941  	t = types[TUINT32];
   942  	if(issigned[nl->type->etype])
   943  		t = types[TINT32];
   944  
   945  	// largest ullman on left.
   946  	if(nl->ullman < nr->ullman) {
   947  		tmp = nl;
   948  		nl = nr;
   949  		nr = tmp;
   950  	}
   951  
   952  	tempname(&nt, nl->type);
   953  	cgen(nl, &nt);
   954  	regalloc(&n1, t, res);
   955  	cgen(nr, &n1);
   956  	regalloc(&n2, t, N);
   957  	gmove(&nt, &n2);
   958  	a = optoas(op, t);
   959  	gins(a, &n2, &n1);
   960  	regfree(&n2);
   961  	gmove(&n1, res);
   962  	regfree(&n1);
   963  }
   964  
   965  /*
   966   * generate high multiply:
   967   *   res = (nl*nr) >> width
   968   */
   969  void
   970  cgen_hmul(Node *nl, Node *nr, Node *res)
   971  {
   972  	Type *t;
   973  	int a;
   974  	Node n1, n2, ax, dx;
   975  
   976  	t = nl->type;
   977  	a = optoas(OHMUL, t);
   978  	// gen nl in n1.
   979  	tempname(&n1, t);
   980  	cgen(nl, &n1);
   981  	// gen nr in n2.
   982  	regalloc(&n2, t, res);
   983  	cgen(nr, &n2);
   984  
   985  	// multiply.
   986  	nodreg(&ax, t, D_AX);
   987  	gmove(&n2, &ax);
   988  	gins(a, &n1, N);
   989  	regfree(&n2);
   990  
   991  	if(t->width == 1) {
   992  		// byte multiply behaves differently.
   993  		nodreg(&ax, t, D_AH);
   994  		nodreg(&dx, t, D_DL);
   995  		gmove(&ax, &dx);
   996  	}
   997  	nodreg(&dx, t, D_DX);
   998  	gmove(&dx, res);
   999  }
  1000  
  1001  static void cgen_float387(Node *n, Node *res);
  1002  static void cgen_floatsse(Node *n, Node *res);
  1003  
  1004  /*
  1005   * generate floating-point operation.
  1006   */
  1007  void
  1008  cgen_float(Node *n, Node *res)
  1009  {
  1010  	Node *nl;
  1011  	Node n1, n2;
  1012  	Prog *p1, *p2, *p3;
  1013  
  1014  	nl = n->left;
  1015  	switch(n->op) {
  1016  	case OEQ:
  1017  	case ONE:
  1018  	case OLT:
  1019  	case OLE:
  1020  	case OGE:
  1021  		p1 = gbranch(AJMP, T, 0);
  1022  		p2 = pc;
  1023  		gmove(nodbool(1), res);
  1024  		p3 = gbranch(AJMP, T, 0);
  1025  		patch(p1, pc);
  1026  		bgen(n, 1, 0, p2);
  1027  		gmove(nodbool(0), res);
  1028  		patch(p3, pc);
  1029  		return;
  1030  
  1031  	case OPLUS:
  1032  		cgen(nl, res);
  1033  		return;
  1034  
  1035  	case OCONV:
  1036  		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
  1037  			cgen(nl, res);
  1038  			return;
  1039  		}
  1040  
  1041  		tempname(&n2, n->type);
  1042  		mgen(nl, &n1, res);
  1043  		gmove(&n1, &n2);
  1044  		gmove(&n2, res);
  1045  		mfree(&n1);
  1046  		return;
  1047  	}
  1048  
  1049  	if(use_sse)
  1050  		cgen_floatsse(n, res);
  1051  	else
  1052  		cgen_float387(n, res);
  1053  }
  1054  
  1055  // floating-point.  387 (not SSE2)
  1056  static void
  1057  cgen_float387(Node *n, Node *res)
  1058  {
  1059  	Node f0, f1;
  1060  	Node *nl, *nr;
  1061  
  1062  	nl = n->left;
  1063  	nr = n->right;
  1064  	nodreg(&f0, nl->type, D_F0);
  1065  	nodreg(&f1, n->type, D_F0+1);
  1066  	if(nr != N)
  1067  		goto flt2;
  1068  
  1069  	// unary
  1070  	cgen(nl, &f0);
  1071  	if(n->op != OCONV && n->op != OPLUS)
  1072  		gins(foptoas(n->op, n->type, 0), N, N);
  1073  	gmove(&f0, res);
  1074  	return;
  1075  
  1076  flt2:	// binary
  1077  	if(nl->ullman >= nr->ullman) {
  1078  		cgen(nl, &f0);
  1079  		if(nr->addable)
  1080  			gins(foptoas(n->op, n->type, 0), nr, &f0);
  1081  		else {
  1082  			cgen(nr, &f0);
  1083  			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
  1084  		}
  1085  	} else {
  1086  		cgen(nr, &f0);
  1087  		if(nl->addable)
  1088  			gins(foptoas(n->op, n->type, Frev), nl, &f0);
  1089  		else {
  1090  			cgen(nl, &f0);
  1091  			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
  1092  		}
  1093  	}
  1094  	gmove(&f0, res);
  1095  	return;
  1096  
  1097  }
  1098  
  1099  static void
  1100  cgen_floatsse(Node *n, Node *res)
  1101  {
  1102  	Node *nl, *nr, *r;
  1103  	Node n1, n2, nt;
  1104  	int a;
  1105  
  1106  	nl = n->left;
  1107  	nr = n->right;
  1108  	switch(n->op) {
  1109  	default:
  1110  		dump("cgen_floatsse", n);
  1111  		fatal("cgen_floatsse %O", n->op);
  1112  		return;
  1113  
  1114  	case OMINUS:
  1115  	case OCOM:
  1116  		nr = nodintconst(-1);
  1117  		convlit(&nr, n->type);
  1118  		a = foptoas(OMUL, nl->type, 0);
  1119  		goto sbop;
  1120  
  1121  	// symmetric binary
  1122  	case OADD:
  1123  	case OMUL:
  1124  		a = foptoas(n->op, nl->type, 0);
  1125  		goto sbop;
  1126  
  1127  	// asymmetric binary
  1128  	case OSUB:
  1129  	case OMOD:
  1130  	case ODIV:
  1131  		a = foptoas(n->op, nl->type, 0);
  1132  		goto abop;
  1133  	}
  1134  
  1135  sbop:	// symmetric binary
  1136  	if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
  1137  		r = nl;
  1138  		nl = nr;
  1139  		nr = r;
  1140  	}
  1141  
  1142  abop:	// asymmetric binary
  1143  	if(nl->ullman >= nr->ullman) {
  1144  		tempname(&nt, nl->type);
  1145  		cgen(nl, &nt);
  1146  		mgen(nr, &n2, N);
  1147  		regalloc(&n1, nl->type, res);
  1148  		gmove(&nt, &n1);
  1149  		gins(a, &n2, &n1);
  1150  		gmove(&n1, res);
  1151  		regfree(&n1);
  1152  		mfree(&n2);
  1153  	} else {
  1154  		regalloc(&n2, nr->type, res);
  1155  		cgen(nr, &n2);
  1156  		regalloc(&n1, nl->type, N);
  1157  		cgen(nl, &n1);
  1158  		gins(a, &n2, &n1);
  1159  		regfree(&n2);
  1160  		gmove(&n1, res);
  1161  		regfree(&n1);
  1162  	}
  1163  	return;
  1164  }
  1165  
  1166  void
  1167  bgen_float(Node *n, int true, int likely, Prog *to)
  1168  {
  1169  	int et, a;
  1170  	Node *nl, *nr, *r;
  1171  	Node n1, n2, n3, tmp, t1, t2, ax;
  1172  	Prog *p1, *p2;
  1173  
  1174  	nl = n->left;
  1175  	nr = n->right;
  1176  	a = n->op;
  1177  	if(!true) {
  1178  		// brcom is not valid on floats when NaN is involved.
  1179  		p1 = gbranch(AJMP, T, 0);
  1180  		p2 = gbranch(AJMP, T, 0);
  1181  		patch(p1, pc);
  1182  		// No need to avoid re-genning ninit.
  1183  		bgen_float(n, 1, -likely, p2);
  1184  		patch(gbranch(AJMP, T, 0), to);
  1185  		patch(p2, pc);
  1186  		return;
  1187  	}
  1188  
  1189  	if(use_sse)
  1190  		goto sse;
  1191  	else
  1192  		goto x87;
  1193  
  1194  x87:
  1195  	a = brrev(a);	// because the args are stacked
  1196  	if(a == OGE || a == OGT) {
  1197  		// only < and <= work right with NaN; reverse if needed
  1198  		r = nr;
  1199  		nr = nl;
  1200  		nl = r;
  1201  		a = brrev(a);
  1202  	}
  1203  
  1204  	nodreg(&tmp, nr->type, D_F0);
  1205  	nodreg(&n2, nr->type, D_F0 + 1);
  1206  	nodreg(&ax, types[TUINT16], D_AX);
  1207  	et = simsimtype(nr->type);
  1208  	if(et == TFLOAT64) {
  1209  		if(nl->ullman > nr->ullman) {
  1210  			cgen(nl, &tmp);
  1211  			cgen(nr, &tmp);
  1212  			gins(AFXCHD, &tmp, &n2);
  1213  		} else {
  1214  			cgen(nr, &tmp);
  1215  			cgen(nl, &tmp);
  1216  		}
  1217  		gins(AFUCOMIP, &tmp, &n2);
  1218  		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF
  1219  	} else {
  1220  		// TODO(rsc): The moves back and forth to memory
  1221  		// here are for truncating the value to 32 bits.
  1222  		// This handles 32-bit comparison but presumably
  1223  		// all the other ops have the same problem.
  1224  		// We need to figure out what the right general
  1225  		// solution is, besides telling people to use float64.
  1226  		tempname(&t1, types[TFLOAT32]);
  1227  		tempname(&t2, types[TFLOAT32]);
  1228  		cgen(nr, &t1);
  1229  		cgen(nl, &t2);
  1230  		gmove(&t2, &tmp);
  1231  		gins(AFCOMFP, &t1, &tmp);
  1232  		gins(AFSTSW, N, &ax);
  1233  		gins(ASAHF, N, N);
  1234  	}
  1235  
  1236  	goto ret;
  1237  
  1238  sse:
  1239  	if(!nl->addable) {
  1240  		tempname(&n1, nl->type);
  1241  		cgen(nl, &n1);
  1242  		nl = &n1;
  1243  	}
  1244  	if(!nr->addable) {
  1245  		tempname(&tmp, nr->type);
  1246  		cgen(nr, &tmp);
  1247  		nr = &tmp;
  1248  	}
  1249  	regalloc(&n2, nr->type, N);
  1250  	gmove(nr, &n2);
  1251  	nr = &n2;
  1252  
  1253  	if(nl->op != OREGISTER) {
  1254  		regalloc(&n3, nl->type, N);
  1255  		gmove(nl, &n3);
  1256  		nl = &n3;
  1257  	}
  1258  
  1259  	if(a == OGE || a == OGT) {
  1260  		// only < and <= work right with NaN; reverse if needed
  1261  		r = nr;
  1262  		nr = nl;
  1263  		nl = r;
  1264  		a = brrev(a);
  1265  	}
  1266  
  1267  	gins(foptoas(OCMP, nr->type, 0), nl, nr);
  1268  	if(nl->op == OREGISTER)
  1269  		regfree(nl);
  1270  	regfree(nr);
  1271  
  1272  ret:
  1273  	if(a == OEQ) {
  1274  		// neither NE nor P
  1275  		p1 = gbranch(AJNE, T, -likely);
  1276  		p2 = gbranch(AJPS, T, -likely);
  1277  		patch(gbranch(AJMP, T, 0), to);
  1278  		patch(p1, pc);
  1279  		patch(p2, pc);
  1280  	} else if(a == ONE) {
  1281  		// either NE or P
  1282  		patch(gbranch(AJNE, T, likely), to);
  1283  		patch(gbranch(AJPS, T, likely), to);
  1284  	} else
  1285  		patch(gbranch(optoas(a, nr->type), T, likely), to);
  1286  
  1287  }
  1288  
  1289  // Called after regopt and peep have run.
  1290  // Expand CHECKNIL pseudo-op into actual nil pointer check.
  1291  void
  1292  expandchecks(Prog *firstp)
  1293  {
  1294  	Prog *p, *p1, *p2;
  1295  
  1296  	for(p = firstp; p != P; p = p->link) {
  1297  		if(p->as != ACHECKNIL)
  1298  			continue;
  1299  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
  1300  			warnl(p->lineno, "generated nil check");
  1301  		// check is
  1302  		//	CMP arg, $0
  1303  		//	JNE 2(PC) (likely)
  1304  		//	MOV AX, 0
  1305  		p1 = mal(sizeof *p1);
  1306  		p2 = mal(sizeof *p2);
  1307  		clearp(p1);
  1308  		clearp(p2);
  1309  		p1->link = p2;
  1310  		p2->link = p->link;
  1311  		p->link = p1;
  1312  		p1->lineno = p->lineno;
  1313  		p2->lineno = p->lineno;
  1314  		p1->pc = 9999;
  1315  		p2->pc = 9999;
  1316  		p->as = ACMPL;
  1317  		p->to.type = D_CONST;
  1318  		p->to.offset = 0;
  1319  		p1->as = AJNE;
  1320  		p1->from.type = D_CONST;
  1321  		p1->from.offset = 1; // likely
  1322  		p1->to.type = D_BRANCH;
  1323  		p1->to.u.branch = p2->link;
  1324  		// crash by write to memory address 0.
  1325  		// if possible, since we know arg is 0, use 0(arg),
  1326  		// which will be shorter to encode than plain 0.
  1327  		p2->as = AMOVL;
  1328  		p2->from.type = D_AX;
  1329  		if(regtyp(&p->from))
  1330  			p2->to.type = p->from.type + D_INDIR;
  1331  		else
  1332  			p2->to.type = D_INDIR+D_NONE;
  1333  		p2->to.offset = 0;
  1334  	}
  1335  }