github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/cmd/8g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  static Prog *appendpp(Prog*, int, int, vlong, int, vlong);
    13  static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
    14  
    15  void
    16  defframe(Prog *ptxt)
    17  {
    18  	uint32 frame, ax;
    19  	Prog *p;
    20  	vlong lo, hi;
    21  	NodeList *l;
    22  	Node *n;
    23  
    24  	// fill in argument size
    25  	ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
    26  
    27  	// fill in final stack size
    28  	frame = rnd(stksize+maxarg, widthptr);
    29  	ptxt->to.offset = frame;
    30  	
    31  	// insert code to zero ambiguously live variables
    32  	// so that the garbage collector only sees initialized values
    33  	// when it looks for pointers.
    34  	p = ptxt;
    35  	hi = 0;
    36  	lo = hi;
    37  	ax = 0;
    38  	for(l=curfn->dcl; l != nil; l = l->next) {
    39  		n = l->n;
    40  		if(!n->needzero)
    41  			continue;
    42  		if(n->class != PAUTO)
    43  			fatal("needzero class %d", n->class);
    44  		if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
    45  			fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
    46  		if(lo != hi && n->xoffset + n->type->width == lo - 2*widthptr) {
    47  			// merge with range we already have
    48  			lo = n->xoffset;
    49  			continue;
    50  		}
    51  		// zero old range
    52  		p = zerorange(p, frame, lo, hi, &ax);
    53  
    54  		// set new range
    55  		hi = n->xoffset + n->type->width;
    56  		lo = n->xoffset;
    57  	}
    58  	// zero final range
    59  	zerorange(p, frame, lo, hi, &ax);
    60  }
    61  
    62  static Prog*
    63  zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax)
    64  {
    65  	vlong cnt, i;
    66  
    67  	cnt = hi - lo;
    68  	if(cnt == 0)
    69  		return p;
    70  	if(*ax == 0) {
    71  		p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
    72  		*ax = 1;
    73  	}
    74  	if(cnt <= 4*widthreg) {
    75  		for(i = 0; i < cnt; i += widthreg) {
    76  			p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo+i);
    77  		}
    78  	} else if(!nacl && cnt <= 128*widthreg) {
    79  		p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0);
    80  		p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 1*(128-cnt/widthreg));
    81  		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
    82  	} else {
    83  		p = appendpp(p, AMOVL, D_CONST, cnt/widthreg, D_CX, 0);
    84  		p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0);
    85  		p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
    86  		p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
    87  	}
    88  	return p;
    89  }
    90  
    91  static Prog*	
    92  appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)	
    93  {
    94  	Prog *q;
    95  	q = mal(sizeof(*q));	
    96  	clearp(q);	
    97  	q->as = as;	
    98  	q->lineno = p->lineno;	
    99  	q->from.type = ftype;	
   100  	q->from.offset = foffset;	
   101  	q->to.type = ttype;	
   102  	q->to.offset = toffset;	
   103  	q->link = p->link;	
   104  	p->link = q;	
   105  	return q;	
   106  }
   107  
   108  // Sweep the prog list to mark any used nodes.
   109  void
   110  markautoused(Prog* p)
   111  {
   112  	for (; p; p = p->link) {
   113  		if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL)
   114  			continue;
   115  
   116  		if (p->from.node)
   117  			p->from.node->used = 1;
   118  
   119  		if (p->to.node)
   120  			p->to.node->used = 1;
   121  	}
   122  }
   123  
   124  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
   125  void
   126  fixautoused(Prog* p)
   127  {
   128  	Prog **lp;
   129  
   130  	for (lp=&p; (p=*lp) != P; ) {
   131  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
   132  			*lp = p->link;
   133  			continue;
   134  		}
   135  		if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) {
   136  			// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
   137  			// VARDEFs are interspersed with other code, and a jump might be using the
   138  			// VARDEF as a target. Replace with a no-op instead. A later pass will remove
   139  			// the no-ops.
   140  			p->to.type = D_NONE;
   141  			p->to.node = N;
   142  			p->as = ANOP;
   143  			continue;
   144  		}
   145  
   146  		if (p->from.type == D_AUTO && p->from.node)
   147  			p->from.offset += p->from.node->stkdelta;
   148  
   149  		if (p->to.type == D_AUTO && p->to.node)
   150  			p->to.offset += p->to.node->stkdelta;
   151  
   152  		lp = &p->link;
   153  	}
   154  }
   155  
   156  void
   157  clearfat(Node *nl)
   158  {
   159  	uint32 w, c, q;
   160  	Node n1, z;
   161  	Prog *p;
   162  
   163  	/* clear a fat object */
   164  	if(debug['g'])
   165  		dump("\nclearfat", nl);
   166  
   167  	w = nl->type->width;
   168  	// Avoid taking the address for simple enough types.
   169  	if(componentgen(N, nl))
   170  		return;
   171  
   172  	c = w % 4;	// bytes
   173  	q = w / 4;	// quads
   174  
   175  	if(q < 4) {
   176  		// Write sequence of MOV 0, off(base) instead of using STOSL.
   177  		// The hope is that although the code will be slightly longer,
   178  		// the MOVs will have no dependencies and pipeline better
   179  		// than the unrolled STOSL loop.
   180  		// NOTE: Must use agen, not igen, so that optimizer sees address
   181  		// being taken. We are not writing on field boundaries.
   182  		regalloc(&n1, types[tptr], N);
   183  		agen(nl, &n1);
   184  		n1.op = OINDREG;
   185  		nodconst(&z, types[TUINT64], 0);
   186  		while(q-- > 0) {
   187  			n1.type = z.type;
   188  			gins(AMOVL, &z, &n1);
   189  			n1.xoffset += 4;
   190  		}
   191  		nodconst(&z, types[TUINT8], 0);
   192  		while(c-- > 0) {
   193  			n1.type = z.type;
   194  			gins(AMOVB, &z, &n1);
   195  			n1.xoffset++;
   196  		}
   197  		regfree(&n1);
   198  		return;
   199  	}
   200  
   201  	nodreg(&n1, types[tptr], D_DI);
   202  	agen(nl, &n1);
   203  	gconreg(AMOVL, 0, D_AX);
   204  
   205  	if(q > 128 || (q >= 4 && nacl)) {
   206  		gconreg(AMOVL, q, D_CX);
   207  		gins(AREP, N, N);	// repeat
   208  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   209  	} else if(q >= 4) {
   210  		p = gins(ADUFFZERO, N, N);
   211  		p->to.type = D_ADDR;
   212  		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
   213  		// 1 and 128 = magic constants: see ../../runtime/asm_386.s
   214  		p->to.offset = 1*(128-q);
   215  	} else
   216  	while(q > 0) {
   217  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   218  		q--;
   219  	}
   220  
   221  	while(c > 0) {
   222  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   223  		c--;
   224  	}
   225  }
   226  
   227  /*
   228   * generate:
   229   *	call f
   230   *	proc=-1	normal call but no return
   231   *	proc=0	normal call
   232   *	proc=1	goroutine run in new proc
   233   *	proc=2	defer call save away stack
   234    *	proc=3	normal call to C pointer (not Go func value)
   235   */
   236  void
   237  ginscall(Node *f, int proc)
   238  {
   239  	Prog *p;
   240  	Node reg, r1, con;
   241  
   242  	if(f->type != T)
   243  		setmaxarg(f->type);
   244  
   245  	switch(proc) {
   246  	default:
   247  		fatal("ginscall: bad proc %d", proc);
   248  		break;
   249  
   250  	case 0:	// normal call
   251  	case -1:	// normal call but no return
   252  		if(f->op == ONAME && f->class == PFUNC) {
   253  			if(f == deferreturn) {
   254  				// Deferred calls will appear to be returning to
   255  				// the CALL deferreturn(SB) that we are about to emit.
   256  				// However, the stack trace code will show the line
   257  				// of the instruction byte before the return PC. 
   258  				// To avoid that being an unrelated instruction,
   259  				// insert an x86 NOP that we will have the right line number.
   260  				// x86 NOP 0x90 is really XCHG AX, AX; use that description
   261  				// because the NOP pseudo-instruction will be removed by
   262  				// the linker.
   263  				nodreg(&reg, types[TINT], D_AX);
   264  				gins(AXCHGL, &reg, &reg);
   265  			}
   266  			p = gins(ACALL, N, f);
   267  			afunclit(&p->to, f);
   268  			if(proc == -1 || noreturn(p))
   269  				gins(AUNDEF, N, N);
   270  			break;
   271  		}
   272  		nodreg(&reg, types[tptr], D_DX);
   273  		nodreg(&r1, types[tptr], D_BX);
   274  		gmove(f, &reg);
   275  		reg.op = OINDREG;
   276  		gmove(&reg, &r1);
   277  		reg.op = OREGISTER;
   278  		gins(ACALL, &reg, &r1);
   279  		break;
   280  	
   281  	case 3:	// normal call of c function pointer
   282  		gins(ACALL, N, f);
   283  		break;
   284  
   285  	case 1:	// call in new proc (go)
   286  	case 2:	// deferred call (defer)
   287  		nodreg(&reg, types[TINT32], D_CX);
   288  		gins(APUSHL, f, N);
   289  		nodconst(&con, types[TINT32], argsize(f->type));
   290  		gins(APUSHL, &con, N);
   291  		if(proc == 1)
   292  			ginscall(newproc, 0);
   293  		else
   294  			ginscall(deferproc, 0);
   295  		gins(APOPL, N, &reg);
   296  		gins(APOPL, N, &reg);
   297  		if(proc == 2) {
   298  			nodreg(&reg, types[TINT64], D_AX);
   299  			gins(ATESTL, &reg, &reg);
   300  			p = gbranch(AJEQ, T, +1);
   301  			cgen_ret(N);
   302  			patch(p, pc);
   303  		}
   304  		break;
   305  	}
   306  }
   307  
   308  /*
   309   * n is call to interface method.
   310   * generate res = n.
   311   */
   312  void
   313  cgen_callinter(Node *n, Node *res, int proc)
   314  {
   315  	Node *i, *f;
   316  	Node tmpi, nodi, nodo, nodr, nodsp;
   317  
   318  	i = n->left;
   319  	if(i->op != ODOTINTER)
   320  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   321  
   322  	f = i->right;		// field
   323  	if(f->op != ONAME)
   324  		fatal("cgen_callinter: not ONAME %O", f->op);
   325  
   326  	i = i->left;		// interface
   327  
   328  	if(!i->addable) {
   329  		tempname(&tmpi, i->type);
   330  		cgen(i, &tmpi);
   331  		i = &tmpi;
   332  	}
   333  
   334  	genlist(n->list);		// assign the args
   335  
   336  	// i is now addable, prepare an indirected
   337  	// register to hold its address.
   338  	igen(i, &nodi, res);		// REG = &inter
   339  
   340  	nodindreg(&nodsp, types[tptr], D_SP);
   341  	nodi.type = types[tptr];
   342  	nodi.xoffset += widthptr;
   343  	cgen(&nodi, &nodsp);	// 0(SP) = 4(REG) -- i.data
   344  
   345  	regalloc(&nodo, types[tptr], res);
   346  	nodi.type = types[tptr];
   347  	nodi.xoffset -= widthptr;
   348  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   349  	regfree(&nodi);
   350  
   351  	regalloc(&nodr, types[tptr], &nodo);
   352  	if(n->left->xoffset == BADWIDTH)
   353  		fatal("cgen_callinter: badwidth");
   354  	cgen_checknil(&nodo);
   355  	nodo.op = OINDREG;
   356  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   357  	
   358  	if(proc == 0) {
   359  		// plain call: use direct c function pointer - more efficient
   360  		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
   361  		proc = 3;
   362  	} else {
   363  		// go/defer. generate go func value.
   364  		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f]
   365  	}
   366  
   367  	nodr.type = n->left->type;
   368  	ginscall(&nodr, proc);
   369  
   370  	regfree(&nodr);
   371  	regfree(&nodo);
   372  }
   373  
   374  /*
   375   * generate function call;
   376   *	proc=0	normal call
   377   *	proc=1	goroutine run in new proc
   378   *	proc=2	defer call save away stack
   379   */
   380  void
   381  cgen_call(Node *n, int proc)
   382  {
   383  	Type *t;
   384  	Node nod, afun;
   385  
   386  	if(n == N)
   387  		return;
   388  
   389  	if(n->left->ullman >= UINF) {
   390  		// if name involves a fn call
   391  		// precompute the address of the fn
   392  		tempname(&afun, types[tptr]);
   393  		cgen(n->left, &afun);
   394  	}
   395  
   396  	genlist(n->list);		// assign the args
   397  	t = n->left->type;
   398  
   399  	// call tempname pointer
   400  	if(n->left->ullman >= UINF) {
   401  		regalloc(&nod, types[tptr], N);
   402  		cgen_as(&nod, &afun);
   403  		nod.type = t;
   404  		ginscall(&nod, proc);
   405  		regfree(&nod);
   406  		return;
   407  	}
   408  
   409  	// call pointer
   410  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   411  		regalloc(&nod, types[tptr], N);
   412  		cgen_as(&nod, n->left);
   413  		nod.type = t;
   414  		ginscall(&nod, proc);
   415  		regfree(&nod);
   416  		return;
   417  	}
   418  
   419  	// call direct
   420  	n->left->method = 1;
   421  	ginscall(n->left, proc);
   422  }
   423  
   424  /*
   425   * call to n has already been generated.
   426   * generate:
   427   *	res = return value from call.
   428   */
   429  void
   430  cgen_callret(Node *n, Node *res)
   431  {
   432  	Node nod;
   433  	Type *fp, *t;
   434  	Iter flist;
   435  
   436  	t = n->left->type;
   437  	if(t->etype == TPTR32 || t->etype == TPTR64)
   438  		t = t->type;
   439  
   440  	fp = structfirst(&flist, getoutarg(t));
   441  	if(fp == T)
   442  		fatal("cgen_callret: nil");
   443  
   444  	memset(&nod, 0, sizeof(nod));
   445  	nod.op = OINDREG;
   446  	nod.val.u.reg = D_SP;
   447  	nod.addable = 1;
   448  
   449  	nod.xoffset = fp->width;
   450  	nod.type = fp->type;
   451  	cgen_as(res, &nod);
   452  }
   453  
   454  /*
   455   * call to n has already been generated.
   456   * generate:
   457   *	res = &return value from call.
   458   */
   459  void
   460  cgen_aret(Node *n, Node *res)
   461  {
   462  	Node nod1, nod2;
   463  	Type *fp, *t;
   464  	Iter flist;
   465  
   466  	t = n->left->type;
   467  	if(isptr[t->etype])
   468  		t = t->type;
   469  
   470  	fp = structfirst(&flist, getoutarg(t));
   471  	if(fp == T)
   472  		fatal("cgen_aret: nil");
   473  
   474  	memset(&nod1, 0, sizeof(nod1));
   475  	nod1.op = OINDREG;
   476  	nod1.val.u.reg = D_SP;
   477  	nod1.addable = 1;
   478  
   479  	nod1.xoffset = fp->width;
   480  	nod1.type = fp->type;
   481  
   482  	if(res->op != OREGISTER) {
   483  		regalloc(&nod2, types[tptr], res);
   484  		gins(ALEAL, &nod1, &nod2);
   485  		gins(AMOVL, &nod2, res);
   486  		regfree(&nod2);
   487  	} else
   488  		gins(ALEAL, &nod1, res);
   489  }
   490  
   491  /*
   492   * generate return.
   493   * n->left is assignments to return values.
   494   */
   495  void
   496  cgen_ret(Node *n)
   497  {
   498  	Prog *p;
   499  
   500  	if(n != N)
   501  		genlist(n->list);		// copy out args
   502  	if(hasdefer)
   503  		ginscall(deferreturn, 0);
   504  	genlist(curfn->exit);
   505  	p = gins(ARET, N, N);
   506  	if(n != N && n->op == ORETJMP) {
   507  		p->to.type = D_EXTERN;
   508  		p->to.sym = linksym(n->left->sym);
   509  	}
   510  }
   511  
   512  /*
   513   * generate += *= etc.
   514   */
   515  void
   516  cgen_asop(Node *n)
   517  {
   518  	Node n1, n2, n3, n4;
   519  	Node *nl, *nr;
   520  	Prog *p1;
   521  	Addr addr;
   522  	int a;
   523  
   524  	nl = n->left;
   525  	nr = n->right;
   526  
   527  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   528  		tempname(&n1, nr->type);
   529  		cgen(nr, &n1);
   530  		n2 = *n;
   531  		n2.right = &n1;
   532  		cgen_asop(&n2);
   533  		goto ret;
   534  	}
   535  
   536  	if(!isint[nl->type->etype])
   537  		goto hard;
   538  	if(!isint[nr->type->etype])
   539  		goto hard;
   540  	if(is64(nl->type) || is64(nr->type))
   541  		goto hard;
   542  
   543  	switch(n->etype) {
   544  	case OADD:
   545  		if(smallintconst(nr))
   546  		if(mpgetfix(nr->val.u.xval) == 1) {
   547  			a = optoas(OINC, nl->type);
   548  			if(nl->addable) {
   549  				gins(a, N, nl);
   550  				goto ret;
   551  			}
   552  			if(sudoaddable(a, nl, &addr)) {
   553  				p1 = gins(a, N, N);
   554  				p1->to = addr;
   555  				sudoclean();
   556  				goto ret;
   557  			}
   558  		}
   559  		break;
   560  
   561  	case OSUB:
   562  		if(smallintconst(nr))
   563  		if(mpgetfix(nr->val.u.xval) == 1) {
   564  			a = optoas(ODEC, nl->type);
   565  			if(nl->addable) {
   566  				gins(a, N, nl);
   567  				goto ret;
   568  			}
   569  			if(sudoaddable(a, nl, &addr)) {
   570  				p1 = gins(a, N, N);
   571  				p1->to = addr;
   572  				sudoclean();
   573  				goto ret;
   574  			}
   575  		}
   576  		break;
   577  	}
   578  
   579  	switch(n->etype) {
   580  	case OADD:
   581  	case OSUB:
   582  	case OXOR:
   583  	case OAND:
   584  	case OOR:
   585  		a = optoas(n->etype, nl->type);
   586  		if(nl->addable) {
   587  			if(smallintconst(nr)) {
   588  				gins(a, nr, nl);
   589  				goto ret;
   590  			}
   591  			regalloc(&n2, nr->type, N);
   592  			cgen(nr, &n2);
   593  			gins(a, &n2, nl);
   594  			regfree(&n2);
   595  			goto ret;
   596  		}
   597  		if(nr->ullman < UINF)
   598  		if(sudoaddable(a, nl, &addr)) {
   599  			if(smallintconst(nr)) {
   600  				p1 = gins(a, nr, N);
   601  				p1->to = addr;
   602  				sudoclean();
   603  				goto ret;
   604  			}
   605  			regalloc(&n2, nr->type, N);
   606  			cgen(nr, &n2);
   607  			p1 = gins(a, &n2, N);
   608  			p1->to = addr;
   609  			regfree(&n2);
   610  			sudoclean();
   611  			goto ret;
   612  		}
   613  	}
   614  
   615  hard:
   616  	n2.op = 0;
   617  	n1.op = 0;
   618  	if(nr->ullman >= nl->ullman || nl->addable) {
   619  		mgen(nr, &n2, N);
   620  		nr = &n2;
   621  	} else {
   622  		tempname(&n2, nr->type);
   623  		cgen(nr, &n2);
   624  		nr = &n2;
   625  	}
   626  	if(!nl->addable) {
   627  		igen(nl, &n1, N);
   628  		nl = &n1;
   629  	}
   630  
   631  	n3 = *n;
   632  	n3.left = nl;
   633  	n3.right = nr;
   634  	n3.op = n->etype;
   635  
   636  	mgen(&n3, &n4, N);
   637  	gmove(&n4, nl);
   638  
   639  	if(n1.op)
   640  		regfree(&n1);
   641  	mfree(&n2);
   642  	mfree(&n4);
   643  
   644  ret:
   645  	;
   646  }
   647  
   648  int
   649  samereg(Node *a, Node *b)
   650  {
   651  	if(a->op != OREGISTER)
   652  		return 0;
   653  	if(b->op != OREGISTER)
   654  		return 0;
   655  	if(a->val.u.reg != b->val.u.reg)
   656  		return 0;
   657  	return 1;
   658  }
   659  
   660  /*
   661   * generate division.
   662   * caller must set:
   663   *	ax = allocated AX register
   664   *	dx = allocated DX register
   665   * generates one of:
   666   *	res = nl / nr
   667   *	res = nl % nr
   668   * according to op.
   669   */
   670  void
   671  dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
   672  {
   673  	int check;
   674  	Node n1, t1, t2, t3, t4, n4, nz;
   675  	Type *t, *t0;
   676  	Prog *p1, *p2;
   677  
   678  	// Have to be careful about handling
   679  	// most negative int divided by -1 correctly.
   680  	// The hardware will trap.
   681  	// Also the byte divide instruction needs AH,
   682  	// which we otherwise don't have to deal with.
   683  	// Easiest way to avoid for int8, int16: use int32.
   684  	// For int32 and int64, use explicit test.
   685  	// Could use int64 hw for int32.
   686  	t = nl->type;
   687  	t0 = t;
   688  	check = 0;
   689  	if(issigned[t->etype]) {
   690  		check = 1;
   691  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
   692  			check = 0;
   693  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   694  			check = 0;
   695  	}
   696  	if(t->width < 4) {
   697  		if(issigned[t->etype])
   698  			t = types[TINT32];
   699  		else
   700  			t = types[TUINT32];
   701  		check = 0;
   702  	}
   703  
   704  	tempname(&t1, t);
   705  	tempname(&t2, t);
   706  	if(t0 != t) {
   707  		tempname(&t3, t0);
   708  		tempname(&t4, t0);
   709  		cgen(nl, &t3);
   710  		cgen(nr, &t4);
   711  		// Convert.
   712  		gmove(&t3, &t1);
   713  		gmove(&t4, &t2);
   714  	} else {
   715  		cgen(nl, &t1);
   716  		cgen(nr, &t2);
   717  	}
   718  
   719  	if(!samereg(ax, res) && !samereg(dx, res))
   720  		regalloc(&n1, t, res);
   721  	else
   722  		regalloc(&n1, t, N);
   723  	gmove(&t2, &n1);
   724  	gmove(&t1, ax);
   725  	p2 = P;
   726  	if(nacl) {
   727  		// Native Client does not relay the divide-by-zero trap
   728  		// to the executing program, so we must insert a check
   729  		// for ourselves.
   730  		nodconst(&n4, t, 0);
   731  		gins(optoas(OCMP, t), &n1, &n4);
   732  		p1 = gbranch(optoas(ONE, t), T, +1);
   733  		if(panicdiv == N)
   734  			panicdiv = sysfunc("panicdivide");
   735  		ginscall(panicdiv, -1);
   736  		patch(p1, pc);
   737  	}
   738  	if(check) {
   739  		nodconst(&n4, t, -1);
   740  		gins(optoas(OCMP, t), &n1, &n4);
   741  		p1 = gbranch(optoas(ONE, t), T, +1);
   742  		if(op == ODIV) {
   743  			// a / (-1) is -a.
   744  			gins(optoas(OMINUS, t), N, ax);
   745  			gmove(ax, res);
   746  		} else {
   747  			// a % (-1) is 0.
   748  			nodconst(&n4, t, 0);
   749  			gmove(&n4, res);
   750  		}
   751  		p2 = gbranch(AJMP, T, 0);
   752  		patch(p1, pc);
   753  	}
   754  	if(!issigned[t->etype]) {
   755  		nodconst(&nz, t, 0);
   756  		gmove(&nz, dx);
   757  	} else
   758  		gins(optoas(OEXTEND, t), N, N);
   759  	gins(optoas(op, t), &n1, N);
   760  	regfree(&n1);
   761  
   762  	if(op == ODIV)
   763  		gmove(ax, res);
   764  	else
   765  		gmove(dx, res);
   766  	if(check)
   767  		patch(p2, pc);
   768  }
   769  
   770  static void
   771  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   772  {
   773  	int r;
   774  
   775  	r = reg[dr];
   776  	nodreg(x, types[TINT32], dr);
   777  
   778  	// save current ax and dx if they are live
   779  	// and not the destination
   780  	memset(oldx, 0, sizeof *oldx);
   781  	if(r > 0 && !samereg(x, res)) {
   782  		tempname(oldx, types[TINT32]);
   783  		gmove(x, oldx);
   784  	}
   785  
   786  	regalloc(x, t, x);
   787  }
   788  
   789  static void
   790  restx(Node *x, Node *oldx)
   791  {
   792  	regfree(x);
   793  
   794  	if(oldx->op != 0) {
   795  		x->type = types[TINT32];
   796  		gmove(oldx, x);
   797  	}
   798  }
   799  
   800  /*
   801   * generate division according to op, one of:
   802   *	res = nl / nr
   803   *	res = nl % nr
   804   */
   805  void
   806  cgen_div(int op, Node *nl, Node *nr, Node *res)
   807  {
   808  	Node ax, dx, oldax, olddx;
   809  	Type *t;
   810  
   811  	if(is64(nl->type))
   812  		fatal("cgen_div %T", nl->type);
   813  
   814  	if(issigned[nl->type->etype])
   815  		t = types[TINT32];
   816  	else
   817  		t = types[TUINT32];
   818  	savex(D_AX, &ax, &oldax, res, t);
   819  	savex(D_DX, &dx, &olddx, res, t);
   820  	dodiv(op, nl, nr, res, &ax, &dx);
   821  	restx(&dx, &olddx);
   822  	restx(&ax, &oldax);
   823  }
   824  
   825  /*
   826   * generate shift according to op, one of:
   827   *	res = nl << nr
   828   *	res = nl >> nr
   829   */
   830  void
   831  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   832  {
   833  	Node n1, n2, nt, cx, oldcx, hi, lo;
   834  	int a, w;
   835  	Prog *p1, *p2;
   836  	uvlong sc;
   837  
   838  	if(nl->type->width > 4)
   839  		fatal("cgen_shift %T", nl->type);
   840  
   841  	w = nl->type->width * 8;
   842  
   843  	a = optoas(op, nl->type);
   844  
   845  	if(nr->op == OLITERAL) {
   846  		tempname(&n2, nl->type);
   847  		cgen(nl, &n2);
   848  		regalloc(&n1, nl->type, res);
   849  		gmove(&n2, &n1);
   850  		sc = mpgetfix(nr->val.u.xval);
   851  		if(sc >= nl->type->width*8) {
   852  			// large shift gets 2 shifts by width-1
   853  			gins(a, ncon(w-1), &n1);
   854  			gins(a, ncon(w-1), &n1);
   855  		} else
   856  			gins(a, nr, &n1);
   857  		gmove(&n1, res);
   858  		regfree(&n1);
   859  		return;
   860  	}
   861  
   862  	memset(&oldcx, 0, sizeof oldcx);
   863  	nodreg(&cx, types[TUINT32], D_CX);
   864  	if(reg[D_CX] > 1 && !samereg(&cx, res)) {
   865  		tempname(&oldcx, types[TUINT32]);
   866  		gmove(&cx, &oldcx);
   867  	}
   868  
   869  	if(nr->type->width > 4) {
   870  		tempname(&nt, nr->type);
   871  		n1 = nt;
   872  	} else {
   873  		nodreg(&n1, types[TUINT32], D_CX);
   874  		regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
   875  	}
   876  
   877  	if(samereg(&cx, res))
   878  		regalloc(&n2, nl->type, N);
   879  	else
   880  		regalloc(&n2, nl->type, res);
   881  	if(nl->ullman >= nr->ullman) {
   882  		cgen(nl, &n2);
   883  		cgen(nr, &n1);
   884  	} else {
   885  		cgen(nr, &n1);
   886  		cgen(nl, &n2);
   887  	}
   888  
   889  	// test and fix up large shifts
   890  	if(bounded) {
   891  		if(nr->type->width > 4) {
   892  			// delayed reg alloc
   893  			nodreg(&n1, types[TUINT32], D_CX);
   894  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   895  			split64(&nt, &lo, &hi);
   896  			gmove(&lo, &n1);
   897  			splitclean();
   898  		}
   899  	} else {
   900  		if(nr->type->width > 4) {
   901  			// delayed reg alloc
   902  			nodreg(&n1, types[TUINT32], D_CX);
   903  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   904  			split64(&nt, &lo, &hi);
   905  			gmove(&lo, &n1);
   906  			gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
   907  			p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
   908  			gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
   909  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   910  			splitclean();
   911  			patch(p2, pc);
   912  		} else {
   913  			gins(optoas(OCMP, nr->type), &n1, ncon(w));
   914  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   915  		}
   916  		if(op == ORSH && issigned[nl->type->etype]) {
   917  			gins(a, ncon(w-1), &n2);
   918  		} else {
   919  			gmove(ncon(0), &n2);
   920  		}
   921  		patch(p1, pc);
   922  	}
   923  	gins(a, &n1, &n2);
   924  
   925  	if(oldcx.op != 0)
   926  		gmove(&oldcx, &cx);
   927  
   928  	gmove(&n2, res);
   929  
   930  	regfree(&n1);
   931  	regfree(&n2);
   932  }
   933  
   934  /*
   935   * generate byte multiply:
   936   *	res = nl * nr
   937   * there is no 2-operand byte multiply instruction so
   938   * we do a full-width multiplication and truncate afterwards.
   939   */
   940  void
   941  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
   942  {
   943  	Node n1, n2, nt, *tmp;
   944  	Type *t;
   945  	int a;
   946  
   947  	// copy from byte to full registers
   948  	t = types[TUINT32];
   949  	if(issigned[nl->type->etype])
   950  		t = types[TINT32];
   951  
   952  	// largest ullman on left.
   953  	if(nl->ullman < nr->ullman) {
   954  		tmp = nl;
   955  		nl = nr;
   956  		nr = tmp;
   957  	}
   958  
   959  	tempname(&nt, nl->type);
   960  	cgen(nl, &nt);
   961  	regalloc(&n1, t, res);
   962  	cgen(nr, &n1);
   963  	regalloc(&n2, t, N);
   964  	gmove(&nt, &n2);
   965  	a = optoas(op, t);
   966  	gins(a, &n2, &n1);
   967  	regfree(&n2);
   968  	gmove(&n1, res);
   969  	regfree(&n1);
   970  }
   971  
   972  /*
   973   * generate high multiply:
   974   *   res = (nl*nr) >> width
   975   */
   976  void
   977  cgen_hmul(Node *nl, Node *nr, Node *res)
   978  {
   979  	Type *t;
   980  	int a;
   981  	Node n1, n2, ax, dx;
   982  
   983  	t = nl->type;
   984  	a = optoas(OHMUL, t);
   985  	// gen nl in n1.
   986  	tempname(&n1, t);
   987  	cgen(nl, &n1);
   988  	// gen nr in n2.
   989  	regalloc(&n2, t, res);
   990  	cgen(nr, &n2);
   991  
   992  	// multiply.
   993  	nodreg(&ax, t, D_AX);
   994  	gmove(&n2, &ax);
   995  	gins(a, &n1, N);
   996  	regfree(&n2);
   997  
   998  	if(t->width == 1) {
   999  		// byte multiply behaves differently.
  1000  		nodreg(&ax, t, D_AH);
  1001  		nodreg(&dx, t, D_DX);
  1002  		gmove(&ax, &dx);
  1003  	}
  1004  	nodreg(&dx, t, D_DX);
  1005  	gmove(&dx, res);
  1006  }
  1007  
  1008  static void cgen_float387(Node *n, Node *res);
  1009  static void cgen_floatsse(Node *n, Node *res);
  1010  
  1011  /*
  1012   * generate floating-point operation.
  1013   */
  1014  void
  1015  cgen_float(Node *n, Node *res)
  1016  {
  1017  	Node *nl;
  1018  	Node n1, n2;
  1019  	Prog *p1, *p2, *p3;
  1020  
  1021  	nl = n->left;
  1022  	switch(n->op) {
  1023  	case OEQ:
  1024  	case ONE:
  1025  	case OLT:
  1026  	case OLE:
  1027  	case OGE:
  1028  		p1 = gbranch(AJMP, T, 0);
  1029  		p2 = pc;
  1030  		gmove(nodbool(1), res);
  1031  		p3 = gbranch(AJMP, T, 0);
  1032  		patch(p1, pc);
  1033  		bgen(n, 1, 0, p2);
  1034  		gmove(nodbool(0), res);
  1035  		patch(p3, pc);
  1036  		return;
  1037  
  1038  	case OPLUS:
  1039  		cgen(nl, res);
  1040  		return;
  1041  
  1042  	case OCONV:
  1043  		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
  1044  			cgen(nl, res);
  1045  			return;
  1046  		}
  1047  
  1048  		tempname(&n2, n->type);
  1049  		mgen(nl, &n1, res);
  1050  		gmove(&n1, &n2);
  1051  		gmove(&n2, res);
  1052  		mfree(&n1);
  1053  		return;
  1054  	}
  1055  
  1056  	if(use_sse)
  1057  		cgen_floatsse(n, res);
  1058  	else
  1059  		cgen_float387(n, res);
  1060  }
  1061  
  1062  // floating-point.  387 (not SSE2)
  1063  static void
  1064  cgen_float387(Node *n, Node *res)
  1065  {
  1066  	Node f0, f1;
  1067  	Node *nl, *nr;
  1068  
  1069  	nl = n->left;
  1070  	nr = n->right;
  1071  	nodreg(&f0, nl->type, D_F0);
  1072  	nodreg(&f1, n->type, D_F0+1);
  1073  	if(nr != N)
  1074  		goto flt2;
  1075  
  1076  	// unary
  1077  	cgen(nl, &f0);
  1078  	if(n->op != OCONV && n->op != OPLUS)
  1079  		gins(foptoas(n->op, n->type, 0), N, N);
  1080  	gmove(&f0, res);
  1081  	return;
  1082  
  1083  flt2:	// binary
  1084  	if(nl->ullman >= nr->ullman) {
  1085  		cgen(nl, &f0);
  1086  		if(nr->addable)
  1087  			gins(foptoas(n->op, n->type, 0), nr, &f0);
  1088  		else {
  1089  			cgen(nr, &f0);
  1090  			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
  1091  		}
  1092  	} else {
  1093  		cgen(nr, &f0);
  1094  		if(nl->addable)
  1095  			gins(foptoas(n->op, n->type, Frev), nl, &f0);
  1096  		else {
  1097  			cgen(nl, &f0);
  1098  			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
  1099  		}
  1100  	}
  1101  	gmove(&f0, res);
  1102  	return;
  1103  
  1104  }
  1105  
  1106  static void
  1107  cgen_floatsse(Node *n, Node *res)
  1108  {
  1109  	Node *nl, *nr, *r;
  1110  	Node n1, n2, nt;
  1111  	int a;
  1112  
  1113  	nl = n->left;
  1114  	nr = n->right;
  1115  	switch(n->op) {
  1116  	default:
  1117  		dump("cgen_floatsse", n);
  1118  		fatal("cgen_floatsse %O", n->op);
  1119  		return;
  1120  
  1121  	case OMINUS:
  1122  	case OCOM:
  1123  		nr = nodintconst(-1);
  1124  		convlit(&nr, n->type);
  1125  		a = foptoas(OMUL, nl->type, 0);
  1126  		goto sbop;
  1127  
  1128  	// symmetric binary
  1129  	case OADD:
  1130  	case OMUL:
  1131  		a = foptoas(n->op, nl->type, 0);
  1132  		goto sbop;
  1133  
  1134  	// asymmetric binary
  1135  	case OSUB:
  1136  	case OMOD:
  1137  	case ODIV:
  1138  		a = foptoas(n->op, nl->type, 0);
  1139  		goto abop;
  1140  	}
  1141  
  1142  sbop:	// symmetric binary
  1143  	if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
  1144  		r = nl;
  1145  		nl = nr;
  1146  		nr = r;
  1147  	}
  1148  
  1149  abop:	// asymmetric binary
  1150  	if(nl->ullman >= nr->ullman) {
  1151  		tempname(&nt, nl->type);
  1152  		cgen(nl, &nt);
  1153  		mgen(nr, &n2, N);
  1154  		regalloc(&n1, nl->type, res);
  1155  		gmove(&nt, &n1);
  1156  		gins(a, &n2, &n1);
  1157  		gmove(&n1, res);
  1158  		regfree(&n1);
  1159  		mfree(&n2);
  1160  	} else {
  1161  		regalloc(&n2, nr->type, res);
  1162  		cgen(nr, &n2);
  1163  		regalloc(&n1, nl->type, N);
  1164  		cgen(nl, &n1);
  1165  		gins(a, &n2, &n1);
  1166  		regfree(&n2);
  1167  		gmove(&n1, res);
  1168  		regfree(&n1);
  1169  	}
  1170  	return;
  1171  }
  1172  
  1173  void
  1174  bgen_float(Node *n, int true, int likely, Prog *to)
  1175  {
  1176  	int et, a;
  1177  	Node *nl, *nr, *r;
  1178  	Node n1, n2, n3, tmp, t1, t2, ax;
  1179  	Prog *p1, *p2;
  1180  
  1181  	nl = n->left;
  1182  	nr = n->right;
  1183  	a = n->op;
  1184  	if(!true) {
  1185  		// brcom is not valid on floats when NaN is involved.
  1186  		p1 = gbranch(AJMP, T, 0);
  1187  		p2 = gbranch(AJMP, T, 0);
  1188  		patch(p1, pc);
  1189  		// No need to avoid re-genning ninit.
  1190  		bgen_float(n, 1, -likely, p2);
  1191  		patch(gbranch(AJMP, T, 0), to);
  1192  		patch(p2, pc);
  1193  		return;
  1194  	}
  1195  
  1196  	if(use_sse)
  1197  		goto sse;
  1198  	else
  1199  		goto x87;
  1200  
  1201  x87:
  1202  	a = brrev(a);	// because the args are stacked
  1203  	if(a == OGE || a == OGT) {
  1204  		// only < and <= work right with NaN; reverse if needed
  1205  		r = nr;
  1206  		nr = nl;
  1207  		nl = r;
  1208  		a = brrev(a);
  1209  	}
  1210  
  1211  	nodreg(&tmp, nr->type, D_F0);
  1212  	nodreg(&n2, nr->type, D_F0 + 1);
  1213  	nodreg(&ax, types[TUINT16], D_AX);
  1214  	et = simsimtype(nr->type);
  1215  	if(et == TFLOAT64) {
  1216  		if(nl->ullman > nr->ullman) {
  1217  			cgen(nl, &tmp);
  1218  			cgen(nr, &tmp);
  1219  			gins(AFXCHD, &tmp, &n2);
  1220  		} else {
  1221  			cgen(nr, &tmp);
  1222  			cgen(nl, &tmp);
  1223  		}
  1224  		gins(AFUCOMIP, &tmp, &n2);
  1225  		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF
  1226  	} else {
  1227  		// TODO(rsc): The moves back and forth to memory
  1228  		// here are for truncating the value to 32 bits.
  1229  		// This handles 32-bit comparison but presumably
  1230  		// all the other ops have the same problem.
  1231  		// We need to figure out what the right general
  1232  		// solution is, besides telling people to use float64.
  1233  		tempname(&t1, types[TFLOAT32]);
  1234  		tempname(&t2, types[TFLOAT32]);
  1235  		cgen(nr, &t1);
  1236  		cgen(nl, &t2);
  1237  		gmove(&t2, &tmp);
  1238  		gins(AFCOMFP, &t1, &tmp);
  1239  		gins(AFSTSW, N, &ax);
  1240  		gins(ASAHF, N, N);
  1241  	}
  1242  
  1243  	goto ret;
  1244  
  1245  sse:
  1246  	if(!nl->addable) {
  1247  		tempname(&n1, nl->type);
  1248  		cgen(nl, &n1);
  1249  		nl = &n1;
  1250  	}
  1251  	if(!nr->addable) {
  1252  		tempname(&tmp, nr->type);
  1253  		cgen(nr, &tmp);
  1254  		nr = &tmp;
  1255  	}
  1256  	regalloc(&n2, nr->type, N);
  1257  	gmove(nr, &n2);
  1258  	nr = &n2;
  1259  
  1260  	if(nl->op != OREGISTER) {
  1261  		regalloc(&n3, nl->type, N);
  1262  		gmove(nl, &n3);
  1263  		nl = &n3;
  1264  	}
  1265  
  1266  	if(a == OGE || a == OGT) {
  1267  		// only < and <= work right with NaN; reverse if needed
  1268  		r = nr;
  1269  		nr = nl;
  1270  		nl = r;
  1271  		a = brrev(a);
  1272  	}
  1273  
  1274  	gins(foptoas(OCMP, nr->type, 0), nl, nr);
  1275  	if(nl->op == OREGISTER)
  1276  		regfree(nl);
  1277  	regfree(nr);
  1278  
  1279  ret:
  1280  	if(a == OEQ) {
  1281  		// neither NE nor P
  1282  		p1 = gbranch(AJNE, T, -likely);
  1283  		p2 = gbranch(AJPS, T, -likely);
  1284  		patch(gbranch(AJMP, T, 0), to);
  1285  		patch(p1, pc);
  1286  		patch(p2, pc);
  1287  	} else if(a == ONE) {
  1288  		// either NE or P
  1289  		patch(gbranch(AJNE, T, likely), to);
  1290  		patch(gbranch(AJPS, T, likely), to);
  1291  	} else
  1292  		patch(gbranch(optoas(a, nr->type), T, likely), to);
  1293  
  1294  }
  1295  
  1296  // Called after regopt and peep have run.
  1297  // Expand CHECKNIL pseudo-op into actual nil pointer check.
  1298  void
  1299  expandchecks(Prog *firstp)
  1300  {
  1301  	Prog *p, *p1, *p2;
  1302  
  1303  	for(p = firstp; p != P; p = p->link) {
  1304  		if(p->as != ACHECKNIL)
  1305  			continue;
  1306  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
  1307  			warnl(p->lineno, "generated nil check");
  1308  		// check is
  1309  		//	CMP arg, $0
  1310  		//	JNE 2(PC) (likely)
  1311  		//	MOV AX, 0
  1312  		p1 = mal(sizeof *p1);
  1313  		p2 = mal(sizeof *p2);
  1314  		clearp(p1);
  1315  		clearp(p2);
  1316  		p1->link = p2;
  1317  		p2->link = p->link;
  1318  		p->link = p1;
  1319  		p1->lineno = p->lineno;
  1320  		p2->lineno = p->lineno;
  1321  		p1->pc = 9999;
  1322  		p2->pc = 9999;
  1323  		p->as = ACMPL;
  1324  		p->to.type = D_CONST;
  1325  		p->to.offset = 0;
  1326  		p1->as = AJNE;
  1327  		p1->from.type = D_CONST;
  1328  		p1->from.offset = 1; // likely
  1329  		p1->to.type = D_BRANCH;
  1330  		p1->to.u.branch = p2->link;
  1331  		// crash by write to memory address 0.
  1332  		// if possible, since we know arg is 0, use 0(arg),
  1333  		// which will be shorter to encode than plain 0.
  1334  		p2->as = AMOVL;
  1335  		p2->from.type = D_AX;
  1336  		if(regtyp(&p->from))
  1337  			p2->to.type = p->from.type + D_INDIR;
  1338  		else
  1339  			p2->to.type = D_INDIR+D_NONE;
  1340  		p2->to.offset = 0;
  1341  	}
  1342  }