github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/8g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  void
    13  defframe(Prog *ptxt)
    14  {
    15  	// fill in argument size
    16  	ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
    17  
    18  	// fill in final stack size
    19  	if(stksize > maxstksize)
    20  		maxstksize = stksize;
    21  	ptxt->to.offset = rnd(maxstksize+maxarg, widthptr);
    22  	maxstksize = 0;
    23  }
    24  
    25  // Sweep the prog list to mark any used nodes.
    26  void
    27  markautoused(Prog* p)
    28  {
    29  	for (; p; p = p->link) {
    30  		if (p->as == ATYPE)
    31  			continue;
    32  
    33  		if (p->from.type == D_AUTO && p->from.node)
    34  			p->from.node->used = 1;
    35  
    36  		if (p->to.type == D_AUTO && p->to.node)
    37  			p->to.node->used = 1;
    38  	}
    39  }
    40  
    41  // Fixup instructions after compactframe has moved all autos around.
    42  void
    43  fixautoused(Prog* p)
    44  {
    45  	Prog **lp;
    46  
    47  	for (lp=&p; (p=*lp) != P; ) {
    48  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
    49  			*lp = p->link;
    50  			continue;
    51  		}
    52  
    53  		if (p->from.type == D_AUTO && p->from.node)
    54  			p->from.offset += p->from.node->stkdelta;
    55  
    56  		if (p->to.type == D_AUTO && p->to.node)
    57  			p->to.offset += p->to.node->stkdelta;
    58  
    59  		lp = &p->link;
    60  	}
    61  }
    62  
    63  void
    64  clearfat(Node *nl)
    65  {
    66  	uint32 w, c, q;
    67  	Node n1;
    68  
    69  	/* clear a fat object */
    70  	if(debug['g'])
    71  		dump("\nclearfat", nl);
    72  
    73  	w = nl->type->width;
    74  	// Avoid taking the address for simple enough types.
    75  	if(componentgen(N, nl))
    76  		return;
    77  
    78  	c = w % 4;	// bytes
    79  	q = w / 4;	// quads
    80  
    81  	gconreg(AMOVL, 0, D_AX);
    82  	nodreg(&n1, types[tptr], D_DI);
    83  	agen(nl, &n1);
    84  
    85  	if(q >= 4) {
    86  		gconreg(AMOVL, q, D_CX);
    87  		gins(AREP, N, N);	// repeat
    88  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
    89  	} else
    90  	while(q > 0) {
    91  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
    92  		q--;
    93  	}
    94  
    95  	if(c >= 4) {
    96  		gconreg(AMOVL, c, D_CX);
    97  		gins(AREP, N, N);	// repeat
    98  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
    99  	} else
   100  	while(c > 0) {
   101  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   102  		c--;
   103  	}
   104  }
   105  
   106  /*
   107   * generate:
   108   *	call f
   109   *	proc=-1	normal call but no return
   110   *	proc=0	normal call
   111   *	proc=1	goroutine run in new proc
   112   *	proc=2	defer call save away stack
   113    *	proc=3	normal call to C pointer (not Go func value)
   114   */
   115  void
   116  ginscall(Node *f, int proc)
   117  {
   118  	Prog *p;
   119  	Node reg, r1, con;
   120  
   121  	switch(proc) {
   122  	default:
   123  		fatal("ginscall: bad proc %d", proc);
   124  		break;
   125  
   126  	case 0:	// normal call
   127  	case -1:	// normal call but no return
   128  		if(f->op == ONAME && f->class == PFUNC) {
   129  			p = gins(ACALL, N, f);
   130  			afunclit(&p->to, f);
   131  			if(proc == -1 || noreturn(p))
   132  				gins(AUNDEF, N, N);
   133  			break;
   134  		}
   135  		nodreg(&reg, types[tptr], D_DX);
   136  		nodreg(&r1, types[tptr], D_BX);
   137  		gmove(f, &reg);
   138  		reg.op = OINDREG;
   139  		gmove(&reg, &r1);
   140  		reg.op = OREGISTER;
   141  		gins(ACALL, &reg, &r1);
   142  		break;
   143  	
   144  	case 3:	// normal call of c function pointer
   145  		gins(ACALL, N, f);
   146  		break;
   147  
   148  	case 1:	// call in new proc (go)
   149  	case 2:	// deferred call (defer)
   150  		nodreg(&reg, types[TINT32], D_CX);
   151  		gins(APUSHL, f, N);
   152  		nodconst(&con, types[TINT32], argsize(f->type));
   153  		gins(APUSHL, &con, N);
   154  		if(proc == 1)
   155  			ginscall(newproc, 0);
   156  		else
   157  			ginscall(deferproc, 0);
   158  		gins(APOPL, N, &reg);
   159  		gins(APOPL, N, &reg);
   160  		if(proc == 2) {
   161  			nodreg(&reg, types[TINT64], D_AX);
   162  			gins(ATESTL, &reg, &reg);
   163  			patch(gbranch(AJNE, T, -1), retpc);
   164  		}
   165  		break;
   166  	}
   167  }
   168  
   169  /*
   170   * n is call to interface method.
   171   * generate res = n.
   172   */
   173  void
   174  cgen_callinter(Node *n, Node *res, int proc)
   175  {
   176  	Node *i, *f;
   177  	Node tmpi, nodi, nodo, nodr, nodsp;
   178  
   179  	i = n->left;
   180  	if(i->op != ODOTINTER)
   181  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   182  
   183  	f = i->right;		// field
   184  	if(f->op != ONAME)
   185  		fatal("cgen_callinter: not ONAME %O", f->op);
   186  
   187  	i = i->left;		// interface
   188  
   189  	if(!i->addable) {
   190  		tempname(&tmpi, i->type);
   191  		cgen(i, &tmpi);
   192  		i = &tmpi;
   193  	}
   194  
   195  	genlist(n->list);		// assign the args
   196  
   197  	// i is now addable, prepare an indirected
   198  	// register to hold its address.
   199  	igen(i, &nodi, res);		// REG = &inter
   200  
   201  	nodindreg(&nodsp, types[tptr], D_SP);
   202  	nodi.type = types[tptr];
   203  	nodi.xoffset += widthptr;
   204  	cgen(&nodi, &nodsp);	// 0(SP) = 4(REG) -- i.data
   205  
   206  	regalloc(&nodo, types[tptr], res);
   207  	nodi.type = types[tptr];
   208  	nodi.xoffset -= widthptr;
   209  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   210  	regfree(&nodi);
   211  
   212  	regalloc(&nodr, types[tptr], &nodo);
   213  	if(n->left->xoffset == BADWIDTH)
   214  		fatal("cgen_callinter: badwidth");
   215  	nodo.op = OINDREG;
   216  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   217  	
   218  	if(proc == 0) {
   219  		// plain call: use direct c function pointer - more efficient
   220  		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
   221  		proc = 3;
   222  	} else {
   223  		// go/defer. generate go func value.
   224  		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f]
   225  	}
   226  
   227  	// BOTCH nodr.type = fntype;
   228  	nodr.type = n->left->type;
   229  	ginscall(&nodr, proc);
   230  
   231  	regfree(&nodr);
   232  	regfree(&nodo);
   233  
   234  	setmaxarg(n->left->type);
   235  }
   236  
   237  /*
   238   * generate function call;
   239   *	proc=0	normal call
   240   *	proc=1	goroutine run in new proc
   241   *	proc=2	defer call save away stack
   242   */
   243  void
   244  cgen_call(Node *n, int proc)
   245  {
   246  	Type *t;
   247  	Node nod, afun;
   248  
   249  	if(n == N)
   250  		return;
   251  
   252  	if(n->left->ullman >= UINF) {
   253  		// if name involves a fn call
   254  		// precompute the address of the fn
   255  		tempname(&afun, types[tptr]);
   256  		cgen(n->left, &afun);
   257  	}
   258  
   259  	genlist(n->list);		// assign the args
   260  	t = n->left->type;
   261  
   262  	setmaxarg(t);
   263  
   264  	// call tempname pointer
   265  	if(n->left->ullman >= UINF) {
   266  		regalloc(&nod, types[tptr], N);
   267  		cgen_as(&nod, &afun);
   268  		nod.type = t;
   269  		ginscall(&nod, proc);
   270  		regfree(&nod);
   271  		return;
   272  	}
   273  
   274  	// call pointer
   275  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   276  		regalloc(&nod, types[tptr], N);
   277  		cgen_as(&nod, n->left);
   278  		nod.type = t;
   279  		ginscall(&nod, proc);
   280  		regfree(&nod);
   281  		return;
   282  	}
   283  
   284  	// call direct
   285  	n->left->method = 1;
   286  	ginscall(n->left, proc);
   287  }
   288  
   289  /*
   290   * call to n has already been generated.
   291   * generate:
   292   *	res = return value from call.
   293   */
   294  void
   295  cgen_callret(Node *n, Node *res)
   296  {
   297  	Node nod;
   298  	Type *fp, *t;
   299  	Iter flist;
   300  
   301  	t = n->left->type;
   302  	if(t->etype == TPTR32 || t->etype == TPTR64)
   303  		t = t->type;
   304  
   305  	fp = structfirst(&flist, getoutarg(t));
   306  	if(fp == T)
   307  		fatal("cgen_callret: nil");
   308  
   309  	memset(&nod, 0, sizeof(nod));
   310  	nod.op = OINDREG;
   311  	nod.val.u.reg = D_SP;
   312  	nod.addable = 1;
   313  
   314  	nod.xoffset = fp->width;
   315  	nod.type = fp->type;
   316  	cgen_as(res, &nod);
   317  }
   318  
   319  /*
   320   * call to n has already been generated.
   321   * generate:
   322   *	res = &return value from call.
   323   */
   324  void
   325  cgen_aret(Node *n, Node *res)
   326  {
   327  	Node nod1, nod2;
   328  	Type *fp, *t;
   329  	Iter flist;
   330  
   331  	t = n->left->type;
   332  	if(isptr[t->etype])
   333  		t = t->type;
   334  
   335  	fp = structfirst(&flist, getoutarg(t));
   336  	if(fp == T)
   337  		fatal("cgen_aret: nil");
   338  
   339  	memset(&nod1, 0, sizeof(nod1));
   340  	nod1.op = OINDREG;
   341  	nod1.val.u.reg = D_SP;
   342  	nod1.addable = 1;
   343  
   344  	nod1.xoffset = fp->width;
   345  	nod1.type = fp->type;
   346  
   347  	if(res->op != OREGISTER) {
   348  		regalloc(&nod2, types[tptr], res);
   349  		gins(ALEAL, &nod1, &nod2);
   350  		gins(AMOVL, &nod2, res);
   351  		regfree(&nod2);
   352  	} else
   353  		gins(ALEAL, &nod1, res);
   354  }
   355  
   356  /*
   357   * generate return.
   358   * n->left is assignments to return values.
   359   */
   360  void
   361  cgen_ret(Node *n)
   362  {
   363  	genlist(n->list);		// copy out args
   364  	if(retpc)
   365  		gjmp(retpc);
   366  	else
   367  		gins(ARET, N, N);
   368  }
   369  
   370  /*
   371   * generate += *= etc.
   372   */
   373  void
   374  cgen_asop(Node *n)
   375  {
   376  	Node n1, n2, n3, n4;
   377  	Node *nl, *nr;
   378  	Prog *p1;
   379  	Addr addr;
   380  	int a;
   381  
   382  	nl = n->left;
   383  	nr = n->right;
   384  
   385  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   386  		tempname(&n1, nr->type);
   387  		cgen(nr, &n1);
   388  		n2 = *n;
   389  		n2.right = &n1;
   390  		cgen_asop(&n2);
   391  		goto ret;
   392  	}
   393  
   394  	if(!isint[nl->type->etype])
   395  		goto hard;
   396  	if(!isint[nr->type->etype])
   397  		goto hard;
   398  	if(is64(nl->type) || is64(nr->type))
   399  		goto hard;
   400  
   401  	switch(n->etype) {
   402  	case OADD:
   403  		if(smallintconst(nr))
   404  		if(mpgetfix(nr->val.u.xval) == 1) {
   405  			a = optoas(OINC, nl->type);
   406  			if(nl->addable) {
   407  				gins(a, N, nl);
   408  				goto ret;
   409  			}
   410  			if(sudoaddable(a, nl, &addr)) {
   411  				p1 = gins(a, N, N);
   412  				p1->to = addr;
   413  				sudoclean();
   414  				goto ret;
   415  			}
   416  		}
   417  		break;
   418  
   419  	case OSUB:
   420  		if(smallintconst(nr))
   421  		if(mpgetfix(nr->val.u.xval) == 1) {
   422  			a = optoas(ODEC, nl->type);
   423  			if(nl->addable) {
   424  				gins(a, N, nl);
   425  				goto ret;
   426  			}
   427  			if(sudoaddable(a, nl, &addr)) {
   428  				p1 = gins(a, N, N);
   429  				p1->to = addr;
   430  				sudoclean();
   431  				goto ret;
   432  			}
   433  		}
   434  		break;
   435  	}
   436  
   437  	switch(n->etype) {
   438  	case OADD:
   439  	case OSUB:
   440  	case OXOR:
   441  	case OAND:
   442  	case OOR:
   443  		a = optoas(n->etype, nl->type);
   444  		if(nl->addable) {
   445  			if(smallintconst(nr)) {
   446  				gins(a, nr, nl);
   447  				goto ret;
   448  			}
   449  			regalloc(&n2, nr->type, N);
   450  			cgen(nr, &n2);
   451  			gins(a, &n2, nl);
   452  			regfree(&n2);
   453  			goto ret;
   454  		}
   455  		if(nr->ullman < UINF)
   456  		if(sudoaddable(a, nl, &addr)) {
   457  			if(smallintconst(nr)) {
   458  				p1 = gins(a, nr, N);
   459  				p1->to = addr;
   460  				sudoclean();
   461  				goto ret;
   462  			}
   463  			regalloc(&n2, nr->type, N);
   464  			cgen(nr, &n2);
   465  			p1 = gins(a, &n2, N);
   466  			p1->to = addr;
   467  			regfree(&n2);
   468  			sudoclean();
   469  			goto ret;
   470  		}
   471  	}
   472  
   473  hard:
   474  	n2.op = 0;
   475  	n1.op = 0;
   476  	if(nr->ullman >= nl->ullman || nl->addable) {
   477  		mgen(nr, &n2, N);
   478  		nr = &n2;
   479  	} else {
   480  		tempname(&n2, nr->type);
   481  		cgen(nr, &n2);
   482  		nr = &n2;
   483  	}
   484  	if(!nl->addable) {
   485  		igen(nl, &n1, N);
   486  		nl = &n1;
   487  	}
   488  
   489  	n3 = *n;
   490  	n3.left = nl;
   491  	n3.right = nr;
   492  	n3.op = n->etype;
   493  
   494  	mgen(&n3, &n4, N);
   495  	gmove(&n4, nl);
   496  
   497  	if(n1.op)
   498  		regfree(&n1);
   499  	mfree(&n2);
   500  	mfree(&n4);
   501  
   502  ret:
   503  	;
   504  }
   505  
   506  int
   507  samereg(Node *a, Node *b)
   508  {
   509  	if(a->op != OREGISTER)
   510  		return 0;
   511  	if(b->op != OREGISTER)
   512  		return 0;
   513  	if(a->val.u.reg != b->val.u.reg)
   514  		return 0;
   515  	return 1;
   516  }
   517  
   518  /*
   519   * generate division.
   520   * caller must set:
   521   *	ax = allocated AX register
   522   *	dx = allocated DX register
   523   * generates one of:
   524   *	res = nl / nr
   525   *	res = nl % nr
   526   * according to op.
   527   */
   528  void
   529  dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
   530  {
   531  	int check;
   532  	Node n1, t1, t2, t3, t4, n4, nz;
   533  	Type *t, *t0;
   534  	Prog *p1, *p2;
   535  
   536  	// Have to be careful about handling
   537  	// most negative int divided by -1 correctly.
   538  	// The hardware will trap.
   539  	// Also the byte divide instruction needs AH,
   540  	// which we otherwise don't have to deal with.
   541  	// Easiest way to avoid for int8, int16: use int32.
   542  	// For int32 and int64, use explicit test.
   543  	// Could use int64 hw for int32.
   544  	t = nl->type;
   545  	t0 = t;
   546  	check = 0;
   547  	if(issigned[t->etype]) {
   548  		check = 1;
   549  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
   550  			check = 0;
   551  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   552  			check = 0;
   553  	}
   554  	if(t->width < 4) {
   555  		if(issigned[t->etype])
   556  			t = types[TINT32];
   557  		else
   558  			t = types[TUINT32];
   559  		check = 0;
   560  	}
   561  
   562  	tempname(&t1, t);
   563  	tempname(&t2, t);
   564  	if(t0 != t) {
   565  		tempname(&t3, t0);
   566  		tempname(&t4, t0);
   567  		cgen(nl, &t3);
   568  		cgen(nr, &t4);
   569  		// Convert.
   570  		gmove(&t3, &t1);
   571  		gmove(&t4, &t2);
   572  	} else {
   573  		cgen(nl, &t1);
   574  		cgen(nr, &t2);
   575  	}
   576  
   577  	if(!samereg(ax, res) && !samereg(dx, res))
   578  		regalloc(&n1, t, res);
   579  	else
   580  		regalloc(&n1, t, N);
   581  	gmove(&t2, &n1);
   582  	gmove(&t1, ax);
   583  	p2 = P;
   584  	if(check) {
   585  		nodconst(&n4, t, -1);
   586  		gins(optoas(OCMP, t), &n1, &n4);
   587  		p1 = gbranch(optoas(ONE, t), T, +1);
   588  		if(op == ODIV) {
   589  			// a / (-1) is -a.
   590  			gins(optoas(OMINUS, t), N, ax);
   591  			gmove(ax, res);
   592  		} else {
   593  			// a % (-1) is 0.
   594  			nodconst(&n4, t, 0);
   595  			gmove(&n4, res);
   596  		}
   597  		p2 = gbranch(AJMP, T, 0);
   598  		patch(p1, pc);
   599  	}
   600  	if(!issigned[t->etype]) {
   601  		nodconst(&nz, t, 0);
   602  		gmove(&nz, dx);
   603  	} else
   604  		gins(optoas(OEXTEND, t), N, N);
   605  	gins(optoas(op, t), &n1, N);
   606  	regfree(&n1);
   607  
   608  	if(op == ODIV)
   609  		gmove(ax, res);
   610  	else
   611  		gmove(dx, res);
   612  	if(check)
   613  		patch(p2, pc);
   614  }
   615  
   616  static void
   617  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   618  {
   619  	int r;
   620  
   621  	r = reg[dr];
   622  	nodreg(x, types[TINT32], dr);
   623  
   624  	// save current ax and dx if they are live
   625  	// and not the destination
   626  	memset(oldx, 0, sizeof *oldx);
   627  	if(r > 0 && !samereg(x, res)) {
   628  		tempname(oldx, types[TINT32]);
   629  		gmove(x, oldx);
   630  	}
   631  
   632  	regalloc(x, t, x);
   633  }
   634  
   635  static void
   636  restx(Node *x, Node *oldx)
   637  {
   638  	regfree(x);
   639  
   640  	if(oldx->op != 0) {
   641  		x->type = types[TINT32];
   642  		gmove(oldx, x);
   643  	}
   644  }
   645  
   646  /*
   647   * generate division according to op, one of:
   648   *	res = nl / nr
   649   *	res = nl % nr
   650   */
   651  void
   652  cgen_div(int op, Node *nl, Node *nr, Node *res)
   653  {
   654  	Node ax, dx, oldax, olddx;
   655  	Type *t;
   656  
   657  	if(is64(nl->type))
   658  		fatal("cgen_div %T", nl->type);
   659  
   660  	if(issigned[nl->type->etype])
   661  		t = types[TINT32];
   662  	else
   663  		t = types[TUINT32];
   664  	savex(D_AX, &ax, &oldax, res, t);
   665  	savex(D_DX, &dx, &olddx, res, t);
   666  	dodiv(op, nl, nr, res, &ax, &dx);
   667  	restx(&dx, &olddx);
   668  	restx(&ax, &oldax);
   669  }
   670  
   671  /*
   672   * generate shift according to op, one of:
   673   *	res = nl << nr
   674   *	res = nl >> nr
   675   */
   676  void
   677  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   678  {
   679  	Node n1, n2, nt, cx, oldcx, hi, lo;
   680  	int a, w;
   681  	Prog *p1, *p2;
   682  	uvlong sc;
   683  
   684  	if(nl->type->width > 4)
   685  		fatal("cgen_shift %T", nl->type);
   686  
   687  	w = nl->type->width * 8;
   688  
   689  	a = optoas(op, nl->type);
   690  
   691  	if(nr->op == OLITERAL) {
   692  		tempname(&n2, nl->type);
   693  		cgen(nl, &n2);
   694  		regalloc(&n1, nl->type, res);
   695  		gmove(&n2, &n1);
   696  		sc = mpgetfix(nr->val.u.xval);
   697  		if(sc >= nl->type->width*8) {
   698  			// large shift gets 2 shifts by width-1
   699  			gins(a, ncon(w-1), &n1);
   700  			gins(a, ncon(w-1), &n1);
   701  		} else
   702  			gins(a, nr, &n1);
   703  		gmove(&n1, res);
   704  		regfree(&n1);
   705  		return;
   706  	}
   707  
   708  	memset(&oldcx, 0, sizeof oldcx);
   709  	nodreg(&cx, types[TUINT32], D_CX);
   710  	if(reg[D_CX] > 1 && !samereg(&cx, res)) {
   711  		tempname(&oldcx, types[TUINT32]);
   712  		gmove(&cx, &oldcx);
   713  	}
   714  
   715  	if(nr->type->width > 4) {
   716  		tempname(&nt, nr->type);
   717  		n1 = nt;
   718  	} else {
   719  		nodreg(&n1, types[TUINT32], D_CX);
   720  		regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
   721  	}
   722  
   723  	if(samereg(&cx, res))
   724  		regalloc(&n2, nl->type, N);
   725  	else
   726  		regalloc(&n2, nl->type, res);
   727  	if(nl->ullman >= nr->ullman) {
   728  		cgen(nl, &n2);
   729  		cgen(nr, &n1);
   730  	} else {
   731  		cgen(nr, &n1);
   732  		cgen(nl, &n2);
   733  	}
   734  
   735  	// test and fix up large shifts
   736  	if(bounded) {
   737  		if(nr->type->width > 4) {
   738  			// delayed reg alloc
   739  			nodreg(&n1, types[TUINT32], D_CX);
   740  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   741  			split64(&nt, &lo, &hi);
   742  			gmove(&lo, &n1);
   743  			splitclean();
   744  		}
   745  	} else {
   746  		if(nr->type->width > 4) {
   747  			// delayed reg alloc
   748  			nodreg(&n1, types[TUINT32], D_CX);
   749  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   750  			split64(&nt, &lo, &hi);
   751  			gmove(&lo, &n1);
   752  			gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
   753  			p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
   754  			gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
   755  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   756  			splitclean();
   757  			patch(p2, pc);
   758  		} else {
   759  			gins(optoas(OCMP, nr->type), &n1, ncon(w));
   760  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   761  		}
   762  		if(op == ORSH && issigned[nl->type->etype]) {
   763  			gins(a, ncon(w-1), &n2);
   764  		} else {
   765  			gmove(ncon(0), &n2);
   766  		}
   767  		patch(p1, pc);
   768  	}
   769  	gins(a, &n1, &n2);
   770  
   771  	if(oldcx.op != 0)
   772  		gmove(&oldcx, &cx);
   773  
   774  	gmove(&n2, res);
   775  
   776  	regfree(&n1);
   777  	regfree(&n2);
   778  }
   779  
   780  /*
   781   * generate byte multiply:
   782   *	res = nl * nr
   783   * there is no 2-operand byte multiply instruction so
   784   * we do a full-width multiplication and truncate afterwards.
   785   */
   786  void
   787  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
   788  {
   789  	Node n1, n2, nt, *tmp;
   790  	Type *t;
   791  	int a;
   792  
   793  	// copy from byte to full registers
   794  	t = types[TUINT32];
   795  	if(issigned[nl->type->etype])
   796  		t = types[TINT32];
   797  
   798  	// largest ullman on left.
   799  	if(nl->ullman < nr->ullman) {
   800  		tmp = nl;
   801  		nl = nr;
   802  		nr = tmp;
   803  	}
   804  
   805  	tempname(&nt, nl->type);
   806  	cgen(nl, &nt);
   807  	regalloc(&n1, t, res);
   808  	cgen(nr, &n1);
   809  	regalloc(&n2, t, N);
   810  	gmove(&nt, &n2);
   811  	a = optoas(op, t);
   812  	gins(a, &n2, &n1);
   813  	regfree(&n2);
   814  	gmove(&n1, res);
   815  	regfree(&n1);
   816  }
   817  
   818  /*
   819   * generate high multiply:
   820   *   res = (nl*nr) >> width
   821   */
   822  void
   823  cgen_hmul(Node *nl, Node *nr, Node *res)
   824  {
   825  	Type *t;
   826  	int a;
   827  	Node n1, n2, ax, dx;
   828  
   829  	t = nl->type;
   830  	a = optoas(OHMUL, t);
   831  	// gen nl in n1.
   832  	tempname(&n1, t);
   833  	cgen(nl, &n1);
   834  	// gen nr in n2.
   835  	regalloc(&n2, t, res);
   836  	cgen(nr, &n2);
   837  
   838  	// multiply.
   839  	nodreg(&ax, t, D_AX);
   840  	gmove(&n2, &ax);
   841  	gins(a, &n1, N);
   842  	regfree(&n2);
   843  
   844  	if(t->width == 1) {
   845  		// byte multiply behaves differently.
   846  		nodreg(&ax, t, D_AH);
   847  		nodreg(&dx, t, D_DL);
   848  		gmove(&ax, &dx);
   849  	}
   850  	nodreg(&dx, t, D_DX);
   851  	gmove(&dx, res);
   852  }
   853  
   854  static void cgen_float387(Node *n, Node *res);
   855  static void cgen_floatsse(Node *n, Node *res);
   856  
   857  /*
   858   * generate floating-point operation.
   859   */
   860  void
   861  cgen_float(Node *n, Node *res)
   862  {
   863  	Node *nl;
   864  	Node n1, n2;
   865  	Prog *p1, *p2, *p3;
   866  
   867  	nl = n->left;
   868  	switch(n->op) {
   869  	case OEQ:
   870  	case ONE:
   871  	case OLT:
   872  	case OLE:
   873  	case OGE:
   874  		p1 = gbranch(AJMP, T, 0);
   875  		p2 = pc;
   876  		gmove(nodbool(1), res);
   877  		p3 = gbranch(AJMP, T, 0);
   878  		patch(p1, pc);
   879  		bgen(n, 1, 0, p2);
   880  		gmove(nodbool(0), res);
   881  		patch(p3, pc);
   882  		return;
   883  
   884  	case OPLUS:
   885  		cgen(nl, res);
   886  		return;
   887  
   888  	case OCONV:
   889  		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
   890  			cgen(nl, res);
   891  			return;
   892  		}
   893  
   894  		tempname(&n2, n->type);
   895  		mgen(nl, &n1, res);
   896  		gmove(&n1, &n2);
   897  		gmove(&n2, res);
   898  		mfree(&n1);
   899  		return;
   900  	}
   901  
   902  	if(use_sse)
   903  		cgen_floatsse(n, res);
   904  	else
   905  		cgen_float387(n, res);
   906  }
   907  
   908  // floating-point.  387 (not SSE2)
   909  static void
   910  cgen_float387(Node *n, Node *res)
   911  {
   912  	Node f0, f1;
   913  	Node *nl, *nr;
   914  
   915  	nl = n->left;
   916  	nr = n->right;
   917  	nodreg(&f0, nl->type, D_F0);
   918  	nodreg(&f1, n->type, D_F0+1);
   919  	if(nr != N)
   920  		goto flt2;
   921  
   922  	// unary
   923  	cgen(nl, &f0);
   924  	if(n->op != OCONV && n->op != OPLUS)
   925  		gins(foptoas(n->op, n->type, 0), N, N);
   926  	gmove(&f0, res);
   927  	return;
   928  
   929  flt2:	// binary
   930  	if(nl->ullman >= nr->ullman) {
   931  		cgen(nl, &f0);
   932  		if(nr->addable)
   933  			gins(foptoas(n->op, n->type, 0), nr, &f0);
   934  		else {
   935  			cgen(nr, &f0);
   936  			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
   937  		}
   938  	} else {
   939  		cgen(nr, &f0);
   940  		if(nl->addable)
   941  			gins(foptoas(n->op, n->type, Frev), nl, &f0);
   942  		else {
   943  			cgen(nl, &f0);
   944  			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
   945  		}
   946  	}
   947  	gmove(&f0, res);
   948  	return;
   949  
   950  }
   951  
   952  static void
   953  cgen_floatsse(Node *n, Node *res)
   954  {
   955  	Node *nl, *nr, *r;
   956  	Node n1, n2, nt;
   957  	int a;
   958  
   959  	nl = n->left;
   960  	nr = n->right;
   961  	switch(n->op) {
   962  	default:
   963  		dump("cgen_floatsse", n);
   964  		fatal("cgen_floatsse %O", n->op);
   965  		return;
   966  
   967  	case OMINUS:
   968  	case OCOM:
   969  		nr = nodintconst(-1);
   970  		convlit(&nr, n->type);
   971  		a = foptoas(OMUL, nl->type, 0);
   972  		goto sbop;
   973  
   974  	// symmetric binary
   975  	case OADD:
   976  	case OMUL:
   977  		a = foptoas(n->op, nl->type, 0);
   978  		goto sbop;
   979  
   980  	// asymmetric binary
   981  	case OSUB:
   982  	case OMOD:
   983  	case ODIV:
   984  		a = foptoas(n->op, nl->type, 0);
   985  		goto abop;
   986  	}
   987  
   988  sbop:	// symmetric binary
   989  	if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
   990  		r = nl;
   991  		nl = nr;
   992  		nr = r;
   993  	}
   994  
   995  abop:	// asymmetric binary
   996  	if(nl->ullman >= nr->ullman) {
   997  		tempname(&nt, nl->type);
   998  		cgen(nl, &nt);
   999  		mgen(nr, &n2, N);
  1000  		regalloc(&n1, nl->type, res);
  1001  		gmove(&nt, &n1);
  1002  		gins(a, &n2, &n1);
  1003  		gmove(&n1, res);
  1004  		regfree(&n1);
  1005  		mfree(&n2);
  1006  	} else {
  1007  		regalloc(&n2, nr->type, res);
  1008  		cgen(nr, &n2);
  1009  		regalloc(&n1, nl->type, N);
  1010  		cgen(nl, &n1);
  1011  		gins(a, &n2, &n1);
  1012  		regfree(&n2);
  1013  		gmove(&n1, res);
  1014  		regfree(&n1);
  1015  	}
  1016  	return;
  1017  }
  1018  
  1019  void
  1020  bgen_float(Node *n, int true, int likely, Prog *to)
  1021  {
  1022  	int et, a;
  1023  	Node *nl, *nr, *r;
  1024  	Node n1, n2, n3, tmp, t1, t2, ax;
  1025  	Prog *p1, *p2;
  1026  
  1027  	nl = n->left;
  1028  	nr = n->right;
  1029  	a = n->op;
  1030  	if(!true) {
  1031  		// brcom is not valid on floats when NaN is involved.
  1032  		p1 = gbranch(AJMP, T, 0);
  1033  		p2 = gbranch(AJMP, T, 0);
  1034  		patch(p1, pc);
  1035  		// No need to avoid re-genning ninit.
  1036  		bgen_float(n, 1, -likely, p2);
  1037  		patch(gbranch(AJMP, T, 0), to);
  1038  		patch(p2, pc);
  1039  		return;
  1040  	}
  1041  
  1042  	if(use_sse)
  1043  		goto sse;
  1044  	else
  1045  		goto x87;
  1046  
  1047  x87:
  1048  	a = brrev(a);	// because the args are stacked
  1049  	if(a == OGE || a == OGT) {
  1050  		// only < and <= work right with NaN; reverse if needed
  1051  		r = nr;
  1052  		nr = nl;
  1053  		nl = r;
  1054  		a = brrev(a);
  1055  	}
  1056  
  1057  	nodreg(&tmp, nr->type, D_F0);
  1058  	nodreg(&n2, nr->type, D_F0 + 1);
  1059  	nodreg(&ax, types[TUINT16], D_AX);
  1060  	et = simsimtype(nr->type);
  1061  	if(et == TFLOAT64) {
  1062  		if(nl->ullman > nr->ullman) {
  1063  			cgen(nl, &tmp);
  1064  			cgen(nr, &tmp);
  1065  			gins(AFXCHD, &tmp, &n2);
  1066  		} else {
  1067  			cgen(nr, &tmp);
  1068  			cgen(nl, &tmp);
  1069  		}
  1070  		gins(AFUCOMIP, &tmp, &n2);
  1071  		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF
  1072  	} else {
  1073  		// TODO(rsc): The moves back and forth to memory
  1074  		// here are for truncating the value to 32 bits.
  1075  		// This handles 32-bit comparison but presumably
  1076  		// all the other ops have the same problem.
  1077  		// We need to figure out what the right general
  1078  		// solution is, besides telling people to use float64.
  1079  		tempname(&t1, types[TFLOAT32]);
  1080  		tempname(&t2, types[TFLOAT32]);
  1081  		cgen(nr, &t1);
  1082  		cgen(nl, &t2);
  1083  		gmove(&t2, &tmp);
  1084  		gins(AFCOMFP, &t1, &tmp);
  1085  		gins(AFSTSW, N, &ax);
  1086  		gins(ASAHF, N, N);
  1087  	}
  1088  
  1089  	goto ret;
  1090  
  1091  sse:
  1092  	if(!nl->addable) {
  1093  		tempname(&n1, nl->type);
  1094  		cgen(nl, &n1);
  1095  		nl = &n1;
  1096  	}
  1097  	if(!nr->addable) {
  1098  		tempname(&tmp, nr->type);
  1099  		cgen(nr, &tmp);
  1100  		nr = &tmp;
  1101  	}
  1102  	regalloc(&n2, nr->type, N);
  1103  	gmove(nr, &n2);
  1104  	nr = &n2;
  1105  
  1106  	if(nl->op != OREGISTER) {
  1107  		regalloc(&n3, nl->type, N);
  1108  		gmove(nl, &n3);
  1109  		nl = &n3;
  1110  	}
  1111  
  1112  	if(a == OGE || a == OGT) {
  1113  		// only < and <= work right with NaN; reverse if needed
  1114  		r = nr;
  1115  		nr = nl;
  1116  		nl = r;
  1117  		a = brrev(a);
  1118  	}
  1119  
  1120  	gins(foptoas(OCMP, nr->type, 0), nl, nr);
  1121  	if(nl->op == OREGISTER)
  1122  		regfree(nl);
  1123  	regfree(nr);
  1124  
  1125  ret:
  1126  	if(a == OEQ) {
  1127  		// neither NE nor P
  1128  		p1 = gbranch(AJNE, T, -likely);
  1129  		p2 = gbranch(AJPS, T, -likely);
  1130  		patch(gbranch(AJMP, T, 0), to);
  1131  		patch(p1, pc);
  1132  		patch(p2, pc);
  1133  	} else if(a == ONE) {
  1134  		// either NE or P
  1135  		patch(gbranch(AJNE, T, likely), to);
  1136  		patch(gbranch(AJPS, T, likely), to);
  1137  	} else
  1138  		patch(gbranch(optoas(a, nr->type), T, likely), to);
  1139  
  1140  }