github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/6g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  void
    13  defframe(Prog *ptxt)
    14  {
    15  	// fill in argument size
    16  	ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
    17  
    18  	// fill in final stack size
    19  	ptxt->to.offset <<= 32;
    20  	ptxt->to.offset |= rnd(stksize+maxarg, widthptr);
    21  }
    22  
    23  // Sweep the prog list to mark any used nodes.
    24  void
    25  markautoused(Prog* p)
    26  {
    27  	for (; p; p = p->link) {
    28  		if (p->as == ATYPE)
    29  			continue;
    30  
    31  		if (p->from.type == D_AUTO && p->from.node)
    32  			p->from.node->used = 1;
    33  
    34  		if (p->to.type == D_AUTO && p->to.node)
    35  			p->to.node->used = 1;
    36  	}
    37  }
    38  
    39  // Fixup instructions after compactframe has moved all autos around.
    40  void
    41  fixautoused(Prog *p)
    42  {
    43  	Prog **lp;
    44  
    45  	for (lp=&p; (p=*lp) != P; ) {
    46  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
    47  			*lp = p->link;
    48  			continue;
    49  		}
    50  		if (p->from.type == D_AUTO && p->from.node)
    51  			p->from.offset += p->from.node->stkdelta;
    52  
    53  		if (p->to.type == D_AUTO && p->to.node)
    54  			p->to.offset += p->to.node->stkdelta;
    55  
    56  		lp = &p->link;
    57  	}
    58  }
    59  
    60  
    61  /*
    62   * generate:
    63   *	call f
    64   *	proc=-1	normal call but no return
    65   *	proc=0	normal call
    66   *	proc=1	goroutine run in new proc
    67   *	proc=2	defer call save away stack
    68    *	proc=3	normal call to C pointer (not Go func value)
    69   */
    70  void
    71  ginscall(Node *f, int proc)
    72  {
    73  	Prog *p;
    74  	Node reg, con;
    75  	Node r1;
    76  
    77  	switch(proc) {
    78  	default:
    79  		fatal("ginscall: bad proc %d", proc);
    80  		break;
    81  
    82  	case 0:	// normal call
    83  	case -1:	// normal call but no return
    84  		if(f->op == ONAME && f->class == PFUNC) {
    85  			p = gins(ACALL, N, f);
    86  			afunclit(&p->to, f);
    87  			if(proc == -1 || noreturn(p))
    88  				gins(AUNDEF, N, N);
    89  			break;
    90  		}
    91  		nodreg(&reg, types[tptr], D_DX);
    92  		nodreg(&r1, types[tptr], D_BX);
    93  		gmove(f, &reg);
    94  		reg.op = OINDREG;
    95  		gmove(&reg, &r1);
    96  		reg.op = OREGISTER;
    97  		gins(ACALL, &reg, &r1);
    98  		break;
    99  	
   100  	case 3:	// normal call of c function pointer
   101  		gins(ACALL, N, f);
   102  		break;
   103  
   104  	case 1:	// call in new proc (go)
   105  	case 2:	// deferred call (defer)
   106  		nodreg(&reg, types[TINT64], D_CX);
   107  		if(flag_largemodel) {
   108  			regalloc(&r1, f->type, f);
   109  			gmove(f, &r1);
   110  			gins(APUSHQ, &r1, N);
   111  			regfree(&r1);
   112  		} else {
   113  			gins(APUSHQ, f, N);
   114  		}
   115  		nodconst(&con, types[TINT32], argsize(f->type));
   116  		gins(APUSHQ, &con, N);
   117  		if(proc == 1)
   118  			ginscall(newproc, 0);
   119  		else {
   120  			if(!hasdefer)
   121  				fatal("hasdefer=0 but has defer");
   122  			ginscall(deferproc, 0);
   123  		}
   124  		gins(APOPQ, N, &reg);
   125  		gins(APOPQ, N, &reg);
   126  		if(proc == 2) {
   127  			nodreg(&reg, types[TINT64], D_AX);
   128  			gins(ATESTQ, &reg, &reg);
   129  			patch(gbranch(AJNE, T, -1), retpc);
   130  		}
   131  		break;
   132  	}
   133  }
   134  
   135  /*
   136   * n is call to interface method.
   137   * generate res = n.
   138   */
   139  void
   140  cgen_callinter(Node *n, Node *res, int proc)
   141  {
   142  	Node *i, *f;
   143  	Node tmpi, nodi, nodo, nodr, nodsp;
   144  
   145  	i = n->left;
   146  	if(i->op != ODOTINTER)
   147  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   148  
   149  	f = i->right;		// field
   150  	if(f->op != ONAME)
   151  		fatal("cgen_callinter: not ONAME %O", f->op);
   152  
   153  	i = i->left;		// interface
   154  
   155  	if(!i->addable) {
   156  		tempname(&tmpi, i->type);
   157  		cgen(i, &tmpi);
   158  		i = &tmpi;
   159  	}
   160  
   161  	genlist(n->list);		// assign the args
   162  
   163  	// i is now addable, prepare an indirected
   164  	// register to hold its address.
   165  	igen(i, &nodi, res);		// REG = &inter
   166  
   167  	nodindreg(&nodsp, types[tptr], D_SP);
   168  	nodi.type = types[tptr];
   169  	nodi.xoffset += widthptr;
   170  	cgen(&nodi, &nodsp);	// 0(SP) = 8(REG) -- i.data
   171  
   172  	regalloc(&nodo, types[tptr], res);
   173  	nodi.type = types[tptr];
   174  	nodi.xoffset -= widthptr;
   175  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   176  	regfree(&nodi);
   177  
   178  	regalloc(&nodr, types[tptr], &nodo);
   179  	if(n->left->xoffset == BADWIDTH)
   180  		fatal("cgen_callinter: badwidth");
   181  	nodo.op = OINDREG;
   182  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   183  	if(proc == 0) {
   184  		// plain call: use direct c function pointer - more efficient
   185  		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
   186  		proc = 3;
   187  	} else {
   188  		// go/defer. generate go func value.
   189  		gins(ALEAQ, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
   190  	}
   191  
   192  	// BOTCH nodr.type = fntype;
   193  	nodr.type = n->left->type;
   194  	ginscall(&nodr, proc);
   195  
   196  	regfree(&nodr);
   197  	regfree(&nodo);
   198  
   199  	setmaxarg(n->left->type);
   200  }
   201  
   202  /*
   203   * generate function call;
   204   *	proc=0	normal call
   205   *	proc=1	goroutine run in new proc
   206   *	proc=2	defer call save away stack
   207   */
   208  void
   209  cgen_call(Node *n, int proc)
   210  {
   211  	Type *t;
   212  	Node nod, afun;
   213  
   214  	if(n == N)
   215  		return;
   216  
   217  	if(n->left->ullman >= UINF) {
   218  		// if name involves a fn call
   219  		// precompute the address of the fn
   220  		tempname(&afun, types[tptr]);
   221  		cgen(n->left, &afun);
   222  	}
   223  
   224  	genlist(n->list);		// assign the args
   225  	t = n->left->type;
   226  
   227  	setmaxarg(t);
   228  
   229  	// call tempname pointer
   230  	if(n->left->ullman >= UINF) {
   231  		regalloc(&nod, types[tptr], N);
   232  		cgen_as(&nod, &afun);
   233  		nod.type = t;
   234  		ginscall(&nod, proc);
   235  		regfree(&nod);
   236  		return;
   237  	}
   238  
   239  	// call pointer
   240  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   241  		regalloc(&nod, types[tptr], N);
   242  		cgen_as(&nod, n->left);
   243  		nod.type = t;
   244  		ginscall(&nod, proc);
   245  		regfree(&nod);
   246  		return;
   247  	}
   248  
   249  	// call direct
   250  	n->left->method = 1;
   251  	ginscall(n->left, proc);
   252  }
   253  
   254  /*
   255   * call to n has already been generated.
   256   * generate:
   257   *	res = return value from call.
   258   */
   259  void
   260  cgen_callret(Node *n, Node *res)
   261  {
   262  	Node nod;
   263  	Type *fp, *t;
   264  	Iter flist;
   265  
   266  	t = n->left->type;
   267  	if(t->etype == TPTR32 || t->etype == TPTR64)
   268  		t = t->type;
   269  
   270  	fp = structfirst(&flist, getoutarg(t));
   271  	if(fp == T)
   272  		fatal("cgen_callret: nil");
   273  
   274  	memset(&nod, 0, sizeof(nod));
   275  	nod.op = OINDREG;
   276  	nod.val.u.reg = D_SP;
   277  	nod.addable = 1;
   278  
   279  	nod.xoffset = fp->width;
   280  	nod.type = fp->type;
   281  	cgen_as(res, &nod);
   282  }
   283  
   284  /*
   285   * call to n has already been generated.
   286   * generate:
   287   *	res = &return value from call.
   288   */
   289  void
   290  cgen_aret(Node *n, Node *res)
   291  {
   292  	Node nod1, nod2;
   293  	Type *fp, *t;
   294  	Iter flist;
   295  
   296  	t = n->left->type;
   297  	if(isptr[t->etype])
   298  		t = t->type;
   299  
   300  	fp = structfirst(&flist, getoutarg(t));
   301  	if(fp == T)
   302  		fatal("cgen_aret: nil");
   303  
   304  	memset(&nod1, 0, sizeof(nod1));
   305  	nod1.op = OINDREG;
   306  	nod1.val.u.reg = D_SP;
   307  	nod1.addable = 1;
   308  
   309  	nod1.xoffset = fp->width;
   310  	nod1.type = fp->type;
   311  
   312  	if(res->op != OREGISTER) {
   313  		regalloc(&nod2, types[tptr], res);
   314  		gins(ALEAQ, &nod1, &nod2);
   315  		gins(AMOVQ, &nod2, res);
   316  		regfree(&nod2);
   317  	} else
   318  		gins(ALEAQ, &nod1, res);
   319  }
   320  
   321  /*
   322   * generate return.
   323   * n->left is assignments to return values.
   324   */
   325  void
   326  cgen_ret(Node *n)
   327  {
   328  	genlist(n->list);		// copy out args
   329  	if(hasdefer || curfn->exit)
   330  		gjmp(retpc);
   331  	else
   332  		gins(ARET, N, N);
   333  }
   334  
   335  /*
   336   * generate += *= etc.
   337   */
   338  void
   339  cgen_asop(Node *n)
   340  {
   341  	Node n1, n2, n3, n4;
   342  	Node *nl, *nr;
   343  	Prog *p1;
   344  	Addr addr;
   345  	int a;
   346  
   347  	nl = n->left;
   348  	nr = n->right;
   349  
   350  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   351  		tempname(&n1, nr->type);
   352  		cgen(nr, &n1);
   353  		n2 = *n;
   354  		n2.right = &n1;
   355  		cgen_asop(&n2);
   356  		goto ret;
   357  	}
   358  
   359  	if(!isint[nl->type->etype])
   360  		goto hard;
   361  	if(!isint[nr->type->etype])
   362  		goto hard;
   363  
   364  	switch(n->etype) {
   365  	case OADD:
   366  		if(smallintconst(nr))
   367  		if(mpgetfix(nr->val.u.xval) == 1) {
   368  			a = optoas(OINC, nl->type);
   369  			if(nl->addable) {
   370  				gins(a, N, nl);
   371  				goto ret;
   372  			}
   373  			if(sudoaddable(a, nl, &addr)) {
   374  				p1 = gins(a, N, N);
   375  				p1->to = addr;
   376  				sudoclean();
   377  				goto ret;
   378  			}
   379  		}
   380  		break;
   381  
   382  	case OSUB:
   383  		if(smallintconst(nr))
   384  		if(mpgetfix(nr->val.u.xval) == 1) {
   385  			a = optoas(ODEC, nl->type);
   386  			if(nl->addable) {
   387  				gins(a, N, nl);
   388  				goto ret;
   389  			}
   390  			if(sudoaddable(a, nl, &addr)) {
   391  				p1 = gins(a, N, N);
   392  				p1->to = addr;
   393  				sudoclean();
   394  				goto ret;
   395  			}
   396  		}
   397  		break;
   398  	}
   399  
   400  	switch(n->etype) {
   401  	case OADD:
   402  	case OSUB:
   403  	case OXOR:
   404  	case OAND:
   405  	case OOR:
   406  		a = optoas(n->etype, nl->type);
   407  		if(nl->addable) {
   408  			if(smallintconst(nr)) {
   409  				gins(a, nr, nl);
   410  				goto ret;
   411  			}
   412  			regalloc(&n2, nr->type, N);
   413  			cgen(nr, &n2);
   414  			gins(a, &n2, nl);
   415  			regfree(&n2);
   416  			goto ret;
   417  		}
   418  		if(nr->ullman < UINF)
   419  		if(sudoaddable(a, nl, &addr)) {
   420  			if(smallintconst(nr)) {
   421  				p1 = gins(a, nr, N);
   422  				p1->to = addr;
   423  				sudoclean();
   424  				goto ret;
   425  			}
   426  			regalloc(&n2, nr->type, N);
   427  			cgen(nr, &n2);
   428  			p1 = gins(a, &n2, N);
   429  			p1->to = addr;
   430  			regfree(&n2);
   431  			sudoclean();
   432  			goto ret;
   433  		}
   434  	}
   435  
   436  hard:
   437  	n2.op = 0;
   438  	n1.op = 0;
   439  	if(nr->op == OLITERAL) {
   440  		// don't allocate a register for literals.
   441  	} else if(nr->ullman >= nl->ullman || nl->addable) {
   442  		regalloc(&n2, nr->type, N);
   443  		cgen(nr, &n2);
   444  		nr = &n2;
   445  	} else {
   446  		tempname(&n2, nr->type);
   447  		cgen(nr, &n2);
   448  		nr = &n2;
   449  	}
   450  	if(!nl->addable) {
   451  		igen(nl, &n1, N);
   452  		nl = &n1;
   453  	}
   454  
   455  	n3 = *n;
   456  	n3.left = nl;
   457  	n3.right = nr;
   458  	n3.op = n->etype;
   459  
   460  	regalloc(&n4, nl->type, N);
   461  	cgen(&n3, &n4);
   462  	gmove(&n4, nl);
   463  
   464  	if(n1.op)
   465  		regfree(&n1);
   466  	if(n2.op == OREGISTER)
   467  		regfree(&n2);
   468  	regfree(&n4);
   469  
   470  ret:
   471  	;
   472  }
   473  
   474  int
   475  samereg(Node *a, Node *b)
   476  {
   477  	if(a == N || b == N)
   478  		return 0;
   479  	if(a->op != OREGISTER)
   480  		return 0;
   481  	if(b->op != OREGISTER)
   482  		return 0;
   483  	if(a->val.u.reg != b->val.u.reg)
   484  		return 0;
   485  	return 1;
   486  }
   487  
   488  /*
   489   * generate division.
   490   * generates one of:
   491   *	res = nl / nr
   492   *	res = nl % nr
   493   * according to op.
   494   */
   495  void
   496  dodiv(int op, Node *nl, Node *nr, Node *res)
   497  {
   498  	int a, check;
   499  	Node n3, n4;
   500  	Type *t, *t0;
   501  	Node ax, dx, ax1, n31, oldax, olddx;
   502  	Prog *p1, *p2;
   503  
   504  	// Have to be careful about handling
   505  	// most negative int divided by -1 correctly.
   506  	// The hardware will trap.
   507  	// Also the byte divide instruction needs AH,
   508  	// which we otherwise don't have to deal with.
   509  	// Easiest way to avoid for int8, int16: use int32.
   510  	// For int32 and int64, use explicit test.
   511  	// Could use int64 hw for int32.
   512  	t = nl->type;
   513  	t0 = t;
   514  	check = 0;
   515  	if(issigned[t->etype]) {
   516  		check = 1;
   517  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
   518  			check = 0;
   519  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   520  			check = 0;
   521  	}
   522  	if(t->width < 4) {
   523  		if(issigned[t->etype])
   524  			t = types[TINT32];
   525  		else
   526  			t = types[TUINT32];
   527  		check = 0;
   528  	}
   529  	a = optoas(op, t);
   530  
   531  	regalloc(&n3, t0, N);
   532  	if(nl->ullman >= nr->ullman) {
   533  		savex(D_AX, &ax, &oldax, res, t0);
   534  		cgen(nl, &ax);
   535  		regalloc(&ax, t0, &ax);	// mark ax live during cgen
   536  		cgen(nr, &n3);
   537  		regfree(&ax);
   538  	} else {
   539  		cgen(nr, &n3);
   540  		savex(D_AX, &ax, &oldax, res, t0);
   541  		cgen(nl, &ax);
   542  	}
   543  	if(t != t0) {
   544  		// Convert
   545  		ax1 = ax;
   546  		n31 = n3;
   547  		ax.type = t;
   548  		n3.type = t;
   549  		gmove(&ax1, &ax);
   550  		gmove(&n31, &n3);
   551  	}
   552  
   553  	p2 = P;
   554  	if(check) {
   555  		nodconst(&n4, t, -1);
   556  		gins(optoas(OCMP, t), &n3, &n4);
   557  		p1 = gbranch(optoas(ONE, t), T, +1);
   558  		if(op == ODIV) {
   559  			// a / (-1) is -a.
   560  			gins(optoas(OMINUS, t), N, &ax);
   561  			gmove(&ax, res);
   562  		} else {
   563  			// a % (-1) is 0.
   564  			nodconst(&n4, t, 0);
   565  			gmove(&n4, res);
   566  		}
   567  		p2 = gbranch(AJMP, T, 0);
   568  		patch(p1, pc);
   569  	}
   570  	savex(D_DX, &dx, &olddx, res, t);
   571  	if(!issigned[t->etype]) {
   572  		nodconst(&n4, t, 0);
   573  		gmove(&n4, &dx);
   574  	} else
   575  		gins(optoas(OEXTEND, t), N, N);
   576  	gins(a, &n3, N);
   577  	regfree(&n3);
   578  	if(op == ODIV)
   579  		gmove(&ax, res);
   580  	else
   581  		gmove(&dx, res);
   582  	restx(&dx, &olddx);
   583  	if(check)
   584  		patch(p2, pc);
   585  	restx(&ax, &oldax);
   586  }
   587  
   588  /*
   589   * register dr is one of the special ones (AX, CX, DI, SI, etc.).
   590   * we need to use it.  if it is already allocated as a temporary
   591   * (r > 1; can only happen if a routine like sgen passed a
   592   * special as cgen's res and then cgen used regalloc to reuse
   593   * it as its own temporary), then move it for now to another
   594   * register.  caller must call restx to move it back.
   595   * the move is not necessary if dr == res, because res is
   596   * known to be dead.
   597   */
   598  void
   599  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   600  {
   601  	int r;
   602  
   603  	r = reg[dr];
   604  
   605  	// save current ax and dx if they are live
   606  	// and not the destination
   607  	memset(oldx, 0, sizeof *oldx);
   608  	nodreg(x, t, dr);
   609  	if(r > 1 && !samereg(x, res)) {
   610  		regalloc(oldx, types[TINT64], N);
   611  		x->type = types[TINT64];
   612  		gmove(x, oldx);
   613  		x->type = t;
   614  		oldx->ostk = r;	// squirrel away old r value
   615  		reg[dr] = 1;
   616  	}
   617  }
   618  
   619  void
   620  restx(Node *x, Node *oldx)
   621  {
   622  	if(oldx->op != 0) {
   623  		x->type = types[TINT64];
   624  		reg[x->val.u.reg] = oldx->ostk;
   625  		gmove(oldx, x);
   626  		regfree(oldx);
   627  	}
   628  }
   629  
   630  /*
   631   * generate division according to op, one of:
   632   *	res = nl / nr
   633   *	res = nl % nr
   634   */
   635  void
   636  cgen_div(int op, Node *nl, Node *nr, Node *res)
   637  {
   638  	Node n1, n2, n3;
   639  	int w, a;
   640  	Magic m;
   641  
   642  	if(nr->op != OLITERAL)
   643  		goto longdiv;
   644  	w = nl->type->width*8;
   645  
   646  	// Front end handled 32-bit division. We only need to handle 64-bit.
   647  	// try to do division by multiply by (2^w)/d
   648  	// see hacker's delight chapter 10
   649  	switch(simtype[nl->type->etype]) {
   650  	default:
   651  		goto longdiv;
   652  
   653  	case TUINT64:
   654  		m.w = w;
   655  		m.ud = mpgetfix(nr->val.u.xval);
   656  		umagic(&m);
   657  		if(m.bad)
   658  			break;
   659  		if(op == OMOD)
   660  			goto longmod;
   661  
   662  		cgenr(nl, &n1, N);
   663  		nodconst(&n2, nl->type, m.um);
   664  		regalloc(&n3, nl->type, res);
   665  		cgen_hmul(&n1, &n2, &n3);
   666  
   667  		if(m.ua) {
   668  			// need to add numerator accounting for overflow
   669  			gins(optoas(OADD, nl->type), &n1, &n3);
   670  			nodconst(&n2, nl->type, 1);
   671  			gins(optoas(ORROTC, nl->type), &n2, &n3);
   672  			nodconst(&n2, nl->type, m.s-1);
   673  			gins(optoas(ORSH, nl->type), &n2, &n3);
   674  		} else {
   675  			nodconst(&n2, nl->type, m.s);
   676  			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift dx
   677  		}
   678  
   679  		gmove(&n3, res);
   680  		regfree(&n1);
   681  		regfree(&n3);
   682  		return;
   683  
   684  	case TINT64:
   685  		m.w = w;
   686  		m.sd = mpgetfix(nr->val.u.xval);
   687  		smagic(&m);
   688  		if(m.bad)
   689  			break;
   690  		if(op == OMOD)
   691  			goto longmod;
   692  
   693  		cgenr(nl, &n1, res);
   694  		nodconst(&n2, nl->type, m.sm);
   695  		regalloc(&n3, nl->type, N);
   696  		cgen_hmul(&n1, &n2, &n3);
   697  
   698  		if(m.sm < 0) {
   699  			// need to add numerator
   700  			gins(optoas(OADD, nl->type), &n1, &n3);
   701  		}
   702  
   703  		nodconst(&n2, nl->type, m.s);
   704  		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3
   705  
   706  		nodconst(&n2, nl->type, w-1);
   707  		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
   708  		gins(optoas(OSUB, nl->type), &n1, &n3);	// added
   709  
   710  		if(m.sd < 0) {
   711  			// this could probably be removed
   712  			// by factoring it into the multiplier
   713  			gins(optoas(OMINUS, nl->type), N, &n3);
   714  		}
   715  
   716  		gmove(&n3, res);
   717  		regfree(&n1);
   718  		regfree(&n3);
   719  		return;
   720  	}
   721  	goto longdiv;
   722  
   723  longdiv:
   724  	// division and mod using (slow) hardware instruction
   725  	dodiv(op, nl, nr, res);
   726  	return;
   727  
   728  longmod:
   729  	// mod using formula A%B = A-(A/B*B) but
   730  	// we know that there is a fast algorithm for A/B
   731  	regalloc(&n1, nl->type, res);
   732  	cgen(nl, &n1);
   733  	regalloc(&n2, nl->type, N);
   734  	cgen_div(ODIV, &n1, nr, &n2);
   735  	a = optoas(OMUL, nl->type);
   736  	if(w == 8) {
   737  		// use 2-operand 16-bit multiply
   738  		// because there is no 2-operand 8-bit multiply
   739  		a = AIMULW;
   740  	}
   741  	if(!smallintconst(nr)) {
   742  		regalloc(&n3, nl->type, N);
   743  		cgen(nr, &n3);
   744  		gins(a, &n3, &n2);
   745  		regfree(&n3);
   746  	} else
   747  		gins(a, nr, &n2);
   748  	gins(optoas(OSUB, nl->type), &n2, &n1);
   749  	gmove(&n1, res);
   750  	regfree(&n1);
   751  	regfree(&n2);
   752  }
   753  
   754  /*
   755   * generate high multiply:
   756   *   res = (nl*nr) >> width
   757   */
   758  void
   759  cgen_hmul(Node *nl, Node *nr, Node *res)
   760  {
   761  	Type *t;
   762  	int a;
   763  	Node n1, n2, ax, dx, *tmp;
   764  
   765  	t = nl->type;
   766  	a = optoas(OHMUL, t);
   767  	if(nl->ullman < nr->ullman) {
   768  		tmp = nl;
   769  		nl = nr;
   770  		nr = tmp;
   771  	}
   772  	cgenr(nl, &n1, res);
   773  	cgenr(nr, &n2, N);
   774  	nodreg(&ax, t, D_AX);
   775  	gmove(&n1, &ax);
   776  	gins(a, &n2, N);
   777  	regfree(&n2);
   778  	regfree(&n1);
   779  
   780  	if(t->width == 1) {
   781  		// byte multiply behaves differently.
   782  		nodreg(&ax, t, D_AH);
   783  		nodreg(&dx, t, D_DL);
   784  		gmove(&ax, &dx);
   785  	}
   786  	nodreg(&dx, t, D_DX);
   787  	gmove(&dx, res);
   788  }
   789  
   790  /*
   791   * generate shift according to op, one of:
   792   *	res = nl << nr
   793   *	res = nl >> nr
   794   */
   795  void
   796  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   797  {
   798  	Node n1, n2, n3, n4, n5, cx, oldcx;
   799  	int a, rcx;
   800  	Prog *p1;
   801  	uvlong sc;
   802  	Type *tcount;
   803  
   804  	a = optoas(op, nl->type);
   805  
   806  	if(nr->op == OLITERAL) {
   807  		regalloc(&n1, nl->type, res);
   808  		cgen(nl, &n1);
   809  		sc = mpgetfix(nr->val.u.xval);
   810  		if(sc >= nl->type->width*8) {
   811  			// large shift gets 2 shifts by width-1
   812  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   813  			gins(a, &n3, &n1);
   814  			gins(a, &n3, &n1);
   815  		} else
   816  			gins(a, nr, &n1);
   817  		gmove(&n1, res);
   818  		regfree(&n1);
   819  		goto ret;
   820  	}
   821  
   822  	if(nl->ullman >= UINF) {
   823  		tempname(&n4, nl->type);
   824  		cgen(nl, &n4);
   825  		nl = &n4;
   826  	}
   827  	if(nr->ullman >= UINF) {
   828  		tempname(&n5, nr->type);
   829  		cgen(nr, &n5);
   830  		nr = &n5;
   831  	}
   832  
   833  	rcx = reg[D_CX];
   834  	nodreg(&n1, types[TUINT32], D_CX);
   835  	
   836  	// Allow either uint32 or uint64 as shift type,
   837  	// to avoid unnecessary conversion from uint32 to uint64
   838  	// just to do the comparison.
   839  	tcount = types[simtype[nr->type->etype]];
   840  	if(tcount->etype < TUINT32)
   841  		tcount = types[TUINT32];
   842  
   843  	regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
   844  	regalloc(&n3, tcount, &n1);	// to clear high bits of CX
   845  
   846  	nodreg(&cx, types[TUINT64], D_CX);
   847  	memset(&oldcx, 0, sizeof oldcx);
   848  	if(rcx > 0 && !samereg(&cx, res)) {
   849  		regalloc(&oldcx, types[TUINT64], N);
   850  		gmove(&cx, &oldcx);
   851  	}
   852  	cx.type = tcount;
   853  
   854  	if(samereg(&cx, res))
   855  		regalloc(&n2, nl->type, N);
   856  	else
   857  		regalloc(&n2, nl->type, res);
   858  	if(nl->ullman >= nr->ullman) {
   859  		cgen(nl, &n2);
   860  		cgen(nr, &n1);
   861  		gmove(&n1, &n3);
   862  	} else {
   863  		cgen(nr, &n1);
   864  		gmove(&n1, &n3);
   865  		cgen(nl, &n2);
   866  	}
   867  	regfree(&n3);
   868  
   869  	// test and fix up large shifts
   870  	if(!bounded) {
   871  		nodconst(&n3, tcount, nl->type->width*8);
   872  		gins(optoas(OCMP, tcount), &n1, &n3);
   873  		p1 = gbranch(optoas(OLT, tcount), T, +1);
   874  		if(op == ORSH && issigned[nl->type->etype]) {
   875  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   876  			gins(a, &n3, &n2);
   877  		} else {
   878  			nodconst(&n3, nl->type, 0);
   879  			gmove(&n3, &n2);
   880  		}
   881  		patch(p1, pc);
   882  	}
   883  
   884  	gins(a, &n1, &n2);
   885  
   886  	if(oldcx.op != 0) {
   887  		cx.type = types[TUINT64];
   888  		gmove(&oldcx, &cx);
   889  		regfree(&oldcx);
   890  	}
   891  
   892  	gmove(&n2, res);
   893  
   894  	regfree(&n1);
   895  	regfree(&n2);
   896  
   897  ret:
   898  	;
   899  }
   900  
   901  /*
   902   * generate byte multiply:
   903   *	res = nl * nr
   904   * there is no 2-operand byte multiply instruction so
   905   * we do a full-width multiplication and truncate afterwards.
   906   */
   907  void
   908  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
   909  {
   910  	Node n1, n2, n1b, n2b, *tmp;
   911  	Type *t;
   912  	int a;
   913  
   914  	// largest ullman on left.
   915  	if(nl->ullman < nr->ullman) {
   916  		tmp = nl;
   917  		nl = nr;
   918  		nr = tmp;
   919  	}
   920  
   921  	// generate operands in "8-bit" registers.
   922  	regalloc(&n1b, nl->type, res);
   923  	cgen(nl, &n1b);
   924  	regalloc(&n2b, nr->type, N);
   925  	cgen(nr, &n2b);
   926  
   927  	// perform full-width multiplication.
   928  	t = types[TUINT64];
   929  	if(issigned[nl->type->etype])
   930  		t = types[TINT64];
   931  	nodreg(&n1, t, n1b.val.u.reg);
   932  	nodreg(&n2, t, n2b.val.u.reg);
   933  	a = optoas(op, t);
   934  	gins(a, &n2, &n1);
   935  
   936  	// truncate.
   937  	gmove(&n1, res);
   938  	regfree(&n1b);
   939  	regfree(&n2b);
   940  }
   941  
   942  void
   943  clearfat(Node *nl)
   944  {
   945  	int64 w, c, q;
   946  	Node n1, oldn1, ax, oldax;
   947  
   948  	/* clear a fat object */
   949  	if(debug['g'])
   950  		dump("\nclearfat", nl);
   951  
   952  
   953  	w = nl->type->width;
   954  	// Avoid taking the address for simple enough types.
   955  	if(componentgen(N, nl))
   956  		return;
   957  
   958  	c = w % 8;	// bytes
   959  	q = w / 8;	// quads
   960  
   961  	savex(D_DI, &n1, &oldn1, N, types[tptr]);
   962  	agen(nl, &n1);
   963  
   964  	savex(D_AX, &ax, &oldax, N, types[tptr]);
   965  	gconreg(AMOVQ, 0, D_AX);
   966  
   967  	if(q >= 4) {
   968  		gconreg(AMOVQ, q, D_CX);
   969  		gins(AREP, N, N);	// repeat
   970  		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
   971  	} else
   972  	while(q > 0) {
   973  		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
   974  		q--;
   975  	}
   976  
   977  	if(c >= 4) {
   978  		gconreg(AMOVQ, c, D_CX);
   979  		gins(AREP, N, N);	// repeat
   980  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   981  	} else
   982  	while(c > 0) {
   983  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   984  		c--;
   985  	}
   986  
   987  	restx(&n1, &oldn1);
   988  	restx(&ax, &oldax);
   989  }