github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/5g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
// Forward declarations of the file-local helpers used by defframe.
// appendpp allocates a new Prog, fills in its opcode and from/to operands,
// and links it into the instruction list right after the Prog it is given.
static Prog* appendpp(Prog*, int, int, int, int32, int, int, int32);
// zerorange emits, after p, the instructions that zero the stack range lo..hi
// (hi exclusive); *r0 tracks whether the zero register has been loaded yet.
static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *r0);
    14  
// defframe finishes the frame description for the current function (curfn).
//
// It records the rounded argument size and the rounded frame size in ptxt->to,
// then walks curfn->dcl and, for every variable flagged needzero, arranges for
// its stack slot to be zeroed by instructions inserted directly after ptxt.
// Neighbouring variables are coalesced into a single range so that one
// zerorange call can cover several of them.
//
// ptxt is presumably the function's TEXT pseudo-instruction -- confirm with
// the caller.
void
defframe(Prog *ptxt)
{
	uint32 frame, r0;
	Prog *p;
	vlong hi, lo;
	NodeList *l;
	Node *n;

	// fill in argument size
	ptxt->to.type = D_CONST2;
	ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);

	// fill in final stack size
	frame = rnd(stksize+maxarg, widthptr);
	ptxt->to.offset = frame;

	// insert code to contain ambiguously live variables
	// so that garbage collector only sees initialized values
	// when it looks for pointers.
	p = ptxt;
	// [lo, hi) is the pending range not yet emitted; lo == hi means "empty".
	lo = hi = 0;
	// r0 is passed to zerorange, which sets it once it has emitted the
	// instruction loading zero into register 0.
	r0 = 0;
	for(l=curfn->dcl; l != nil; l = l->next) {
		n = l->n;
		if(!n->needzero)
			continue;
		// Only automatic (stack-local) variables may be flagged.
		if(n->class != PAUTO)
			fatal("needzero class %d", n->class);
		// Slots must be non-empty and pointer-aligned in both size and offset.
		if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
			fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
		// If this variable ends within two words below the pending range,
		// extend the range downward instead of starting a new one.
		// NOTE(review): only lo is updated, so this presumably relies on
		// dcl being visited in decreasing-offset order -- confirm.
		if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthptr) {
			// merge with range we already have
			lo = rnd(n->xoffset, widthptr);
			continue;
		}
		// zero old range
		p = zerorange(p, frame, lo, hi, &r0);

		// set new range
		hi = n->xoffset + n->type->width;
		lo = n->xoffset;
	}
	// zero final range
	zerorange(p, frame, lo, hi, &r0);
}
    61  
// zerorange appends, after p, instructions that store zero to the stack
// bytes at offsets 4+frame+lo up to (but not including) 4+frame+hi from SP,
// and returns the last instruction appended (or p itself if the range is
// empty).
//
// *r0 is a flag shared across calls: the first non-empty call emits
// "MOVW $0, R0" and sets it, later calls reuse the register.
//
// Three strategies are used depending on the size of the range:
//   - fewer than 4 words: one MOVW store per word;
//   - up to 128 words:    point R1 at the range and jump into duffzero;
//   - larger:             an explicit store loop using R1 and R2.
static Prog*
zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *r0)
{
	vlong cnt, i;
	Prog *p1;
	Node *f;

	cnt = hi - lo;
	if(cnt == 0)
		return p;
	if(*r0 == 0) {
		// MOVW $0, R0 -- emitted once; R0 is the zero source for all stores.
		p = appendpp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0);
		*r0 = 1;
	}
	if(cnt < 4*widthptr) {
		// MOVW R0, (4+frame+lo+i)(SP) for each word.
		for(i = 0; i < cnt; i += widthptr) 
			p = appendpp(p, AMOVW, D_REG, 0, 0, D_OREG, REGSP, 4+frame+lo+i);
	} else if(cnt <= 128*widthptr) {
		// ADD $(4+frame+lo), SP, R1 -- R1 = start of the range.
		p = appendpp(p, AADD, D_CONST, NREG, 4+frame+lo, D_REG, 1, 0);
		p->reg = REGSP;
		// DUFFZERO into the runtime's duffzero routine.
		p = appendpp(p, ADUFFZERO, D_NONE, NREG, 0, D_OREG, NREG, 0);
		f = sysfunc("duffzero");
		naddr(f, &p->to, 1);
		afunclit(&p->to, f);
		// Entry offset: skip the stores that are not needed.
		// Presumably duffzero is 128 four-byte store instructions -- see
		// the runtime's asm_arm.s to confirm.
		p->to.offset = 4*(128-cnt/widthptr);
	} else {
		// ADD $(4+frame+lo), SP, R1 -- R1 = start of the range.
		p = appendpp(p, AADD, D_CONST, NREG, 4+frame+lo, D_REG, 1, 0);
		p->reg = REGSP;
		// ADD $cnt, R1, R2 -- R2 = end of the range.
		p = appendpp(p, AADD, D_CONST, NREG, cnt, D_REG, 2, 0);
		p->reg = 1;
		// loop: MOVW R0, 4(R1) with C_PBIT set (presumably post-increment
		// of R1 by 4 -- confirm against the assembler).
		p1 = p = appendpp(p, AMOVW, D_REG, 0, 0, D_OREG, 1, 4);
		p->scond |= C_PBIT;
		// CMP of R1 against R2, then branch back to the store while they differ.
		p = appendpp(p, ACMP, D_REG, 1, 0, D_NONE, 0, 0);
		p->reg = 2;
		p = appendpp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0);
		patch(p, p1);
	}
	return p;
}
   101  
   102  static Prog*	
   103  appendpp(Prog *p, int as, int ftype, int freg, int32 foffset, int ttype, int treg, int32 toffset)	
   104  {	
   105  	Prog *q;	
   106  		
   107  	q = mal(sizeof(*q));	
   108  	clearp(q);	
   109  	q->as = as;	
   110  	q->lineno = p->lineno;	
   111  	q->from.type = ftype;	
   112  	q->from.reg = freg;	
   113  	q->from.offset = foffset;	
   114  	q->to.type = ttype;	
   115  	q->to.reg = treg;	
   116  	q->to.offset = toffset;	
   117  	q->link = p->link;	
   118  	p->link = q;	
   119  	return q;	
   120  }
   121  
   122  // Sweep the prog list to mark any used nodes.
   123  void
   124  markautoused(Prog* p)
   125  {
   126  	for (; p; p = p->link) {
   127  		if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL)
   128  			continue;
   129  
   130  		if (p->from.node)
   131  			p->from.node->used = 1;
   132  
   133  		if (p->to.node)
   134  			p->to.node->used = 1;
   135  	}
   136  }
   137  
   138  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
   139  void
   140  fixautoused(Prog* p)
   141  {
   142  	Prog **lp;
   143  
   144  	for (lp=&p; (p=*lp) != P; ) {
   145  		if (p->as == ATYPE && p->from.node && p->from.name == D_AUTO && !p->from.node->used) {
   146  			*lp = p->link;
   147  			continue;
   148  		}
   149  		if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) {
   150  			// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
   151  			// VARDEFs are interspersed with other code, and a jump might be using the
   152  			// VARDEF as a target. Replace with a no-op instead. A later pass will remove
   153  			// the no-ops.
   154  			p->to.type = D_NONE;
   155  			p->to.node = N;
   156  			p->as = ANOP;
   157  			continue;
   158  		}
   159  
   160  		if (p->from.name == D_AUTO && p->from.node)
   161  			p->from.offset += p->from.node->stkdelta;
   162  
   163  		if (p->to.name == D_AUTO && p->to.node)
   164  			p->to.offset += p->to.node->stkdelta;
   165  
   166  		lp = &p->link;
   167  	}
   168  }
   169  
/*
 * generate:
 *	call f
 *	proc=-1	normal call but no return
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *	proc=3	normal call to C pointer (not Go func value)
 *
 * For some calls (see below) the call is bracketed by gargsize(arg) and
 * gargsize(-1) so that the argument size is recorded per call site.
 */
void
ginscall(Node *f, int proc)
{
	int32 arg;
	Prog *p;
	Node n1, r, r1, con;

	if(f->type != T)
		setmaxarg(f->type);

	arg = -1;
	// Most functions have a fixed-size argument block, so traceback uses that during unwind.
	// Not all, though: there are some variadic functions in package runtime,
	// and for those we emit call-specific metadata recorded by caller.
	// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
	// so we do this for all indirect calls as well.
	if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
		arg = f->type->argwid;
		// go/defer calls get three extra words; presumably these are the
		// 12 bytes pushed below in case 1/2 -- confirm.
		if(proc == 1 || proc == 2)
			arg += 3*widthptr;
	}

	if(arg != -1)
		gargsize(arg);

	switch(proc) {
	default:
		fatal("ginscall: bad proc %d", proc);
		break;

	case 0:	// normal call
	case -1:	// normal call but no return
		if(f->op == ONAME && f->class == PFUNC) {
			// Direct call to a named function.
			if(f == deferreturn) {
				// Deferred calls will appear to be returning to
				// the BL deferreturn(SB) that we are about to emit.
				// However, the stack trace code will show the line
				// of the instruction before that return PC. 
				// To avoid that instruction being an unrelated instruction,
				// insert a NOP so that we will have the right line number.
				// ARM NOP 0x00000000 is really AND.EQ R0, R0, R0.
				// Use the latter form because the NOP pseudo-instruction
				// would be removed by the linker.
				nodreg(&r, types[TINT], 0);
				p = gins(AAND, &r, &r);
				p->scond = C_SCOND_EQ;
			}
			p = gins(ABL, N, f);
			afunclit(&p->to, f);
			// A call that does not return is followed by UNDEF.
			if(proc == -1 || noreturn(p))
				gins(AUNDEF, N, N);
			break;
		}
		// Indirect call through a func value: load f into R7, load the
		// word it points at into R1, then BL with R7 as the from operand
		// and (R1) as the target.
		nodreg(&r, types[tptr], 7);
		nodreg(&r1, types[tptr], 1);
		gmove(f, &r);
		r.op = OINDREG;
		gmove(&r, &r1);
		r.op = OREGISTER;
		r1.op = OINDREG;
		gins(ABL, &r, &r1);
		break;

	case 3:	// normal call of c function pointer
		gins(ABL, N, f);
		break;

	case 1:	// call in new proc (go)
	case 2:	// deferred call (defer)
		// Load the word at 0(SP) into a scratch register ...
		regalloc(&r, types[tptr], N);
		p = gins(AMOVW, N, &r);
		p->from.type = D_OREG;
		p->from.reg = REGSP;

		// ... and store it at -12(SP) with C_WBIT set (presumably
		// write-back, i.e. SP is moved down by 12 -- confirm).
		p = gins(AMOVW, &r, N);
		p->to.type = D_OREG;
		p->to.reg = REGSP;
		p->to.offset = -12;
		p->scond |= C_WBIT;

		// 8(SP) = address of f
		memset(&n1, 0, sizeof n1);
		n1.op = OADDR;
		n1.left = f;
		gins(AMOVW, &n1, &r);

		p = gins(AMOVW, &r, N);
		p->to.type = D_OREG;
		p->to.reg = REGSP;
		p->to.offset = 8;

		// 4(SP) = argument size of f
		nodconst(&con, types[TINT32], argsize(f->type));
		gins(AMOVW, &con, &r);
		p = gins(AMOVW, &r, N);
		p->to.type = D_OREG;
		p->to.reg = REGSP;
		p->to.offset = 4;
		regfree(&r);

		if(proc == 1)
			ginscall(newproc, 0);
		else
			ginscall(deferproc, 0);

		// NOTE(review): r is initialized here but not referenced again in
		// this function; the instruction below is filled in by hand.
		nodreg(&r, types[tptr], 1);
		// MOVW $12(SP), SP -- undo the 12-byte adjustment made above.
		p = gins(AMOVW, N, N);
		p->from.type = D_CONST;
		p->from.reg = REGSP;
		p->from.offset = 12;
		p->to.reg = REGSP;
		p->to.type = D_REG;

		if(proc == 2) {
			// Compare register 0 with $0; when they are equal, branch over
			// the return sequence emitted by cgen_ret.  Presumably R0 holds
			// deferproc's result -- confirm.
			nodconst(&con, types[TINT32], 0);
			p = gins(ACMP, &con, N);
			p->reg = 0;
			p = gbranch(ABEQ, T, +1);
			cgen_ret(N);
			patch(p, pc);
		}
		break;
	}

	if(arg != -1)
		gargsize(-1);
}
   304  
/*
 * n is call to interface method.
 * generate res = n.
 *
 * n->left must be an ODOTINTER whose right child is the ONAME of the
 * method and whose left child is the interface value.
 * proc has the same meaning as in ginscall; a plain call (proc == 0)
 * is turned into proc == 3 (call through a C function pointer).
 */
void
cgen_callinter(Node *n, Node *res, int proc)
{
	int r;
	Node *i, *f;
	Node tmpi, nodo, nodr, nodsp;
	Prog *p;

	i = n->left;
	if(i->op != ODOTINTER)
		fatal("cgen_callinter: not ODOTINTER %O", i->op);

	f = i->right;		// field
	if(f->op != ONAME)
		fatal("cgen_callinter: not ONAME %O", f->op);

	i = i->left;		// interface

	// Release res register during genlist and cgen,
	// which might have their own function calls.
	r = -1;
	if(res != N && (res->op == OREGISTER || res->op == OINDREG)) {
		r = res->val.u.reg;
		reg[r]--;
	}

	// Evaluate a non-addressable interface expression into a temporary
	// so that its address can be taken below.
	if(!i->addable) {
		tempname(&tmpi, i->type);
		cgen(i, &tmpi);
		i = &tmpi;
	}

	genlist(n->list);			// args
	// Re-acquire the res register released above.
	if(r >= 0)
		reg[r]++;

	// nodr holds the interface address / table pointer; nodo is a second
	// register that is then used as an indirect (OINDREG) operand.
	regalloc(&nodr, types[tptr], res);
	regalloc(&nodo, types[tptr], &nodr);
	nodo.op = OINDREG;

	agen(i, &nodr);		// REG = &inter

	nodindreg(&nodsp, types[tptr], REGSP);
	nodsp.xoffset = 4;
	nodo.xoffset += widthptr;
	cgen(&nodo, &nodsp);	// 4(SP) = 4(REG) -- i.data

	nodo.xoffset -= widthptr;
	cgen(&nodo, &nodr);	// REG = 0(REG) -- i.tab
	cgen_checknil(&nodr); // in case offset is huge

	// Offset of the method's slot inside the table.
	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;

	if(proc == 0) {
		// plain call: use direct c function pointer - more efficient
		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
		nodr.op = OINDREG;
		proc = 3;
	} else {
		// go/defer. generate go func value.
		p = gins(AMOVW, &nodo, &nodr);
		p->from.type = D_CONST;	// REG = &(20+offset(REG)) -- i.tab->fun[f]
	}

	// ginscall consults f->type, so give the register node the call's type.
	nodr.type = n->left->type;
	ginscall(&nodr, proc);

	regfree(&nodr);
	regfree(&nodo);
}
   379  
   380  /*
   381   * generate function call;
   382   *	proc=0	normal call
   383   *	proc=1	goroutine run in new proc
   384   *	proc=2	defer call save away stack
   385   */
   386  void
   387  cgen_call(Node *n, int proc)
   388  {
   389  	Type *t;
   390  	Node nod, afun;
   391  
   392  	if(n == N)
   393  		return;
   394  
   395  	if(n->left->ullman >= UINF) {
   396  		// if name involves a fn call
   397  		// precompute the address of the fn
   398  		tempname(&afun, types[tptr]);
   399  		cgen(n->left, &afun);
   400  	}
   401  
   402  	genlist(n->list);		// assign the args
   403  	t = n->left->type;
   404  
   405  	// call tempname pointer
   406  	if(n->left->ullman >= UINF) {
   407  		regalloc(&nod, types[tptr], N);
   408  		cgen_as(&nod, &afun);
   409  		nod.type = t;
   410  		ginscall(&nod, proc);
   411  		regfree(&nod);
   412  		goto ret;
   413  	}
   414  
   415  	// call pointer
   416  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   417  		regalloc(&nod, types[tptr], N);
   418  		cgen_as(&nod, n->left);
   419  		nod.type = t;
   420  		ginscall(&nod, proc);
   421  		regfree(&nod);
   422  		goto ret;
   423  	}
   424  
   425  	// call direct
   426  	n->left->method = 1;
   427  	ginscall(n->left, proc);
   428  
   429  
   430  ret:
   431  	;
   432  }
   433  
   434  /*
   435   * call to n has already been generated.
   436   * generate:
   437   *	res = return value from call.
   438   */
   439  void
   440  cgen_callret(Node *n, Node *res)
   441  {
   442  	Node nod;
   443  	Type *fp, *t;
   444  	Iter flist;
   445  
   446  	t = n->left->type;
   447  	if(t->etype == TPTR32 || t->etype == TPTR64)
   448  		t = t->type;
   449  
   450  	fp = structfirst(&flist, getoutarg(t));
   451  	if(fp == T)
   452  		fatal("cgen_callret: nil");
   453  
   454  	memset(&nod, 0, sizeof(nod));
   455  	nod.op = OINDREG;
   456  	nod.val.u.reg = REGSP;
   457  	nod.addable = 1;
   458  
   459  	nod.xoffset = fp->width + 4; // +4: saved lr at 0(SP)
   460  	nod.type = fp->type;
   461  	cgen_as(res, &nod);
   462  }
   463  
   464  /*
   465   * call to n has already been generated.
   466   * generate:
   467   *	res = &return value from call.
   468   */
   469  void
   470  cgen_aret(Node *n, Node *res)
   471  {
   472  	Node nod1, nod2;
   473  	Type *fp, *t;
   474  	Iter flist;
   475  
   476  	t = n->left->type;
   477  	if(isptr[t->etype])
   478  		t = t->type;
   479  
   480  	fp = structfirst(&flist, getoutarg(t));
   481  	if(fp == T)
   482  		fatal("cgen_aret: nil");
   483  
   484  	memset(&nod1, 0, sizeof(nod1));
   485  	nod1.op = OINDREG;
   486  	nod1.val.u.reg = REGSP;
   487  	nod1.addable = 1;
   488  
   489  	nod1.xoffset = fp->width + 4; // +4: saved lr at 0(SP)
   490  	nod1.type = fp->type;
   491  
   492  	if(res->op != OREGISTER) {
   493  		regalloc(&nod2, types[tptr], res);
   494  		agen(&nod1, &nod2);
   495  		gins(AMOVW, &nod2, res);
   496  		regfree(&nod2);
   497  	} else
   498  		agen(&nod1, res);
   499  }
   500  
   501  /*
   502   * generate return.
   503   * n->left is assignments to return values.
   504   */
   505  void
   506  cgen_ret(Node *n)
   507  {
   508  	Prog *p;
   509  
   510  	if(n != N)
   511  		genlist(n->list);		// copy out args
   512  	if(hasdefer)
   513  		ginscall(deferreturn, 0);
   514  	genlist(curfn->exit);
   515  	p = gins(ARET, N, N);
   516  	if(n != N && n->op == ORETJMP) {
   517  		p->to.name = D_EXTERN;
   518  		p->to.type = D_CONST;
   519  		p->to.sym = linksym(n->left->sym);
   520  	}
   521  }
   522  
   523  /*
   524   * generate += *= etc.
   525   */
   526  void
   527  cgen_asop(Node *n)
   528  {
   529  	Node n1, n2, n3, n4;
   530  	Node *nl, *nr;
   531  	Prog *p1;
   532  	Addr addr;
   533  	int a, w;
   534  
   535  	nl = n->left;
   536  	nr = n->right;
   537  
   538  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   539  		tempname(&n1, nr->type);
   540  		cgen(nr, &n1);
   541  		n2 = *n;
   542  		n2.right = &n1;
   543  		cgen_asop(&n2);
   544  		goto ret;
   545  	}
   546  
   547  	if(!isint[nl->type->etype])
   548  		goto hard;
   549  	if(!isint[nr->type->etype])
   550  		goto hard;
   551  	if(is64(nl->type) || is64(nr->type))
   552  		goto hard64;
   553  
   554  	switch(n->etype) {
   555  	case OADD:
   556  	case OSUB:
   557  	case OXOR:
   558  	case OAND:
   559  	case OOR:
   560  		a = optoas(n->etype, nl->type);
   561  		if(nl->addable) {
   562  			if(smallintconst(nr))
   563  				n3 = *nr;
   564  			else {
   565  				regalloc(&n3, nr->type, N);
   566  				cgen(nr, &n3);
   567  			}
   568  			regalloc(&n2, nl->type, N);
   569  			cgen(nl, &n2);
   570  			gins(a, &n3, &n2);
   571  			cgen(&n2, nl);
   572  			regfree(&n2);
   573  			if(n3.op != OLITERAL)
   574  				regfree(&n3);
   575  			goto ret;
   576  		}
   577  		if(nr->ullman < UINF)
   578  		if(sudoaddable(a, nl, &addr, &w)) {
   579  			w = optoas(OAS, nl->type);
   580  			regalloc(&n2, nl->type, N);
   581  			p1 = gins(w, N, &n2);
   582  			p1->from = addr;
   583  			regalloc(&n3, nr->type, N);
   584  			cgen(nr, &n3);
   585  			gins(a, &n3, &n2);
   586  			p1 = gins(w, &n2, N);
   587  			p1->to = addr;
   588  			regfree(&n2);
   589  			regfree(&n3);
   590  			sudoclean();
   591  			goto ret;
   592  		}
   593  	}
   594  
   595  hard:
   596  	n2.op = 0;
   597  	n1.op = 0;
   598  	if(nr->op == OLITERAL) {
   599  		// don't allocate a register for literals.
   600  	} else if(nr->ullman >= nl->ullman || nl->addable) {
   601  		regalloc(&n2, nr->type, N);
   602  		cgen(nr, &n2);
   603  		nr = &n2;
   604  	} else {
   605  		tempname(&n2, nr->type);
   606  		cgen(nr, &n2);
   607  		nr = &n2;
   608  	}
   609  	if(!nl->addable) {
   610  		igen(nl, &n1, N);
   611  		nl = &n1;
   612  	}
   613  
   614  	n3 = *n;
   615  	n3.left = nl;
   616  	n3.right = nr;
   617  	n3.op = n->etype;
   618  
   619  	regalloc(&n4, nl->type, N);
   620  	cgen(&n3, &n4);
   621  	gmove(&n4, nl);
   622  
   623  	if(n1.op)
   624  		regfree(&n1);
   625  	if(n2.op == OREGISTER)
   626  		regfree(&n2);
   627  	regfree(&n4);
   628  	goto ret;
   629  
   630  hard64:
   631  	if(nr->ullman > nl->ullman) {
   632  		tempname(&n2, nr->type);
   633  		cgen(nr, &n2);
   634  		igen(nl, &n1, N);
   635  	} else {
   636  		igen(nl, &n1, N);
   637  		tempname(&n2, nr->type);
   638  		cgen(nr, &n2);
   639  	}
   640  
   641  	n3 = *n;
   642  	n3.left = &n1;
   643  	n3.right = &n2;
   644  	n3.op = n->etype;
   645  
   646  	cgen(&n3, &n1);
   647  
   648  ret:
   649  	;
   650  }
   651  
   652  int
   653  samereg(Node *a, Node *b)
   654  {
   655  	if(a->op != OREGISTER)
   656  		return 0;
   657  	if(b->op != OREGISTER)
   658  		return 0;
   659  	if(a->val.u.reg != b->val.u.reg)
   660  		return 0;
   661  	return 1;
   662  }
   663  
/*
 * generate high multiply
 *  res = (nl * nr) >> wordsize
 *
 * wordsize is the bit width of nl's type.  8- and 16-bit operands are
 * multiplied in a register and shifted right; 32-bit operands use the
 * long-multiply instructions.  Any other type is a fatal error.
 */
void
cgen_hmul(Node *nl, Node *nr, Node *res)
{
	int w;
	Node n1, n2, *tmp;
	Type *t;
	Prog *p;

	// Make nl the operand with the larger ullman number so that it is
	// evaluated first.
	if(nl->ullman < nr->ullman) {
		tmp = nl;
		nl = nr;
		nr = tmp;
	}
	t = nl->type;
	w = t->width * 8;	// operand width in bits
	regalloc(&n1, t, res);
	cgen(nl, &n1);
	regalloc(&n2, t, N);
	cgen(nr, &n2);
	switch(simtype[t->etype]) {
	case TINT8:
	case TINT16:
		// signed: multiply, then arithmetic shift right by w
		gins(optoas(OMUL, t), &n2, &n1);
		gshift(AMOVW, &n1, SHIFT_AR, w, &n1);
		break;
	case TUINT8:
	case TUINT16:
		// unsigned: multiply, then logical shift right by w
		gins(optoas(OMUL, t), &n2, &n1);
		gshift(AMOVW, &n1, SHIFT_LR, w, &n1);
		break;
	case TINT32:
	case TUINT32:
		// perform a long multiplication.
		if(issigned[t->etype])
			p = gins(AMULL, &n2, N);
		else
			p = gins(AMULLU, &n2, N);
		// n2 * n1 -> (n1 n2)
		// The destination is the register pair (to.reg, to.offset);
		// n1 is what gets copied to res below, so it presumably
		// receives the high word -- confirm against the assembler.
		p->reg = n1.val.u.reg;
		p->to.type = D_REGREG;
		p->to.reg = n1.val.u.reg;
		p->to.offset = n2.val.u.reg;
		break;
	default:
		fatal("cgen_hmul %T", t);
		break;
	}
	cgen(&n1, res);
	regfree(&n1);
	regfree(&n2);
}
   719  
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 * op may also be OLROT (rotate left by the constant nr).
 *
 * nl must be at most 4 bytes wide.  bounded is currently unused.
 * For a non-constant count the emitted code handles a zero count and
 * counts >= the operand width explicitly, using conditional instructions.
 */
void
cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n3, nt, t, lo, hi;
	int w, v;
	Prog *p1, *p2, *p3;
	Type *tr;
	uvlong sc;

	USED(bounded);
	if(nl->type->width > 4)
		fatal("cgen_shift %T", nl->type);

	w = nl->type->width * 8;	// operand width in bits

	if(op == OLROT) {
		// Rotate left by the constant v.
		v = mpgetfix(nr->val.u.xval);
		regalloc(&n1, nl->type, res);
		if(w == 32) {
			// Full-width: rotate right by w-v.
			cgen(nl, &n1);
			gshift(AMOVW, &n1, SHIFT_RR, w-v, &n1);
		} else {
			// Narrow: n1 = (n2 << v) | (n2 >> (w-v)).
			regalloc(&n2, nl->type, N);
			cgen(nl, &n2);
			gshift(AMOVW, &n2, SHIFT_LL, v, &n1);
			gshift(AORR, &n2, SHIFT_LR, w-v, &n1);
			regfree(&n2);
			// Ensure sign/zero-extended result.
			gins(optoas(OAS, nl->type), &n1, &n1);
		}
		gmove(&n1, res);
		regfree(&n1);
		return;
	}

	if(nr->op == OLITERAL) {
		// Constant shift count: pick the instruction at compile time.
		regalloc(&n1, nl->type, res);
		cgen(nl, &n1);
		sc = mpgetfix(nr->val.u.xval);
		if(sc == 0) {
			// nothing to do
		} else if(sc >= nl->type->width*8) {
			// Count covers the whole operand: a signed right shift is
			// done as an arithmetic shift by w, everything else
			// clears the register (EOR with itself).
			if(op == ORSH && issigned[nl->type->etype])
				gshift(AMOVW, &n1, SHIFT_AR, w, &n1);
			else
				gins(AEOR, &n1, &n1);
		} else {
			if(op == ORSH && issigned[nl->type->etype])
				gshift(AMOVW, &n1, SHIFT_AR, sc, &n1);
			else if(op == ORSH)
				gshift(AMOVW, &n1, SHIFT_LR, sc, &n1);
			else // OLSH
				gshift(AMOVW, &n1, SHIFT_LL, sc, &n1);
		}
		// Left-shift of smaller word must be sign/zero-extended.
		if(w < 32 && op == OLSH)
			gins(optoas(OAS, nl->type), &n1, &n1);
		gmove(&n1, res);
		regfree(&n1);
		return;
	}

	// Variable shift count.  After this if/else, n2 holds nl and
	// n1 holds a count of at most 32 bits.
	tr = nr->type;
	if(tr->width > 4) {
		// 64-bit count: evaluate it into a temporary and split it.
		tempname(&nt, nr->type);
		if(nl->ullman >= nr->ullman) {
			regalloc(&n2, nl->type, res);
			cgen(nl, &n2);
			cgen(nr, &nt);
			// NOTE(review): n1 is passed to regalloc below before it is
			// read, so this assignment looks like a dead store -- confirm.
			n1 = nt;
		} else {
			cgen(nr, &nt);
			regalloc(&n2, nl->type, res);
			cgen(nl, &n2);
		}
		split64(&nt, &lo, &hi);
		regalloc(&n1, types[TUINT32], N);
		regalloc(&n3, types[TUINT32], N);
		gmove(&lo, &n1);
		gmove(&hi, &n3);
		splitclean();
		// If the high word is non-zero, conditionally replace the count
		// with w (MOVW.NE $w, n1) so that the large-shift handling
		// below applies.
		gins(ATST, &n3, N);
		nodconst(&t, types[TUINT32], w);
		p1 = gins(AMOVW, &t, &n1);
		p1->scond = C_SCOND_NE;
		tr = types[TUINT32];
		regfree(&n3);
	} else {
		// Evaluate the operand with the larger ullman number first.
		if(nl->ullman >= nr->ullman) {
			regalloc(&n2, nl->type, res);
			cgen(nl, &n2);
			regalloc(&n1, nr->type, N);
			cgen(nr, &n1);
		} else {
			regalloc(&n1, nr->type, N);
			cgen(nr, &n1);
			regalloc(&n2, nl->type, res);
			cgen(nl, &n2);
		}
	}

	// test for shift being 0
	gins(ATST, &n1, N);
	p3 = gbranch(ABEQ, T, -1);

	// test and fix up large shifts
	// TODO: if(!bounded), don't emit some of this.
	regalloc(&n3, tr, N);
	nodconst(&t, types[TUINT32], w);
	gmove(&t, &n3);
	gcmp(ACMP, &n1, &n3);
	// After comparing the count with w, exactly one of p1/p2 executes:
	// p1 (HS, count >= w) produces the saturated result,
	// p2 (LO, count < w) performs the real register shift.
	if(op == ORSH) {
		if(issigned[nl->type->etype]) {
			p1 = gshift(AMOVW, &n2, SHIFT_AR, w-1, &n2);
			p2 = gregshift(AMOVW, &n2, SHIFT_AR, &n1, &n2);
		} else {
			p1 = gins(AEOR, &n2, &n2);
			p2 = gregshift(AMOVW, &n2, SHIFT_LR, &n1, &n2);
		}
		p1->scond = C_SCOND_HS;
		p2->scond = C_SCOND_LO;
	} else {
		p1 = gins(AEOR, &n2, &n2);
		p2 = gregshift(AMOVW, &n2, SHIFT_LL, &n1, &n2);
		p1->scond = C_SCOND_HS;
		p2->scond = C_SCOND_LO;
	}
	regfree(&n3);

	// The zero-count branch lands here, skipping the shift itself.
	patch(p3, pc);
	// Left-shift of smaller word must be sign/zero-extended.
	if(w < 32 && op == OLSH)
		gins(optoas(OAS, nl->type), &n2, &n2);
	gmove(&n2, res);

	regfree(&n1);
	regfree(&n2);
}
   862  
// clearfat emits code that zeroes the object nl.
//
// componentgen is tried first.  Otherwise the object is cleared one 4-byte
// word at a time (explicit loop, duffzero, or unrolled stores depending on
// the word count), followed by byte stores for the remaining w%4 bytes.
void
clearfat(Node *nl)
{
	uint32 w, c, q;
	Node dst, nc, nz, end, r0, r1, *f;
	Prog *p, *pl;

	/* clear a fat object */
	if(debug['g'])
		dump("\nclearfat", nl);

	w = nl->type->width;
	// Avoid taking the address for simple enough types.
	if(componentgen(N, nl))
		return;

	c = w % 4;	// trailing bytes
	q = w / 4;	// whole 4-byte words

	// r0 and r1 are only register hints for regalloc: the destination
	// pointer is requested in REGALLOC_R0+1 and the zero value in
	// REGALLOC_R0.  Presumably this matches the registers duffzero
	// expects -- confirm against the runtime.
	r0.op = OREGISTER;
	r0.val.u.reg = REGALLOC_R0;
	r1.op = OREGISTER;
	r1.val.u.reg = REGALLOC_R0 + 1;
	regalloc(&dst, types[tptr], &r1);
	agen(nl, &dst);		// dst = &nl
	nodconst(&nc, types[TUINT32], 0);
	regalloc(&nz, types[TUINT32], &r0);
	cgen(&nc, &nz);		// nz = 0

	if(q > 128) {
		// Large object: explicit loop.
		// end = dst + q*4
		regalloc(&end, types[tptr], N);
		p = gins(AMOVW, &dst, &end);
		p->from.type = D_CONST;
		p->from.offset = q*4;

		// loop: store nz at 4(dst) with C_PBIT set (presumably
		// post-increment of dst by 4 -- confirm).
		p = gins(AMOVW, &nz, &dst);
		p->to.type = D_OREG;
		p->to.offset = 4;
		p->scond |= C_PBIT;
		pl = p;

		// compare dst with end and branch back to the store while they differ
		p = gins(ACMP, &dst, N);
		raddr(&end, p);
		patch(gbranch(ABNE, T, 0), pl);

		regfree(&end);
	} else if(q >= 4) {
		// Medium object: jump into the runtime's duffzero routine.
		f = sysfunc("duffzero");
		p = gins(ADUFFZERO, N, f);
		afunclit(&p->to, f);
		// 4 and 128 = magic constants: see ../../pkg/runtime/asm_arm.s
		p->to.offset = 4*(128-q);
	} else
	// Small object: one word store per remaining word.
	while(q > 0) {
		p = gins(AMOVW, &nz, &dst);
		p->to.type = D_OREG;
		p->to.offset = 4;
		p->scond |= C_PBIT;
		q--;
	}

	// Clear the trailing bytes one at a time.
	while(c > 0) {
		p = gins(AMOVB, &nz, &dst);
		p->to.type = D_OREG;
		p->to.offset = 1;
		p->scond |= C_PBIT;
		c--;
	}
	regfree(&dst);
	regfree(&nz);
}
   936  
   937  // Called after regopt and peep have run.
   938  // Expand CHECKNIL pseudo-op into actual nil pointer check.
   939  void
   940  expandchecks(Prog *firstp)
   941  {
   942  	int reg;
   943  	Prog *p, *p1;
   944  
   945  	for(p = firstp; p != P; p = p->link) {
   946  		if(p->as != ACHECKNIL)
   947  			continue;
   948  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
   949  			warnl(p->lineno, "generated nil check");
   950  		if(p->from.type != D_REG)
   951  			fatal("invalid nil check %P", p);
   952  		reg = p->from.reg;
   953  		// check is
   954  		//	CMP arg, $0
   955  		//	MOV.EQ arg, 0(arg)
   956  		p1 = mal(sizeof *p1);
   957  		clearp(p1);
   958  		p1->link = p->link;
   959  		p->link = p1;
   960  		p1->lineno = p->lineno;
   961  		p1->pc = 9999;
   962  		p1->as = AMOVW;
   963  		p1->from.type = D_REG;
   964  		p1->from.reg = reg;
   965  		p1->to.type = D_OREG;
   966  		p1->to.reg = reg;
   967  		p1->to.offset = 0;
   968  		p1->scond = C_SCOND_EQ;
   969  		p->as = ACMP;
   970  		p->from.type = D_CONST;
   971  		p->from.reg = NREG;
   972  		p->from.offset = 0;
   973  		p->reg = reg;
   974  	}
   975  }