github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/cmd/9g/ggen.c

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  static Prog *appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset);
    13  static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi);
    14  
    15  void
    16  defframe(Prog *ptxt)
    17  {
    18  	uint32 frame;
    19  	Prog *p;
    20  	vlong hi, lo;
    21  	NodeList *l;
    22  	Node *n;
    23  
    24  	// fill in argument size
    25  	ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
    26  
    27  	// fill in final stack size
    28  	ptxt->to.offset <<= 32;
    29  	frame = rnd(stksize+maxarg, widthreg);
    30  	ptxt->to.offset |= frame;
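        	// The TEXT instruction's to.offset now packs both values:
        	// argument size in the high 32 bits, frame size in the low 32 bits.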
    31  	
    32  	// insert code to zero ambiguously live variables
    33  	// so that the garbage collector only sees initialized values
    34  	// when it looks for pointers.
    35  	p = ptxt;
    36  	lo = hi = 0;
    37  	// iterate through declarations - they are sorted in decreasing xoffset order.
    38  	for(l=curfn->dcl; l != nil; l = l->next) {
    39  		n = l->n;
    40  		if(!n->needzero)
    41  			continue;
    42  		if(n->class != PAUTO)
    43  			fatal("needzero class %d", n->class);
    44  		if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
    45  			fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
    46  
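        		// Merge with the existing range when this variable ends within
        		// 2*widthreg below its low end: zeroing the small gap is presumably
        		// cheaper than starting a separate zeroing sequence.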
    47  		if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthreg) {
    48  			// merge with range we already have
    49  			lo = n->xoffset;
    50  			continue;
    51  		}
    52  		// zero old range
    53  		p = zerorange(p, frame, lo, hi);
    54  
    55  		// set new range
    56  		hi = n->xoffset + n->type->width;
    57  		lo = n->xoffset;
    58  	}
    59  	// zero final range
    60  	zerorange(p, frame, lo, hi);
    61  }
    62  
    63  static Prog*
    64  zerorange(Prog *p, vlong frame, vlong lo, vlong hi)
    65  {
    66  	vlong cnt, i;
    67  	Prog *p1;
    68  	Node *f;
    69  
    70  	cnt = hi - lo;
    71  	if(cnt == 0)
    72  		return p;
    73  	if(cnt < 4*widthptr) {
    74  		for(i = 0; i < cnt; i += widthptr)
    75  			p = appendpp(p, AMOVD, D_REG, REGZERO, 0, D_OREG, REGSP, 8+frame+lo+i);
    76  	} else if(cnt <= 128*widthptr) {
    77  		p = appendpp(p, AADD, D_CONST, NREG, 8+frame+lo-8, D_REG, REGRT1, 0);
    78  		p->reg = REGSP;
    79  		p = appendpp(p, ADUFFZERO, D_NONE, NREG, 0, D_OREG, NREG, 0);
    80  		f = sysfunc("duffzero");
    81  		naddr(f, &p->to, 1);
    82  		afunclit(&p->to, f);
    83  		p->to.offset = 4*(128-cnt/widthptr);
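        		// duffzero is a block of 128 4-byte store-with-update instructions
        		// (see ../../runtime/asm_ppc64x.s); entering it at byte offset
        		// 4*(128-cnt/widthptr) executes exactly cnt/widthptr stores.
        		// REGRT1 was set one word below the range because the stores pre-increment.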
    84  	} else {
    85  		p = appendpp(p, AMOVD, D_CONST, NREG, 8+frame+lo-8, D_REG, REGTMP, 0);
    86  		p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT1, 0);
    87  		p->reg = REGSP;
    88  		p = appendpp(p, AMOVD, D_CONST, NREG, cnt, D_REG, REGTMP, 0);
    89  		p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT2, 0);
    90  		p->reg = REGRT1;
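        		// Store-with-update loop: REGRT1 starts one word below the range,
        		// so each MOVDU zeroes the next word and advances REGRT1 until it
        		// reaches REGRT2, the address of the last word to clear.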
    91  		p1 = p = appendpp(p, AMOVDU, D_REG, REGZERO, 0, D_OREG, REGRT1, widthptr);
    92  		p = appendpp(p, ACMP, D_REG, REGRT1, 0, D_REG, REGRT2, 0);
    93  		p = appendpp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0);
    94  		patch(p, p1);
    95  	}
    96  	return p;
    97  }
    98  
    99  static Prog*
   100  appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset)
   101  {
   102  	Prog *q;
   103  	q = mal(sizeof(*q));
   104  	clearp(q);
   105  	q->as = as;
   106  	q->lineno = p->lineno;
   107  	q->from.type = ftype;
   108  	q->from.reg = freg;
   109  	q->from.offset = foffset;
   110  	q->to.type = ttype;
   111  	q->to.reg = treg;
   112  	q->to.offset = toffset;
   113  	q->link = p->link;
   114  	p->link = q;
   115  	return q;
   116  }
   117  
   118  // Sweep the prog list to mark any used nodes.
   119  void
   120  markautoused(Prog *p)
   121  {
   122  	for (; p; p = p->link) {
   123  		if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL)
   124  			continue;
   125  
   126  		if (p->from.node)
   127  			p->from.node->used = 1;
   128  
   129  		if (p->to.node)
   130  			p->to.node->used = 1;
   131  	}
   132  }
   133  
   134  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
   135  void
   136  fixautoused(Prog *p)
   137  {
   138  	Prog **lp;
   139  
   140  	for (lp=&p; (p=*lp) != P; ) {
   141  		if (p->as == ATYPE && p->from.node && p->from.name == D_AUTO && !p->from.node->used) {
   142  			*lp = p->link;
   143  			continue;
   144  		}
   145  		if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) {
   146  			// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
   147  			// VARDEFs are interspersed with other code, and a jump might be using the
   148  			// VARDEF as a target. Replace with a no-op instead. A later pass will remove
   149  			// the no-ops.
   150  			p->to.type = D_NONE;
   151  			p->to.node = N;
   152  			p->as = ANOP;
   153  			continue;
   154  		}
   155  		if (p->from.name == D_AUTO && p->from.node)
   156  			p->from.offset += p->from.node->stkdelta;
   157  
   158  		if (p->to.name == D_AUTO && p->to.node)
   159  			p->to.offset += p->to.node->stkdelta;
   160  
   161  		lp = &p->link;
   162  	}
   163  }
   164  
   165  /*
   166   * generate: BL reg, f
   167   * where both reg and f are registers.
   168   * On power, f must be moved to CTR first.
   169   */
   170  static void
   171  ginsBL(Node *reg, Node *f)
   172  {
   173  	Prog *p;
   174  	p = gins(AMOVD, f, N);
   175  	p->to.type = D_SPR;
   176  	p->to.offset = D_CTR;
   177  	p = gins(ABL, reg, N);
   178  	p->to.type = D_SPR;
   179  	p->to.offset = D_CTR;
   180  }
   181  
   182  /*
   183   * generate:
   184   *	call f
   185   *	proc=-1	normal call but no return
   186   *	proc=0	normal call
   187   *	proc=1	goroutine run in new proc
   188   *	proc=2	defer call save away stack
   189   *	proc=3	normal call to C pointer (not Go func value)
   190   */
   191  void
   192  ginscall(Node *f, int proc)
   193  {
   194  	Prog *p;
   195  	Node reg, con, reg2;
   196  	Node r1;
   197  	int32 extra;
   198  
   199  	if(f->type != T) {
   200  		extra = 0;
   201  		if(proc == 1 || proc == 2)
   202  			extra = 2 * widthptr;
   203  		setmaxarg(f->type, extra);
   204  	}
   205  
   206  	switch(proc) {
   207  	default:
   208  		fatal("ginscall: bad proc %d", proc);
   209  		break;
   210  
   211  	case 0:	// normal call
   212  	case -1:	// normal call but no return
   213  		if(f->op == ONAME && f->class == PFUNC) {
   214  			if(f == deferreturn) {
   215  				// Deferred calls will appear to be returning to
   216  				// the CALL deferreturn(SB) that we are about to emit.
   217  				// However, the stack trace code will show the line
   218  				// of the instruction byte before the return PC. 
   219  				// To avoid that being an unrelated instruction,
   220  				// insert a ppc64 NOP so that we will have the right line number.
   221  				// The ppc64 NOP is really or r0, r0, r0; use that description
   222  				// because the NOP pseudo-instruction would be removed by
   223  				// the linker.
   224  				nodreg(&reg, types[TINT], D_R0);
   225  				gins(AOR, &reg, &reg);
   226  			}
   227  			p = gins(ABL, N, f);
   228  			afunclit(&p->to, f);
   229  			if(proc == -1 || noreturn(p))
   230  				gins(AUNDEF, N, N);
   231  			break;
   232  		}
   233  		nodreg(&reg, types[tptr], D_R0+REGENV);
   234  		nodreg(&r1, types[tptr], D_R0+3);
   235  		gmove(f, &reg);
   236  		reg.op = OINDREG;
   237  		gmove(&reg, &r1);
   238  		reg.op = OREGISTER;
   239  		ginsBL(&reg, &r1);
   240  		break;
   241  	
   242  	case 3:	// normal call of c function pointer
   243  		ginsBL(N, f);
   244  		break;
   245  
   246  	case 1:	// call in new proc (go)
   247  	case 2:	// deferred call (defer)
   248  		nodconst(&con, types[TINT64], argsize(f->type));
   249  		nodreg(&reg, types[TINT64], D_R0+3);
   250  		nodreg(&reg2, types[TINT64], D_R0+4);
   251  		gmove(f, &reg);
   252  
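        		// Runtime newproc/deferproc take the argument size and the function
        		// value as their first two arguments: store them at 8(SP) and 16(SP),
        		// just above the saved LR at 0(SP).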
   253  		gmove(&con, &reg2);
   254  		p = gins(AMOVW, &reg2, N);
   255  		p->to.type = D_OREG;
   256  		p->to.reg = REGSP;
   257  		p->to.offset = 8;
   258  
   259  		p = gins(AMOVD, &reg, N);
   260  		p->to.type = D_OREG;
   261  		p->to.reg = REGSP;
   262  		p->to.offset = 16;
   263  
   264  		if(proc == 1)
   265  			ginscall(newproc, 0);
   266  		else {
   267  			if(!hasdefer)
   268  				fatal("hasdefer=0 but has defer");
   269  			ginscall(deferproc, 0);
   270  		}
   271  
   272  		if(proc == 2) {
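        			// deferproc returns 0 normally; a non-zero result (used when a
        			// recovered panic requires the frame to return immediately) makes
        			// us emit the function's return path here.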
   273  			nodreg(&reg, types[TINT64], D_R0+3);
   274  			p = gins(ACMP, &reg, N);
   275  			p->to.type = D_REG;
   276  			p->to.reg = D_R0;
   277  			p = gbranch(ABEQ, T, +1);
   278  			cgen_ret(N);
   279  			patch(p, pc);
   280  		}
   281  		break;
   282  	}
   283  }
   284  
   285  /*
   286   * n is call to interface method.
   287   * generate res = n.
   288   */
   289  void
   290  cgen_callinter(Node *n, Node *res, int proc)
   291  {
   292  	Node *i, *f;
   293  	Node tmpi, nodi, nodo, nodr, nodsp;
   294  	Prog *p;
   295  
   296  	i = n->left;
   297  	if(i->op != ODOTINTER)
   298  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   299  
   300  	f = i->right;		// field
   301  	if(f->op != ONAME)
   302  		fatal("cgen_callinter: not ONAME %O", f->op);
   303  
   304  	i = i->left;		// interface
   305  
   306  	if(!i->addable) {
   307  		tempname(&tmpi, i->type);
   308  		cgen(i, &tmpi);
   309  		i = &tmpi;
   310  	}
   311  
   312  	genlist(n->list);		// assign the args
   313  
   314  	// i is now addable, prepare an indirected
   315  	// register to hold its address.
   316  	igen(i, &nodi, res);		// REG = &inter
   317  
   318  	nodindreg(&nodsp, types[tptr], D_R0+REGSP);
   319  	nodsp.xoffset = widthptr;
   320  	if(proc != 0)
   321  		nodsp.xoffset += 2 * widthptr; // leave room for size & fn
   322  	nodi.type = types[tptr];
   323  	nodi.xoffset += widthptr;
   324  	cgen(&nodi, &nodsp);	// {8 or 24}(SP) = 8(REG) -- i.data
   325  
   326  	regalloc(&nodo, types[tptr], res);
   327  	nodi.type = types[tptr];
   328  	nodi.xoffset -= widthptr;
   329  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   330  	regfree(&nodi);
   331  
   332  	regalloc(&nodr, types[tptr], &nodo);
   333  	if(n->left->xoffset == BADWIDTH)
   334  		fatal("cgen_callinter: badwidth");
   335  	cgen_checknil(&nodo); // in case offset is huge
   336  	nodo.op = OINDREG;
   337  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
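        	// 3*widthptr+8 appears to skip the Itab header (the inter, type and link
        	// pointers plus two int32 fields) to reach the fun array; xoffset then
        	// selects this method's slot.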
   338  	if(proc == 0) {
   339  		// plain call: use direct c function pointer - more efficient
   340  		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
   341  		proc = 3;
   342  	} else {
   343  		// go/defer. generate go func value.
   344  		p = gins(AMOVD, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
   345  		p->from.type = D_CONST;
   346  	}
   347  
   348  	nodr.type = n->left->type;
   349  	ginscall(&nodr, proc);
   350  
   351  	regfree(&nodr);
   352  	regfree(&nodo);
   353  }
   354  
   355  /*
   356   * generate function call;
   357   *	proc=0	normal call
   358   *	proc=1	goroutine run in new proc
   359   *	proc=2	defer call save away stack
   360   */
   361  void
   362  cgen_call(Node *n, int proc)
   363  {
   364  	Type *t;
   365  	Node nod, afun;
   366  
   367  	if(n == N)
   368  		return;
   369  
   370  	if(n->left->ullman >= UINF) {
   371  		// if name involves a fn call
   372  		// precompute the address of the fn
   373  		tempname(&afun, types[tptr]);
   374  		cgen(n->left, &afun);
   375  	}
   376  
   377  	genlist(n->list);		// assign the args
   378  	t = n->left->type;
   379  
   380  	// call tempname pointer
   381  	if(n->left->ullman >= UINF) {
   382  		regalloc(&nod, types[tptr], N);
   383  		cgen_as(&nod, &afun);
   384  		nod.type = t;
   385  		ginscall(&nod, proc);
   386  		regfree(&nod);
   387  		return;
   388  	}
   389  
   390  	// call pointer
   391  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   392  		regalloc(&nod, types[tptr], N);
   393  		cgen_as(&nod, n->left);
   394  		nod.type = t;
   395  		ginscall(&nod, proc);
   396  		regfree(&nod);
   397  		return;
   398  	}
   399  
   400  	// call direct
   401  	n->left->method = 1;
   402  	ginscall(n->left, proc);
   403  }
   404  
   405  /*
   406   * call to n has already been generated.
   407   * generate:
   408   *	res = return value from call.
   409   */
   410  void
   411  cgen_callret(Node *n, Node *res)
   412  {
   413  	Node nod;
   414  	Type *fp, *t;
   415  	Iter flist;
   416  
   417  	t = n->left->type;
   418  	if(t->etype == TPTR32 || t->etype == TPTR64)
   419  		t = t->type;
   420  
   421  	fp = structfirst(&flist, getoutarg(t));
   422  	if(fp == T)
   423  		fatal("cgen_callret: nil");
   424  
   425  	memset(&nod, 0, sizeof(nod));
   426  	nod.op = OINDREG;
   427  	nod.val.u.reg = D_R0+REGSP;
   428  	nod.addable = 1;
   429  
   430  	nod.xoffset = fp->width + widthptr; // +widthptr: saved LR at 0(R1)
   431  	nod.type = fp->type;
   432  	cgen_as(res, &nod);
   433  }
   434  
   435  /*
   436   * call to n has already been generated.
   437   * generate:
   438   *	res = &return value from call.
   439   */
   440  void
   441  cgen_aret(Node *n, Node *res)
   442  {
   443  	Node nod1, nod2;
   444  	Type *fp, *t;
   445  	Iter flist;
   446  
   447  	t = n->left->type;
   448  	if(isptr[t->etype])
   449  		t = t->type;
   450  
   451  	fp = structfirst(&flist, getoutarg(t));
   452  	if(fp == T)
   453  		fatal("cgen_aret: nil");
   454  
   455  	memset(&nod1, 0, sizeof(nod1));
   456  	nod1.op = OINDREG;
   457  	nod1.val.u.reg = D_R0 + REGSP;
   458  	nod1.addable = 1;
   459  
   460  	nod1.xoffset = fp->width + widthptr; // +widthptr: saved lr at 0(SP)
   461  	nod1.type = fp->type;
   462  
   463  	if(res->op != OREGISTER) {
   464  		regalloc(&nod2, types[tptr], res);
   465  		agen(&nod1, &nod2);
   466  		gins(AMOVD, &nod2, res);
   467  		regfree(&nod2);
   468  	} else
   469  		agen(&nod1, res);
   470  }
   471  
   472  /*
   473   * generate return.
   474   * n->list is assignments to return values.
   475   */
   476  void
   477  cgen_ret(Node *n)
   478  {
   479  	Prog *p;
   480  
   481  	if(n != N)
   482  		genlist(n->list);		// copy out args
   483  	if(hasdefer)
   484  		ginscall(deferreturn, 0);
   485  	genlist(curfn->exit);
   486  	p = gins(ARET, N, N);
   487  	if(n != N && n->op == ORETJMP) {
   488  		p->to.name = D_EXTERN;
   489  		p->to.type = D_CONST;
   490  		p->to.sym = linksym(n->left->sym);
   491  	}
   492  }
   493  
   494  void
   495  cgen_asop(Node *n)
   496  {
   497  	USED(n);
   498  	fatal("cgen_asop"); // no longer used
   499  }
   500  
   501  int
   502  samereg(Node *a, Node *b)
   503  {
   504  	if(a == N || b == N)
   505  		return 0;
   506  	if(a->op != OREGISTER)
   507  		return 0;
   508  	if(b->op != OREGISTER)
   509  		return 0;
   510  	if(a->val.u.reg != b->val.u.reg)
   511  		return 0;
   512  	return 1;
   513  }
   514  
   515  /*
   516   * generate division.
   517   * generates one of:
   518   *	res = nl / nr
   519   *	res = nl % nr
   520   * according to op.
   521   */
   522  void
   523  dodiv(int op, Node *nl, Node *nr, Node *res)
   524  {
   525  	int a, check;
   526  	Type *t, *t0;
   527  	Node tl, tr, tl2, tr2, nm1, nz, tm;
   528  	Prog *p1, *p2;
   529  
   530  	// Have to be careful about handling
   531  	// most negative int divided by -1 correctly.
   532  	// The hardware will generate an undefined result.
   533  	// Also need to explicitly trap on division by zero;
   534  	// the hardware will silently generate an undefined result.
   535  	// DIVW will leave an unpredictable result in the upper 32 bits,
   536  	// so always use DIVD/DIVDU.
   537  	t = nl->type;
   538  	t0 = t;
   539  	check = 0;
   540  	if(issigned[t->etype]) {
   541  		check = 1;
   542  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
   543  			check = 0;
   544  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   545  			check = 0;
   546  	}
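        	// check stays set only when the operands could actually be
        	// -2^(w-1) and -1; the divide-by-zero test below is emitted regardless.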
   547  	if(t->width < 8) {
   548  		if(issigned[t->etype])
   549  			t = types[TINT64];
   550  		else
   551  			t = types[TUINT64];
   552  		check = 0;
   553  	}
   554  
   555  	a = optoas(ODIV, t);
   556  
   557  	regalloc(&tl, t0, N);
   558  	regalloc(&tr, t0, N);
   559  	if(nl->ullman >= nr->ullman) {
   560  		cgen(nl, &tl);
   561  		cgen(nr, &tr);
   562  	} else {
   563  		cgen(nr, &tr);
   564  		cgen(nl, &tl);
   565  	}
   566  	if(t != t0) {
   567  		// Convert
   568  		tl2 = tl;
   569  		tr2 = tr;
   570  		tl.type = t;
   571  		tr.type = t;
   572  		gmove(&tl2, &tl);
   573  		gmove(&tr2, &tr);
   574  	}
   575  
   576  	// Handle divide-by-zero panic.
   577  	p1 = gins(optoas(OCMP, t), &tr, N);
   578  	p1->to.type = D_REG;
   579  	p1->to.reg = REGZERO;
   580  	p1 = gbranch(optoas(ONE, t), T, +1);
   581  	if(panicdiv == N)
   582  		panicdiv = sysfunc("panicdivide");
   583  	ginscall(panicdiv, -1);
   584  	patch(p1, pc);
   585  
   586  	if(check) {
   587  		nodconst(&nm1, t, -1);
   588  		gins(optoas(OCMP, t), &tr, &nm1);
   589  		p1 = gbranch(optoas(ONE, t), T, +1);
   590  		if(op == ODIV) {
   591  			// a / (-1) is -a.
   592  			gins(optoas(OMINUS, t), N, &tl);
   593  			gmove(&tl, res);
   594  		} else {
   595  			// a % (-1) is 0.
   596  			nodconst(&nz, t, 0);
   597  			gmove(&nz, res);
   598  		}
   599  		p2 = gbranch(AJMP, T, 0);
   600  		patch(p1, pc);
   601  	}
   602  	p1 = gins(a, &tr, &tl);
   603  	if(op == ODIV) {
   604  		regfree(&tr);
   605  		gmove(&tl, res);
   606  	} else {
   607  		// A%B = A-(A/B*B)
   608  		regalloc(&tm, t, N);
   609  		// patch div to use the 3 register form
   610  		// TODO(minux): add gins3?
   611  		p1->reg = p1->to.reg;
   612  		p1->to.reg = tm.val.u.reg;
   613  		gins(optoas(OMUL, t), &tr, &tm);
   614  		regfree(&tr);
   615  		gins(optoas(OSUB, t), &tm, &tl);
   616  		regfree(&tm);
   617  		gmove(&tl, res);
   618  	}
   619  	regfree(&tl);
   620  	if(check)
   621  		patch(p2, pc);
   622  }
   623  
   624  /*
   625   * generate division according to op, one of:
   626   *	res = nl / nr
   627   *	res = nl % nr
   628   */
   629  void
   630  cgen_div(int op, Node *nl, Node *nr, Node *res)
   631  {
   632  	Node n1, n2, n3;
   633  	int w, a;
   634  	Magic m;
   635  
   636  	// TODO(minux): enable division by magic multiply (also need to fix longmod below)
   637  	//if(nr->op != OLITERAL)
   638  		goto longdiv;
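        	// With the test above commented out, this goto is unconditional and the
        	// magic-multiply code below is currently unreachable.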
   639  	w = nl->type->width*8;
   640  
   641  	// Front end handled 32-bit division. We only need to handle 64-bit.
   642  	// try to do division by multiply by (2^w)/d
   643  	// see hacker's delight chapter 10
   644  	switch(simtype[nl->type->etype]) {
   645  	default:
   646  		goto longdiv;
   647  
   648  	case TUINT64:
   649  		m.w = w;
   650  		m.ud = mpgetfix(nr->val.u.xval);
   651  		umagic(&m);
   652  		if(m.bad)
   653  			break;
   654  		if(op == OMOD)
   655  			goto longmod;
   656  
   657  		cgenr(nl, &n1, N);
   658  		nodconst(&n2, nl->type, m.um);
   659  		regalloc(&n3, nl->type, res);
   660  		cgen_hmul(&n1, &n2, &n3);
   661  
   662  		if(m.ua) {
   663  			// need to add numerator accounting for overflow
   664  			gins(optoas(OADD, nl->type), &n1, &n3);
   665  			nodconst(&n2, nl->type, 1);
   666  			gins(optoas(ORROTC, nl->type), &n2, &n3);
   667  			nodconst(&n2, nl->type, m.s-1);
   668  			gins(optoas(ORSH, nl->type), &n2, &n3);
   669  		} else {
   670  			nodconst(&n2, nl->type, m.s);
   671  			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift dx
   672  		}
   673  
   674  		gmove(&n3, res);
   675  		regfree(&n1);
   676  		regfree(&n3);
   677  		return;
   678  
   679  	case TINT64:
   680  		m.w = w;
   681  		m.sd = mpgetfix(nr->val.u.xval);
   682  		smagic(&m);
   683  		if(m.bad)
   684  			break;
   685  		if(op == OMOD)
   686  			goto longmod;
   687  
   688  		cgenr(nl, &n1, res);
   689  		nodconst(&n2, nl->type, m.sm);
   690  		regalloc(&n3, nl->type, N);
   691  		cgen_hmul(&n1, &n2, &n3);
   692  
   693  		if(m.sm < 0) {
   694  			// need to add numerator
   695  			gins(optoas(OADD, nl->type), &n1, &n3);
   696  		}
   697  
   698  		nodconst(&n2, nl->type, m.s);
   699  		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3
   700  
   701  		nodconst(&n2, nl->type, w-1);
   702  		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
   703  		gins(optoas(OSUB, nl->type), &n1, &n3);	// subtract the -1/0, i.e. add 1 back when num is neg
   704  
   705  		if(m.sd < 0) {
   706  			// this could probably be removed
   707  			// by factoring it into the multiplier
   708  			gins(optoas(OMINUS, nl->type), N, &n3);
   709  		}
   710  
   711  		gmove(&n3, res);
   712  		regfree(&n1);
   713  		regfree(&n3);
   714  		return;
   715  	}
   716  	goto longdiv;
   717  
   718  longdiv:
   719  	// division and mod using (slow) hardware instruction
   720  	dodiv(op, nl, nr, res);
   721  	return;
   722  
   723  longmod:
   724  	// mod using formula A%B = A-(A/B*B) but
   725  	// we know that there is a fast algorithm for A/B
   726  	regalloc(&n1, nl->type, res);
   727  	cgen(nl, &n1);
   728  	regalloc(&n2, nl->type, N);
   729  	cgen_div(ODIV, &n1, nr, &n2);
   730  	a = optoas(OMUL, nl->type);
   731  	if(w == 8) {
   732  		// use 2-operand 16-bit multiply
   733  		// because there is no 2-operand 8-bit multiply
   734  		//a = AIMULW;
   735  	}
   736  	if(!smallintconst(nr)) {
   737  		regalloc(&n3, nl->type, N);
   738  		cgen(nr, &n3);
   739  		gins(a, &n3, &n2);
   740  		regfree(&n3);
   741  	} else
   742  		gins(a, nr, &n2);
   743  	gins(optoas(OSUB, nl->type), &n2, &n1);
   744  	gmove(&n1, res);
   745  	regfree(&n1);
   746  	regfree(&n2);
   747  }
   748  
   749  /*
   750   * generate high multiply:
   751   *   res = (nl*nr) >> width
   752   */
   753  void
   754  cgen_hmul(Node *nl, Node *nr, Node *res)
   755  {
   756  	int w;
   757  	Node n1, n2, *tmp;
   758  	Type *t;
   759  	Prog *p;
   760  
   761  	// largest ullman on left.
   762  	if(nl->ullman < nr->ullman) {
   763  		tmp = nl;
   764  		nl = nr;
   765  		nr = tmp;
   766  	}
   767  	t = nl->type;
   768  	w = t->width * 8;
   769  	cgenr(nl, &n1, res);
   770  	cgenr(nr, &n2, N);
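        	// For operands narrower than 64 bits the full product fits in one
        	// register, so multiply and shift right by the operand width to get the
        	// high half; 64-bit operands use the MULHD/MULHDU high-multiply instructions.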
   771  	switch(simtype[t->etype]) {
   772  	case TINT8:
   773  	case TINT16:
   774  	case TINT32:
   775  		gins(optoas(OMUL, t), &n2, &n1);
   776  		p = gins(ASRAD, N, &n1);
   777  		p->from.type = D_CONST;
   778  		p->from.offset = w;
   779  		break;
   780  	case TUINT8:
   781  	case TUINT16:
   782  	case TUINT32:
   783  		gins(optoas(OMUL, t), &n2, &n1);
   784  		p = gins(ASRD, N, &n1);
   785  		p->from.type = D_CONST;
   786  		p->from.offset = w;
   787  		break;
   788  	case TINT64:
   789  	case TUINT64:
   790  		if(issigned[t->etype])
   791  			p = gins(AMULHD, &n2, &n1);
   792  		else
   793  			p = gins(AMULHDU, &n2, &n1);
   794  		break;
   795  	default:
   796  		fatal("cgen_hmul %T", t);
   797  		break;
   798  	}
   799  	cgen(&n1, res);
   800  	regfree(&n1);
   801  	regfree(&n2);
   802  }
   803  
   804  /*
   805   * generate shift according to op, one of:
   806   *	res = nl << nr
   807   *	res = nl >> nr
   808   */
   809  void
   810  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   811  {
   812  	Node n1, n2, n3, n4, n5;
   813  	int a;
   814  	Prog *p1;
   815  	uvlong sc;
   816  	Type *tcount;
   817  
   818  	a = optoas(op, nl->type);
   819  
   820  	if(nr->op == OLITERAL) {
   821  		regalloc(&n1, nl->type, res);
   822  		cgen(nl, &n1);
   823  		sc = mpgetfix(nr->val.u.xval);
   824  		if(sc >= nl->type->width*8) {
   825  			// large shift gets 2 shifts by width-1
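        			// (two shifts by width-1 give 0 for left and unsigned right shifts,
        			// and all sign bits for signed right shifts, matching Go's semantics
        			// for shift counts >= the operand width)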
   826  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   827  			gins(a, &n3, &n1);
   828  			gins(a, &n3, &n1);
   829  		} else
   830  			gins(a, nr, &n1);
   831  		gmove(&n1, res);
   832  		regfree(&n1);
   833  		goto ret;
   834  	}
   835  
   836  	if(nl->ullman >= UINF) {
   837  		tempname(&n4, nl->type);
   838  		cgen(nl, &n4);
   839  		nl = &n4;
   840  	}
   841  	if(nr->ullman >= UINF) {
   842  		tempname(&n5, nr->type);
   843  		cgen(nr, &n5);
   844  		nr = &n5;
   845  	}
   846  
   847  	// Allow either uint32 or uint64 as shift type,
   848  	// to avoid unnecessary conversion from uint32 to uint64
   849  	// just to do the comparison.
   850  	tcount = types[simtype[nr->type->etype]];
   851  	if(tcount->etype < TUINT32)
   852  		tcount = types[TUINT32];
   853  
   854  	regalloc(&n1, nr->type, N);		// to hold the shift count
   855  	regalloc(&n3, tcount, &n1);	// to widen the count and clear its high bits
   856  
   857  	regalloc(&n2, nl->type, res);
   858  	if(nl->ullman >= nr->ullman) {
   859  		cgen(nl, &n2);
   860  		cgen(nr, &n1);
   861  		gmove(&n1, &n3);
   862  	} else {
   863  		cgen(nr, &n1);
   864  		gmove(&n1, &n3);
   865  		cgen(nl, &n2);
   866  	}
   867  	regfree(&n3);
   868  
   869  	// test and fix up large shifts
   870  	if(!bounded) {
   871  		nodconst(&n3, tcount, nl->type->width*8);
   872  		gins(optoas(OCMP, tcount), &n1, &n3);
   873  		p1 = gbranch(optoas(OLT, tcount), T, +1);
   874  		if(op == ORSH && issigned[nl->type->etype]) {
   875  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   876  			gins(a, &n3, &n2);
   877  		} else {
   878  			nodconst(&n3, nl->type, 0);
   879  			gmove(&n3, &n2);
   880  		}
   881  		patch(p1, pc);
   882  	}
   883  
   884  	gins(a, &n1, &n2);
   885  
   886  	gmove(&n2, res);
   887  
   888  	regfree(&n1);
   889  	regfree(&n2);
   890  
   891  ret:
   892  	;
   893  }
   894  
   895  void
   896  clearfat(Node *nl)
   897  {
   898  	uint64 w, c, q, t, boff;
   899  	Node dst, end, r0, *f;
   900  	Prog *p, *pl;
   901  
   902  	/* clear a fat object */
   903  	if(debug['g']) {
   904  		print("clearfat %N (%T, size: %lld)\n", nl, nl->type, nl->type->width);
   905  	}
   906  
   907  	w = nl->type->width;
   908  	// Avoid taking the address for simple enough types.
   909  	//if(componentgen(N, nl))
   910  	//	return;
   911  
   912  	c = w % 8;	// bytes
   913  	q = w / 8;	// dwords
   914  
   915  	if(reg[REGRT1] > 0)
   916  		fatal("R%d in use during clearfat", REGRT1);
   917  
   918  	nodreg(&r0, types[TUINT64], 0); // r0 is always zero
   919  	nodreg(&dst, types[tptr], D_R0+REGRT1);
   920  	reg[REGRT1]++;
   921  	agen(nl, &dst);
   922  
   923  	if(q > 128) {
   924  		p = gins(ASUB, N, &dst);
   925  		p->from.type = D_CONST;
   926  		p->from.offset = 8;
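        		// Back dst up one dword so the MOVDU store-with-update below writes
        		// its first zero at the original address.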
   927  
   928  		regalloc(&end, types[tptr], N);
   929  		p = gins(AMOVD, &dst, &end);
   930  		p->from.type = D_CONST;
   931  		p->from.offset = q*8;
   932  
   933  		p = gins(AMOVDU, &r0, &dst);
   934  		p->to.type = D_OREG;
   935  		p->to.offset = 8;
   936  		pl = p;
   937  
   938  		p = gins(ACMP, &dst, &end);
   939  		patch(gbranch(ABNE, T, 0), pl);
   940  
   941  		regfree(&end);
   942  		// The loop leaves R3 on the last zeroed dword
   943  		boff = 8;
   944  	} else if(q >= 4) {
   945  		p = gins(ASUB, N, &dst);
   946  		p->from.type = D_CONST;
   947  		p->from.offset = 8;
   948  		f = sysfunc("duffzero");
   949  		p = gins(ADUFFZERO, N, f);
   950  		afunclit(&p->to, f);
   951  		// 4 and 128 = magic constants: see ../../runtime/asm_ppc64x.s
   952  		p->to.offset = 4*(128-q);
   953  		// duffzero leaves R3 on the last zeroed dword
   954  		boff = 8;
   955  	} else {
   956  		for(t = 0; t < q; t++) {
   957  			p = gins(AMOVD, &r0, &dst);
   958  			p->to.type = D_OREG;
   959  			p->to.offset = 8*t;
   960  		}
   961  		boff = 8*q;
   962  	}
   963  
   964  	for(t = 0; t < c; t++) {
   965  		p = gins(AMOVB, &r0, &dst);
   966  		p->to.type = D_OREG;
   967  		p->to.offset = t+boff;
   968  	}
   969  	reg[REGRT1]--;
   970  }
   971  
   972  // Called after regopt and peep have run.
   973  // Expand CHECKNIL pseudo-op into actual nil pointer check.
   974  void
   975  expandchecks(Prog *firstp)
   976  {
   977  	Prog *p, *p1, *p2;
   978  
   979  	for(p = firstp; p != P; p = p->link) {
   980  		if(debug_checknil && ctxt->debugvlog)
   981  			print("expandchecks: %P\n", p);
   982  		if(p->as != ACHECKNIL)
   983  			continue;
   984  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
   985  			warnl(p->lineno, "generated nil check");
   986  		if(p->from.type != D_REG)
   987  			fatal("invalid nil check %P\n", p);
   988  		/*
   989  		// check is
   990  		//	TD $4, R0, arg (R0 is always zero)
   991  		// eqv. to:
   992  		// 	tdeq r0, arg
   993  		// NOTE: this needs special runtime support to make SIGTRAP recoverable.
   994  		reg = p->from.reg;
   995  		p->as = ATD;
   996  		p->from = p->to = p->from3 = zprog.from;
   997  		p->from.type = D_CONST;
   998  		p->from.offset = 4;
   999  		p->from.reg = NREG;
  1000  		p->reg = 0;
  1001  		p->to.type = D_REG;
  1002  		p->to.reg = reg;
  1003  		*/
  1004  		// check is
  1005  		//	CMP arg, R0
  1006  		//	BNE 2(PC) [likely]
  1007  		//	MOVD R0, 0(R0)
  1008  		p1 = mal(sizeof *p1);
  1009  		p2 = mal(sizeof *p2);
  1010  		clearp(p1);
  1011  		clearp(p2);
  1012  		p1->link = p2;
  1013  		p2->link = p->link;
  1014  		p->link = p1;
  1015  		p1->lineno = p->lineno;
  1016  		p2->lineno = p->lineno;
  1017  		p1->pc = 9999;
  1018  		p2->pc = 9999;
  1019  		p->as = ACMP;
  1020  		p->to.type = D_REG;
  1021  		p->to.reg = REGZERO;
  1022  		p1->as = ABNE;
  1023  		//p1->from.type = D_CONST;
  1024  		//p1->from.offset = 1; // likely
  1025  		p1->to.type = D_BRANCH;
  1026  		p1->to.u.branch = p2->link;
  1027  		// crash by write to memory address 0.
  1028  		p2->as = AMOVD;
  1029  		p2->from.type = D_REG;
  1030  		p2->from.reg = 0;
  1031  		p2->to.type = D_OREG;
  1032  		p2->to.reg = 0;
  1033  		p2->to.offset = 0;
  1034  	}
  1035  }