github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/cmd/9g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  static Prog *appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset);
    13  static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi);
    14  
    15  void
    16  defframe(Prog *ptxt)
    17  {
    18  	uint32 frame;
    19  	Prog *p;
    20  	vlong hi, lo;
    21  	NodeList *l;
    22  	Node *n;
    23  
    24  	// fill in argument size
    25  	ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
    26  
    27  	// fill in final stack size
    28  	ptxt->to.offset <<= 32;
    29  	frame = rnd(stksize+maxarg, widthreg);
    30  	ptxt->to.offset |= frame;
    31  	
    32  	// insert code to zero ambiguously live variables
    33  	// so that the garbage collector only sees initialized values
    34  	// when it looks for pointers.
    35  	p = ptxt;
    36  	lo = hi = 0;
    37  	// iterate through declarations - they are sorted in decreasing xoffset order.
    38  	for(l=curfn->dcl; l != nil; l = l->next) {
    39  		n = l->n;
    40  		if(!n->needzero)
    41  			continue;
    42  		if(n->class != PAUTO)
    43  			fatal("needzero class %d", n->class);
    44  		if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
    45  			fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
    46  
    47  		if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthreg) {
    48  			// merge with range we already have
    49  			lo = n->xoffset;
    50  			continue;
    51  		}
    52  		// zero old range
    53  		p = zerorange(p, frame, lo, hi);
    54  
    55  		// set new range
    56  		hi = n->xoffset + n->type->width;
    57  		lo = n->xoffset;
    58  	}
    59  	// zero final range
    60  	zerorange(p, frame, lo, hi);
    61  }
    62  
    63  static Prog*
    64  zerorange(Prog *p, vlong frame, vlong lo, vlong hi)
    65  {
    66  	vlong cnt, i;
    67  	Prog *p1;
    68  	Node *f;
    69  
    70  	cnt = hi - lo;
    71  	if(cnt == 0)
    72  		return p;
    73  	if(cnt < 4*widthptr) {
    74  		for(i = 0; i < cnt; i += widthptr)
    75  			p = appendpp(p, AMOVD, D_REG, REGZERO, 0, D_OREG, REGSP, 8+frame+lo+i);
    76  	} else if(cnt <= 128*widthptr) {
    77  		p = appendpp(p, AADD, D_CONST, NREG, 8+frame+lo-8, D_REG, REGRT1, 0);
    78  		p->reg = REGSP;
    79  		p = appendpp(p, ADUFFZERO, D_NONE, NREG, 0, D_OREG, NREG, 0);
    80  		f = sysfunc("duffzero");
    81  		naddr(f, &p->to, 1);
    82  		afunclit(&p->to, f);
    83  		p->to.offset = 4*(128-cnt/widthptr);
    84  	} else {
    85  		p = appendpp(p, AMOVD, D_CONST, NREG, 8+frame+lo-8, D_REG, REGTMP, 0);
    86  		p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT1, 0);
    87  		p->reg = REGSP;
    88  		p = appendpp(p, AMOVD, D_CONST, NREG, cnt, D_REG, REGTMP, 0);
    89  		p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT2, 0);
    90  		p->reg = REGRT1;
    91  		p1 = p = appendpp(p, AMOVDU, D_REG, REGZERO, 0, D_OREG, REGRT1, widthptr);
    92  		p = appendpp(p, ACMP, D_REG, REGRT1, 0, D_REG, REGRT2, 0);
    93  		p = appendpp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0);
    94  		patch(p, p1);
    95  	}
    96  	return p;
    97  }
    98  
    99  static Prog*
   100  appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset)
   101  {
   102  	Prog *q;
   103  	q = mal(sizeof(*q));
   104  	clearp(q);
   105  	q->as = as;
   106  	q->lineno = p->lineno;
   107  	q->from.type = ftype;
   108  	q->from.reg = freg;
   109  	q->from.offset = foffset;
   110  	q->to.type = ttype;
   111  	q->to.reg = treg;
   112  	q->to.offset = toffset;
   113  	q->link = p->link;
   114  	p->link = q;
   115  	return q;
   116  }
   117  
   118  // Sweep the prog list to mark any used nodes.
   119  void
   120  markautoused(Prog *p)
   121  {
   122  	for (; p; p = p->link) {
   123  		if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL)
   124  			continue;
   125  
   126  		if (p->from.node)
   127  			p->from.node->used = 1;
   128  
   129  		if (p->to.node)
   130  			p->to.node->used = 1;
   131  	}
   132  }
   133  
   134  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
   135  void
   136  fixautoused(Prog *p)
   137  {
   138  	Prog **lp;
   139  
   140  	for (lp=&p; (p=*lp) != P; ) {
   141  		if (p->as == ATYPE && p->from.node && p->from.name == D_AUTO && !p->from.node->used) {
   142  			*lp = p->link;
   143  			continue;
   144  		}
   145  		if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) {
   146  			// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
   147  			// VARDEFs are interspersed with other code, and a jump might be using the
   148  			// VARDEF as a target. Replace with a no-op instead. A later pass will remove
   149  			// the no-ops.
   150  			p->to.type = D_NONE;
   151  			p->to.node = N;
   152  			p->as = ANOP;
   153  			continue;
   154  		}
   155  		if (p->from.name == D_AUTO && p->from.node)
   156  			p->from.offset += p->from.node->stkdelta;
   157  
   158  		if (p->to.name == D_AUTO && p->to.node)
   159  			p->to.offset += p->to.node->stkdelta;
   160  
   161  		lp = &p->link;
   162  	}
   163  }
   164  
   165  /*
   166   * generate: BL reg, f
   167   * where both reg and f are registers.
   168   * On power, f must be moved to CTR first.
   169   */
   170  static void
   171  ginsBL(Node *reg, Node *f)
   172  {
   173  	Prog *p;
   174  	p = gins(AMOVD, f, N);
   175  	p->to.type = D_SPR;
   176  	p->to.offset = D_CTR;
   177  	p = gins(ABL, reg, N);
   178  	p->to.type = D_SPR;
   179  	p->to.offset = D_CTR;
   180  }
   181  
   182  /*
   183   * generate:
   184   *	call f
   185   *	proc=-1	normal call but no return
   186   *	proc=0	normal call
   187   *	proc=1	goroutine run in new proc
   188   *	proc=2	defer call save away stack
   189    *	proc=3	normal call to C pointer (not Go func value)
   190   */
   191  void
   192  ginscall(Node *f, int proc)
   193  {
   194  	Prog *p;
   195  	Node reg, con, reg2;
   196  	Node r1;
   197  
   198  	if(f->type != T)
   199  		setmaxarg(f->type);
   200  
   201  	switch(proc) {
   202  	default:
   203  		fatal("ginscall: bad proc %d", proc);
   204  		break;
   205  
   206  	case 0:	// normal call
   207  	case -1:	// normal call but no return
   208  		if(f->op == ONAME && f->class == PFUNC) {
   209  			if(f == deferreturn) {
   210  				// Deferred calls will appear to be returning to
   211  				// the CALL deferreturn(SB) that we are about to emit.
   212  				// However, the stack trace code will show the line
   213  				// of the instruction byte before the return PC. 
   214  				// To avoid that being an unrelated instruction,
   215  				// insert a ppc64 NOP that we will have the right line number.
   216  				// The ppc64 NOP is really or r0, r0, r0; use that description
   217  				// because the NOP pseudo-instruction would be removed by
   218  				// the linker.
   219  				nodreg(&reg, types[TINT], D_R0);
   220  				gins(AOR, &reg, &reg);
   221  			}
   222  			p = gins(ABL, N, f);
   223  			afunclit(&p->to, f);
   224  			if(proc == -1 || noreturn(p))
   225  				gins(AUNDEF, N, N);
   226  			break;
   227  		}
   228  		nodreg(&reg, types[tptr], D_R0+REGENV);
   229  		nodreg(&r1, types[tptr], D_R0+3);
   230  		gmove(f, &reg);
   231  		reg.op = OINDREG;
   232  		gmove(&reg, &r1);
   233  		reg.op = OREGISTER;
   234  		ginsBL(&reg, &r1);
   235  		break;
   236  	
   237  	case 3:	// normal call of c function pointer
   238  		ginsBL(N, f);
   239  		break;
   240  
   241  	case 1:	// call in new proc (go)
   242  	case 2:	// deferred call (defer)
   243  		nodconst(&con, types[TINT64], argsize(f->type));
   244  		nodreg(&reg, types[TINT64], D_R0+3);
   245  		nodreg(&reg2, types[TINT64], D_R0+4);
   246  		gmove(f, &reg);
   247  
   248  		p = gins(ASUB, N, N);
   249  		p->from.type = D_CONST;
   250  		p->from.offset = 3 * 8;
   251  		p->to.type = D_REG;
   252  		p->to.reg = REGSP;
   253  
   254  		gmove(&con, &reg2);
   255  		p = gins(AMOVW, &reg2, N);
   256  		p->to.type = D_OREG;
   257  		p->to.reg = REGSP;
   258  		p->to.offset = 8;
   259  
   260  		p = gins(AMOVD, &reg, N);
   261  		p->to.type = D_OREG;
   262  		p->to.reg = REGSP;
   263  		p->to.offset = 16;
   264  
   265  		if(proc == 1)
   266  			ginscall(newproc, 0);
   267  		else {
   268  			if(!hasdefer)
   269  				fatal("hasdefer=0 but has defer");
   270  			ginscall(deferproc, 0);
   271  		}
   272  
   273  		p = gins(AADD, N, N);
   274  		p->from.type = D_CONST;
   275  		p->from.offset = 3 * 8;
   276  		p->to.type = D_REG;
   277  		p->to.reg = REGSP;
   278  
   279  		if(proc == 2) {
   280  			nodreg(&reg, types[TINT64], D_R0+3);
   281  			p = gins(ACMP, &reg, N);
   282  			p->to.type = D_REG;
   283  			p->to.reg = D_R0;
   284  			p = gbranch(ABEQ, T, +1);
   285  			cgen_ret(N);
   286  			patch(p, pc);
   287  		}
   288  		break;
   289  	}
   290  }
   291  
   292  /*
   293   * n is call to interface method.
   294   * generate res = n.
   295   */
   296  void
   297  cgen_callinter(Node *n, Node *res, int proc)
   298  {
   299  	Node *i, *f;
   300  	Node tmpi, nodi, nodo, nodr, nodsp;
   301  	Prog *p;
   302  
   303  	i = n->left;
   304  	if(i->op != ODOTINTER)
   305  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   306  
   307  	f = i->right;		// field
   308  	if(f->op != ONAME)
   309  		fatal("cgen_callinter: not ONAME %O", f->op);
   310  
   311  	i = i->left;		// interface
   312  
   313  	if(!i->addable) {
   314  		tempname(&tmpi, i->type);
   315  		cgen(i, &tmpi);
   316  		i = &tmpi;
   317  	}
   318  
   319  	genlist(n->list);		// assign the args
   320  
   321  	// i is now addable, prepare an indirected
   322  	// register to hold its address.
   323  	igen(i, &nodi, res);		// REG = &inter
   324  
   325  	nodindreg(&nodsp, types[tptr], D_R0+REGSP);
   326  	nodsp.xoffset = widthptr;
   327  	nodi.type = types[tptr];
   328  	nodi.xoffset += widthptr;
   329  	cgen(&nodi, &nodsp);	// 0(SP) = 8(REG) -- i.data
   330  
   331  	regalloc(&nodo, types[tptr], res);
   332  	nodi.type = types[tptr];
   333  	nodi.xoffset -= widthptr;
   334  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   335  	regfree(&nodi);
   336  
   337  	regalloc(&nodr, types[tptr], &nodo);
   338  	if(n->left->xoffset == BADWIDTH)
   339  		fatal("cgen_callinter: badwidth");
   340  	cgen_checknil(&nodo); // in case offset is huge
   341  	nodo.op = OINDREG;
   342  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   343  	if(proc == 0) {
   344  		// plain call: use direct c function pointer - more efficient
   345  		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
   346  		proc = 3;
   347  	} else {
   348  		// go/defer. generate go func value.
   349  		p = gins(AMOVD, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
   350  		p->from.type = D_CONST;
   351  	}
   352  
   353  	nodr.type = n->left->type;
   354  	ginscall(&nodr, proc);
   355  
   356  	regfree(&nodr);
   357  	regfree(&nodo);
   358  }
   359  
   360  /*
   361   * generate function call;
   362   *	proc=0	normal call
   363   *	proc=1	goroutine run in new proc
   364   *	proc=2	defer call save away stack
   365   */
   366  void
   367  cgen_call(Node *n, int proc)
   368  {
   369  	Type *t;
   370  	Node nod, afun;
   371  
   372  	if(n == N)
   373  		return;
   374  
   375  	if(n->left->ullman >= UINF) {
   376  		// if name involves a fn call
   377  		// precompute the address of the fn
   378  		tempname(&afun, types[tptr]);
   379  		cgen(n->left, &afun);
   380  	}
   381  
   382  	genlist(n->list);		// assign the args
   383  	t = n->left->type;
   384  
   385  	// call tempname pointer
   386  	if(n->left->ullman >= UINF) {
   387  		regalloc(&nod, types[tptr], N);
   388  		cgen_as(&nod, &afun);
   389  		nod.type = t;
   390  		ginscall(&nod, proc);
   391  		regfree(&nod);
   392  		return;
   393  	}
   394  
   395  	// call pointer
   396  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   397  		regalloc(&nod, types[tptr], N);
   398  		cgen_as(&nod, n->left);
   399  		nod.type = t;
   400  		ginscall(&nod, proc);
   401  		regfree(&nod);
   402  		return;
   403  	}
   404  
   405  	// call direct
   406  	n->left->method = 1;
   407  	ginscall(n->left, proc);
   408  }
   409  
   410  /*
   411   * call to n has already been generated.
   412   * generate:
   413   *	res = return value from call.
   414   */
   415  void
   416  cgen_callret(Node *n, Node *res)
   417  {
   418  	Node nod;
   419  	Type *fp, *t;
   420  	Iter flist;
   421  
   422  	t = n->left->type;
   423  	if(t->etype == TPTR32 || t->etype == TPTR64)
   424  		t = t->type;
   425  
   426  	fp = structfirst(&flist, getoutarg(t));
   427  	if(fp == T)
   428  		fatal("cgen_callret: nil");
   429  
   430  	memset(&nod, 0, sizeof(nod));
   431  	nod.op = OINDREG;
   432  	nod.val.u.reg = D_R0+REGSP;
   433  	nod.addable = 1;
   434  
   435  	nod.xoffset = fp->width + widthptr; // +widthptr: saved LR at 0(R1)
   436  	nod.type = fp->type;
   437  	cgen_as(res, &nod);
   438  }
   439  
   440  /*
   441   * call to n has already been generated.
   442   * generate:
   443   *	res = &return value from call.
   444   */
   445  void
   446  cgen_aret(Node *n, Node *res)
   447  {
   448  	Node nod1, nod2;
   449  	Type *fp, *t;
   450  	Iter flist;
   451  
   452  	t = n->left->type;
   453  	if(isptr[t->etype])
   454  		t = t->type;
   455  
   456  	fp = structfirst(&flist, getoutarg(t));
   457  	if(fp == T)
   458  		fatal("cgen_aret: nil");
   459  
   460  	memset(&nod1, 0, sizeof(nod1));
   461  	nod1.op = OINDREG;
   462  	nod1.val.u.reg = D_R0 + REGSP;
   463  	nod1.addable = 1;
   464  
   465  	nod1.xoffset = fp->width + widthptr; // +widthptr: saved lr at 0(SP)
   466  	nod1.type = fp->type;
   467  
   468  	if(res->op != OREGISTER) {
   469  		regalloc(&nod2, types[tptr], res);
   470  		agen(&nod1, &nod2);
   471  		gins(AMOVD, &nod2, res);
   472  		regfree(&nod2);
   473  	} else
   474  		agen(&nod1, res);
   475  }
   476  
   477  /*
   478   * generate return.
   479   * n->left is assignments to return values.
   480   */
   481  void
   482  cgen_ret(Node *n)
   483  {
   484  	Prog *p;
   485  
   486  	if(n != N)
   487  		genlist(n->list);		// copy out args
   488  	if(hasdefer)
   489  		ginscall(deferreturn, 0);
   490  	genlist(curfn->exit);
   491  	p = gins(ARET, N, N);
   492  	if(n != N && n->op == ORETJMP) {
   493  		p->to.name = D_EXTERN;
   494  		p->to.type = D_CONST;
   495  		p->to.sym = linksym(n->left->sym);
   496  	}
   497  }
   498  
   499  void
   500  cgen_asop(Node *n)
   501  {
   502  	USED(n);
   503  	fatal("cgen_asop"); // no longer used
   504  }
   505  
   506  int
   507  samereg(Node *a, Node *b)
   508  {
   509  	if(a == N || b == N)
   510  		return 0;
   511  	if(a->op != OREGISTER)
   512  		return 0;
   513  	if(b->op != OREGISTER)
   514  		return 0;
   515  	if(a->val.u.reg != b->val.u.reg)
   516  		return 0;
   517  	return 1;
   518  }
   519  
   520  /*
   521   * generate division.
   522   * generates one of:
   523   *	res = nl / nr
   524   *	res = nl % nr
   525   * according to op.
   526   */
   527  void
   528  dodiv(int op, Node *nl, Node *nr, Node *res)
   529  {
   530  	int a, check;
   531  	Type *t, *t0;
   532  	Node tl, tr, tl2, tr2, nm1, nz, tm;
   533  	Prog *p1, *p2;
   534  
   535  	// Have to be careful about handling
   536  	// most negative int divided by -1 correctly.
   537  	// The hardware will generate undefined result.
   538  	// Also need to explicitly trap on division on zero,
   539  	// the hardware will silently generate undefined result.
   540  	// DIVW will leave unpredicable result in higher 32-bit,
   541  	// so always use DIVD/DIVDU.
   542  	t = nl->type;
   543  	t0 = t;
   544  	check = 0;
   545  	if(issigned[t->etype]) {
   546  		check = 1;
   547  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
   548  			check = 0;
   549  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   550  			check = 0;
   551  	}
   552  	if(t->width < 8) {
   553  		if(issigned[t->etype])
   554  			t = types[TINT64];
   555  		else
   556  			t = types[TUINT64];
   557  		check = 0;
   558  	}
   559  
   560  	a = optoas(ODIV, t);
   561  
   562  	regalloc(&tl, t0, N);
   563  	regalloc(&tr, t0, N);
   564  	if(nl->ullman >= nr->ullman) {
   565  		cgen(nl, &tl);
   566  		cgen(nr, &tr);
   567  	} else {
   568  		cgen(nr, &tr);
   569  		cgen(nl, &tl);
   570  	}
   571  	if(t != t0) {
   572  		// Convert
   573  		tl2 = tl;
   574  		tr2 = tr;
   575  		tl.type = t;
   576  		tr.type = t;
   577  		gmove(&tl2, &tl);
   578  		gmove(&tr2, &tr);
   579  	}
   580  
   581  	// Handle divide-by-zero panic.
   582  	p1 = gins(optoas(OCMP, t), &tr, N);
   583  	p1->to.type = D_REG;
   584  	p1->to.reg = REGZERO;
   585  	p1 = gbranch(optoas(ONE, t), T, +1);
   586  	if(panicdiv == N)
   587  		panicdiv = sysfunc("panicdivide");
   588  	ginscall(panicdiv, -1);
   589  	patch(p1, pc);
   590  
   591  	if(check) {
   592  		nodconst(&nm1, t, -1);
   593  		gins(optoas(OCMP, t), &tr, &nm1);
   594  		p1 = gbranch(optoas(ONE, t), T, +1);
   595  		if(op == ODIV) {
   596  			// a / (-1) is -a.
   597  			gins(optoas(OMINUS, t), N, &tl);
   598  			gmove(&tl, res);
   599  		} else {
   600  			// a % (-1) is 0.
   601  			nodconst(&nz, t, 0);
   602  			gmove(&nz, res);
   603  		}
   604  		p2 = gbranch(AJMP, T, 0);
   605  		patch(p1, pc);
   606  	}
   607  	p1 = gins(a, &tr, &tl);
   608  	if(op == ODIV) {
   609  		regfree(&tr);
   610  		gmove(&tl, res);
   611  	} else {
   612  		// A%B = A-(A/B*B)
   613  		regalloc(&tm, t, N);
   614  		// patch div to use the 3 register form
   615  		// TODO(minux): add gins3?
   616  		p1->reg = p1->to.reg;
   617  		p1->to.reg = tm.val.u.reg;
   618  		gins(optoas(OMUL, t), &tr, &tm);
   619  		regfree(&tr);
   620  		gins(optoas(OSUB, t), &tm, &tl);
   621  		regfree(&tm);
   622  		gmove(&tl, res);
   623  	}
   624  	regfree(&tl);
   625  	if(check)
   626  		patch(p2, pc);
   627  }
   628  
   629  /*
   630   * generate division according to op, one of:
   631   *	res = nl / nr
   632   *	res = nl % nr
   633   */
   634  void
   635  cgen_div(int op, Node *nl, Node *nr, Node *res)
   636  {
   637  	Node n1, n2, n3;
   638  	int w, a;
   639  	Magic m;
   640  
   641  	// TODO(minux): enable division by magic multiply (also need to fix longmod below)
   642  	//if(nr->op != OLITERAL)
   643  		goto longdiv;
   644  	w = nl->type->width*8;
   645  
   646  	// Front end handled 32-bit division. We only need to handle 64-bit.
   647  	// try to do division by multiply by (2^w)/d
   648  	// see hacker's delight chapter 10
   649  	switch(simtype[nl->type->etype]) {
   650  	default:
   651  		goto longdiv;
   652  
   653  	case TUINT64:
   654  		m.w = w;
   655  		m.ud = mpgetfix(nr->val.u.xval);
   656  		umagic(&m);
   657  		if(m.bad)
   658  			break;
   659  		if(op == OMOD)
   660  			goto longmod;
   661  
   662  		cgenr(nl, &n1, N);
   663  		nodconst(&n2, nl->type, m.um);
   664  		regalloc(&n3, nl->type, res);
   665  		cgen_hmul(&n1, &n2, &n3);
   666  
   667  		if(m.ua) {
   668  			// need to add numerator accounting for overflow
   669  			gins(optoas(OADD, nl->type), &n1, &n3);
   670  			nodconst(&n2, nl->type, 1);
   671  			gins(optoas(ORROTC, nl->type), &n2, &n3);
   672  			nodconst(&n2, nl->type, m.s-1);
   673  			gins(optoas(ORSH, nl->type), &n2, &n3);
   674  		} else {
   675  			nodconst(&n2, nl->type, m.s);
   676  			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift dx
   677  		}
   678  
   679  		gmove(&n3, res);
   680  		regfree(&n1);
   681  		regfree(&n3);
   682  		return;
   683  
   684  	case TINT64:
   685  		m.w = w;
   686  		m.sd = mpgetfix(nr->val.u.xval);
   687  		smagic(&m);
   688  		if(m.bad)
   689  			break;
   690  		if(op == OMOD)
   691  			goto longmod;
   692  
   693  		cgenr(nl, &n1, res);
   694  		nodconst(&n2, nl->type, m.sm);
   695  		regalloc(&n3, nl->type, N);
   696  		cgen_hmul(&n1, &n2, &n3);
   697  
   698  		if(m.sm < 0) {
   699  			// need to add numerator
   700  			gins(optoas(OADD, nl->type), &n1, &n3);
   701  		}
   702  
   703  		nodconst(&n2, nl->type, m.s);
   704  		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3
   705  
   706  		nodconst(&n2, nl->type, w-1);
   707  		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
   708  		gins(optoas(OSUB, nl->type), &n1, &n3);	// added
   709  
   710  		if(m.sd < 0) {
   711  			// this could probably be removed
   712  			// by factoring it into the multiplier
   713  			gins(optoas(OMINUS, nl->type), N, &n3);
   714  		}
   715  
   716  		gmove(&n3, res);
   717  		regfree(&n1);
   718  		regfree(&n3);
   719  		return;
   720  	}
   721  	goto longdiv;
   722  
   723  longdiv:
   724  	// division and mod using (slow) hardware instruction
   725  	dodiv(op, nl, nr, res);
   726  	return;
   727  
   728  longmod:
   729  	// mod using formula A%B = A-(A/B*B) but
   730  	// we know that there is a fast algorithm for A/B
   731  	regalloc(&n1, nl->type, res);
   732  	cgen(nl, &n1);
   733  	regalloc(&n2, nl->type, N);
   734  	cgen_div(ODIV, &n1, nr, &n2);
   735  	a = optoas(OMUL, nl->type);
   736  	if(w == 8) {
   737  		// use 2-operand 16-bit multiply
   738  		// because there is no 2-operand 8-bit multiply
   739  		//a = AIMULW;
   740  	}
   741  	if(!smallintconst(nr)) {
   742  		regalloc(&n3, nl->type, N);
   743  		cgen(nr, &n3);
   744  		gins(a, &n3, &n2);
   745  		regfree(&n3);
   746  	} else
   747  		gins(a, nr, &n2);
   748  	gins(optoas(OSUB, nl->type), &n2, &n1);
   749  	gmove(&n1, res);
   750  	regfree(&n1);
   751  	regfree(&n2);
   752  }
   753  
   754  /*
   755   * generate high multiply:
   756   *   res = (nl*nr) >> width
   757   */
   758  void
   759  cgen_hmul(Node *nl, Node *nr, Node *res)
   760  {
   761  	int w;
   762  	Node n1, n2, *tmp;
   763  	Type *t;
   764  	Prog *p;
   765  
   766  	// largest ullman on left.
   767  	if(nl->ullman < nr->ullman) {
   768  		tmp = nl;
   769  		nl = nr;
   770  		nr = tmp;
   771  	}
   772  	t = nl->type;
   773  	w = t->width * 8;
   774  	cgenr(nl, &n1, res);
   775  	cgenr(nr, &n2, N);
   776  	switch(simtype[t->etype]) {
   777  	case TINT8:
   778  	case TINT16:
   779  	case TINT32:
   780  		gins(optoas(OMUL, t), &n2, &n1);
   781  		p = gins(ASRAD, N, &n1);
   782  		p->from.type = D_CONST;
   783  		p->from.offset = w;
   784  		break;
   785  	case TUINT8:
   786  	case TUINT16:
   787  	case TUINT32:
   788  		gins(optoas(OMUL, t), &n2, &n1);
   789  		p = gins(ASRD, N, &n1);
   790  		p->from.type = D_CONST;
   791  		p->from.offset = w;
   792  		break;
   793  	case TINT64:
   794  	case TUINT64:
   795  		if(issigned[t->etype])
   796  			p = gins(AMULHD, &n2, &n1);
   797  		else
   798  			p = gins(AMULHDU, &n2, &n1);
   799  		break;
   800  	default:
   801  		fatal("cgen_hmul %T", t);
   802  		break;
   803  	}
   804  	cgen(&n1, res);
   805  	regfree(&n1);
   806  	regfree(&n2);
   807  }
   808  
   809  /*
   810   * generate shift according to op, one of:
   811   *	res = nl << nr
   812   *	res = nl >> nr
   813   */
   814  void
   815  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   816  {
   817  	Node n1, n2, n3, n4, n5;
   818  	int a;
   819  	Prog *p1;
   820  	uvlong sc;
   821  	Type *tcount;
   822  
   823  	a = optoas(op, nl->type);
   824  
   825  	if(nr->op == OLITERAL) {
   826  		regalloc(&n1, nl->type, res);
   827  		cgen(nl, &n1);
   828  		sc = mpgetfix(nr->val.u.xval);
   829  		if(sc >= nl->type->width*8) {
   830  			// large shift gets 2 shifts by width-1
   831  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   832  			gins(a, &n3, &n1);
   833  			gins(a, &n3, &n1);
   834  		} else
   835  			gins(a, nr, &n1);
   836  		gmove(&n1, res);
   837  		regfree(&n1);
   838  		goto ret;
   839  	}
   840  
   841  	if(nl->ullman >= UINF) {
   842  		tempname(&n4, nl->type);
   843  		cgen(nl, &n4);
   844  		nl = &n4;
   845  	}
   846  	if(nr->ullman >= UINF) {
   847  		tempname(&n5, nr->type);
   848  		cgen(nr, &n5);
   849  		nr = &n5;
   850  	}
   851  
   852  	// Allow either uint32 or uint64 as shift type,
   853  	// to avoid unnecessary conversion from uint32 to uint64
   854  	// just to do the comparison.
   855  	tcount = types[simtype[nr->type->etype]];
   856  	if(tcount->etype < TUINT32)
   857  		tcount = types[TUINT32];
   858  
   859  	regalloc(&n1, nr->type, N);		// to hold the shift type in CX
   860  	regalloc(&n3, tcount, &n1);	// to clear high bits of CX
   861  
   862  	regalloc(&n2, nl->type, res);
   863  	if(nl->ullman >= nr->ullman) {
   864  		cgen(nl, &n2);
   865  		cgen(nr, &n1);
   866  		gmove(&n1, &n3);
   867  	} else {
   868  		cgen(nr, &n1);
   869  		gmove(&n1, &n3);
   870  		cgen(nl, &n2);
   871  	}
   872  	regfree(&n3);
   873  
   874  	// test and fix up large shifts
   875  	if(!bounded) {
   876  		nodconst(&n3, tcount, nl->type->width*8);
   877  		gins(optoas(OCMP, tcount), &n1, &n3);
   878  		p1 = gbranch(optoas(OLT, tcount), T, +1);
   879  		if(op == ORSH && issigned[nl->type->etype]) {
   880  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   881  			gins(a, &n3, &n2);
   882  		} else {
   883  			nodconst(&n3, nl->type, 0);
   884  			gmove(&n3, &n2);
   885  		}
   886  		patch(p1, pc);
   887  	}
   888  
   889  	gins(a, &n1, &n2);
   890  
   891  	gmove(&n2, res);
   892  
   893  	regfree(&n1);
   894  	regfree(&n2);
   895  
   896  ret:
   897  	;
   898  }
   899  
   900  void
   901  clearfat(Node *nl)
   902  {
   903  	uint64 w, c, q, t, boff;
   904  	Node dst, end, r0, *f;
   905  	Prog *p, *pl;
   906  
   907  	/* clear a fat object */
   908  	if(debug['g']) {
   909  		print("clearfat %N (%T, size: %lld)\n", nl, nl->type, nl->type->width);
   910  	}
   911  
   912  	w = nl->type->width;
   913  	// Avoid taking the address for simple enough types.
   914  	//if(componentgen(N, nl))
   915  	//	return;
   916  
   917  	c = w % 8;	// bytes
   918  	q = w / 8;	// dwords
   919  
   920  	if(reg[REGRT1] > 0)
   921  		fatal("R%d in use during clearfat", REGRT1);
   922  
   923  	nodreg(&r0, types[TUINT64], 0); // r0 is always zero
   924  	nodreg(&dst, types[tptr], D_R0+REGRT1);
   925  	reg[REGRT1]++;
   926  	agen(nl, &dst);
   927  
   928  	if(q > 128) {
   929  		p = gins(ASUB, N, &dst);
   930  		p->from.type = D_CONST;
   931  		p->from.offset = 8;
   932  
   933  		regalloc(&end, types[tptr], N);
   934  		p = gins(AMOVD, &dst, &end);
   935  		p->from.type = D_CONST;
   936  		p->from.offset = q*8;
   937  
   938  		p = gins(AMOVDU, &r0, &dst);
   939  		p->to.type = D_OREG;
   940  		p->to.offset = 8;
   941  		pl = p;
   942  
   943  		p = gins(ACMP, &dst, &end);
   944  		patch(gbranch(ABNE, T, 0), pl);
   945  
   946  		regfree(&end);
   947  		// The loop leaves R3 on the last zeroed dword
   948  		boff = 8;
   949  	} else if(q >= 4) {
   950  		p = gins(ASUB, N, &dst);
   951  		p->from.type = D_CONST;
   952  		p->from.offset = 8;
   953  		f = sysfunc("duffzero");
   954  		p = gins(ADUFFZERO, N, f);
   955  		afunclit(&p->to, f);
   956  		// 4 and 128 = magic constants: see ../../runtime/asm_ppc64x.s
   957  		p->to.offset = 4*(128-q);
   958  		// duffzero leaves R3 on the last zeroed dword
   959  		boff = 8;
   960  	} else {
   961  		for(t = 0; t < q; t++) {
   962  			p = gins(AMOVD, &r0, &dst);
   963  			p->to.type = D_OREG;
   964  			p->to.offset = 8*t;
   965  		}
   966  		boff = 8*q;
   967  	}
   968  
   969  	for(t = 0; t < c; t++) {
   970  		p = gins(AMOVB, &r0, &dst);
   971  		p->to.type = D_OREG;
   972  		p->to.offset = t+boff;
   973  	}
   974  	reg[REGRT1]--;
   975  }
   976  
   977  // Called after regopt and peep have run.
   978  // Expand CHECKNIL pseudo-op into actual nil pointer check.
   979  void
   980  expandchecks(Prog *firstp)
   981  {
   982  	Prog *p, *p1, *p2;
   983  
   984  	for(p = firstp; p != P; p = p->link) {
   985  		if(debug_checknil && ctxt->debugvlog)
   986  			print("expandchecks: %P\n", p);
   987  		if(p->as != ACHECKNIL)
   988  			continue;
   989  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
   990  			warnl(p->lineno, "generated nil check");
   991  		if(p->from.type != D_REG)
   992  			fatal("invalid nil check %P\n", p);
   993  		/*
   994  		// check is
   995  		//	TD $4, R0, arg (R0 is always zero)
   996  		// eqv. to:
   997  		// 	tdeq r0, arg
   998  		// NOTE: this needs special runtime support to make SIGTRAP recoverable.
   999  		reg = p->from.reg;
  1000  		p->as = ATD;
  1001  		p->from = p->to = p->from3 = zprog.from;
  1002  		p->from.type = D_CONST;
  1003  		p->from.offset = 4;
  1004  		p->from.reg = NREG;
  1005  		p->reg = 0;
  1006  		p->to.type = D_REG;
  1007  		p->to.reg = reg;
  1008  		*/
  1009  		// check is
  1010  		//	CMP arg, R0
  1011  		//	BNE 2(PC) [likely]
  1012  		//	MOVD R0, 0(R0)
  1013  		p1 = mal(sizeof *p1);
  1014  		p2 = mal(sizeof *p2);
  1015  		clearp(p1);
  1016  		clearp(p2);
  1017  		p1->link = p2;
  1018  		p2->link = p->link;
  1019  		p->link = p1;
  1020  		p1->lineno = p->lineno;
  1021  		p2->lineno = p->lineno;
  1022  		p1->pc = 9999;
  1023  		p2->pc = 9999;
  1024  		p->as = ACMP;
  1025  		p->to.type = D_REG;
  1026  		p->to.reg = REGZERO;
  1027  		p1->as = ABNE;
  1028  		//p1->from.type = D_CONST;
  1029  		//p1->from.offset = 1; // likely
  1030  		p1->to.type = D_BRANCH;
  1031  		p1->to.u.branch = p2->link;
  1032  		// crash by write to memory address 0.
  1033  		p2->as = AMOVD;
  1034  		p2->from.type = D_REG;
  1035  		p2->from.reg = 0;
  1036  		p2->to.type = D_OREG;
  1037  		p2->to.reg = 0;
  1038  		p2->to.offset = 0;
  1039  	}
  1040  }