github.com/reiver/go@v0.0.0-20150109200633-1d0c7792f172/src/cmd/6g/ggen.c

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  static Prog *appendpp(Prog*, int, int, vlong, int, vlong);
    13  static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
    14  
    15  void
    16  defframe(Prog *ptxt)
    17  {
    18  	uint32 frame, ax;
    19  	Prog *p;
    20  	vlong hi, lo;
    21  	NodeList *l;
    22  	Node *n;
    23  
    24  	// fill in argument size
    25  	ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
    26  
    27  	// fill in final stack size
    28  	ptxt->to.offset <<= 32;
    29  	frame = rnd(stksize+maxarg, widthreg);
    30  	ptxt->to.offset |= frame;
    31  	
    32  	// insert code to zero ambiguously live variables
    33  	// so that the garbage collector only sees initialized values
    34  	// when it looks for pointers.
    35  	p = ptxt;
    36  	lo = hi = 0;
    37  	ax = 0;
    38  	// iterate through declarations - they are sorted in decreasing xoffset order.
    39  	for(l=curfn->dcl; l != nil; l = l->next) {
    40  		n = l->n;
    41  		if(!n->needzero)
    42  			continue;
    43  		if(n->class != PAUTO)
    44  			fatal("needzero class %d", n->class);
    45  		if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
    46  			fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
    47  
    48  		if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthreg) {
    49  			// merge with range we already have
    50  			lo = n->xoffset;
    51  			continue;
    52  		}
    53  		// zero old range
    54  		p = zerorange(p, frame, lo, hi, &ax);
    55  
    56  		// set new range
    57  		hi = n->xoffset + n->type->width;
    58  		lo = n->xoffset;
    59  	}
    60  	// zero final range
    61  	zerorange(p, frame, lo, hi, &ax);
    62  }
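/*
 * Illustrative sketch (standard C, not part of the 6g sources): defframe
 * above packs two values into the TEXT pseudo-op's to.offset -- the
 * rounded argument size in the high 32 bits and the local frame size in
 * the low 32 bits -- so a single word can carry both to the back end.
 */
#if 0
#include <stdint.h>

static int64_t
packframesizes(int64_t argwid, uint32_t framesize)
{
	int64_t off;

	off = argwid;		/* rnd(curfn->type->argwid, widthptr) */
	off <<= 32;		/* argument size moves to the high half */
	off |= framesize;	/* rnd(stksize+maxarg, widthreg) in the low half */
	return off;
}

static void
unpackframesizes(int64_t off, int64_t *argwid, uint32_t *framesize)
{
	*argwid = off >> 32;
	*framesize = (uint32_t)off;
}
#endif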
    63  
    64  static Prog*
    65  zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax)
    66  {
    67  	vlong cnt, i;
    68  
    69  	cnt = hi - lo;
    70  	if(cnt == 0)
    71  		return p;
    72  	if(*ax == 0) {
    73  		p = appendpp(p, AMOVQ, D_CONST, 0, D_AX, 0);
    74  		*ax = 1;
    75  	}
    76  	if(cnt % widthreg != 0) {
    77  		// should only happen with nacl
    78  		if(cnt % widthptr != 0)
    79  			fatal("zerorange count not a multiple of widthptr %lld", cnt);
    80  		p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo);
    81  		lo += widthptr;
    82  		cnt -= widthptr;
    83  	}
    84  	if(cnt <= 4*widthreg) {
    85  		for(i = 0; i < cnt; i += widthreg) {
    86  			p = appendpp(p, AMOVQ, D_AX, 0, D_SP+D_INDIR, frame+lo+i);
    87  		}
    88  	} else if(!nacl && (cnt <= 128*widthreg)) {
    89  		p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0);
    90  		p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 2*(128-cnt/widthreg));
    91  		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
    92  	} else {
    93  		p = appendpp(p, AMOVQ, D_CONST, cnt/widthreg, D_CX, 0);
    94  		p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0);
    95  		p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
    96  		p = appendpp(p, ASTOSQ, D_NONE, 0, D_NONE, 0);
    97  	}
    98  	return p;
    99  }
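/*
 * Sketch (standard C, not part of the build) of the strategy selection in
 * zerorange above: a handful of words gets inline MOVQ $0 stores, up to
 * 128 words (outside NaCl) jumps into runtime.duffzero, and anything
 * larger falls back to REP STOSQ.  The DUFFZERO offset assumes each unit
 * of the duffzero body encodes in 2 bytes, per the "magic constants"
 * note in clearfat below.
 */
#if 0
#include <stdint.h>

enum { WIDTHREG = 8 };		/* register width on amd64 */

static int
zerostrategy(int64_t cnt, int nacl, int64_t *duffoff)
{
	if(cnt <= 4*WIDTHREG)
		return 0;	/* unrolled MOVQ $0, off(SP) */
	if(!nacl && cnt <= 128*WIDTHREG) {
		/* enter duffzero late enough to store exactly cnt/WIDTHREG words */
		*duffoff = 2*(128 - cnt/WIDTHREG);
		return 1;	/* ADUFFZERO */
	}
	return 2;		/* MOVQ $(cnt/8), CX; REP; STOSQ */
}
#endif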
   100  
   101  static Prog*	
   102  appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)	
   103  {
   104  	Prog *q;
   105  	q = mal(sizeof(*q));	
   106  	clearp(q);	
   107  	q->as = as;	
   108  	q->lineno = p->lineno;	
   109  	q->from.type = ftype;	
   110  	q->from.offset = foffset;	
   111  	q->to.type = ttype;	
   112  	q->to.offset = toffset;	
   113  	q->link = p->link;	
   114  	p->link = q;	
   115  	return q;	
   116  }
   117  
   118  // Sweep the prog list to mark any used nodes.
   119  void
   120  markautoused(Prog* p)
   121  {
   122  	for (; p; p = p->link) {
   123  		if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL)
   124  			continue;
   125  
   126  		if (p->from.node)
   127  			p->from.node->used = 1;
   128  
   129  		if (p->to.node)
   130  			p->to.node->used = 1;
   131  	}
   132  }
   133  
   134  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
   135  void
   136  fixautoused(Prog *p)
   137  {
   138  	Prog **lp;
   139  
   140  	for (lp=&p; (p=*lp) != P; ) {
   141  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
   142  			*lp = p->link;
   143  			continue;
   144  		}
   145  		if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) {
   146  			// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
   147  			// VARDEFs are interspersed with other code, and a jump might be using the
   148  			// VARDEF as a target. Replace with a no-op instead. A later pass will remove
   149  			// the no-ops.
   150  			p->to.type = D_NONE;
   151  			p->to.node = N;
   152  			p->as = ANOP;
   153  			continue;
   154  		}
   155  		if (p->from.type == D_AUTO && p->from.node)
   156  			p->from.offset += p->from.node->stkdelta;
   157  
   158  		if (p->to.type == D_AUTO && p->to.node)
   159  			p->to.offset += p->to.node->stkdelta;
   160  
   161  		lp = &p->link;
   162  	}
   163  }
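/*
 * The loop in fixautoused above deletes Prog nodes from a singly linked
 * list without keeping a separate "previous" pointer: lp always points
 * at the link field that currently points to p.  A minimal standalone
 * version of the same idiom:
 */
#if 0
#include <stddef.h>

struct node { int keep; struct node *link; };

static void
unlinkunwanted(struct node **head)
{
	struct node **lp, *p;

	for(lp = head; (p = *lp) != NULL; ) {
		if(!p->keep) {
			*lp = p->link;	/* splice p out; revisit the same slot */
			continue;
		}
		lp = &p->link;		/* keep p; advance to its link field */
	}
}
#endif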
   164  
   165  
   166  /*
   167   * generate:
   168   *	call f
   169   *	proc=-1	normal call but no return
   170   *	proc=0	normal call
   171   *	proc=1	goroutine run in new proc
   172   *	proc=2	defer call save away stack
   173   *	proc=3	normal call to C pointer (not Go func value)
   174   */
   175  void
   176  ginscall(Node *f, int proc)
   177  {
   178  	Prog *p;
   179  	Node reg, stk;
   180  	Node r1;
   181  	int32 extra;
   182  
   183  	if(f->type != T) {
   184  		extra = 0;
   185  		if(proc == 1 || proc == 2)
   186  			extra = 2 * widthptr;
   187  		setmaxarg(f->type, extra);
   188  	}
   189  
   190  	switch(proc) {
   191  	default:
   192  		fatal("ginscall: bad proc %d", proc);
   193  		break;
   194  
   195  	case 0:	// normal call
   196  	case -1:	// normal call but no return
   197  		if(f->op == ONAME && f->class == PFUNC) {
   198  			if(f == deferreturn) {
   199  				// Deferred calls will appear to be returning to
   200  				// the CALL deferreturn(SB) that we are about to emit.
   201  				// However, the stack trace code will show the line
   202  				// of the instruction byte before the return PC.
   203  				// To avoid that being an unrelated instruction,
   204  				// insert an x86 NOP so that we will have the right line number.
   205  				// x86 NOP 0x90 is really XCHG AX, AX; use that description
   206  				// because the NOP pseudo-instruction would be removed by
   207  				// the linker.
   208  				nodreg(&reg, types[TINT], D_AX);
   209  				gins(AXCHGL, &reg, &reg);
   210  			}
   211  			p = gins(ACALL, N, f);
   212  			afunclit(&p->to, f);
   213  			if(proc == -1 || noreturn(p))
   214  				gins(AUNDEF, N, N);
   215  			break;
   216  		}
   217  		nodreg(&reg, types[tptr], D_DX);
   218  		nodreg(&r1, types[tptr], D_BX);
   219  		gmove(f, &reg);
   220  		reg.op = OINDREG;
   221  		gmove(&reg, &r1);
   222  		reg.op = OREGISTER;
   223  		gins(ACALL, &reg, &r1);
   224  		break;
   225  	
   226  	case 3:	// normal call of c function pointer
   227  		gins(ACALL, N, f);
   228  		break;
   229  
   230  	case 1:	// call in new proc (go)
   231  	case 2:	// deferred call (defer)
   232  		memset(&stk, 0, sizeof(stk));
   233  		stk.op = OINDREG;
   234  		stk.val.u.reg = D_SP;
   235  		stk.xoffset = 0;
   236  
   237  		if(widthptr == 8) {
   238  			// size of arguments at 0(SP)
   239  			ginscon(AMOVQ, argsize(f->type), &stk);
   240  
   241  			// FuncVal* at 8(SP)
   242  			stk.xoffset = widthptr;
   243  			nodreg(&reg, types[TINT64], D_AX);
   244  			gmove(f, &reg);
   245  			gins(AMOVQ, &reg, &stk);
   246  		} else {
   247  			// size of arguments at 0(SP)
   248  			ginscon(AMOVL, argsize(f->type), &stk);
   249  
   250  			// FuncVal* at 4(SP)
   251  			stk.xoffset = widthptr;
   252  			nodreg(&reg, types[TINT32], D_AX);
   253  			gmove(f, &reg);
   254  			gins(AMOVL, &reg, &stk);
   255  		}
   256  
   257  		if(proc == 1)
   258  			ginscall(newproc, 0);
   259  		else {
   260  			if(!hasdefer)
   261  				fatal("hasdefer=0 but has defer");
   262  			ginscall(deferproc, 0);
   263  		}
   264  		if(proc == 2) {
   265  			nodreg(&reg, types[TINT32], D_AX);
   266  			gins(ATESTL, &reg, &reg);
   267  			p = gbranch(AJEQ, T, +1);
   268  			cgen_ret(N);
   269  			patch(p, pc);
   270  		}
   271  		break;
   272  	}
   273  }
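/*
 * Rough picture (not part of the sources) of what the go/defer cases
 * above leave at the bottom of the outgoing argument area before calling
 * runtime.newproc or runtime.deferproc on amd64: the argument size at
 * 0(SP) and the FuncVal* at widthptr(SP); the spawned or deferred call's
 * own arguments, already copied out by genlist(n->list), sit above this
 * two-word header (hence extra = 2*widthptr in setmaxarg).  For defer,
 * the TESTL/AJEQ sequence returns early when deferproc reports a
 * nonzero result in AX.
 */
#if 0
#include <stdint.h>

struct gocallheader {
	int64_t	siz;	/* 0(SP): size of the call's arguments */
	void	*fn;	/* widthptr(SP): FuncVal* to invoke */
	/* the call's own arguments follow, at 2*widthptr(SP) and up */
};
#endif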
   274  
   275  /*
   276   * n is call to interface method.
   277   * generate res = n.
   278   */
   279  void
   280  cgen_callinter(Node *n, Node *res, int proc)
   281  {
   282  	Node *i, *f;
   283  	Node tmpi, nodi, nodo, nodr, nodsp;
   284  
   285  	i = n->left;
   286  	if(i->op != ODOTINTER)
   287  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   288  
   289  	f = i->right;		// field
   290  	if(f->op != ONAME)
   291  		fatal("cgen_callinter: not ONAME %O", f->op);
   292  
   293  	i = i->left;		// interface
   294  
   295  	if(!i->addable) {
   296  		tempname(&tmpi, i->type);
   297  		cgen(i, &tmpi);
   298  		i = &tmpi;
   299  	}
   300  
   301  	genlist(n->list);		// assign the args
   302  
   303  	// i is now addable, prepare an indirected
   304  	// register to hold its address.
   305  	igen(i, &nodi, res);		// REG = &inter
   306  
   307  	nodindreg(&nodsp, types[tptr], D_SP);
   308  	nodsp.xoffset = 0;
   309  	if(proc != 0)
   310  		nodsp.xoffset += 2 * widthptr; // leave room for size & fn
   311  	nodi.type = types[tptr];
   312  	nodi.xoffset += widthptr;
   313  	cgen(&nodi, &nodsp);	// {0, 8(nacl), or 16}(SP) = 8(REG) -- i.data
   314  
   315  	regalloc(&nodo, types[tptr], res);
   316  	nodi.type = types[tptr];
   317  	nodi.xoffset -= widthptr;
   318  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   319  	regfree(&nodi);
   320  
   321  	regalloc(&nodr, types[tptr], &nodo);
   322  	if(n->left->xoffset == BADWIDTH)
   323  		fatal("cgen_callinter: badwidth");
   324  	cgen_checknil(&nodo); // in case offset is huge
   325  	nodo.op = OINDREG;
   326  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   327  	if(proc == 0) {
   328  		// plain call: use direct c function pointer - more efficient
   329  		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
   330  		proc = 3;
   331  	} else {
   332  		// go/defer. generate go func value.
   333  		gins(ALEAQ, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
   334  	}
   335  
   336  	nodr.type = n->left->type;
   337  	ginscall(&nodr, proc);
   338  
   339  	regfree(&nodr);
   340  	regfree(&nodo);
   341  }
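/*
 * The 3*widthptr + 8 in cgen_callinter above is the byte offset of
 * fun[0] inside the runtime's itab of this era: three pointers followed
 * by two int32 fields put the method table at offset 32 on amd64, and
 * f's xoffset then selects the particular method.  A sketch of that
 * assumed layout:
 */
#if 0
#include <stdint.h>

struct itabsketch {
	void	*inter;			/* interface type descriptor */
	void	*type;			/* concrete type descriptor */
	void	*link;
	int32_t	bad;
	int32_t	unused;
	void	(*fun[1])(void);	/* starts at 3*8 + 8 == 32 on amd64 */
};
#endif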
   342  
   343  /*
   344   * generate function call;
   345   *	proc=0	normal call
   346   *	proc=1	goroutine run in new proc
   347   *	proc=2	defer call save away stack
   348   */
   349  void
   350  cgen_call(Node *n, int proc)
   351  {
   352  	Type *t;
   353  	Node nod, afun;
   354  
   355  	if(n == N)
   356  		return;
   357  
   358  	if(n->left->ullman >= UINF) {
   359  		// if name involves a fn call
   360  		// precompute the address of the fn
   361  		tempname(&afun, types[tptr]);
   362  		cgen(n->left, &afun);
   363  	}
   364  
   365  	genlist(n->list);		// assign the args
   366  	t = n->left->type;
   367  
   368  	// call tempname pointer
   369  	if(n->left->ullman >= UINF) {
   370  		regalloc(&nod, types[tptr], N);
   371  		cgen_as(&nod, &afun);
   372  		nod.type = t;
   373  		ginscall(&nod, proc);
   374  		regfree(&nod);
   375  		return;
   376  	}
   377  
   378  	// call pointer
   379  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   380  		regalloc(&nod, types[tptr], N);
   381  		cgen_as(&nod, n->left);
   382  		nod.type = t;
   383  		ginscall(&nod, proc);
   384  		regfree(&nod);
   385  		return;
   386  	}
   387  
   388  	// call direct
   389  	n->left->method = 1;
   390  	ginscall(n->left, proc);
   391  }
   392  
   393  /*
   394   * call to n has already been generated.
   395   * generate:
   396   *	res = return value from call.
   397   */
   398  void
   399  cgen_callret(Node *n, Node *res)
   400  {
   401  	Node nod;
   402  	Type *fp, *t;
   403  	Iter flist;
   404  
   405  	t = n->left->type;
   406  	if(t->etype == TPTR32 || t->etype == TPTR64)
   407  		t = t->type;
   408  
   409  	fp = structfirst(&flist, getoutarg(t));
   410  	if(fp == T)
   411  		fatal("cgen_callret: nil");
   412  
   413  	memset(&nod, 0, sizeof(nod));
   414  	nod.op = OINDREG;
   415  	nod.val.u.reg = D_SP;
   416  	nod.addable = 1;
   417  
   418  	nod.xoffset = fp->width;
   419  	nod.type = fp->type;
   420  	cgen_as(res, &nod);
   421  }
   422  
   423  /*
   424   * call to n has already been generated.
   425   * generate:
   426   *	res = &return value from call.
   427   */
   428  void
   429  cgen_aret(Node *n, Node *res)
   430  {
   431  	Node nod1, nod2;
   432  	Type *fp, *t;
   433  	Iter flist;
   434  
   435  	t = n->left->type;
   436  	if(isptr[t->etype])
   437  		t = t->type;
   438  
   439  	fp = structfirst(&flist, getoutarg(t));
   440  	if(fp == T)
   441  		fatal("cgen_aret: nil");
   442  
   443  	memset(&nod1, 0, sizeof(nod1));
   444  	nod1.op = OINDREG;
   445  	nod1.val.u.reg = D_SP;
   446  	nod1.addable = 1;
   447  
   448  	nod1.xoffset = fp->width;
   449  	nod1.type = fp->type;
   450  
   451  	if(res->op != OREGISTER) {
   452  		regalloc(&nod2, types[tptr], res);
   453  		gins(leaptr, &nod1, &nod2);
   454  		gins(movptr, &nod2, res);
   455  		regfree(&nod2);
   456  	} else
   457  		gins(leaptr, &nod1, res);
   458  }
   459  
   460  /*
   461   * generate return.
   462   * n->left is assignments to return values.
   463   */
   464  void
   465  cgen_ret(Node *n)
   466  {
   467  	Prog *p;
   468  
   469  	if(n != N)
   470  		genlist(n->list);		// copy out args
   471  	if(hasdefer)
   472  		ginscall(deferreturn, 0);
   473  	genlist(curfn->exit);
   474  	p = gins(ARET, N, N);
   475  	if(n != N && n->op == ORETJMP) {
   476  		p->to.type = D_EXTERN;
   477  		p->to.sym = linksym(n->left->sym);
   478  	}
   479  }
   480  
   481  /*
   482   * generate += *= etc.
   483   */
   484  void
   485  cgen_asop(Node *n)
   486  {
   487  	Node n1, n2, n3, n4;
   488  	Node *nl, *nr;
   489  	Prog *p1;
   490  	Addr addr;
   491  	int a;
   492  
   493  	nl = n->left;
   494  	nr = n->right;
   495  
   496  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   497  		tempname(&n1, nr->type);
   498  		cgen(nr, &n1);
   499  		n2 = *n;
   500  		n2.right = &n1;
   501  		cgen_asop(&n2);
   502  		goto ret;
   503  	}
   504  
   505  	if(!isint[nl->type->etype])
   506  		goto hard;
   507  	if(!isint[nr->type->etype])
   508  		goto hard;
   509  
   510  	switch(n->etype) {
   511  	case OADD:
   512  		if(smallintconst(nr))
   513  		if(mpgetfix(nr->val.u.xval) == 1) {
   514  			a = optoas(OINC, nl->type);
   515  			if(nl->addable) {
   516  				gins(a, N, nl);
   517  				goto ret;
   518  			}
   519  			if(sudoaddable(a, nl, &addr)) {
   520  				p1 = gins(a, N, N);
   521  				p1->to = addr;
   522  				sudoclean();
   523  				goto ret;
   524  			}
   525  		}
   526  		break;
   527  
   528  	case OSUB:
   529  		if(smallintconst(nr))
   530  		if(mpgetfix(nr->val.u.xval) == 1) {
   531  			a = optoas(ODEC, nl->type);
   532  			if(nl->addable) {
   533  				gins(a, N, nl);
   534  				goto ret;
   535  			}
   536  			if(sudoaddable(a, nl, &addr)) {
   537  				p1 = gins(a, N, N);
   538  				p1->to = addr;
   539  				sudoclean();
   540  				goto ret;
   541  			}
   542  		}
   543  		break;
   544  	}
   545  
   546  	switch(n->etype) {
   547  	case OADD:
   548  	case OSUB:
   549  	case OXOR:
   550  	case OAND:
   551  	case OOR:
   552  		a = optoas(n->etype, nl->type);
   553  		if(nl->addable) {
   554  			if(smallintconst(nr)) {
   555  				gins(a, nr, nl);
   556  				goto ret;
   557  			}
   558  			regalloc(&n2, nr->type, N);
   559  			cgen(nr, &n2);
   560  			gins(a, &n2, nl);
   561  			regfree(&n2);
   562  			goto ret;
   563  		}
   564  		if(nr->ullman < UINF)
   565  		if(sudoaddable(a, nl, &addr)) {
   566  			if(smallintconst(nr)) {
   567  				p1 = gins(a, nr, N);
   568  				p1->to = addr;
   569  				sudoclean();
   570  				goto ret;
   571  			}
   572  			regalloc(&n2, nr->type, N);
   573  			cgen(nr, &n2);
   574  			p1 = gins(a, &n2, N);
   575  			p1->to = addr;
   576  			regfree(&n2);
   577  			sudoclean();
   578  			goto ret;
   579  		}
   580  	}
   581  
   582  hard:
   583  	n2.op = 0;
   584  	n1.op = 0;
   585  	if(nr->op == OLITERAL) {
   586  		// don't allocate a register for literals.
   587  	} else if(nr->ullman >= nl->ullman || nl->addable) {
   588  		regalloc(&n2, nr->type, N);
   589  		cgen(nr, &n2);
   590  		nr = &n2;
   591  	} else {
   592  		tempname(&n2, nr->type);
   593  		cgen(nr, &n2);
   594  		nr = &n2;
   595  	}
   596  	if(!nl->addable) {
   597  		igen(nl, &n1, N);
   598  		nl = &n1;
   599  	}
   600  
   601  	n3 = *n;
   602  	n3.left = nl;
   603  	n3.right = nr;
   604  	n3.op = n->etype;
   605  
   606  	regalloc(&n4, nl->type, N);
   607  	cgen(&n3, &n4);
   608  	gmove(&n4, nl);
   609  
   610  	if(n1.op)
   611  		regfree(&n1);
   612  	if(n2.op == OREGISTER)
   613  		regfree(&n2);
   614  	regfree(&n4);
   615  
   616  ret:
   617  	;
   618  }
   619  
   620  int
   621  samereg(Node *a, Node *b)
   622  {
   623  	if(a == N || b == N)
   624  		return 0;
   625  	if(a->op != OREGISTER)
   626  		return 0;
   627  	if(b->op != OREGISTER)
   628  		return 0;
   629  	if(a->val.u.reg != b->val.u.reg)
   630  		return 0;
   631  	return 1;
   632  }
   633  
   634  /*
   635   * generate division.
   636   * generates one of:
   637   *	res = nl / nr
   638   *	res = nl % nr
   639   * according to op.
   640   */
   641  void
   642  dodiv(int op, Node *nl, Node *nr, Node *res)
   643  {
   644  	int a, check;
   645  	Node n3, n4;
   646  	Type *t, *t0;
   647  	Node ax, dx, ax1, n31, oldax, olddx;
   648  	Prog *p1, *p2;
   649  
   650  	// Have to be careful about handling
   651  	// most negative int divided by -1 correctly.
   652  	// The hardware will trap.
   653  	// Also the byte divide instruction needs AH,
   654  	// which we otherwise don't have to deal with.
   655  	// Easiest way to avoid for int8, int16: use int32.
   656  	// For int32 and int64, use explicit test.
   657  	// Could use int64 hw for int32.
   658  	t = nl->type;
   659  	t0 = t;
   660  	check = 0;
   661  	if(issigned[t->etype]) {
   662  		check = 1;
   663  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
   664  			check = 0;
   665  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   666  			check = 0;
   667  	}
   668  	if(t->width < 4) {
   669  		if(issigned[t->etype])
   670  			t = types[TINT32];
   671  		else
   672  			t = types[TUINT32];
   673  		check = 0;
   674  	}
   675  	a = optoas(op, t);
   676  
   677  	regalloc(&n3, t0, N);
   678  	if(nl->ullman >= nr->ullman) {
   679  		savex(D_AX, &ax, &oldax, res, t0);
   680  		cgen(nl, &ax);
   681  		regalloc(&ax, t0, &ax);	// mark ax live during cgen
   682  		cgen(nr, &n3);
   683  		regfree(&ax);
   684  	} else {
   685  		cgen(nr, &n3);
   686  		savex(D_AX, &ax, &oldax, res, t0);
   687  		cgen(nl, &ax);
   688  	}
   689  	if(t != t0) {
   690  		// Convert
   691  		ax1 = ax;
   692  		n31 = n3;
   693  		ax.type = t;
   694  		n3.type = t;
   695  		gmove(&ax1, &ax);
   696  		gmove(&n31, &n3);
   697  	}
   698  
   699  	p2 = P;
   700  	if(nacl) {
   701  		// Native Client does not relay the divide-by-zero trap
   702  		// to the executing program, so we must insert a check
   703  		// for ourselves.
   704  		nodconst(&n4, t, 0);
   705  		gins(optoas(OCMP, t), &n3, &n4);
   706  		p1 = gbranch(optoas(ONE, t), T, +1);
   707  		if(panicdiv == N)
   708  			panicdiv = sysfunc("panicdivide");
   709  		ginscall(panicdiv, -1);
   710  		patch(p1, pc);
   711  	}
   712  	if(check) {
   713  		nodconst(&n4, t, -1);
   714  		gins(optoas(OCMP, t), &n3, &n4);
   715  		p1 = gbranch(optoas(ONE, t), T, +1);
   716  		if(op == ODIV) {
   717  			// a / (-1) is -a.
   718  			gins(optoas(OMINUS, t), N, &ax);
   719  			gmove(&ax, res);
   720  		} else {
   721  			// a % (-1) is 0.
   722  			nodconst(&n4, t, 0);
   723  			gmove(&n4, res);
   724  		}
   725  		p2 = gbranch(AJMP, T, 0);
   726  		patch(p1, pc);
   727  	}
   728  	savex(D_DX, &dx, &olddx, res, t);
   729  	if(!issigned[t->etype]) {
   730  		nodconst(&n4, t, 0);
   731  		gmove(&n4, &dx);
   732  	} else
   733  		gins(optoas(OEXTEND, t), N, N);
   734  	gins(a, &n3, N);
   735  	regfree(&n3);
   736  	if(op == ODIV)
   737  		gmove(&ax, res);
   738  	else
   739  		gmove(&dx, res);
   740  	restx(&dx, &olddx);
   741  	if(check)
   742  		patch(p2, pc);
   743  	restx(&ax, &oldax);
   744  }
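/*
 * What the "check" code above guarantees, written as plain C (an
 * illustration, not part of the compiler): the hardware IDIV is never
 * given a divisor of -1, so the most negative value divided by -1
 * cannot trap, and the Go-defined results are produced instead.
 */
#if 0
#include <stdint.h>

static int64_t
divfixup(int64_t a, int64_t b)
{
	if(b == -1)
		return (int64_t)(0 - (uint64_t)a);	/* a / -1 is -a, wrapping for INT64_MIN (two's complement assumed) */
	return a / b;	/* b == 0 is rejected earlier (panicdivide) */
}

static int64_t
modfixup(int64_t a, int64_t b)
{
	if(b == -1)
		return 0;	/* a % -1 is 0 */
	return a % b;
}
#endif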
   745  
   746  /*
   747   * register dr is one of the special ones (AX, CX, DI, SI, etc.).
   748   * we need to use it.  if it is already allocated as a temporary
   749   * (r > 1; can only happen if a routine like sgen passed a
   750   * special as cgen's res and then cgen used regalloc to reuse
   751   * it as its own temporary), then move it for now to another
   752   * register.  caller must call restx to move it back.
   753   * the move is not necessary if dr == res, because res is
   754   * known to be dead.
   755   */
   756  void
   757  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   758  {
   759  	int r;
   760  
   761  	r = reg[dr];
   762  
   763  	// save current ax and dx if they are live
   764  	// and not the destination
   765  	memset(oldx, 0, sizeof *oldx);
   766  	nodreg(x, t, dr);
   767  	if(r > 1 && !samereg(x, res)) {
   768  		regalloc(oldx, types[TINT64], N);
   769  		x->type = types[TINT64];
   770  		gmove(x, oldx);
   771  		x->type = t;
   772  		oldx->ostk = r;	// squirrel away old r value
   773  		reg[dr] = 1;
   774  	}
   775  }
   776  
   777  void
   778  restx(Node *x, Node *oldx)
   779  {
   780  	if(oldx->op != 0) {
   781  		x->type = types[TINT64];
   782  		reg[x->val.u.reg] = oldx->ostk;
   783  		gmove(oldx, x);
   784  		regfree(oldx);
   785  	}
   786  }
   787  
   788  /*
   789   * generate division according to op, one of:
   790   *	res = nl / nr
   791   *	res = nl % nr
   792   */
   793  void
   794  cgen_div(int op, Node *nl, Node *nr, Node *res)
   795  {
   796  	Node n1, n2, n3;
   797  	int w, a;
   798  	Magic m;
   799  
   800  	if(nr->op != OLITERAL)
   801  		goto longdiv;
   802  	w = nl->type->width*8;
   803  
   804  	// Front end handled 32-bit division. We only need to handle 64-bit.
   805  	// try to do division by multiply by (2^w)/d
   806  	// see hacker's delight chapter 10
   807  	switch(simtype[nl->type->etype]) {
   808  	default:
   809  		goto longdiv;
   810  
   811  	case TUINT64:
   812  		m.w = w;
   813  		m.ud = mpgetfix(nr->val.u.xval);
   814  		umagic(&m);
   815  		if(m.bad)
   816  			break;
   817  		if(op == OMOD)
   818  			goto longmod;
   819  
   820  		cgenr(nl, &n1, N);
   821  		nodconst(&n2, nl->type, m.um);
   822  		regalloc(&n3, nl->type, res);
   823  		cgen_hmul(&n1, &n2, &n3);
   824  
   825  		if(m.ua) {
   826  			// need to add numerator accounting for overflow
   827  			gins(optoas(OADD, nl->type), &n1, &n3);
   828  			nodconst(&n2, nl->type, 1);
   829  			gins(optoas(ORROTC, nl->type), &n2, &n3);
   830  			nodconst(&n2, nl->type, m.s-1);
   831  			gins(optoas(ORSH, nl->type), &n2, &n3);
   832  		} else {
   833  			nodconst(&n2, nl->type, m.s);
   834  			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift dx
   835  		}
   836  
   837  		gmove(&n3, res);
   838  		regfree(&n1);
   839  		regfree(&n3);
   840  		return;
   841  
   842  	case TINT64:
   843  		m.w = w;
   844  		m.sd = mpgetfix(nr->val.u.xval);
   845  		smagic(&m);
   846  		if(m.bad)
   847  			break;
   848  		if(op == OMOD)
   849  			goto longmod;
   850  
   851  		cgenr(nl, &n1, res);
   852  		nodconst(&n2, nl->type, m.sm);
   853  		regalloc(&n3, nl->type, N);
   854  		cgen_hmul(&n1, &n2, &n3);
   855  
   856  		if(m.sm < 0) {
   857  			// need to add numerator
   858  			gins(optoas(OADD, nl->type), &n1, &n3);
   859  		}
   860  
   861  		nodconst(&n2, nl->type, m.s);
   862  		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3
   863  
   864  		nodconst(&n2, nl->type, w-1);
   865  		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
   866  		gins(optoas(OSUB, nl->type), &n1, &n3);	// subtract -1 (i.e. add 1) when numerator is negative
   867  
   868  		if(m.sd < 0) {
   869  			// this could probably be removed
   870  			// by factoring it into the multiplier
   871  			gins(optoas(OMINUS, nl->type), N, &n3);
   872  		}
   873  
   874  		gmove(&n3, res);
   875  		regfree(&n1);
   876  		regfree(&n3);
   877  		return;
   878  	}
   879  	goto longdiv;
   880  
   881  longdiv:
   882  	// division and mod using (slow) hardware instruction
   883  	dodiv(op, nl, nr, res);
   884  	return;
   885  
   886  longmod:
   887  	// mod using formula A%B = A-(A/B*B) but
   888  	// we know that there is a fast algorithm for A/B
   889  	regalloc(&n1, nl->type, res);
   890  	cgen(nl, &n1);
   891  	regalloc(&n2, nl->type, N);
   892  	cgen_div(ODIV, &n1, nr, &n2);
   893  	a = optoas(OMUL, nl->type);
   894  	if(w == 8) {
   895  		// use 2-operand 16-bit multiply
   896  		// because there is no 2-operand 8-bit multiply
   897  		a = AIMULW;
   898  	}
   899  	if(!smallintconst(nr)) {
   900  		regalloc(&n3, nl->type, N);
   901  		cgen(nr, &n3);
   902  		gins(a, &n3, &n2);
   903  		regfree(&n3);
   904  	} else
   905  		gins(a, nr, &n2);
   906  	gins(optoas(OSUB, nl->type), &n2, &n1);
   907  	gmove(&n1, res);
   908  	regfree(&n1);
   909  	regfree(&n2);
   910  }
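/*
 * One concrete instance (illustration only, assuming a compiler with
 * unsigned __int128) of the magic-number division that cgen_div emits:
 * unsigned 64-bit division by 3 becomes a high multiply by
 * 0xAAAAAAAAAAAAAAAB followed by a right shift of 1 (Hacker's Delight,
 * chapter 10).  The high multiply corresponds to cgen_hmul below and
 * the shift to the ORSH instruction.
 */
#if 0
#include <stdint.h>
#include <assert.h>

static uint64_t
div3(uint64_t n)
{
	uint64_t hi;

	hi = (uint64_t)(((unsigned __int128)n * 0xAAAAAAAAAAAAAAABULL) >> 64);
	return hi >> 1;
}

static void
checkdiv3(void)
{
	uint64_t n;

	for(n = 0; n < 100000; n++)
		assert(div3(n) == n/3);
}
#endif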
   911  
   912  /*
   913   * generate high multiply:
   914   *   res = (nl*nr) >> width
   915   */
   916  void
   917  cgen_hmul(Node *nl, Node *nr, Node *res)
   918  {
   919  	Type *t;
   920  	int a;
   921  	Node n1, n2, ax, dx, *tmp;
   922  
   923  	t = nl->type;
   924  	a = optoas(OHMUL, t);
   925  	if(nl->ullman < nr->ullman) {
   926  		tmp = nl;
   927  		nl = nr;
   928  		nr = tmp;
   929  	}
   930  	cgenr(nl, &n1, res);
   931  	cgenr(nr, &n2, N);
   932  	nodreg(&ax, t, D_AX);
   933  	gmove(&n1, &ax);
   934  	gins(a, &n2, N);
   935  	regfree(&n2);
   936  	regfree(&n1);
   937  
   938  	if(t->width == 1) {
   939  		// byte multiply behaves differently.
   940  		nodreg(&ax, t, D_AH);
   941  		nodreg(&dx, t, D_DX);
   942  		gmove(&ax, &dx);
   943  	}
   944  	nodreg(&dx, t, D_DX);
   945  	gmove(&dx, res);
   946  }
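/*
 * cgen_hmul above relies on the one-operand MUL/IMUL leaving the upper
 * half of the product in DX (AH for byte operands).  The equivalent in
 * C, assuming a compiler with __int128:
 */
#if 0
#include <stdint.h>

static uint64_t
hmulu64(uint64_t a, uint64_t b)
{
	return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

static int64_t
hmuls64(int64_t a, int64_t b)
{
	return (int64_t)(((__int128)a * b) >> 64);
}
#endif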
   947  
   948  /*
   949   * generate shift according to op, one of:
   950   *	res = nl << nr
   951   *	res = nl >> nr
   952   */
   953  void
   954  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   955  {
   956  	Node n1, n2, n3, n4, n5, cx, oldcx;
   957  	int a, rcx;
   958  	Prog *p1;
   959  	uvlong sc;
   960  	Type *tcount;
   961  
   962  	a = optoas(op, nl->type);
   963  
   964  	if(nr->op == OLITERAL) {
   965  		regalloc(&n1, nl->type, res);
   966  		cgen(nl, &n1);
   967  		sc = mpgetfix(nr->val.u.xval);
   968  		if(sc >= nl->type->width*8) {
   969  			// large shift gets 2 shifts by width-1
   970  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   971  			gins(a, &n3, &n1);
   972  			gins(a, &n3, &n1);
   973  		} else
   974  			gins(a, nr, &n1);
   975  		gmove(&n1, res);
   976  		regfree(&n1);
   977  		goto ret;
   978  	}
   979  
   980  	if(nl->ullman >= UINF) {
   981  		tempname(&n4, nl->type);
   982  		cgen(nl, &n4);
   983  		nl = &n4;
   984  	}
   985  	if(nr->ullman >= UINF) {
   986  		tempname(&n5, nr->type);
   987  		cgen(nr, &n5);
   988  		nr = &n5;
   989  	}
   990  
   991  	rcx = reg[D_CX];
   992  	nodreg(&n1, types[TUINT32], D_CX);
   993  	
   994  	// Allow either uint32 or uint64 as shift type,
   995  	// to avoid unnecessary conversion from uint32 to uint64
   996  	// just to do the comparison.
   997  	tcount = types[simtype[nr->type->etype]];
   998  	if(tcount->etype < TUINT32)
   999  		tcount = types[TUINT32];
  1000  
  1001  	regalloc(&n1, nr->type, &n1);		// to hold the shift count in CX
  1002  	regalloc(&n3, tcount, &n1);	// to clear high bits of CX
  1003  
  1004  	nodreg(&cx, types[TUINT64], D_CX);
  1005  	memset(&oldcx, 0, sizeof oldcx);
  1006  	if(rcx > 0 && !samereg(&cx, res)) {
  1007  		regalloc(&oldcx, types[TUINT64], N);
  1008  		gmove(&cx, &oldcx);
  1009  	}
  1010  	cx.type = tcount;
  1011  
  1012  	if(samereg(&cx, res))
  1013  		regalloc(&n2, nl->type, N);
  1014  	else
  1015  		regalloc(&n2, nl->type, res);
  1016  	if(nl->ullman >= nr->ullman) {
  1017  		cgen(nl, &n2);
  1018  		cgen(nr, &n1);
  1019  		gmove(&n1, &n3);
  1020  	} else {
  1021  		cgen(nr, &n1);
  1022  		gmove(&n1, &n3);
  1023  		cgen(nl, &n2);
  1024  	}
  1025  	regfree(&n3);
  1026  
  1027  	// test and fix up large shifts
  1028  	if(!bounded) {
  1029  		nodconst(&n3, tcount, nl->type->width*8);
  1030  		gins(optoas(OCMP, tcount), &n1, &n3);
  1031  		p1 = gbranch(optoas(OLT, tcount), T, +1);
  1032  		if(op == ORSH && issigned[nl->type->etype]) {
  1033  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
  1034  			gins(a, &n3, &n2);
  1035  		} else {
  1036  			nodconst(&n3, nl->type, 0);
  1037  			gmove(&n3, &n2);
  1038  		}
  1039  		patch(p1, pc);
  1040  	}
  1041  
  1042  	gins(a, &n1, &n2);
  1043  
  1044  	if(oldcx.op != 0) {
  1045  		cx.type = types[TUINT64];
  1046  		gmove(&oldcx, &cx);
  1047  		regfree(&oldcx);
  1048  	}
  1049  
  1050  	gmove(&n2, res);
  1051  
  1052  	regfree(&n1);
  1053  	regfree(&n2);
  1054  
  1055  ret:
  1056  	;
  1057  }
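/*
 * The large-shift fix-up above exists because the x86 shift instructions
 * mask the count (SHLQ/SHRQ use only the low 6 bits of CX) while Go
 * defines shifts by width or more.  The same fix-up in plain C, as an
 * illustration:
 */
#if 0
#include <stdint.h>

static uint64_t
golshift(uint64_t x, uint64_t s)
{
	if(s >= 64)
		return 0;	/* over-shifting an unsigned value yields 0 */
	return x << s;
}

static int64_t
gorshift(int64_t x, uint64_t s)
{
	if(s >= 64)
		s = 63;		/* signed >> saturates at width-1: all sign bits */
	return x >> s;		/* assumes an arithmetic right shift of negatives */
}
#endif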
  1058  
  1059  /*
  1060   * generate byte multiply:
  1061   *	res = nl * nr
  1062   * there is no 2-operand byte multiply instruction so
  1063   * we do a full-width multiplication and truncate afterwards.
  1064   */
  1065  void
  1066  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
  1067  {
  1068  	Node n1, n2, n1b, n2b, *tmp;
  1069  	Type *t;
  1070  	int a;
  1071  
  1072  	// largest ullman on left.
  1073  	if(nl->ullman < nr->ullman) {
  1074  		tmp = nl;
  1075  		nl = nr;
  1076  		nr = tmp;
  1077  	}
  1078  
  1079  	// generate operands in "8-bit" registers.
  1080  	regalloc(&n1b, nl->type, res);
  1081  	cgen(nl, &n1b);
  1082  	regalloc(&n2b, nr->type, N);
  1083  	cgen(nr, &n2b);
  1084  
  1085  	// perform full-width multiplication.
  1086  	t = types[TUINT64];
  1087  	if(issigned[nl->type->etype])
  1088  		t = types[TINT64];
  1089  	nodreg(&n1, t, n1b.val.u.reg);
  1090  	nodreg(&n2, t, n2b.val.u.reg);
  1091  	a = optoas(op, t);
  1092  	gins(a, &n2, &n1);
  1093  
  1094  	// truncate.
  1095  	gmove(&n1, res);
  1096  	regfree(&n1b);
  1097  	regfree(&n2b);
  1098  }
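/*
 * The widening in cgen_bmul above is harmless because only the low byte
 * of the product is kept: multiplying in full width and then truncating
 * gives the same low 8 bits as an 8-bit multiply would.  In C terms:
 */
#if 0
#include <stdint.h>

static uint8_t
bytemul(uint8_t a, uint8_t b)
{
	return (uint8_t)((uint64_t)a * (uint64_t)b);	/* the 8-bit product mod 256 */
}
#endif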
  1099  
  1100  void
  1101  clearfat(Node *nl)
  1102  {
  1103  	int64 w, c, q;
  1104  	Node n1, oldn1, ax, oldax, di, z;
  1105  	Prog *p;
  1106  
  1107  	/* clear a fat object */
  1108  	if(debug['g'])
  1109  		dump("\nclearfat", nl);
  1110  
  1111  	w = nl->type->width;
  1112  	// Avoid taking the address for simple enough types.
  1113  	if(componentgen(N, nl))
  1114  		return;
  1115  
  1116  	c = w % 8;	// bytes
  1117  	q = w / 8;	// quads
  1118  
  1119  	if(q < 4) {
  1120  		// Write sequence of MOV 0, off(base) instead of using STOSQ.
  1121  		// The hope is that although the code will be slightly longer,
  1122  		// the MOVs will have no dependencies and pipeline better
  1123  		// than the unrolled STOSQ loop.
  1124  		// NOTE: Must use agen, not igen, so that optimizer sees address
  1125  		// being taken. We are not writing on field boundaries.
  1126  		agenr(nl, &n1, N);
  1127  		n1.op = OINDREG;
  1128  		nodconst(&z, types[TUINT64], 0);
  1129  		while(q-- > 0) {
  1130  			n1.type = z.type;
  1131  			gins(AMOVQ, &z, &n1);
  1132  			n1.xoffset += 8;
  1133  		}
  1134  		if(c >= 4) {
  1135  			nodconst(&z, types[TUINT32], 0);
  1136  			n1.type = z.type;
  1137  			gins(AMOVL, &z, &n1);
  1138  			n1.xoffset += 4;
  1139  			c -= 4;
  1140  		}
  1141  		nodconst(&z, types[TUINT8], 0);
  1142  		while(c-- > 0) {
  1143  			n1.type = z.type;
  1144  			gins(AMOVB, &z, &n1);
  1145  			n1.xoffset++;
  1146  		}
  1147  		regfree(&n1);
  1148  		return;
  1149  	}
  1150  
  1151  	savex(D_DI, &n1, &oldn1, N, types[tptr]);
  1152  	agen(nl, &n1);
  1153  
  1154  	savex(D_AX, &ax, &oldax, N, types[tptr]);
  1155  	gconreg(AMOVL, 0, D_AX);
  1156  
  1157  	if(q > 128 || nacl) {
  1158  		gconreg(movptr, q, D_CX);
  1159  		gins(AREP, N, N);	// repeat
  1160  		gins(ASTOSQ, N, N);	// STOSQ AX,*(DI)+
  1161  	} else {
  1162  		p = gins(ADUFFZERO, N, N);
  1163  		p->to.type = D_ADDR;
  1164  		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
  1165  		// 2 and 128 = magic constants: see ../../runtime/asm_amd64.s
  1166  		p->to.offset = 2*(128-q);
  1167  	}
  1168  
  1169  	z = ax;
  1170  	di = n1;
  1171  	if(w >= 8 && c >= 4) {
  1172  		di.op = OINDREG;
  1173  		di.type = z.type = types[TINT64];
  1174  		p = gins(AMOVQ, &z, &di);
  1175  		p->to.scale = 1;
  1176  		p->to.offset = c-8;
  1177  	} else if(c >= 4) {
  1178  		di.op = OINDREG;
  1179  		di.type = z.type = types[TINT32];
  1180  		p = gins(AMOVL, &z, &di);
  1181  		if(c > 4) {
  1182  			p = gins(AMOVL, &z, &di);
  1183  			p->to.scale = 1;
  1184  			p->to.offset = c-4;
  1185  		}
  1186  	} else
  1187  	while(c > 0) {
  1188  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
  1189  		c--;
  1190  	}
  1191  
  1192  	restx(&n1, &oldn1);
  1193  	restx(&ax, &oldax);
  1194  }
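/*
 * Shape of clearfat's tail handling above, in plain C (illustration
 * only): after the q 8-byte stores, the remaining c bytes are finished
 * either with one 8-byte store that deliberately overlaps the already
 * cleared region and ends exactly at w, or with 4-byte/1-byte stores.
 */
#if 0
#include <stdint.h>
#include <string.h>

static void
clearsketch(unsigned char *base, int64_t w)
{
	int64_t c, q;

	c = w % 8;
	q = w / 8;
	memset(base, 0, q*8);			/* the MOVQ/DUFFZERO/STOSQ part */
	if(w >= 8 && c >= 4)
		memset(base + w - 8, 0, 8);	/* one overlapping quad store */
	else
		memset(base + q*8, 0, c);	/* MOVL and/or byte stores */
}
#endif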
  1195  
  1196  // Called after regopt and peep have run.
  1197  // Expand CHECKNIL pseudo-op into actual nil pointer check.
  1198  void
  1199  expandchecks(Prog *firstp)
  1200  {
  1201  	Prog *p, *p1, *p2;
  1202  
  1203  	for(p = firstp; p != P; p = p->link) {
  1204  		if(p->as != ACHECKNIL)
  1205  			continue;
  1206  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
  1207  			warnl(p->lineno, "generated nil check");
  1208  		// check is
  1209  		//	CMP arg, $0
  1210  		//	JNE 2(PC) (likely)
  1211  		//	MOV AX, 0
  1212  		p1 = mal(sizeof *p1);
  1213  		p2 = mal(sizeof *p2);
  1214  		clearp(p1);
  1215  		clearp(p2);
  1216  		p1->link = p2;
  1217  		p2->link = p->link;
  1218  		p->link = p1;
  1219  		p1->lineno = p->lineno;
  1220  		p2->lineno = p->lineno;
  1221  		p1->pc = 9999;
  1222  		p2->pc = 9999;
  1223  		p->as = cmpptr;
  1224  		p->to.type = D_CONST;
  1225  		p->to.offset = 0;
  1226  		p1->as = AJNE;
  1227  		p1->from.type = D_CONST;
  1228  		p1->from.offset = 1; // likely
  1229  		p1->to.type = D_BRANCH;
  1230  		p1->to.u.branch = p2->link;
  1231  		// crash by write to memory address 0.
  1232  		// if possible, since we know arg is 0, use 0(arg),
  1233  		// which will be shorter to encode than plain 0.
  1234  		p2->as = AMOVL;
  1235  		p2->from.type = D_AX;
  1236  		if(regtyp(&p->from))
  1237  			p2->to.type = p->from.type + D_INDIR;
  1238  		else
  1239  			p2->to.type = D_INDIR+D_NONE;
  1240  		p2->to.offset = 0;
  1241  	}
  1242  }