github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/cmd/6g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  static Prog* appendp(Prog*, int, int, vlong, int, vlong);
    13  
    14  void
    15  defframe(Prog *ptxt, Bvec *bv)
    16  {
    17  	int i, j;
    18  	uint32 frame;
    19  	Prog *p;
    20  
    21  	// fill in argument size
    22  	ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
    23  
    24  	// fill in final stack size
    25  	ptxt->to.offset <<= 32;
    26  	frame = rnd(stksize+maxarg, widthptr);
    27  	ptxt->to.offset |= frame;
    28  
    29  	// insert code to clear pointered part of the frame,
    30  	// so that garbage collector only sees initialized values
    31  	// when it looks for pointers.
    32  	p = ptxt;
    33  	if(stkzerosize >= 8*widthptr) {
    34  		p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0);
    35  		p = appendp(p, AMOVQ, D_CONST, stkzerosize/widthptr, D_CX, 0);
    36  		p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0);
    37  		p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
    38  		appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0);
    39  	} else {
    40  		for(i=0, j=(stkptrsize-stkzerosize)/widthptr*2; i<stkzerosize; i+=widthptr, j+=2)
    41  			if(bvget(bv, j) || bvget(bv, j+1))
    42  				p = appendp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i);
    43  	}
    44  }
    45  
    46  static Prog*
    47  appendp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)
    48  {
    49  	Prog *q;
    50  	
    51  	q = mal(sizeof(*q));
    52  	clearp(q);
    53  	q->as = as;
    54  	q->lineno = p->lineno;
    55  	q->from.type = ftype;
    56  	q->from.offset = foffset;
    57  	q->to.type = ttype;
    58  	q->to.offset = toffset;
    59  	q->link = p->link;
    60  	p->link = q;
    61  	return q;
    62  }
    63  
    64  // Sweep the prog list to mark any used nodes.
    65  void
    66  markautoused(Prog* p)
    67  {
    68  	for (; p; p = p->link) {
    69  		if (p->as == ATYPE)
    70  			continue;
    71  
    72  		if (p->from.type == D_AUTO && p->from.node)
    73  			p->from.node->used = 1;
    74  
    75  		if (p->to.type == D_AUTO && p->to.node)
    76  			p->to.node->used = 1;
    77  	}
    78  }
    79  
    80  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
    81  void
    82  fixautoused(Prog *p)
    83  {
    84  	Prog **lp;
    85  
    86  	for (lp=&p; (p=*lp) != P; ) {
    87  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
    88  			*lp = p->link;
    89  			continue;
    90  		}
    91  		if (p->from.type == D_AUTO && p->from.node)
    92  			p->from.offset += p->from.node->stkdelta;
    93  
    94  		if (p->to.type == D_AUTO && p->to.node)
    95  			p->to.offset += p->to.node->stkdelta;
    96  
    97  		lp = &p->link;
    98  	}
    99  }
   100  
   101  
   102  /*
   103   * generate:
   104   *	call f
   105   *	proc=-1	normal call but no return
   106   *	proc=0	normal call
   107   *	proc=1	goroutine run in new proc
   108   *	proc=2	defer call save away stack
   109    *	proc=3	normal call to C pointer (not Go func value)
   110   */
   111  void
   112  ginscall(Node *f, int proc)
   113  {
   114  	int32 arg;
   115  	Prog *p;
   116  	Node reg, con;
   117  	Node r1;
   118  
   119  	if(f->type != T)
   120  		setmaxarg(f->type);
   121  
   122  	arg = -1;
   123  	// Most functions have a fixed-size argument block, so traceback uses that during unwind.
   124  	// Not all, though: there are some variadic functions in package runtime,
   125  	// and for those we emit call-specific metadata recorded by caller.
   126  	// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
   127  	// so we do this for all indirect calls as well.
   128  	if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
   129  		arg = f->type->argwid;
   130  		if(proc == 1 || proc == 2)
   131  			arg += 2*widthptr;
   132  	}
   133  
   134  	if(arg != -1)
   135  		gargsize(arg);
   136  
   137  	switch(proc) {
   138  	default:
   139  		fatal("ginscall: bad proc %d", proc);
   140  		break;
   141  
   142  	case 0:	// normal call
   143  	case -1:	// normal call but no return
   144  		if(f->op == ONAME && f->class == PFUNC) {
   145  			if(f == deferreturn) {
   146  				// Deferred calls will appear to be returning to
   147  				// the CALL deferreturn(SB) that we are about to emit.
   148  				// However, the stack trace code will show the line
   149  				// of the instruction byte before the return PC. 
   150  				// To avoid that being an unrelated instruction,
   151  				// insert an x86 NOP that we will have the right line number.
   152  				// x86 NOP 0x90 is really XCHG AX, AX; use that description
   153  				// because the NOP pseudo-instruction would be removed by
   154  				// the linker.
   155  				nodreg(&reg, types[TINT], D_AX);
   156  				gins(AXCHGL, &reg, &reg);
   157  			}
   158  			p = gins(ACALL, N, f);
   159  			afunclit(&p->to, f);
   160  			if(proc == -1 || noreturn(p))
   161  				gins(AUNDEF, N, N);
   162  			break;
   163  		}
   164  		nodreg(&reg, types[tptr], D_DX);
   165  		nodreg(&r1, types[tptr], D_BX);
   166  		gmove(f, &reg);
   167  		reg.op = OINDREG;
   168  		gmove(&reg, &r1);
   169  		reg.op = OREGISTER;
   170  		gins(ACALL, &reg, &r1);
   171  		break;
   172  	
   173  	case 3:	// normal call of c function pointer
   174  		gins(ACALL, N, f);
   175  		break;
   176  
   177  	case 1:	// call in new proc (go)
   178  	case 2:	// deferred call (defer)
   179  		nodreg(&reg, types[TINT64], D_CX);
   180  		if(flag_largemodel) {
   181  			regalloc(&r1, f->type, f);
   182  			gmove(f, &r1);
   183  			gins(APUSHQ, &r1, N);
   184  			regfree(&r1);
   185  		} else {
   186  			gins(APUSHQ, f, N);
   187  		}
   188  		nodconst(&con, types[TINT32], argsize(f->type));
   189  		gins(APUSHQ, &con, N);
   190  		if(proc == 1)
   191  			ginscall(newproc, 0);
   192  		else {
   193  			if(!hasdefer)
   194  				fatal("hasdefer=0 but has defer");
   195  			ginscall(deferproc, 0);
   196  		}
   197  		gins(APOPQ, N, &reg);
   198  		gins(APOPQ, N, &reg);
   199  		if(proc == 2) {
   200  			nodreg(&reg, types[TINT64], D_AX);
   201  			gins(ATESTQ, &reg, &reg);
   202  			patch(gbranch(AJNE, T, -1), retpc);
   203  		}
   204  		break;
   205  	}
   206  
   207  	if(arg != -1)
   208  		gargsize(-1);
   209  }
   210  
   211  /*
   212   * n is call to interface method.
   213   * generate res = n.
   214   */
   215  void
   216  cgen_callinter(Node *n, Node *res, int proc)
   217  {
   218  	Node *i, *f;
   219  	Node tmpi, nodi, nodo, nodr, nodsp;
   220  
   221  	i = n->left;
   222  	if(i->op != ODOTINTER)
   223  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   224  
   225  	f = i->right;		// field
   226  	if(f->op != ONAME)
   227  		fatal("cgen_callinter: not ONAME %O", f->op);
   228  
   229  	i = i->left;		// interface
   230  
   231  	if(!i->addable) {
   232  		tempname(&tmpi, i->type);
   233  		cgen(i, &tmpi);
   234  		i = &tmpi;
   235  	}
   236  
   237  	genlist(n->list);		// assign the args
   238  
   239  	// i is now addable, prepare an indirected
   240  	// register to hold its address.
   241  	igen(i, &nodi, res);		// REG = &inter
   242  
   243  	nodindreg(&nodsp, types[tptr], D_SP);
   244  	nodi.type = types[tptr];
   245  	nodi.xoffset += widthptr;
   246  	cgen(&nodi, &nodsp);	// 0(SP) = 8(REG) -- i.data
   247  
   248  	regalloc(&nodo, types[tptr], res);
   249  	nodi.type = types[tptr];
   250  	nodi.xoffset -= widthptr;
   251  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   252  	regfree(&nodi);
   253  
   254  	regalloc(&nodr, types[tptr], &nodo);
   255  	if(n->left->xoffset == BADWIDTH)
   256  		fatal("cgen_callinter: badwidth");
   257  	cgen_checknil(&nodo); // in case offset is huge
   258  	nodo.op = OINDREG;
   259  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   260  	if(proc == 0) {
   261  		// plain call: use direct c function pointer - more efficient
   262  		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
   263  		proc = 3;
   264  	} else {
   265  		// go/defer. generate go func value.
   266  		gins(ALEAQ, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
   267  	}
   268  
   269  	nodr.type = n->left->type;
   270  	ginscall(&nodr, proc);
   271  
   272  	regfree(&nodr);
   273  	regfree(&nodo);
   274  }
   275  
   276  /*
   277   * generate function call;
   278   *	proc=0	normal call
   279   *	proc=1	goroutine run in new proc
   280   *	proc=2	defer call save away stack
   281   */
   282  void
   283  cgen_call(Node *n, int proc)
   284  {
   285  	Type *t;
   286  	Node nod, afun;
   287  
   288  	if(n == N)
   289  		return;
   290  
   291  	if(n->left->ullman >= UINF) {
   292  		// if name involves a fn call
   293  		// precompute the address of the fn
   294  		tempname(&afun, types[tptr]);
   295  		cgen(n->left, &afun);
   296  	}
   297  
   298  	genlist(n->list);		// assign the args
   299  	t = n->left->type;
   300  
   301  	// call tempname pointer
   302  	if(n->left->ullman >= UINF) {
   303  		regalloc(&nod, types[tptr], N);
   304  		cgen_as(&nod, &afun);
   305  		nod.type = t;
   306  		ginscall(&nod, proc);
   307  		regfree(&nod);
   308  		return;
   309  	}
   310  
   311  	// call pointer
   312  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   313  		regalloc(&nod, types[tptr], N);
   314  		cgen_as(&nod, n->left);
   315  		nod.type = t;
   316  		ginscall(&nod, proc);
   317  		regfree(&nod);
   318  		return;
   319  	}
   320  
   321  	// call direct
   322  	n->left->method = 1;
   323  	ginscall(n->left, proc);
   324  }
   325  
   326  /*
   327   * call to n has already been generated.
   328   * generate:
   329   *	res = return value from call.
   330   */
   331  void
   332  cgen_callret(Node *n, Node *res)
   333  {
   334  	Node nod;
   335  	Type *fp, *t;
   336  	Iter flist;
   337  
   338  	t = n->left->type;
   339  	if(t->etype == TPTR32 || t->etype == TPTR64)
   340  		t = t->type;
   341  
   342  	fp = structfirst(&flist, getoutarg(t));
   343  	if(fp == T)
   344  		fatal("cgen_callret: nil");
   345  
   346  	memset(&nod, 0, sizeof(nod));
   347  	nod.op = OINDREG;
   348  	nod.val.u.reg = D_SP;
   349  	nod.addable = 1;
   350  
   351  	nod.xoffset = fp->width;
   352  	nod.type = fp->type;
   353  	cgen_as(res, &nod);
   354  }
   355  
   356  /*
   357   * call to n has already been generated.
   358   * generate:
   359   *	res = &return value from call.
   360   */
   361  void
   362  cgen_aret(Node *n, Node *res)
   363  {
   364  	Node nod1, nod2;
   365  	Type *fp, *t;
   366  	Iter flist;
   367  
   368  	t = n->left->type;
   369  	if(isptr[t->etype])
   370  		t = t->type;
   371  
   372  	fp = structfirst(&flist, getoutarg(t));
   373  	if(fp == T)
   374  		fatal("cgen_aret: nil");
   375  
   376  	memset(&nod1, 0, sizeof(nod1));
   377  	nod1.op = OINDREG;
   378  	nod1.val.u.reg = D_SP;
   379  	nod1.addable = 1;
   380  
   381  	nod1.xoffset = fp->width;
   382  	nod1.type = fp->type;
   383  
   384  	if(res->op != OREGISTER) {
   385  		regalloc(&nod2, types[tptr], res);
   386  		gins(ALEAQ, &nod1, &nod2);
   387  		gins(AMOVQ, &nod2, res);
   388  		regfree(&nod2);
   389  	} else
   390  		gins(ALEAQ, &nod1, res);
   391  }
   392  
   393  /*
   394   * generate return.
   395   * n->left is assignments to return values.
   396   */
   397  void
   398  cgen_ret(Node *n)
   399  {
   400  	Prog *p;
   401  
   402  	genlist(n->list);		// copy out args
   403  	if(hasdefer || curfn->exit) {
   404  		gjmp(retpc);
   405  		return;
   406  	}
   407  	p = gins(ARET, N, N);
   408  	if(n->op == ORETJMP) {
   409  		p->to.type = D_EXTERN;
   410  		p->to.sym = n->left->sym;
   411  	}
   412  }
   413  
   414  /*
   415   * generate += *= etc.
   416   */
   417  void
   418  cgen_asop(Node *n)
   419  {
   420  	Node n1, n2, n3, n4;
   421  	Node *nl, *nr;
   422  	Prog *p1;
   423  	Addr addr;
   424  	int a;
   425  
   426  	nl = n->left;
   427  	nr = n->right;
   428  
   429  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   430  		tempname(&n1, nr->type);
   431  		cgen(nr, &n1);
   432  		n2 = *n;
   433  		n2.right = &n1;
   434  		cgen_asop(&n2);
   435  		goto ret;
   436  	}
   437  
   438  	if(!isint[nl->type->etype])
   439  		goto hard;
   440  	if(!isint[nr->type->etype])
   441  		goto hard;
   442  
   443  	switch(n->etype) {
   444  	case OADD:
   445  		if(smallintconst(nr))
   446  		if(mpgetfix(nr->val.u.xval) == 1) {
   447  			a = optoas(OINC, nl->type);
   448  			if(nl->addable) {
   449  				gins(a, N, nl);
   450  				goto ret;
   451  			}
   452  			if(sudoaddable(a, nl, &addr)) {
   453  				p1 = gins(a, N, N);
   454  				p1->to = addr;
   455  				sudoclean();
   456  				goto ret;
   457  			}
   458  		}
   459  		break;
   460  
   461  	case OSUB:
   462  		if(smallintconst(nr))
   463  		if(mpgetfix(nr->val.u.xval) == 1) {
   464  			a = optoas(ODEC, nl->type);
   465  			if(nl->addable) {
   466  				gins(a, N, nl);
   467  				goto ret;
   468  			}
   469  			if(sudoaddable(a, nl, &addr)) {
   470  				p1 = gins(a, N, N);
   471  				p1->to = addr;
   472  				sudoclean();
   473  				goto ret;
   474  			}
   475  		}
   476  		break;
   477  	}
   478  
   479  	switch(n->etype) {
   480  	case OADD:
   481  	case OSUB:
   482  	case OXOR:
   483  	case OAND:
   484  	case OOR:
   485  		a = optoas(n->etype, nl->type);
   486  		if(nl->addable) {
   487  			if(smallintconst(nr)) {
   488  				gins(a, nr, nl);
   489  				goto ret;
   490  			}
   491  			regalloc(&n2, nr->type, N);
   492  			cgen(nr, &n2);
   493  			gins(a, &n2, nl);
   494  			regfree(&n2);
   495  			goto ret;
   496  		}
   497  		if(nr->ullman < UINF)
   498  		if(sudoaddable(a, nl, &addr)) {
   499  			if(smallintconst(nr)) {
   500  				p1 = gins(a, nr, N);
   501  				p1->to = addr;
   502  				sudoclean();
   503  				goto ret;
   504  			}
   505  			regalloc(&n2, nr->type, N);
   506  			cgen(nr, &n2);
   507  			p1 = gins(a, &n2, N);
   508  			p1->to = addr;
   509  			regfree(&n2);
   510  			sudoclean();
   511  			goto ret;
   512  		}
   513  	}
   514  
   515  hard:
   516  	n2.op = 0;
   517  	n1.op = 0;
   518  	if(nr->op == OLITERAL) {
   519  		// don't allocate a register for literals.
   520  	} else if(nr->ullman >= nl->ullman || nl->addable) {
   521  		regalloc(&n2, nr->type, N);
   522  		cgen(nr, &n2);
   523  		nr = &n2;
   524  	} else {
   525  		tempname(&n2, nr->type);
   526  		cgen(nr, &n2);
   527  		nr = &n2;
   528  	}
   529  	if(!nl->addable) {
   530  		igen(nl, &n1, N);
   531  		nl = &n1;
   532  	}
   533  
   534  	n3 = *n;
   535  	n3.left = nl;
   536  	n3.right = nr;
   537  	n3.op = n->etype;
   538  
   539  	regalloc(&n4, nl->type, N);
   540  	cgen(&n3, &n4);
   541  	gmove(&n4, nl);
   542  
   543  	if(n1.op)
   544  		regfree(&n1);
   545  	if(n2.op == OREGISTER)
   546  		regfree(&n2);
   547  	regfree(&n4);
   548  
   549  ret:
   550  	;
   551  }
   552  
   553  int
   554  samereg(Node *a, Node *b)
   555  {
   556  	if(a == N || b == N)
   557  		return 0;
   558  	if(a->op != OREGISTER)
   559  		return 0;
   560  	if(b->op != OREGISTER)
   561  		return 0;
   562  	if(a->val.u.reg != b->val.u.reg)
   563  		return 0;
   564  	return 1;
   565  }
   566  
   567  /*
   568   * generate division.
   569   * generates one of:
   570   *	res = nl / nr
   571   *	res = nl % nr
   572   * according to op.
   573   */
   574  void
   575  dodiv(int op, Node *nl, Node *nr, Node *res)
   576  {
   577  	int a, check;
   578  	Node n3, n4;
   579  	Type *t, *t0;
   580  	Node ax, dx, ax1, n31, oldax, olddx;
   581  	Prog *p1, *p2;
   582  
   583  	// Have to be careful about handling
   584  	// most negative int divided by -1 correctly.
   585  	// The hardware will trap.
   586  	// Also the byte divide instruction needs AH,
   587  	// which we otherwise don't have to deal with.
   588  	// Easiest way to avoid for int8, int16: use int32.
   589  	// For int32 and int64, use explicit test.
   590  	// Could use int64 hw for int32.
   591  	t = nl->type;
   592  	t0 = t;
   593  	check = 0;
   594  	if(issigned[t->etype]) {
   595  		check = 1;
   596  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
   597  			check = 0;
   598  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   599  			check = 0;
   600  	}
   601  	if(t->width < 4) {
   602  		if(issigned[t->etype])
   603  			t = types[TINT32];
   604  		else
   605  			t = types[TUINT32];
   606  		check = 0;
   607  	}
   608  	a = optoas(op, t);
   609  
   610  	regalloc(&n3, t0, N);
   611  	if(nl->ullman >= nr->ullman) {
   612  		savex(D_AX, &ax, &oldax, res, t0);
   613  		cgen(nl, &ax);
   614  		regalloc(&ax, t0, &ax);	// mark ax live during cgen
   615  		cgen(nr, &n3);
   616  		regfree(&ax);
   617  	} else {
   618  		cgen(nr, &n3);
   619  		savex(D_AX, &ax, &oldax, res, t0);
   620  		cgen(nl, &ax);
   621  	}
   622  	if(t != t0) {
   623  		// Convert
   624  		ax1 = ax;
   625  		n31 = n3;
   626  		ax.type = t;
   627  		n3.type = t;
   628  		gmove(&ax1, &ax);
   629  		gmove(&n31, &n3);
   630  	}
   631  
   632  	p2 = P;
   633  	if(check) {
   634  		nodconst(&n4, t, -1);
   635  		gins(optoas(OCMP, t), &n3, &n4);
   636  		p1 = gbranch(optoas(ONE, t), T, +1);
   637  		if(op == ODIV) {
   638  			// a / (-1) is -a.
   639  			gins(optoas(OMINUS, t), N, &ax);
   640  			gmove(&ax, res);
   641  		} else {
   642  			// a % (-1) is 0.
   643  			nodconst(&n4, t, 0);
   644  			gmove(&n4, res);
   645  		}
   646  		p2 = gbranch(AJMP, T, 0);
   647  		patch(p1, pc);
   648  	}
   649  	savex(D_DX, &dx, &olddx, res, t);
   650  	if(!issigned[t->etype]) {
   651  		nodconst(&n4, t, 0);
   652  		gmove(&n4, &dx);
   653  	} else
   654  		gins(optoas(OEXTEND, t), N, N);
   655  	gins(a, &n3, N);
   656  	regfree(&n3);
   657  	if(op == ODIV)
   658  		gmove(&ax, res);
   659  	else
   660  		gmove(&dx, res);
   661  	restx(&dx, &olddx);
   662  	if(check)
   663  		patch(p2, pc);
   664  	restx(&ax, &oldax);
   665  }
   666  
   667  /*
   668   * register dr is one of the special ones (AX, CX, DI, SI, etc.).
   669   * we need to use it.  if it is already allocated as a temporary
   670   * (r > 1; can only happen if a routine like sgen passed a
   671   * special as cgen's res and then cgen used regalloc to reuse
   672   * it as its own temporary), then move it for now to another
   673   * register.  caller must call restx to move it back.
   674   * the move is not necessary if dr == res, because res is
   675   * known to be dead.
   676   */
   677  void
   678  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   679  {
   680  	int r;
   681  
   682  	r = reg[dr];
   683  
   684  	// save current ax and dx if they are live
   685  	// and not the destination
   686  	memset(oldx, 0, sizeof *oldx);
   687  	nodreg(x, t, dr);
   688  	if(r > 1 && !samereg(x, res)) {
   689  		regalloc(oldx, types[TINT64], N);
   690  		x->type = types[TINT64];
   691  		gmove(x, oldx);
   692  		x->type = t;
   693  		oldx->ostk = r;	// squirrel away old r value
   694  		reg[dr] = 1;
   695  	}
   696  }
   697  
   698  void
   699  restx(Node *x, Node *oldx)
   700  {
   701  	if(oldx->op != 0) {
   702  		x->type = types[TINT64];
   703  		reg[x->val.u.reg] = oldx->ostk;
   704  		gmove(oldx, x);
   705  		regfree(oldx);
   706  	}
   707  }
   708  
   709  /*
   710   * generate division according to op, one of:
   711   *	res = nl / nr
   712   *	res = nl % nr
   713   */
   714  void
   715  cgen_div(int op, Node *nl, Node *nr, Node *res)
   716  {
   717  	Node n1, n2, n3;
   718  	int w, a;
   719  	Magic m;
   720  
   721  	if(nr->op != OLITERAL)
   722  		goto longdiv;
   723  	w = nl->type->width*8;
   724  
   725  	// Front end handled 32-bit division. We only need to handle 64-bit.
   726  	// try to do division by multiply by (2^w)/d
   727  	// see hacker's delight chapter 10
   728  	switch(simtype[nl->type->etype]) {
   729  	default:
   730  		goto longdiv;
   731  
   732  	case TUINT64:
   733  		m.w = w;
   734  		m.ud = mpgetfix(nr->val.u.xval);
   735  		umagic(&m);
   736  		if(m.bad)
   737  			break;
   738  		if(op == OMOD)
   739  			goto longmod;
   740  
   741  		cgenr(nl, &n1, N);
   742  		nodconst(&n2, nl->type, m.um);
   743  		regalloc(&n3, nl->type, res);
   744  		cgen_hmul(&n1, &n2, &n3);
   745  
   746  		if(m.ua) {
   747  			// need to add numerator accounting for overflow
   748  			gins(optoas(OADD, nl->type), &n1, &n3);
   749  			nodconst(&n2, nl->type, 1);
   750  			gins(optoas(ORROTC, nl->type), &n2, &n3);
   751  			nodconst(&n2, nl->type, m.s-1);
   752  			gins(optoas(ORSH, nl->type), &n2, &n3);
   753  		} else {
   754  			nodconst(&n2, nl->type, m.s);
   755  			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift dx
   756  		}
   757  
   758  		gmove(&n3, res);
   759  		regfree(&n1);
   760  		regfree(&n3);
   761  		return;
   762  
   763  	case TINT64:
   764  		m.w = w;
   765  		m.sd = mpgetfix(nr->val.u.xval);
   766  		smagic(&m);
   767  		if(m.bad)
   768  			break;
   769  		if(op == OMOD)
   770  			goto longmod;
   771  
   772  		cgenr(nl, &n1, res);
   773  		nodconst(&n2, nl->type, m.sm);
   774  		regalloc(&n3, nl->type, N);
   775  		cgen_hmul(&n1, &n2, &n3);
   776  
   777  		if(m.sm < 0) {
   778  			// need to add numerator
   779  			gins(optoas(OADD, nl->type), &n1, &n3);
   780  		}
   781  
   782  		nodconst(&n2, nl->type, m.s);
   783  		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3
   784  
   785  		nodconst(&n2, nl->type, w-1);
   786  		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
   787  		gins(optoas(OSUB, nl->type), &n1, &n3);	// added
   788  
   789  		if(m.sd < 0) {
   790  			// this could probably be removed
   791  			// by factoring it into the multiplier
   792  			gins(optoas(OMINUS, nl->type), N, &n3);
   793  		}
   794  
   795  		gmove(&n3, res);
   796  		regfree(&n1);
   797  		regfree(&n3);
   798  		return;
   799  	}
   800  	goto longdiv;
   801  
   802  longdiv:
   803  	// division and mod using (slow) hardware instruction
   804  	dodiv(op, nl, nr, res);
   805  	return;
   806  
   807  longmod:
   808  	// mod using formula A%B = A-(A/B*B) but
   809  	// we know that there is a fast algorithm for A/B
   810  	regalloc(&n1, nl->type, res);
   811  	cgen(nl, &n1);
   812  	regalloc(&n2, nl->type, N);
   813  	cgen_div(ODIV, &n1, nr, &n2);
   814  	a = optoas(OMUL, nl->type);
   815  	if(w == 8) {
   816  		// use 2-operand 16-bit multiply
   817  		// because there is no 2-operand 8-bit multiply
   818  		a = AIMULW;
   819  	}
   820  	if(!smallintconst(nr)) {
   821  		regalloc(&n3, nl->type, N);
   822  		cgen(nr, &n3);
   823  		gins(a, &n3, &n2);
   824  		regfree(&n3);
   825  	} else
   826  		gins(a, nr, &n2);
   827  	gins(optoas(OSUB, nl->type), &n2, &n1);
   828  	gmove(&n1, res);
   829  	regfree(&n1);
   830  	regfree(&n2);
   831  }
   832  
   833  /*
   834   * generate high multiply:
   835   *   res = (nl*nr) >> width
   836   */
   837  void
   838  cgen_hmul(Node *nl, Node *nr, Node *res)
   839  {
   840  	Type *t;
   841  	int a;
   842  	Node n1, n2, ax, dx, *tmp;
   843  
   844  	t = nl->type;
   845  	a = optoas(OHMUL, t);
   846  	if(nl->ullman < nr->ullman) {
   847  		tmp = nl;
   848  		nl = nr;
   849  		nr = tmp;
   850  	}
   851  	cgenr(nl, &n1, res);
   852  	cgenr(nr, &n2, N);
   853  	nodreg(&ax, t, D_AX);
   854  	gmove(&n1, &ax);
   855  	gins(a, &n2, N);
   856  	regfree(&n2);
   857  	regfree(&n1);
   858  
   859  	if(t->width == 1) {
   860  		// byte multiply behaves differently.
   861  		nodreg(&ax, t, D_AH);
   862  		nodreg(&dx, t, D_DL);
   863  		gmove(&ax, &dx);
   864  	}
   865  	nodreg(&dx, t, D_DX);
   866  	gmove(&dx, res);
   867  }
   868  
   869  /*
   870   * generate shift according to op, one of:
   871   *	res = nl << nr
   872   *	res = nl >> nr
   873   */
   874  void
   875  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   876  {
   877  	Node n1, n2, n3, n4, n5, cx, oldcx;
   878  	int a, rcx;
   879  	Prog *p1;
   880  	uvlong sc;
   881  	Type *tcount;
   882  
   883  	a = optoas(op, nl->type);
   884  
   885  	if(nr->op == OLITERAL) {
   886  		regalloc(&n1, nl->type, res);
   887  		cgen(nl, &n1);
   888  		sc = mpgetfix(nr->val.u.xval);
   889  		if(sc >= nl->type->width*8) {
   890  			// large shift gets 2 shifts by width-1
   891  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   892  			gins(a, &n3, &n1);
   893  			gins(a, &n3, &n1);
   894  		} else
   895  			gins(a, nr, &n1);
   896  		gmove(&n1, res);
   897  		regfree(&n1);
   898  		goto ret;
   899  	}
   900  
   901  	if(nl->ullman >= UINF) {
   902  		tempname(&n4, nl->type);
   903  		cgen(nl, &n4);
   904  		nl = &n4;
   905  	}
   906  	if(nr->ullman >= UINF) {
   907  		tempname(&n5, nr->type);
   908  		cgen(nr, &n5);
   909  		nr = &n5;
   910  	}
   911  
   912  	rcx = reg[D_CX];
   913  	nodreg(&n1, types[TUINT32], D_CX);
   914  	
   915  	// Allow either uint32 or uint64 as shift type,
   916  	// to avoid unnecessary conversion from uint32 to uint64
   917  	// just to do the comparison.
   918  	tcount = types[simtype[nr->type->etype]];
   919  	if(tcount->etype < TUINT32)
   920  		tcount = types[TUINT32];
   921  
   922  	regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
   923  	regalloc(&n3, tcount, &n1);	// to clear high bits of CX
   924  
   925  	nodreg(&cx, types[TUINT64], D_CX);
   926  	memset(&oldcx, 0, sizeof oldcx);
   927  	if(rcx > 0 && !samereg(&cx, res)) {
   928  		regalloc(&oldcx, types[TUINT64], N);
   929  		gmove(&cx, &oldcx);
   930  	}
   931  	cx.type = tcount;
   932  
   933  	if(samereg(&cx, res))
   934  		regalloc(&n2, nl->type, N);
   935  	else
   936  		regalloc(&n2, nl->type, res);
   937  	if(nl->ullman >= nr->ullman) {
   938  		cgen(nl, &n2);
   939  		cgen(nr, &n1);
   940  		gmove(&n1, &n3);
   941  	} else {
   942  		cgen(nr, &n1);
   943  		gmove(&n1, &n3);
   944  		cgen(nl, &n2);
   945  	}
   946  	regfree(&n3);
   947  
   948  	// test and fix up large shifts
   949  	if(!bounded) {
   950  		nodconst(&n3, tcount, nl->type->width*8);
   951  		gins(optoas(OCMP, tcount), &n1, &n3);
   952  		p1 = gbranch(optoas(OLT, tcount), T, +1);
   953  		if(op == ORSH && issigned[nl->type->etype]) {
   954  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   955  			gins(a, &n3, &n2);
   956  		} else {
   957  			nodconst(&n3, nl->type, 0);
   958  			gmove(&n3, &n2);
   959  		}
   960  		patch(p1, pc);
   961  	}
   962  
   963  	gins(a, &n1, &n2);
   964  
   965  	if(oldcx.op != 0) {
   966  		cx.type = types[TUINT64];
   967  		gmove(&oldcx, &cx);
   968  		regfree(&oldcx);
   969  	}
   970  
   971  	gmove(&n2, res);
   972  
   973  	regfree(&n1);
   974  	regfree(&n2);
   975  
   976  ret:
   977  	;
   978  }
   979  
   980  /*
   981   * generate byte multiply:
   982   *	res = nl * nr
   983   * there is no 2-operand byte multiply instruction so
   984   * we do a full-width multiplication and truncate afterwards.
   985   */
   986  void
   987  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
   988  {
   989  	Node n1, n2, n1b, n2b, *tmp;
   990  	Type *t;
   991  	int a;
   992  
   993  	// largest ullman on left.
   994  	if(nl->ullman < nr->ullman) {
   995  		tmp = nl;
   996  		nl = nr;
   997  		nr = tmp;
   998  	}
   999  
  1000  	// generate operands in "8-bit" registers.
  1001  	regalloc(&n1b, nl->type, res);
  1002  	cgen(nl, &n1b);
  1003  	regalloc(&n2b, nr->type, N);
  1004  	cgen(nr, &n2b);
  1005  
  1006  	// perform full-width multiplication.
  1007  	t = types[TUINT64];
  1008  	if(issigned[nl->type->etype])
  1009  		t = types[TINT64];
  1010  	nodreg(&n1, t, n1b.val.u.reg);
  1011  	nodreg(&n2, t, n2b.val.u.reg);
  1012  	a = optoas(op, t);
  1013  	gins(a, &n2, &n1);
  1014  
  1015  	// truncate.
  1016  	gmove(&n1, res);
  1017  	regfree(&n1b);
  1018  	regfree(&n2b);
  1019  }
  1020  
  1021  void
  1022  clearfat(Node *nl)
  1023  {
  1024  	int64 w, c, q;
  1025  	Node n1, oldn1, ax, oldax;
  1026  
  1027  	/* clear a fat object */
  1028  	if(debug['g'])
  1029  		dump("\nclearfat", nl);
  1030  
  1031  
  1032  	w = nl->type->width;
  1033  	// Avoid taking the address for simple enough types.
  1034  	if(componentgen(N, nl))
  1035  		return;
  1036  
  1037  	c = w % 8;	// bytes
  1038  	q = w / 8;	// quads
  1039  
  1040  	savex(D_DI, &n1, &oldn1, N, types[tptr]);
  1041  	agen(nl, &n1);
  1042  
  1043  	savex(D_AX, &ax, &oldax, N, types[tptr]);
  1044  	gconreg(AMOVQ, 0, D_AX);
  1045  
  1046  	if(q >= 4) {
  1047  		gconreg(AMOVQ, q, D_CX);
  1048  		gins(AREP, N, N);	// repeat
  1049  		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
  1050  	} else
  1051  	while(q > 0) {
  1052  		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
  1053  		q--;
  1054  	}
  1055  
  1056  	if(c >= 4) {
  1057  		gconreg(AMOVQ, c, D_CX);
  1058  		gins(AREP, N, N);	// repeat
  1059  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
  1060  	} else
  1061  	while(c > 0) {
  1062  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
  1063  		c--;
  1064  	}
  1065  
  1066  	restx(&n1, &oldn1);
  1067  	restx(&ax, &oldax);
  1068  }
  1069  
  1070  // Called after regopt and peep have run.
  1071  // Expand CHECKNIL pseudo-op into actual nil pointer check.
  1072  void
  1073  expandchecks(Prog *firstp)
  1074  {
  1075  	Prog *p, *p1, *p2;
  1076  
  1077  	for(p = firstp; p != P; p = p->link) {
  1078  		if(p->as != ACHECKNIL)
  1079  			continue;
  1080  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
  1081  			warnl(p->lineno, "nil check %D", &p->from);
  1082  		// check is
  1083  		//	CMP arg, $0
  1084  		//	JNE 2(PC) (likely)
  1085  		//	MOV AX, 0
  1086  		p1 = mal(sizeof *p1);
  1087  		p2 = mal(sizeof *p2);
  1088  		clearp(p1);
  1089  		clearp(p2);
  1090  		p1->link = p2;
  1091  		p2->link = p->link;
  1092  		p->link = p1;
  1093  		p1->lineno = p->lineno;
  1094  		p2->lineno = p->lineno;
  1095  		p1->loc = 9999;
  1096  		p2->loc = 9999;
  1097  		p->as = ACMPQ;
  1098  		p->to.type = D_CONST;
  1099  		p->to.offset = 0;
  1100  		p1->as = AJNE;
  1101  		p1->from.type = D_CONST;
  1102  		p1->from.offset = 1; // likely
  1103  		p1->to.type = D_BRANCH;
  1104  		p1->to.u.branch = p2->link;
  1105  		// crash by write to memory address 0.
  1106  		// if possible, since we know arg is 0, use 0(arg),
  1107  		// which will be shorter to encode than plain 0.
  1108  		p2->as = AMOVL;
  1109  		p2->from.type = D_AX;
  1110  		if(regtyp(&p->from))
  1111  			p2->to.type = p->from.type + D_INDIR;
  1112  		else
  1113  			p2->to.type = D_INDIR+D_NONE;
  1114  		p2->to.offset = 0;
  1115  	}
  1116  }