github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/6g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  static Prog* appendp(Prog*, int, int, vlong, int, vlong);
    13  
    14  void
    15  defframe(Prog *ptxt, Bvec *bv)
    16  {
    17  	int i, j;
    18  	uint32 frame;
    19  	Prog *p;
    20  
    21  	// fill in argument size
    22  	ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
    23  
    24  	// fill in final stack size
    25  	ptxt->to.offset <<= 32;
    26  	frame = rnd(stksize+maxarg, widthptr);
    27  	ptxt->to.offset |= frame;
    28  
    29  	// insert code to clear pointered part of the frame,
    30  	// so that garbage collector only sees initialized values
    31  	// when it looks for pointers.
    32  	p = ptxt;
    33  	if(stkzerosize >= 8*widthptr) {
    34  		p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0);
    35  		p = appendp(p, AMOVQ, D_CONST, stkzerosize/widthptr, D_CX, 0);
    36  		p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0);
    37  		p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
    38  		appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0);
    39  	} else {
    40  		j = (stkptrsize - stkzerosize)/widthptr * 2;
    41  		for(i=0; i<stkzerosize; i+=widthptr) {
    42  			if(bvget(bv, j) || bvget(bv, j+1))
    43  				p = appendp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i);
    44  			j += 2;
    45  		}
    46  	}
    47  }
    48  
    49  static Prog*
    50  appendp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)
    51  {
    52  	Prog *q;
    53  	
    54  	q = mal(sizeof(*q));
    55  	clearp(q);
    56  	q->as = as;
    57  	q->lineno = p->lineno;
    58  	q->from.type = ftype;
    59  	q->from.offset = foffset;
    60  	q->to.type = ttype;
    61  	q->to.offset = toffset;
    62  	q->link = p->link;
    63  	p->link = q;
    64  	return q;
    65  }
    66  
    67  // Sweep the prog list to mark any used nodes.
    68  void
    69  markautoused(Prog* p)
    70  {
    71  	for (; p; p = p->link) {
    72  		if (p->as == ATYPE)
    73  			continue;
    74  
    75  		if (p->from.type == D_AUTO && p->from.node)
    76  			p->from.node->used = 1;
    77  
    78  		if (p->to.type == D_AUTO && p->to.node)
    79  			p->to.node->used = 1;
    80  	}
    81  }
    82  
    83  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
    84  void
    85  fixautoused(Prog *p)
    86  {
    87  	Prog **lp;
    88  
    89  	for (lp=&p; (p=*lp) != P; ) {
    90  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
    91  			*lp = p->link;
    92  			continue;
    93  		}
    94  		if (p->from.type == D_AUTO && p->from.node)
    95  			p->from.offset += p->from.node->stkdelta;
    96  
    97  		if (p->to.type == D_AUTO && p->to.node)
    98  			p->to.offset += p->to.node->stkdelta;
    99  
   100  		lp = &p->link;
   101  	}
   102  }
   103  
   104  
   105  /*
   106   * generate:
   107   *	call f
   108   *	proc=-1	normal call but no return
   109   *	proc=0	normal call
   110   *	proc=1	goroutine run in new proc
   111   *	proc=2	defer call save away stack
   112    *	proc=3	normal call to C pointer (not Go func value)
   113   */
   114  void
   115  ginscall(Node *f, int proc)
   116  {
   117  	int32 arg;
   118  	Prog *p;
   119  	Node reg, con;
   120  	Node r1;
   121  
   122  	if(f->type != T)
   123  		setmaxarg(f->type);
   124  
   125  	arg = -1;
   126  	// Most functions have a fixed-size argument block, so traceback uses that during unwind.
   127  	// Not all, though: there are some variadic functions in package runtime,
   128  	// and for those we emit call-specific metadata recorded by caller.
   129  	// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
   130  	// so we do this for all indirect calls as well.
   131  	if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
   132  		arg = f->type->argwid;
   133  		if(proc == 1 || proc == 2)
   134  			arg += 2*widthptr;
   135  	}
   136  
   137  	if(arg != -1)
   138  		gargsize(arg);
   139  
   140  	switch(proc) {
   141  	default:
   142  		fatal("ginscall: bad proc %d", proc);
   143  		break;
   144  
   145  	case 0:	// normal call
   146  	case -1:	// normal call but no return
   147  		if(f->op == ONAME && f->class == PFUNC) {
   148  			if(f == deferreturn) {
   149  				// Deferred calls will appear to be returning to
   150  				// the CALL deferreturn(SB) that we are about to emit.
   151  				// However, the stack trace code will show the line
   152  				// of the instruction byte before the return PC. 
   153  				// To avoid that being an unrelated instruction,
   154  				// insert an x86 NOP that we will have the right line number.
   155  				// x86 NOP 0x90 is really XCHG AX, AX; use that description
   156  				// because the NOP pseudo-instruction would be removed by
   157  				// the linker.
   158  				nodreg(&reg, types[TINT], D_AX);
   159  				gins(AXCHGL, &reg, &reg);
   160  			}
   161  			p = gins(ACALL, N, f);
   162  			afunclit(&p->to, f);
   163  			if(proc == -1 || noreturn(p))
   164  				gins(AUNDEF, N, N);
   165  			break;
   166  		}
   167  		nodreg(&reg, types[tptr], D_DX);
   168  		nodreg(&r1, types[tptr], D_BX);
   169  		gmove(f, &reg);
   170  		reg.op = OINDREG;
   171  		gmove(&reg, &r1);
   172  		reg.op = OREGISTER;
   173  		gins(ACALL, &reg, &r1);
   174  		break;
   175  	
   176  	case 3:	// normal call of c function pointer
   177  		gins(ACALL, N, f);
   178  		break;
   179  
   180  	case 1:	// call in new proc (go)
   181  	case 2:	// deferred call (defer)
   182  		nodreg(&reg, types[TINT64], D_CX);
   183  		if(flag_largemodel) {
   184  			regalloc(&r1, f->type, f);
   185  			gmove(f, &r1);
   186  			gins(APUSHQ, &r1, N);
   187  			regfree(&r1);
   188  		} else {
   189  			gins(APUSHQ, f, N);
   190  		}
   191  		nodconst(&con, types[TINT32], argsize(f->type));
   192  		gins(APUSHQ, &con, N);
   193  		if(proc == 1)
   194  			ginscall(newproc, 0);
   195  		else {
   196  			if(!hasdefer)
   197  				fatal("hasdefer=0 but has defer");
   198  			ginscall(deferproc, 0);
   199  		}
   200  		gins(APOPQ, N, &reg);
   201  		gins(APOPQ, N, &reg);
   202  		if(proc == 2) {
   203  			nodreg(&reg, types[TINT64], D_AX);
   204  			gins(ATESTQ, &reg, &reg);
   205  			patch(gbranch(AJNE, T, -1), retpc);
   206  		}
   207  		break;
   208  	}
   209  
   210  	if(arg != -1)
   211  		gargsize(-1);
   212  }
   213  
   214  /*
   215   * n is call to interface method.
   216   * generate res = n.
   217   */
   218  void
   219  cgen_callinter(Node *n, Node *res, int proc)
   220  {
   221  	Node *i, *f;
   222  	Node tmpi, nodi, nodo, nodr, nodsp;
   223  
   224  	i = n->left;
   225  	if(i->op != ODOTINTER)
   226  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   227  
   228  	f = i->right;		// field
   229  	if(f->op != ONAME)
   230  		fatal("cgen_callinter: not ONAME %O", f->op);
   231  
   232  	i = i->left;		// interface
   233  
   234  	if(!i->addable) {
   235  		tempname(&tmpi, i->type);
   236  		cgen(i, &tmpi);
   237  		i = &tmpi;
   238  	}
   239  
   240  	genlist(n->list);		// assign the args
   241  
   242  	// i is now addable, prepare an indirected
   243  	// register to hold its address.
   244  	igen(i, &nodi, res);		// REG = &inter
   245  
   246  	nodindreg(&nodsp, types[tptr], D_SP);
   247  	nodi.type = types[tptr];
   248  	nodi.xoffset += widthptr;
   249  	cgen(&nodi, &nodsp);	// 0(SP) = 8(REG) -- i.data
   250  
   251  	regalloc(&nodo, types[tptr], res);
   252  	nodi.type = types[tptr];
   253  	nodi.xoffset -= widthptr;
   254  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   255  	regfree(&nodi);
   256  
   257  	regalloc(&nodr, types[tptr], &nodo);
   258  	if(n->left->xoffset == BADWIDTH)
   259  		fatal("cgen_callinter: badwidth");
   260  	cgen_checknil(&nodo); // in case offset is huge
   261  	nodo.op = OINDREG;
   262  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   263  	if(proc == 0) {
   264  		// plain call: use direct c function pointer - more efficient
   265  		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
   266  		proc = 3;
   267  	} else {
   268  		// go/defer. generate go func value.
   269  		gins(ALEAQ, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
   270  	}
   271  
   272  	nodr.type = n->left->type;
   273  	ginscall(&nodr, proc);
   274  
   275  	regfree(&nodr);
   276  	regfree(&nodo);
   277  }
   278  
   279  /*
   280   * generate function call;
   281   *	proc=0	normal call
   282   *	proc=1	goroutine run in new proc
   283   *	proc=2	defer call save away stack
   284   */
   285  void
   286  cgen_call(Node *n, int proc)
   287  {
   288  	Type *t;
   289  	Node nod, afun;
   290  
   291  	if(n == N)
   292  		return;
   293  
   294  	if(n->left->ullman >= UINF) {
   295  		// if name involves a fn call
   296  		// precompute the address of the fn
   297  		tempname(&afun, types[tptr]);
   298  		cgen(n->left, &afun);
   299  	}
   300  
   301  	genlist(n->list);		// assign the args
   302  	t = n->left->type;
   303  
   304  	// call tempname pointer
   305  	if(n->left->ullman >= UINF) {
   306  		regalloc(&nod, types[tptr], N);
   307  		cgen_as(&nod, &afun);
   308  		nod.type = t;
   309  		ginscall(&nod, proc);
   310  		regfree(&nod);
   311  		return;
   312  	}
   313  
   314  	// call pointer
   315  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   316  		regalloc(&nod, types[tptr], N);
   317  		cgen_as(&nod, n->left);
   318  		nod.type = t;
   319  		ginscall(&nod, proc);
   320  		regfree(&nod);
   321  		return;
   322  	}
   323  
   324  	// call direct
   325  	n->left->method = 1;
   326  	ginscall(n->left, proc);
   327  }
   328  
   329  /*
   330   * call to n has already been generated.
   331   * generate:
   332   *	res = return value from call.
   333   */
   334  void
   335  cgen_callret(Node *n, Node *res)
   336  {
   337  	Node nod;
   338  	Type *fp, *t;
   339  	Iter flist;
   340  
   341  	t = n->left->type;
   342  	if(t->etype == TPTR32 || t->etype == TPTR64)
   343  		t = t->type;
   344  
   345  	fp = structfirst(&flist, getoutarg(t));
   346  	if(fp == T)
   347  		fatal("cgen_callret: nil");
   348  
   349  	memset(&nod, 0, sizeof(nod));
   350  	nod.op = OINDREG;
   351  	nod.val.u.reg = D_SP;
   352  	nod.addable = 1;
   353  
   354  	nod.xoffset = fp->width;
   355  	nod.type = fp->type;
   356  	cgen_as(res, &nod);
   357  }
   358  
   359  /*
   360   * call to n has already been generated.
   361   * generate:
   362   *	res = &return value from call.
   363   */
   364  void
   365  cgen_aret(Node *n, Node *res)
   366  {
   367  	Node nod1, nod2;
   368  	Type *fp, *t;
   369  	Iter flist;
   370  
   371  	t = n->left->type;
   372  	if(isptr[t->etype])
   373  		t = t->type;
   374  
   375  	fp = structfirst(&flist, getoutarg(t));
   376  	if(fp == T)
   377  		fatal("cgen_aret: nil");
   378  
   379  	memset(&nod1, 0, sizeof(nod1));
   380  	nod1.op = OINDREG;
   381  	nod1.val.u.reg = D_SP;
   382  	nod1.addable = 1;
   383  
   384  	nod1.xoffset = fp->width;
   385  	nod1.type = fp->type;
   386  
   387  	if(res->op != OREGISTER) {
   388  		regalloc(&nod2, types[tptr], res);
   389  		gins(ALEAQ, &nod1, &nod2);
   390  		gins(AMOVQ, &nod2, res);
   391  		regfree(&nod2);
   392  	} else
   393  		gins(ALEAQ, &nod1, res);
   394  }
   395  
   396  /*
   397   * generate return.
   398   * n->left is assignments to return values.
   399   */
   400  void
   401  cgen_ret(Node *n)
   402  {
   403  	Prog *p;
   404  
   405  	genlist(n->list);		// copy out args
   406  	if(hasdefer || curfn->exit) {
   407  		gjmp(retpc);
   408  		return;
   409  	}
   410  	p = gins(ARET, N, N);
   411  	if(n->op == ORETJMP) {
   412  		p->to.type = D_EXTERN;
   413  		p->to.sym = n->left->sym;
   414  	}
   415  }
   416  
   417  /*
   418   * generate += *= etc.
   419   */
   420  void
   421  cgen_asop(Node *n)
   422  {
   423  	Node n1, n2, n3, n4;
   424  	Node *nl, *nr;
   425  	Prog *p1;
   426  	Addr addr;
   427  	int a;
   428  
   429  	nl = n->left;
   430  	nr = n->right;
   431  
   432  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   433  		tempname(&n1, nr->type);
   434  		cgen(nr, &n1);
   435  		n2 = *n;
   436  		n2.right = &n1;
   437  		cgen_asop(&n2);
   438  		goto ret;
   439  	}
   440  
   441  	if(!isint[nl->type->etype])
   442  		goto hard;
   443  	if(!isint[nr->type->etype])
   444  		goto hard;
   445  
   446  	switch(n->etype) {
   447  	case OADD:
   448  		if(smallintconst(nr))
   449  		if(mpgetfix(nr->val.u.xval) == 1) {
   450  			a = optoas(OINC, nl->type);
   451  			if(nl->addable) {
   452  				gins(a, N, nl);
   453  				goto ret;
   454  			}
   455  			if(sudoaddable(a, nl, &addr)) {
   456  				p1 = gins(a, N, N);
   457  				p1->to = addr;
   458  				sudoclean();
   459  				goto ret;
   460  			}
   461  		}
   462  		break;
   463  
   464  	case OSUB:
   465  		if(smallintconst(nr))
   466  		if(mpgetfix(nr->val.u.xval) == 1) {
   467  			a = optoas(ODEC, nl->type);
   468  			if(nl->addable) {
   469  				gins(a, N, nl);
   470  				goto ret;
   471  			}
   472  			if(sudoaddable(a, nl, &addr)) {
   473  				p1 = gins(a, N, N);
   474  				p1->to = addr;
   475  				sudoclean();
   476  				goto ret;
   477  			}
   478  		}
   479  		break;
   480  	}
   481  
   482  	switch(n->etype) {
   483  	case OADD:
   484  	case OSUB:
   485  	case OXOR:
   486  	case OAND:
   487  	case OOR:
   488  		a = optoas(n->etype, nl->type);
   489  		if(nl->addable) {
   490  			if(smallintconst(nr)) {
   491  				gins(a, nr, nl);
   492  				goto ret;
   493  			}
   494  			regalloc(&n2, nr->type, N);
   495  			cgen(nr, &n2);
   496  			gins(a, &n2, nl);
   497  			regfree(&n2);
   498  			goto ret;
   499  		}
   500  		if(nr->ullman < UINF)
   501  		if(sudoaddable(a, nl, &addr)) {
   502  			if(smallintconst(nr)) {
   503  				p1 = gins(a, nr, N);
   504  				p1->to = addr;
   505  				sudoclean();
   506  				goto ret;
   507  			}
   508  			regalloc(&n2, nr->type, N);
   509  			cgen(nr, &n2);
   510  			p1 = gins(a, &n2, N);
   511  			p1->to = addr;
   512  			regfree(&n2);
   513  			sudoclean();
   514  			goto ret;
   515  		}
   516  	}
   517  
   518  hard:
   519  	n2.op = 0;
   520  	n1.op = 0;
   521  	if(nr->op == OLITERAL) {
   522  		// don't allocate a register for literals.
   523  	} else if(nr->ullman >= nl->ullman || nl->addable) {
   524  		regalloc(&n2, nr->type, N);
   525  		cgen(nr, &n2);
   526  		nr = &n2;
   527  	} else {
   528  		tempname(&n2, nr->type);
   529  		cgen(nr, &n2);
   530  		nr = &n2;
   531  	}
   532  	if(!nl->addable) {
   533  		igen(nl, &n1, N);
   534  		nl = &n1;
   535  	}
   536  
   537  	n3 = *n;
   538  	n3.left = nl;
   539  	n3.right = nr;
   540  	n3.op = n->etype;
   541  
   542  	regalloc(&n4, nl->type, N);
   543  	cgen(&n3, &n4);
   544  	gmove(&n4, nl);
   545  
   546  	if(n1.op)
   547  		regfree(&n1);
   548  	if(n2.op == OREGISTER)
   549  		regfree(&n2);
   550  	regfree(&n4);
   551  
   552  ret:
   553  	;
   554  }
   555  
   556  int
   557  samereg(Node *a, Node *b)
   558  {
   559  	if(a == N || b == N)
   560  		return 0;
   561  	if(a->op != OREGISTER)
   562  		return 0;
   563  	if(b->op != OREGISTER)
   564  		return 0;
   565  	if(a->val.u.reg != b->val.u.reg)
   566  		return 0;
   567  	return 1;
   568  }
   569  
   570  /*
   571   * generate division.
   572   * generates one of:
   573   *	res = nl / nr
   574   *	res = nl % nr
   575   * according to op.
   576   */
   577  void
   578  dodiv(int op, Node *nl, Node *nr, Node *res)
   579  {
   580  	int a, check;
   581  	Node n3, n4;
   582  	Type *t, *t0;
   583  	Node ax, dx, ax1, n31, oldax, olddx;
   584  	Prog *p1, *p2;
   585  
   586  	// Have to be careful about handling
   587  	// most negative int divided by -1 correctly.
   588  	// The hardware will trap.
   589  	// Also the byte divide instruction needs AH,
   590  	// which we otherwise don't have to deal with.
   591  	// Easiest way to avoid for int8, int16: use int32.
   592  	// For int32 and int64, use explicit test.
   593  	// Could use int64 hw for int32.
   594  	t = nl->type;
   595  	t0 = t;
   596  	check = 0;
   597  	if(issigned[t->etype]) {
   598  		check = 1;
   599  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
   600  			check = 0;
   601  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   602  			check = 0;
   603  	}
   604  	if(t->width < 4) {
   605  		if(issigned[t->etype])
   606  			t = types[TINT32];
   607  		else
   608  			t = types[TUINT32];
   609  		check = 0;
   610  	}
   611  	a = optoas(op, t);
   612  
   613  	regalloc(&n3, t0, N);
   614  	if(nl->ullman >= nr->ullman) {
   615  		savex(D_AX, &ax, &oldax, res, t0);
   616  		cgen(nl, &ax);
   617  		regalloc(&ax, t0, &ax);	// mark ax live during cgen
   618  		cgen(nr, &n3);
   619  		regfree(&ax);
   620  	} else {
   621  		cgen(nr, &n3);
   622  		savex(D_AX, &ax, &oldax, res, t0);
   623  		cgen(nl, &ax);
   624  	}
   625  	if(t != t0) {
   626  		// Convert
   627  		ax1 = ax;
   628  		n31 = n3;
   629  		ax.type = t;
   630  		n3.type = t;
   631  		gmove(&ax1, &ax);
   632  		gmove(&n31, &n3);
   633  	}
   634  
   635  	p2 = P;
   636  	if(check) {
   637  		nodconst(&n4, t, -1);
   638  		gins(optoas(OCMP, t), &n3, &n4);
   639  		p1 = gbranch(optoas(ONE, t), T, +1);
   640  		if(op == ODIV) {
   641  			// a / (-1) is -a.
   642  			gins(optoas(OMINUS, t), N, &ax);
   643  			gmove(&ax, res);
   644  		} else {
   645  			// a % (-1) is 0.
   646  			nodconst(&n4, t, 0);
   647  			gmove(&n4, res);
   648  		}
   649  		p2 = gbranch(AJMP, T, 0);
   650  		patch(p1, pc);
   651  	}
   652  	savex(D_DX, &dx, &olddx, res, t);
   653  	if(!issigned[t->etype]) {
   654  		nodconst(&n4, t, 0);
   655  		gmove(&n4, &dx);
   656  	} else
   657  		gins(optoas(OEXTEND, t), N, N);
   658  	gins(a, &n3, N);
   659  	regfree(&n3);
   660  	if(op == ODIV)
   661  		gmove(&ax, res);
   662  	else
   663  		gmove(&dx, res);
   664  	restx(&dx, &olddx);
   665  	if(check)
   666  		patch(p2, pc);
   667  	restx(&ax, &oldax);
   668  }
   669  
   670  /*
   671   * register dr is one of the special ones (AX, CX, DI, SI, etc.).
   672   * we need to use it.  if it is already allocated as a temporary
   673   * (r > 1; can only happen if a routine like sgen passed a
   674   * special as cgen's res and then cgen used regalloc to reuse
   675   * it as its own temporary), then move it for now to another
   676   * register.  caller must call restx to move it back.
   677   * the move is not necessary if dr == res, because res is
   678   * known to be dead.
   679   */
   680  void
   681  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   682  {
   683  	int r;
   684  
   685  	r = reg[dr];
   686  
   687  	// save current ax and dx if they are live
   688  	// and not the destination
   689  	memset(oldx, 0, sizeof *oldx);
   690  	nodreg(x, t, dr);
   691  	if(r > 1 && !samereg(x, res)) {
   692  		regalloc(oldx, types[TINT64], N);
   693  		x->type = types[TINT64];
   694  		gmove(x, oldx);
   695  		x->type = t;
   696  		oldx->ostk = r;	// squirrel away old r value
   697  		reg[dr] = 1;
   698  	}
   699  }
   700  
   701  void
   702  restx(Node *x, Node *oldx)
   703  {
   704  	if(oldx->op != 0) {
   705  		x->type = types[TINT64];
   706  		reg[x->val.u.reg] = oldx->ostk;
   707  		gmove(oldx, x);
   708  		regfree(oldx);
   709  	}
   710  }
   711  
   712  /*
   713   * generate division according to op, one of:
   714   *	res = nl / nr
   715   *	res = nl % nr
   716   */
   717  void
   718  cgen_div(int op, Node *nl, Node *nr, Node *res)
   719  {
   720  	Node n1, n2, n3;
   721  	int w, a;
   722  	Magic m;
   723  
   724  	if(nr->op != OLITERAL)
   725  		goto longdiv;
   726  	w = nl->type->width*8;
   727  
   728  	// Front end handled 32-bit division. We only need to handle 64-bit.
   729  	// try to do division by multiply by (2^w)/d
   730  	// see hacker's delight chapter 10
   731  	switch(simtype[nl->type->etype]) {
   732  	default:
   733  		goto longdiv;
   734  
   735  	case TUINT64:
   736  		m.w = w;
   737  		m.ud = mpgetfix(nr->val.u.xval);
   738  		umagic(&m);
   739  		if(m.bad)
   740  			break;
   741  		if(op == OMOD)
   742  			goto longmod;
   743  
   744  		cgenr(nl, &n1, N);
   745  		nodconst(&n2, nl->type, m.um);
   746  		regalloc(&n3, nl->type, res);
   747  		cgen_hmul(&n1, &n2, &n3);
   748  
   749  		if(m.ua) {
   750  			// need to add numerator accounting for overflow
   751  			gins(optoas(OADD, nl->type), &n1, &n3);
   752  			nodconst(&n2, nl->type, 1);
   753  			gins(optoas(ORROTC, nl->type), &n2, &n3);
   754  			nodconst(&n2, nl->type, m.s-1);
   755  			gins(optoas(ORSH, nl->type), &n2, &n3);
   756  		} else {
   757  			nodconst(&n2, nl->type, m.s);
   758  			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift dx
   759  		}
   760  
   761  		gmove(&n3, res);
   762  		regfree(&n1);
   763  		regfree(&n3);
   764  		return;
   765  
   766  	case TINT64:
   767  		m.w = w;
   768  		m.sd = mpgetfix(nr->val.u.xval);
   769  		smagic(&m);
   770  		if(m.bad)
   771  			break;
   772  		if(op == OMOD)
   773  			goto longmod;
   774  
   775  		cgenr(nl, &n1, res);
   776  		nodconst(&n2, nl->type, m.sm);
   777  		regalloc(&n3, nl->type, N);
   778  		cgen_hmul(&n1, &n2, &n3);
   779  
   780  		if(m.sm < 0) {
   781  			// need to add numerator
   782  			gins(optoas(OADD, nl->type), &n1, &n3);
   783  		}
   784  
   785  		nodconst(&n2, nl->type, m.s);
   786  		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3
   787  
   788  		nodconst(&n2, nl->type, w-1);
   789  		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
   790  		gins(optoas(OSUB, nl->type), &n1, &n3);	// added
   791  
   792  		if(m.sd < 0) {
   793  			// this could probably be removed
   794  			// by factoring it into the multiplier
   795  			gins(optoas(OMINUS, nl->type), N, &n3);
   796  		}
   797  
   798  		gmove(&n3, res);
   799  		regfree(&n1);
   800  		regfree(&n3);
   801  		return;
   802  	}
   803  	goto longdiv;
   804  
   805  longdiv:
   806  	// division and mod using (slow) hardware instruction
   807  	dodiv(op, nl, nr, res);
   808  	return;
   809  
   810  longmod:
   811  	// mod using formula A%B = A-(A/B*B) but
   812  	// we know that there is a fast algorithm for A/B
   813  	regalloc(&n1, nl->type, res);
   814  	cgen(nl, &n1);
   815  	regalloc(&n2, nl->type, N);
   816  	cgen_div(ODIV, &n1, nr, &n2);
   817  	a = optoas(OMUL, nl->type);
   818  	if(w == 8) {
   819  		// use 2-operand 16-bit multiply
   820  		// because there is no 2-operand 8-bit multiply
   821  		a = AIMULW;
   822  	}
   823  	if(!smallintconst(nr)) {
   824  		regalloc(&n3, nl->type, N);
   825  		cgen(nr, &n3);
   826  		gins(a, &n3, &n2);
   827  		regfree(&n3);
   828  	} else
   829  		gins(a, nr, &n2);
   830  	gins(optoas(OSUB, nl->type), &n2, &n1);
   831  	gmove(&n1, res);
   832  	regfree(&n1);
   833  	regfree(&n2);
   834  }
   835  
   836  /*
   837   * generate high multiply:
   838   *   res = (nl*nr) >> width
   839   */
   840  void
   841  cgen_hmul(Node *nl, Node *nr, Node *res)
   842  {
   843  	Type *t;
   844  	int a;
   845  	Node n1, n2, ax, dx, *tmp;
   846  
   847  	t = nl->type;
   848  	a = optoas(OHMUL, t);
   849  	if(nl->ullman < nr->ullman) {
   850  		tmp = nl;
   851  		nl = nr;
   852  		nr = tmp;
   853  	}
   854  	cgenr(nl, &n1, res);
   855  	cgenr(nr, &n2, N);
   856  	nodreg(&ax, t, D_AX);
   857  	gmove(&n1, &ax);
   858  	gins(a, &n2, N);
   859  	regfree(&n2);
   860  	regfree(&n1);
   861  
   862  	if(t->width == 1) {
   863  		// byte multiply behaves differently.
   864  		nodreg(&ax, t, D_AH);
   865  		nodreg(&dx, t, D_DL);
   866  		gmove(&ax, &dx);
   867  	}
   868  	nodreg(&dx, t, D_DX);
   869  	gmove(&dx, res);
   870  }
   871  
   872  /*
   873   * generate shift according to op, one of:
   874   *	res = nl << nr
   875   *	res = nl >> nr
   876   */
   877  void
   878  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   879  {
   880  	Node n1, n2, n3, n4, n5, cx, oldcx;
   881  	int a, rcx;
   882  	Prog *p1;
   883  	uvlong sc;
   884  	Type *tcount;
   885  
   886  	a = optoas(op, nl->type);
   887  
   888  	if(nr->op == OLITERAL) {
   889  		regalloc(&n1, nl->type, res);
   890  		cgen(nl, &n1);
   891  		sc = mpgetfix(nr->val.u.xval);
   892  		if(sc >= nl->type->width*8) {
   893  			// large shift gets 2 shifts by width-1
   894  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   895  			gins(a, &n3, &n1);
   896  			gins(a, &n3, &n1);
   897  		} else
   898  			gins(a, nr, &n1);
   899  		gmove(&n1, res);
   900  		regfree(&n1);
   901  		goto ret;
   902  	}
   903  
   904  	if(nl->ullman >= UINF) {
   905  		tempname(&n4, nl->type);
   906  		cgen(nl, &n4);
   907  		nl = &n4;
   908  	}
   909  	if(nr->ullman >= UINF) {
   910  		tempname(&n5, nr->type);
   911  		cgen(nr, &n5);
   912  		nr = &n5;
   913  	}
   914  
   915  	rcx = reg[D_CX];
   916  	nodreg(&n1, types[TUINT32], D_CX);
   917  	
   918  	// Allow either uint32 or uint64 as shift type,
   919  	// to avoid unnecessary conversion from uint32 to uint64
   920  	// just to do the comparison.
   921  	tcount = types[simtype[nr->type->etype]];
   922  	if(tcount->etype < TUINT32)
   923  		tcount = types[TUINT32];
   924  
   925  	regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
   926  	regalloc(&n3, tcount, &n1);	// to clear high bits of CX
   927  
   928  	nodreg(&cx, types[TUINT64], D_CX);
   929  	memset(&oldcx, 0, sizeof oldcx);
   930  	if(rcx > 0 && !samereg(&cx, res)) {
   931  		regalloc(&oldcx, types[TUINT64], N);
   932  		gmove(&cx, &oldcx);
   933  	}
   934  	cx.type = tcount;
   935  
   936  	if(samereg(&cx, res))
   937  		regalloc(&n2, nl->type, N);
   938  	else
   939  		regalloc(&n2, nl->type, res);
   940  	if(nl->ullman >= nr->ullman) {
   941  		cgen(nl, &n2);
   942  		cgen(nr, &n1);
   943  		gmove(&n1, &n3);
   944  	} else {
   945  		cgen(nr, &n1);
   946  		gmove(&n1, &n3);
   947  		cgen(nl, &n2);
   948  	}
   949  	regfree(&n3);
   950  
   951  	// test and fix up large shifts
   952  	if(!bounded) {
   953  		nodconst(&n3, tcount, nl->type->width*8);
   954  		gins(optoas(OCMP, tcount), &n1, &n3);
   955  		p1 = gbranch(optoas(OLT, tcount), T, +1);
   956  		if(op == ORSH && issigned[nl->type->etype]) {
   957  			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
   958  			gins(a, &n3, &n2);
   959  		} else {
   960  			nodconst(&n3, nl->type, 0);
   961  			gmove(&n3, &n2);
   962  		}
   963  		patch(p1, pc);
   964  	}
   965  
   966  	gins(a, &n1, &n2);
   967  
   968  	if(oldcx.op != 0) {
   969  		cx.type = types[TUINT64];
   970  		gmove(&oldcx, &cx);
   971  		regfree(&oldcx);
   972  	}
   973  
   974  	gmove(&n2, res);
   975  
   976  	regfree(&n1);
   977  	regfree(&n2);
   978  
   979  ret:
   980  	;
   981  }
   982  
   983  /*
   984   * generate byte multiply:
   985   *	res = nl * nr
   986   * there is no 2-operand byte multiply instruction so
   987   * we do a full-width multiplication and truncate afterwards.
   988   */
   989  void
   990  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
   991  {
   992  	Node n1, n2, n1b, n2b, *tmp;
   993  	Type *t;
   994  	int a;
   995  
   996  	// largest ullman on left.
   997  	if(nl->ullman < nr->ullman) {
   998  		tmp = nl;
   999  		nl = nr;
  1000  		nr = tmp;
  1001  	}
  1002  
  1003  	// generate operands in "8-bit" registers.
  1004  	regalloc(&n1b, nl->type, res);
  1005  	cgen(nl, &n1b);
  1006  	regalloc(&n2b, nr->type, N);
  1007  	cgen(nr, &n2b);
  1008  
  1009  	// perform full-width multiplication.
  1010  	t = types[TUINT64];
  1011  	if(issigned[nl->type->etype])
  1012  		t = types[TINT64];
  1013  	nodreg(&n1, t, n1b.val.u.reg);
  1014  	nodreg(&n2, t, n2b.val.u.reg);
  1015  	a = optoas(op, t);
  1016  	gins(a, &n2, &n1);
  1017  
  1018  	// truncate.
  1019  	gmove(&n1, res);
  1020  	regfree(&n1b);
  1021  	regfree(&n2b);
  1022  }
  1023  
  1024  void
  1025  clearfat(Node *nl)
  1026  {
  1027  	int64 w, c, q;
  1028  	Node n1, oldn1, ax, oldax;
  1029  
  1030  	/* clear a fat object */
  1031  	if(debug['g'])
  1032  		dump("\nclearfat", nl);
  1033  
  1034  
  1035  	w = nl->type->width;
  1036  	// Avoid taking the address for simple enough types.
  1037  	if(componentgen(N, nl))
  1038  		return;
  1039  
  1040  	c = w % 8;	// bytes
  1041  	q = w / 8;	// quads
  1042  
  1043  	savex(D_DI, &n1, &oldn1, N, types[tptr]);
  1044  	agen(nl, &n1);
  1045  
  1046  	savex(D_AX, &ax, &oldax, N, types[tptr]);
  1047  	gconreg(AMOVQ, 0, D_AX);
  1048  
  1049  	if(q >= 4) {
  1050  		gconreg(AMOVQ, q, D_CX);
  1051  		gins(AREP, N, N);	// repeat
  1052  		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
  1053  	} else
  1054  	while(q > 0) {
  1055  		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
  1056  		q--;
  1057  	}
  1058  
  1059  	if(c >= 4) {
  1060  		gconreg(AMOVQ, c, D_CX);
  1061  		gins(AREP, N, N);	// repeat
  1062  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
  1063  	} else
  1064  	while(c > 0) {
  1065  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
  1066  		c--;
  1067  	}
  1068  
  1069  	restx(&n1, &oldn1);
  1070  	restx(&ax, &oldax);
  1071  }
  1072  
  1073  // Called after regopt and peep have run.
  1074  // Expand CHECKNIL pseudo-op into actual nil pointer check.
  1075  void
  1076  expandchecks(Prog *firstp)
  1077  {
  1078  	Prog *p, *p1, *p2;
  1079  
  1080  	for(p = firstp; p != P; p = p->link) {
  1081  		if(p->as != ACHECKNIL)
  1082  			continue;
  1083  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
  1084  			warnl(p->lineno, "generated nil check");
  1085  		// check is
  1086  		//	CMP arg, $0
  1087  		//	JNE 2(PC) (likely)
  1088  		//	MOV AX, 0
  1089  		p1 = mal(sizeof *p1);
  1090  		p2 = mal(sizeof *p2);
  1091  		clearp(p1);
  1092  		clearp(p2);
  1093  		p1->link = p2;
  1094  		p2->link = p->link;
  1095  		p->link = p1;
  1096  		p1->lineno = p->lineno;
  1097  		p2->lineno = p->lineno;
  1098  		p1->loc = 9999;
  1099  		p2->loc = 9999;
  1100  		p->as = ACMPQ;
  1101  		p->to.type = D_CONST;
  1102  		p->to.offset = 0;
  1103  		p1->as = AJNE;
  1104  		p1->from.type = D_CONST;
  1105  		p1->from.offset = 1; // likely
  1106  		p1->to.type = D_BRANCH;
  1107  		p1->to.u.branch = p2->link;
  1108  		// crash by write to memory address 0.
  1109  		// if possible, since we know arg is 0, use 0(arg),
  1110  		// which will be shorter to encode than plain 0.
  1111  		p2->as = AMOVL;
  1112  		p2->from.type = D_AX;
  1113  		if(regtyp(&p->from))
  1114  			p2->to.type = p->from.type + D_INDIR;
  1115  		else
  1116  			p2->to.type = D_INDIR+D_NONE;
  1117  		p2->to.offset = 0;
  1118  	}
  1119  }