github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/cmd/8g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
// Forward declaration: appendp builds a new instruction from the given
// opcode, source (type, offset) and destination (type, offset) and links
// it into the instruction list right after the Prog passed as first argument.
static Prog* appendp(Prog*, int, int, int32, int, int32);
    13  
    14  void
    15  defframe(Prog *ptxt, Bvec *bv)
    16  {
    17  	uint32 frame;
    18  	Prog *p;
    19  	int i, j;
    20  
    21  	// fill in argument size
    22  	ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
    23  
    24  	// fill in final stack size
    25  	if(stksize > maxstksize)
    26  		maxstksize = stksize;
    27  	frame = rnd(maxstksize+maxarg, widthptr);
    28  	ptxt->to.offset = frame;
    29  	maxstksize = 0;
    30  
    31  	// insert code to clear pointered part of the frame,
    32  	// so that garbage collector only sees initialized values
    33  	// when it looks for pointers.
    34  	p = ptxt;
    35  	if(stkzerosize >= 8*widthptr) {
    36  		p = appendp(p, AMOVL, D_CONST, 0, D_AX, 0);
    37  		p = appendp(p, AMOVL, D_CONST, stkzerosize/widthptr, D_CX, 0);
    38  		p = appendp(p, ALEAL, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0);
    39  		p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
    40  		appendp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
    41  	} else {
    42  		for(i=0, j=(stkptrsize-stkzerosize)/widthptr*2; i<stkzerosize; i+=widthptr, j+=2)
    43  			if(bvget(bv, j) || bvget(bv, j+1))
    44  				p = appendp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i);
    45  	}
    46  }
    47  
    48  static Prog*
    49  appendp(Prog *p, int as, int ftype, int32 foffset, int ttype, int32 toffset)
    50  {
    51  	Prog *q;
    52  	
    53  	q = mal(sizeof(*q));
    54  	clearp(q);
    55  	q->as = as;
    56  	q->lineno = p->lineno;
    57  	q->from.type = ftype;
    58  	q->from.offset = foffset;
    59  	q->to.type = ttype;
    60  	q->to.offset = toffset;
    61  	q->link = p->link;
    62  	p->link = q;
    63  	return q;
    64  }
    65  
    66  // Sweep the prog list to mark any used nodes.
    67  void
    68  markautoused(Prog* p)
    69  {
    70  	for (; p; p = p->link) {
    71  		if (p->as == ATYPE)
    72  			continue;
    73  
    74  		if (p->from.type == D_AUTO && p->from.node)
    75  			p->from.node->used = 1;
    76  
    77  		if (p->to.type == D_AUTO && p->to.node)
    78  			p->to.node->used = 1;
    79  	}
    80  }
    81  
    82  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
    83  void
    84  fixautoused(Prog* p)
    85  {
    86  	Prog **lp;
    87  
    88  	for (lp=&p; (p=*lp) != P; ) {
    89  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
    90  			*lp = p->link;
    91  			continue;
    92  		}
    93  
    94  		if (p->from.type == D_AUTO && p->from.node)
    95  			p->from.offset += p->from.node->stkdelta;
    96  
    97  		if (p->to.type == D_AUTO && p->to.node)
    98  			p->to.offset += p->to.node->stkdelta;
    99  
   100  		lp = &p->link;
   101  	}
   102  }
   103  
   104  void
   105  clearfat(Node *nl)
   106  {
   107  	uint32 w, c, q;
   108  	Node n1;
   109  
   110  	/* clear a fat object */
   111  	if(debug['g'])
   112  		dump("\nclearfat", nl);
   113  
   114  	w = nl->type->width;
   115  	// Avoid taking the address for simple enough types.
   116  	if(componentgen(N, nl))
   117  		return;
   118  
   119  	c = w % 4;	// bytes
   120  	q = w / 4;	// quads
   121  
   122  	nodreg(&n1, types[tptr], D_DI);
   123  	agen(nl, &n1);
   124  	gconreg(AMOVL, 0, D_AX);
   125  
   126  	if(q >= 4) {
   127  		gconreg(AMOVL, q, D_CX);
   128  		gins(AREP, N, N);	// repeat
   129  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   130  	} else
   131  	while(q > 0) {
   132  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   133  		q--;
   134  	}
   135  
   136  	if(c >= 4) {
   137  		gconreg(AMOVL, c, D_CX);
   138  		gins(AREP, N, N);	// repeat
   139  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   140  	} else
   141  	while(c > 0) {
   142  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   143  		c--;
   144  	}
   145  }
   146  
/*
 * generate:
 *	call f
 *	proc=-1	normal call but no return
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *	proc=3	normal call to C pointer (not Go func value)
 *
 * Any other value of proc is a fatal error.
 * When f has a type, setmaxarg is called with it before anything is emitted.
 */
void
ginscall(Node *f, int proc)
{
	int32 arg;
	Prog *p;
	Node reg, r1, con;

	if(f->type != T)
		setmaxarg(f->type);

	// arg stays -1 unless call-specific argument size metadata is needed.
	arg = -1;
	// Most functions have a fixed-size argument block, so traceback uses that during unwind.
	// Not all, though: there are some variadic functions in package runtime,
	// and for those we emit call-specific metadata recorded by caller.
	// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
	// so we do this for all indirect calls as well.
	if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
		arg = f->type->argwid;
		// go/defer push two extra words (see case 1/2 below).
		if(proc == 1 || proc == 2)
			arg += 2*widthptr;
	}

	if(arg != -1)
		gargsize(arg);

	switch(proc) {
	default:
		fatal("ginscall: bad proc %d", proc);
		break;

	case 0:	// normal call
	case -1:	// normal call but no return
		// direct call of a named function
		if(f->op == ONAME && f->class == PFUNC) {
			if(f == deferreturn) {
				// Deferred calls will appear to be returning to
				// the CALL deferreturn(SB) that we are about to emit.
				// However, the stack trace code will show the line
				// of the instruction byte before the return PC.
				// To avoid that being an unrelated instruction,
				// insert an x86 NOP so that we will have the right line number.
				// x86 NOP 0x90 is really XCHG AX, AX; use that description
				// because the NOP pseudo-instruction will be removed by
				// the linker.
				nodreg(&reg, types[TINT], D_AX);
				gins(AXCHGL, &reg, &reg);
			}
			p = gins(ACALL, N, f);
			afunclit(&p->to, f);
			// the call does not return: follow it with UNDEF
			if(proc == -1 || noreturn(p))
				gins(AUNDEF, N, N);
			break;
		}
		// indirect call: move f into DX, load the word DX points at
		// into BX, then emit the CALL with DX and BX as operands.
		nodreg(&reg, types[tptr], D_DX);
		nodreg(&r1, types[tptr], D_BX);
		gmove(f, &reg);
		reg.op = OINDREG;
		gmove(&reg, &r1);
		reg.op = OREGISTER;
		gins(ACALL, &reg, &r1);
		break;

	case 3:	// normal call of c function pointer
		gins(ACALL, N, f);
		break;

	case 1:	// call in new proc (go)
	case 2:	// deferred call (defer)
		// push f and the argument size, call newproc/deferproc,
		// then pop the two pushed words into CX.
		nodreg(&reg, types[TINT32], D_CX);
		gins(APUSHL, f, N);
		nodconst(&con, types[TINT32], argsize(f->type));
		gins(APUSHL, &con, N);
		if(proc == 1)
			ginscall(newproc, 0);
		else
			ginscall(deferproc, 0);
		gins(APOPL, N, &reg);
		gins(APOPL, N, &reg);
		if(proc == 2) {
			// after deferproc: if AX is non-zero, jump to retpc
			nodreg(&reg, types[TINT64], D_AX);
			gins(ATESTL, &reg, &reg);
			patch(gbranch(AJNE, T, -1), retpc);
		}
		break;
	}

	// close the argument size annotation opened above
	if(arg != -1)
		gargsize(-1);
}
   244  
/*
 * n is call to interface method.
 * generate res = n.
 *
 * n->left must be an ODOTINTER node whose right child is the ONAME of
 * the method and whose left child is the interface value.
 * proc has the same meaning as for ginscall; a plain call (proc == 0)
 * is turned into a call through a C function pointer (proc == 3).
 */
void
cgen_callinter(Node *n, Node *res, int proc)
{
	Node *i, *f;
	Node tmpi, nodi, nodo, nodr, nodsp;

	i = n->left;
	if(i->op != ODOTINTER)
		fatal("cgen_callinter: not ODOTINTER %O", i->op);

	f = i->right;		// field
	if(f->op != ONAME)
		fatal("cgen_callinter: not ONAME %O", f->op);

	i = i->left;		// interface

	// evaluate a non-addressable interface value into a temporary first
	if(!i->addable) {
		tempname(&tmpi, i->type);
		cgen(i, &tmpi);
		i = &tmpi;
	}

	genlist(n->list);		// assign the args

	// i is now addable, prepare an indirected
	// register to hold its address.
	igen(i, &nodi, res);		// REG = &inter

	nodindreg(&nodsp, types[tptr], D_SP);
	nodi.type = types[tptr];
	nodi.xoffset += widthptr;
	cgen(&nodi, &nodsp);	// 0(SP) = 4(REG) -- i.data

	regalloc(&nodo, types[tptr], res);
	nodi.type = types[tptr];
	nodi.xoffset -= widthptr;
	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
	regfree(&nodi);

	regalloc(&nodr, types[tptr], &nodo);
	if(n->left->xoffset == BADWIDTH)
		fatal("cgen_callinter: badwidth");
	// nil-check the table pointer before indexing into it
	cgen_checknil(&nodo);
	nodo.op = OINDREG;
	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;

	if(proc == 0) {
		// plain call: use direct c function pointer - more efficient
		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
		proc = 3;
	} else {
		// go/defer. generate go func value.
		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f]
	}

	// give the register the method's type so ginscall can size the call
	nodr.type = n->left->type;
	ginscall(&nodr, proc);

	regfree(&nodr);
	regfree(&nodo);
}
   310  
   311  /*
   312   * generate function call;
   313   *	proc=0	normal call
   314   *	proc=1	goroutine run in new proc
   315   *	proc=2	defer call save away stack
   316   */
   317  void
   318  cgen_call(Node *n, int proc)
   319  {
   320  	Type *t;
   321  	Node nod, afun;
   322  
   323  	if(n == N)
   324  		return;
   325  
   326  	if(n->left->ullman >= UINF) {
   327  		// if name involves a fn call
   328  		// precompute the address of the fn
   329  		tempname(&afun, types[tptr]);
   330  		cgen(n->left, &afun);
   331  	}
   332  
   333  	genlist(n->list);		// assign the args
   334  	t = n->left->type;
   335  
   336  	// call tempname pointer
   337  	if(n->left->ullman >= UINF) {
   338  		regalloc(&nod, types[tptr], N);
   339  		cgen_as(&nod, &afun);
   340  		nod.type = t;
   341  		ginscall(&nod, proc);
   342  		regfree(&nod);
   343  		return;
   344  	}
   345  
   346  	// call pointer
   347  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   348  		regalloc(&nod, types[tptr], N);
   349  		cgen_as(&nod, n->left);
   350  		nod.type = t;
   351  		ginscall(&nod, proc);
   352  		regfree(&nod);
   353  		return;
   354  	}
   355  
   356  	// call direct
   357  	n->left->method = 1;
   358  	ginscall(n->left, proc);
   359  }
   360  
   361  /*
   362   * call to n has already been generated.
   363   * generate:
   364   *	res = return value from call.
   365   */
   366  void
   367  cgen_callret(Node *n, Node *res)
   368  {
   369  	Node nod;
   370  	Type *fp, *t;
   371  	Iter flist;
   372  
   373  	t = n->left->type;
   374  	if(t->etype == TPTR32 || t->etype == TPTR64)
   375  		t = t->type;
   376  
   377  	fp = structfirst(&flist, getoutarg(t));
   378  	if(fp == T)
   379  		fatal("cgen_callret: nil");
   380  
   381  	memset(&nod, 0, sizeof(nod));
   382  	nod.op = OINDREG;
   383  	nod.val.u.reg = D_SP;
   384  	nod.addable = 1;
   385  
   386  	nod.xoffset = fp->width;
   387  	nod.type = fp->type;
   388  	cgen_as(res, &nod);
   389  }
   390  
   391  /*
   392   * call to n has already been generated.
   393   * generate:
   394   *	res = &return value from call.
   395   */
   396  void
   397  cgen_aret(Node *n, Node *res)
   398  {
   399  	Node nod1, nod2;
   400  	Type *fp, *t;
   401  	Iter flist;
   402  
   403  	t = n->left->type;
   404  	if(isptr[t->etype])
   405  		t = t->type;
   406  
   407  	fp = structfirst(&flist, getoutarg(t));
   408  	if(fp == T)
   409  		fatal("cgen_aret: nil");
   410  
   411  	memset(&nod1, 0, sizeof(nod1));
   412  	nod1.op = OINDREG;
   413  	nod1.val.u.reg = D_SP;
   414  	nod1.addable = 1;
   415  
   416  	nod1.xoffset = fp->width;
   417  	nod1.type = fp->type;
   418  
   419  	if(res->op != OREGISTER) {
   420  		regalloc(&nod2, types[tptr], res);
   421  		gins(ALEAL, &nod1, &nod2);
   422  		gins(AMOVL, &nod2, res);
   423  		regfree(&nod2);
   424  	} else
   425  		gins(ALEAL, &nod1, res);
   426  }
   427  
   428  /*
   429   * generate return.
   430   * n->left is assignments to return values.
   431   */
   432  void
   433  cgen_ret(Node *n)
   434  {
   435  	Prog *p;
   436  
   437  	genlist(n->list);		// copy out args
   438  	if(retpc) {
   439  		gjmp(retpc);
   440  		return;
   441  	}
   442  	p = gins(ARET, N, N);
   443  	if(n->op == ORETJMP) {
   444  		p->to.type = D_EXTERN;
   445  		p->to.sym = n->left->sym;
   446  	}
   447  }
   448  
/*
 * generate += *= etc.
 *
 * n->left is the destination, n->right the operand and n->etype the
 * underlying binary operator.  Integer operands that are not 64-bit
 * get fast paths operating on the destination in place; everything
 * else goes through the general "hard" path.
 */
void
cgen_asop(Node *n)
{
	Node n1, n2, n3, n4;
	Node *nl, *nr;
	Prog *p1;
	Addr addr;
	int a;

	nl = n->left;
	nr = n->right;

	// Both sides involve function calls: evaluate the right side into
	// a temporary first, then redo the operation on a copy of n that
	// uses the temporary as its right operand.
	if(nr->ullman >= UINF && nl->ullman >= UINF) {
		tempname(&n1, nr->type);
		cgen(nr, &n1);
		n2 = *n;
		n2.right = &n1;
		cgen_asop(&n2);
		goto ret;
	}

	// the in-place fast paths only apply to non-64-bit integers
	if(!isint[nl->type->etype])
		goto hard;
	if(!isint[nr->type->etype])
		goto hard;
	if(is64(nl->type) || is64(nr->type))
		goto hard;

	// x += 1 and x -= 1 become a single INC/DEC on the destination
	switch(n->etype) {
	case OADD:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(OINC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;

	case OSUB:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(ODEC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;
	}

	// two-operand instructions applied directly to the destination
	switch(n->etype) {
	case OADD:
	case OSUB:
	case OXOR:
	case OAND:
	case OOR:
		a = optoas(n->etype, nl->type);
		if(nl->addable) {
			// small constant operand: use it as an immediate
			if(smallintconst(nr)) {
				gins(a, nr, nl);
				goto ret;
			}
			// otherwise evaluate the operand into a register
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			gins(a, &n2, nl);
			regfree(&n2);
			goto ret;
		}
		// destination not addable: try to express it as an address
		// operand, but only when the right side has no function call
		if(nr->ullman < UINF)
		if(sudoaddable(a, nl, &addr)) {
			if(smallintconst(nr)) {
				p1 = gins(a, nr, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			p1 = gins(a, &n2, N);
			p1->to = addr;
			regfree(&n2);
			sudoclean();
			goto ret;
		}
	}

hard:
	// General case: materialize the right operand, make the left
	// operand addressable, compute "nl op nr" and store it back into nl.
	// n1.op/n2.op are cleared so the cleanup below can tell what was set up.
	n2.op = 0;
	n1.op = 0;
	if(nr->ullman >= nl->ullman || nl->addable) {
		mgen(nr, &n2, N);
		nr = &n2;
	} else {
		tempname(&n2, nr->type);
		cgen(nr, &n2);
		nr = &n2;
	}
	if(!nl->addable) {
		igen(nl, &n1, N);
		nl = &n1;
	}

	// n3 is a copy of n turned into the plain binary operation
	n3 = *n;
	n3.left = nl;
	n3.right = nr;
	n3.op = n->etype;

	mgen(&n3, &n4, N);
	gmove(&n4, nl);

	// n1 only holds a register if igen was called above
	if(n1.op)
		regfree(&n1);
	mfree(&n2);
	mfree(&n4);

ret:
	;
}
   584  
   585  int
   586  samereg(Node *a, Node *b)
   587  {
   588  	if(a->op != OREGISTER)
   589  		return 0;
   590  	if(b->op != OREGISTER)
   591  		return 0;
   592  	if(a->val.u.reg != b->val.u.reg)
   593  		return 0;
   594  	return 1;
   595  }
   596  
   597  /*
   598   * generate division.
   599   * caller must set:
   600   *	ax = allocated AX register
   601   *	dx = allocated DX register
   602   * generates one of:
   603   *	res = nl / nr
   604   *	res = nl % nr
   605   * according to op.
   606   */
   607  void
   608  dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
   609  {
   610  	int check;
   611  	Node n1, t1, t2, t3, t4, n4, nz;
   612  	Type *t, *t0;
   613  	Prog *p1, *p2;
   614  
   615  	// Have to be careful about handling
   616  	// most negative int divided by -1 correctly.
   617  	// The hardware will trap.
   618  	// Also the byte divide instruction needs AH,
   619  	// which we otherwise don't have to deal with.
   620  	// Easiest way to avoid for int8, int16: use int32.
   621  	// For int32 and int64, use explicit test.
   622  	// Could use int64 hw for int32.
   623  	t = nl->type;
   624  	t0 = t;
   625  	check = 0;
   626  	if(issigned[t->etype]) {
   627  		check = 1;
   628  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
   629  			check = 0;
   630  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   631  			check = 0;
   632  	}
   633  	if(t->width < 4) {
   634  		if(issigned[t->etype])
   635  			t = types[TINT32];
   636  		else
   637  			t = types[TUINT32];
   638  		check = 0;
   639  	}
   640  
   641  	tempname(&t1, t);
   642  	tempname(&t2, t);
   643  	if(t0 != t) {
   644  		tempname(&t3, t0);
   645  		tempname(&t4, t0);
   646  		cgen(nl, &t3);
   647  		cgen(nr, &t4);
   648  		// Convert.
   649  		gmove(&t3, &t1);
   650  		gmove(&t4, &t2);
   651  	} else {
   652  		cgen(nl, &t1);
   653  		cgen(nr, &t2);
   654  	}
   655  
   656  	if(!samereg(ax, res) && !samereg(dx, res))
   657  		regalloc(&n1, t, res);
   658  	else
   659  		regalloc(&n1, t, N);
   660  	gmove(&t2, &n1);
   661  	gmove(&t1, ax);
   662  	p2 = P;
   663  	if(check) {
   664  		nodconst(&n4, t, -1);
   665  		gins(optoas(OCMP, t), &n1, &n4);
   666  		p1 = gbranch(optoas(ONE, t), T, +1);
   667  		if(op == ODIV) {
   668  			// a / (-1) is -a.
   669  			gins(optoas(OMINUS, t), N, ax);
   670  			gmove(ax, res);
   671  		} else {
   672  			// a % (-1) is 0.
   673  			nodconst(&n4, t, 0);
   674  			gmove(&n4, res);
   675  		}
   676  		p2 = gbranch(AJMP, T, 0);
   677  		patch(p1, pc);
   678  	}
   679  	if(!issigned[t->etype]) {
   680  		nodconst(&nz, t, 0);
   681  		gmove(&nz, dx);
   682  	} else
   683  		gins(optoas(OEXTEND, t), N, N);
   684  	gins(optoas(op, t), &n1, N);
   685  	regfree(&n1);
   686  
   687  	if(op == ODIV)
   688  		gmove(ax, res);
   689  	else
   690  		gmove(dx, res);
   691  	if(check)
   692  		patch(p2, pc);
   693  }
   694  
   695  static void
   696  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   697  {
   698  	int r;
   699  
   700  	r = reg[dr];
   701  	nodreg(x, types[TINT32], dr);
   702  
   703  	// save current ax and dx if they are live
   704  	// and not the destination
   705  	memset(oldx, 0, sizeof *oldx);
   706  	if(r > 0 && !samereg(x, res)) {
   707  		tempname(oldx, types[TINT32]);
   708  		gmove(x, oldx);
   709  	}
   710  
   711  	regalloc(x, t, x);
   712  }
   713  
   714  static void
   715  restx(Node *x, Node *oldx)
   716  {
   717  	regfree(x);
   718  
   719  	if(oldx->op != 0) {
   720  		x->type = types[TINT32];
   721  		gmove(oldx, x);
   722  	}
   723  }
   724  
   725  /*
   726   * generate division according to op, one of:
   727   *	res = nl / nr
   728   *	res = nl % nr
   729   */
   730  void
   731  cgen_div(int op, Node *nl, Node *nr, Node *res)
   732  {
   733  	Node ax, dx, oldax, olddx;
   734  	Type *t;
   735  
   736  	if(is64(nl->type))
   737  		fatal("cgen_div %T", nl->type);
   738  
   739  	if(issigned[nl->type->etype])
   740  		t = types[TINT32];
   741  	else
   742  		t = types[TUINT32];
   743  	savex(D_AX, &ax, &oldax, res, t);
   744  	savex(D_DX, &dx, &olddx, res, t);
   745  	dodiv(op, nl, nr, res, &ax, &dx);
   746  	restx(&dx, &olddx);
   747  	restx(&ax, &oldax);
   748  }
   749  
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 *
 * nl must be at most 4 bytes wide (anything wider is a fatal error).
 * If bounded is set, no test for shift counts >= the operand width
 * is emitted.
 */
void
cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, nt, cx, oldcx, hi, lo;
	int a, w;
	Prog *p1, *p2;
	uvlong sc;

	if(nl->type->width > 4)
		fatal("cgen_shift %T", nl->type);

	// w is the width of the shifted operand in bits
	w = nl->type->width * 8;

	a = optoas(op, nl->type);

	// constant shift count: no need for CX
	if(nr->op == OLITERAL) {
		tempname(&n2, nl->type);
		cgen(nl, &n2);
		regalloc(&n1, nl->type, res);
		gmove(&n2, &n1);
		sc = mpgetfix(nr->val.u.xval);
		if(sc >= nl->type->width*8) {
			// large shift gets 2 shifts by width-1
			gins(a, ncon(w-1), &n1);
			gins(a, ncon(w-1), &n1);
		} else
			gins(a, nr, &n1);
		gmove(&n1, res);
		regfree(&n1);
		return;
	}

	// variable shift count lives in CX; save CX's current value
	// if the use-count test says so and CX is not the destination
	memset(&oldcx, 0, sizeof oldcx);
	nodreg(&cx, types[TUINT32], D_CX);
	if(reg[D_CX] > 1 && !samereg(&cx, res)) {
		tempname(&oldcx, types[TUINT32]);
		gmove(&cx, &oldcx);
	}

	// a count wider than 4 bytes is first evaluated into a temporary;
	// CX is allocated for it later ("delayed reg alloc" below)
	if(nr->type->width > 4) {
		tempname(&nt, nr->type);
		n1 = nt;
	} else {
		nodreg(&n1, types[TUINT32], D_CX);
		regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
	}

	// n2 holds the value being shifted; avoid CX for it
	if(samereg(&cx, res))
		regalloc(&n2, nl->type, N);
	else
		regalloc(&n2, nl->type, res);
	// evaluate the operand with the larger ullman number first
	if(nl->ullman >= nr->ullman) {
		cgen(nl, &n2);
		cgen(nr, &n1);
	} else {
		cgen(nr, &n1);
		cgen(nl, &n2);
	}

	// test and fix up large shifts
	if(bounded) {
		if(nr->type->width > 4) {
			// delayed reg alloc
			nodreg(&n1, types[TUINT32], D_CX);
			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
			// only the low word of the count is needed
			split64(&nt, &lo, &hi);
			gmove(&lo, &n1);
			splitclean();
		}
	} else {
		if(nr->type->width > 4) {
			// delayed reg alloc
			nodreg(&n1, types[TUINT32], D_CX);
			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
			split64(&nt, &lo, &hi);
			gmove(&lo, &n1);
			// count is too large if the high word is non-zero (p2)
			// or the low word is not below w (falls through past p1)
			gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
			p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
			gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
			splitclean();
			patch(p2, pc);
		} else {
			gins(optoas(OCMP, nr->type), &n1, ncon(w));
			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
		}
		// oversized count: pre-shift a signed right shift by w-1,
		// otherwise zero the value; the final shift below still runs
		if(op == ORSH && issigned[nl->type->etype]) {
			gins(a, ncon(w-1), &n2);
		} else {
			gmove(ncon(0), &n2);
		}
		patch(p1, pc);
	}
	gins(a, &n1, &n2);

	// restore CX if it was saved above
	if(oldcx.op != 0)
		gmove(&oldcx, &cx);

	gmove(&n2, res);

	regfree(&n1);
	regfree(&n2);
}
   858  
   859  /*
   860   * generate byte multiply:
   861   *	res = nl * nr
   862   * there is no 2-operand byte multiply instruction so
   863   * we do a full-width multiplication and truncate afterwards.
   864   */
   865  void
   866  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
   867  {
   868  	Node n1, n2, nt, *tmp;
   869  	Type *t;
   870  	int a;
   871  
   872  	// copy from byte to full registers
   873  	t = types[TUINT32];
   874  	if(issigned[nl->type->etype])
   875  		t = types[TINT32];
   876  
   877  	// largest ullman on left.
   878  	if(nl->ullman < nr->ullman) {
   879  		tmp = nl;
   880  		nl = nr;
   881  		nr = tmp;
   882  	}
   883  
   884  	tempname(&nt, nl->type);
   885  	cgen(nl, &nt);
   886  	regalloc(&n1, t, res);
   887  	cgen(nr, &n1);
   888  	regalloc(&n2, t, N);
   889  	gmove(&nt, &n2);
   890  	a = optoas(op, t);
   891  	gins(a, &n2, &n1);
   892  	regfree(&n2);
   893  	gmove(&n1, res);
   894  	regfree(&n1);
   895  }
   896  
   897  /*
   898   * generate high multiply:
   899   *   res = (nl*nr) >> width
   900   */
   901  void
   902  cgen_hmul(Node *nl, Node *nr, Node *res)
   903  {
   904  	Type *t;
   905  	int a;
   906  	Node n1, n2, ax, dx;
   907  
   908  	t = nl->type;
   909  	a = optoas(OHMUL, t);
   910  	// gen nl in n1.
   911  	tempname(&n1, t);
   912  	cgen(nl, &n1);
   913  	// gen nr in n2.
   914  	regalloc(&n2, t, res);
   915  	cgen(nr, &n2);
   916  
   917  	// multiply.
   918  	nodreg(&ax, t, D_AX);
   919  	gmove(&n2, &ax);
   920  	gins(a, &n1, N);
   921  	regfree(&n2);
   922  
   923  	if(t->width == 1) {
   924  		// byte multiply behaves differently.
   925  		nodreg(&ax, t, D_AH);
   926  		nodreg(&dx, t, D_DL);
   927  		gmove(&ax, &dx);
   928  	}
   929  	nodreg(&dx, t, D_DX);
   930  	gmove(&dx, res);
   931  }
   932  
   933  static void cgen_float387(Node *n, Node *res);
   934  static void cgen_floatsse(Node *n, Node *res);
   935  
   936  /*
   937   * generate floating-point operation.
   938   */
   939  void
   940  cgen_float(Node *n, Node *res)
   941  {
   942  	Node *nl;
   943  	Node n1, n2;
   944  	Prog *p1, *p2, *p3;
   945  
   946  	nl = n->left;
   947  	switch(n->op) {
   948  	case OEQ:
   949  	case ONE:
   950  	case OLT:
   951  	case OLE:
   952  	case OGE:
   953  		p1 = gbranch(AJMP, T, 0);
   954  		p2 = pc;
   955  		gmove(nodbool(1), res);
   956  		p3 = gbranch(AJMP, T, 0);
   957  		patch(p1, pc);
   958  		bgen(n, 1, 0, p2);
   959  		gmove(nodbool(0), res);
   960  		patch(p3, pc);
   961  		return;
   962  
   963  	case OPLUS:
   964  		cgen(nl, res);
   965  		return;
   966  
   967  	case OCONV:
   968  		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
   969  			cgen(nl, res);
   970  			return;
   971  		}
   972  
   973  		tempname(&n2, n->type);
   974  		mgen(nl, &n1, res);
   975  		gmove(&n1, &n2);
   976  		gmove(&n2, res);
   977  		mfree(&n1);
   978  		return;
   979  	}
   980  
   981  	if(use_sse)
   982  		cgen_floatsse(n, res);
   983  	else
   984  		cgen_float387(n, res);
   985  }
   986  
   987  // floating-point.  387 (not SSE2)
   988  static void
   989  cgen_float387(Node *n, Node *res)
   990  {
   991  	Node f0, f1;
   992  	Node *nl, *nr;
   993  
   994  	nl = n->left;
   995  	nr = n->right;
   996  	nodreg(&f0, nl->type, D_F0);
   997  	nodreg(&f1, n->type, D_F0+1);
   998  	if(nr != N)
   999  		goto flt2;
  1000  
  1001  	// unary
  1002  	cgen(nl, &f0);
  1003  	if(n->op != OCONV && n->op != OPLUS)
  1004  		gins(foptoas(n->op, n->type, 0), N, N);
  1005  	gmove(&f0, res);
  1006  	return;
  1007  
  1008  flt2:	// binary
  1009  	if(nl->ullman >= nr->ullman) {
  1010  		cgen(nl, &f0);
  1011  		if(nr->addable)
  1012  			gins(foptoas(n->op, n->type, 0), nr, &f0);
  1013  		else {
  1014  			cgen(nr, &f0);
  1015  			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
  1016  		}
  1017  	} else {
  1018  		cgen(nr, &f0);
  1019  		if(nl->addable)
  1020  			gins(foptoas(n->op, n->type, Frev), nl, &f0);
  1021  		else {
  1022  			cgen(nl, &f0);
  1023  			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
  1024  		}
  1025  	}
  1026  	gmove(&f0, res);
  1027  	return;
  1028  
  1029  }
  1030  
// Floating-point code generation using SSE instructions.
// Generates res = n for negation/complement and the binary arithmetic
// operators; any other operation is a fatal error.
static void
cgen_floatsse(Node *n, Node *res)
{
	Node *nl, *nr, *r;
	Node n1, n2, nt;
	int a;

	nl = n->left;
	nr = n->right;
	switch(n->op) {
	default:
		dump("cgen_floatsse", n);
		fatal("cgen_floatsse %O", n->op);
		return;

	case OMINUS:
	case OCOM:
		// implemented as a multiplication by the constant -1
		nr = nodintconst(-1);
		convlit(&nr, n->type);
		a = foptoas(OMUL, nl->type, 0);
		goto sbop;

	// symmetric binary
	case OADD:
	case OMUL:
		a = foptoas(n->op, nl->type, 0);
		goto sbop;

	// asymmetric binary
	case OSUB:
	case OMOD:
	case ODIV:
		a = foptoas(n->op, nl->type, 0);
		goto abop;
	}

sbop:	// symmetric binary
	// operands may be swapped: put the larger ullman number on the
	// left, and move a literal left operand to the right
	if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
		r = nl;
		nl = nr;
		nr = r;
	}
	// falls through into abop

abop:	// asymmetric binary
	if(nl->ullman >= nr->ullman) {
		// left first, parked in a temporary while the right is generated
		tempname(&nt, nl->type);
		cgen(nl, &nt);
		mgen(nr, &n2, N);
		regalloc(&n1, nl->type, res);
		gmove(&nt, &n1);
		gins(a, &n2, &n1);
		gmove(&n1, res);
		regfree(&n1);
		mfree(&n2);
	} else {
		// right first, both operands in registers
		regalloc(&n2, nr->type, res);
		cgen(nr, &n2);
		regalloc(&n1, nl->type, N);
		cgen(nl, &n1);
		gins(a, &n2, &n1);
		regfree(&n2);
		gmove(&n1, res);
		regfree(&n1);
	}
	return;
}
  1097  
/*
 * generate a conditional branch on the floating-point comparison n:
 *	if(n == true) goto to
 * likely is passed on to gbranch as the branch prediction hint.
 * The comparison is emitted with 387 or SSE instructions depending
 * on use_sse; the branches on the resulting flags are shared.
 */
void
bgen_float(Node *n, int true, int likely, Prog *to)
{
	int et, a;
	Node *nl, *nr, *r;
	Node n1, n2, n3, tmp, t1, t2, ax;
	Prog *p1, *p2;

	nl = n->left;
	nr = n->right;
	a = n->op;
	if(!true) {
		// brcom is not valid on floats when NaN is involved.
		// Instead branch around: generate "if n goto p2", followed by
		// an unconditional jump to the target; p2 lands after it.
		p1 = gbranch(AJMP, T, 0);
		p2 = gbranch(AJMP, T, 0);
		patch(p1, pc);
		// No need to avoid re-genning ninit.
		bgen_float(n, 1, -likely, p2);
		patch(gbranch(AJMP, T, 0), to);
		patch(p2, pc);
		return;
	}

	if(use_sse)
		goto sse;
	else
		goto x87;

x87:
	a = brrev(a);	// because the args are stacked
	if(a == OGE || a == OGT) {
		// only < and <= work right with NaN; reverse if needed
		r = nr;
		nr = nl;
		nl = r;
		a = brrev(a);
	}

	nodreg(&tmp, nr->type, D_F0);
	nodreg(&n2, nr->type, D_F0 + 1);
	nodreg(&ax, types[TUINT16], D_AX);
	et = simsimtype(nr->type);
	if(et == TFLOAT64) {
		// load both operands onto the register stack; when nl was
		// evaluated first, exchange the two entries afterwards
		if(nl->ullman > nr->ullman) {
			cgen(nl, &tmp);
			cgen(nr, &tmp);
			gins(AFXCHD, &tmp, &n2);
		} else {
			cgen(nr, &tmp);
			cgen(nl, &tmp);
		}
		gins(AFUCOMIP, &tmp, &n2);
		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF
	} else {
		// TODO(rsc): The moves back and forth to memory
		// here are for truncating the value to 32 bits.
		// This handles 32-bit comparison but presumably
		// all the other ops have the same problem.
		// We need to figure out what the right general
		// solution is, besides telling people to use float64.
		tempname(&t1, types[TFLOAT32]);
		tempname(&t2, types[TFLOAT32]);
		cgen(nr, &t1);
		cgen(nl, &t2);
		gmove(&t2, &tmp);
		gins(AFCOMFP, &t1, &tmp);
		// copy the FPU status word into the CPU flags via AX
		gins(AFSTSW, N, &ax);
		gins(ASAHF, N, N);
	}

	goto ret;

sse:
	// make both operands addressable, spilling to temporaries if needed
	if(!nl->addable) {
		tempname(&n1, nl->type);
		cgen(nl, &n1);
		nl = &n1;
	}
	if(!nr->addable) {
		tempname(&tmp, nr->type);
		cgen(nr, &tmp);
		nr = &tmp;
	}
	// the right operand always goes into a register,
	// the left one only if it is not in one already
	regalloc(&n2, nr->type, N);
	gmove(nr, &n2);
	nr = &n2;

	if(nl->op != OREGISTER) {
		regalloc(&n3, nl->type, N);
		gmove(nl, &n3);
		nl = &n3;
	}

	if(a == OGE || a == OGT) {
		// only < and <= work right with NaN; reverse if needed
		r = nr;
		nr = nl;
		nl = r;
		a = brrev(a);
	}

	gins(foptoas(OCMP, nr->type, 0), nl, nr);
	if(nl->op == OREGISTER)
		regfree(nl);
	regfree(nr);

ret:
	// branch on the flags set by the comparison above
	if(a == OEQ) {
		// neither NE nor P
		p1 = gbranch(AJNE, T, -likely);
		p2 = gbranch(AJPS, T, -likely);
		patch(gbranch(AJMP, T, 0), to);
		patch(p1, pc);
		patch(p2, pc);
	} else if(a == ONE) {
		// either NE or P
		patch(gbranch(AJNE, T, likely), to);
		patch(gbranch(AJPS, T, likely), to);
	} else
		patch(gbranch(optoas(a, nr->type), T, likely), to);

}
  1220  
  1221  // Called after regopt and peep have run.
  1222  // Expand CHECKNIL pseudo-op into actual nil pointer check.
  1223  void
  1224  expandchecks(Prog *firstp)
  1225  {
  1226  	Prog *p, *p1, *p2;
  1227  
  1228  	for(p = firstp; p != P; p = p->link) {
  1229  		if(p->as != ACHECKNIL)
  1230  			continue;
  1231  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
  1232  			warnl(p->lineno, "nil check %D", &p->from);
  1233  		// check is
  1234  		//	CMP arg, $0
  1235  		//	JNE 2(PC) (likely)
  1236  		//	MOV AX, 0
  1237  		p1 = mal(sizeof *p1);
  1238  		p2 = mal(sizeof *p2);
  1239  		clearp(p1);
  1240  		clearp(p2);
  1241  		p1->link = p2;
  1242  		p2->link = p->link;
  1243  		p->link = p1;
  1244  		p1->lineno = p->lineno;
  1245  		p2->lineno = p->lineno;
  1246  		p1->loc = 9999;
  1247  		p2->loc = 9999;
  1248  		p->as = ACMPL;
  1249  		p->to.type = D_CONST;
  1250  		p->to.offset = 0;
  1251  		p1->as = AJNE;
  1252  		p1->from.type = D_CONST;
  1253  		p1->from.offset = 1; // likely
  1254  		p1->to.type = D_BRANCH;
  1255  		p1->to.u.branch = p2->link;
  1256  		// crash by write to memory address 0.
  1257  		// if possible, since we know arg is 0, use 0(arg),
  1258  		// which will be shorter to encode than plain 0.
  1259  		p2->as = AMOVL;
  1260  		p2->from.type = D_AX;
  1261  		if(regtyp(&p->from))
  1262  			p2->to.type = p->from.type + D_INDIR;
  1263  		else
  1264  			p2->to.type = D_INDIR+D_NONE;
  1265  		p2->to.offset = 0;
  1266  	}
  1267  }