github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/8g/ggen.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #undef	EXTERN
     6  #define	EXTERN
     7  #include <u.h>
     8  #include <libc.h>
     9  #include "gg.h"
    10  #include "opt.h"
    11  
    12  static Prog* appendp(Prog*, int, int, int32, int, int32);
    13  
    14  void
    15  defframe(Prog *ptxt, Bvec *bv)
    16  {
    17  	uint32 frame;
    18  	Prog *p;
    19  	int i, j;
    20  
    21  	// fill in argument size
    22  	ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
    23  
    24  	// fill in final stack size
    25  	if(stksize > maxstksize)
    26  		maxstksize = stksize;
    27  	frame = rnd(maxstksize+maxarg, widthptr);
    28  	ptxt->to.offset = frame;
    29  	maxstksize = 0;
    30  
    31  	// insert code to clear pointered part of the frame,
    32  	// so that garbage collector only sees initialized values
    33  	// when it looks for pointers.
    34  	p = ptxt;
    35  	if(stkzerosize >= 8*widthptr) {
    36  		p = appendp(p, AMOVL, D_CONST, 0, D_AX, 0);
    37  		p = appendp(p, AMOVL, D_CONST, stkzerosize/widthptr, D_CX, 0);
    38  		p = appendp(p, ALEAL, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0);
    39  		p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
    40  		appendp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
    41  	} else {
    42  		j = (stkptrsize - stkzerosize)/widthptr * 2;
    43  		for(i=0; i<stkzerosize; i+=widthptr) {
    44  			if(bvget(bv, j) || bvget(bv, j+1))
    45  				p = appendp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i);
    46  			j += 2;
    47  		}
    48  	}
    49  }
    50  
    51  static Prog*
    52  appendp(Prog *p, int as, int ftype, int32 foffset, int ttype, int32 toffset)
    53  {
    54  	Prog *q;
    55  	
    56  	q = mal(sizeof(*q));
    57  	clearp(q);
    58  	q->as = as;
    59  	q->lineno = p->lineno;
    60  	q->from.type = ftype;
    61  	q->from.offset = foffset;
    62  	q->to.type = ttype;
    63  	q->to.offset = toffset;
    64  	q->link = p->link;
    65  	p->link = q;
    66  	return q;
    67  }
    68  
    69  // Sweep the prog list to mark any used nodes.
    70  void
    71  markautoused(Prog* p)
    72  {
    73  	for (; p; p = p->link) {
    74  		if (p->as == ATYPE)
    75  			continue;
    76  
    77  		if (p->from.type == D_AUTO && p->from.node)
    78  			p->from.node->used = 1;
    79  
    80  		if (p->to.type == D_AUTO && p->to.node)
    81  			p->to.node->used = 1;
    82  	}
    83  }
    84  
    85  // Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
    86  void
    87  fixautoused(Prog* p)
    88  {
    89  	Prog **lp;
    90  
    91  	for (lp=&p; (p=*lp) != P; ) {
    92  		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
    93  			*lp = p->link;
    94  			continue;
    95  		}
    96  
    97  		if (p->from.type == D_AUTO && p->from.node)
    98  			p->from.offset += p->from.node->stkdelta;
    99  
   100  		if (p->to.type == D_AUTO && p->to.node)
   101  			p->to.offset += p->to.node->stkdelta;
   102  
   103  		lp = &p->link;
   104  	}
   105  }
   106  
   107  void
   108  clearfat(Node *nl)
   109  {
   110  	uint32 w, c, q;
   111  	Node n1;
   112  
   113  	/* clear a fat object */
   114  	if(debug['g'])
   115  		dump("\nclearfat", nl);
   116  
   117  	w = nl->type->width;
   118  	// Avoid taking the address for simple enough types.
   119  	if(componentgen(N, nl))
   120  		return;
   121  
   122  	c = w % 4;	// bytes
   123  	q = w / 4;	// quads
   124  
   125  	nodreg(&n1, types[tptr], D_DI);
   126  	agen(nl, &n1);
   127  	gconreg(AMOVL, 0, D_AX);
   128  
   129  	if(q >= 4) {
   130  		gconreg(AMOVL, q, D_CX);
   131  		gins(AREP, N, N);	// repeat
   132  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   133  	} else
   134  	while(q > 0) {
   135  		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
   136  		q--;
   137  	}
   138  
   139  	if(c >= 4) {
   140  		gconreg(AMOVL, c, D_CX);
   141  		gins(AREP, N, N);	// repeat
   142  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   143  	} else
   144  	while(c > 0) {
   145  		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
   146  		c--;
   147  	}
   148  }
   149  
   150  /*
   151   * generate:
   152   *	call f
   153   *	proc=-1	normal call but no return
   154   *	proc=0	normal call
   155   *	proc=1	goroutine run in new proc
   156   *	proc=2	defer call save away stack
   157    *	proc=3	normal call to C pointer (not Go func value)
   158   */
   159  void
   160  ginscall(Node *f, int proc)
   161  {
   162  	int32 arg;
   163  	Prog *p;
   164  	Node reg, r1, con;
   165  
   166  	if(f->type != T)
   167  		setmaxarg(f->type);
   168  
   169  	arg = -1;
   170  	// Most functions have a fixed-size argument block, so traceback uses that during unwind.
   171  	// Not all, though: there are some variadic functions in package runtime,
   172  	// and for those we emit call-specific metadata recorded by caller.
   173  	// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
   174  	// so we do this for all indirect calls as well.
   175  	if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
   176  		arg = f->type->argwid;
   177  		if(proc == 1 || proc == 2)
   178  			arg += 2*widthptr;
   179  	}
   180  
   181  	if(arg != -1)
   182  		gargsize(arg);
   183  
   184  	switch(proc) {
   185  	default:
   186  		fatal("ginscall: bad proc %d", proc);
   187  		break;
   188  
   189  	case 0:	// normal call
   190  	case -1:	// normal call but no return
   191  		if(f->op == ONAME && f->class == PFUNC) {
   192  			if(f == deferreturn) {
   193  				// Deferred calls will appear to be returning to
   194  				// the CALL deferreturn(SB) that we are about to emit.
   195  				// However, the stack trace code will show the line
   196  				// of the instruction byte before the return PC. 
   197  				// To avoid that being an unrelated instruction,
   198  				// insert an x86 NOP that we will have the right line number.
   199  				// x86 NOP 0x90 is really XCHG AX, AX; use that description
   200  				// because the NOP pseudo-instruction will be removed by
   201  				// the linker.
   202  				nodreg(&reg, types[TINT], D_AX);
   203  				gins(AXCHGL, &reg, &reg);
   204  			}
   205  			p = gins(ACALL, N, f);
   206  			afunclit(&p->to, f);
   207  			if(proc == -1 || noreturn(p))
   208  				gins(AUNDEF, N, N);
   209  			break;
   210  		}
   211  		nodreg(&reg, types[tptr], D_DX);
   212  		nodreg(&r1, types[tptr], D_BX);
   213  		gmove(f, &reg);
   214  		reg.op = OINDREG;
   215  		gmove(&reg, &r1);
   216  		reg.op = OREGISTER;
   217  		gins(ACALL, &reg, &r1);
   218  		break;
   219  	
   220  	case 3:	// normal call of c function pointer
   221  		gins(ACALL, N, f);
   222  		break;
   223  
   224  	case 1:	// call in new proc (go)
   225  	case 2:	// deferred call (defer)
   226  		nodreg(&reg, types[TINT32], D_CX);
   227  		gins(APUSHL, f, N);
   228  		nodconst(&con, types[TINT32], argsize(f->type));
   229  		gins(APUSHL, &con, N);
   230  		if(proc == 1)
   231  			ginscall(newproc, 0);
   232  		else
   233  			ginscall(deferproc, 0);
   234  		gins(APOPL, N, &reg);
   235  		gins(APOPL, N, &reg);
   236  		if(proc == 2) {
   237  			nodreg(&reg, types[TINT64], D_AX);
   238  			gins(ATESTL, &reg, &reg);
   239  			patch(gbranch(AJNE, T, -1), retpc);
   240  		}
   241  		break;
   242  	}
   243  	
   244  	if(arg != -1)
   245  		gargsize(-1);
   246  }
   247  
   248  /*
   249   * n is call to interface method.
   250   * generate res = n.
   251   */
   252  void
   253  cgen_callinter(Node *n, Node *res, int proc)
   254  {
   255  	Node *i, *f;
   256  	Node tmpi, nodi, nodo, nodr, nodsp;
   257  
   258  	i = n->left;
   259  	if(i->op != ODOTINTER)
   260  		fatal("cgen_callinter: not ODOTINTER %O", i->op);
   261  
   262  	f = i->right;		// field
   263  	if(f->op != ONAME)
   264  		fatal("cgen_callinter: not ONAME %O", f->op);
   265  
   266  	i = i->left;		// interface
   267  
   268  	if(!i->addable) {
   269  		tempname(&tmpi, i->type);
   270  		cgen(i, &tmpi);
   271  		i = &tmpi;
   272  	}
   273  
   274  	genlist(n->list);		// assign the args
   275  
   276  	// i is now addable, prepare an indirected
   277  	// register to hold its address.
   278  	igen(i, &nodi, res);		// REG = &inter
   279  
   280  	nodindreg(&nodsp, types[tptr], D_SP);
   281  	nodi.type = types[tptr];
   282  	nodi.xoffset += widthptr;
   283  	cgen(&nodi, &nodsp);	// 0(SP) = 4(REG) -- i.data
   284  
   285  	regalloc(&nodo, types[tptr], res);
   286  	nodi.type = types[tptr];
   287  	nodi.xoffset -= widthptr;
   288  	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
   289  	regfree(&nodi);
   290  
   291  	regalloc(&nodr, types[tptr], &nodo);
   292  	if(n->left->xoffset == BADWIDTH)
   293  		fatal("cgen_callinter: badwidth");
   294  	cgen_checknil(&nodo);
   295  	nodo.op = OINDREG;
   296  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
   297  	
   298  	if(proc == 0) {
   299  		// plain call: use direct c function pointer - more efficient
   300  		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
   301  		proc = 3;
   302  	} else {
   303  		// go/defer. generate go func value.
   304  		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f]
   305  	}
   306  
   307  	nodr.type = n->left->type;
   308  	ginscall(&nodr, proc);
   309  
   310  	regfree(&nodr);
   311  	regfree(&nodo);
   312  }
   313  
   314  /*
   315   * generate function call;
   316   *	proc=0	normal call
   317   *	proc=1	goroutine run in new proc
   318   *	proc=2	defer call save away stack
   319   */
   320  void
   321  cgen_call(Node *n, int proc)
   322  {
   323  	Type *t;
   324  	Node nod, afun;
   325  
   326  	if(n == N)
   327  		return;
   328  
   329  	if(n->left->ullman >= UINF) {
   330  		// if name involves a fn call
   331  		// precompute the address of the fn
   332  		tempname(&afun, types[tptr]);
   333  		cgen(n->left, &afun);
   334  	}
   335  
   336  	genlist(n->list);		// assign the args
   337  	t = n->left->type;
   338  
   339  	// call tempname pointer
   340  	if(n->left->ullman >= UINF) {
   341  		regalloc(&nod, types[tptr], N);
   342  		cgen_as(&nod, &afun);
   343  		nod.type = t;
   344  		ginscall(&nod, proc);
   345  		regfree(&nod);
   346  		return;
   347  	}
   348  
   349  	// call pointer
   350  	if(n->left->op != ONAME || n->left->class != PFUNC) {
   351  		regalloc(&nod, types[tptr], N);
   352  		cgen_as(&nod, n->left);
   353  		nod.type = t;
   354  		ginscall(&nod, proc);
   355  		regfree(&nod);
   356  		return;
   357  	}
   358  
   359  	// call direct
   360  	n->left->method = 1;
   361  	ginscall(n->left, proc);
   362  }
   363  
   364  /*
   365   * call to n has already been generated.
   366   * generate:
   367   *	res = return value from call.
   368   */
   369  void
   370  cgen_callret(Node *n, Node *res)
   371  {
   372  	Node nod;
   373  	Type *fp, *t;
   374  	Iter flist;
   375  
   376  	t = n->left->type;
   377  	if(t->etype == TPTR32 || t->etype == TPTR64)
   378  		t = t->type;
   379  
   380  	fp = structfirst(&flist, getoutarg(t));
   381  	if(fp == T)
   382  		fatal("cgen_callret: nil");
   383  
   384  	memset(&nod, 0, sizeof(nod));
   385  	nod.op = OINDREG;
   386  	nod.val.u.reg = D_SP;
   387  	nod.addable = 1;
   388  
   389  	nod.xoffset = fp->width;
   390  	nod.type = fp->type;
   391  	cgen_as(res, &nod);
   392  }
   393  
   394  /*
   395   * call to n has already been generated.
   396   * generate:
   397   *	res = &return value from call.
   398   */
   399  void
   400  cgen_aret(Node *n, Node *res)
   401  {
   402  	Node nod1, nod2;
   403  	Type *fp, *t;
   404  	Iter flist;
   405  
   406  	t = n->left->type;
   407  	if(isptr[t->etype])
   408  		t = t->type;
   409  
   410  	fp = structfirst(&flist, getoutarg(t));
   411  	if(fp == T)
   412  		fatal("cgen_aret: nil");
   413  
   414  	memset(&nod1, 0, sizeof(nod1));
   415  	nod1.op = OINDREG;
   416  	nod1.val.u.reg = D_SP;
   417  	nod1.addable = 1;
   418  
   419  	nod1.xoffset = fp->width;
   420  	nod1.type = fp->type;
   421  
   422  	if(res->op != OREGISTER) {
   423  		regalloc(&nod2, types[tptr], res);
   424  		gins(ALEAL, &nod1, &nod2);
   425  		gins(AMOVL, &nod2, res);
   426  		regfree(&nod2);
   427  	} else
   428  		gins(ALEAL, &nod1, res);
   429  }
   430  
   431  /*
   432   * generate return.
   433   * n->left is assignments to return values.
   434   */
   435  void
   436  cgen_ret(Node *n)
   437  {
   438  	Prog *p;
   439  
   440  	genlist(n->list);		// copy out args
   441  	if(retpc) {
   442  		gjmp(retpc);
   443  		return;
   444  	}
   445  	p = gins(ARET, N, N);
   446  	if(n->op == ORETJMP) {
   447  		p->to.type = D_EXTERN;
   448  		p->to.sym = n->left->sym;
   449  	}
   450  }
   451  
   452  /*
   453   * generate += *= etc.
   454   */
   455  void
   456  cgen_asop(Node *n)
   457  {
   458  	Node n1, n2, n3, n4;
   459  	Node *nl, *nr;
   460  	Prog *p1;
   461  	Addr addr;
   462  	int a;
   463  
   464  	nl = n->left;
   465  	nr = n->right;
   466  
   467  	if(nr->ullman >= UINF && nl->ullman >= UINF) {
   468  		tempname(&n1, nr->type);
   469  		cgen(nr, &n1);
   470  		n2 = *n;
   471  		n2.right = &n1;
   472  		cgen_asop(&n2);
   473  		goto ret;
   474  	}
   475  
   476  	if(!isint[nl->type->etype])
   477  		goto hard;
   478  	if(!isint[nr->type->etype])
   479  		goto hard;
   480  	if(is64(nl->type) || is64(nr->type))
   481  		goto hard;
   482  
   483  	switch(n->etype) {
   484  	case OADD:
   485  		if(smallintconst(nr))
   486  		if(mpgetfix(nr->val.u.xval) == 1) {
   487  			a = optoas(OINC, nl->type);
   488  			if(nl->addable) {
   489  				gins(a, N, nl);
   490  				goto ret;
   491  			}
   492  			if(sudoaddable(a, nl, &addr)) {
   493  				p1 = gins(a, N, N);
   494  				p1->to = addr;
   495  				sudoclean();
   496  				goto ret;
   497  			}
   498  		}
   499  		break;
   500  
   501  	case OSUB:
   502  		if(smallintconst(nr))
   503  		if(mpgetfix(nr->val.u.xval) == 1) {
   504  			a = optoas(ODEC, nl->type);
   505  			if(nl->addable) {
   506  				gins(a, N, nl);
   507  				goto ret;
   508  			}
   509  			if(sudoaddable(a, nl, &addr)) {
   510  				p1 = gins(a, N, N);
   511  				p1->to = addr;
   512  				sudoclean();
   513  				goto ret;
   514  			}
   515  		}
   516  		break;
   517  	}
   518  
   519  	switch(n->etype) {
   520  	case OADD:
   521  	case OSUB:
   522  	case OXOR:
   523  	case OAND:
   524  	case OOR:
   525  		a = optoas(n->etype, nl->type);
   526  		if(nl->addable) {
   527  			if(smallintconst(nr)) {
   528  				gins(a, nr, nl);
   529  				goto ret;
   530  			}
   531  			regalloc(&n2, nr->type, N);
   532  			cgen(nr, &n2);
   533  			gins(a, &n2, nl);
   534  			regfree(&n2);
   535  			goto ret;
   536  		}
   537  		if(nr->ullman < UINF)
   538  		if(sudoaddable(a, nl, &addr)) {
   539  			if(smallintconst(nr)) {
   540  				p1 = gins(a, nr, N);
   541  				p1->to = addr;
   542  				sudoclean();
   543  				goto ret;
   544  			}
   545  			regalloc(&n2, nr->type, N);
   546  			cgen(nr, &n2);
   547  			p1 = gins(a, &n2, N);
   548  			p1->to = addr;
   549  			regfree(&n2);
   550  			sudoclean();
   551  			goto ret;
   552  		}
   553  	}
   554  
   555  hard:
   556  	n2.op = 0;
   557  	n1.op = 0;
   558  	if(nr->ullman >= nl->ullman || nl->addable) {
   559  		mgen(nr, &n2, N);
   560  		nr = &n2;
   561  	} else {
   562  		tempname(&n2, nr->type);
   563  		cgen(nr, &n2);
   564  		nr = &n2;
   565  	}
   566  	if(!nl->addable) {
   567  		igen(nl, &n1, N);
   568  		nl = &n1;
   569  	}
   570  
   571  	n3 = *n;
   572  	n3.left = nl;
   573  	n3.right = nr;
   574  	n3.op = n->etype;
   575  
   576  	mgen(&n3, &n4, N);
   577  	gmove(&n4, nl);
   578  
   579  	if(n1.op)
   580  		regfree(&n1);
   581  	mfree(&n2);
   582  	mfree(&n4);
   583  
   584  ret:
   585  	;
   586  }
   587  
   588  int
   589  samereg(Node *a, Node *b)
   590  {
   591  	if(a->op != OREGISTER)
   592  		return 0;
   593  	if(b->op != OREGISTER)
   594  		return 0;
   595  	if(a->val.u.reg != b->val.u.reg)
   596  		return 0;
   597  	return 1;
   598  }
   599  
   600  /*
   601   * generate division.
   602   * caller must set:
   603   *	ax = allocated AX register
   604   *	dx = allocated DX register
   605   * generates one of:
   606   *	res = nl / nr
   607   *	res = nl % nr
   608   * according to op.
   609   */
   610  void
   611  dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
   612  {
   613  	int check;
   614  	Node n1, t1, t2, t3, t4, n4, nz;
   615  	Type *t, *t0;
   616  	Prog *p1, *p2;
   617  
   618  	// Have to be careful about handling
   619  	// most negative int divided by -1 correctly.
   620  	// The hardware will trap.
   621  	// Also the byte divide instruction needs AH,
   622  	// which we otherwise don't have to deal with.
   623  	// Easiest way to avoid for int8, int16: use int32.
   624  	// For int32 and int64, use explicit test.
   625  	// Could use int64 hw for int32.
   626  	t = nl->type;
   627  	t0 = t;
   628  	check = 0;
   629  	if(issigned[t->etype]) {
   630  		check = 1;
   631  		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
   632  			check = 0;
   633  		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
   634  			check = 0;
   635  	}
   636  	if(t->width < 4) {
   637  		if(issigned[t->etype])
   638  			t = types[TINT32];
   639  		else
   640  			t = types[TUINT32];
   641  		check = 0;
   642  	}
   643  
   644  	tempname(&t1, t);
   645  	tempname(&t2, t);
   646  	if(t0 != t) {
   647  		tempname(&t3, t0);
   648  		tempname(&t4, t0);
   649  		cgen(nl, &t3);
   650  		cgen(nr, &t4);
   651  		// Convert.
   652  		gmove(&t3, &t1);
   653  		gmove(&t4, &t2);
   654  	} else {
   655  		cgen(nl, &t1);
   656  		cgen(nr, &t2);
   657  	}
   658  
   659  	if(!samereg(ax, res) && !samereg(dx, res))
   660  		regalloc(&n1, t, res);
   661  	else
   662  		regalloc(&n1, t, N);
   663  	gmove(&t2, &n1);
   664  	gmove(&t1, ax);
   665  	p2 = P;
   666  	if(check) {
   667  		nodconst(&n4, t, -1);
   668  		gins(optoas(OCMP, t), &n1, &n4);
   669  		p1 = gbranch(optoas(ONE, t), T, +1);
   670  		if(op == ODIV) {
   671  			// a / (-1) is -a.
   672  			gins(optoas(OMINUS, t), N, ax);
   673  			gmove(ax, res);
   674  		} else {
   675  			// a % (-1) is 0.
   676  			nodconst(&n4, t, 0);
   677  			gmove(&n4, res);
   678  		}
   679  		p2 = gbranch(AJMP, T, 0);
   680  		patch(p1, pc);
   681  	}
   682  	if(!issigned[t->etype]) {
   683  		nodconst(&nz, t, 0);
   684  		gmove(&nz, dx);
   685  	} else
   686  		gins(optoas(OEXTEND, t), N, N);
   687  	gins(optoas(op, t), &n1, N);
   688  	regfree(&n1);
   689  
   690  	if(op == ODIV)
   691  		gmove(ax, res);
   692  	else
   693  		gmove(dx, res);
   694  	if(check)
   695  		patch(p2, pc);
   696  }
   697  
   698  static void
   699  savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
   700  {
   701  	int r;
   702  
   703  	r = reg[dr];
   704  	nodreg(x, types[TINT32], dr);
   705  
   706  	// save current ax and dx if they are live
   707  	// and not the destination
   708  	memset(oldx, 0, sizeof *oldx);
   709  	if(r > 0 && !samereg(x, res)) {
   710  		tempname(oldx, types[TINT32]);
   711  		gmove(x, oldx);
   712  	}
   713  
   714  	regalloc(x, t, x);
   715  }
   716  
   717  static void
   718  restx(Node *x, Node *oldx)
   719  {
   720  	regfree(x);
   721  
   722  	if(oldx->op != 0) {
   723  		x->type = types[TINT32];
   724  		gmove(oldx, x);
   725  	}
   726  }
   727  
   728  /*
   729   * generate division according to op, one of:
   730   *	res = nl / nr
   731   *	res = nl % nr
   732   */
   733  void
   734  cgen_div(int op, Node *nl, Node *nr, Node *res)
   735  {
   736  	Node ax, dx, oldax, olddx;
   737  	Type *t;
   738  
   739  	if(is64(nl->type))
   740  		fatal("cgen_div %T", nl->type);
   741  
   742  	if(issigned[nl->type->etype])
   743  		t = types[TINT32];
   744  	else
   745  		t = types[TUINT32];
   746  	savex(D_AX, &ax, &oldax, res, t);
   747  	savex(D_DX, &dx, &olddx, res, t);
   748  	dodiv(op, nl, nr, res, &ax, &dx);
   749  	restx(&dx, &olddx);
   750  	restx(&ax, &oldax);
   751  }
   752  
   753  /*
   754   * generate shift according to op, one of:
   755   *	res = nl << nr
   756   *	res = nl >> nr
   757   */
   758  void
   759  cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
   760  {
   761  	Node n1, n2, nt, cx, oldcx, hi, lo;
   762  	int a, w;
   763  	Prog *p1, *p2;
   764  	uvlong sc;
   765  
   766  	if(nl->type->width > 4)
   767  		fatal("cgen_shift %T", nl->type);
   768  
   769  	w = nl->type->width * 8;
   770  
   771  	a = optoas(op, nl->type);
   772  
   773  	if(nr->op == OLITERAL) {
   774  		tempname(&n2, nl->type);
   775  		cgen(nl, &n2);
   776  		regalloc(&n1, nl->type, res);
   777  		gmove(&n2, &n1);
   778  		sc = mpgetfix(nr->val.u.xval);
   779  		if(sc >= nl->type->width*8) {
   780  			// large shift gets 2 shifts by width-1
   781  			gins(a, ncon(w-1), &n1);
   782  			gins(a, ncon(w-1), &n1);
   783  		} else
   784  			gins(a, nr, &n1);
   785  		gmove(&n1, res);
   786  		regfree(&n1);
   787  		return;
   788  	}
   789  
   790  	memset(&oldcx, 0, sizeof oldcx);
   791  	nodreg(&cx, types[TUINT32], D_CX);
   792  	if(reg[D_CX] > 1 && !samereg(&cx, res)) {
   793  		tempname(&oldcx, types[TUINT32]);
   794  		gmove(&cx, &oldcx);
   795  	}
   796  
   797  	if(nr->type->width > 4) {
   798  		tempname(&nt, nr->type);
   799  		n1 = nt;
   800  	} else {
   801  		nodreg(&n1, types[TUINT32], D_CX);
   802  		regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
   803  	}
   804  
   805  	if(samereg(&cx, res))
   806  		regalloc(&n2, nl->type, N);
   807  	else
   808  		regalloc(&n2, nl->type, res);
   809  	if(nl->ullman >= nr->ullman) {
   810  		cgen(nl, &n2);
   811  		cgen(nr, &n1);
   812  	} else {
   813  		cgen(nr, &n1);
   814  		cgen(nl, &n2);
   815  	}
   816  
   817  	// test and fix up large shifts
   818  	if(bounded) {
   819  		if(nr->type->width > 4) {
   820  			// delayed reg alloc
   821  			nodreg(&n1, types[TUINT32], D_CX);
   822  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   823  			split64(&nt, &lo, &hi);
   824  			gmove(&lo, &n1);
   825  			splitclean();
   826  		}
   827  	} else {
   828  		if(nr->type->width > 4) {
   829  			// delayed reg alloc
   830  			nodreg(&n1, types[TUINT32], D_CX);
   831  			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX
   832  			split64(&nt, &lo, &hi);
   833  			gmove(&lo, &n1);
   834  			gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
   835  			p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
   836  			gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
   837  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   838  			splitclean();
   839  			patch(p2, pc);
   840  		} else {
   841  			gins(optoas(OCMP, nr->type), &n1, ncon(w));
   842  			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
   843  		}
   844  		if(op == ORSH && issigned[nl->type->etype]) {
   845  			gins(a, ncon(w-1), &n2);
   846  		} else {
   847  			gmove(ncon(0), &n2);
   848  		}
   849  		patch(p1, pc);
   850  	}
   851  	gins(a, &n1, &n2);
   852  
   853  	if(oldcx.op != 0)
   854  		gmove(&oldcx, &cx);
   855  
   856  	gmove(&n2, res);
   857  
   858  	regfree(&n1);
   859  	regfree(&n2);
   860  }
   861  
   862  /*
   863   * generate byte multiply:
   864   *	res = nl * nr
   865   * there is no 2-operand byte multiply instruction so
   866   * we do a full-width multiplication and truncate afterwards.
   867   */
   868  void
   869  cgen_bmul(int op, Node *nl, Node *nr, Node *res)
   870  {
   871  	Node n1, n2, nt, *tmp;
   872  	Type *t;
   873  	int a;
   874  
   875  	// copy from byte to full registers
   876  	t = types[TUINT32];
   877  	if(issigned[nl->type->etype])
   878  		t = types[TINT32];
   879  
   880  	// largest ullman on left.
   881  	if(nl->ullman < nr->ullman) {
   882  		tmp = nl;
   883  		nl = nr;
   884  		nr = tmp;
   885  	}
   886  
   887  	tempname(&nt, nl->type);
   888  	cgen(nl, &nt);
   889  	regalloc(&n1, t, res);
   890  	cgen(nr, &n1);
   891  	regalloc(&n2, t, N);
   892  	gmove(&nt, &n2);
   893  	a = optoas(op, t);
   894  	gins(a, &n2, &n1);
   895  	regfree(&n2);
   896  	gmove(&n1, res);
   897  	regfree(&n1);
   898  }
   899  
   900  /*
   901   * generate high multiply:
   902   *   res = (nl*nr) >> width
   903   */
   904  void
   905  cgen_hmul(Node *nl, Node *nr, Node *res)
   906  {
   907  	Type *t;
   908  	int a;
   909  	Node n1, n2, ax, dx;
   910  
   911  	t = nl->type;
   912  	a = optoas(OHMUL, t);
   913  	// gen nl in n1.
   914  	tempname(&n1, t);
   915  	cgen(nl, &n1);
   916  	// gen nr in n2.
   917  	regalloc(&n2, t, res);
   918  	cgen(nr, &n2);
   919  
   920  	// multiply.
   921  	nodreg(&ax, t, D_AX);
   922  	gmove(&n2, &ax);
   923  	gins(a, &n1, N);
   924  	regfree(&n2);
   925  
   926  	if(t->width == 1) {
   927  		// byte multiply behaves differently.
   928  		nodreg(&ax, t, D_AH);
   929  		nodreg(&dx, t, D_DL);
   930  		gmove(&ax, &dx);
   931  	}
   932  	nodreg(&dx, t, D_DX);
   933  	gmove(&dx, res);
   934  }
   935  
   936  static void cgen_float387(Node *n, Node *res);
   937  static void cgen_floatsse(Node *n, Node *res);
   938  
   939  /*
   940   * generate floating-point operation.
   941   */
   942  void
   943  cgen_float(Node *n, Node *res)
   944  {
   945  	Node *nl;
   946  	Node n1, n2;
   947  	Prog *p1, *p2, *p3;
   948  
   949  	nl = n->left;
   950  	switch(n->op) {
   951  	case OEQ:
   952  	case ONE:
   953  	case OLT:
   954  	case OLE:
   955  	case OGE:
   956  		p1 = gbranch(AJMP, T, 0);
   957  		p2 = pc;
   958  		gmove(nodbool(1), res);
   959  		p3 = gbranch(AJMP, T, 0);
   960  		patch(p1, pc);
   961  		bgen(n, 1, 0, p2);
   962  		gmove(nodbool(0), res);
   963  		patch(p3, pc);
   964  		return;
   965  
   966  	case OPLUS:
   967  		cgen(nl, res);
   968  		return;
   969  
   970  	case OCONV:
   971  		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
   972  			cgen(nl, res);
   973  			return;
   974  		}
   975  
   976  		tempname(&n2, n->type);
   977  		mgen(nl, &n1, res);
   978  		gmove(&n1, &n2);
   979  		gmove(&n2, res);
   980  		mfree(&n1);
   981  		return;
   982  	}
   983  
   984  	if(use_sse)
   985  		cgen_floatsse(n, res);
   986  	else
   987  		cgen_float387(n, res);
   988  }
   989  
   990  // floating-point.  387 (not SSE2)
   991  static void
   992  cgen_float387(Node *n, Node *res)
   993  {
   994  	Node f0, f1;
   995  	Node *nl, *nr;
   996  
   997  	nl = n->left;
   998  	nr = n->right;
   999  	nodreg(&f0, nl->type, D_F0);
  1000  	nodreg(&f1, n->type, D_F0+1);
  1001  	if(nr != N)
  1002  		goto flt2;
  1003  
  1004  	// unary
  1005  	cgen(nl, &f0);
  1006  	if(n->op != OCONV && n->op != OPLUS)
  1007  		gins(foptoas(n->op, n->type, 0), N, N);
  1008  	gmove(&f0, res);
  1009  	return;
  1010  
  1011  flt2:	// binary
  1012  	if(nl->ullman >= nr->ullman) {
  1013  		cgen(nl, &f0);
  1014  		if(nr->addable)
  1015  			gins(foptoas(n->op, n->type, 0), nr, &f0);
  1016  		else {
  1017  			cgen(nr, &f0);
  1018  			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
  1019  		}
  1020  	} else {
  1021  		cgen(nr, &f0);
  1022  		if(nl->addable)
  1023  			gins(foptoas(n->op, n->type, Frev), nl, &f0);
  1024  		else {
  1025  			cgen(nl, &f0);
  1026  			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
  1027  		}
  1028  	}
  1029  	gmove(&f0, res);
  1030  	return;
  1031  
  1032  }
  1033  
  1034  static void
  1035  cgen_floatsse(Node *n, Node *res)
  1036  {
  1037  	Node *nl, *nr, *r;
  1038  	Node n1, n2, nt;
  1039  	int a;
  1040  
  1041  	nl = n->left;
  1042  	nr = n->right;
  1043  	switch(n->op) {
  1044  	default:
  1045  		dump("cgen_floatsse", n);
  1046  		fatal("cgen_floatsse %O", n->op);
  1047  		return;
  1048  
  1049  	case OMINUS:
  1050  	case OCOM:
  1051  		nr = nodintconst(-1);
  1052  		convlit(&nr, n->type);
  1053  		a = foptoas(OMUL, nl->type, 0);
  1054  		goto sbop;
  1055  
  1056  	// symmetric binary
  1057  	case OADD:
  1058  	case OMUL:
  1059  		a = foptoas(n->op, nl->type, 0);
  1060  		goto sbop;
  1061  
  1062  	// asymmetric binary
  1063  	case OSUB:
  1064  	case OMOD:
  1065  	case ODIV:
  1066  		a = foptoas(n->op, nl->type, 0);
  1067  		goto abop;
  1068  	}
  1069  
  1070  sbop:	// symmetric binary
  1071  	if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
  1072  		r = nl;
  1073  		nl = nr;
  1074  		nr = r;
  1075  	}
  1076  
  1077  abop:	// asymmetric binary
  1078  	if(nl->ullman >= nr->ullman) {
  1079  		tempname(&nt, nl->type);
  1080  		cgen(nl, &nt);
  1081  		mgen(nr, &n2, N);
  1082  		regalloc(&n1, nl->type, res);
  1083  		gmove(&nt, &n1);
  1084  		gins(a, &n2, &n1);
  1085  		gmove(&n1, res);
  1086  		regfree(&n1);
  1087  		mfree(&n2);
  1088  	} else {
  1089  		regalloc(&n2, nr->type, res);
  1090  		cgen(nr, &n2);
  1091  		regalloc(&n1, nl->type, N);
  1092  		cgen(nl, &n1);
  1093  		gins(a, &n2, &n1);
  1094  		regfree(&n2);
  1095  		gmove(&n1, res);
  1096  		regfree(&n1);
  1097  	}
  1098  	return;
  1099  }
  1100  
  1101  void
  1102  bgen_float(Node *n, int true, int likely, Prog *to)
  1103  {
  1104  	int et, a;
  1105  	Node *nl, *nr, *r;
  1106  	Node n1, n2, n3, tmp, t1, t2, ax;
  1107  	Prog *p1, *p2;
  1108  
  1109  	nl = n->left;
  1110  	nr = n->right;
  1111  	a = n->op;
  1112  	if(!true) {
  1113  		// brcom is not valid on floats when NaN is involved.
  1114  		p1 = gbranch(AJMP, T, 0);
  1115  		p2 = gbranch(AJMP, T, 0);
  1116  		patch(p1, pc);
  1117  		// No need to avoid re-genning ninit.
  1118  		bgen_float(n, 1, -likely, p2);
  1119  		patch(gbranch(AJMP, T, 0), to);
  1120  		patch(p2, pc);
  1121  		return;
  1122  	}
  1123  
  1124  	if(use_sse)
  1125  		goto sse;
  1126  	else
  1127  		goto x87;
  1128  
  1129  x87:
  1130  	a = brrev(a);	// because the args are stacked
  1131  	if(a == OGE || a == OGT) {
  1132  		// only < and <= work right with NaN; reverse if needed
  1133  		r = nr;
  1134  		nr = nl;
  1135  		nl = r;
  1136  		a = brrev(a);
  1137  	}
  1138  
  1139  	nodreg(&tmp, nr->type, D_F0);
  1140  	nodreg(&n2, nr->type, D_F0 + 1);
  1141  	nodreg(&ax, types[TUINT16], D_AX);
  1142  	et = simsimtype(nr->type);
  1143  	if(et == TFLOAT64) {
  1144  		if(nl->ullman > nr->ullman) {
  1145  			cgen(nl, &tmp);
  1146  			cgen(nr, &tmp);
  1147  			gins(AFXCHD, &tmp, &n2);
  1148  		} else {
  1149  			cgen(nr, &tmp);
  1150  			cgen(nl, &tmp);
  1151  		}
  1152  		gins(AFUCOMIP, &tmp, &n2);
  1153  		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF
  1154  	} else {
  1155  		// TODO(rsc): The moves back and forth to memory
  1156  		// here are for truncating the value to 32 bits.
  1157  		// This handles 32-bit comparison but presumably
  1158  		// all the other ops have the same problem.
  1159  		// We need to figure out what the right general
  1160  		// solution is, besides telling people to use float64.
  1161  		tempname(&t1, types[TFLOAT32]);
  1162  		tempname(&t2, types[TFLOAT32]);
  1163  		cgen(nr, &t1);
  1164  		cgen(nl, &t2);
  1165  		gmove(&t2, &tmp);
  1166  		gins(AFCOMFP, &t1, &tmp);
  1167  		gins(AFSTSW, N, &ax);
  1168  		gins(ASAHF, N, N);
  1169  	}
  1170  
  1171  	goto ret;
  1172  
  1173  sse:
  1174  	if(!nl->addable) {
  1175  		tempname(&n1, nl->type);
  1176  		cgen(nl, &n1);
  1177  		nl = &n1;
  1178  	}
  1179  	if(!nr->addable) {
  1180  		tempname(&tmp, nr->type);
  1181  		cgen(nr, &tmp);
  1182  		nr = &tmp;
  1183  	}
  1184  	regalloc(&n2, nr->type, N);
  1185  	gmove(nr, &n2);
  1186  	nr = &n2;
  1187  
  1188  	if(nl->op != OREGISTER) {
  1189  		regalloc(&n3, nl->type, N);
  1190  		gmove(nl, &n3);
  1191  		nl = &n3;
  1192  	}
  1193  
  1194  	if(a == OGE || a == OGT) {
  1195  		// only < and <= work right with NaN; reverse if needed
  1196  		r = nr;
  1197  		nr = nl;
  1198  		nl = r;
  1199  		a = brrev(a);
  1200  	}
  1201  
  1202  	gins(foptoas(OCMP, nr->type, 0), nl, nr);
  1203  	if(nl->op == OREGISTER)
  1204  		regfree(nl);
  1205  	regfree(nr);
  1206  
  1207  ret:
  1208  	if(a == OEQ) {
  1209  		// neither NE nor P
  1210  		p1 = gbranch(AJNE, T, -likely);
  1211  		p2 = gbranch(AJPS, T, -likely);
  1212  		patch(gbranch(AJMP, T, 0), to);
  1213  		patch(p1, pc);
  1214  		patch(p2, pc);
  1215  	} else if(a == ONE) {
  1216  		// either NE or P
  1217  		patch(gbranch(AJNE, T, likely), to);
  1218  		patch(gbranch(AJPS, T, likely), to);
  1219  	} else
  1220  		patch(gbranch(optoas(a, nr->type), T, likely), to);
  1221  
  1222  }
  1223  
  1224  // Called after regopt and peep have run.
  1225  // Expand CHECKNIL pseudo-op into actual nil pointer check.
  1226  void
  1227  expandchecks(Prog *firstp)
  1228  {
  1229  	Prog *p, *p1, *p2;
  1230  
  1231  	for(p = firstp; p != P; p = p->link) {
  1232  		if(p->as != ACHECKNIL)
  1233  			continue;
  1234  		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
  1235  			warnl(p->lineno, "generated nil check");
  1236  		// check is
  1237  		//	CMP arg, $0
  1238  		//	JNE 2(PC) (likely)
  1239  		//	MOV AX, 0
  1240  		p1 = mal(sizeof *p1);
  1241  		p2 = mal(sizeof *p2);
  1242  		clearp(p1);
  1243  		clearp(p2);
  1244  		p1->link = p2;
  1245  		p2->link = p->link;
  1246  		p->link = p1;
  1247  		p1->lineno = p->lineno;
  1248  		p2->lineno = p->lineno;
  1249  		p1->loc = 9999;
  1250  		p2->loc = 9999;
  1251  		p->as = ACMPL;
  1252  		p->to.type = D_CONST;
  1253  		p->to.offset = 0;
  1254  		p1->as = AJNE;
  1255  		p1->from.type = D_CONST;
  1256  		p1->from.offset = 1; // likely
  1257  		p1->to.type = D_BRANCH;
  1258  		p1->to.u.branch = p2->link;
  1259  		// crash by write to memory address 0.
  1260  		// if possible, since we know arg is 0, use 0(arg),
  1261  		// which will be shorter to encode than plain 0.
  1262  		p2->as = AMOVL;
  1263  		p2->from.type = D_AX;
  1264  		if(regtyp(&p->from))
  1265  			p2->to.type = p->from.type + D_INDIR;
  1266  		else
  1267  			p2->to.type = D_INDIR+D_NONE;
  1268  		p2->to.offset = 0;
  1269  	}
  1270  }