github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/8g/reg.c (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include "gg.h"
    34  #include "opt.h"
    35  
    36  #define	NREGVAR	16	/* 8 integer + 8 floating */
    37  #define	REGBITS	((uint32)0xffff)
    38  
    39  static	Reg*	firstr;
    40  static	int	first	= 1;
    41  
    42  int
    43  rcmp(const void *a1, const void *a2)
    44  {
    45  	Rgn *p1, *p2;
    46  	int c1, c2;
    47  
    48  	p1 = (Rgn*)a1;
    49  	p2 = (Rgn*)a2;
    50  	c1 = p2->cost;
    51  	c2 = p1->cost;
    52  	if(c1 -= c2)
    53  		return c1;
    54  	return p2->varno - p1->varno;
    55  }
    56  
    57  static void
    58  setoutvar(void)
    59  {
    60  	Type *t;
    61  	Node *n;
    62  	Addr a;
    63  	Iter save;
    64  	Bits bit;
    65  	int z;
    66  
    67  	t = structfirst(&save, getoutarg(curfn->type));
    68  	while(t != T) {
    69  		n = nodarg(t, 1);
    70  		a = zprog.from;
    71  		naddr(n, &a, 0);
    72  		bit = mkvar(R, &a);
    73  		for(z=0; z<BITS; z++)
    74  			ovar.b[z] |= bit.b[z];
    75  		t = structnext(&save);
    76  	}
    77  //if(bany(ovar))
    78  //print("ovars = %Q\n", ovar);
    79  }
    80  
    81  static void
    82  setaddrs(Bits bit)
    83  {
    84  	int i, n;
    85  	Var *v;
    86  	Node *node;
    87  
    88  	while(bany(&bit)) {
    89  		// convert each bit to a variable
    90  		i = bnum(bit);
    91  		node = var[i].node;
    92  		n = var[i].name;
    93  		bit.b[i/32] &= ~(1L<<(i%32));
    94  
    95  		// disable all pieces of that variable
    96  		for(i=0; i<nvar; i++) {
    97  			v = var+i;
    98  			if(v->node == node && v->name == n)
    99  				v->addr = 2;
   100  		}
   101  	}
   102  }
   103  
   104  static char* regname[] = {
   105  	".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di",
   106  	".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7",
   107  };
   108  
   109  static Node* regnodes[NREGVAR];
   110  
   111  void
   112  regopt(Prog *firstp)
   113  {
   114  	Reg *r, *r1;
   115  	Prog *p;
   116  	Graph *g;
   117  	ProgInfo info;
   118  	int i, z;
   119  	uint32 vreg;
   120  	Bits bit;
   121  
   122  	if(first) {
   123  		fmtinstall('Q', Qconv);
   124  		exregoffset = D_DI;	// no externals
   125  		first = 0;
   126  	}
   127  	
   128  	fixjmp(firstp);
   129  	mergetemp(firstp);
   130  
   131  	/*
   132  	 * control flow is more complicated in generated go code
   133  	 * than in generated c code.  define pseudo-variables for
   134  	 * registers, so we have complete register usage information.
   135  	 */
   136  	nvar = NREGVAR;
   137  	memset(var, 0, NREGVAR*sizeof var[0]);
   138  	for(i=0; i<NREGVAR; i++) {
   139  		if(regnodes[i] == N)
   140  			regnodes[i] = newname(lookup(regname[i]));
   141  		var[i].node = regnodes[i];
   142  	}
   143  
   144  	regbits = RtoB(D_SP);
   145  	for(z=0; z<BITS; z++) {
   146  		externs.b[z] = 0;
   147  		params.b[z] = 0;
   148  		consts.b[z] = 0;
   149  		addrs.b[z] = 0;
   150  		ovar.b[z] = 0;
   151  	}
   152  
   153  	// build list of return variables
   154  	setoutvar();
   155  
   156  	/*
   157  	 * pass 1
   158  	 * build aux data structure
   159  	 * allocate pcs
   160  	 * find use and set of variables
   161  	 */
   162  	g = flowstart(firstp, sizeof(Reg));
   163  	if(g == nil)
   164  		return;
   165  	firstr = (Reg*)g->start;
   166  
   167  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   168  		p = r->f.prog;
   169  		proginfo(&info, p);
   170  
   171  		// Avoid making variables for direct-called functions.
   172  		if(p->as == ACALL && p->to.type == D_EXTERN)
   173  			continue;
   174  
   175  		r->use1.b[0] |= info.reguse | info.regindex;
   176  		r->set.b[0] |= info.regset;
   177  
   178  		bit = mkvar(r, &p->from);
   179  		if(bany(&bit)) {
   180  			if(info.flags & LeftAddr)
   181  				setaddrs(bit);
   182  			if(info.flags & LeftRead)
   183  				for(z=0; z<BITS; z++)
   184  					r->use1.b[z] |= bit.b[z];
   185  			if(info.flags & LeftWrite)
   186  				for(z=0; z<BITS; z++)
   187  					r->set.b[z] |= bit.b[z];
   188  		}
   189  
   190  		bit = mkvar(r, &p->to);
   191  		if(bany(&bit)) {	
   192  			if(info.flags & RightAddr)
   193  				setaddrs(bit);
   194  			if(info.flags & RightRead)
   195  				for(z=0; z<BITS; z++)
   196  					r->use2.b[z] |= bit.b[z];
   197  			if(info.flags & RightWrite)
   198  				for(z=0; z<BITS; z++)
   199  					r->set.b[z] |= bit.b[z];
   200  		}
   201  	}
   202  	if(firstr == R)
   203  		return;
   204  
   205  	for(i=0; i<nvar; i++) {
   206  		Var *v = var+i;
   207  		if(v->addr) {
   208  			bit = blsh(i);
   209  			for(z=0; z<BITS; z++)
   210  				addrs.b[z] |= bit.b[z];
   211  		}
   212  
   213  		if(debug['R'] && debug['v'])
   214  			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
   215  				i, v->addr, v->etype, v->width, v->node, v->offset);
   216  	}
   217  
   218  	if(debug['R'] && debug['v'])
   219  		dumpit("pass1", &firstr->f, 1);
   220  
   221  	/*
   222  	 * pass 2
   223  	 * find looping structure
   224  	 */
   225  	flowrpo(g);
   226  
   227  	if(debug['R'] && debug['v'])
   228  		dumpit("pass2", &firstr->f, 1);
   229  
   230  	/*
   231  	 * pass 3
   232  	 * iterate propagating usage
   233  	 * 	back until flow graph is complete
   234  	 */
   235  loop1:
   236  	change = 0;
   237  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   238  		r->f.active = 0;
   239  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   240  		if(r->f.prog->as == ARET)
   241  			prop(r, zbits, zbits);
   242  loop11:
   243  	/* pick up unreachable code */
   244  	i = 0;
   245  	for(r = firstr; r != R; r = r1) {
   246  		r1 = (Reg*)r->f.link;
   247  		if(r1 && r1->f.active && !r->f.active) {
   248  			prop(r, zbits, zbits);
   249  			i = 1;
   250  		}
   251  	}
   252  	if(i)
   253  		goto loop11;
   254  	if(change)
   255  		goto loop1;
   256  
   257  	if(debug['R'] && debug['v'])
   258  		dumpit("pass3", &firstr->f, 1);
   259  
   260  	/*
   261  	 * pass 4
   262  	 * iterate propagating register/variable synchrony
   263  	 * 	forward until graph is complete
   264  	 */
   265  loop2:
   266  	change = 0;
   267  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   268  		r->f.active = 0;
   269  	synch(firstr, zbits);
   270  	if(change)
   271  		goto loop2;
   272  
   273  	if(debug['R'] && debug['v'])
   274  		dumpit("pass4", &firstr->f, 1);
   275  
   276  	/*
   277  	 * pass 4.5
   278  	 * move register pseudo-variables into regu.
   279  	 */
   280  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   281  		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   282  
   283  		r->set.b[0] &= ~REGBITS;
   284  		r->use1.b[0] &= ~REGBITS;
   285  		r->use2.b[0] &= ~REGBITS;
   286  		r->refbehind.b[0] &= ~REGBITS;
   287  		r->refahead.b[0] &= ~REGBITS;
   288  		r->calbehind.b[0] &= ~REGBITS;
   289  		r->calahead.b[0] &= ~REGBITS;
   290  		r->regdiff.b[0] &= ~REGBITS;
   291  		r->act.b[0] &= ~REGBITS;
   292  	}
   293  
   294  	/*
   295  	 * pass 5
   296  	 * isolate regions
   297  	 * calculate costs (paint1)
   298  	 */
   299  	r = firstr;
   300  	if(r) {
   301  		for(z=0; z<BITS; z++)
   302  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   303  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   304  		if(bany(&bit) && !r->f.refset) {
   305  			// should never happen - all variables are preset
   306  			if(debug['w'])
   307  				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
   308  			r->f.refset = 1;
   309  		}
   310  	}
   311  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   312  		r->act = zbits;
   313  	rgp = region;
   314  	nregion = 0;
   315  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   316  		for(z=0; z<BITS; z++)
   317  			bit.b[z] = r->set.b[z] &
   318  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   319  		if(bany(&bit) && !r->f.refset) {
   320  			if(debug['w'])
   321  				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
   322  			r->f.refset = 1;
   323  			excise(&r->f);
   324  		}
   325  		for(z=0; z<BITS; z++)
   326  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   327  		while(bany(&bit)) {
   328  			i = bnum(bit);
   329  			rgp->enter = r;
   330  			rgp->varno = i;
   331  			change = 0;
   332  			paint1(r, i);
   333  			bit.b[i/32] &= ~(1L<<(i%32));
   334  			if(change <= 0)
   335  				continue;
   336  			rgp->cost = change;
   337  			nregion++;
   338  			if(nregion >= NRGN) {
   339  				if(debug['R'] && debug['v'])
   340  					print("too many regions\n");
   341  				goto brk;
   342  			}
   343  			rgp++;
   344  		}
   345  	}
   346  brk:
   347  	qsort(region, nregion, sizeof(region[0]), rcmp);
   348  
   349  	/*
   350  	 * pass 6
   351  	 * determine used registers (paint2)
   352  	 * replace code (paint3)
   353  	 */
   354  	rgp = region;
   355  	for(i=0; i<nregion; i++) {
   356  		bit = blsh(rgp->varno);
   357  		vreg = paint2(rgp->enter, rgp->varno);
   358  		vreg = allreg(vreg, rgp);
   359  		if(rgp->regno != 0)
   360  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   361  		rgp++;
   362  	}
   363  
   364  	if(debug['R'] && debug['v'])
   365  		dumpit("pass6", &firstr->f, 1);
   366  
   367  	/*
   368  	 * free aux structures. peep allocates new ones.
   369  	 */
   370  	flowend(g);
   371  	firstr = R;
   372  
   373  	/*
   374  	 * pass 7
   375  	 * peep-hole on basic block
   376  	 */
   377  	if(!debug['R'] || debug['P'])
   378  		peep(firstp);
   379  
   380  	/*
   381  	 * eliminate nops
   382  	 */
   383  	for(p=firstp; p!=P; p=p->link) {
   384  		while(p->link != P && p->link->as == ANOP)
   385  			p->link = p->link->link;
   386  		if(p->to.type == D_BRANCH)
   387  			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
   388  				p->to.u.branch = p->to.u.branch->link;
   389  	}
   390  
   391  	if(!use_sse)
   392  	for(p=firstp; p!=P; p=p->link) {
   393  		if(p->from.type >= D_X0 && p->from.type <= D_X7)
   394  			fatal("invalid use of %R with GO386=387: %P", p->from.type, p);
   395  		if(p->to.type >= D_X0 && p->to.type <= D_X7)
   396  			fatal("invalid use of %R with GO386=387: %P", p->to.type, p);
   397  	}
   398  
   399  	if(debug['R']) {
   400  		if(ostats.ncvtreg ||
   401  		   ostats.nspill ||
   402  		   ostats.nreload ||
   403  		   ostats.ndelmov ||
   404  		   ostats.nvar ||
   405  		   ostats.naddr ||
   406  		   0)
   407  			print("\nstats\n");
   408  
   409  		if(ostats.ncvtreg)
   410  			print("	%4d cvtreg\n", ostats.ncvtreg);
   411  		if(ostats.nspill)
   412  			print("	%4d spill\n", ostats.nspill);
   413  		if(ostats.nreload)
   414  			print("	%4d reload\n", ostats.nreload);
   415  		if(ostats.ndelmov)
   416  			print("	%4d delmov\n", ostats.ndelmov);
   417  		if(ostats.nvar)
   418  			print("	%4d var\n", ostats.nvar);
   419  		if(ostats.naddr)
   420  			print("	%4d addr\n", ostats.naddr);
   421  
   422  		memset(&ostats, 0, sizeof(ostats));
   423  	}
   424  }
   425  
   426  /*
   427   * add mov b,rn
   428   * just after r
   429   */
   430  void
   431  addmove(Reg *r, int bn, int rn, int f)
   432  {
   433  	Prog *p, *p1;
   434  	Adr *a;
   435  	Var *v;
   436  
   437  	p1 = mal(sizeof(*p1));
   438  	clearp(p1);
   439  	p1->loc = 9999;
   440  
   441  	p = r->f.prog;
   442  	p1->link = p->link;
   443  	p->link = p1;
   444  	p1->lineno = p->lineno;
   445  
   446  	v = var + bn;
   447  
   448  	a = &p1->to;
   449  	a->offset = v->offset;
   450  	a->etype = v->etype;
   451  	a->type = v->name;
   452  	a->node = v->node;
   453  	a->sym = v->node->sym;
   454  
   455  	// need to clean this up with wptr and
   456  	// some of the defaults
   457  	p1->as = AMOVL;
   458  	switch(v->etype) {
   459  	default:
   460  		fatal("unknown type %E", v->etype);
   461  	case TINT8:
   462  	case TUINT8:
   463  	case TBOOL:
   464  		p1->as = AMOVB;
   465  		break;
   466  	case TINT16:
   467  	case TUINT16:
   468  		p1->as = AMOVW;
   469  		break;
   470  	case TFLOAT32:
   471  		p1->as = AMOVSS;
   472  		break;
   473  	case TFLOAT64:
   474  		p1->as = AMOVSD;
   475  		break;
   476  	case TINT:
   477  	case TUINT:
   478  	case TINT32:
   479  	case TUINT32:
   480  	case TPTR32:
   481  		break;
   482  	}
   483  
   484  	p1->from.type = rn;
   485  	if(!f) {
   486  		p1->from = *a;
   487  		*a = zprog.from;
   488  		a->type = rn;
   489  		if(v->etype == TUINT8)
   490  			p1->as = AMOVB;
   491  		if(v->etype == TUINT16)
   492  			p1->as = AMOVW;
   493  	}
   494  	if(debug['R'] && debug['v'])
   495  		print("%P ===add=== %P\n", p, p1);
   496  	ostats.nspill++;
   497  }
   498  
   499  uint32
   500  doregbits(int r)
   501  {
   502  	uint32 b;
   503  
   504  	b = 0;
   505  	if(r >= D_INDIR)
   506  		r -= D_INDIR;
   507  	if(r >= D_AX && r <= D_DI)
   508  		b |= RtoB(r);
   509  	else
   510  	if(r >= D_AL && r <= D_BL)
   511  		b |= RtoB(r-D_AL+D_AX);
   512  	else
   513  	if(r >= D_AH && r <= D_BH)
   514  		b |= RtoB(r-D_AH+D_AX);
   515  	else
   516  	if(r >= D_X0 && r <= D_X0+7)
   517  		b |= FtoB(r);
   518  	return b;
   519  }
   520  
   521  static int
   522  overlap(int32 o1, int w1, int32 o2, int w2)
   523  {
   524  	int32 t1, t2;
   525  
   526  	t1 = o1+w1;
   527  	t2 = o2+w2;
   528  
   529  	if(!(t1 > o2 && t2 > o1))
   530  		return 0;
   531  
   532  	return 1;
   533  }
   534  
   535  Bits
   536  mkvar(Reg *r, Adr *a)
   537  {
   538  	Var *v;
   539  	int i, t, n, et, z, w, flag, regu;
   540  	int32 o;
   541  	Bits bit;
   542  	Node *node;
   543  
   544  	/*
   545  	 * mark registers used
   546  	 */
   547  	t = a->type;
   548  	if(t == D_NONE)
   549  		goto none;
   550  
   551  	if(r != R)
   552  		r->use1.b[0] |= doregbits(a->index);
   553  
   554  	switch(t) {
   555  	default:
   556  		regu = doregbits(t);
   557  		if(regu == 0)
   558  			goto none;
   559  		bit = zbits;
   560  		bit.b[0] = regu;
   561  		return bit;
   562  
   563  	case D_ADDR:
   564  		a->type = a->index;
   565  		bit = mkvar(r, a);
   566  		setaddrs(bit);
   567  		a->type = t;
   568  		ostats.naddr++;
   569  		goto none;
   570  
   571  	case D_EXTERN:
   572  	case D_STATIC:
   573  	case D_PARAM:
   574  	case D_AUTO:
   575  		n = t;
   576  		break;
   577  	}
   578  
   579  	node = a->node;
   580  	if(node == N || node->op != ONAME || node->orig == N)
   581  		goto none;
   582  	node = node->orig;
   583  	if(node->orig != node)
   584  		fatal("%D: bad node", a);
   585  	if(node->sym == S || node->sym->name[0] == '.')
   586  		goto none;
   587  	et = a->etype;
   588  	o = a->offset;
   589  	w = a->width;
   590  	if(w < 0)
   591  		fatal("bad width %d for %D", w, a);
   592  
   593  	flag = 0;
   594  	for(i=0; i<nvar; i++) {
   595  		v = var+i;
   596  		if(v->node == node && v->name == n) {
   597  			if(v->offset == o)
   598  			if(v->etype == et)
   599  			if(v->width == w)
   600  				return blsh(i);
   601  
   602  			// if they overlap, disable both
   603  			if(overlap(v->offset, v->width, o, w)) {
   604  				if(debug['R'])
   605  					print("disable %s\n", node->sym->name);
   606  				v->addr = 1;
   607  				flag = 1;
   608  			}
   609  		}
   610  	}
   611  
   612  	switch(et) {
   613  	case 0:
   614  	case TFUNC:
   615  		goto none;
   616  	}
   617  
   618  	if(nvar >= NVAR) {
   619  		if(debug['w'] > 1 && node != N)
   620  			fatal("variable not optimized: %D", a);
   621  		goto none;
   622  	}
   623  
   624  	i = nvar;
   625  	nvar++;
   626  	v = var+i;
   627  	v->offset = o;
   628  	v->name = n;
   629  	v->etype = et;
   630  	v->width = w;
   631  	v->addr = flag;		// funny punning
   632  	v->node = node;
   633  
   634  	if(debug['R'])
   635  		print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
   636  	ostats.nvar++;
   637  
   638  	bit = blsh(i);
   639  	if(n == D_EXTERN || n == D_STATIC)
   640  		for(z=0; z<BITS; z++)
   641  			externs.b[z] |= bit.b[z];
   642  	if(n == D_PARAM)
   643  		for(z=0; z<BITS; z++)
   644  			params.b[z] |= bit.b[z];
   645  
   646  	return bit;
   647  
   648  none:
   649  	return zbits;
   650  }
   651  
   652  void
   653  prop(Reg *r, Bits ref, Bits cal)
   654  {
   655  	Reg *r1, *r2;
   656  	int z;
   657  
   658  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
   659  		for(z=0; z<BITS; z++) {
   660  			ref.b[z] |= r1->refahead.b[z];
   661  			if(ref.b[z] != r1->refahead.b[z]) {
   662  				r1->refahead.b[z] = ref.b[z];
   663  				change++;
   664  			}
   665  			cal.b[z] |= r1->calahead.b[z];
   666  			if(cal.b[z] != r1->calahead.b[z]) {
   667  				r1->calahead.b[z] = cal.b[z];
   668  				change++;
   669  			}
   670  		}
   671  		switch(r1->f.prog->as) {
   672  		case ACALL:
   673  			if(noreturn(r1->f.prog))
   674  				break;
   675  			for(z=0; z<BITS; z++) {
   676  				cal.b[z] |= ref.b[z] | externs.b[z];
   677  				ref.b[z] = 0;
   678  			}
   679  			break;
   680  
   681  		case ATEXT:
   682  			for(z=0; z<BITS; z++) {
   683  				cal.b[z] = 0;
   684  				ref.b[z] = 0;
   685  			}
   686  			break;
   687  
   688  		case ARET:
   689  			for(z=0; z<BITS; z++) {
   690  				cal.b[z] = externs.b[z] | ovar.b[z];
   691  				ref.b[z] = 0;
   692  			}
   693  			break;
   694  
   695  		default:
   696  			// Work around for issue 1304:
   697  			// flush modified globals before each instruction.
   698  			for(z=0; z<BITS; z++) {
   699  				cal.b[z] |= externs.b[z];
   700  				// issue 4066: flush modified return variables in case of panic
   701  				if(hasdefer)
   702  					cal.b[z] |= ovar.b[z];
   703  			}
   704  			break;
   705  		}
   706  		for(z=0; z<BITS; z++) {
   707  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
   708  				r1->use1.b[z] | r1->use2.b[z];
   709  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
   710  			r1->refbehind.b[z] = ref.b[z];
   711  			r1->calbehind.b[z] = cal.b[z];
   712  		}
   713  		if(r1->f.active)
   714  			break;
   715  		r1->f.active = 1;
   716  	}
   717  	for(; r != r1; r = (Reg*)r->f.p1)
   718  		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
   719  			prop(r2, r->refbehind, r->calbehind);
   720  }
   721  
   722  void
   723  synch(Reg *r, Bits dif)
   724  {
   725  	Reg *r1;
   726  	int z;
   727  
   728  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
   729  		for(z=0; z<BITS; z++) {
   730  			dif.b[z] = (dif.b[z] &
   731  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
   732  					r1->set.b[z] | r1->regdiff.b[z];
   733  			if(dif.b[z] != r1->regdiff.b[z]) {
   734  				r1->regdiff.b[z] = dif.b[z];
   735  				change++;
   736  			}
   737  		}
   738  		if(r1->f.active)
   739  			break;
   740  		r1->f.active = 1;
   741  		for(z=0; z<BITS; z++)
   742  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
   743  		if((Reg*)r1->f.s2 != R)
   744  			synch((Reg*)r1->f.s2, dif);
   745  	}
   746  }
   747  
   748  uint32
   749  allreg(uint32 b, Rgn *r)
   750  {
   751  	Var *v;
   752  	int i;
   753  
   754  	v = var + r->varno;
   755  	r->regno = 0;
   756  	switch(v->etype) {
   757  
   758  	default:
   759  		fatal("unknown etype %d/%E", bitno(b), v->etype);
   760  		break;
   761  
   762  	case TINT8:
   763  	case TUINT8:
   764  	case TINT16:
   765  	case TUINT16:
   766  	case TINT32:
   767  	case TUINT32:
   768  	case TINT64:
   769  	case TINT:
   770  	case TUINT:
   771  	case TUINTPTR:
   772  	case TBOOL:
   773  	case TPTR32:
   774  		i = BtoR(~b);
   775  		if(i && r->cost > 0) {
   776  			r->regno = i;
   777  			return RtoB(i);
   778  		}
   779  		break;
   780  
   781  	case TFLOAT32:
   782  	case TFLOAT64:
   783  		if(!use_sse)
   784  			break;
   785  		i = BtoF(~b);
   786  		if(i && r->cost > 0) {
   787  			r->regno = i;
   788  			return FtoB(i);
   789  		}
   790  		break;
   791  	}
   792  	return 0;
   793  }
   794  
   795  void
   796  paint1(Reg *r, int bn)
   797  {
   798  	Reg *r1;
   799  	Prog *p;
   800  	int z;
   801  	uint32 bb;
   802  
   803  	z = bn/32;
   804  	bb = 1L<<(bn%32);
   805  	if(r->act.b[z] & bb)
   806  		return;
   807  	for(;;) {
   808  		if(!(r->refbehind.b[z] & bb))
   809  			break;
   810  		r1 = (Reg*)r->f.p1;
   811  		if(r1 == R)
   812  			break;
   813  		if(!(r1->refahead.b[z] & bb))
   814  			break;
   815  		if(r1->act.b[z] & bb)
   816  			break;
   817  		r = r1;
   818  	}
   819  
   820  	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
   821  		change -= CLOAD * r->f.loop;
   822  	}
   823  	for(;;) {
   824  		r->act.b[z] |= bb;
   825  		p = r->f.prog;
   826  
   827  		if(r->use1.b[z] & bb) {
   828  			change += CREF * r->f.loop;
   829  			if(p->as == AFMOVL || p->as == AFMOVW)
   830  				if(BtoR(bb) != D_F0)
   831  					change = -CINF;
   832  		}
   833  
   834  		if((r->use2.b[z]|r->set.b[z]) & bb) {
   835  			change += CREF * r->f.loop;
   836  			if(p->as == AFMOVL || p->as == AFMOVW)
   837  				if(BtoR(bb) != D_F0)
   838  					change = -CINF;
   839  		}
   840  
   841  		if(STORE(r) & r->regdiff.b[z] & bb) {
   842  			change -= CLOAD * r->f.loop;
   843  			if(p->as == AFMOVL || p->as == AFMOVW)
   844  				if(BtoR(bb) != D_F0)
   845  					change = -CINF;
   846  		}
   847  
   848  		if(r->refbehind.b[z] & bb)
   849  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
   850  				if(r1->refahead.b[z] & bb)
   851  					paint1(r1, bn);
   852  
   853  		if(!(r->refahead.b[z] & bb))
   854  			break;
   855  		r1 = (Reg*)r->f.s2;
   856  		if(r1 != R)
   857  			if(r1->refbehind.b[z] & bb)
   858  				paint1(r1, bn);
   859  		r = (Reg*)r->f.s1;
   860  		if(r == R)
   861  			break;
   862  		if(r->act.b[z] & bb)
   863  			break;
   864  		if(!(r->refbehind.b[z] & bb))
   865  			break;
   866  	}
   867  }
   868  
   869  uint32
   870  regset(Reg *r, uint32 bb)
   871  {
   872  	uint32 b, set;
   873  	Adr v;
   874  	int c;
   875  
   876  	set = 0;
   877  	v = zprog.from;
   878  	while(b = bb & ~(bb-1)) {
   879  		v.type = b & 0xFF ? BtoR(b): BtoF(b);
   880  		c = copyu(r->f.prog, &v, A);
   881  		if(c == 3)
   882  			set |= b;
   883  		bb &= ~b;
   884  	}
   885  	return set;
   886  }
   887  
   888  uint32
   889  reguse(Reg *r, uint32 bb)
   890  {
   891  	uint32 b, set;
   892  	Adr v;
   893  	int c;
   894  
   895  	set = 0;
   896  	v = zprog.from;
   897  	while(b = bb & ~(bb-1)) {
   898  		v.type = b & 0xFF ? BtoR(b): BtoF(b);
   899  		c = copyu(r->f.prog, &v, A);
   900  		if(c == 1 || c == 2 || c == 4)
   901  			set |= b;
   902  		bb &= ~b;
   903  	}
   904  	return set;
   905  }
   906  
   907  uint32
   908  paint2(Reg *r, int bn)
   909  {
   910  	Reg *r1;
   911  	int z;
   912  	uint32 bb, vreg, x;
   913  
   914  	z = bn/32;
   915  	bb = 1L << (bn%32);
   916  	vreg = regbits;
   917  	if(!(r->act.b[z] & bb))
   918  		return vreg;
   919  	for(;;) {
   920  		if(!(r->refbehind.b[z] & bb))
   921  			break;
   922  		r1 = (Reg*)r->f.p1;
   923  		if(r1 == R)
   924  			break;
   925  		if(!(r1->refahead.b[z] & bb))
   926  			break;
   927  		if(!(r1->act.b[z] & bb))
   928  			break;
   929  		r = r1;
   930  	}
   931  	for(;;) {
   932  		r->act.b[z] &= ~bb;
   933  
   934  		vreg |= r->regu;
   935  
   936  		if(r->refbehind.b[z] & bb)
   937  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
   938  				if(r1->refahead.b[z] & bb)
   939  					vreg |= paint2(r1, bn);
   940  
   941  		if(!(r->refahead.b[z] & bb))
   942  			break;
   943  		r1 = (Reg*)r->f.s2;
   944  		if(r1 != R)
   945  			if(r1->refbehind.b[z] & bb)
   946  				vreg |= paint2(r1, bn);
   947  		r = (Reg*)r->f.s1;
   948  		if(r == R)
   949  			break;
   950  		if(!(r->act.b[z] & bb))
   951  			break;
   952  		if(!(r->refbehind.b[z] & bb))
   953  			break;
   954  	}
   955  
   956  	bb = vreg;
   957  	for(; r; r=(Reg*)r->f.s1) {
   958  		x = r->regu & ~bb;
   959  		if(x) {
   960  			vreg |= reguse(r, x);
   961  			bb |= regset(r, x);
   962  		}
   963  	}
   964  	return vreg;
   965  }
   966  
   967  void
   968  paint3(Reg *r, int bn, int32 rb, int rn)
   969  {
   970  	Reg *r1;
   971  	Prog *p;
   972  	int z;
   973  	uint32 bb;
   974  
   975  	z = bn/32;
   976  	bb = 1L << (bn%32);
   977  	if(r->act.b[z] & bb)
   978  		return;
   979  	for(;;) {
   980  		if(!(r->refbehind.b[z] & bb))
   981  			break;
   982  		r1 = (Reg*)r->f.p1;
   983  		if(r1 == R)
   984  			break;
   985  		if(!(r1->refahead.b[z] & bb))
   986  			break;
   987  		if(r1->act.b[z] & bb)
   988  			break;
   989  		r = r1;
   990  	}
   991  
   992  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
   993  		addmove(r, bn, rn, 0);
   994  	for(;;) {
   995  		r->act.b[z] |= bb;
   996  		p = r->f.prog;
   997  
   998  		if(r->use1.b[z] & bb) {
   999  			if(debug['R'] && debug['v'])
  1000  				print("%P", p);
  1001  			addreg(&p->from, rn);
  1002  			if(debug['R'] && debug['v'])
  1003  				print(" ===change== %P\n", p);
  1004  		}
  1005  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1006  			if(debug['R'] && debug['v'])
  1007  				print("%P", p);
  1008  			addreg(&p->to, rn);
  1009  			if(debug['R'] && debug['v'])
  1010  				print(" ===change== %P\n", p);
  1011  		}
  1012  
  1013  		if(STORE(r) & r->regdiff.b[z] & bb)
  1014  			addmove(r, bn, rn, 1);
  1015  		r->regu |= rb;
  1016  
  1017  		if(r->refbehind.b[z] & bb)
  1018  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1019  				if(r1->refahead.b[z] & bb)
  1020  					paint3(r1, bn, rb, rn);
  1021  
  1022  		if(!(r->refahead.b[z] & bb))
  1023  			break;
  1024  		r1 = (Reg*)r->f.s2;
  1025  		if(r1 != R)
  1026  			if(r1->refbehind.b[z] & bb)
  1027  				paint3(r1, bn, rb, rn);
  1028  		r = (Reg*)r->f.s1;
  1029  		if(r == R)
  1030  			break;
  1031  		if(r->act.b[z] & bb)
  1032  			break;
  1033  		if(!(r->refbehind.b[z] & bb))
  1034  			break;
  1035  	}
  1036  }
  1037  
  1038  void
  1039  addreg(Adr *a, int rn)
  1040  {
  1041  
  1042  	a->sym = 0;
  1043  	a->offset = 0;
  1044  	a->type = rn;
  1045  
  1046  	ostats.ncvtreg++;
  1047  }
  1048  
  1049  int32
  1050  RtoB(int r)
  1051  {
  1052  
  1053  	if(r < D_AX || r > D_DI)
  1054  		return 0;
  1055  	return 1L << (r-D_AX);
  1056  }
  1057  
  1058  int
  1059  BtoR(int32 b)
  1060  {
  1061  
  1062  	b &= 0xffL;
  1063  	if(b == 0)
  1064  		return 0;
  1065  	return bitno(b) + D_AX;
  1066  }
  1067  
  1068  int32
  1069  FtoB(int f)
  1070  {
  1071  	if(f < D_X0 || f > D_X7)
  1072  		return 0;
  1073  	return 1L << (f - D_X0 + 8);
  1074  }
  1075  
  1076  int
  1077  BtoF(int32 b)
  1078  {
  1079  	b &= 0xFF00L;
  1080  	if(b == 0)
  1081  		return 0;
  1082  	return bitno(b) - 8 + D_X0;
  1083  }
  1084  
  1085  void
  1086  dumpone(Flow *f, int isreg)
  1087  {
  1088  	int z;
  1089  	Bits bit;
  1090  	Reg *r;
  1091  
  1092  	print("%d:%P", f->loop, f->prog);
  1093  	if(isreg) {
  1094  		r = (Reg*)f;
  1095  		for(z=0; z<BITS; z++)
  1096  			bit.b[z] =
  1097  				r->set.b[z] |
  1098  				r->use1.b[z] |
  1099  				r->use2.b[z] |
  1100  				r->refbehind.b[z] |
  1101  				r->refahead.b[z] |
  1102  				r->calbehind.b[z] |
  1103  				r->calahead.b[z] |
  1104  				r->regdiff.b[z] |
  1105  				r->act.b[z] |
  1106  					0;
  1107  		if(bany(&bit)) {
  1108  			print("\t");
  1109  			if(bany(&r->set))
  1110  				print(" s:%Q", r->set);
  1111  			if(bany(&r->use1))
  1112  				print(" u1:%Q", r->use1);
  1113  			if(bany(&r->use2))
  1114  				print(" u2:%Q", r->use2);
  1115  			if(bany(&r->refbehind))
  1116  				print(" rb:%Q ", r->refbehind);
  1117  			if(bany(&r->refahead))
  1118  				print(" ra:%Q ", r->refahead);
  1119  			if(bany(&r->calbehind))
  1120  				print(" cb:%Q ", r->calbehind);
  1121  			if(bany(&r->calahead))
  1122  				print(" ca:%Q ", r->calahead);
  1123  			if(bany(&r->regdiff))
  1124  				print(" d:%Q ", r->regdiff);
  1125  			if(bany(&r->act))
  1126  				print(" a:%Q ", r->act);
  1127  		}
  1128  	}
  1129  	print("\n");
  1130  }
  1131  
  1132  void
  1133  dumpit(char *str, Flow *r0, int isreg)
  1134  {
  1135  	Flow *r, *r1;
  1136  
  1137  	print("\n%s\n", str);
  1138  	for(r = r0; r != nil; r = r->link) {
  1139  		dumpone(r, isreg);
  1140  		r1 = r->p2;
  1141  		if(r1 != nil) {
  1142  			print("	pred:");
  1143  			for(; r1 != nil; r1 = r->p2link)
  1144  				print(" %.4ud", r1->prog->loc);
  1145  			print("\n");
  1146  		}
  1147  //		r1 = r->s1;
  1148  //		if(r1 != nil) {
  1149  //			print("	succ:");
  1150  //			for(; r1 != R; r1 = r1->s1)
  1151  //				print(" %.4ud", r1->prog->loc);
  1152  //			print("\n");
  1153  //		}
  1154  	}
  1155  }