github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/6g/reg.c (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include "gg.h"
    34  #include "opt.h"
    35  
/*
 * Number of register pseudo-variables.  regopt reserves the first
 * NREGVAR entries of var[] for them, one per entry of regname[].
 */
#define	NREGVAR	32	/* 16 general + 16 floating */
/* mask used on word 0 of a Bits to select or clear the register pseudo-variable bits */
#define	REGBITS	((uint32)0xffffffff)

/* first Reg of the flow graph being optimized; set in regopt, reset to R when the graph is freed */
static	Reg*	firstr;
/* nonzero until regopt has performed its one-time initialization */
static	int	first	= 1;
    41  
    42  int
    43  rcmp(const void *a1, const void *a2)
    44  {
    45  	Rgn *p1, *p2;
    46  	int c1, c2;
    47  
    48  	p1 = (Rgn*)a1;
    49  	p2 = (Rgn*)a2;
    50  	c1 = p2->cost;
    51  	c2 = p1->cost;
    52  	if(c1 -= c2)
    53  		return c1;
    54  	return p2->varno - p1->varno;
    55  }
    56  
    57  static void
    58  setoutvar(void)
    59  {
    60  	Type *t;
    61  	Node *n;
    62  	Addr a;
    63  	Iter save;
    64  	Bits bit;
    65  	int z;
    66  
    67  	t = structfirst(&save, getoutarg(curfn->type));
    68  	while(t != T) {
    69  		n = nodarg(t, 1);
    70  		a = zprog.from;
    71  		naddr(n, &a, 0);
    72  		bit = mkvar(R, &a);
    73  		for(z=0; z<BITS; z++)
    74  			ovar.b[z] |= bit.b[z];
    75  		t = structnext(&save);
    76  	}
    77  //if(bany(&ovar))
    78  //print("ovars = %Q\n", ovar);
    79  }
    80  
    81  static void
    82  setaddrs(Bits bit)
    83  {
    84  	int i, n;
    85  	Var *v;
    86  	Node *node;
    87  
    88  	while(bany(&bit)) {
    89  		// convert each bit to a variable
    90  		i = bnum(bit);
    91  		node = var[i].node;
    92  		n = var[i].name;
    93  		bit.b[i/32] &= ~(1L<<(i%32));
    94  
    95  		// disable all pieces of that variable
    96  		for(i=0; i<nvar; i++) {
    97  			v = var+i;
    98  			if(v->node == node && v->name == n)
    99  				v->addr = 2;
   100  		}
   101  	}
   102  }
   103  
/*
 * Symbol names for the register pseudo-variables, indexed by
 * pseudo-variable number (0..NREGVAR-1).  regopt looks each name
 * up to create the Node cached in regnodes[].
 * The first 16 entries are the general registers and the last 16
 * the X (floating) registers; presumably this order matches the
 * bit layout produced by RtoB/FtoB -- confirm against the D_*
 * register numbering.
 * Note that mkvar returns no variable for a symbol whose name
 * begins with '.'.
 */
static char* regname[] = {
	".AX",
	".CX",
	".DX",
	".BX",
	".SP",
	".BP",
	".SI",
	".DI",
	".R8",
	".R9",
	".R10",
	".R11",
	".R12",
	".R13",
	".R14",
	".R15",
	".X0",
	".X1",
	".X2",
	".X3",
	".X4",
	".X5",
	".X6",
	".X7",
	".X8",
	".X9",
	".X10",
	".X11",
	".X12",
	".X13",
	".X14",
	".X15",
};

/* Nodes for the names above; filled in lazily by regopt and reused across calls */
static Node* regnodes[NREGVAR];
   140  
/*
 * regopt is the driver of the register optimizer for the
 * instruction list starting at firstp.  It runs, in order:
 *	pass 1: build the flow graph and per-instruction use/set bits
 *	pass 2: find loop structure (flowrpo)
 *	pass 3: propagate usage backward (prop)
 *	pass 4: propagate register/variable synchrony forward (synch)
 *	pass 4.5: move register pseudo-variable bits into regu
 *	pass 5: isolate regions and compute their cost (paint1)
 *	pass 6: choose registers and rewrite code (paint2, allreg, paint3)
 *	pass 7: peephole optimization (peep), then ANOP removal
 * It works through file and global state (var, nvar, region, rgp,
 * nregion, change, regbits, the externs/params/consts/addrs/ovar
 * bit sets and ostats) and returns nothing.
 */
void
regopt(Prog *firstp)
{
	Reg *r, *r1;
	Prog *p;
	Graph *g;
	ProgInfo info;
	int i, z;
	uint32 vreg;
	Bits bit;

	// one-time setup on the first call
	if(first) {
		fmtinstall('Q', Qconv);
		exregoffset = D_R15;
		first = 0;
	}

	fixjmp(firstp);
	mergetemp(firstp);
	
	/*
	 * control flow is more complicated in generated go code
	 * than in generated c code.  define pseudo-variables for
	 * registers, so we have complete register usage information.
	 */
	nvar = NREGVAR;
	memset(var, 0, NREGVAR*sizeof var[0]);
	for(i=0; i<NREGVAR; i++) {
		// the Node for each register name is created once and cached
		if(regnodes[i] == N)
			regnodes[i] = newname(lookup(regname[i]));
		var[i].node = regnodes[i];
	}

	// SP starts out in the set of registers paint2 reports as used
	regbits = RtoB(D_SP);
	for(z=0; z<BITS; z++) {
		externs.b[z] = 0;
		params.b[z] = 0;
		consts.b[z] = 0;
		addrs.b[z] = 0;
		ovar.b[z] = 0;
	}

	// build list of return variables
	setoutvar();

	/*
	 * pass 1
	 * build aux data structure
	 * allocate pcs
	 * find use and set of variables
	 */
	g = flowstart(firstp, sizeof(Reg));
	// no graph: nothing is optimized (peep and nop elimination are skipped too)
	if(g == nil)
		return;
	firstr = (Reg*)g->start;

	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		p = r->f.prog;
		proginfo(&info, p);

		// Avoid making variables for direct-called functions.
		if(p->as == ACALL && p->to.type == D_EXTERN)
			continue;

		// register pseudo-variables live in word 0
		r->use1.b[0] |= info.reguse | info.regindex;
		r->set.b[0] |= info.regset;

		// left (from) operand
		bit = mkvar(r, &p->from);
		if(bany(&bit)) {
			if(info.flags & LeftAddr)
				setaddrs(bit);
			if(info.flags & LeftRead)
				for(z=0; z<BITS; z++)
					r->use1.b[z] |= bit.b[z];
			if(info.flags & LeftWrite)
				for(z=0; z<BITS; z++)
					r->set.b[z] |= bit.b[z];
		}

		// right (to) operand; reads are recorded in use2 rather than use1
		bit = mkvar(r, &p->to);
		if(bany(&bit)) {	
			if(info.flags & RightAddr)
				setaddrs(bit);
			if(info.flags & RightRead)
				for(z=0; z<BITS; z++)
					r->use2.b[z] |= bit.b[z];
			if(info.flags & RightWrite)
				for(z=0; z<BITS; z++)
					r->set.b[z] |= bit.b[z];
		}
	}

	// collect every variable with a nonzero addr flag into addrs;
	// addrs is masked out of the region computations in pass 5
	for(i=0; i<nvar; i++) {
		Var *v = var+i;
		if(v->addr) {
			bit = blsh(i);
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
		}

		if(debug['R'] && debug['v'])
			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
				i, v->addr, v->etype, v->width, v->node, v->offset);
	}

	if(debug['R'] && debug['v'])
		dumpit("pass1", &firstr->f, 1);

	/*
	 * pass 2
	 * find looping structure
	 */
	flowrpo(g);

	if(debug['R'] && debug['v'])
		dumpit("pass2", &firstr->f, 1);

	/*
	 * pass 3
	 * iterate propagating usage
	 * 	back until flow graph is complete
	 */
loop1:
	change = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->f.active = 0;
	// seed the backward propagation from every return
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		if(r->f.prog->as == ARET)
			prop(r, zbits, zbits);
loop11:
	/* pick up unreachable code */
	i = 0;
	for(r = firstr; r != R; r = r1) {
		r1 = (Reg*)r->f.link;
		// r was not visited but the next instruction in the list was
		if(r1 && r1->f.active && !r->f.active) {
			prop(r, zbits, zbits);
			i = 1;
		}
	}
	if(i)
		goto loop11;
	// prop increments change whenever it updates a set; repeat until stable
	if(change)
		goto loop1;

	if(debug['R'] && debug['v'])
		dumpit("pass3", &firstr->f, 1);

	/*
	 * pass 4
	 * iterate propagating register/variable synchrony
	 * 	forward until graph is complete
	 */
loop2:
	change = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->f.active = 0;
	synch(firstr, zbits);
	if(change)
		goto loop2;

	if(debug['R'] && debug['v'])
		dumpit("pass4", &firstr->f, 1);

	/*
	 * pass 4.5
	 * move register pseudo-variables into regu.
	 */
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;

		// from here on word 0 of every set carries no register bits
		r->set.b[0] &= ~REGBITS;
		r->use1.b[0] &= ~REGBITS;
		r->use2.b[0] &= ~REGBITS;
		r->refbehind.b[0] &= ~REGBITS;
		r->refahead.b[0] &= ~REGBITS;
		r->calbehind.b[0] &= ~REGBITS;
		r->calahead.b[0] &= ~REGBITS;
		r->regdiff.b[0] &= ~REGBITS;
		r->act.b[0] &= ~REGBITS;
	}

	/*
	 * pass 5
	 * isolate regions
	 * calculate costs (paint1)
	 */
	r = firstr;
	if(r) {
		// variables referenced ahead of the first instruction that are
		// not extern, parameter, address-taken or constant
		for(z=0; z<BITS; z++)
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
		if(bany(&bit) && !r->f.refset) {
			// should never happen - all variables are preset
			if(debug['w'])
				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
			r->f.refset = 1;
		}
	}
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->act = zbits;
	rgp = region;
	nregion = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		// variables set here but not referenced afterwards
		for(z=0; z<BITS; z++)
			bit.b[z] = r->set.b[z] &
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
		if(bany(&bit) && !r->f.refset) {
			if(debug['w'])
				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
			r->f.refset = 1;
			excise(&r->f);
		}
		// LOAD is a macro defined elsewhere; it is evaluated once per word z
		for(z=0; z<BITS; z++)
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
		while(bany(&bit)) {
			i = bnum(bit);
			rgp->enter = r;
			rgp->varno = i;
			// paint1 accumulates the region's cost estimate in change
			change = 0;
			paint1(r, i);
			bit.b[i/32] &= ~(1L<<(i%32));
			// regions with no net gain are not recorded
			if(change <= 0)
				continue;
			rgp->cost = change;
			nregion++;
			// region table is full: stop collecting regions
			if(nregion >= NRGN) {
				if(debug['R'] && debug['v'])
					print("too many regions\n");
				goto brk;
			}
			rgp++;
		}
	}
brk:
	// rcmp orders regions by decreasing cost
	qsort(region, nregion, sizeof(region[0]), rcmp);

	if(debug['R'] && debug['v'])
		dumpit("pass5", &firstr->f, 1);

	/*
	 * pass 6
	 * determine used registers (paint2)
	 * replace code (paint3)
	 */
	rgp = region;
	for(i=0; i<nregion; i++) {
		bit = blsh(rgp->varno);
		vreg = paint2(rgp->enter, rgp->varno);
		// allreg sets rgp->regno (0 when no register was assigned)
		vreg = allreg(vreg, rgp);
		if(rgp->regno != 0) {
			if(debug['R'] && debug['v']) {
				Var *v;

				v = var + rgp->varno;
				print("registerize %N+%lld (bit=%2d et=%2E) in %R\n",
						v->node, v->offset, rgp->varno, v->etype, rgp->regno);
			}
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
		}
		rgp++;
	}

	if(debug['R'] && debug['v'])
		dumpit("pass6", &firstr->f, 1);
	
	/*
	 * free aux structures. peep allocates new ones.
	 */
	flowend(g);
	firstr = R;

	/*
	 * pass 7
	 * peep-hole on basic block
	 */
	if(!debug['R'] || debug['P'])
		peep(firstp);

	/*
	 * eliminate nops
	 */
	for(p=firstp; p!=P; p=p->link) {
		// unlink ANOPs that follow p
		while(p->link != P && p->link->as == ANOP)
			p->link = p->link->link;
		// retarget branches that point at an ANOP to the instruction after it
		if(p->to.type == D_BRANCH)
			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
				p->to.u.branch = p->to.u.branch->link;
	}

	// print and reset the optimizer statistics
	if(debug['R']) {
		if(ostats.ncvtreg ||
		   ostats.nspill ||
		   ostats.nreload ||
		   ostats.ndelmov ||
		   ostats.nvar ||
		   ostats.naddr ||
		   0)
			print("\nstats\n");

		if(ostats.ncvtreg)
			print("	%4d cvtreg\n", ostats.ncvtreg);
		if(ostats.nspill)
			print("	%4d spill\n", ostats.nspill);
		if(ostats.nreload)
			print("	%4d reload\n", ostats.nreload);
		if(ostats.ndelmov)
			print("	%4d delmov\n", ostats.ndelmov);
		if(ostats.nvar)
			print("	%4d var\n", ostats.nvar);
		if(ostats.naddr)
			print("	%4d addr\n", ostats.naddr);

		memset(&ostats, 0, sizeof(ostats));
	}
}
   456  
   457  /*
   458   * add mov b,rn
   459   * just after r
   460   */
   461  void
   462  addmove(Reg *r, int bn, int rn, int f)
   463  {
   464  	Prog *p, *p1;
   465  	Adr *a;
   466  	Var *v;
   467  
   468  	p1 = mal(sizeof(*p1));
   469  	clearp(p1);
   470  	p1->loc = 9999;
   471  
   472  	p = r->f.prog;
   473  	p1->link = p->link;
   474  	p->link = p1;
   475  	p1->lineno = p->lineno;
   476  
   477  	v = var + bn;
   478  
   479  	a = &p1->to;
   480  	a->offset = v->offset;
   481  	a->etype = v->etype;
   482  	a->type = v->name;
   483  	a->node = v->node;
   484  	a->sym = v->node->sym;
   485  
   486  	// need to clean this up with wptr and
   487  	// some of the defaults
   488  	p1->as = AMOVL;
   489  	switch(v->etype) {
   490  	default:
   491  		fatal("unknown type %E", v->etype);
   492  	case TINT8:
   493  	case TUINT8:
   494  	case TBOOL:
   495  		p1->as = AMOVB;
   496  		break;
   497  	case TINT16:
   498  	case TUINT16:
   499  		p1->as = AMOVW;
   500  		break;
   501  	case TINT64:
   502  	case TUINT64:
   503  	case TUINTPTR:
   504  	case TPTR64:
   505  		p1->as = AMOVQ;
   506  		break;
   507  	case TFLOAT32:
   508  		p1->as = AMOVSS;
   509  		break;
   510  	case TFLOAT64:
   511  		p1->as = AMOVSD;
   512  		break;
   513  	case TINT:
   514  	case TUINT:
   515  	case TINT32:
   516  	case TUINT32:
   517  	case TPTR32:
   518  		break;
   519  	}
   520  
   521  	p1->from.type = rn;
   522  	if(!f) {
   523  		p1->from = *a;
   524  		*a = zprog.from;
   525  		a->type = rn;
   526  		if(v->etype == TUINT8)
   527  			p1->as = AMOVB;
   528  		if(v->etype == TUINT16)
   529  			p1->as = AMOVW;
   530  	}
   531  	if(debug['R'] && debug['v'])
   532  		print("%P ===add=== %P\n", p, p1);
   533  	ostats.nspill++;
   534  }
   535  
   536  uint32
   537  doregbits(int r)
   538  {
   539  	uint32 b;
   540  
   541  	b = 0;
   542  	if(r >= D_INDIR)
   543  		r -= D_INDIR;
   544  	if(r >= D_AX && r <= D_R15)
   545  		b |= RtoB(r);
   546  	else
   547  	if(r >= D_AL && r <= D_R15B)
   548  		b |= RtoB(r-D_AL+D_AX);
   549  	else
   550  	if(r >= D_AH && r <= D_BH)
   551  		b |= RtoB(r-D_AH+D_AX);
   552  	else
   553  	if(r >= D_X0 && r <= D_X0+15)
   554  		b |= FtoB(r);
   555  	return b;
   556  }
   557  
   558  static int
   559  overlap(int64 o1, int w1, int64 o2, int w2)
   560  {
   561  	int64 t1, t2;
   562  
   563  	t1 = o1+w1;
   564  	t2 = o2+w2;
   565  
   566  	if(!(t1 > o2 && t2 > o1))
   567  		return 0;
   568  
   569  	return 1;
   570  }
   571  
   572  Bits
   573  mkvar(Reg *r, Adr *a)
   574  {
   575  	Var *v;
   576  	int i, t, n, et, z, flag;
   577  	int64 w;
   578  	uint32 regu;
   579  	int64 o;
   580  	Bits bit;
   581  	Node *node;
   582  
   583  	/*
   584  	 * mark registers used
   585  	 */
   586  	t = a->type;
   587  	if(t == D_NONE)
   588  		goto none;
   589  
   590  	if(r != R)
   591  		r->use1.b[0] |= doregbits(a->index);
   592  
   593  	switch(t) {
   594  	default:
   595  		regu = doregbits(t);
   596  		if(regu == 0)
   597  			goto none;
   598  		bit = zbits;
   599  		bit.b[0] = regu;
   600  		return bit;
   601  
   602  	case D_ADDR:
   603  		a->type = a->index;
   604  		bit = mkvar(r, a);
   605  		setaddrs(bit);
   606  		a->type = t;
   607  		ostats.naddr++;
   608  		goto none;
   609  
   610  	case D_EXTERN:
   611  	case D_STATIC:
   612  	case D_PARAM:
   613  	case D_AUTO:
   614  		n = t;
   615  		break;
   616  	}
   617  
   618  	node = a->node;
   619  	if(node == N || node->op != ONAME || node->orig == N)
   620  		goto none;
   621  	node = node->orig;
   622  	if(node->orig != node)
   623  		fatal("%D: bad node", a);
   624  	if(node->sym == S || node->sym->name[0] == '.')
   625  		goto none;
   626  	et = a->etype;
   627  	o = a->offset;
   628  	w = a->width;
   629  	if(w < 0)
   630  		fatal("bad width %lld for %D", w, a);
   631  
   632  	flag = 0;
   633  	for(i=0; i<nvar; i++) {
   634  		v = var+i;
   635  		if(v->node == node && v->name == n) {
   636  			if(v->offset == o)
   637  			if(v->etype == et)
   638  			if(v->width == w)
   639  				return blsh(i);
   640  
   641  			// if they overlaps, disable both
   642  			if(overlap(v->offset, v->width, o, w)) {
   643  //				print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
   644  				v->addr = 1;
   645  				flag = 1;
   646  			}
   647  		}
   648  	}
   649  	switch(et) {
   650  	case 0:
   651  	case TFUNC:
   652  		goto none;
   653  	}
   654  
   655  	if(nvar >= NVAR) {
   656  		if(debug['w'] > 1 && node != N)
   657  			fatal("variable not optimized: %#N", node);
   658  		goto none;
   659  	}
   660  
   661  	i = nvar;
   662  	nvar++;
   663  	v = var+i;
   664  	v->offset = o;
   665  	v->name = n;
   666  	v->etype = et;
   667  	v->width = w;
   668  	v->addr = flag;		// funny punning
   669  	v->node = node;
   670  
   671  	if(debug['R'])
   672  		print("bit=%2d et=%2E w=%d+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
   673  
   674  	ostats.nvar++;
   675  
   676  	bit = blsh(i);
   677  	if(n == D_EXTERN || n == D_STATIC)
   678  		for(z=0; z<BITS; z++)
   679  			externs.b[z] |= bit.b[z];
   680  	if(n == D_PARAM)
   681  		for(z=0; z<BITS; z++)
   682  			params.b[z] |= bit.b[z];
   683  
   684  	return bit;
   685  
   686  none:
   687  	return zbits;
   688  }
   689  
/*
 * prop propagates the reference set ref and the cal set
 * backward from r along the p1 (predecessor) chain.
 * At each Reg it merges the incoming sets into refahead/calahead
 * (incrementing the global change when either grows), applies the
 * effect of the instruction, and stores the result in
 * refbehind/calbehind.  The walk stops at the start of the chain or
 * after reprocessing a Reg that was already active.
 * Afterwards every p2 predecessor of the Regs walked is processed
 * recursively with that Reg's refbehind/calbehind.
 * ref and cal are passed by value and used as working copies.
 */
void
prop(Reg *r, Bits ref, Bits cal)
{
	Reg *r1, *r2;
	int z;

	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
		// merge with what is already known after this instruction
		for(z=0; z<BITS; z++) {
			ref.b[z] |= r1->refahead.b[z];
			if(ref.b[z] != r1->refahead.b[z]) {
				r1->refahead.b[z] = ref.b[z];
				change++;
			}
			cal.b[z] |= r1->calahead.b[z];
			if(cal.b[z] != r1->calahead.b[z]) {
				r1->calahead.b[z] = cal.b[z];
				change++;
			}
		}
		switch(r1->f.prog->as) {
		case ACALL:
			if(noreturn(r1->f.prog))
				break;
			// across a call: everything referenced, plus externs, moves to cal
			for(z=0; z<BITS; z++) {
				cal.b[z] |= ref.b[z] | externs.b[z];
				ref.b[z] = 0;
			}
			break;

		case ATEXT:
			// function entry: nothing flows further back
			for(z=0; z<BITS; z++) {
				cal.b[z] = 0;
				ref.b[z] = 0;
			}
			break;

		case ARET:
			// at a return, externs and output variables go into cal
			for(z=0; z<BITS; z++) {
				cal.b[z] = externs.b[z] | ovar.b[z];
				ref.b[z] = 0;
			}
			break;

		default:
			// Work around for issue 1304:
			// flush modified globals before each instruction.
			for(z=0; z<BITS; z++) {
				cal.b[z] |= externs.b[z];
				// issue 4066: flush modified return variables in case of panic
				if(hasdefer)
					cal.b[z] |= ovar.b[z];
			}
			break;
		}
		// a set removes the variable from ref, a use adds it;
		// anything this instruction sets or uses leaves cal
		for(z=0; z<BITS; z++) {
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
				r1->use1.b[z] | r1->use2.b[z];
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
			r1->refbehind.b[z] = ref.b[z];
			r1->calbehind.b[z] = cal.b[z];
		}
		// an already-active Reg is updated above but not walked past
		if(r1->f.active)
			break;
		r1->f.active = 1;
	}
	// recurse into the p2 predecessors of every Reg before the stopping point r1
	for(; r != r1; r = (Reg*)r->f.p1)
		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
			prop(r2, r->refbehind, r->calbehind);
}
   759  
   760  void
   761  synch(Reg *r, Bits dif)
   762  {
   763  	Reg *r1;
   764  	int z;
   765  
   766  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
   767  		for(z=0; z<BITS; z++) {
   768  			dif.b[z] = (dif.b[z] &
   769  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
   770  					r1->set.b[z] | r1->regdiff.b[z];
   771  			if(dif.b[z] != r1->regdiff.b[z]) {
   772  				r1->regdiff.b[z] = dif.b[z];
   773  				change++;
   774  			}
   775  		}
   776  		if(r1->f.active)
   777  			break;
   778  		r1->f.active = 1;
   779  		for(z=0; z<BITS; z++)
   780  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
   781  		if(r1->f.s2 != nil)
   782  			synch((Reg*)r1->f.s2, dif);
   783  	}
   784  }
   785  
   786  uint32
   787  allreg(uint32 b, Rgn *r)
   788  {
   789  	Var *v;
   790  	int i;
   791  
   792  	v = var + r->varno;
   793  	r->regno = 0;
   794  	switch(v->etype) {
   795  
   796  	default:
   797  		fatal("unknown etype %d/%E", bitno(b), v->etype);
   798  		break;
   799  
   800  	case TINT8:
   801  	case TUINT8:
   802  	case TINT16:
   803  	case TUINT16:
   804  	case TINT32:
   805  	case TUINT32:
   806  	case TINT64:
   807  	case TUINT64:
   808  	case TINT:
   809  	case TUINT:
   810  	case TUINTPTR:
   811  	case TBOOL:
   812  	case TPTR32:
   813  	case TPTR64:
   814  		i = BtoR(~b);
   815  		if(i && r->cost > 0) {
   816  			r->regno = i;
   817  			return RtoB(i);
   818  		}
   819  		break;
   820  
   821  	case TFLOAT32:
   822  	case TFLOAT64:
   823  		i = BtoF(~b);
   824  		if(i && r->cost > 0) {
   825  			r->regno = i;
   826  			return FtoB(i);
   827  		}
   828  		break;
   829  	}
   830  	return 0;
   831  }
   832  
/*
 * paint1 walks the region of variable bit bn that contains r,
 * marking each Reg visited in act and accumulating a cost
 * estimate in the global change: CREF * loop is added for each
 * use/set of the variable and CLOAD * loop is subtracted where a
 * load or store would be needed.  regopt zeroes change before the
 * call and reads it afterwards.
 * Returns immediately if r is already marked for bn.
 * NOTE(review): LOAD and STORE are macros defined elsewhere; they
 * appear to depend on the local z -- confirm in the header.
 */
void
paint1(Reg *r, int bn)
{
	Reg *r1;
	int z;
	uint32 bb;

	// word index and mask of bit bn
	z = bn/32;
	bb = 1L<<(bn%32);
	if(r->act.b[z] & bb)
		return;
	// back up along p1 to the first Reg of this stretch of the region
	for(;;) {
		if(!(r->refbehind.b[z] & bb))
			break;
		r1 = (Reg*)r->f.p1;
		if(r1 == R)
			break;
		if(!(r1->refahead.b[z] & bb))
			break;
		if(r1->act.b[z] & bb)
			break;
		r = r1;
	}

	// same condition under which paint3 inserts a load at region entry
	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
		change -= CLOAD * r->f.loop;
	}
	for(;;) {
		r->act.b[z] |= bb;

		if(r->use1.b[z] & bb) {
			change += CREF * r->f.loop;
		}

		if((r->use2.b[z]|r->set.b[z]) & bb) {
			change += CREF * r->f.loop;
		}

		// same condition under which paint3 inserts a store
		if(STORE(r) & r->regdiff.b[z] & bb) {
			change -= CLOAD * r->f.loop;
		}

		// region extends backward into the p2 predecessors
		if(r->refbehind.b[z] & bb)
			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
				if(r1->refahead.b[z] & bb)
					paint1(r1, bn);

		if(!(r->refahead.b[z] & bb))
			break;
		// region extends forward: recurse on s2, iterate on s1
		r1 = (Reg*)r->f.s2;
		if(r1 != R)
			if(r1->refbehind.b[z] & bb)
				paint1(r1, bn);
		r = (Reg*)r->f.s1;
		if(r == R)
			break;
		if(r->act.b[z] & bb)
			break;
		if(!(r->refbehind.b[z] & bb))
			break;
	}
}
   895  
   896  uint32
   897  regset(Reg *r, uint32 bb)
   898  {
   899  	uint32 b, set;
   900  	Adr v;
   901  	int c;
   902  
   903  	set = 0;
   904  	v = zprog.from;
   905  	while(b = bb & ~(bb-1)) {
   906  		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
   907  		if(v.type == 0)
   908  			fatal("zero v.type for %#ux", b);
   909  		c = copyu(r->f.prog, &v, A);
   910  		if(c == 3)
   911  			set |= b;
   912  		bb &= ~b;
   913  	}
   914  	return set;
   915  }
   916  
   917  uint32
   918  reguse(Reg *r, uint32 bb)
   919  {
   920  	uint32 b, set;
   921  	Adr v;
   922  	int c;
   923  
   924  	set = 0;
   925  	v = zprog.from;
   926  	while(b = bb & ~(bb-1)) {
   927  		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
   928  		c = copyu(r->f.prog, &v, A);
   929  		if(c == 1 || c == 2 || c == 4)
   930  			set |= b;
   931  		bb &= ~b;
   932  	}
   933  	return set;
   934  }
   935  
/*
 * paint2 walks the region of variable bit bn that contains r,
 * following the same shape as paint1 but over Regs that paint1
 * marked in act, clearing the mark as it goes.  It returns the
 * union of regbits and the regu sets of every Reg in the region,
 * plus register bits picked up by the trailing scan at the end of
 * the function.
 * Returns regbits immediately if r is not marked for bn.
 */
uint32
paint2(Reg *r, int bn)
{
	Reg *r1;
	int z;
	uint32 bb, vreg, x;

	// word index and mask of bit bn
	z = bn/32;
	bb = 1L << (bn%32);
	vreg = regbits;
	if(!(r->act.b[z] & bb))
		return vreg;
	// back up along p1 while the predecessor is still marked
	for(;;) {
		if(!(r->refbehind.b[z] & bb))
			break;
		r1 = (Reg*)r->f.p1;
		if(r1 == R)
			break;
		if(!(r1->refahead.b[z] & bb))
			break;
		if(!(r1->act.b[z] & bb))
			break;
		r = r1;
	}
	for(;;) {
		// unmark, so the Reg is not visited twice
		r->act.b[z] &= ~bb;

		vreg |= r->regu;

		if(r->refbehind.b[z] & bb)
			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
				if(r1->refahead.b[z] & bb)
					vreg |= paint2(r1, bn);

		if(!(r->refahead.b[z] & bb))
			break;
		r1 = (Reg*)r->f.s2;
		if(r1 != R)
			if(r1->refbehind.b[z] & bb)
				vreg |= paint2(r1, bn);
		r = (Reg*)r->f.s1;
		if(r == R)
			break;
		if(!(r->act.b[z] & bb))
			break;
		if(!(r->refbehind.b[z] & bb))
			break;
	}

	// from here on bb is reused as a register mask, no longer the variable bit.
	// continue along s1 from where the walk stopped: registers not yet
	// accounted for are added to vreg when reguse reports them, and
	// dropped from further consideration once regset reports them
	bb = vreg;
	for(; r; r=(Reg*)r->f.s1) {
		x = r->regu & ~bb;
		if(x) {
			vreg |= reguse(r, x);
			bb |= regset(r, x);
		}
	}
	return vreg;
}
   995  
/*
 * paint3 rewrites the region of variable bit bn that contains r
 * so that the variable lives in register rn.  It walks the region
 * in the same shape as paint1, marking Regs in act, and for each:
 *	- replaces the from operand (use1) and/or the to operand
 *	  (use2 or set) with the register via addreg;
 *	- inserts a store after the instruction where STORE and
 *	  regdiff require it;
 *	- adds the register bits rb to the Reg's regu.
 * A load into the register is inserted at the region entry when
 * needed.  Returns immediately if r is already marked for bn.
 * NOTE(review): LOAD and STORE are macros defined elsewhere; they
 * appear to depend on the local z -- confirm in the header.
 */
void
paint3(Reg *r, int bn, int32 rb, int rn)
{
	Reg *r1;
	Prog *p;
	int z;
	uint32 bb;

	// word index and mask of bit bn
	z = bn/32;
	bb = 1L << (bn%32);
	if(r->act.b[z] & bb)
		return;
	// back up along p1 to the first Reg of this stretch of the region
	for(;;) {
		if(!(r->refbehind.b[z] & bb))
			break;
		r1 = (Reg*)r->f.p1;
		if(r1 == R)
			break;
		if(!(r1->refahead.b[z] & bb))
			break;
		if(r1->act.b[z] & bb)
			break;
		r = r1;
	}

	// load the variable into the register on entry (f == 0)
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
		addmove(r, bn, rn, 0);
	for(;;) {
		r->act.b[z] |= bb;
		p = r->f.prog;

		if(r->use1.b[z] & bb) {
			if(debug['R'] && debug['v'])
				print("%P", p);
			addreg(&p->from, rn);
			if(debug['R'] && debug['v'])
				print(" ===change== %P\n", p);
		}
		if((r->use2.b[z]|r->set.b[z]) & bb) {
			if(debug['R'] && debug['v'])
				print("%P", p);
			addreg(&p->to, rn);
			if(debug['R'] && debug['v'])
				print(" ===change== %P\n", p);
		}

		// store the register back to the variable (f == 1)
		if(STORE(r) & r->regdiff.b[z] & bb)
			addmove(r, bn, rn, 1);
		r->regu |= rb;

		if(r->refbehind.b[z] & bb)
			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
				if(r1->refahead.b[z] & bb)
					paint3(r1, bn, rb, rn);

		if(!(r->refahead.b[z] & bb))
			break;
		r1 = (Reg*)r->f.s2;
		if(r1 != R)
			if(r1->refbehind.b[z] & bb)
				paint3(r1, bn, rb, rn);
		r = (Reg*)r->f.s1;
		if(r == R)
			break;
		if(r->act.b[z] & bb)
			break;
		if(!(r->refbehind.b[z] & bb))
			break;
	}
}
  1066  
  1067  void
  1068  addreg(Adr *a, int rn)
  1069  {
  1070  
  1071  	a->sym = 0;
  1072  	a->offset = 0;
  1073  	a->type = rn;
  1074  
  1075  	ostats.ncvtreg++;
  1076  }
  1077  
  1078  int32
  1079  RtoB(int r)
  1080  {
  1081  
  1082  	if(r < D_AX || r > D_R15)
  1083  		return 0;
  1084  	return 1L << (r-D_AX);
  1085  }
  1086  
  1087  int
  1088  BtoR(int32 b)
  1089  {
  1090  	b &= 0xffffL;
  1091  	if(b == 0)
  1092  		return 0;
  1093  	return bitno(b) + D_AX;
  1094  }
  1095  
  1096  /*
  1097   *	bit	reg
  1098   *	16	X0
  1099   *	...
  1100   *	31	X15
  1101   */
  1102  int32
  1103  FtoB(int f)
  1104  {
  1105  	if(f < D_X0 || f > D_X15)
  1106  		return 0;
  1107  	return 1L << (f - D_X0 + 16);
  1108  }
  1109  
  1110  int
  1111  BtoF(int32 b)
  1112  {
  1113  
  1114  	b &= 0xFFFF0000L;
  1115  	if(b == 0)
  1116  		return 0;
  1117  	return bitno(b) - 16 + D_X0;
  1118  }
  1119  
  1120  void
  1121  dumpone(Flow *f, int isreg)
  1122  {
  1123  	int z;
  1124  	Bits bit;
  1125  	Reg *r;
  1126  
  1127  	print("%d:%P", f->loop, f->prog);
  1128  	if(isreg) {	
  1129  		r = (Reg*)f;
  1130  		for(z=0; z<BITS; z++)
  1131  			bit.b[z] =
  1132  				r->set.b[z] |
  1133  				r->use1.b[z] |
  1134  				r->use2.b[z] |
  1135  				r->refbehind.b[z] |
  1136  				r->refahead.b[z] |
  1137  				r->calbehind.b[z] |
  1138  				r->calahead.b[z] |
  1139  				r->regdiff.b[z] |
  1140  				r->act.b[z] |
  1141  					0;
  1142  		if(bany(&bit)) {
  1143  			print("\t");
  1144  			if(bany(&r->set))
  1145  				print(" s:%Q", r->set);
  1146  			if(bany(&r->use1))
  1147  				print(" u1:%Q", r->use1);
  1148  			if(bany(&r->use2))
  1149  				print(" u2:%Q", r->use2);
  1150  			if(bany(&r->refbehind))
  1151  				print(" rb:%Q ", r->refbehind);
  1152  			if(bany(&r->refahead))
  1153  				print(" ra:%Q ", r->refahead);
  1154  			if(bany(&r->calbehind))
  1155  				print(" cb:%Q ", r->calbehind);
  1156  			if(bany(&r->calahead))
  1157  				print(" ca:%Q ", r->calahead);
  1158  			if(bany(&r->regdiff))
  1159  				print(" d:%Q ", r->regdiff);
  1160  			if(bany(&r->act))
  1161  				print(" a:%Q ", r->act);
  1162  		}
  1163  	}
  1164  	print("\n");
  1165  }
  1166  
  1167  void
  1168  dumpit(char *str, Flow *r0, int isreg)
  1169  {
  1170  	Flow *r, *r1;
  1171  
  1172  	print("\n%s\n", str);
  1173  	for(r = r0; r != nil; r = r->link) {
  1174  		dumpone(r, isreg);
  1175  		r1 = r->p2;
  1176  		if(r1 != nil) {
  1177  			print("	pred:");
  1178  			for(; r1 != nil; r1 = r1->p2link)
  1179  				print(" %.4ud", r1->prog->loc);
  1180  			print("\n");
  1181  		}
  1182  //		r1 = r->s1;
  1183  //		if(r1 != R) {
  1184  //			print("	succ:");
  1185  //			for(; r1 != R; r1 = r1->s1)
  1186  //				print(" %.4ud", r1->prog->loc);
  1187  //			print("\n");
  1188  //		}
  1189  	}
  1190  }