github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/5g/reg.c (about)

     1  // Inferno utils/5c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  
    32  #include <u.h>
    33  #include <libc.h>
    34  #include "gg.h"
    35  #include "opt.h"
    36  
// Number of register pseudo-variables placed at the start of var[]
// by regopt (one per entry of regname[]).
#define	NREGVAR	32
// Mask applied to word 0 of a Bits value to select or clear the
// register pseudo-variable bits.
#define	REGBITS	((uint32)0xffffffff)

	void	addsplits(void);
// Head of the Reg list for the function currently being processed by regopt.
static	Reg*	firstr;
// Nonzero until regopt has run once; guards the one-time fmtinstall call.
static	int	first	= 1;
    43  
    44  int
    45  rcmp(const void *a1, const void *a2)
    46  {
    47  	Rgn *p1, *p2;
    48  	int c1, c2;
    49  
    50  	p1 = (Rgn*)a1;
    51  	p2 = (Rgn*)a2;
    52  	c1 = p2->cost;
    53  	c2 = p1->cost;
    54  	if(c1 -= c2)
    55  		return c1;
    56  	return p2->varno - p1->varno;
    57  }
    58  
    59  void
    60  excise(Flow *r)
    61  {
    62  	Prog *p;
    63  
    64  	p = r->prog;
    65  	p->as = ANOP;
    66  	p->scond = zprog.scond;
    67  	p->from = zprog.from;
    68  	p->to = zprog.to;
    69  	p->reg = zprog.reg;
    70  }
    71  
    72  static void
    73  setaddrs(Bits bit)
    74  {
    75  	int i, n;
    76  	Var *v;
    77  	Node *node;
    78  
    79  	while(bany(&bit)) {
    80  		// convert each bit to a variable
    81  		i = bnum(bit);
    82  		node = var[i].node;
    83  		n = var[i].name;
    84  		bit.b[i/32] &= ~(1L<<(i%32));
    85  
    86  		// disable all pieces of that variable
    87  		for(i=0; i<nvar; i++) {
    88  			v = var+i;
    89  			if(v->node == node && v->name == n)
    90  				v->addr = 2;
    91  		}
    92  	}
    93  }
    94  
// Symbol names looked up by regopt to create the nodes for the
// register pseudo-variables: sixteen ".R" names followed by
// sixteen ".F" names, indexed by pseudo-variable number.
static char* regname[] = {
	".R0",
	".R1",
	".R2",
	".R3",
	".R4",
	".R5",
	".R6",
	".R7",
	".R8",
	".R9",
	".R10",
	".R11",
	".R12",
	".R13",
	".R14",
	".R15",
	".F0",
	".F1",
	".F2",
	".F3",
	".F4",
	".F5",
	".F6",
	".F7",
	".F8",
	".F9",
	".F10",
	".F11",
	".F12",
	".F13",
	".F14",
	".F15",
};

// Nodes created from regname[]; filled in lazily by regopt and
// reused on later calls.
static Node* regnodes[NREGVAR];

// Forward declaration; defined after regopt.
static void walkvardef(Node *n, Reg *r, int active);
   133  
   134  void
   135  regopt(Prog *firstp)
   136  {
   137  	Reg *r, *r1;
   138  	Prog *p;
   139  	Graph *g;
   140  	int i, z, active;
   141  	uint32 vreg;
   142  	Bits bit;
   143  	ProgInfo info;
   144  
   145  	if(first) {
   146  		fmtinstall('Q', Qconv);
   147  		first = 0;
   148  	}
   149  
   150  	mergetemp(firstp);
   151  
   152  	/*
   153  	 * control flow is more complicated in generated go code
   154  	 * than in generated c code.  define pseudo-variables for
   155  	 * registers, so we have complete register usage information.
   156  	 */
   157  	nvar = NREGVAR;
   158  	memset(var, 0, NREGVAR*sizeof var[0]);
   159  	for(i=0; i<NREGVAR; i++) {
   160  		if(regnodes[i] == N)
   161  			regnodes[i] = newname(lookup(regname[i]));
   162  		var[i].node = regnodes[i];
   163  	}
   164  
   165  	regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
   166  	for(z=0; z<BITS; z++) {
   167  		externs.b[z] = 0;
   168  		params.b[z] = 0;
   169  		consts.b[z] = 0;
   170  		addrs.b[z] = 0;
   171  		ivar.b[z] = 0;
   172  		ovar.b[z] = 0;
   173  	}
   174  
   175  	/*
   176  	 * pass 1
   177  	 * build aux data structure
   178  	 * allocate pcs
   179  	 * find use and set of variables
   180  	 */
   181  	g = flowstart(firstp, sizeof(Reg));
   182  	if(g == nil) {
   183  		for(i=0; i<nvar; i++)
   184  			var[i].node->opt = nil;
   185  		return;
   186  	}
   187  
   188  	firstr = (Reg*)g->start;
   189  
   190  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   191  		p = r->f.prog;
   192  		if(p->as == AVARDEF || p->as == AVARKILL)
   193  			continue;
   194  		proginfo(&info, p);
   195  
   196  		// Avoid making variables for direct-called functions.
   197  		if(p->as == ABL && p->to.type == D_EXTERN)
   198  			continue;
   199  
   200  		bit = mkvar(r, &p->from);
   201  		if(info.flags & LeftRead)
   202  			for(z=0; z<BITS; z++)
   203  				r->use1.b[z] |= bit.b[z];
   204  		if(info.flags & LeftAddr)
   205  			setaddrs(bit);
   206  
   207  		if(info.flags & RegRead) {	
   208  			if(p->from.type != D_FREG)
   209  				r->use1.b[0] |= RtoB(p->reg);
   210  			else
   211  				r->use1.b[0] |= FtoB(p->reg);
   212  		}
   213  
   214  		if(info.flags & (RightAddr | RightRead | RightWrite)) {
   215  			bit = mkvar(r, &p->to);
   216  			if(info.flags & RightAddr)
   217  				setaddrs(bit);
   218  			if(info.flags & RightRead)
   219  				for(z=0; z<BITS; z++)
   220  					r->use2.b[z] |= bit.b[z];
   221  			if(info.flags & RightWrite)
   222  				for(z=0; z<BITS; z++)
   223  					r->set.b[z] |= bit.b[z];
   224  		}
   225  	}
   226  	if(firstr == R)
   227  		return;
   228  
   229  	for(i=0; i<nvar; i++) {
   230  		Var *v = var+i;
   231  		if(v->addr) {
   232  			bit = blsh(i);
   233  			for(z=0; z<BITS; z++)
   234  				addrs.b[z] |= bit.b[z];
   235  		}
   236  
   237  		if(debug['R'] && debug['v'])
   238  			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
   239  				i, v->addr, v->etype, v->width, v->node, v->offset);
   240  	}
   241  
   242  	if(debug['R'] && debug['v'])
   243  		dumpit("pass1", &firstr->f, 1);
   244  
   245  	/*
   246  	 * pass 2
   247  	 * find looping structure
   248  	 */
   249  	flowrpo(g);
   250  
   251  	if(debug['R'] && debug['v'])
   252  		dumpit("pass2", &firstr->f, 1);
   253  
   254  	/*
   255  	 * pass 2.5
   256  	 * iterate propagating fat vardef covering forward
   257  	 * r->act records vars with a VARDEF since the last CALL.
   258  	 * (r->act will be reused in pass 5 for something else,
   259  	 * but we'll be done with it by then.)
   260  	 */
   261  	active = 0;
   262  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   263  		r->f.active = 0;
   264  		r->act = zbits;
   265  	}
   266  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   267  		p = r->f.prog;
   268  		if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
   269  			active++;
   270  			walkvardef(p->to.node, r, active);
   271  		}
   272  	}
   273  
   274  	/*
   275  	 * pass 3
   276  	 * iterate propagating usage
   277  	 * 	back until flow graph is complete
   278  	 */
   279  loop1:
   280  	change = 0;
   281  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   282  		r->f.active = 0;
   283  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   284  		if(r->f.prog->as == ARET)
   285  			prop(r, zbits, zbits);
   286  loop11:
   287  	/* pick up unreachable code */
   288  	i = 0;
   289  	for(r = firstr; r != R; r = r1) {
   290  		r1 = (Reg*)r->f.link;
   291  		if(r1 && r1->f.active && !r->f.active) {
   292  			prop(r, zbits, zbits);
   293  			i = 1;
   294  		}
   295  	}
   296  	if(i)
   297  		goto loop11;
   298  	if(change)
   299  		goto loop1;
   300  
   301  	if(debug['R'] && debug['v'])
   302  		dumpit("pass3", &firstr->f, 1);
   303  
   304  
   305  	/*
   306  	 * pass 4
   307  	 * iterate propagating register/variable synchrony
   308  	 * 	forward until graph is complete
   309  	 */
   310  loop2:
   311  	change = 0;
   312  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   313  		r->f.active = 0;
   314  	synch(firstr, zbits);
   315  	if(change)
   316  		goto loop2;
   317  
   318  	addsplits();
   319  
   320  	if(debug['R'] && debug['v'])
   321  		dumpit("pass4", &firstr->f, 1);
   322  
   323  	if(debug['R'] > 1) {
   324  		print("\nprop structure:\n");
   325  		for(r = firstr; r != R; r = (Reg*)r->f.link) {
   326  			print("%d:%P", r->f.loop, r->f.prog);
   327  			for(z=0; z<BITS; z++) {
   328  				bit.b[z] = r->set.b[z] |
   329  					r->refahead.b[z] | r->calahead.b[z] |
   330  					r->refbehind.b[z] | r->calbehind.b[z] |
   331  					r->use1.b[z] | r->use2.b[z];
   332  				bit.b[z] &= ~addrs.b[z];
   333  			}
   334  
   335  			if(bany(&bit)) {
   336  				print("\t");
   337  				if(bany(&r->use1))
   338  					print(" u1=%Q", r->use1);
   339  				if(bany(&r->use2))
   340  					print(" u2=%Q", r->use2);
   341  				if(bany(&r->set))
   342  					print(" st=%Q", r->set);
   343  				if(bany(&r->refahead))
   344  					print(" ra=%Q", r->refahead);
   345  				if(bany(&r->calahead))
   346  					print(" ca=%Q", r->calahead);
   347  				if(bany(&r->refbehind))
   348  					print(" rb=%Q", r->refbehind);
   349  				if(bany(&r->calbehind))
   350  					print(" cb=%Q", r->calbehind);
   351  			}
   352  			print("\n");
   353  		}
   354  	}
   355  
   356  	/*
   357  	 * pass 4.5
   358  	 * move register pseudo-variables into regu.
   359  	 */
   360  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   361  		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   362  
   363  		r->set.b[0] &= ~REGBITS;
   364  		r->use1.b[0] &= ~REGBITS;
   365  		r->use2.b[0] &= ~REGBITS;
   366  		r->refbehind.b[0] &= ~REGBITS;
   367  		r->refahead.b[0] &= ~REGBITS;
   368  		r->calbehind.b[0] &= ~REGBITS;
   369  		r->calahead.b[0] &= ~REGBITS;
   370  		r->regdiff.b[0] &= ~REGBITS;
   371  		r->act.b[0] &= ~REGBITS;
   372  	}
   373  
   374  	if(debug['R'] && debug['v'])
   375  		dumpit("pass4.5", &firstr->f, 1);
   376  
   377  	/*
   378  	 * pass 5
   379  	 * isolate regions
   380  	 * calculate costs (paint1)
   381  	 */
   382  	r = firstr;
   383  	if(r) {
   384  		for(z=0; z<BITS; z++)
   385  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   386  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   387  		if(bany(&bit) & !r->f.refset) {
   388  			// should never happen - all variables are preset
   389  			if(debug['w'])
   390  				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
   391  			r->f.refset = 1;
   392  		}
   393  	}
   394  
   395  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   396  		r->act = zbits;
   397  	rgp = region;
   398  	nregion = 0;
   399  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   400  		for(z=0; z<BITS; z++)
   401  			bit.b[z] = r->set.b[z] &
   402  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   403  		if(bany(&bit) && !r->f.refset) {
   404  			if(debug['w'])
   405  				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
   406  			r->f.refset = 1;
   407  			excise(&r->f);
   408  		}
   409  		for(z=0; z<BITS; z++)
   410  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   411  		while(bany(&bit)) {
   412  			i = bnum(bit);
   413  			rgp->enter = r;
   414  			rgp->varno = i;
   415  			change = 0;
   416  			if(debug['R'] > 1)
   417  				print("\n");
   418  			paint1(r, i);
   419  			bit.b[i/32] &= ~(1L<<(i%32));
   420  			if(change <= 0) {
   421  				if(debug['R'])
   422  					print("%L $%d: %Q\n",
   423  						r->f.prog->lineno, change, blsh(i));
   424  				continue;
   425  			}
   426  			rgp->cost = change;
   427  			nregion++;
   428  			if(nregion >= NRGN) {
   429  				if(debug['R'] > 1)
   430  					print("too many regions\n");
   431  				goto brk;
   432  			}
   433  			rgp++;
   434  		}
   435  	}
   436  brk:
   437  	qsort(region, nregion, sizeof(region[0]), rcmp);
   438  
   439  	if(debug['R'] && debug['v'])
   440  		dumpit("pass5", &firstr->f, 1);
   441  
   442  	/*
   443  	 * pass 6
   444  	 * determine used registers (paint2)
   445  	 * replace code (paint3)
   446  	 */
   447  	rgp = region;
   448  	for(i=0; i<nregion; i++) {
   449  		bit = blsh(rgp->varno);
   450  		vreg = paint2(rgp->enter, rgp->varno);
   451  		vreg = allreg(vreg, rgp);
   452  		if(debug['R']) {
   453  			if(rgp->regno >= NREG)
   454  				print("%L $%d F%d: %Q\n",
   455  					rgp->enter->f.prog->lineno,
   456  					rgp->cost,
   457  					rgp->regno-NREG,
   458  					bit);
   459  			else
   460  				print("%L $%d R%d: %Q\n",
   461  					rgp->enter->f.prog->lineno,
   462  					rgp->cost,
   463  					rgp->regno,
   464  					bit);
   465  		}
   466  		if(rgp->regno != 0)
   467  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   468  		rgp++;
   469  	}
   470  
   471  	if(debug['R'] && debug['v'])
   472  		dumpit("pass6", &firstr->f, 1);
   473  
   474  	/*
   475  	 * free aux structures. peep allocates new ones.
   476  	 */
   477  	for(i=0; i<nvar; i++)
   478  		var[i].node->opt = nil;
   479  	flowend(g);
   480  	firstr = R;
   481  
   482  	/*
   483  	 * pass 7
   484  	 * peep-hole on basic block
   485  	 */
   486  	if(!debug['R'] || debug['P']) {
   487  		peep(firstp);
   488  	}
   489  
   490  	if(debug['R'] && debug['v'])
   491  		dumpit("pass7", &firstr->f, 1);
   492  
   493  	/*
   494  	 * last pass
   495  	 * eliminate nops
   496  	 * free aux structures
   497  	 * adjust the stack pointer
   498  	 *	MOVW.W 	R1,-12(R13)			<<- start
   499  	 *	MOVW   	R0,R1
   500  	 *	MOVW   	R1,8(R13)
   501  	 *	MOVW   	$0,R1
   502  	 *	MOVW   	R1,4(R13)
   503  	 *	BL     	,runtime.newproc+0(SB)
   504  	 *	MOVW   	&ft+-32(SP),R7			<<- adjust
   505  	 *	MOVW   	&j+-40(SP),R6			<<- adjust
   506  	 *	MOVW   	autotmp_0003+-24(SP),R5		<<- adjust
   507  	 *	MOVW   	$12(R13),R13			<<- finish
   508  	 */
   509  	vreg = 0;
   510  	for(p = firstp; p != P; p = p->link) {
   511  		while(p->link != P && p->link->as == ANOP)
   512  			p->link = p->link->link;
   513  		if(p->to.type == D_BRANCH)
   514  			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
   515  				p->to.u.branch = p->to.u.branch->link;
   516  		if(p->as == AMOVW && p->to.reg == 13) {
   517  			if(p->scond & C_WBIT) {
   518  				vreg = -p->to.offset;		// in adjust region
   519  //				print("%P adjusting %d\n", p, vreg);
   520  				continue;
   521  			}
   522  			if(p->from.type == D_CONST && p->to.type == D_REG) {
   523  				if(p->from.offset != vreg)
   524  					print("in and out different\n");
   525  //				print("%P finish %d\n", p, vreg);
   526  				vreg = 0;	// done adjust region
   527  				continue;
   528  			}
   529  
   530  //			print("%P %d %d from type\n", p, p->from.type, D_CONST);
   531  //			print("%P %d %d to type\n\n", p, p->to.type, D_REG);
   532  		}
   533  
   534  		if(p->as == AMOVW && vreg != 0) {
   535  			if(p->from.sym != nil)
   536  			if(p->from.name == D_AUTO || p->from.name == D_PARAM) {
   537  				p->from.offset += vreg;
   538  //				print("%P adjusting from %d %d\n", p, vreg, p->from.type);
   539  			}
   540  			if(p->to.sym != nil)
   541  			if(p->to.name == D_AUTO || p->to.name == D_PARAM) {
   542  				p->to.offset += vreg;
   543  //				print("%P adjusting to %d %d\n", p, vreg, p->from.type);
   544  			}
   545  		}
   546  	}
   547  }
   548  
   549  static void
   550  walkvardef(Node *n, Reg *r, int active)
   551  {
   552  	Reg *r1, *r2;
   553  	int bn;
   554  	Var *v;
   555  	
   556  	for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
   557  		if(r1->f.active == active)
   558  			break;
   559  		r1->f.active = active;
   560  		if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
   561  			break;
   562  		for(v=n->opt; v!=nil; v=v->nextinnode) {
   563  			bn = v - var;
   564  			r1->act.b[bn/32] |= 1L << (bn%32);
   565  		}
   566  		if(r1->f.prog->as == ABL)
   567  			break;
   568  	}
   569  
   570  	for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
   571  		if(r2->f.s2 != nil)
   572  			walkvardef(n, (Reg*)r2->f.s2, active);
   573  }
   574  
   575  void
   576  addsplits(void)
   577  {
   578  	Reg *r, *r1;
   579  	int z, i;
   580  	Bits bit;
   581  
   582  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   583  		if(r->f.loop > 1)
   584  			continue;
   585  		if(r->f.prog->as == ABL)
   586  			continue;
   587  		if(r->f.prog->as == ADUFFZERO)
   588  			continue;
   589  		if(r->f.prog->as == ADUFFCOPY)
   590  			continue;
   591  		for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
   592  			if(r1->f.loop <= 1)
   593  				continue;
   594  			for(z=0; z<BITS; z++)
   595  				bit.b[z] = r1->calbehind.b[z] &
   596  					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
   597  					~(r->calahead.b[z] & addrs.b[z]);
   598  			while(bany(&bit)) {
   599  				i = bnum(bit);
   600  				bit.b[i/32] &= ~(1L << (i%32));
   601  			}
   602  		}
   603  	}
   604  }
   605  
   606  /*
   607   * add mov b,rn
   608   * just after r
   609   */
   610  void
   611  addmove(Reg *r, int bn, int rn, int f)
   612  {
   613  	Prog *p, *p1, *p2;
   614  	Adr *a;
   615  	Var *v;
   616  
   617  	p1 = mal(sizeof(*p1));
   618  	*p1 = zprog;
   619  	p = r->f.prog;
   620  	
   621  	// If there's a stack fixup coming (after BL newproc or BL deferproc),
   622  	// delay the load until after the fixup.
   623  	p2 = p->link;
   624  	if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG)
   625  		p = p2;
   626  
   627  	p1->link = p->link;
   628  	p->link = p1;
   629  	p1->lineno = p->lineno;
   630  
   631  	v = var + bn;
   632  
   633  	a = &p1->to;
   634  	a->name = v->name;
   635  	a->node = v->node;
   636  	a->sym = linksym(v->node->sym);
   637  	a->offset = v->offset;
   638  	a->etype = v->etype;
   639  	a->type = D_OREG;
   640  	if(a->etype == TARRAY || a->sym == nil)
   641  		a->type = D_CONST;
   642  
   643  	if(v->addr)
   644  		fatal("addmove: shouldn't be doing this %A\n", a);
   645  
   646  	switch(v->etype) {
   647  	default:
   648  		print("What is this %E\n", v->etype);
   649  
   650  	case TINT8:
   651  		p1->as = AMOVBS;
   652  		break;
   653  	case TBOOL:
   654  	case TUINT8:
   655  //print("movbu %E %d %S\n", v->etype, bn, v->sym);
   656  		p1->as = AMOVBU;
   657  		break;
   658  	case TINT16:
   659  		p1->as = AMOVHS;
   660  		break;
   661  	case TUINT16:
   662  		p1->as = AMOVHU;
   663  		break;
   664  	case TINT32:
   665  	case TUINT32:
   666  	case TPTR32:
   667  		p1->as = AMOVW;
   668  		break;
   669  	case TFLOAT32:
   670  		p1->as = AMOVF;
   671  		break;
   672  	case TFLOAT64:
   673  		p1->as = AMOVD;
   674  		break;
   675  	}
   676  
   677  	p1->from.type = D_REG;
   678  	p1->from.reg = rn;
   679  	if(rn >= NREG) {
   680  		p1->from.type = D_FREG;
   681  		p1->from.reg = rn-NREG;
   682  	}
   683  	if(!f) {
   684  		p1->from = *a;
   685  		*a = zprog.from;
   686  		a->type = D_REG;
   687  		a->reg = rn;
   688  		if(rn >= NREG) {
   689  			a->type = D_FREG;
   690  			a->reg = rn-NREG;
   691  		}
   692  		if(v->etype == TUINT8 || v->etype == TBOOL)
   693  			p1->as = AMOVBU;
   694  		if(v->etype == TUINT16)
   695  			p1->as = AMOVHU;
   696  	}
   697  	if(debug['R'])
   698  		print("%P\t.a%P\n", p, p1);
   699  }
   700  
   701  static int
   702  overlap(int32 o1, int w1, int32 o2, int w2)
   703  {
   704  	int32 t1, t2;
   705  
   706  	t1 = o1+w1;
   707  	t2 = o2+w2;
   708  
   709  	if(!(t1 > o2 && t2 > o1))
   710  		return 0;
   711  
   712  	return 1;
   713  }
   714  
/*
 * Return the bit set describing what operand a of the instruction
 * at r refers to.
 *
 * Register operands (D_REG, D_CONST, D_SHIFT, D_FREG, D_REGREG,
 * D_REGREG2) yield the corresponding RtoB/FtoB bits directly.
 * A D_OREG operand with a base register records that register in
 * r->use1 or r->use2 (and in r->set when the instruction has
 * C_PBIT or C_WBIT) and then continues to the named-variable lookup.
 *
 * For operands named D_EXTERN, D_STATIC, D_AUTO or D_PARAM the
 * var[] table is searched for an entry with the same node, name,
 * offset, etype and width; if none exists and there is room, a new
 * entry is appended and its bit returned. Entries that overlap the
 * operand without matching it get addr set, and the new entry
 * inherits that flag.
 *
 * Returns zbits when the operand is not tracked.
 */
Bits
mkvar(Reg *r, Adr *a)
{
	Var *v;
	int i, t, n, et, z, w, flag;
	int32 o;
	Bits bit;
	Node *node;

	// mark registers used
	t = a->type;

	flag = 0;
	switch(t) {
	default:
		print("type %d %d %D\n", t, a->name, a);
		goto none;

	case D_NONE:
	case D_FCONST:
	case D_BRANCH:
		break;


	case D_REGREG:
	case D_REGREG2:
		// register pair: one register in offset, the other in reg
		bit = zbits;
		if(a->offset != NREG)
			bit.b[0] |= RtoB(a->offset);
		if(a->reg != NREG)
			bit.b[0] |= RtoB(a->reg);
		return bit;

	case D_CONST:
	case D_REG:
	case D_SHIFT:
		if(a->reg != NREG) {
			bit = zbits;
			bit.b[0] = RtoB(a->reg);
			return bit;
		}
		break;

	case D_OREG:
		// the base register is read by the instruction; with
		// C_PBIT or C_WBIT it is also recorded as set
		if(a->reg != NREG) {
			if(a == &r->f.prog->from)
				r->use1.b[0] |= RtoB(a->reg);
			else
				r->use2.b[0] |= RtoB(a->reg);
			if(r->f.prog->scond & (C_PBIT|C_WBIT))
				r->set.b[0] |= RtoB(a->reg);
		}
		break;

	case D_FREG:
		if(a->reg != NREG) {
			bit = zbits;
			bit.b[0] = FtoB(a->reg);
			return bit;
		}
		break;
	}

	// only operands with one of these name classes are tracked
	switch(a->name) {
	default:
		goto none;

	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		n = a->name;
		break;
	}

	node = a->node;
	if(node == N || node->op != ONAME || node->orig == N)
		goto none;
	node = node->orig;
	if(node->orig != node)
		fatal("%D: bad node", a);
	// nodes without a symbol, or whose symbol name starts with '.',
	// are not tracked
	if(node->sym == S || node->sym->name[0] == '.')
		goto none;
	et = a->etype;
	o = a->offset;
	w = a->width;
	if(w < 0)
		fatal("bad width %d for %D", w, a);

	// look for an existing entry describing exactly this piece
	for(i=0; i<nvar; i++) {
		v = var+i;
		if(v->node == node && v->name == n) {
			if(v->offset == o)
			if(v->etype == et)
			if(v->width == w)
				if(!flag)
					return blsh(i);

			// if they overlap, disable both
			if(overlap(v->offset, v->width, o, w)) {
				v->addr = 1;
				flag = 1;
			}
		}
	}

	switch(et) {
	case 0:
	case TFUNC:
		goto none;
	}

	if(nvar >= NVAR) {
		if(debug['w'] > 1 && node)
			fatal("variable not optimized: %D", a);
		
		// If we're not tracking a word in a variable, mark the rest as
		// having its address taken, so that we keep the whole thing
		// live at all calls. otherwise we might optimize away part of
		// a variable but not all of it.
		for(i=0; i<nvar; i++) {
			v = var+i;
			if(v->node == node)
				v->addr = 1;
		}
		goto none;
	}

	// append a new entry to var[]
	i = nvar;
	nvar++;
//print("var %d %E %D %S\n", i, et, a, s);
	v = var+i;
	v->offset = o;
	v->name = n;
	v->etype = et;
	v->width = w;
	v->addr = flag;		// funny punning
	v->node = node;
	
	// node->opt is the head of a linked list
	// of Vars within the given Node, so that
	// we can start at a Var and find all the other
	// Vars in the same Go variable.
	v->nextinnode = node->opt;
	node->opt = v;
	
	// record the new bit in the global classification sets
	bit = blsh(i);
	if(n == D_EXTERN || n == D_STATIC)
		for(z=0; z<BITS; z++)
			externs.b[z] |= bit.b[z];
	if(n == D_PARAM)
		for(z=0; z<BITS; z++)
			params.b[z] |= bit.b[z];

	if(node->class == PPARAM)
		for(z=0; z<BITS; z++)
			ivar.b[z] |= bit.b[z];
	if(node->class == PPARAMOUT)
		for(z=0; z<BITS; z++)
			ovar.b[z] |= bit.b[z];

	// Treat values with their address taken as live at calls,
	// because the garbage collector's liveness analysis in ../gc/plive.c does.
	// These must be consistent or else we will elide stores and the garbage
	// collector will see uninitialized data.
	// The typical case where our own analysis is out of sync is when the
	// node appears to have its address taken but that code doesn't actually
	// get generated and therefore doesn't show up as an address being
	// taken when we analyze the instruction stream.
	// One instance of this case is when a closure uses the same name as
	// an outer variable for one of its own variables declared with :=.
	// The parser flags the outer variable as possibly shared, and therefore
	// sets addrtaken, even though it ends up not being actually shared.
	// If we were better about _ elision, _ = &x would suffice too.
	// The broader := in a closure problem is mentioned in a comment in
	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
	if(node->addrtaken)
		v->addr = 1;

	// Disable registerization for globals, because:
	// (1) we might panic at any time and we want the recovery code
	// to see the latest values (issue 1304).
	// (2) we don't know what pointers might point at them and we want
	// loads via those pointers to see updated values and vice versa (issue 7995).
	//
	// Disable registerization for results if using defer, because the deferred func
	// might recover and return, causing the current values to be used.
	if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
		v->addr = 1;

	if(debug['R'])
		print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);

	return bit;

none:
	return zbits;
}
   913  
   914  void
   915  prop(Reg *r, Bits ref, Bits cal)
   916  {
   917  	Reg *r1, *r2;
   918  	int z, i, j;
   919  	Var *v, *v1;
   920  
   921  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
   922  		for(z=0; z<BITS; z++) {
   923  			ref.b[z] |= r1->refahead.b[z];
   924  			if(ref.b[z] != r1->refahead.b[z]) {
   925  				r1->refahead.b[z] = ref.b[z];
   926  				change++;
   927  			}
   928  			cal.b[z] |= r1->calahead.b[z];
   929  			if(cal.b[z] != r1->calahead.b[z]) {
   930  				r1->calahead.b[z] = cal.b[z];
   931  				change++;
   932  			}
   933  		}
   934  		switch(r1->f.prog->as) {
   935  		case ABL:
   936  			if(noreturn(r1->f.prog))
   937  				break;
   938  
   939  			// Mark all input variables (ivar) as used, because that's what the
   940  			// liveness bitmaps say. The liveness bitmaps say that so that a
   941  			// panic will not show stale values in the parameter dump.
   942  			// Mark variables with a recent VARDEF (r1->act) as used,
   943  			// so that the optimizer flushes initializations to memory,
   944  			// so that if a garbage collection happens during this CALL,
   945  			// the collector will see initialized memory. Again this is to
   946  			// match what the liveness bitmaps say.
   947  			for(z=0; z<BITS; z++) {
   948  				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
   949  				ref.b[z] = 0;
   950  			}
   951  			
   952  			// cal.b is the current approximation of what's live across the call.
   953  			// Every bit in cal.b is a single stack word. For each such word,
   954  			// find all the other tracked stack words in the same Go variable
   955  			// (struct/slice/string/interface) and mark them live too.
   956  			// This is necessary because the liveness analysis for the garbage
   957  			// collector works at variable granularity, not at word granularity.
   958  			// It is fundamental for slice/string/interface: the garbage collector
   959  			// needs the whole value, not just some of the words, in order to
   960  			// interpret the other bits correctly. Specifically, slice needs a consistent
   961  			// ptr and cap, string needs a consistent ptr and len, and interface
   962  			// needs a consistent type word and data word.
   963  			for(z=0; z<BITS; z++) {
   964  				if(cal.b[z] == 0)
   965  					continue;
   966  				for(i=0; i<32; i++) {
   967  					if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
   968  						continue;
   969  					v = var+z*32+i;
   970  					if(v->node->opt == nil) // v represents fixed register, not Go variable
   971  						continue;
   972  
   973  					// v->node->opt is the head of a linked list of Vars
   974  					// corresponding to tracked words from the Go variable v->node.
   975  					// Walk the list and set all the bits.
   976  					// For a large struct this could end up being quadratic:
   977  					// after the first setting, the outer loop (for z, i) would see a 1 bit
   978  					// for all of the remaining words in the struct, and for each such
   979  					// word would go through and turn on all the bits again.
   980  					// To avoid the quadratic behavior, we only turn on the bits if
   981  					// v is the head of the list or if the head's bit is not yet turned on.
   982  					// This will set the bits at most twice, keeping the overall loop linear.
   983  					v1 = v->node->opt;
   984  					j = v1 - var;
   985  					if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
   986  						for(; v1 != nil; v1 = v1->nextinnode) {
   987  							j = v1 - var;
   988  							cal.b[j/32] |= 1<<(j&31);
   989  						}
   990  					}
   991  				}
   992  			}
   993  			break;
   994  
   995  		case ATEXT:
   996  			for(z=0; z<BITS; z++) {
   997  				cal.b[z] = 0;
   998  				ref.b[z] = 0;
   999  			}
  1000  			break;
  1001  
  1002  		case ARET:
  1003  			for(z=0; z<BITS; z++) {
  1004  				cal.b[z] = externs.b[z] | ovar.b[z];
  1005  				ref.b[z] = 0;
  1006  			}
  1007  			break;
  1008  		}
  1009  		for(z=0; z<BITS; z++) {
  1010  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  1011  				r1->use1.b[z] | r1->use2.b[z];
  1012  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  1013  			r1->refbehind.b[z] = ref.b[z];
  1014  			r1->calbehind.b[z] = cal.b[z];
  1015  		}
  1016  		if(r1->f.active)
  1017  			break;
  1018  		r1->f.active = 1;
  1019  	}
  1020  	for(; r != r1; r = (Reg*)r->f.p1)
  1021  		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
  1022  			prop(r2, r->refbehind, r->calbehind);
  1023  }
  1024  
  1025  void
  1026  synch(Reg *r, Bits dif)
  1027  {
  1028  	Reg *r1;
  1029  	int z;
  1030  
  1031  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
  1032  		for(z=0; z<BITS; z++) {
  1033  			dif.b[z] = (dif.b[z] &
  1034  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  1035  					r1->set.b[z] | r1->regdiff.b[z];
  1036  			if(dif.b[z] != r1->regdiff.b[z]) {
  1037  				r1->regdiff.b[z] = dif.b[z];
  1038  				change++;
  1039  			}
  1040  		}
  1041  		if(r1->f.active)
  1042  			break;
  1043  		r1->f.active = 1;
  1044  		for(z=0; z<BITS; z++)
  1045  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  1046  		if(r1->f.s2 != nil)
  1047  			synch((Reg*)r1->f.s2, dif);
  1048  	}
  1049  }
  1050  
  1051  uint32
  1052  allreg(uint32 b, Rgn *r)
  1053  {
  1054  	Var *v;
  1055  	int i;
  1056  
  1057  	v = var + r->varno;
  1058  	r->regno = 0;
  1059  	switch(v->etype) {
  1060  
  1061  	default:
  1062  		fatal("unknown etype %d/%E", bitno(b), v->etype);
  1063  		break;
  1064  
  1065  	case TINT8:
  1066  	case TUINT8:
  1067  	case TINT16:
  1068  	case TUINT16:
  1069  	case TINT32:
  1070  	case TUINT32:
  1071  	case TINT:
  1072  	case TUINT:
  1073  	case TUINTPTR:
  1074  	case TBOOL:
  1075  	case TPTR32:
  1076  		i = BtoR(~b);
  1077  		if(i && r->cost >= 0) {
  1078  			r->regno = i;
  1079  			return RtoB(i);
  1080  		}
  1081  		break;
  1082  
  1083  	case TFLOAT32:
  1084  	case TFLOAT64:
  1085  		i = BtoF(~b);
  1086  		if(i && r->cost >= 0) {
  1087  			r->regno = i+NREG;
  1088  			return FtoB(i);
  1089  		}
  1090  		break;
  1091  
  1092  	case TINT64:
  1093  	case TUINT64:
  1094  	case TPTR64:
  1095  	case TINTER:
  1096  	case TSTRUCT:
  1097  	case TARRAY:
  1098  		break;
  1099  	}
  1100  	return 0;
  1101  }
  1102  
  1103  void
  1104  paint1(Reg *r, int bn)
  1105  {
  1106  	Reg *r1;
  1107  	Prog *p;
  1108  	int z;
  1109  	uint32 bb;
  1110  
  1111  	z = bn/32;
  1112  	bb = 1L<<(bn%32);
  1113  	if(r->act.b[z] & bb)
  1114  		return;
  1115  	for(;;) {
  1116  		if(!(r->refbehind.b[z] & bb))
  1117  			break;
  1118  		r1 = (Reg*)r->f.p1;
  1119  		if(r1 == R)
  1120  			break;
  1121  		if(!(r1->refahead.b[z] & bb))
  1122  			break;
  1123  		if(r1->act.b[z] & bb)
  1124  			break;
  1125  		r = r1;
  1126  	}
  1127  
  1128  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
  1129  		change -= CLOAD * r->f.loop;
  1130  		if(debug['R'] > 1)
  1131  			print("%d%P\td %Q $%d\n", r->f.loop,
  1132  				r->f.prog, blsh(bn), change);
  1133  	}
  1134  	for(;;) {
  1135  		r->act.b[z] |= bb;
  1136  		p = r->f.prog;
  1137  
  1138  
  1139  		if(r->f.prog->as != ANOP) { // don't give credit for NOPs
  1140  			if(r->use1.b[z] & bb) {
  1141  				change += CREF * r->f.loop;
  1142  				if(debug['R'] > 1)
  1143  					print("%d%P\tu1 %Q $%d\n", r->f.loop,
  1144  						p, blsh(bn), change);
  1145  			}
  1146  			if((r->use2.b[z]|r->set.b[z]) & bb) {
  1147  				change += CREF * r->f.loop;
  1148  				if(debug['R'] > 1)
  1149  					print("%d%P\tu2 %Q $%d\n", r->f.loop,
  1150  						p, blsh(bn), change);
  1151  			}
  1152  		}
  1153  
  1154  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1155  			change -= CLOAD * r->f.loop;
  1156  			if(debug['R'] > 1)
  1157  				print("%d%P\tst %Q $%d\n", r->f.loop,
  1158  					p, blsh(bn), change);
  1159  		}
  1160  
  1161  		if(r->refbehind.b[z] & bb)
  1162  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1163  				if(r1->refahead.b[z] & bb)
  1164  					paint1(r1, bn);
  1165  
  1166  		if(!(r->refahead.b[z] & bb))
  1167  			break;
  1168  		r1 = (Reg*)r->f.s2;
  1169  		if(r1 != R)
  1170  			if(r1->refbehind.b[z] & bb)
  1171  				paint1(r1, bn);
  1172  		r = (Reg*)r->f.s1;
  1173  		if(r == R)
  1174  			break;
  1175  		if(r->act.b[z] & bb)
  1176  			break;
  1177  		if(!(r->refbehind.b[z] & bb))
  1178  			break;
  1179  	}
  1180  }
  1181  
  1182  uint32
  1183  paint2(Reg *r, int bn)
  1184  {
  1185  	Reg *r1;
  1186  	int z;
  1187  	uint32 bb, vreg;
  1188  
  1189  	z = bn/32;
  1190  	bb = 1L << (bn%32);
  1191  	vreg = regbits;
  1192  	if(!(r->act.b[z] & bb))
  1193  		return vreg;
  1194  	for(;;) {
  1195  		if(!(r->refbehind.b[z] & bb))
  1196  			break;
  1197  		r1 = (Reg*)r->f.p1;
  1198  		if(r1 == R)
  1199  			break;
  1200  		if(!(r1->refahead.b[z] & bb))
  1201  			break;
  1202  		if(!(r1->act.b[z] & bb))
  1203  			break;
  1204  		r = r1;
  1205  	}
  1206  	for(;;) {
  1207  		r->act.b[z] &= ~bb;
  1208  
  1209  		vreg |= r->regu;
  1210  
  1211  		if(r->refbehind.b[z] & bb)
  1212  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1213  				if(r1->refahead.b[z] & bb)
  1214  					vreg |= paint2(r1, bn);
  1215  
  1216  		if(!(r->refahead.b[z] & bb))
  1217  			break;
  1218  		r1 = (Reg*)r->f.s2;
  1219  		if(r1 != R)
  1220  			if(r1->refbehind.b[z] & bb)
  1221  				vreg |= paint2(r1, bn);
  1222  		r = (Reg*)r->f.s1;
  1223  		if(r == R)
  1224  			break;
  1225  		if(!(r->act.b[z] & bb))
  1226  			break;
  1227  		if(!(r->refbehind.b[z] & bb))
  1228  			break;
  1229  	}
  1230  	return vreg;
  1231  }
  1232  
  1233  void
  1234  paint3(Reg *r, int bn, int32 rb, int rn)
  1235  {
  1236  	Reg *r1;
  1237  	Prog *p;
  1238  	int z;
  1239  	uint32 bb;
  1240  
  1241  	z = bn/32;
  1242  	bb = 1L << (bn%32);
  1243  	if(r->act.b[z] & bb)
  1244  		return;
  1245  	for(;;) {
  1246  		if(!(r->refbehind.b[z] & bb))
  1247  			break;
  1248  		r1 = (Reg*)r->f.p1;
  1249  		if(r1 == R)
  1250  			break;
  1251  		if(!(r1->refahead.b[z] & bb))
  1252  			break;
  1253  		if(r1->act.b[z] & bb)
  1254  			break;
  1255  		r = r1;
  1256  	}
  1257  
  1258  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1259  		addmove(r, bn, rn, 0);
  1260  
  1261  	for(;;) {
  1262  		r->act.b[z] |= bb;
  1263  		p = r->f.prog;
  1264  
  1265  		if(r->use1.b[z] & bb) {
  1266  			if(debug['R'])
  1267  				print("%P", p);
  1268  			addreg(&p->from, rn);
  1269  			if(debug['R'])
  1270  				print("\t.c%P\n", p);
  1271  		}
  1272  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1273  			if(debug['R'])
  1274  				print("%P", p);
  1275  			addreg(&p->to, rn);
  1276  			if(debug['R'])
  1277  				print("\t.c%P\n", p);
  1278  		}
  1279  
  1280  		if(STORE(r) & r->regdiff.b[z] & bb)
  1281  			addmove(r, bn, rn, 1);
  1282  		r->regu |= rb;
  1283  
  1284  		if(r->refbehind.b[z] & bb)
  1285  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1286  				if(r1->refahead.b[z] & bb)
  1287  					paint3(r1, bn, rb, rn);
  1288  
  1289  		if(!(r->refahead.b[z] & bb))
  1290  			break;
  1291  		r1 = (Reg*)r->f.s2;
  1292  		if(r1 != R)
  1293  			if(r1->refbehind.b[z] & bb)
  1294  				paint3(r1, bn, rb, rn);
  1295  		r = (Reg*)r->f.s1;
  1296  		if(r == R)
  1297  			break;
  1298  		if(r->act.b[z] & bb)
  1299  			break;
  1300  		if(!(r->refbehind.b[z] & bb))
  1301  			break;
  1302  	}
  1303  }
  1304  
  1305  void
  1306  addreg(Adr *a, int rn)
  1307  {
  1308  	a->sym = nil;
  1309  	a->name = D_NONE;
  1310  	a->type = D_REG;
  1311  	a->reg = rn;
  1312  	if(rn >= NREG) {
  1313  		a->type = D_FREG;
  1314  		a->reg = rn-NREG;
  1315  	}
  1316  }
  1317  
  1318  /*
  1319   *	bit	reg
  1320   *	0	R0
  1321   *	1	R1
  1322   *	...	...
  1323   *	10	R10
  1324   *	12  R12
  1325   */
  1326  int32
  1327  RtoB(int r)
  1328  {
  1329  	if(r >= REGTMP-2 && r != 12)	// excluded R9 and R10 for m and g, but not R12
  1330  		return 0;
  1331  	return 1L << r;
  1332  }
  1333  
  1334  int
  1335  BtoR(int32 b)
  1336  {
  1337  	b &= 0x11fcL;	// excluded R9 and R10 for m and g, but not R12
  1338  	if(b == 0)
  1339  		return 0;
  1340  	return bitno(b);
  1341  }
  1342  
  1343  /*
  1344   *	bit	reg
  1345   *	18	F2
  1346   *	19	F3
  1347   *	...	...
  1348   *	31	F15
  1349   */
  1350  int32
  1351  FtoB(int f)
  1352  {
  1353  
  1354  	if(f < 2 || f > NFREG-1)
  1355  		return 0;
  1356  	return 1L << (f + 16);
  1357  }
  1358  
  1359  int
  1360  BtoF(int32 b)
  1361  {
  1362  
  1363  	b &= 0xfffc0000L;
  1364  	if(b == 0)
  1365  		return 0;
  1366  	return bitno(b) - 16;
  1367  }
  1368  
  1369  void
  1370  dumpone(Flow *f, int isreg)
  1371  {
  1372  	int z;
  1373  	Bits bit;
  1374  	Reg *r;
  1375  
  1376  	print("%d:%P", f->loop, f->prog);
  1377  	if(isreg) {
  1378  		r = (Reg*)f;
  1379  		for(z=0; z<BITS; z++)
  1380  			bit.b[z] =
  1381  				r->set.b[z] |
  1382  				r->use1.b[z] |
  1383  				r->use2.b[z] |
  1384  				r->refbehind.b[z] |
  1385  				r->refahead.b[z] |
  1386  				r->calbehind.b[z] |
  1387  				r->calahead.b[z] |
  1388  				r->regdiff.b[z] |
  1389  				r->act.b[z] |
  1390  					0;
  1391  		if(bany(&bit)) {
  1392  			print("\t");
  1393  			if(bany(&r->set))
  1394  				print(" s:%Q", r->set);
  1395  			if(bany(&r->use1))
  1396  				print(" u1:%Q", r->use1);
  1397  			if(bany(&r->use2))
  1398  				print(" u2:%Q", r->use2);
  1399  			if(bany(&r->refbehind))
  1400  				print(" rb:%Q ", r->refbehind);
  1401  			if(bany(&r->refahead))
  1402  				print(" ra:%Q ", r->refahead);
  1403  			if(bany(&r->calbehind))
  1404  				print(" cb:%Q ", r->calbehind);
  1405  			if(bany(&r->calahead))
  1406  				print(" ca:%Q ", r->calahead);
  1407  			if(bany(&r->regdiff))
  1408  				print(" d:%Q ", r->regdiff);
  1409  			if(bany(&r->act))
  1410  				print(" a:%Q ", r->act);
  1411  		}
  1412  	}
  1413  	print("\n");
  1414  }
  1415  
  1416  void
  1417  dumpit(char *str, Flow *r0, int isreg)
  1418  {
  1419  	Flow *r, *r1;
  1420  
  1421  	print("\n%s\n", str);
  1422  	for(r = r0; r != nil; r = r->link) {
  1423  		dumpone(r, isreg);
  1424  		r1 = r->p2;
  1425  		if(r1 != nil) {
  1426  			print("	pred:");
  1427  			for(; r1 != nil; r1 = r1->p2link)
  1428  				print(" %.4ud", (int)r1->prog->pc);
  1429  			if(r->p1 != nil)
  1430  				print(" (and %.4ud)", (int)r->p1->prog->pc);
  1431  			else
  1432  				print(" (only)");
  1433  			print("\n");
  1434  		}
  1435  //		r1 = r->s1;
  1436  //		if(r1 != nil) {
  1437  //			print("	succ:");
  1438  //			for(; r1 != R; r1 = r1->s1)
  1439  //				print(" %.4ud", (int)r1->prog->pc);
  1440  //			print("\n");
  1441  //		}
  1442  	}
  1443  }