github.com/akaros/go-akaros@v0.0.0-20181004170632-85005d477eab/src/cmd/5g/reg.c (about)

     1  // Inferno utils/5c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  
    32  #include <u.h>
    33  #include <libc.h>
    34  #include "gg.h"
    35  #include "opt.h"
    36  
    // Number of register pseudo-variables; regopt reserves var[0..NREGVAR-1] for them
    // and regname supplies one name per slot.
    37  #define	NREGVAR	32
    // Mask applied to word 0 of a Bits value to select or clear the register pseudo-variable bits.
    38  #define	REGBITS	((uint32)0xffffffff)
    39  /*c2go enum {
    40  	NREGVAR = 32,
    41  	REGBITS = 0xffffffff,
    42  };
    43  */
    44  
    // Defined later in this file; called by regopt after pass 4.
    45  	void	addsplits(void);
    // First Reg of the flow graph for the function being optimized; set and cleared by regopt.
    46  static	Reg*	firstr;
    // Nonzero until regopt has installed the 'Q' format verb.
    47  static	int	first	= 1;
    48  
    49  int
    50  rcmp(const void *a1, const void *a2)
    51  {
    52  	Rgn *p1, *p2;
    53  	int c1, c2;
    54  
    55  	p1 = (Rgn*)a1;
    56  	p2 = (Rgn*)a2;
    57  	c1 = p2->cost;
    58  	c2 = p1->cost;
    59  	if(c1 -= c2)
    60  		return c1;
    61  	return p2->varno - p1->varno;
    62  }
    63  
    64  void
    65  excise(Flow *r)
    66  {
    67  	Prog *p;
    68  
    69  	p = r->prog;
    70  	p->as = ANOP;
    71  	p->scond = zprog.scond;
    72  	p->from = zprog.from;
    73  	p->to = zprog.to;
    74  	p->reg = zprog.reg;
    75  }
    76  
    77  static void
    78  setaddrs(Bits bit)
    79  {
    80  	int i, n;
    81  	Var *v;
    82  	Node *node;
    83  
    84  	while(bany(&bit)) {
    85  		// convert each bit to a variable
    86  		i = bnum(bit);
    87  		node = var[i].node;
    88  		n = var[i].name;
    89  		bit.b[i/32] &= ~(1L<<(i%32));
    90  
    91  		// disable all pieces of that variable
    92  		for(i=0; i<nvar; i++) {
    93  			v = var+i;
    94  			if(v->node == node && v->name == n)
    95  				v->addr = 2;
    96  		}
    97  	}
    98  }
    99  
   100  static char* regname[] = {
   101  	".R0",
   102  	".R1",
   103  	".R2",
   104  	".R3",
   105  	".R4",
   106  	".R5",
   107  	".R6",
   108  	".R7",
   109  	".R8",
   110  	".R9",
   111  	".R10",
   112  	".R11",
   113  	".R12",
   114  	".R13",
   115  	".R14",
   116  	".R15",
   117  	".F0",
   118  	".F1",
   119  	".F2",
   120  	".F3",
   121  	".F4",
   122  	".F5",
   123  	".F6",
   124  	".F7",
   125  	".F8",
   126  	".F9",
   127  	".F10",
   128  	".F11",
   129  	".F12",
   130  	".F13",
   131  	".F14",
   132  	".F15",
   133  };
   134  
   // Nodes backing the register pseudo-variables; regopt creates each entry
   // on first use (via newname(lookup(regname[i]))) and reuses it afterwards.
   135  static Node* regnodes[NREGVAR];
   136  
   // Forward declaration: regopt calls walkvardef in pass 2.5, before its definition.
   137  static void walkvardef(Node *n, Reg *r, int active);
   138  
   139  void
   140  regopt(Prog *firstp)
   141  {
   142  	Reg *r, *r1;
   143  	Prog *p;
   144  	Graph *g;
   145  	int i, z, active;
   146  	uint32 vreg;
   147  	Bits bit;
   148  	ProgInfo info;
   149  
   150  	if(first) {
   151  		fmtinstall('Q', Qconv);
   152  		first = 0;
   153  	}
   154  
   155  	mergetemp(firstp);
   156  
   157  	/*
   158  	 * control flow is more complicated in generated go code
   159  	 * than in generated c code.  define pseudo-variables for
   160  	 * registers, so we have complete register usage information.
   161  	 */
   162  	nvar = NREGVAR;
   163  	memset(var, 0, NREGVAR*sizeof var[0]);
   164  	for(i=0; i<NREGVAR; i++) {
   165  		if(regnodes[i] == N)
   166  			regnodes[i] = newname(lookup(regname[i]));
   167  		var[i].node = regnodes[i];
   168  	}
   169  
   170  	regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
   171  	for(z=0; z<BITS; z++) {
   172  		externs.b[z] = 0;
   173  		params.b[z] = 0;
   174  		consts.b[z] = 0;
   175  		addrs.b[z] = 0;
   176  		ivar.b[z] = 0;
   177  		ovar.b[z] = 0;
   178  	}
   179  
   180  	/*
   181  	 * pass 1
   182  	 * build aux data structure
   183  	 * allocate pcs
   184  	 * find use and set of variables
   185  	 */
   186  	g = flowstart(firstp, sizeof(Reg));
   187  	if(g == nil) {
   188  		for(i=0; i<nvar; i++)
   189  			var[i].node->opt = nil;
   190  		return;
   191  	}
   192  
   193  	firstr = (Reg*)g->start;
   194  
   195  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   196  		p = r->f.prog;
   197  		if(p->as == AVARDEF || p->as == AVARKILL)
   198  			continue;
   199  		proginfo(&info, p);
   200  
   201  		// Avoid making variables for direct-called functions.
   202  		if(p->as == ABL && p->to.name == D_EXTERN)
   203  			continue;
   204  
   205  		bit = mkvar(r, &p->from);
   206  		if(info.flags & LeftRead)
   207  			for(z=0; z<BITS; z++)
   208  				r->use1.b[z] |= bit.b[z];
   209  		if(info.flags & LeftAddr)
   210  			setaddrs(bit);
   211  
   212  		if(info.flags & RegRead) {	
   213  			if(p->from.type != D_FREG)
   214  				r->use1.b[0] |= RtoB(p->reg);
   215  			else
   216  				r->use1.b[0] |= FtoB(p->reg);
   217  		}
   218  
   219  		if(info.flags & (RightAddr | RightRead | RightWrite)) {
   220  			bit = mkvar(r, &p->to);
   221  			if(info.flags & RightAddr)
   222  				setaddrs(bit);
   223  			if(info.flags & RightRead)
   224  				for(z=0; z<BITS; z++)
   225  					r->use2.b[z] |= bit.b[z];
   226  			if(info.flags & RightWrite)
   227  				for(z=0; z<BITS; z++)
   228  					r->set.b[z] |= bit.b[z];
   229  		}
   230  
   231  		/* the mod/div runtime routines smash R12 */
   232  		if(p->as == ADIV || p->as == ADIVU || p->as == AMOD || p->as == AMODU)
   233  			r->set.b[0] |= RtoB(12);
   234  	}
   235  	if(firstr == R)
   236  		return;
   237  
   238  	for(i=0; i<nvar; i++) {
   239  		Var *v = var+i;
   240  		if(v->addr) {
   241  			bit = blsh(i);
   242  			for(z=0; z<BITS; z++)
   243  				addrs.b[z] |= bit.b[z];
   244  		}
   245  
   246  		if(debug['R'] && debug['v'])
   247  			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
   248  				i, v->addr, v->etype, v->width, v->node, v->offset);
   249  	}
   250  
   251  	if(debug['R'] && debug['v'])
   252  		dumpit("pass1", &firstr->f, 1);
   253  
   254  	/*
   255  	 * pass 2
   256  	 * find looping structure
   257  	 */
   258  	flowrpo(g);
   259  
   260  	if(debug['R'] && debug['v'])
   261  		dumpit("pass2", &firstr->f, 1);
   262  
   263  	/*
   264  	 * pass 2.5
   265  	 * iterate propagating fat vardef covering forward
   266  	 * r->act records vars with a VARDEF since the last CALL.
   267  	 * (r->act will be reused in pass 5 for something else,
   268  	 * but we'll be done with it by then.)
   269  	 */
   270  	active = 0;
   271  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   272  		r->f.active = 0;
   273  		r->act = zbits;
   274  	}
   275  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   276  		p = r->f.prog;
   277  		if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
   278  			active++;
   279  			walkvardef(p->to.node, r, active);
   280  		}
   281  	}
   282  
   283  	/*
   284  	 * pass 3
   285  	 * iterate propagating usage
   286  	 * 	back until flow graph is complete
   287  	 */
   288  loop1:
   289  	change = 0;
   290  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   291  		r->f.active = 0;
   292  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   293  		if(r->f.prog->as == ARET)
   294  			prop(r, zbits, zbits);
   295  loop11:
   296  	/* pick up unreachable code */
   297  	i = 0;
   298  	for(r = firstr; r != R; r = r1) {
   299  		r1 = (Reg*)r->f.link;
   300  		if(r1 && r1->f.active && !r->f.active) {
   301  			prop(r, zbits, zbits);
   302  			i = 1;
   303  		}
   304  	}
   305  	if(i)
   306  		goto loop11;
   307  	if(change)
   308  		goto loop1;
   309  
   310  	if(debug['R'] && debug['v'])
   311  		dumpit("pass3", &firstr->f, 1);
   312  
   313  
   314  	/*
   315  	 * pass 4
   316  	 * iterate propagating register/variable synchrony
   317  	 * 	forward until graph is complete
   318  	 */
   319  loop2:
   320  	change = 0;
   321  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   322  		r->f.active = 0;
   323  	synch(firstr, zbits);
   324  	if(change)
   325  		goto loop2;
   326  
   327  	addsplits();
   328  
   329  	if(debug['R'] && debug['v'])
   330  		dumpit("pass4", &firstr->f, 1);
   331  
   332  	if(debug['R'] > 1) {
   333  		print("\nprop structure:\n");
   334  		for(r = firstr; r != R; r = (Reg*)r->f.link) {
   335  			print("%d:%P", r->f.loop, r->f.prog);
   336  			for(z=0; z<BITS; z++) {
   337  				bit.b[z] = r->set.b[z] |
   338  					r->refahead.b[z] | r->calahead.b[z] |
   339  					r->refbehind.b[z] | r->calbehind.b[z] |
   340  					r->use1.b[z] | r->use2.b[z];
   341  				bit.b[z] &= ~addrs.b[z];
   342  			}
   343  
   344  			if(bany(&bit)) {
   345  				print("\t");
   346  				if(bany(&r->use1))
   347  					print(" u1=%Q", r->use1);
   348  				if(bany(&r->use2))
   349  					print(" u2=%Q", r->use2);
   350  				if(bany(&r->set))
   351  					print(" st=%Q", r->set);
   352  				if(bany(&r->refahead))
   353  					print(" ra=%Q", r->refahead);
   354  				if(bany(&r->calahead))
   355  					print(" ca=%Q", r->calahead);
   356  				if(bany(&r->refbehind))
   357  					print(" rb=%Q", r->refbehind);
   358  				if(bany(&r->calbehind))
   359  					print(" cb=%Q", r->calbehind);
   360  			}
   361  			print("\n");
   362  		}
   363  	}
   364  
   365  	/*
   366  	 * pass 4.5
   367  	 * move register pseudo-variables into regu.
   368  	 */
   369  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   370  		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   371  
   372  		r->set.b[0] &= ~REGBITS;
   373  		r->use1.b[0] &= ~REGBITS;
   374  		r->use2.b[0] &= ~REGBITS;
   375  		r->refbehind.b[0] &= ~REGBITS;
   376  		r->refahead.b[0] &= ~REGBITS;
   377  		r->calbehind.b[0] &= ~REGBITS;
   378  		r->calahead.b[0] &= ~REGBITS;
   379  		r->regdiff.b[0] &= ~REGBITS;
   380  		r->act.b[0] &= ~REGBITS;
   381  	}
   382  
   383  	if(debug['R'] && debug['v'])
   384  		dumpit("pass4.5", &firstr->f, 1);
   385  
   386  	/*
   387  	 * pass 5
   388  	 * isolate regions
   389  	 * calculate costs (paint1)
   390  	 */
   391  	r = firstr;
   392  	if(r) {
   393  		for(z=0; z<BITS; z++)
   394  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   395  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   396  		if(bany(&bit) & !r->f.refset) {
   397  			// should never happen - all variables are preset
   398  			if(debug['w'])
   399  				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
   400  			r->f.refset = 1;
   401  		}
   402  	}
   403  
   404  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   405  		r->act = zbits;
   406  	rgp = region;
   407  	nregion = 0;
   408  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   409  		for(z=0; z<BITS; z++)
   410  			bit.b[z] = r->set.b[z] &
   411  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   412  		if(bany(&bit) && !r->f.refset) {
   413  			if(debug['w'])
   414  				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
   415  			r->f.refset = 1;
   416  			excise(&r->f);
   417  		}
   418  		for(z=0; z<BITS; z++)
   419  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   420  		while(bany(&bit)) {
   421  			i = bnum(bit);
   422  			rgp->enter = r;
   423  			rgp->varno = i;
   424  			change = 0;
   425  			if(debug['R'] > 1)
   426  				print("\n");
   427  			paint1(r, i);
   428  			bit.b[i/32] &= ~(1L<<(i%32));
   429  			if(change <= 0) {
   430  				if(debug['R'])
   431  					print("%L $%d: %Q\n",
   432  						r->f.prog->lineno, change, blsh(i));
   433  				continue;
   434  			}
   435  			rgp->cost = change;
   436  			nregion++;
   437  			if(nregion >= NRGN) {
   438  				if(debug['R'] > 1)
   439  					print("too many regions\n");
   440  				goto brk;
   441  			}
   442  			rgp++;
   443  		}
   444  	}
   445  brk:
   446  	qsort(region, nregion, sizeof(region[0]), rcmp);
   447  
   448  	if(debug['R'] && debug['v'])
   449  		dumpit("pass5", &firstr->f, 1);
   450  
   451  	/*
   452  	 * pass 6
   453  	 * determine used registers (paint2)
   454  	 * replace code (paint3)
   455  	 */
   456  	rgp = region;
   457  	for(i=0; i<nregion; i++) {
   458  		bit = blsh(rgp->varno);
   459  		vreg = paint2(rgp->enter, rgp->varno);
   460  		vreg = allreg(vreg, rgp);
   461  		if(debug['R']) {
   462  			if(rgp->regno >= NREG)
   463  				print("%L $%d F%d: %Q\n",
   464  					rgp->enter->f.prog->lineno,
   465  					rgp->cost,
   466  					rgp->regno-NREG,
   467  					bit);
   468  			else
   469  				print("%L $%d R%d: %Q\n",
   470  					rgp->enter->f.prog->lineno,
   471  					rgp->cost,
   472  					rgp->regno,
   473  					bit);
   474  		}
   475  		if(rgp->regno != 0)
   476  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   477  		rgp++;
   478  	}
   479  
   480  	if(debug['R'] && debug['v'])
   481  		dumpit("pass6", &firstr->f, 1);
   482  
   483  	/*
   484  	 * free aux structures. peep allocates new ones.
   485  	 */
   486  	for(i=0; i<nvar; i++)
   487  		var[i].node->opt = nil;
   488  	flowend(g);
   489  	firstr = R;
   490  
   491  	/*
   492  	 * pass 7
   493  	 * peep-hole on basic block
   494  	 */
   495  	if(!debug['R'] || debug['P']) {
   496  		peep(firstp);
   497  	}
   498  
   499  	if(debug['R'] && debug['v'])
   500  		dumpit("pass7", &firstr->f, 1);
   501  
   502  	/*
   503  	 * last pass
   504  	 * eliminate nops
   505  	 * free aux structures
   506  	 * adjust the stack pointer
   507  	 *	MOVW.W 	R1,-12(R13)			<<- start
   508  	 *	MOVW   	R0,R1
   509  	 *	MOVW   	R1,8(R13)
   510  	 *	MOVW   	$0,R1
   511  	 *	MOVW   	R1,4(R13)
   512  	 *	BL     	,runtime.newproc+0(SB)
   513  	 *	MOVW   	&ft+-32(SP),R7			<<- adjust
   514  	 *	MOVW   	&j+-40(SP),R6			<<- adjust
   515  	 *	MOVW   	autotmp_0003+-24(SP),R5		<<- adjust
   516  	 *	MOVW   	$12(R13),R13			<<- finish
   517  	 */
   518  	vreg = 0;
   519  	for(p = firstp; p != P; p = p->link) {
   520  		while(p->link != P && p->link->as == ANOP)
   521  			p->link = p->link->link;
   522  		if(p->to.type == D_BRANCH)
   523  			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
   524  				p->to.u.branch = p->to.u.branch->link;
   525  		if(p->as == AMOVW && p->to.reg == 13) {
   526  			if(p->scond & C_WBIT) {
   527  				vreg = -p->to.offset;		// in adjust region
   528  //				print("%P adjusting %d\n", p, vreg);
   529  				continue;
   530  			}
   531  			if(p->from.type == D_CONST && p->to.type == D_REG) {
   532  				if(p->from.offset != vreg)
   533  					print("in and out different\n");
   534  //				print("%P finish %d\n", p, vreg);
   535  				vreg = 0;	// done adjust region
   536  				continue;
   537  			}
   538  
   539  //			print("%P %d %d from type\n", p, p->from.type, D_CONST);
   540  //			print("%P %d %d to type\n\n", p, p->to.type, D_REG);
   541  		}
   542  
   543  		if(p->as == AMOVW && vreg != 0) {
   544  			if(p->from.sym != nil)
   545  			if(p->from.name == D_AUTO || p->from.name == D_PARAM) {
   546  				p->from.offset += vreg;
   547  //				print("%P adjusting from %d %d\n", p, vreg, p->from.type);
   548  			}
   549  			if(p->to.sym != nil)
   550  			if(p->to.name == D_AUTO || p->to.name == D_PARAM) {
   551  				p->to.offset += vreg;
   552  //				print("%P adjusting to %d %d\n", p, vreg, p->from.type);
   553  			}
   554  		}
   555  	}
   556  }
   557  
   558  static void
   559  walkvardef(Node *n, Reg *r, int active)
   560  {
   561  	Reg *r1, *r2;
   562  	int bn;
   563  	Var *v;
   564  	
   565  	for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
   566  		if(r1->f.active == active)
   567  			break;
   568  		r1->f.active = active;
   569  		if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
   570  			break;
   571  		for(v=n->opt; v!=nil; v=v->nextinnode) {
   572  			bn = v - var;
   573  			r1->act.b[bn/32] |= 1L << (bn%32);
   574  		}
   575  		if(r1->f.prog->as == ABL)
   576  			break;
   577  	}
   578  
   579  	for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
   580  		if(r2->f.s2 != nil)
   581  			walkvardef(n, (Reg*)r2->f.s2, active);
   582  }
   583  
   584  void
   585  addsplits(void)
   586  {
   587  	Reg *r, *r1;
   588  	int z, i;
   589  	Bits bit;
   590  
   591  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   592  		if(r->f.loop > 1)
   593  			continue;
   594  		if(r->f.prog->as == ABL)
   595  			continue;
   596  		if(r->f.prog->as == ADUFFZERO)
   597  			continue;
   598  		if(r->f.prog->as == ADUFFCOPY)
   599  			continue;
   600  		for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
   601  			if(r1->f.loop <= 1)
   602  				continue;
   603  			for(z=0; z<BITS; z++)
   604  				bit.b[z] = r1->calbehind.b[z] &
   605  					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
   606  					~(r->calahead.b[z] & addrs.b[z]);
   607  			while(bany(&bit)) {
   608  				i = bnum(bit);
   609  				bit.b[i/32] &= ~(1L << (i%32));
   610  			}
   611  		}
   612  	}
   613  }
   614  
   615  /*
   616   * add mov b,rn
   617   * just after r
   618   */
   619  void
   620  addmove(Reg *r, int bn, int rn, int f)
   621  {
   622  	Prog *p, *p1, *p2;
   623  	Adr *a;
   624  	Var *v;
   625  
   626  	p1 = mal(sizeof(*p1));
   627  	*p1 = zprog;
   628  	p = r->f.prog;
   629  	
   630  	// If there's a stack fixup coming (after BL newproc or BL deferproc),
   631  	// delay the load until after the fixup.
   632  	p2 = p->link;
   633  	if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG)
   634  		p = p2;
   635  
   636  	p1->link = p->link;
   637  	p->link = p1;
   638  	p1->lineno = p->lineno;
   639  
   640  	v = var + bn;
   641  
   642  	a = &p1->to;
   643  	a->name = v->name;
   644  	a->node = v->node;
   645  	a->sym = linksym(v->node->sym);
   646  	a->offset = v->offset;
   647  	a->etype = v->etype;
   648  	a->type = D_OREG;
   649  	if(a->etype == TARRAY || a->sym == nil)
   650  		a->type = D_CONST;
   651  
   652  	if(v->addr)
   653  		fatal("addmove: shouldn't be doing this %A\n", a);
   654  
   655  	switch(v->etype) {
   656  	default:
   657  		print("What is this %E\n", v->etype);
   658  
   659  	case TINT8:
   660  		p1->as = AMOVBS;
   661  		break;
   662  	case TBOOL:
   663  	case TUINT8:
   664  //print("movbu %E %d %S\n", v->etype, bn, v->sym);
   665  		p1->as = AMOVBU;
   666  		break;
   667  	case TINT16:
   668  		p1->as = AMOVHS;
   669  		break;
   670  	case TUINT16:
   671  		p1->as = AMOVHU;
   672  		break;
   673  	case TINT32:
   674  	case TUINT32:
   675  	case TPTR32:
   676  		p1->as = AMOVW;
   677  		break;
   678  	case TFLOAT32:
   679  		p1->as = AMOVF;
   680  		break;
   681  	case TFLOAT64:
   682  		p1->as = AMOVD;
   683  		break;
   684  	}
   685  
   686  	p1->from.type = D_REG;
   687  	p1->from.reg = rn;
   688  	if(rn >= NREG) {
   689  		p1->from.type = D_FREG;
   690  		p1->from.reg = rn-NREG;
   691  	}
   692  	if(!f) {
   693  		p1->from = *a;
   694  		*a = zprog.from;
   695  		a->type = D_REG;
   696  		a->reg = rn;
   697  		if(rn >= NREG) {
   698  			a->type = D_FREG;
   699  			a->reg = rn-NREG;
   700  		}
   701  		if(v->etype == TUINT8 || v->etype == TBOOL)
   702  			p1->as = AMOVBU;
   703  		if(v->etype == TUINT16)
   704  			p1->as = AMOVHU;
   705  	}
   706  	if(debug['R'])
   707  		print("%P\t.a%P\n", p, p1);
   708  }
   709  
   710  static int
   711  overlap(int32 o1, int w1, int32 o2, int w2)
   712  {
   713  	int32 t1, t2;
   714  
   715  	t1 = o1+w1;
   716  	t2 = o2+w2;
   717  
   718  	if(!(t1 > o2 && t2 > o1))
   719  		return 0;
   720  
   721  	return 1;
   722  }
   723  
   /*
    * Return the bit set describing operand a of the instruction at r.
    *
    * Register operands (D_REG, D_CONST, D_SHIFT, D_FREG, D_REGREG,
    * D_REGREG2) yield the corresponding RtoB/FtoB register bits.
    * For D_OREG with a base register, the base register is recorded
    * directly in r->use1 or r->use2 (and in r->set when the instruction
    * has C_PBIT or C_WBIT) before the operand is treated as a possible
    * named variable.
    * Named operands (D_EXTERN, D_STATIC, D_AUTO, D_PARAM) are looked up
    * in var[] by node, name, offset, etype and width; a new entry is
    * created if none matches and there is room. The result is blsh(i)
    * for that entry. Anything that is not tracked yields zbits.
    *
    * Side effects: may append to var[] and bump nvar, link the new Var
    * into node->opt, set v->addr on overlapping or untrackable pieces,
    * and add the new bit to externs, params, ivar or ovar.
    */
   724  Bits
   725  mkvar(Reg *r, Adr *a)
   726  {
   727  	Var *v;
   728  	int i, t, n, et, z, w, flag;
   729  	int32 o;
   730  	Bits bit;
   731  	Node *node;
   732  
   733  	// mark registers used
   734  	t = a->type;
   735  
        	// flag becomes 1 once an existing piece of the same variable overlaps this one.
   736  	flag = 0;
   737  	switch(t) {
   738  	default:
   739  		print("type %d %d %D\n", t, a->name, a);
   740  		goto none;
   741  
   742  	case D_NONE:
   743  	case D_FCONST:
   744  	case D_BRANCH:
   745  		break;
   746  
   747  
   748  	case D_REGREG:
   749  	case D_REGREG2:
        		// Two registers: one in a->offset, one in a->reg.
   750  		bit = zbits;
   751  		if(a->offset != NREG)
   752  			bit.b[0] |= RtoB(a->offset);
   753  		if(a->reg != NREG)
   754  			bit.b[0] |= RtoB(a->reg);
   755  		return bit;
   756  
   757  	case D_CONST:
   758  	case D_REG:
   759  	case D_SHIFT:
   760  		if(a->reg != NREG) {
   761  			bit = zbits;
   762  			bit.b[0] = RtoB(a->reg);
   763  			return bit;
   764  		}
   765  		break;
   766  
   767  	case D_OREG:
        		// The base register is read by this operand; with C_PBIT/C_WBIT it is also marked set.
   768  		if(a->reg != NREG) {
   769  			if(a == &r->f.prog->from)
   770  				r->use1.b[0] |= RtoB(a->reg);
   771  			else
   772  				r->use2.b[0] |= RtoB(a->reg);
   773  			if(r->f.prog->scond & (C_PBIT|C_WBIT))
   774  				r->set.b[0] |= RtoB(a->reg);
   775  		}
   776  		break;
   777  
   778  	case D_FREG:
   779  		if(a->reg != NREG) {
   780  			bit = zbits;
   781  			bit.b[0] = FtoB(a->reg);
   782  			return bit;
   783  		}
   784  		break;
   785  	}
   786  
        	// Only extern, static, auto and param names are tracked as variables.
   787  	switch(a->name) {
   788  	default:
   789  		goto none;
   790  
   791  	case D_EXTERN:
   792  	case D_STATIC:
   793  	case D_AUTO:
   794  	case D_PARAM:
   795  		n = a->name;
   796  		break;
   797  	}
   798  
        	// Work on node->orig; operands without an ONAME node, or whose
        	// symbol name starts with '.', are not tracked.
   799  	node = a->node;
   800  	if(node == N || node->op != ONAME || node->orig == N)
   801  		goto none;
   802  	node = node->orig;
   803  	if(node->orig != node)
   804  		fatal("%D: bad node", a);
   805  	if(node->sym == S || node->sym->name[0] == '.')
   806  		goto none;
   807  	et = a->etype;
   808  	o = a->offset;
   809  	w = a->width;
   810  	if(w < 0)
   811  		fatal("bad width %d for %D", w, a);
   812  
        	// Search for an existing Var describing exactly this piece. An exact
        	// match is returned only while no overlap has been seen; once flag is
        	// set the scan continues so every overlapping piece gets addr = 1.
   813  	for(i=0; i<nvar; i++) {
   814  		v = var+i;
   815  		if(v->node == node && v->name == n) {
   816  			if(v->offset == o)
   817  			if(v->etype == et)
   818  			if(v->width == w)
   819  				if(!flag)
   820  					return blsh(i);
   821  
   822  			// if they overlap, disable both
   823  			if(overlap(v->offset, v->width, o, w)) {
   824  				v->addr = 1;
   825  				flag = 1;
   826  			}
   827  		}
   828  	}
   829  
   830  	switch(et) {
   831  	case 0:
   832  	case TFUNC:
   833  		goto none;
   834  	}
   835  
   836  	if(nvar >= NVAR) {
   837  		if(debug['w'] > 1 && node)
   838  			fatal("variable not optimized: %D", a);
   839  		
   840  		// If we're not tracking a word in a variable, mark the rest as
   841  		// having its address taken, so that we keep the whole thing
   842  		// live at all calls. otherwise we might optimize away part of
   843  		// a variable but not all of it.
   844  		for(i=0; i<nvar; i++) {
   845  			v = var+i;
   846  			if(v->node == node)
   847  				v->addr = 1;
   848  		}
   849  		goto none;
   850  	}
   851  
        	// Claim the next free slot in var[] for this piece.
   852  	i = nvar;
   853  	nvar++;
   854  //print("var %d %E %D %S\n", i, et, a, s);
   855  	v = var+i;
   856  	v->offset = o;
   857  	v->name = n;
   858  	v->etype = et;
   859  	v->width = w;
   860  	v->addr = flag;		// funny punning
   861  	v->node = node;
   862  	
   863  	// node->opt is the head of a linked list
   864  	// of Vars within the given Node, so that
   865  	// we can start at a Var and find all the other
   866  	// Vars in the same Go variable.
   867  	v->nextinnode = node->opt;
   868  	node->opt = v;
   869  	
        	// Classify the new bit by storage name and by parameter class.
   870  	bit = blsh(i);
   871  	if(n == D_EXTERN || n == D_STATIC)
   872  		for(z=0; z<BITS; z++)
   873  			externs.b[z] |= bit.b[z];
   874  	if(n == D_PARAM)
   875  		for(z=0; z<BITS; z++)
   876  			params.b[z] |= bit.b[z];
   877  
   878  	if(node->class == PPARAM)
   879  		for(z=0; z<BITS; z++)
   880  			ivar.b[z] |= bit.b[z];
   881  	if(node->class == PPARAMOUT)
   882  		for(z=0; z<BITS; z++)
   883  			ovar.b[z] |= bit.b[z];
   884  
   885  	// Treat values with their address taken as live at calls,
   886  	// because the garbage collector's liveness analysis in ../gc/plive.c does.
   887  	// These must be consistent or else we will elide stores and the garbage
   888  	// collector will see uninitialized data.
   889  	// The typical case where our own analysis is out of sync is when the
   890  	// node appears to have its address taken but that code doesn't actually
   891  	// get generated and therefore doesn't show up as an address being
   892  	// taken when we analyze the instruction stream.
   893  	// One instance of this case is when a closure uses the same name as
   894  	// an outer variable for one of its own variables declared with :=.
   895  	// The parser flags the outer variable as possibly shared, and therefore
   896  	// sets addrtaken, even though it ends up not being actually shared.
   897  	// If we were better about _ elision, _ = &x would suffice too.
   898  	// The broader := in a closure problem is mentioned in a comment in
   899  	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
   900  	if(node->addrtaken)
   901  		v->addr = 1;
   902  
   903  	// Disable registerization for globals, because:
   904  	// (1) we might panic at any time and we want the recovery code
   905  	// to see the latest values (issue 1304).
   906  	// (2) we don't know what pointers might point at them and we want
   907  	// loads via those pointers to see updated values and vice versa (issue 7995).
   908  	//
   909  	// Disable registerization for results if using defer, because the deferred func
   910  	// might recover and return, causing the current values to be used.
   911  	if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
   912  		v->addr = 1;
   913  
   914  	if(debug['R'])
   915  		print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
   916  
   917  	return bit;
   918  
   919  none:
   920  	return zbits;
   921  }
   922  
   923  void
   924  prop(Reg *r, Bits ref, Bits cal)
   925  {
   926  	Reg *r1, *r2;
   927  	int z, i, j;
   928  	Var *v, *v1;
   929  
   930  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
   931  		for(z=0; z<BITS; z++) {
   932  			ref.b[z] |= r1->refahead.b[z];
   933  			if(ref.b[z] != r1->refahead.b[z]) {
   934  				r1->refahead.b[z] = ref.b[z];
   935  				change++;
   936  			}
   937  			cal.b[z] |= r1->calahead.b[z];
   938  			if(cal.b[z] != r1->calahead.b[z]) {
   939  				r1->calahead.b[z] = cal.b[z];
   940  				change++;
   941  			}
   942  		}
   943  		switch(r1->f.prog->as) {
   944  		case ABL:
   945  			if(noreturn(r1->f.prog))
   946  				break;
   947  
   948  			// Mark all input variables (ivar) as used, because that's what the
   949  			// liveness bitmaps say. The liveness bitmaps say that so that a
   950  			// panic will not show stale values in the parameter dump.
   951  			// Mark variables with a recent VARDEF (r1->act) as used,
   952  			// so that the optimizer flushes initializations to memory,
   953  			// so that if a garbage collection happens during this CALL,
   954  			// the collector will see initialized memory. Again this is to
   955  			// match what the liveness bitmaps say.
   956  			for(z=0; z<BITS; z++) {
   957  				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
   958  				ref.b[z] = 0;
   959  			}
   960  			
   961  			// cal.b is the current approximation of what's live across the call.
   962  			// Every bit in cal.b is a single stack word. For each such word,
   963  			// find all the other tracked stack words in the same Go variable
   964  			// (struct/slice/string/interface) and mark them live too.
   965  			// This is necessary because the liveness analysis for the garbage
   966  			// collector works at variable granularity, not at word granularity.
   967  			// It is fundamental for slice/string/interface: the garbage collector
   968  			// needs the whole value, not just some of the words, in order to
   969  			// interpret the other bits correctly. Specifically, slice needs a consistent
   970  			// ptr and cap, string needs a consistent ptr and len, and interface
   971  			// needs a consistent type word and data word.
   972  			for(z=0; z<BITS; z++) {
   973  				if(cal.b[z] == 0)
   974  					continue;
   975  				for(i=0; i<32; i++) {
   976  					if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
   977  						continue;
   978  					v = var+z*32+i;
   979  					if(v->node->opt == nil) // v represents fixed register, not Go variable
   980  						continue;
   981  
   982  					// v->node->opt is the head of a linked list of Vars
   983  					// corresponding to tracked words from the Go variable v->node.
   984  					// Walk the list and set all the bits.
   985  					// For a large struct this could end up being quadratic:
   986  					// after the first setting, the outer loop (for z, i) would see a 1 bit
   987  					// for all of the remaining words in the struct, and for each such
   988  					// word would go through and turn on all the bits again.
   989  					// To avoid the quadratic behavior, we only turn on the bits if
   990  					// v is the head of the list or if the head's bit is not yet turned on.
   991  					// This will set the bits at most twice, keeping the overall loop linear.
   992  					v1 = v->node->opt;
   993  					j = v1 - var;
   994  					if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
   995  						for(; v1 != nil; v1 = v1->nextinnode) {
   996  							j = v1 - var;
   997  							cal.b[j/32] |= 1<<(j&31);
   998  						}
   999  					}
  1000  				}
  1001  			}
  1002  			break;
  1003  
  1004  		case ATEXT:
  1005  			for(z=0; z<BITS; z++) {
  1006  				cal.b[z] = 0;
  1007  				ref.b[z] = 0;
  1008  			}
  1009  			break;
  1010  
  1011  		case ARET:
  1012  			for(z=0; z<BITS; z++) {
  1013  				cal.b[z] = externs.b[z] | ovar.b[z];
  1014  				ref.b[z] = 0;
  1015  			}
  1016  			break;
  1017  		}
  1018  		for(z=0; z<BITS; z++) {
  1019  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  1020  				r1->use1.b[z] | r1->use2.b[z];
  1021  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  1022  			r1->refbehind.b[z] = ref.b[z];
  1023  			r1->calbehind.b[z] = cal.b[z];
  1024  		}
  1025  		if(r1->f.active)
  1026  			break;
  1027  		r1->f.active = 1;
  1028  	}
  1029  	for(; r != r1; r = (Reg*)r->f.p1)
  1030  		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
  1031  			prop(r2, r->refbehind, r->calbehind);
  1032  }
  1033  
  1034  void
  1035  synch(Reg *r, Bits dif)
  1036  {
  1037  	Reg *r1;
  1038  	int z;
  1039  
  1040  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
  1041  		for(z=0; z<BITS; z++) {
  1042  			dif.b[z] = (dif.b[z] &
  1043  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  1044  					r1->set.b[z] | r1->regdiff.b[z];
  1045  			if(dif.b[z] != r1->regdiff.b[z]) {
  1046  				r1->regdiff.b[z] = dif.b[z];
  1047  				change++;
  1048  			}
  1049  		}
  1050  		if(r1->f.active)
  1051  			break;
  1052  		r1->f.active = 1;
  1053  		for(z=0; z<BITS; z++)
  1054  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  1055  		if(r1->f.s2 != nil)
  1056  			synch((Reg*)r1->f.s2, dif);
  1057  	}
  1058  }
  1059  
  1060  uint32
  1061  allreg(uint32 b, Rgn *r)
  1062  {
  1063  	Var *v;
  1064  	int i;
  1065  
  1066  	v = var + r->varno;
  1067  	r->regno = 0;
  1068  	switch(v->etype) {
  1069  
  1070  	default:
  1071  		fatal("unknown etype %d/%E", bitno(b), v->etype);
  1072  		break;
  1073  
  1074  	case TINT8:
  1075  	case TUINT8:
  1076  	case TINT16:
  1077  	case TUINT16:
  1078  	case TINT32:
  1079  	case TUINT32:
  1080  	case TINT:
  1081  	case TUINT:
  1082  	case TUINTPTR:
  1083  	case TBOOL:
  1084  	case TPTR32:
  1085  		i = BtoR(~b);
  1086  		if(i && r->cost >= 0) {
  1087  			r->regno = i;
  1088  			return RtoB(i);
  1089  		}
  1090  		break;
  1091  
  1092  	case TFLOAT32:
  1093  	case TFLOAT64:
  1094  		i = BtoF(~b);
  1095  		if(i && r->cost >= 0) {
  1096  			r->regno = i+NREG;
  1097  			return FtoB(i);
  1098  		}
  1099  		break;
  1100  
  1101  	case TINT64:
  1102  	case TUINT64:
  1103  	case TPTR64:
  1104  	case TINTER:
  1105  	case TSTRUCT:
  1106  	case TARRAY:
  1107  		break;
  1108  	}
  1109  	return 0;
  1110  }
  1111  
  1112  void
  1113  paint1(Reg *r, int bn)
  1114  {
  1115  	Reg *r1;
  1116  	Prog *p;
  1117  	int z;
  1118  	uint32 bb;
  1119  
  1120  	z = bn/32;
  1121  	bb = 1L<<(bn%32);
  1122  	if(r->act.b[z] & bb)
  1123  		return;
  1124  	for(;;) {
  1125  		if(!(r->refbehind.b[z] & bb))
  1126  			break;
  1127  		r1 = (Reg*)r->f.p1;
  1128  		if(r1 == R)
  1129  			break;
  1130  		if(!(r1->refahead.b[z] & bb))
  1131  			break;
  1132  		if(r1->act.b[z] & bb)
  1133  			break;
  1134  		r = r1;
  1135  	}
  1136  
  1137  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
  1138  		change -= CLOAD * r->f.loop;
  1139  		if(debug['R'] > 1)
  1140  			print("%d%P\td %Q $%d\n", r->f.loop,
  1141  				r->f.prog, blsh(bn), change);
  1142  	}
  1143  	for(;;) {
  1144  		r->act.b[z] |= bb;
  1145  		p = r->f.prog;
  1146  
  1147  
  1148  		if(r->f.prog->as != ANOP) { // don't give credit for NOPs
  1149  			if(r->use1.b[z] & bb) {
  1150  				change += CREF * r->f.loop;
  1151  				if(debug['R'] > 1)
  1152  					print("%d%P\tu1 %Q $%d\n", r->f.loop,
  1153  						p, blsh(bn), change);
  1154  			}
  1155  			if((r->use2.b[z]|r->set.b[z]) & bb) {
  1156  				change += CREF * r->f.loop;
  1157  				if(debug['R'] > 1)
  1158  					print("%d%P\tu2 %Q $%d\n", r->f.loop,
  1159  						p, blsh(bn), change);
  1160  			}
  1161  		}
  1162  
  1163  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1164  			change -= CLOAD * r->f.loop;
  1165  			if(debug['R'] > 1)
  1166  				print("%d%P\tst %Q $%d\n", r->f.loop,
  1167  					p, blsh(bn), change);
  1168  		}
  1169  
  1170  		if(r->refbehind.b[z] & bb)
  1171  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1172  				if(r1->refahead.b[z] & bb)
  1173  					paint1(r1, bn);
  1174  
  1175  		if(!(r->refahead.b[z] & bb))
  1176  			break;
  1177  		r1 = (Reg*)r->f.s2;
  1178  		if(r1 != R)
  1179  			if(r1->refbehind.b[z] & bb)
  1180  				paint1(r1, bn);
  1181  		r = (Reg*)r->f.s1;
  1182  		if(r == R)
  1183  			break;
  1184  		if(r->act.b[z] & bb)
  1185  			break;
  1186  		if(!(r->refbehind.b[z] & bb))
  1187  			break;
  1188  	}
  1189  }
  1190  
  1191  uint32
  1192  paint2(Reg *r, int bn)
  1193  {
  1194  	Reg *r1;
  1195  	int z;
  1196  	uint32 bb, vreg;
  1197  
  1198  	z = bn/32;
  1199  	bb = 1L << (bn%32);
  1200  	vreg = regbits;
  1201  	if(!(r->act.b[z] & bb))
  1202  		return vreg;
  1203  	for(;;) {
  1204  		if(!(r->refbehind.b[z] & bb))
  1205  			break;
  1206  		r1 = (Reg*)r->f.p1;
  1207  		if(r1 == R)
  1208  			break;
  1209  		if(!(r1->refahead.b[z] & bb))
  1210  			break;
  1211  		if(!(r1->act.b[z] & bb))
  1212  			break;
  1213  		r = r1;
  1214  	}
  1215  	for(;;) {
  1216  		r->act.b[z] &= ~bb;
  1217  
  1218  		vreg |= r->regu;
  1219  
  1220  		if(r->refbehind.b[z] & bb)
  1221  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1222  				if(r1->refahead.b[z] & bb)
  1223  					vreg |= paint2(r1, bn);
  1224  
  1225  		if(!(r->refahead.b[z] & bb))
  1226  			break;
  1227  		r1 = (Reg*)r->f.s2;
  1228  		if(r1 != R)
  1229  			if(r1->refbehind.b[z] & bb)
  1230  				vreg |= paint2(r1, bn);
  1231  		r = (Reg*)r->f.s1;
  1232  		if(r == R)
  1233  			break;
  1234  		if(!(r->act.b[z] & bb))
  1235  			break;
  1236  		if(!(r->refbehind.b[z] & bb))
  1237  			break;
  1238  	}
  1239  	return vreg;
  1240  }
  1241  
  1242  void
  1243  paint3(Reg *r, int bn, int32 rb, int rn)
  1244  {
  1245  	Reg *r1;
  1246  	Prog *p;
  1247  	int z;
  1248  	uint32 bb;
  1249  
  1250  	z = bn/32;
  1251  	bb = 1L << (bn%32);
  1252  	if(r->act.b[z] & bb)
  1253  		return;
  1254  	for(;;) {
  1255  		if(!(r->refbehind.b[z] & bb))
  1256  			break;
  1257  		r1 = (Reg*)r->f.p1;
  1258  		if(r1 == R)
  1259  			break;
  1260  		if(!(r1->refahead.b[z] & bb))
  1261  			break;
  1262  		if(r1->act.b[z] & bb)
  1263  			break;
  1264  		r = r1;
  1265  	}
  1266  
  1267  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1268  		addmove(r, bn, rn, 0);
  1269  
  1270  	for(;;) {
  1271  		r->act.b[z] |= bb;
  1272  		p = r->f.prog;
  1273  
  1274  		if(r->use1.b[z] & bb) {
  1275  			if(debug['R'])
  1276  				print("%P", p);
  1277  			addreg(&p->from, rn);
  1278  			if(debug['R'])
  1279  				print("\t.c%P\n", p);
  1280  		}
  1281  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1282  			if(debug['R'])
  1283  				print("%P", p);
  1284  			addreg(&p->to, rn);
  1285  			if(debug['R'])
  1286  				print("\t.c%P\n", p);
  1287  		}
  1288  
  1289  		if(STORE(r) & r->regdiff.b[z] & bb)
  1290  			addmove(r, bn, rn, 1);
  1291  		r->regu |= rb;
  1292  
  1293  		if(r->refbehind.b[z] & bb)
  1294  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1295  				if(r1->refahead.b[z] & bb)
  1296  					paint3(r1, bn, rb, rn);
  1297  
  1298  		if(!(r->refahead.b[z] & bb))
  1299  			break;
  1300  		r1 = (Reg*)r->f.s2;
  1301  		if(r1 != R)
  1302  			if(r1->refbehind.b[z] & bb)
  1303  				paint3(r1, bn, rb, rn);
  1304  		r = (Reg*)r->f.s1;
  1305  		if(r == R)
  1306  			break;
  1307  		if(r->act.b[z] & bb)
  1308  			break;
  1309  		if(!(r->refbehind.b[z] & bb))
  1310  			break;
  1311  	}
  1312  }
  1313  
  1314  void
  1315  addreg(Adr *a, int rn)
  1316  {
  1317  	a->sym = nil;
  1318  	a->node = nil;
  1319  	a->name = D_NONE;
  1320  	a->type = D_REG;
  1321  	a->reg = rn;
  1322  	if(rn >= NREG) {
  1323  		a->type = D_FREG;
  1324  		a->reg = rn-NREG;
  1325  	}
  1326  }
  1327  
  1328  /*
  1329   *	bit	reg
  1330   *	0	R0
  1331   *	1	R1
  1332   *	...	...
  1333   *	10	R10
  1334   *	12  R12
  1335   */
  1336  int32
  1337  RtoB(int r)
  1338  {
  1339  	if(r >= REGTMP-2 && r != 12)	// excluded R9 and R10 for m and g, but not R12
  1340  		return 0;
  1341  	return 1L << r;
  1342  }
  1343  
  1344  int
  1345  BtoR(int32 b)
  1346  {
  1347  	b &= 0x11fcL;	// excluded R9 and R10 for m and g, but not R12
  1348  	if(b == 0)
  1349  		return 0;
  1350  	return bitno(b);
  1351  }
  1352  
  1353  /*
  1354   *	bit	reg
  1355   *	18	F2
  1356   *	19	F3
  1357   *	...	...
  1358   *	31	F15
  1359   */
  1360  int32
  1361  FtoB(int f)
  1362  {
  1363  
  1364  	if(f < 2 || f > NFREG-1)
  1365  		return 0;
  1366  	return 1L << (f + 16);
  1367  }
  1368  
  1369  int
  1370  BtoF(int32 b)
  1371  {
  1372  
  1373  	b &= 0xfffc0000L;
  1374  	if(b == 0)
  1375  		return 0;
  1376  	return bitno(b) - 16;
  1377  }
  1378  
  1379  void
  1380  dumpone(Flow *f, int isreg)
  1381  {
  1382  	int z;
  1383  	Bits bit;
  1384  	Reg *r;
  1385  
  1386  	print("%d:%P", f->loop, f->prog);
  1387  	if(isreg) {
  1388  		r = (Reg*)f;
  1389  		for(z=0; z<BITS; z++)
  1390  			bit.b[z] =
  1391  				r->set.b[z] |
  1392  				r->use1.b[z] |
  1393  				r->use2.b[z] |
  1394  				r->refbehind.b[z] |
  1395  				r->refahead.b[z] |
  1396  				r->calbehind.b[z] |
  1397  				r->calahead.b[z] |
  1398  				r->regdiff.b[z] |
  1399  				r->act.b[z] |
  1400  					0;
  1401  		if(bany(&bit)) {
  1402  			print("\t");
  1403  			if(bany(&r->set))
  1404  				print(" s:%Q", r->set);
  1405  			if(bany(&r->use1))
  1406  				print(" u1:%Q", r->use1);
  1407  			if(bany(&r->use2))
  1408  				print(" u2:%Q", r->use2);
  1409  			if(bany(&r->refbehind))
  1410  				print(" rb:%Q ", r->refbehind);
  1411  			if(bany(&r->refahead))
  1412  				print(" ra:%Q ", r->refahead);
  1413  			if(bany(&r->calbehind))
  1414  				print(" cb:%Q ", r->calbehind);
  1415  			if(bany(&r->calahead))
  1416  				print(" ca:%Q ", r->calahead);
  1417  			if(bany(&r->regdiff))
  1418  				print(" d:%Q ", r->regdiff);
  1419  			if(bany(&r->act))
  1420  				print(" a:%Q ", r->act);
  1421  		}
  1422  	}
  1423  	print("\n");
  1424  }
  1425  
  1426  void
  1427  dumpit(char *str, Flow *r0, int isreg)
  1428  {
  1429  	Flow *r, *r1;
  1430  
  1431  	print("\n%s\n", str);
  1432  	for(r = r0; r != nil; r = r->link) {
  1433  		dumpone(r, isreg);
  1434  		r1 = r->p2;
  1435  		if(r1 != nil) {
  1436  			print("	pred:");
  1437  			for(; r1 != nil; r1 = r1->p2link)
  1438  				print(" %.4ud", (int)r1->prog->pc);
  1439  			if(r->p1 != nil)
  1440  				print(" (and %.4ud)", (int)r->p1->prog->pc);
  1441  			else
  1442  				print(" (only)");
  1443  			print("\n");
  1444  		}
  1445  //		r1 = r->s1;
  1446  //		if(r1 != nil) {
  1447  //			print("	succ:");
  1448  //			for(; r1 != R; r1 = r1->s1)
  1449  //				print(" %.4ud", (int)r1->prog->pc);
  1450  //			print("\n");
  1451  //		}
  1452  	}
  1453  }