github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/cmd/5g/reg.c (about)

     1  // Inferno utils/5c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  
    32  #include <u.h>
    33  #include <libc.h>
    34  #include "gg.h"
    35  #include "opt.h"
    36  
// Number of register pseudo-variables. regopt reserves var[0..NREGVAR-1]
// for them, one per entry of regname[].
#define	NREGVAR	32
// Mask of the low 32 bits of a Bits word. regopt applies it to b[0] to
// split the register pseudo-variable bits from the other variable bits.
#define	REGBITS	((uint64)0xffffffffull)
/*c2go enum {
	NREGVAR = 32,
	REGBITS = 0xffffffff,
};
*/

	void	addsplits(void);
// Head of the Reg list built by flowstart for the function being optimized;
// regopt resets it to R once the flow graph has been released.
static	Reg*	firstr;
// Nonzero until regopt has run once; guards the one-time fmtinstall of %Q.
static	int	first	= 1;
    48  
    49  int
    50  rcmp(const void *a1, const void *a2)
    51  {
    52  	Rgn *p1, *p2;
    53  	int c1, c2;
    54  
    55  	p1 = (Rgn*)a1;
    56  	p2 = (Rgn*)a2;
    57  	c1 = p2->cost;
    58  	c2 = p1->cost;
    59  	if(c1 -= c2)
    60  		return c1;
    61  	return p2->varno - p1->varno;
    62  }
    63  
    64  void
    65  excise(Flow *r)
    66  {
    67  	Prog *p;
    68  
    69  	p = r->prog;
    70  	p->as = ANOP;
    71  	p->scond = zprog.scond;
    72  	p->from = zprog.from;
    73  	p->to = zprog.to;
    74  	p->reg = zprog.reg;
    75  }
    76  
    77  static void
    78  setaddrs(Bits bit)
    79  {
    80  	int i, n;
    81  	Var *v;
    82  	Node *node;
    83  
    84  	while(bany(&bit)) {
    85  		// convert each bit to a variable
    86  		i = bnum(bit);
    87  		node = var[i].node;
    88  		n = var[i].name;
    89  		biclr(&bit, i);
    90  
    91  		// disable all pieces of that variable
    92  		for(i=0; i<nvar; i++) {
    93  			v = var+i;
    94  			if(v->node == node && v->name == n)
    95  				v->addr = 2;
    96  		}
    97  	}
    98  }
    99  
// Symbol names for the register pseudo-variables, indexed by var[] slot:
// 16 ".R" names followed by 16 ".F" names (NREGVAR entries in total).
// regopt passes these to lookup() to create regnodes[].
static char* regname[] = {
	".R0",
	".R1",
	".R2",
	".R3",
	".R4",
	".R5",
	".R6",
	".R7",
	".R8",
	".R9",
	".R10",
	".R11",
	".R12",
	".R13",
	".R14",
	".R15",
	".F0",
	".F1",
	".F2",
	".F3",
	".F4",
	".F5",
	".F6",
	".F7",
	".F8",
	".F9",
	".F10",
	".F11",
	".F12",
	".F13",
	".F14",
	".F15",
};

// Nodes for the register pseudo-variables; filled in lazily by regopt
// (an entry is created the first time it is found to be N) and reused afterwards.
static Node* regnodes[NREGVAR];

// Defined below regopt, which calls it in pass 2.5.
static void walkvardef(Node *n, Reg *r, int active);
   138  
/*
 * Register optimizer entry point for the instruction list starting at firstp.
 *
 * Runs mergetemp, then builds a flow graph with flowstart and works through
 * the numbered passes commented below: collect use/set information (pass 1),
 * find loops (pass 2), propagate VARDEF coverage (pass 2.5), propagate usage
 * backward with prop (pass 3), propagate register/variable synchrony forward
 * with synch (pass 4), split register pseudo-variables into regu (pass 4.5),
 * isolate and cost regions with paint1 (pass 5), assign registers and rewrite
 * code with paint2/allreg/paint3 (pass 6), run peep (pass 7), and finally
 * unlink ANOPs and fix up offsets inside stack-adjust regions.
 *
 * Returns early, after clearing the opt field of every var[] node, when
 * flowstart returns nil.
 */
void
regopt(Prog *firstp)
{
	Reg *r, *r1;
	Prog *p;
	Graph *g;
	int i, z, active;
	uint32 vreg;
	Bits bit;
	ProgInfo info;

	// install the %Q formatter once per process
	if(first) {
		fmtinstall('Q', Qconv);
		first = 0;
	}

	mergetemp(firstp);

	/*
	 * control flow is more complicated in generated go code
	 * than in generated c code.  define pseudo-variables for
	 * registers, so we have complete register usage information.
	 */
	nvar = NREGVAR;
	memset(var, 0, NREGVAR*sizeof var[0]);
	for(i=0; i<NREGVAR; i++) {
		if(regnodes[i] == N)
			regnodes[i] = newname(lookup(regname[i]));
		var[i].node = regnodes[i];
	}

	// start with SP, LINK and PC marked in regbits; clear the global bit sets
	regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
	for(z=0; z<BITS; z++) {
		externs.b[z] = 0;
		params.b[z] = 0;
		consts.b[z] = 0;
		addrs.b[z] = 0;
		ivar.b[z] = 0;
		ovar.b[z] = 0;
	}

	/*
	 * pass 1
	 * build aux data structure
	 * allocate pcs
	 * find use and set of variables
	 */
	g = flowstart(firstp, sizeof(Reg));
	if(g == nil) {
		// no flow graph: drop the Var lists hanging off the nodes and give up
		for(i=0; i<nvar; i++)
			var[i].node->opt = nil;
		return;
	}

	firstr = (Reg*)g->start;

	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		p = r->f.prog;
		if(p->as == AVARDEF || p->as == AVARKILL)
			continue;
		proginfo(&info, p);

		// Avoid making variables for direct-called functions.
		if(p->as == ABL && p->to.name == D_EXTERN)
			continue;

		// left operand
		bit = mkvar(r, &p->from);
		if(info.flags & LeftRead)
			for(z=0; z<BITS; z++)
				r->use1.b[z] |= bit.b[z];
		if(info.flags & LeftAddr)
			setaddrs(bit);

		// middle register operand: floating point if the left operand is
		if(info.flags & RegRead) {	
			if(p->from.type != D_FREG)
				r->use1.b[0] |= RtoB(p->reg);
			else
				r->use1.b[0] |= FtoB(p->reg);
		}

		// right operand
		if(info.flags & (RightAddr | RightRead | RightWrite)) {
			bit = mkvar(r, &p->to);
			if(info.flags & RightAddr)
				setaddrs(bit);
			if(info.flags & RightRead)
				for(z=0; z<BITS; z++)
					r->use2.b[z] |= bit.b[z];
			if(info.flags & RightWrite)
				for(z=0; z<BITS; z++)
					r->set.b[z] |= bit.b[z];
		}

		/* the mod/div runtime routines smash R12 */
		if(p->as == ADIV || p->as == ADIVU || p->as == AMOD || p->as == AMODU)
			r->set.b[0] |= RtoB(12);
	}
	// NOTE(review): this return skips flowend(g) and the var[i].node->opt
	// reset done on the other exits - confirm it cannot be reached with g != nil.
	if(firstr == R)
		return;

	// collect every variable with a nonzero addr field into addrs
	for(i=0; i<nvar; i++) {
		Var *v = var+i;
		if(v->addr) {
			bit = blsh(i);
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
		}

		if(debug['R'] && debug['v'])
			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
				i, v->addr, v->etype, v->width, v->node, v->offset);
	}

	if(debug['R'] && debug['v'])
		dumpit("pass1", &firstr->f, 1);

	/*
	 * pass 2
	 * find looping structure
	 */
	flowrpo(g);

	if(debug['R'] && debug['v'])
		dumpit("pass2", &firstr->f, 1);

	/*
	 * pass 2.5
	 * iterate propagating fat vardef covering forward
	 * r->act records vars with a VARDEF since the last CALL.
	 * (r->act will be reused in pass 5 for something else,
	 * but we'll be done with it by then.)
	 */
	active = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		r->f.active = 0;
		r->act = zbits;
	}
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		p = r->f.prog;
		if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
			// a fresh 'active' value per VARDEF serves as the visited mark for this walk
			active++;
			walkvardef(p->to.node, r, active);
		}
	}

	/*
	 * pass 3
	 * iterate propagating usage
	 * 	back until flow graph is complete
	 */
loop1:
	change = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->f.active = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		if(r->f.prog->as == ARET)
			prop(r, zbits, zbits);
loop11:
	/* pick up unreachable code */
	i = 0;
	for(r = firstr; r != R; r = r1) {
		r1 = (Reg*)r->f.link;
		if(r1 && r1->f.active && !r->f.active) {
			prop(r, zbits, zbits);
			i = 1;
		}
	}
	if(i)
		goto loop11;
	if(change)
		goto loop1;

	if(debug['R'] && debug['v'])
		dumpit("pass3", &firstr->f, 1);


	/*
	 * pass 4
	 * iterate propagating register/variable synchrony
	 * 	forward until graph is complete
	 */
loop2:
	change = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->f.active = 0;
	synch(firstr, zbits);
	if(change)
		goto loop2;

	addsplits();

	if(debug['R'] && debug['v'])
		dumpit("pass4", &firstr->f, 1);

	if(debug['R'] > 1) {
		print("\nprop structure:\n");
		for(r = firstr; r != R; r = (Reg*)r->f.link) {
			print("%d:%P", r->f.loop, r->f.prog);
			for(z=0; z<BITS; z++) {
				bit.b[z] = r->set.b[z] |
					r->refahead.b[z] | r->calahead.b[z] |
					r->refbehind.b[z] | r->calbehind.b[z] |
					r->use1.b[z] | r->use2.b[z];
				bit.b[z] &= ~addrs.b[z];
			}

			if(bany(&bit)) {
				print("\t");
				if(bany(&r->use1))
					print(" u1=%Q", r->use1);
				if(bany(&r->use2))
					print(" u2=%Q", r->use2);
				if(bany(&r->set))
					print(" st=%Q", r->set);
				if(bany(&r->refahead))
					print(" ra=%Q", r->refahead);
				if(bany(&r->calahead))
					print(" ca=%Q", r->calahead);
				if(bany(&r->refbehind))
					print(" rb=%Q", r->refbehind);
				if(bany(&r->calbehind))
					print(" cb=%Q", r->calbehind);
			}
			print("\n");
		}
	}

	/*
	 * pass 4.5
	 * move register pseudo-variables into regu.
	 */
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;

		// strip the register pseudo-variable bits from every per-node set
		r->set.b[0] &= ~REGBITS;
		r->use1.b[0] &= ~REGBITS;
		r->use2.b[0] &= ~REGBITS;
		r->refbehind.b[0] &= ~REGBITS;
		r->refahead.b[0] &= ~REGBITS;
		r->calbehind.b[0] &= ~REGBITS;
		r->calahead.b[0] &= ~REGBITS;
		r->regdiff.b[0] &= ~REGBITS;
		r->act.b[0] &= ~REGBITS;
	}

	if(debug['R'] && debug['v'])
		dumpit("pass4.5", &firstr->f, 1);

	/*
	 * pass 5
	 * isolate regions
	 * calculate costs (paint1)
	 */
	r = firstr;
	if(r) {
		for(z=0; z<BITS; z++)
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
		if(bany(&bit) && !r->f.refset) {
			// should never happen - all variables are preset
			if(debug['w'])
				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
			r->f.refset = 1;
		}
	}

	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->act = zbits;
	rgp = region;
	nregion = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		// variables set here but not referenced ahead: the instruction is excised
		for(z=0; z<BITS; z++)
			bit.b[z] = r->set.b[z] &
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
		if(bany(&bit) && !r->f.refset) {
			if(debug['w'])
				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
			r->f.refset = 1;
			excise(&r->f);
		}
		for(z=0; z<BITS; z++)
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
		while(bany(&bit)) {
			i = bnum(bit);
			rgp->enter = r;
			rgp->varno = i;
			// paint1 accumulates the cost of this candidate region in 'change'
			change = 0;
			if(debug['R'] > 1)
				print("\n");
			paint1(r, i);
			biclr(&bit, i);
			if(change <= 0) {
				if(debug['R'])
					print("%L $%d: %Q\n",
						r->f.prog->lineno, change, blsh(i));
				continue;
			}
			rgp->cost = change;
			nregion++;
			if(nregion >= NRGN) {
				if(debug['R'] > 1)
					print("too many regions\n");
				goto brk;
			}
			rgp++;
		}
	}
brk:
	qsort(region, nregion, sizeof(region[0]), rcmp);

	if(debug['R'] && debug['v'])
		dumpit("pass5", &firstr->f, 1);

	/*
	 * pass 6
	 * determine used registers (paint2)
	 * replace code (paint3)
	 */
	rgp = region;
	if(debug['R'] && debug['v'])
		print("\nregisterizing\n");
	for(i=0; i<nregion; i++) {
		if(debug['R'] && debug['v'])
			print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
		bit = blsh(rgp->varno);
		vreg = paint2(rgp->enter, rgp->varno, 0);
		vreg = allreg(vreg, rgp);
		if(debug['R']) {
			if(rgp->regno >= NREG)
				print("%L $%d F%d: %Q\n",
					rgp->enter->f.prog->lineno,
					rgp->cost,
					rgp->regno-NREG,
					bit);
			else
				print("%L $%d R%d: %Q\n",
					rgp->enter->f.prog->lineno,
					rgp->cost,
					rgp->regno,
					bit);
		}
		// regno 0 means allreg assigned no register to this region
		if(rgp->regno != 0)
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
		rgp++;
	}

	/*
	 * free aux structures. peep allocates new ones.
	 */
	for(i=0; i<nvar; i++)
		var[i].node->opt = nil;
	flowend(g);
	firstr = R;

	if(debug['R'] && debug['v']) {
		// Rebuild flow graph, since we inserted instructions
		// NOTE(review): unlike pass 1, the result of flowstart is not checked for nil here.
		g = flowstart(firstp, sizeof(Reg));
		firstr = (Reg*)g->start;
		dumpit("pass6", &firstr->f, 1);
		flowend(g);
		firstr = R;
	}

	/*
	 * pass 7
	 * peep-hole on basic block
	 */
	if(!debug['R'] || debug['P']) {
		peep(firstp);
	}

	// NOTE(review): firstr was reset to R above, so this dump is handed
	// &firstr->f computed from R - confirm that is what dumpit expects.
	if(debug['R'] && debug['v'])
		dumpit("pass7", &firstr->f, 1);

	/*
	 * last pass
	 * eliminate nops
	 * free aux structures
	 * adjust the stack pointer
	 *	MOVW.W 	R1,-12(R13)			<<- start
	 *	MOVW   	R0,R1
	 *	MOVW   	R1,8(R13)
	 *	MOVW   	$0,R1
	 *	MOVW   	R1,4(R13)
	 *	BL     	,runtime.newproc+0(SB)
	 *	MOVW   	&ft+-32(SP),R7			<<- adjust
	 *	MOVW   	&j+-40(SP),R6			<<- adjust
	 *	MOVW   	autotmp_0003+-24(SP),R5		<<- adjust
	 *	MOVW   	$12(R13),R13			<<- finish
	 */
	// vreg is reused here as the current stack adjustment; 0 means "not in an adjust region"
	vreg = 0;
	for(p = firstp; p != P; p = p->link) {
		// unlink ANOPs that follow p and skip them as branch targets
		while(p->link != P && p->link->as == ANOP)
			p->link = p->link->link;
		if(p->to.type == D_BRANCH)
			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
				p->to.u.branch = p->to.u.branch->link;
		if(p->as == AMOVW && p->to.reg == 13) {
			if(p->scond & C_WBIT) {
				vreg = -p->to.offset;		// in adjust region
//				print("%P adjusting %d\n", p, vreg);
				continue;
			}
			if(p->from.type == D_CONST && p->to.type == D_REG) {
				if(p->from.offset != vreg)
					print("in and out different\n");
//				print("%P finish %d\n", p, vreg);
				vreg = 0;	// done adjust region
				continue;
			}

//			print("%P %d %d from type\n", p, p->from.type, D_CONST);
//			print("%P %d %d to type\n\n", p, p->to.type, D_REG);
		}

		// inside an adjust region: shift AUTO/PARAM operands that carry a symbol
		if(p->as == AMOVW && vreg != 0) {
			if(p->from.sym != nil)
			if(p->from.name == D_AUTO || p->from.name == D_PARAM) {
				p->from.offset += vreg;
//				print("%P adjusting from %d %d\n", p, vreg, p->from.type);
			}
			if(p->to.sym != nil)
			if(p->to.name == D_AUTO || p->to.name == D_PARAM) {
				p->to.offset += vreg;
//				print("%P adjusting to %d %d\n", p, vreg, p->from.type);
			}
		}
	}
}
   567  
   568  static void
   569  walkvardef(Node *n, Reg *r, int active)
   570  {
   571  	Reg *r1, *r2;
   572  	int bn;
   573  	Var *v;
   574  	
   575  	for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
   576  		if(r1->f.active == active)
   577  			break;
   578  		r1->f.active = active;
   579  		if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
   580  			break;
   581  		for(v=n->opt; v!=nil; v=v->nextinnode) {
   582  			bn = v - var;
   583  			biset(&r1->act, bn);
   584  		}
   585  		if(r1->f.prog->as == ABL)
   586  			break;
   587  	}
   588  
   589  	for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
   590  		if(r2->f.s2 != nil)
   591  			walkvardef(n, (Reg*)r2->f.s2, active);
   592  }
   593  
   594  void
   595  addsplits(void)
   596  {
   597  	Reg *r, *r1;
   598  	int z, i;
   599  	Bits bit;
   600  
   601  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   602  		if(r->f.loop > 1)
   603  			continue;
   604  		if(r->f.prog->as == ABL)
   605  			continue;
   606  		if(r->f.prog->as == ADUFFZERO)
   607  			continue;
   608  		if(r->f.prog->as == ADUFFCOPY)
   609  			continue;
   610  		for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
   611  			if(r1->f.loop <= 1)
   612  				continue;
   613  			for(z=0; z<BITS; z++)
   614  				bit.b[z] = r1->calbehind.b[z] &
   615  					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
   616  					~(r->calahead.b[z] & addrs.b[z]);
   617  			while(bany(&bit)) {
   618  				i = bnum(bit);
   619  				biclr(&bit, i);
   620  			}
   621  		}
   622  	}
   623  }
   624  
   625  /*
   626   * add mov b,rn
   627   * just after r
   628   */
   629  void
   630  addmove(Reg *r, int bn, int rn, int f)
   631  {
   632  	Prog *p, *p1, *p2;
   633  	Adr *a;
   634  	Var *v;
   635  
   636  	p1 = mal(sizeof(*p1));
   637  	*p1 = zprog;
   638  	p = r->f.prog;
   639  	
   640  	// If there's a stack fixup coming (after BL newproc or BL deferproc),
   641  	// delay the load until after the fixup.
   642  	p2 = p->link;
   643  	if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG)
   644  		p = p2;
   645  
   646  	p1->link = p->link;
   647  	p->link = p1;
   648  	p1->lineno = p->lineno;
   649  
   650  	v = var + bn;
   651  
   652  	a = &p1->to;
   653  	a->name = v->name;
   654  	a->node = v->node;
   655  	a->sym = linksym(v->node->sym);
   656  	a->offset = v->offset;
   657  	a->etype = v->etype;
   658  	a->type = D_OREG;
   659  	if(a->etype == TARRAY || a->sym == nil)
   660  		a->type = D_CONST;
   661  
   662  	if(v->addr)
   663  		fatal("addmove: shouldn't be doing this %A\n", a);
   664  
   665  	switch(v->etype) {
   666  	default:
   667  		print("What is this %E\n", v->etype);
   668  
   669  	case TINT8:
   670  		p1->as = AMOVBS;
   671  		break;
   672  	case TBOOL:
   673  	case TUINT8:
   674  //print("movbu %E %d %S\n", v->etype, bn, v->sym);
   675  		p1->as = AMOVBU;
   676  		break;
   677  	case TINT16:
   678  		p1->as = AMOVHS;
   679  		break;
   680  	case TUINT16:
   681  		p1->as = AMOVHU;
   682  		break;
   683  	case TINT32:
   684  	case TUINT32:
   685  	case TPTR32:
   686  		p1->as = AMOVW;
   687  		break;
   688  	case TFLOAT32:
   689  		p1->as = AMOVF;
   690  		break;
   691  	case TFLOAT64:
   692  		p1->as = AMOVD;
   693  		break;
   694  	}
   695  
   696  	p1->from.type = D_REG;
   697  	p1->from.reg = rn;
   698  	if(rn >= NREG) {
   699  		p1->from.type = D_FREG;
   700  		p1->from.reg = rn-NREG;
   701  	}
   702  	if(!f) {
   703  		p1->from = *a;
   704  		*a = zprog.from;
   705  		a->type = D_REG;
   706  		a->reg = rn;
   707  		if(rn >= NREG) {
   708  			a->type = D_FREG;
   709  			a->reg = rn-NREG;
   710  		}
   711  		if(v->etype == TUINT8 || v->etype == TBOOL)
   712  			p1->as = AMOVBU;
   713  		if(v->etype == TUINT16)
   714  			p1->as = AMOVHU;
   715  	}
   716  	if(debug['R'])
   717  		print("%P\t.a%P\n", p, p1);
   718  }
   719  
   720  static int
   721  overlap(int32 o1, int w1, int32 o2, int w2)
   722  {
   723  	int32 t1, t2;
   724  
   725  	t1 = o1+w1;
   726  	t2 = o2+w2;
   727  
   728  	if(!(t1 > o2 && t2 > o1))
   729  		return 0;
   730  
   731  	return 1;
   732  }
   733  
/*
 * Return the bit set that identifies operand a of the instruction at
 * flow node r.
 *
 * Register operands yield the corresponding register bit(s) directly.
 * For a D_OREG operand the base register is recorded in r->use1 or
 * r->use2 (and in r->set when the instruction has C_PBIT or C_WBIT),
 * and the operand is then treated like a named variable.
 * Named operands (D_EXTERN, D_STATIC, D_AUTO, D_PARAM) are looked up
 * in var[] by node, name, offset, etype and width; if no exact match
 * exists a new Var is appended, linked onto node->opt, and its bit is
 * added to externs/params/ivar/ovar as appropriate.
 *
 * Returns zbits when the operand is not tracked.
 */
Bits
mkvar(Reg *r, Adr *a)
{
	Var *v;
	int i, t, n, et, z, w, flag;
	int32 o;
	Bits bit;
	Node *node;

	// mark registers used
	t = a->type;

	// flag becomes 1 once an overlapping, non-identical Var has been seen
	flag = 0;
	switch(t) {
	default:
		print("type %d %d %D\n", t, a->name, a);
		goto none;

	case D_NONE:
	case D_FCONST:
	case D_BRANCH:
		break;


	case D_REGREG:
	case D_REGREG2:
		// register pair: the second register number is held in a->offset
		bit = zbits;
		if(a->offset != NREG)
			bit.b[0] |= RtoB(a->offset);
		if(a->reg != NREG)
			bit.b[0] |= RtoB(a->reg);
		return bit;

	case D_CONST:
	case D_REG:
	case D_SHIFT:
		if(a->reg != NREG) {
			bit = zbits;
			bit.b[0] = RtoB(a->reg);
			return bit;
		}
		break;

	case D_OREG:
		if(a->reg != NREG) {
			// the base register is read; which use set depends on the operand side
			if(a == &r->f.prog->from)
				r->use1.b[0] |= RtoB(a->reg);
			else
				r->use2.b[0] |= RtoB(a->reg);
			if(r->f.prog->scond & (C_PBIT|C_WBIT))
				r->set.b[0] |= RtoB(a->reg);
		}
		break;

	case D_FREG:
		if(a->reg != NREG) {
			bit = zbits;
			bit.b[0] = FtoB(a->reg);
			return bit;
		}
		break;
	}

	switch(a->name) {
	default:
		goto none;

	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		n = a->name;
		break;
	}

	node = a->node;
	if(node == N || node->op != ONAME || node->orig == N)
		goto none;
	node = node->orig;
	if(node->orig != node)
		fatal("%D: bad node", a);
	// names starting with '.' are skipped (the register pseudo-variables
	// in regname[] are named that way)
	if(node->sym == S || node->sym->name[0] == '.')
		goto none;
	et = a->etype;
	o = a->offset;
	w = a->width;
	if(w < 0)
		fatal("bad width %d for %D", w, a);

	for(i=0; i<nvar; i++) {
		v = var+i;
		if(v->node == node && v->name == n) {
			// exact match, and no overlap seen so far: reuse this Var
			if(v->offset == o)
			if(v->etype == et)
			if(v->width == w)
				if(!flag)
					return blsh(i);

			// if they overlap, disable both
			if(overlap(v->offset, v->width, o, w)) {
				v->addr = 1;
				flag = 1;
			}
		}
	}

	switch(et) {
	case 0:
	case TFUNC:
		goto none;
	}

	if(nvar >= NVAR) {
		if(debug['w'] > 1 && node)
			fatal("variable not optimized: %D", a);
		
		// If we're not tracking a word in a variable, mark the rest as
		// having its address taken, so that we keep the whole thing
		// live at all calls. otherwise we might optimize away part of
		// a variable but not all of it.
		for(i=0; i<nvar; i++) {
			v = var+i;
			if(v->node == node)
				v->addr = 1;
		}
		goto none;
	}

	// allocate the next var[] slot
	i = nvar;
	nvar++;
//print("var %d %E %D %S\n", i, et, a, s);
	v = var+i;
	v->offset = o;
	v->name = n;
	v->etype = et;
	v->width = w;
	v->addr = flag;		// funny punning
	v->node = node;
	
	// node->opt is the head of a linked list
	// of Vars within the given Node, so that
	// we can start at a Var and find all the other
	// Vars in the same Go variable.
	v->nextinnode = node->opt;
	node->opt = v;
	
	bit = blsh(i);
	if(n == D_EXTERN || n == D_STATIC)
		for(z=0; z<BITS; z++)
			externs.b[z] |= bit.b[z];
	if(n == D_PARAM)
		for(z=0; z<BITS; z++)
			params.b[z] |= bit.b[z];

	if(node->class == PPARAM)
		for(z=0; z<BITS; z++)
			ivar.b[z] |= bit.b[z];
	if(node->class == PPARAMOUT)
		for(z=0; z<BITS; z++)
			ovar.b[z] |= bit.b[z];

	// Treat values with their address taken as live at calls,
	// because the garbage collector's liveness analysis in ../gc/plive.c does.
	// These must be consistent or else we will elide stores and the garbage
	// collector will see uninitialized data.
	// The typical case where our own analysis is out of sync is when the
	// node appears to have its address taken but that code doesn't actually
	// get generated and therefore doesn't show up as an address being
	// taken when we analyze the instruction stream.
	// One instance of this case is when a closure uses the same name as
	// an outer variable for one of its own variables declared with :=.
	// The parser flags the outer variable as possibly shared, and therefore
	// sets addrtaken, even though it ends up not being actually shared.
	// If we were better about _ elision, _ = &x would suffice too.
	// The broader := in a closure problem is mentioned in a comment in
	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
	if(node->addrtaken)
		v->addr = 1;

	// Disable registerization for globals, because:
	// (1) we might panic at any time and we want the recovery code
	// to see the latest values (issue 1304).
	// (2) we don't know what pointers might point at them and we want
	// loads via those pointers to see updated values and vice versa (issue 7995).
	//
	// Disable registerization for results if using defer, because the deferred func
	// might recover and return, causing the current values to be used.
	if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
		v->addr = 1;

	// note: the "w=%d+%d" field prints the offset followed by the width
	if(debug['R'])
		print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);

	return bit;

none:
	return zbits;
}
   932  
/*
 * Propagate the ref and cal sets backward from r along the p1
 * predecessor chain.
 *
 * At each node the incoming sets are merged into refahead/calahead
 * (incrementing the global 'change' whenever a word grows), adjusted
 * for ABL, ATEXT and ARET instructions, combined with the node's own
 * set/use1/use2, and stored in refbehind/calbehind. The walk stops
 * after updating a node that was already marked active.
 * Afterwards the p2 predecessors of every node on the walked chain
 * are processed recursively.
 */
void
prop(Reg *r, Bits ref, Bits cal)
{
	Reg *r1, *r2;
	int z, i, j;
	Var *v, *v1;

	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
		// merge into the "ahead" sets, counting any growth as a change
		for(z=0; z<BITS; z++) {
			ref.b[z] |= r1->refahead.b[z];
			if(ref.b[z] != r1->refahead.b[z]) {
				r1->refahead.b[z] = ref.b[z];
				change++;
			}
			cal.b[z] |= r1->calahead.b[z];
			if(cal.b[z] != r1->calahead.b[z]) {
				r1->calahead.b[z] = cal.b[z];
				change++;
			}
		}
		switch(r1->f.prog->as) {
		case ABL:
			if(noreturn(r1->f.prog))
				break;

			// Mark all input variables (ivar) as used, because that's what the
			// liveness bitmaps say. The liveness bitmaps say that so that a
			// panic will not show stale values in the parameter dump.
			// Mark variables with a recent VARDEF (r1->act) as used,
			// so that the optimizer flushes initializations to memory,
			// so that if a garbage collection happens during this CALL,
			// the collector will see initialized memory. Again this is to
			// match what the liveness bitmaps say.
			for(z=0; z<BITS; z++) {
				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
				ref.b[z] = 0;
			}
			
			// cal.b is the current approximation of what's live across the call.
			// Every bit in cal.b is a single stack word. For each such word,
			// find all the other tracked stack words in the same Go variable
			// (struct/slice/string/interface) and mark them live too.
			// This is necessary because the liveness analysis for the garbage
			// collector works at variable granularity, not at word granularity.
			// It is fundamental for slice/string/interface: the garbage collector
			// needs the whole value, not just some of the words, in order to
			// interpret the other bits correctly. Specifically, slice needs a consistent
			// ptr and cap, string needs a consistent ptr and len, and interface
			// needs a consistent type word and data word.
			for(z=0; z<BITS; z++) {
				if(cal.b[z] == 0)
					continue;
				for(i=0; i<64; i++) {
					// skip bits beyond the allocated variables and bits that are clear
					if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
						continue;
					v = var+z*64+i;
					if(v->node->opt == nil) // v represents fixed register, not Go variable
						continue;

					// v->node->opt is the head of a linked list of Vars
					// corresponding to tracked words from the Go variable v->node.
					// Walk the list and set all the bits.
					// For a large struct this could end up being quadratic:
					// after the first setting, the outer loop (for z, i) would see a 1 bit
					// for all of the remaining words in the struct, and for each such
					// word would go through and turn on all the bits again.
					// To avoid the quadratic behavior, we only turn on the bits if
					// v is the head of the list or if the head's bit is not yet turned on.
					// This will set the bits at most twice, keeping the overall loop linear.
					v1 = v->node->opt;
					j = v1 - var;
					if(v == v1 || !btest(&cal, j)) {
						for(; v1 != nil; v1 = v1->nextinnode) {
							j = v1 - var;
							biset(&cal, j);
						}
					}
				}
			}
			break;

		case ATEXT:
			// function entry: both sets are cleared
			for(z=0; z<BITS; z++) {
				cal.b[z] = 0;
				ref.b[z] = 0;
			}
			break;

		case ARET:
			// at a return, cal is reset to the externs plus the output variables
			for(z=0; z<BITS; z++) {
				cal.b[z] = externs.b[z] | ovar.b[z];
				ref.b[z] = 0;
			}
			break;
		}
		// apply this instruction's own set/use and record the "behind" sets
		for(z=0; z<BITS; z++) {
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
				r1->use1.b[z] | r1->use2.b[z];
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
			r1->refbehind.b[z] = ref.b[z];
			r1->calbehind.b[z] = cal.b[z];
		}
		if(r1->f.active)
			break;
		r1->f.active = 1;
	}
	// recurse into the p2 predecessors of every node before the stopping point
	for(; r != r1; r = (Reg*)r->f.p1)
		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
			prop(r2, r->refbehind, r->calbehind);
}
  1043  
  1044  void
  1045  synch(Reg *r, Bits dif)
  1046  {
  1047  	Reg *r1;
  1048  	int z;
  1049  
  1050  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
  1051  		for(z=0; z<BITS; z++) {
  1052  			dif.b[z] = (dif.b[z] &
  1053  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  1054  					r1->set.b[z] | r1->regdiff.b[z];
  1055  			if(dif.b[z] != r1->regdiff.b[z]) {
  1056  				r1->regdiff.b[z] = dif.b[z];
  1057  				change++;
  1058  			}
  1059  		}
  1060  		if(r1->f.active)
  1061  			break;
  1062  		r1->f.active = 1;
  1063  		for(z=0; z<BITS; z++)
  1064  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  1065  		if(r1->f.s2 != nil)
  1066  			synch((Reg*)r1->f.s2, dif);
  1067  	}
  1068  }
  1069  
  1070  uint32
  1071  allreg(uint32 b, Rgn *r)
  1072  {
  1073  	Var *v;
  1074  	int i;
  1075  
  1076  	v = var + r->varno;
  1077  	r->regno = 0;
  1078  	switch(v->etype) {
  1079  
  1080  	default:
  1081  		fatal("unknown etype %d/%E", bitno(b), v->etype);
  1082  		break;
  1083  
  1084  	case TINT8:
  1085  	case TUINT8:
  1086  	case TINT16:
  1087  	case TUINT16:
  1088  	case TINT32:
  1089  	case TUINT32:
  1090  	case TINT:
  1091  	case TUINT:
  1092  	case TUINTPTR:
  1093  	case TBOOL:
  1094  	case TPTR32:
  1095  		i = BtoR(~b);
  1096  		if(i && r->cost >= 0) {
  1097  			r->regno = i;
  1098  			return RtoB(i);
  1099  		}
  1100  		break;
  1101  
  1102  	case TFLOAT32:
  1103  	case TFLOAT64:
  1104  		i = BtoF(~b);
  1105  		if(i && r->cost >= 0) {
  1106  			r->regno = i+NREG;
  1107  			return FtoB(i);
  1108  		}
  1109  		break;
  1110  
  1111  	case TINT64:
  1112  	case TUINT64:
  1113  	case TPTR64:
  1114  	case TINTER:
  1115  	case TSTRUCT:
  1116  	case TARRAY:
  1117  		break;
  1118  	}
  1119  	return 0;
  1120  }
  1121  
  1122  void
  1123  paint1(Reg *r, int bn)
  1124  {
  1125  	Reg *r1;
  1126  	Prog *p;
  1127  	int z;
  1128  	uint64 bb;
  1129  
  1130  	z = bn/64;
  1131  	bb = 1LL<<(bn%64);
  1132  	if(r->act.b[z] & bb)
  1133  		return;
  1134  	for(;;) {
  1135  		if(!(r->refbehind.b[z] & bb))
  1136  			break;
  1137  		r1 = (Reg*)r->f.p1;
  1138  		if(r1 == R)
  1139  			break;
  1140  		if(!(r1->refahead.b[z] & bb))
  1141  			break;
  1142  		if(r1->act.b[z] & bb)
  1143  			break;
  1144  		r = r1;
  1145  	}
  1146  
  1147  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
  1148  		change -= CLOAD * r->f.loop;
  1149  		if(debug['R'] > 1)
  1150  			print("%d%P\td %Q $%d\n", r->f.loop,
  1151  				r->f.prog, blsh(bn), change);
  1152  	}
  1153  	for(;;) {
  1154  		r->act.b[z] |= bb;
  1155  		p = r->f.prog;
  1156  
  1157  
  1158  		if(r->f.prog->as != ANOP) { // don't give credit for NOPs
  1159  			if(r->use1.b[z] & bb) {
  1160  				change += CREF * r->f.loop;
  1161  				if(debug['R'] > 1)
  1162  					print("%d%P\tu1 %Q $%d\n", r->f.loop,
  1163  						p, blsh(bn), change);
  1164  			}
  1165  			if((r->use2.b[z]|r->set.b[z]) & bb) {
  1166  				change += CREF * r->f.loop;
  1167  				if(debug['R'] > 1)
  1168  					print("%d%P\tu2 %Q $%d\n", r->f.loop,
  1169  						p, blsh(bn), change);
  1170  			}
  1171  		}
  1172  
  1173  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1174  			change -= CLOAD * r->f.loop;
  1175  			if(debug['R'] > 1)
  1176  				print("%d%P\tst %Q $%d\n", r->f.loop,
  1177  					p, blsh(bn), change);
  1178  		}
  1179  
  1180  		if(r->refbehind.b[z] & bb)
  1181  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1182  				if(r1->refahead.b[z] & bb)
  1183  					paint1(r1, bn);
  1184  
  1185  		if(!(r->refahead.b[z] & bb))
  1186  			break;
  1187  		r1 = (Reg*)r->f.s2;
  1188  		if(r1 != R)
  1189  			if(r1->refbehind.b[z] & bb)
  1190  				paint1(r1, bn);
  1191  		r = (Reg*)r->f.s1;
  1192  		if(r == R)
  1193  			break;
  1194  		if(r->act.b[z] & bb)
  1195  			break;
  1196  		if(!(r->refbehind.b[z] & bb))
  1197  			break;
  1198  	}
  1199  }
  1200  
  1201  uint32
  1202  paint2(Reg *r, int bn, int depth)
  1203  {
  1204  	Reg *r1;
  1205  	int z;
  1206  	uint64 bb, vreg;
  1207  
  1208  	z = bn/64;
  1209  	bb = 1LL << (bn%64);
  1210  	vreg = regbits;
  1211  	if(!(r->act.b[z] & bb))
  1212  		return vreg;
  1213  	for(;;) {
  1214  		if(!(r->refbehind.b[z] & bb))
  1215  			break;
  1216  		r1 = (Reg*)r->f.p1;
  1217  		if(r1 == R)
  1218  			break;
  1219  		if(!(r1->refahead.b[z] & bb))
  1220  			break;
  1221  		if(!(r1->act.b[z] & bb))
  1222  			break;
  1223  		r = r1;
  1224  	}
  1225  	for(;;) {
  1226  		if(debug['R'] && debug['v'])
  1227  			print("  paint2 %d %P\n", depth, r->f.prog);
  1228  
  1229  		r->act.b[z] &= ~bb;
  1230  
  1231  		vreg |= r->regu;
  1232  
  1233  		if(r->refbehind.b[z] & bb)
  1234  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1235  				if(r1->refahead.b[z] & bb)
  1236  					vreg |= paint2(r1, bn, depth+1);
  1237  
  1238  		if(!(r->refahead.b[z] & bb))
  1239  			break;
  1240  		r1 = (Reg*)r->f.s2;
  1241  		if(r1 != R)
  1242  			if(r1->refbehind.b[z] & bb)
  1243  				vreg |= paint2(r1, bn, depth+1);
  1244  		r = (Reg*)r->f.s1;
  1245  		if(r == R)
  1246  			break;
  1247  		if(!(r->act.b[z] & bb))
  1248  			break;
  1249  		if(!(r->refbehind.b[z] & bb))
  1250  			break;
  1251  	}
  1252  	return vreg;
  1253  }
  1254  
  1255  void
  1256  paint3(Reg *r, int bn, uint32 rb, int rn)
  1257  {
  1258  	Reg *r1;
  1259  	Prog *p;
  1260  	int z;
  1261  	uint64 bb;
  1262  
  1263  	z = bn/64;
  1264  	bb = 1LL << (bn%64);
  1265  	if(r->act.b[z] & bb)
  1266  		return;
  1267  	for(;;) {
  1268  		if(!(r->refbehind.b[z] & bb))
  1269  			break;
  1270  		r1 = (Reg*)r->f.p1;
  1271  		if(r1 == R)
  1272  			break;
  1273  		if(!(r1->refahead.b[z] & bb))
  1274  			break;
  1275  		if(r1->act.b[z] & bb)
  1276  			break;
  1277  		r = r1;
  1278  	}
  1279  
  1280  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1281  		addmove(r, bn, rn, 0);
  1282  
  1283  	for(;;) {
  1284  		r->act.b[z] |= bb;
  1285  		p = r->f.prog;
  1286  
  1287  		if(r->use1.b[z] & bb) {
  1288  			if(debug['R'])
  1289  				print("%P", p);
  1290  			addreg(&p->from, rn);
  1291  			if(debug['R'])
  1292  				print("\t.c%P\n", p);
  1293  		}
  1294  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1295  			if(debug['R'])
  1296  				print("%P", p);
  1297  			addreg(&p->to, rn);
  1298  			if(debug['R'])
  1299  				print("\t.c%P\n", p);
  1300  		}
  1301  
  1302  		if(STORE(r) & r->regdiff.b[z] & bb)
  1303  			addmove(r, bn, rn, 1);
  1304  		r->regu |= rb;
  1305  
  1306  		if(r->refbehind.b[z] & bb)
  1307  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1308  				if(r1->refahead.b[z] & bb)
  1309  					paint3(r1, bn, rb, rn);
  1310  
  1311  		if(!(r->refahead.b[z] & bb))
  1312  			break;
  1313  		r1 = (Reg*)r->f.s2;
  1314  		if(r1 != R)
  1315  			if(r1->refbehind.b[z] & bb)
  1316  				paint3(r1, bn, rb, rn);
  1317  		r = (Reg*)r->f.s1;
  1318  		if(r == R)
  1319  			break;
  1320  		if(r->act.b[z] & bb)
  1321  			break;
  1322  		if(!(r->refbehind.b[z] & bb))
  1323  			break;
  1324  	}
  1325  }
  1326  
  1327  void
  1328  addreg(Adr *a, int rn)
  1329  {
  1330  	a->sym = nil;
  1331  	a->node = nil;
  1332  	a->name = D_NONE;
  1333  	a->type = D_REG;
  1334  	a->reg = rn;
  1335  	if(rn >= NREG) {
  1336  		a->type = D_FREG;
  1337  		a->reg = rn-NREG;
  1338  	}
  1339  }
  1340  
  1341  /*
  1342   *	bit	reg
  1343   *	0	R0
  1344   *	1	R1
  1345   *	...	...
  1346   *	10	R10
  1347   *	12  R12
  1348   */
  1349  uint32
  1350  RtoB(int r)
  1351  {
  1352  	if(r >= REGTMP-2 && r != 12)	// excluded R9 and R10 for m and g, but not R12
  1353  		return 0;
  1354  	return 1L << r;
  1355  }
  1356  
  1357  int
  1358  BtoR(uint32 b)
  1359  {
  1360  	// TODO Allow R0 and R1, but be careful with a 0 return
  1361  	// TODO Allow R9. Only R10 is reserved now (just g, not m).
  1362  	b &= 0x11fcL;	// excluded R9 and R10 for m and g, but not R12
  1363  	if(b == 0)
  1364  		return 0;
  1365  	return bitno(b);
  1366  }
  1367  
  1368  /*
  1369   *	bit	reg
  1370   *	18	F2
  1371   *	19	F3
  1372   *	...	...
  1373   *	31	F15
  1374   */
  1375  uint32
  1376  FtoB(int f)
  1377  {
  1378  
  1379  	if(f < 2 || f > NFREG-1)
  1380  		return 0;
  1381  	return 1L << (f + 16);
  1382  }
  1383  
  1384  int
  1385  BtoF(uint32 b)
  1386  {
  1387  
  1388  	b &= 0xfffc0000L;
  1389  	if(b == 0)
  1390  		return 0;
  1391  	return bitno(b) - 16;
  1392  }
  1393  
  1394  void
  1395  dumpone(Flow *f, int isreg)
  1396  {
  1397  	int z;
  1398  	Bits bit;
  1399  	Reg *r;
  1400  
  1401  	print("%d:%P", f->loop, f->prog);
  1402  	if(isreg) {
  1403  		r = (Reg*)f;
  1404  		for(z=0; z<BITS; z++)
  1405  			bit.b[z] =
  1406  				r->set.b[z] |
  1407  				r->use1.b[z] |
  1408  				r->use2.b[z] |
  1409  				r->refbehind.b[z] |
  1410  				r->refahead.b[z] |
  1411  				r->calbehind.b[z] |
  1412  				r->calahead.b[z] |
  1413  				r->regdiff.b[z] |
  1414  				r->act.b[z] |
  1415  					0;
  1416  		if(bany(&bit)) {
  1417  			print("\t");
  1418  			if(bany(&r->set))
  1419  				print(" s:%Q", r->set);
  1420  			if(bany(&r->use1))
  1421  				print(" u1:%Q", r->use1);
  1422  			if(bany(&r->use2))
  1423  				print(" u2:%Q", r->use2);
  1424  			if(bany(&r->refbehind))
  1425  				print(" rb:%Q ", r->refbehind);
  1426  			if(bany(&r->refahead))
  1427  				print(" ra:%Q ", r->refahead);
  1428  			if(bany(&r->calbehind))
  1429  				print(" cb:%Q ", r->calbehind);
  1430  			if(bany(&r->calahead))
  1431  				print(" ca:%Q ", r->calahead);
  1432  			if(bany(&r->regdiff))
  1433  				print(" d:%Q ", r->regdiff);
  1434  			if(bany(&r->act))
  1435  				print(" a:%Q ", r->act);
  1436  		}
  1437  	}
  1438  	print("\n");
  1439  }
  1440  
  1441  void
  1442  dumpit(char *str, Flow *r0, int isreg)
  1443  {
  1444  	Flow *r, *r1;
  1445  
  1446  	print("\n%s\n", str);
  1447  	for(r = r0; r != nil; r = r->link) {
  1448  		dumpone(r, isreg);
  1449  		r1 = r->p2;
  1450  		if(r1 != nil) {
  1451  			print("	pred:");
  1452  			for(; r1 != nil; r1 = r1->p2link)
  1453  				print(" %.4ud", (int)r1->prog->pc);
  1454  			if(r->p1 != nil)
  1455  				print(" (and %.4ud)", (int)r->p1->prog->pc);
  1456  			else
  1457  				print(" (only)");
  1458  			print("\n");
  1459  		}
  1460  		// Print successors if it's not just the next one
  1461  		if(r->s1 != r->link || r->s2 != nil) {
  1462  			print("	succ:");
  1463  			if(r->s1 != nil)
  1464  				print(" %.4ud", (int)r->s1->prog->pc);
  1465  			if(r->s2 != nil)
  1466  				print(" %.4ud", (int)r->s2->prog->pc);
  1467  			print("\n");
  1468  		}
  1469  	}
  1470  }