github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/5g/reg.c (about)

     1  // Inferno utils/5c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  
    32  #include <u.h>
    33  #include <libc.h>
    34  #include "gg.h"
    35  #include "opt.h"
    36  
    37  #define	NREGVAR	32
    38  #define	REGBITS	((uint32)0xffffffff)
    39  #define	P2R(p)	(Reg*)(p->reg)
    40  
    41  	void	addsplits(void);
    42  	int	noreturn(Prog *p);
    43  static	int	first	= 0;
    44  
    45  static	void	fixjmp(Prog*);
    46  
    47  
    48  Reg*
    49  rega(void)
    50  {
    51  	Reg *r;
    52  
    53  	r = freer;
    54  	if(r == R) {
    55  		r = mal(sizeof(*r));
    56  	} else
    57  		freer = r->link;
    58  
    59  	*r = zreg;
    60  	return r;
    61  }
    62  
    63  int
    64  rcmp(const void *a1, const void *a2)
    65  {
    66  	Rgn *p1, *p2;
    67  	int c1, c2;
    68  
    69  	p1 = (Rgn*)a1;
    70  	p2 = (Rgn*)a2;
    71  	c1 = p2->cost;
    72  	c2 = p1->cost;
    73  	if(c1 -= c2)
    74  		return c1;
    75  	return p2->varno - p1->varno;
    76  }
    77  
    78  static void
    79  setoutvar(void)
    80  {
    81  	Type *t;
    82  	Node *n;
    83  	Addr a;
    84  	Iter save;
    85  	Bits bit;
    86  	int z;
    87  
    88  	t = structfirst(&save, getoutarg(curfn->type));
    89  	while(t != T) {
    90  		n = nodarg(t, 1);
    91  		a = zprog.from;
    92  		naddr(n, &a, 0);
    93  		bit = mkvar(R, &a);
    94  		for(z=0; z<BITS; z++)
    95  			ovar.b[z] |= bit.b[z];
    96  		t = structnext(&save);
    97  	}
    98  //if(bany(&ovar))
    99  //print("ovar = %Q\n", ovar);
   100  }
   101  
   102  void
   103  excise(Reg *r)
   104  {
   105  	Prog *p;
   106  
   107  	p = r->prog;
   108  	p->as = ANOP;
   109  	p->scond = zprog.scond;
   110  	p->from = zprog.from;
   111  	p->to = zprog.to;
   112  	p->reg = zprog.reg;
   113  }
   114  
   115  static void
   116  setaddrs(Bits bit)
   117  {
   118  	int i, n;
   119  	Var *v;
   120  	Node *node;
   121  
   122  	while(bany(&bit)) {
   123  		// convert each bit to a variable
   124  		i = bnum(bit);
   125  		node = var[i].node;
   126  		n = var[i].name;
   127  		bit.b[i/32] &= ~(1L<<(i%32));
   128  
   129  		// disable all pieces of that variable
   130  		for(i=0; i<nvar; i++) {
   131  			v = var+i;
   132  			if(v->node == node && v->name == n)
   133  				v->addr = 2;
   134  		}
   135  	}
   136  }
   137  
   138  static char* regname[] = {
   139  	".R0",
   140  	".R1",
   141  	".R2",
   142  	".R3",
   143  	".R4",
   144  	".R5",
   145  	".R6",
   146  	".R7",
   147  	".R8",
   148  	".R9",
   149  	".R10",
   150  	".R11",
   151  	".R12",
   152  	".R13",
   153  	".R14",
   154  	".R15",
   155  	".F0",
   156  	".F1",
   157  	".F2",
   158  	".F3",
   159  	".F4",
   160  	".F5",
   161  	".F6",
   162  	".F7",
   163  	".F8",
   164  	".F9",
   165  	".F10",
   166  	".F11",
   167  	".F12",
   168  	".F13",
   169  	".F14",
   170  	".F15",
   171  };
   172  
/*
 * Nodes for the register pseudo-variables, one per regname[] entry.
 * regopt fills an entry the first time it finds it equal to N and
 * reuses it on later calls.
 */
static Node* regnodes[NREGVAR];
   174  
   175  void
   176  regopt(Prog *firstp)
   177  {
   178  	Reg *r, *r1;
   179  	Prog *p;
   180  	int i, z, nr;
   181  	uint32 vreg;
   182  	Bits bit;
   183  	
   184  	if(first == 0) {
   185  		fmtinstall('Q', Qconv);
   186  	}
   187  	
   188  	fixjmp(firstp);
   189  
   190  	first++;
   191  	if(debug['K']) {
   192  		if(first != 13)
   193  			return;
   194  //		debug['R'] = 2;
   195  //		debug['P'] = 2;
   196  		print("optimizing %S\n", curfn->nname->sym);
   197  	}
   198  
   199  	// count instructions
   200  	nr = 0;
   201  	for(p=firstp; p!=P; p=p->link)
   202  		nr++;
   203  
   204  	// if too big dont bother
   205  	if(nr >= 10000) {
   206  //		print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
   207  		return;
   208  	}
   209  
   210  	firstr = R;
   211  	lastr = R;
   212  
   213  	/*
   214  	 * control flow is more complicated in generated go code
   215  	 * than in generated c code.  define pseudo-variables for
   216  	 * registers, so we have complete register usage information.
   217  	 */
   218  	nvar = NREGVAR;
   219  	memset(var, 0, NREGVAR*sizeof var[0]);
   220  	for(i=0; i<NREGVAR; i++) {
   221  		if(regnodes[i] == N)
   222  			regnodes[i] = newname(lookup(regname[i]));
   223  		var[i].node = regnodes[i];
   224  	}
   225  
   226  	regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
   227  	for(z=0; z<BITS; z++) {
   228  		externs.b[z] = 0;
   229  		params.b[z] = 0;
   230  		consts.b[z] = 0;
   231  		addrs.b[z] = 0;
   232  		ovar.b[z] = 0;
   233  	}
   234  
   235  	// build list of return variables
   236  	setoutvar();
   237  
   238  	/*
   239  	 * pass 1
   240  	 * build aux data structure
   241  	 * allocate pcs
   242  	 * find use and set of variables
   243  	 */
   244  	nr = 0;
   245  	for(p=firstp; p != P; p = p->link) {
   246  		switch(p->as) {
   247  		case ADATA:
   248  		case AGLOBL:
   249  		case ANAME:
   250  		case ASIGNAME:
   251  		case ALOCALS:
   252  		case ATYPE:
   253  			continue;
   254  		}
   255  		r = rega();
   256  		nr++;
   257  		if(firstr == R) {
   258  			firstr = r;
   259  			lastr = r;
   260  		} else {
   261  			lastr->link = r;
   262  			r->p1 = lastr;
   263  			lastr->s1 = r;
   264  			lastr = r;
   265  		}
   266  		r->prog = p;
   267  		p->regp = r;
   268  
   269  		r1 = r->p1;
   270  		if(r1 != R) {
   271  			switch(r1->prog->as) {
   272  			case ARET:
   273  			case AB:
   274  			case ARFE:
   275  				r->p1 = R;
   276  				r1->s1 = R;
   277  			}
   278  		}
   279  
   280  		// Avoid making variables for direct-called functions.
   281  		if(p->as == ABL && p->to.type == D_EXTERN)
   282  			continue;
   283  
   284  		/*
   285  		 * left side always read
   286  		 */
   287  		bit = mkvar(r, &p->from);
   288  		for(z=0; z<BITS; z++)
   289  			r->use1.b[z] |= bit.b[z];
   290  		
   291  		/*
   292  		 * middle always read when present
   293  		 */
   294  		if(p->reg != NREG) {
   295  			if(p->from.type != D_FREG)
   296  				r->use1.b[0] |= RtoB(p->reg);
   297  			else
   298  				r->use1.b[0] |= FtoB(p->reg);
   299  		}
   300  
   301  		/*
   302  		 * right side depends on opcode
   303  		 */
   304  		bit = mkvar(r, &p->to);
   305  		if(bany(&bit))
   306  		switch(p->as) {
   307  		default:
   308  			yyerror("reg: unknown op: %A", p->as);
   309  			break;
   310  		
   311  		/*
   312  		 * right side read
   313  		 */
   314  		case ATST:
   315  		case ATEQ:
   316  		case ACMP:
   317  		case ACMN:
   318  		case ACMPD:
   319  		case ACMPF:
   320  		rightread:
   321  			for(z=0; z<BITS; z++)
   322  				r->use2.b[z] |= bit.b[z];
   323  			break;
   324  			
   325  		/*
   326  		 * right side read or read+write, depending on middle
   327  		 *	ADD x, z => z += x
   328  		 *	ADD x, y, z  => z = x + y
   329  		 */
   330  		case AADD:
   331  		case AAND:
   332  		case AEOR:
   333  		case ASUB:
   334  		case ARSB:
   335  		case AADC:
   336  		case ASBC:
   337  		case ARSC:
   338  		case AORR:
   339  		case ABIC:
   340  		case ASLL:
   341  		case ASRL:
   342  		case ASRA:
   343  		case AMUL:
   344  		case AMULU:
   345  		case ADIV:
   346  		case AMOD:
   347  		case AMODU:
   348  		case ADIVU:
   349  			if(p->reg != NREG)
   350  				goto rightread;
   351  			// fall through
   352  
   353  		/*
   354  		 * right side read+write
   355  		 */
   356  		case AADDF:
   357  		case AADDD:
   358  		case ASUBF:
   359  		case ASUBD:
   360  		case AMULF:
   361  		case AMULD:
   362  		case ADIVF:
   363  		case ADIVD:
   364  		case AMULA:
   365  		case AMULAL:
   366  		case AMULALU:
   367  			for(z=0; z<BITS; z++) {
   368  				r->use2.b[z] |= bit.b[z];
   369  				r->set.b[z] |= bit.b[z];
   370  			}
   371  			break;
   372  
   373  		/*
   374  		 * right side write
   375  		 */
   376  		case ANOP:
   377  		case AMOVB:
   378  		case AMOVBU:
   379  		case AMOVD:
   380  		case AMOVDF:
   381  		case AMOVDW:
   382  		case AMOVF:
   383  		case AMOVFW:
   384  		case AMOVH:
   385  		case AMOVHU:
   386  		case AMOVW:
   387  		case AMOVWD:
   388  		case AMOVWF:
   389  		case AMVN:
   390  		case AMULL:
   391  		case AMULLU:
   392  			if((p->scond & C_SCOND) != C_SCOND_NONE)
   393  				for(z=0; z<BITS; z++)
   394  					r->use2.b[z] |= bit.b[z];
   395  			for(z=0; z<BITS; z++)
   396  				r->set.b[z] |= bit.b[z];
   397  			break;
   398  
   399  		/*
   400  		 * funny
   401  		 */
   402  		case ABL:
   403  			setaddrs(bit);
   404  			break;
   405  		}
   406  
   407  		if(p->as == AMOVM) {
   408  			z = p->to.offset;
   409  			if(p->from.type == D_CONST)
   410  				z = p->from.offset;
   411  			for(i=0; z; i++) {
   412  				if(z&1)
   413  					regbits |= RtoB(i);
   414  				z >>= 1;
   415  			}
   416  		}
   417  	}
   418  	if(firstr == R)
   419  		return;
   420  
   421  	for(i=0; i<nvar; i++) {
   422  		Var *v = var+i;
   423  		if(v->addr) {
   424  			bit = blsh(i);
   425  			for(z=0; z<BITS; z++)
   426  				addrs.b[z] |= bit.b[z];
   427  		}
   428  
   429  		if(debug['R'] && debug['v'])
   430  			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
   431  				i, v->addr, v->etype, v->width, v->node, v->offset);
   432  	}
   433  
   434  	if(debug['R'] && debug['v'])
   435  		dumpit("pass1", firstr);
   436  
   437  	/*
   438  	 * pass 2
   439  	 * turn branch references to pointers
   440  	 * build back pointers
   441  	 */
   442  	for(r=firstr; r!=R; r=r->link) {
   443  		p = r->prog;
   444  		if(p->to.type == D_BRANCH) {
   445  			if(p->to.u.branch == P)
   446  				fatal("pnil %P", p);
   447  			r1 = p->to.u.branch->regp;
   448  			if(r1 == R)
   449  				fatal("rnil %P", p);
   450  			if(r1 == r) {
   451  				//fatal("ref to self %P", p);
   452  				continue;
   453  			}
   454  			r->s2 = r1;
   455  			r->p2link = r1->p2;
   456  			r1->p2 = r;
   457  		}
   458  	}
   459  	if(debug['R']) {
   460  		p = firstr->prog;
   461  		print("\n%L %D\n", p->lineno, &p->from);
   462  		print("	addr = %Q\n", addrs);
   463  	}
   464  
   465  	if(debug['R'] && debug['v'])
   466  		dumpit("pass2", firstr);
   467  
   468  	/*
   469  	 * pass 2.5
   470  	 * find looping structure
   471  	 */
   472  	for(r = firstr; r != R; r = r->link)
   473  		r->active = 0;
   474  	change = 0;
   475  	loopit(firstr, nr);
   476  
   477  	if(debug['R'] && debug['v'])
   478  		dumpit("pass2.5", firstr);
   479  
   480  	/*
   481  	 * pass 3
   482  	 * iterate propagating usage
   483  	 * 	back until flow graph is complete
   484  	 */
   485  loop1:
   486  	change = 0;
   487  	for(r = firstr; r != R; r = r->link)
   488  		r->active = 0;
   489  	for(r = firstr; r != R; r = r->link)
   490  		if(r->prog->as == ARET)
   491  			prop(r, zbits, zbits);
   492  loop11:
   493  	/* pick up unreachable code */
   494  	i = 0;
   495  	for(r = firstr; r != R; r = r1) {
   496  		r1 = r->link;
   497  		if(r1 && r1->active && !r->active) {
   498  			prop(r, zbits, zbits);
   499  			i = 1;
   500  		}
   501  	}
   502  	if(i)
   503  		goto loop11;
   504  	if(change)
   505  		goto loop1;
   506  
   507  	if(debug['R'] && debug['v'])
   508  		dumpit("pass3", firstr);
   509  
   510  
   511  	/*
   512  	 * pass 4
   513  	 * iterate propagating register/variable synchrony
   514  	 * 	forward until graph is complete
   515  	 */
   516  loop2:
   517  	change = 0;
   518  	for(r = firstr; r != R; r = r->link)
   519  		r->active = 0;
   520  	synch(firstr, zbits);
   521  	if(change)
   522  		goto loop2;
   523  
   524  	addsplits();
   525  
   526  	if(debug['R'] && debug['v'])
   527  		dumpit("pass4", firstr);
   528  
   529  	if(debug['R'] > 1) {
   530  		print("\nprop structure:\n");
   531  		for(r = firstr; r != R; r = r->link) {
   532  			print("%d:%P", r->loop, r->prog);
   533  			for(z=0; z<BITS; z++) {
   534  				bit.b[z] = r->set.b[z] |
   535  					r->refahead.b[z] | r->calahead.b[z] |
   536  					r->refbehind.b[z] | r->calbehind.b[z] |
   537  					r->use1.b[z] | r->use2.b[z];
   538  				bit.b[z] &= ~addrs.b[z];
   539  			}
   540  
   541  			if(bany(&bit)) {
   542  				print("\t");
   543  				if(bany(&r->use1))
   544  					print(" u1=%Q", r->use1);
   545  				if(bany(&r->use2))
   546  					print(" u2=%Q", r->use2);
   547  				if(bany(&r->set))
   548  					print(" st=%Q", r->set);
   549  				if(bany(&r->refahead))
   550  					print(" ra=%Q", r->refahead);
   551  				if(bany(&r->calahead))
   552  					print(" ca=%Q", r->calahead);
   553  				if(bany(&r->refbehind))
   554  					print(" rb=%Q", r->refbehind);
   555  				if(bany(&r->calbehind))
   556  					print(" cb=%Q", r->calbehind);
   557  			}
   558  			print("\n");
   559  		}
   560  	}
   561  
   562  	/*
   563  	 * pass 4.5
   564  	 * move register pseudo-variables into regu.
   565  	 */
   566  	for(r = firstr; r != R; r = r->link) {
   567  		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   568  
   569  		r->set.b[0] &= ~REGBITS;
   570  		r->use1.b[0] &= ~REGBITS;
   571  		r->use2.b[0] &= ~REGBITS;
   572  		r->refbehind.b[0] &= ~REGBITS;
   573  		r->refahead.b[0] &= ~REGBITS;
   574  		r->calbehind.b[0] &= ~REGBITS;
   575  		r->calahead.b[0] &= ~REGBITS;
   576  		r->regdiff.b[0] &= ~REGBITS;
   577  		r->act.b[0] &= ~REGBITS;
   578  	}
   579  
   580  	if(debug['R'] && debug['v'])
   581  		dumpit("pass4.5", firstr);
   582  
   583  	/*
   584  	 * pass 5
   585  	 * isolate regions
   586  	 * calculate costs (paint1)
   587  	 */
   588  	r = firstr;
   589  	if(r) {
   590  		for(z=0; z<BITS; z++)
   591  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   592  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   593  		if(bany(&bit) & !r->refset) {
   594  			// should never happen - all variables are preset
   595  			if(debug['w'])
   596  				print("%L: used and not set: %Q\n", r->prog->lineno, bit);
   597  			r->refset = 1;
   598  		}
   599  	}
   600  
   601  	for(r = firstr; r != R; r = r->link)
   602  		r->act = zbits;
   603  	rgp = region;
   604  	nregion = 0;
   605  	for(r = firstr; r != R; r = r->link) {
   606  		for(z=0; z<BITS; z++)
   607  			bit.b[z] = r->set.b[z] &
   608  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   609  		if(bany(&bit) && !r->refset) {
   610  			if(debug['w'])
   611  				print("%L: set and not used: %Q\n", r->prog->lineno, bit);
   612  			r->refset = 1;
   613  			excise(r);
   614  		}
   615  		for(z=0; z<BITS; z++)
   616  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   617  		while(bany(&bit)) {
   618  			i = bnum(bit);
   619  			rgp->enter = r;
   620  			rgp->varno = i;
   621  			change = 0;
   622  			if(debug['R'] > 1)
   623  				print("\n");
   624  			paint1(r, i);
   625  			bit.b[i/32] &= ~(1L<<(i%32));
   626  			if(change <= 0) {
   627  				if(debug['R'])
   628  					print("%L $%d: %Q\n",
   629  						r->prog->lineno, change, blsh(i));
   630  				continue;
   631  			}
   632  			rgp->cost = change;
   633  			nregion++;
   634  			if(nregion >= NRGN) {
   635  				if(debug['R'] > 1)
   636  					print("too many regions\n");
   637  				goto brk;
   638  			}
   639  			rgp++;
   640  		}
   641  	}
   642  brk:
   643  	qsort(region, nregion, sizeof(region[0]), rcmp);
   644  
   645  	if(debug['R'] && debug['v'])
   646  		dumpit("pass5", firstr);
   647  
   648  	/*
   649  	 * pass 6
   650  	 * determine used registers (paint2)
   651  	 * replace code (paint3)
   652  	 */
   653  	rgp = region;
   654  	for(i=0; i<nregion; i++) {
   655  		bit = blsh(rgp->varno);
   656  		vreg = paint2(rgp->enter, rgp->varno);
   657  		vreg = allreg(vreg, rgp);
   658  		if(debug['R']) {
   659  			if(rgp->regno >= NREG)
   660  				print("%L $%d F%d: %Q\n",
   661  					rgp->enter->prog->lineno,
   662  					rgp->cost,
   663  					rgp->regno-NREG,
   664  					bit);
   665  			else
   666  				print("%L $%d R%d: %Q\n",
   667  					rgp->enter->prog->lineno,
   668  					rgp->cost,
   669  					rgp->regno,
   670  					bit);
   671  		}
   672  		if(rgp->regno != 0)
   673  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   674  		rgp++;
   675  	}
   676  
   677  	if(debug['R'] && debug['v'])
   678  		dumpit("pass6", firstr);
   679  
   680  	/*
   681  	 * pass 7
   682  	 * peep-hole on basic block
   683  	 */
   684  	if(!debug['R'] || debug['P']) {
   685  		peep();
   686  	}
   687  
   688  	if(debug['R'] && debug['v'])
   689  		dumpit("pass7", firstr);
   690  
   691  	/*
   692  	 * last pass
   693  	 * eliminate nops
   694  	 * free aux structures
   695  	 * adjust the stack pointer
   696  	 *	MOVW.W 	R1,-12(R13)			<<- start
   697  	 *	MOVW   	R0,R1
   698  	 *	MOVW   	R1,8(R13)
   699  	 *	MOVW   	$0,R1
   700  	 *	MOVW   	R1,4(R13)
   701  	 *	BL     	,runtime.newproc+0(SB)
   702  	 *	MOVW   	&ft+-32(SP),R7			<<- adjust
   703  	 *	MOVW   	&j+-40(SP),R6			<<- adjust
   704  	 *	MOVW   	autotmp_0003+-24(SP),R5		<<- adjust
   705  	 *	MOVW   	$12(R13),R13			<<- finish
   706  	 */
   707  	vreg = 0;
   708  	for(p = firstp; p != P; p = p->link) {
   709  		while(p->link != P && p->link->as == ANOP)
   710  			p->link = p->link->link;
   711  		if(p->to.type == D_BRANCH)
   712  			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
   713  				p->to.u.branch = p->to.u.branch->link;
   714  		if(p->as == AMOVW && p->to.reg == 13) {
   715  			if(p->scond & C_WBIT) {
   716  				vreg = -p->to.offset;		// in adjust region
   717  //				print("%P adjusting %d\n", p, vreg);
   718  				continue;
   719  			}
   720  			if(p->from.type == D_CONST && p->to.type == D_REG) {
   721  				if(p->from.offset != vreg)
   722  					print("in and out different\n");
   723  //				print("%P finish %d\n", p, vreg);
   724  				vreg = 0;	// done adjust region
   725  				continue;
   726  			}
   727  
   728  //			print("%P %d %d from type\n", p, p->from.type, D_CONST);
   729  //			print("%P %d %d to type\n\n", p, p->to.type, D_REG);
   730  		}
   731  
   732  		if(p->as == AMOVW && vreg != 0) {
   733  			if(p->from.sym != S)
   734  			if(p->from.name == D_AUTO || p->from.name == D_PARAM) {
   735  				p->from.offset += vreg;
   736  //				print("%P adjusting from %d %d\n", p, vreg, p->from.type);
   737  			}
   738  			if(p->to.sym != S)
   739  			if(p->to.name == D_AUTO || p->to.name == D_PARAM) {
   740  				p->to.offset += vreg;
   741  //				print("%P adjusting to %d %d\n", p, vreg, p->from.type);
   742  			}
   743  		}
   744  	}
   745  	if(lastr != R) {
   746  		lastr->link = freer;
   747  		freer = firstr;
   748  	}
   749  
   750  }
   751  
   752  void
   753  addsplits(void)
   754  {
   755  	Reg *r, *r1;
   756  	int z, i;
   757  	Bits bit;
   758  
   759  	for(r = firstr; r != R; r = r->link) {
   760  		if(r->loop > 1)
   761  			continue;
   762  		if(r->prog->as == ABL)
   763  			continue;
   764  		for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
   765  			if(r1->loop <= 1)
   766  				continue;
   767  			for(z=0; z<BITS; z++)
   768  				bit.b[z] = r1->calbehind.b[z] &
   769  					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
   770  					~(r->calahead.b[z] & addrs.b[z]);
   771  			while(bany(&bit)) {
   772  				i = bnum(bit);
   773  				bit.b[i/32] &= ~(1L << (i%32));
   774  			}
   775  		}
   776  	}
   777  }
   778  
   779  /*
   780   * add mov b,rn
   781   * just after r
   782   */
   783  void
   784  addmove(Reg *r, int bn, int rn, int f)
   785  {
   786  	Prog *p, *p1, *p2;
   787  	Adr *a;
   788  	Var *v;
   789  
   790  	p1 = mal(sizeof(*p1));
   791  	*p1 = zprog;
   792  	p = r->prog;
   793  	
   794  	// If there's a stack fixup coming (after BL newproc or BL deferproc),
   795  	// delay the load until after the fixup.
   796  	p2 = p->link;
   797  	if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG)
   798  		p = p2;
   799  
   800  	p1->link = p->link;
   801  	p->link = p1;
   802  	p1->lineno = p->lineno;
   803  
   804  	v = var + bn;
   805  
   806  	a = &p1->to;
   807  	a->name = v->name;
   808  	a->node = v->node;
   809  	a->sym = v->node->sym;
   810  	a->offset = v->offset;
   811  	a->etype = v->etype;
   812  	a->type = D_OREG;
   813  	if(a->etype == TARRAY || a->sym == S)
   814  		a->type = D_CONST;
   815  
   816  	if(v->addr)
   817  		fatal("addmove: shouldnt be doing this %A\n", a);
   818  
   819  	switch(v->etype) {
   820  	default:
   821  		print("What is this %E\n", v->etype);
   822  
   823  	case TINT8:
   824  		p1->as = AMOVB;
   825  		break;
   826  	case TBOOL:
   827  	case TUINT8:
   828  //print("movbu %E %d %S\n", v->etype, bn, v->sym);
   829  		p1->as = AMOVBU;
   830  		break;
   831  	case TINT16:
   832  		p1->as = AMOVH;
   833  		break;
   834  	case TUINT16:
   835  		p1->as = AMOVHU;
   836  		break;
   837  	case TINT32:
   838  	case TUINT32:
   839  	case TPTR32:
   840  		p1->as = AMOVW;
   841  		break;
   842  	case TFLOAT32:
   843  		p1->as = AMOVF;
   844  		break;
   845  	case TFLOAT64:
   846  		p1->as = AMOVD;
   847  		break;
   848  	}
   849  
   850  	p1->from.type = D_REG;
   851  	p1->from.reg = rn;
   852  	if(rn >= NREG) {
   853  		p1->from.type = D_FREG;
   854  		p1->from.reg = rn-NREG;
   855  	}
   856  	if(!f) {
   857  		p1->from = *a;
   858  		*a = zprog.from;
   859  		a->type = D_REG;
   860  		a->reg = rn;
   861  		if(rn >= NREG) {
   862  			a->type = D_FREG;
   863  			a->reg = rn-NREG;
   864  		}
   865  		if(v->etype == TUINT8 || v->etype == TBOOL)
   866  			p1->as = AMOVBU;
   867  		if(v->etype == TUINT16)
   868  			p1->as = AMOVHU;
   869  	}
   870  	if(debug['R'])
   871  		print("%P\t.a%P\n", p, p1);
   872  }
   873  
   874  static int
   875  overlap(int32 o1, int w1, int32 o2, int w2)
   876  {
   877  	int32 t1, t2;
   878  
   879  	t1 = o1+w1;
   880  	t2 = o2+w2;
   881  
   882  	if(!(t1 > o2 && t2 > o1))
   883  		return 0;
   884  
   885  	return 1;
   886  }
   887  
   888  Bits
   889  mkvar(Reg *r, Adr *a)
   890  {
   891  	Var *v;
   892  	int i, t, n, et, z, w, flag;
   893  	int32 o;
   894  	Bits bit;
   895  	Node *node;
   896  
   897  	// mark registers used
   898  	t = a->type;
   899  
   900  	flag = 0;
   901  	switch(t) {
   902  	default:
   903  		print("type %d %d %D\n", t, a->name, a);
   904  		goto none;
   905  
   906  	case D_NONE:
   907  	case D_FCONST:
   908  	case D_BRANCH:
   909  		break;
   910  
   911  	case D_CONST:
   912  		flag = 1;
   913  		goto onereg;
   914  
   915  	case D_REGREG:
   916  	case D_REGREG2:
   917  		bit = zbits;
   918  		if(a->offset != NREG)
   919  			bit.b[0] |= RtoB(a->offset);
   920  		if(a->reg != NREG)
   921  			bit.b[0] |= RtoB(a->reg);
   922  		return bit;
   923  
   924  	case D_REG:
   925  	case D_SHIFT:
   926  	onereg:
   927  		if(a->reg != NREG) {
   928  			bit = zbits;
   929  			bit.b[0] = RtoB(a->reg);
   930  			return bit;
   931  		}
   932  		break;
   933  
   934  	case D_OREG:
   935  		if(a->reg != NREG) {
   936  			if(a == &r->prog->from)
   937  				r->use1.b[0] |= RtoB(a->reg);
   938  			else
   939  				r->use2.b[0] |= RtoB(a->reg);
   940  			if(r->prog->scond & (C_PBIT|C_WBIT))
   941  				r->set.b[0] |= RtoB(a->reg);
   942  		}
   943  		break;
   944  
   945  	case D_FREG:
   946  		if(a->reg != NREG) {
   947  			bit = zbits;
   948  			bit.b[0] = FtoB(a->reg);
   949  			return bit;
   950  		}
   951  		break;
   952  	}
   953  
   954  	switch(a->name) {
   955  	default:
   956  		goto none;
   957  
   958  	case D_EXTERN:
   959  	case D_STATIC:
   960  	case D_AUTO:
   961  	case D_PARAM:
   962  		n = a->name;
   963  		break;
   964  	}
   965  
   966  	node = a->node;
   967  	if(node == N || node->op != ONAME || node->orig == N)
   968  		goto none;
   969  	node = node->orig;
   970  	if(node->orig != node)
   971  		fatal("%D: bad node", a);
   972  	if(node->sym == S || node->sym->name[0] == '.')
   973  		goto none;
   974  	et = a->etype;
   975  	o = a->offset;
   976  	w = a->width;
   977  	if(w < 0)
   978  		fatal("bad width %d for %D", w, a);
   979  
   980  	for(i=0; i<nvar; i++) {
   981  		v = var+i;
   982  		if(v->node == node && v->name == n) {
   983  			if(v->offset == o)
   984  			if(v->etype == et)
   985  			if(v->width == w)
   986  				if(!flag)
   987  					return blsh(i);
   988  
   989  			// if they overlap, disable both
   990  			if(overlap(v->offset, v->width, o, w)) {
   991  				v->addr = 1;
   992  				flag = 1;
   993  			}
   994  		}
   995  	}
   996  
   997  	switch(et) {
   998  	case 0:
   999  	case TFUNC:
  1000  		goto none;
  1001  	}
  1002  
  1003  	if(nvar >= NVAR) {
  1004  		if(debug['w'] > 1 && node)
  1005  			fatal("variable not optimized: %D", a);
  1006  		goto none;
  1007  	}
  1008  
  1009  	i = nvar;
  1010  	nvar++;
  1011  //print("var %d %E %D %S\n", i, et, a, s);
  1012  	v = var+i;
  1013  	v->offset = o;
  1014  	v->name = n;
  1015  	v->etype = et;
  1016  	v->width = w;
  1017  	v->addr = flag;		// funny punning
  1018  	v->node = node;
  1019  	
  1020  	if(debug['R'])
  1021  		print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
  1022  
  1023  	bit = blsh(i);
  1024  	if(n == D_EXTERN || n == D_STATIC)
  1025  		for(z=0; z<BITS; z++)
  1026  			externs.b[z] |= bit.b[z];
  1027  	if(n == D_PARAM)
  1028  		for(z=0; z<BITS; z++)
  1029  			params.b[z] |= bit.b[z];
  1030  
  1031  	return bit;
  1032  
  1033  none:
  1034  	return zbits;
  1035  }
  1036  
  1037  void
  1038  prop(Reg *r, Bits ref, Bits cal)
  1039  {
  1040  	Reg *r1, *r2;
  1041  	int z;
  1042  
  1043  	for(r1 = r; r1 != R; r1 = r1->p1) {
  1044  		for(z=0; z<BITS; z++) {
  1045  			ref.b[z] |= r1->refahead.b[z];
  1046  			if(ref.b[z] != r1->refahead.b[z]) {
  1047  				r1->refahead.b[z] = ref.b[z];
  1048  				change++;
  1049  			}
  1050  			cal.b[z] |= r1->calahead.b[z];
  1051  			if(cal.b[z] != r1->calahead.b[z]) {
  1052  				r1->calahead.b[z] = cal.b[z];
  1053  				change++;
  1054  			}
  1055  		}
  1056  		switch(r1->prog->as) {
  1057  		case ABL:
  1058  			if(noreturn(r1->prog))
  1059  				break;
  1060  			for(z=0; z<BITS; z++) {
  1061  				cal.b[z] |= ref.b[z] | externs.b[z];
  1062  				ref.b[z] = 0;
  1063  			}
  1064  			break;
  1065  
  1066  		case ATEXT:
  1067  			for(z=0; z<BITS; z++) {
  1068  				cal.b[z] = 0;
  1069  				ref.b[z] = 0;
  1070  			}
  1071  			break;
  1072  
  1073  		case ARET:
  1074  			for(z=0; z<BITS; z++) {
  1075  				cal.b[z] = externs.b[z] | ovar.b[z];
  1076  				ref.b[z] = 0;
  1077  			}
  1078  			break;
  1079  
  1080  		default:
  1081  			// Work around for issue 1304:
  1082  			// flush modified globals before each instruction.
  1083  			for(z=0; z<BITS; z++) {
  1084  				cal.b[z] |= externs.b[z];
  1085  				// issue 4066: flush modified return variables in case of panic
  1086  				if(hasdefer)
  1087  					cal.b[z] |= ovar.b[z];
  1088  			}
  1089  			break;
  1090  		}
  1091  		for(z=0; z<BITS; z++) {
  1092  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  1093  				r1->use1.b[z] | r1->use2.b[z];
  1094  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  1095  			r1->refbehind.b[z] = ref.b[z];
  1096  			r1->calbehind.b[z] = cal.b[z];
  1097  		}
  1098  		if(r1->active)
  1099  			break;
  1100  		r1->active = 1;
  1101  	}
  1102  	for(; r != r1; r = r->p1)
  1103  		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  1104  			prop(r2, r->refbehind, r->calbehind);
  1105  }
  1106  
  1107  /*
  1108   * find looping structure
  1109   *
  1110   * 1) find reverse postordering
  1111   * 2) find approximate dominators,
  1112   *	the actual dominators if the flow graph is reducible
  1113   *	otherwise, dominators plus some other non-dominators.
  1114   *	See Matthew S. Hecht and Jeffrey D. Ullman,
  1115   *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
  1116   *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  1117   *	Oct. 1-3, 1973, pp.  207-217.
  1118   * 3) find all nodes with a predecessor dominated by the current node.
  1119   *	such a node is a loop head.
  1120   *	recursively, all preds with a greater rpo number are in the loop
  1121   */
  1122  int32
  1123  postorder(Reg *r, Reg **rpo2r, int32 n)
  1124  {
  1125  	Reg *r1;
  1126  
  1127  	r->rpo = 1;
  1128  	r1 = r->s1;
  1129  	if(r1 && !r1->rpo)
  1130  		n = postorder(r1, rpo2r, n);
  1131  	r1 = r->s2;
  1132  	if(r1 && !r1->rpo)
  1133  		n = postorder(r1, rpo2r, n);
  1134  	rpo2r[n] = r;
  1135  	n++;
  1136  	return n;
  1137  }
  1138  
  1139  int32
  1140  rpolca(int32 *idom, int32 rpo1, int32 rpo2)
  1141  {
  1142  	int32 t;
  1143  
  1144  	if(rpo1 == -1)
  1145  		return rpo2;
  1146  	while(rpo1 != rpo2){
  1147  		if(rpo1 > rpo2){
  1148  			t = rpo2;
  1149  			rpo2 = rpo1;
  1150  			rpo1 = t;
  1151  		}
  1152  		while(rpo1 < rpo2){
  1153  			t = idom[rpo2];
  1154  			if(t >= rpo2)
  1155  				fatal("bad idom");
  1156  			rpo2 = t;
  1157  		}
  1158  	}
  1159  	return rpo1;
  1160  }
  1161  
  1162  int
  1163  doms(int32 *idom, int32 r, int32 s)
  1164  {
  1165  	while(s > r)
  1166  		s = idom[s];
  1167  	return s == r;
  1168  }
  1169  
  1170  int
  1171  loophead(int32 *idom, Reg *r)
  1172  {
  1173  	int32 src;
  1174  
  1175  	src = r->rpo;
  1176  	if(r->p1 != R && doms(idom, src, r->p1->rpo))
  1177  		return 1;
  1178  	for(r = r->p2; r != R; r = r->p2link)
  1179  		if(doms(idom, src, r->rpo))
  1180  			return 1;
  1181  	return 0;
  1182  }
  1183  
  1184  void
  1185  loopmark(Reg **rpo2r, int32 head, Reg *r)
  1186  {
  1187  	if(r->rpo < head || r->active == head)
  1188  		return;
  1189  	r->active = head;
  1190  	r->loop += LOOP;
  1191  	if(r->p1 != R)
  1192  		loopmark(rpo2r, head, r->p1);
  1193  	for(r = r->p2; r != R; r = r->p2link)
  1194  		loopmark(rpo2r, head, r);
  1195  }
  1196  
  1197  void
  1198  loopit(Reg *r, int32 nr)
  1199  {
  1200  	Reg *r1;
  1201  	int32 i, d, me;
  1202  
  1203  	if(nr > maxnr) {
  1204  		rpo2r = mal(nr * sizeof(Reg*));
  1205  		idom = mal(nr * sizeof(int32));
  1206  		maxnr = nr;
  1207  	}
  1208  	d = postorder(r, rpo2r, 0);
  1209  	if(d > nr)
  1210  		fatal("too many reg nodes");
  1211  	nr = d;
  1212  	for(i = 0; i < nr / 2; i++){
  1213  		r1 = rpo2r[i];
  1214  		rpo2r[i] = rpo2r[nr - 1 - i];
  1215  		rpo2r[nr - 1 - i] = r1;
  1216  	}
  1217  	for(i = 0; i < nr; i++)
  1218  		rpo2r[i]->rpo = i;
  1219  
  1220  	idom[0] = 0;
  1221  	for(i = 0; i < nr; i++){
  1222  		r1 = rpo2r[i];
  1223  		me = r1->rpo;
  1224  		d = -1;
  1225  		// rpo2r[r->rpo] == r protects against considering dead code,
  1226  		// which has r->rpo == 0.
  1227  		if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
  1228  			d = r1->p1->rpo;
  1229  		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  1230  			if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
  1231  				d = rpolca(idom, d, r1->rpo);
  1232  		idom[i] = d;
  1233  	}
  1234  
  1235  	for(i = 0; i < nr; i++){
  1236  		r1 = rpo2r[i];
  1237  		r1->loop++;
  1238  		if(r1->p2 != R && loophead(idom, r1))
  1239  			loopmark(rpo2r, i, r1);
  1240  	}
  1241  }
  1242  
  1243  void
  1244  synch(Reg *r, Bits dif)
  1245  {
  1246  	Reg *r1;
  1247  	int z;
  1248  
  1249  	for(r1 = r; r1 != R; r1 = r1->s1) {
  1250  		for(z=0; z<BITS; z++) {
  1251  			dif.b[z] = (dif.b[z] &
  1252  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  1253  					r1->set.b[z] | r1->regdiff.b[z];
  1254  			if(dif.b[z] != r1->regdiff.b[z]) {
  1255  				r1->regdiff.b[z] = dif.b[z];
  1256  				change++;
  1257  			}
  1258  		}
  1259  		if(r1->active)
  1260  			break;
  1261  		r1->active = 1;
  1262  		for(z=0; z<BITS; z++)
  1263  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  1264  		if(r1->s2 != R)
  1265  			synch(r1->s2, dif);
  1266  	}
  1267  }
  1268  
  1269  uint32
  1270  allreg(uint32 b, Rgn *r)
  1271  {
  1272  	Var *v;
  1273  	int i;
  1274  
  1275  	v = var + r->varno;
  1276  	r->regno = 0;
  1277  	switch(v->etype) {
  1278  
  1279  	default:
  1280  		fatal("unknown etype %d/%E", bitno(b), v->etype);
  1281  		break;
  1282  
  1283  	case TINT8:
  1284  	case TUINT8:
  1285  	case TINT16:
  1286  	case TUINT16:
  1287  	case TINT32:
  1288  	case TUINT32:
  1289  	case TINT:
  1290  	case TUINT:
  1291  	case TUINTPTR:
  1292  	case TBOOL:
  1293  	case TPTR32:
  1294  		i = BtoR(~b);
  1295  		if(i && r->cost >= 0) {
  1296  			r->regno = i;
  1297  			return RtoB(i);
  1298  		}
  1299  		break;
  1300  
  1301  	case TFLOAT32:
  1302  	case TFLOAT64:
  1303  		i = BtoF(~b);
  1304  		if(i && r->cost >= 0) {
  1305  			r->regno = i+NREG;
  1306  			return FtoB(i);
  1307  		}
  1308  		break;
  1309  
  1310  	case TINT64:
  1311  	case TUINT64:
  1312  	case TPTR64:
  1313  	case TINTER:
  1314  	case TSTRUCT:
  1315  	case TARRAY:
  1316  		break;
  1317  	}
  1318  	return 0;
  1319  }
  1320  
  1321  void
  1322  paint1(Reg *r, int bn)
  1323  {
  1324  	Reg *r1;
  1325  	Prog *p;
  1326  	int z;
  1327  	uint32 bb;
  1328  
  1329  	z = bn/32;
  1330  	bb = 1L<<(bn%32);
  1331  	if(r->act.b[z] & bb)
  1332  		return;
  1333  	for(;;) {
  1334  		if(!(r->refbehind.b[z] & bb))
  1335  			break;
  1336  		r1 = r->p1;
  1337  		if(r1 == R)
  1338  			break;
  1339  		if(!(r1->refahead.b[z] & bb))
  1340  			break;
  1341  		if(r1->act.b[z] & bb)
  1342  			break;
  1343  		r = r1;
  1344  	}
  1345  
  1346  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
  1347  		change -= CLOAD * r->loop;
  1348  		if(debug['R'] > 1)
  1349  			print("%d%P\td %Q $%d\n", r->loop,
  1350  				r->prog, blsh(bn), change);
  1351  	}
  1352  	for(;;) {
  1353  		r->act.b[z] |= bb;
  1354  		p = r->prog;
  1355  
  1356  		if(r->use1.b[z] & bb) {
  1357  			change += CREF * r->loop;
  1358  			if(debug['R'] > 1)
  1359  				print("%d%P\tu1 %Q $%d\n", r->loop,
  1360  					p, blsh(bn), change);
  1361  		}
  1362  
  1363  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1364  			change += CREF * r->loop;
  1365  			if(debug['R'] > 1)
  1366  				print("%d%P\tu2 %Q $%d\n", r->loop,
  1367  					p, blsh(bn), change);
  1368  		}
  1369  
  1370  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1371  			change -= CLOAD * r->loop;
  1372  			if(debug['R'] > 1)
  1373  				print("%d%P\tst %Q $%d\n", r->loop,
  1374  					p, blsh(bn), change);
  1375  		}
  1376  
  1377  		if(r->refbehind.b[z] & bb)
  1378  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1379  				if(r1->refahead.b[z] & bb)
  1380  					paint1(r1, bn);
  1381  
  1382  		if(!(r->refahead.b[z] & bb))
  1383  			break;
  1384  		r1 = r->s2;
  1385  		if(r1 != R)
  1386  			if(r1->refbehind.b[z] & bb)
  1387  				paint1(r1, bn);
  1388  		r = r->s1;
  1389  		if(r == R)
  1390  			break;
  1391  		if(r->act.b[z] & bb)
  1392  			break;
  1393  		if(!(r->refbehind.b[z] & bb))
  1394  			break;
  1395  	}
  1396  }
  1397  
  1398  uint32
  1399  paint2(Reg *r, int bn)
  1400  {
  1401  	Reg *r1;
  1402  	int z;
  1403  	uint32 bb, vreg;
  1404  
  1405  	z = bn/32;
  1406  	bb = 1L << (bn%32);
  1407  	vreg = regbits;
  1408  	if(!(r->act.b[z] & bb))
  1409  		return vreg;
  1410  	for(;;) {
  1411  		if(!(r->refbehind.b[z] & bb))
  1412  			break;
  1413  		r1 = r->p1;
  1414  		if(r1 == R)
  1415  			break;
  1416  		if(!(r1->refahead.b[z] & bb))
  1417  			break;
  1418  		if(!(r1->act.b[z] & bb))
  1419  			break;
  1420  		r = r1;
  1421  	}
  1422  	for(;;) {
  1423  		r->act.b[z] &= ~bb;
  1424  
  1425  		vreg |= r->regu;
  1426  
  1427  		if(r->refbehind.b[z] & bb)
  1428  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1429  				if(r1->refahead.b[z] & bb)
  1430  					vreg |= paint2(r1, bn);
  1431  
  1432  		if(!(r->refahead.b[z] & bb))
  1433  			break;
  1434  		r1 = r->s2;
  1435  		if(r1 != R)
  1436  			if(r1->refbehind.b[z] & bb)
  1437  				vreg |= paint2(r1, bn);
  1438  		r = r->s1;
  1439  		if(r == R)
  1440  			break;
  1441  		if(!(r->act.b[z] & bb))
  1442  			break;
  1443  		if(!(r->refbehind.b[z] & bb))
  1444  			break;
  1445  	}
  1446  	return vreg;
  1447  }
  1448  
  1449  void
  1450  paint3(Reg *r, int bn, int32 rb, int rn)
  1451  {
  1452  	Reg *r1;
  1453  	Prog *p;
  1454  	int z;
  1455  	uint32 bb;
  1456  
  1457  	z = bn/32;
  1458  	bb = 1L << (bn%32);
  1459  	if(r->act.b[z] & bb)
  1460  		return;
  1461  	for(;;) {
  1462  		if(!(r->refbehind.b[z] & bb))
  1463  			break;
  1464  		r1 = r->p1;
  1465  		if(r1 == R)
  1466  			break;
  1467  		if(!(r1->refahead.b[z] & bb))
  1468  			break;
  1469  		if(r1->act.b[z] & bb)
  1470  			break;
  1471  		r = r1;
  1472  	}
  1473  
  1474  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1475  		addmove(r, bn, rn, 0);
  1476  
  1477  	for(;;) {
  1478  		r->act.b[z] |= bb;
  1479  		p = r->prog;
  1480  
  1481  		if(r->use1.b[z] & bb) {
  1482  			if(debug['R'])
  1483  				print("%P", p);
  1484  			addreg(&p->from, rn);
  1485  			if(debug['R'])
  1486  				print("\t.c%P\n", p);
  1487  		}
  1488  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1489  			if(debug['R'])
  1490  				print("%P", p);
  1491  			addreg(&p->to, rn);
  1492  			if(debug['R'])
  1493  				print("\t.c%P\n", p);
  1494  		}
  1495  
  1496  		if(STORE(r) & r->regdiff.b[z] & bb)
  1497  			addmove(r, bn, rn, 1);
  1498  		r->regu |= rb;
  1499  
  1500  		if(r->refbehind.b[z] & bb)
  1501  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1502  				if(r1->refahead.b[z] & bb)
  1503  					paint3(r1, bn, rb, rn);
  1504  
  1505  		if(!(r->refahead.b[z] & bb))
  1506  			break;
  1507  		r1 = r->s2;
  1508  		if(r1 != R)
  1509  			if(r1->refbehind.b[z] & bb)
  1510  				paint3(r1, bn, rb, rn);
  1511  		r = r->s1;
  1512  		if(r == R)
  1513  			break;
  1514  		if(r->act.b[z] & bb)
  1515  			break;
  1516  		if(!(r->refbehind.b[z] & bb))
  1517  			break;
  1518  	}
  1519  }
  1520  
  1521  void
  1522  addreg(Adr *a, int rn)
  1523  {
  1524  	a->sym = 0;
  1525  	a->name = D_NONE;
  1526  	a->type = D_REG;
  1527  	a->reg = rn;
  1528  	if(rn >= NREG) {
  1529  		a->type = D_FREG;
  1530  		a->reg = rn-NREG;
  1531  	}
  1532  }
  1533  
  1534  /*
  1535   *	bit	reg
  1536   *	0	R0
  1537   *	1	R1
  1538   *	...	...
  1539   *	10	R10
  1540   *	12  R12
  1541   */
  1542  int32
  1543  RtoB(int r)
  1544  {
  1545  	if(r >= REGTMP-2 && r != 12)	// excluded R9 and R10 for m and g, but not R12
  1546  		return 0;
  1547  	return 1L << r;
  1548  }
  1549  
  1550  int
  1551  BtoR(int32 b)
  1552  {
  1553  	b &= 0x11fcL;	// excluded R9 and R10 for m and g, but not R12
  1554  	if(b == 0)
  1555  		return 0;
  1556  	return bitno(b);
  1557  }
  1558  
  1559  /*
  1560   *	bit	reg
  1561   *	18	F2
  1562   *	19	F3
  1563   *	...	...
  1564   *	31	F15
  1565   */
  1566  int32
  1567  FtoB(int f)
  1568  {
  1569  
  1570  	if(f < 2 || f > NFREG-1)
  1571  		return 0;
  1572  	return 1L << (f + 16);
  1573  }
  1574  
  1575  int
  1576  BtoF(int32 b)
  1577  {
  1578  
  1579  	b &= 0xfffc0000L;
  1580  	if(b == 0)
  1581  		return 0;
  1582  	return bitno(b) - 16;
  1583  }
  1584  
  1585  static Sym*	symlist[10];
  1586  
  1587  int
  1588  noreturn(Prog *p)
  1589  {
  1590  	Sym *s;
  1591  	int i;
  1592  
  1593  	if(symlist[0] == S) {
  1594  		symlist[0] = pkglookup("panicindex", runtimepkg);
  1595  		symlist[1] = pkglookup("panicslice", runtimepkg);
  1596  		symlist[2] = pkglookup("throwinit", runtimepkg);
  1597  		symlist[3] = pkglookup("panic", runtimepkg);
  1598  		symlist[4] = pkglookup("panicwrap", runtimepkg);
  1599  	}
  1600  
  1601  	s = p->to.sym;
  1602  	if(s == S)
  1603  		return 0;
  1604  	for(i=0; symlist[i]!=S; i++)
  1605  		if(s == symlist[i])
  1606  			return 1;
  1607  	return 0;
  1608  }
  1609  
  1610  void
  1611  dumpone(Reg *r)
  1612  {
  1613  	int z;
  1614  	Bits bit;
  1615  
  1616  	print("%d:%P", r->loop, r->prog);
  1617  	for(z=0; z<BITS; z++)
  1618  		bit.b[z] =
  1619  			r->set.b[z] |
  1620  			r->use1.b[z] |
  1621  			r->use2.b[z] |
  1622  			r->refbehind.b[z] |
  1623  			r->refahead.b[z] |
  1624  			r->calbehind.b[z] |
  1625  			r->calahead.b[z] |
  1626  			r->regdiff.b[z] |
  1627  			r->act.b[z] |
  1628  				0;
  1629  	if(bany(&bit)) {
  1630  		print("\t");
  1631  		if(bany(&r->set))
  1632  			print(" s:%Q", r->set);
  1633  		if(bany(&r->use1))
  1634  			print(" u1:%Q", r->use1);
  1635  		if(bany(&r->use2))
  1636  			print(" u2:%Q", r->use2);
  1637  		if(bany(&r->refbehind))
  1638  			print(" rb:%Q ", r->refbehind);
  1639  		if(bany(&r->refahead))
  1640  			print(" ra:%Q ", r->refahead);
  1641  		if(bany(&r->calbehind))
  1642  			print(" cb:%Q ", r->calbehind);
  1643  		if(bany(&r->calahead))
  1644  			print(" ca:%Q ", r->calahead);
  1645  		if(bany(&r->regdiff))
  1646  			print(" d:%Q ", r->regdiff);
  1647  		if(bany(&r->act))
  1648  			print(" a:%Q ", r->act);
  1649  	}
  1650  	print("\n");
  1651  }
  1652  
  1653  void
  1654  dumpit(char *str, Reg *r0)
  1655  {
  1656  	Reg *r, *r1;
  1657  
  1658  	print("\n%s\n", str);
  1659  	for(r = r0; r != R; r = r->link) {
  1660  		dumpone(r);
  1661  		r1 = r->p2;
  1662  		if(r1 != R) {
  1663  			print("	pred:");
  1664  			for(; r1 != R; r1 = r1->p2link)
  1665  				print(" %.4ud", r1->prog->loc);
  1666  			print("\n");
  1667  		}
  1668  //		r1 = r->s1;
  1669  //		if(r1 != R) {
  1670  //			print("	succ:");
  1671  //			for(; r1 != R; r1 = r1->s1)
  1672  //				print(" %.4ud", r1->prog->loc);
  1673  //			print("\n");
  1674  //		}
  1675  	}
  1676  }
  1677  
  1678  /*
  1679   * the code generator depends on being able to write out JMP (B)
  1680   * instructions that it can jump to now but fill in later.
  1681   * the linker will resolve them nicely, but they make the code
  1682   * longer and more difficult to follow during debugging.
  1683   * remove them.
  1684   */
  1685  
  1686  /* what instruction does a JMP to p eventually land on? */
  1687  static Prog*
  1688  chasejmp(Prog *p, int *jmploop)
  1689  {
  1690  	int n;
  1691  
  1692  	n = 0;
  1693  	while(p != P && p->as == AB && p->to.type == D_BRANCH) {
  1694  		if(++n > 10) {
  1695  			*jmploop = 1;
  1696  			break;
  1697  		}
  1698  		p = p->to.u.branch;
  1699  	}
  1700  	return p;
  1701  }
  1702  
  1703  /*
  1704   * reuse reg pointer for mark/sweep state.
  1705   * leave reg==nil at end because alive==nil.
  1706   */
  1707  #define alive ((void*)0)
  1708  #define dead ((void*)1)
  1709  
  1710  /* mark all code reachable from firstp as alive */
  1711  static void
  1712  mark(Prog *firstp)
  1713  {
  1714  	Prog *p;
  1715  	
  1716  	for(p=firstp; p; p=p->link) {
  1717  		if(p->regp != dead)
  1718  			break;
  1719  		p->regp = alive;
  1720  		if(p->as != ABL && p->to.type == D_BRANCH && p->to.u.branch)
  1721  			mark(p->to.u.branch);
  1722  		if(p->as == AB || p->as == ARET || (p->as == ABL && noreturn(p)))
  1723  			break;
  1724  	}
  1725  }
  1726  
  1727  static void
  1728  fixjmp(Prog *firstp)
  1729  {
  1730  	int jmploop;
  1731  	Prog *p, *last;
  1732  	
  1733  	if(debug['R'] && debug['v'])
  1734  		print("\nfixjmp\n");
  1735  
  1736  	// pass 1: resolve jump to B, mark all code as dead.
  1737  	jmploop = 0;
  1738  	for(p=firstp; p; p=p->link) {
  1739  		if(debug['R'] && debug['v'])
  1740  			print("%P\n", p);
  1741  		if(p->as != ABL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AB) {
  1742  			p->to.u.branch = chasejmp(p->to.u.branch, &jmploop);
  1743  			if(debug['R'] && debug['v'])
  1744  				print("->%P\n", p);
  1745  		}
  1746  		p->regp = dead;
  1747  	}
  1748  	if(debug['R'] && debug['v'])
  1749  		print("\n");
  1750  	
  1751  	// pass 2: mark all reachable code alive
  1752  	mark(firstp);
  1753  	
  1754  	// pass 3: delete dead code (mostly JMPs).
  1755  	last = nil;
  1756  	for(p=firstp; p; p=p->link) {
  1757  		if(p->regp == dead) {
  1758  			if(p->link == P && p->as == ARET && last && last->as != ARET) {
  1759  				// This is the final ARET, and the code so far doesn't have one.
  1760  				// Let it stay.
  1761  			} else {
  1762  				if(debug['R'] && debug['v'])
  1763  					print("del %P\n", p);
  1764  				continue;
  1765  			}
  1766  		}
  1767  		if(last)
  1768  			last->link = p;
  1769  		last = p;
  1770  	}
  1771  	last->link = P;
  1772  	
  1773  	// pass 4: elide JMP to next instruction.
  1774  	// only safe if there are no jumps to JMPs anymore.
  1775  	if(!jmploop) {
  1776  		last = nil;
  1777  		for(p=firstp; p; p=p->link) {
  1778  			if(p->as == AB && p->to.type == D_BRANCH && p->to.u.branch == p->link) {
  1779  				if(debug['R'] && debug['v'])
  1780  					print("del %P\n", p);
  1781  				continue;
  1782  			}
  1783  			if(last)
  1784  				last->link = p;
  1785  			last = p;
  1786  		}
  1787  		last->link = P;
  1788  	}
  1789  	
  1790  	if(debug['R'] && debug['v']) {
  1791  		print("\n");
  1792  		for(p=firstp; p; p=p->link)
  1793  			print("%P\n", p);
  1794  		print("\n");
  1795  	}
  1796  }