github.com/akaros/go-akaros@v0.0.0-20181004170632-85005d477eab/src/cmd/8c/reg.c (about)

     1  // Inferno utils/8c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/8c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include "gc.h"
    32  
    /* Forward declaration: fixjmp is file-local and is called from regopt (pass 2.1), so it is declared ahead of that use. */
    33  static	void	fixjmp(Reg*);
    34  
    35  Reg*
    36  rega(void)
    37  {
    38  	Reg *r;
    39  
    40  	r = freer;
    41  	if(r == R) {
    42  		r = alloc(sizeof(*r));
    43  	} else
    44  		freer = r->link;
    45  
    46  	*r = zreg;
    47  	return r;
    48  }
    49  
    50  int
    51  rcmp(const void *a1, const void *a2)
    52  {
    53  	Rgn *p1, *p2;
    54  	int c1, c2;
    55  
    56  	p1 = (Rgn*)a1;
    57  	p2 = (Rgn*)a2;
    58  	c1 = p2->cost;
    59  	c2 = p1->cost;
    60  	if(c1 -= c2)
    61  		return c1;
    62  	return p2->varno - p1->varno;
    63  }
    64  
    /*
     * regopt is the driver of the register optimizer for the instruction
     * list that starts at p.  In order, it:
     *   1.   creates one Reg node per instruction (ADATA, AGLOBL, ANAME,
     *        ASIGNAME and AFUNCDATA are skipped), records which variables
     *        each operand reads/writes via mkvar, and notes registers that
     *        particular opcodes use implicitly in r->regu;
     *   2.   resolves D_BRANCH operands to Reg pointers (s2) and builds the
     *        p2/p2link predecessor chains;
     *   2.1  calls fixjmp; 2.5 calls loopit to fill in r->loop;
     *   3.   iterates prop() until `change` stays zero;
     *   4.   iterates synch() until `change` stays zero;
     *   5.   warns about variables used-and-not-set / set-and-not-used,
     *        and collects candidate regions with their cost (paint1);
     *   6.   sorts the regions with rcmp and, for each, picks a register
     *        (paint2 + allreg) and rewrites the code (paint3);
     *   7.   runs peep() unless debug['R'] is set without debug['P'];
     *   8.   renumbers pcs, rewrites branch offsets, unlinks ANOPs and
     *        returns all Reg nodes to the freer list.
     * Returns early, doing nothing further, when no Reg node was created.
     */
    65  void
    66  regopt(Prog *p)
    67  {
    68  	Reg *r, *r1, *r2;
    69  	Prog *p1;
    70  	int i, z;
    71  	int32 initpc, val, npc;
    72  	uint32 vreg;
    73  	Bits bit;
    74  	struct
    75  	{
    76  		int32	m;
    77  		int32	c;
    78  		Reg*	p;
    79  	} log5[6], *lp;
    80  
    81  	firstr = R;
    82  	lastr = R;
    83  	nvar = 0;
    84  	regbits = RtoB(D_SP) | RtoB(D_AX);
    85  	for(z=0; z<BITS; z++) {
    86  		externs.b[z] = 0;
    87  		params.b[z] = 0;
    88  		consts.b[z] = 0;
    89  		addrs.b[z] = 0;
    90  	}
    91  
    92  	/*
    93  	 * pass 1
    94  	 * build aux data structure
    95  	 * allocate pcs
    96  	 * find use and set of variables
    97  	 */
        	/*
        	 * log5[0..4].m is set to 5^5, 5^4, ..., 5^1.  The counters are
        	 * used below to hang Reg.log5 skip pointers on the node list;
        	 * pass 2 follows them to reach a branch target faster.
        	 * log5[5] exists only so that (lp+1)->c is a valid store when
        	 * lp points at log5[4].
        	 */
    98  	val = 5L * 5L * 5L * 5L * 5L;
    99  	lp = log5;
   100  	for(i=0; i<5; i++) {
   101  		lp->m = val;
   102  		lp->c = 0;
   103  		lp->p = R;
   104  		val /= 5L;
   105  		lp++;
   106  	}
        	/* from here on val counts the Reg nodes created; it doubles as the node's pc */
   107  	val = 0;
   108  	for(; p != P; p = p->link) {
   109  		switch(p->as) {
   110  		case ADATA:
   111  		case AGLOBL:
   112  		case ANAME:
   113  		case ASIGNAME:
   114  		case AFUNCDATA:
   115  			continue;
   116  		}
   117  		r = rega();
   118  		if(firstr == R) {
   119  			firstr = r;
   120  			lastr = r;
   121  		} else {
        			/* append r and wire the fall-through edge lastr -> r (s1/p1) */
   122  			lastr->link = r;
   123  			r->p1 = lastr;
   124  			lastr->s1 = r;
   125  			lastr = r;
   126  		}
   127  		r->prog = p;
   128  		r->pc = val;
   129  		val++;
   130  
   131  		lp = log5;
   132  		for(i=0; i<5; i++) {
   133  			lp->c--;
   134  			if(lp->c <= 0) {
   135  				lp->c = lp->m;
   136  				if(lp->p != R)
   137  					lp->p->log5 = r;
   138  				lp->p = r;
   139  				(lp+1)->c = 0;
   140  				break;
   141  			}
   142  			lp++;
   143  		}
   144  
        		/* after ARET/AJMP/AIRETL the fall-through edge added above is removed again */
   145  		r1 = r->p1;
   146  		if(r1 != R)
   147  		switch(r1->prog->as) {
   148  		case ARET:
   149  		case AJMP:
   150  		case AIRETL:
   151  			r->p1 = R;
   152  			r1->s1 = R;
   153  		}
   154  		bit = mkvar(r, &p->from);
   155  		if(bany(&bit))
   156  		switch(p->as) {
   157  		/*
   158  		 * funny
   159  		 */
   160  		case ALEAL:
   161  			for(z=0; z<BITS; z++)
   162  				addrs.b[z] |= bit.b[z];
   163  			break;
   164  
   165  		/*
   166  		 * left side read
   167  		 */
   168  		default:
   169  			for(z=0; z<BITS; z++)
   170  				r->use1.b[z] |= bit.b[z];
   171  			break;
   172  		}
   173  
   174  		bit = mkvar(r, &p->to);
   175  		if(bany(&bit))
   176  		switch(p->as) {
   177  		default:
   178  			diag(Z, "reg: unknown op: %A", p->as);
   179  			break;
   180  
   181  		/*
   182  		 * right side read
   183  		 */
   184  		case ACMPB:
   185  		case ACMPL:
   186  		case ACMPW:
   187  		case APREFETCHT0:
   188  		case APREFETCHT1:
   189  		case APREFETCHT2:
   190  		case APREFETCHNTA:
   191  			for(z=0; z<BITS; z++)
   192  				r->use2.b[z] |= bit.b[z];
   193  			break;
   194  
   195  		/*
   196  		 * right side write
   197  		 */
   198  		case ANOP:
   199  		case AMOVL:
   200  		case AMOVB:
   201  		case AMOVW:
   202  		case AMOVBLSX:
   203  		case AMOVBLZX:
   204  		case AMOVWLSX:
   205  		case AMOVWLZX:
   206  			for(z=0; z<BITS; z++)
   207  				r->set.b[z] |= bit.b[z];
   208  			break;
   209  
   210  		/*
   211  		 * right side read+write
   212  		 */
   213  		case AADDB:
   214  		case AADDL:
   215  		case AADDW:
   216  		case AANDB:
   217  		case AANDL:
   218  		case AANDW:
   219  		case ASUBB:
   220  		case ASUBL:
   221  		case ASUBW:
   222  		case AORB:
   223  		case AORL:
   224  		case AORW:
   225  		case AXORB:
   226  		case AXORL:
   227  		case AXORW:
   228  		case ASALB:
   229  		case ASALL:
   230  		case ASALW:
   231  		case ASARB:
   232  		case ASARL:
   233  		case ASARW:
   234  		case AROLB:
   235  		case AROLL:
   236  		case AROLW:
   237  		case ARORB:
   238  		case ARORL:
   239  		case ARORW:
   240  		case ASHLB:
   241  		case ASHLL:
   242  		case ASHLW:
   243  		case ASHRB:
   244  		case ASHRL:
   245  		case ASHRW:
   246  		case AIMULL:
   247  		case AIMULW:
   248  		case ANEGL:
   249  		case ANOTL:
   250  		case AADCL:
   251  		case ASBBL:
   252  			for(z=0; z<BITS; z++) {
   253  				r->set.b[z] |= bit.b[z];
   254  				r->use2.b[z] |= bit.b[z];
   255  			}
   256  			break;
   257  
   258  		/*
   259  		 * funny
   260  		 */
   261  		case AFMOVDP:
   262  		case AFMOVFP:
   263  		case AFMOVLP:
   264  		case AFMOVVP:
   265  		case AFMOVWP:
   266  		case ACALL:
   267  			for(z=0; z<BITS; z++)
   268  				addrs.b[z] |= bit.b[z];
   269  			break;
   270  		}
   271  
        		/* record registers that the opcode itself touches, independent of its operands */
   272  		switch(p->as) {
   273  		case AIMULL:
   274  		case AIMULW:
   275  			if(p->to.type != D_NONE)
   276  				break;
        			/* no destination operand: deliberately falls through to the AX/DX group */
   277  
   278  		case AIDIVB:
   279  		case AIDIVL:
   280  		case AIDIVW:
   281  		case AIMULB:
   282  		case ADIVB:
   283  		case ADIVL:
   284  		case ADIVW:
   285  		case AMULB:
   286  		case AMULL:
   287  		case AMULW:
   288  
   289  		case ACWD:
   290  		case ACDQ:
   291  			r->regu |= RtoB(D_AX) | RtoB(D_DX);
   292  			break;
   293  
   294  		case AREP:
   295  		case AREPN:
   296  		case ALOOP:
   297  		case ALOOPEQ:
   298  		case ALOOPNE:
   299  			r->regu |= RtoB(D_CX);
   300  			break;
   301  
   302  		case AMOVSB:
   303  		case AMOVSL:
   304  		case AMOVSW:
   305  		case ACMPSB:
   306  		case ACMPSL:
   307  		case ACMPSW:
   308  			r->regu |= RtoB(D_SI) | RtoB(D_DI);
   309  			break;
   310  
   311  		case ASTOSB:
   312  		case ASTOSL:
   313  		case ASTOSW:
   314  		case ASCASB:
   315  		case ASCASL:
   316  		case ASCASW:
   317  			r->regu |= RtoB(D_AX) | RtoB(D_DI);
   318  			break;
   319  
   320  		case AINSB:
   321  		case AINSL:
   322  		case AINSW:
   323  		case AOUTSB:
   324  		case AOUTSL:
   325  		case AOUTSW:
   326  			r->regu |= RtoB(D_DI) | RtoB(D_DX);
   327  			break;
   328  
   329  		case AFSTSW:
   330  		case ASAHF:
   331  			r->regu |= RtoB(D_AX);
   332  			break;
   333  		}
   334  	}
   335  	if(firstr == R)
   336  		return;
        	/* initpc is subtracted from branch offsets in pass 2 to obtain a node pc; npc is the node count */
   337  	initpc = pc - val;
   338  	npc = val;
   339  
   340  	/*
   341  	 * pass 2
   342  	 * turn branch references to pointers
   343  	 * build back pointers
   344  	 */
   345  	for(r = firstr; r != R; r = r->link) {
   346  		p = r->prog;
   347  		if(p->to.type == D_BRANCH) {
   348  			val = p->to.offset - initpc;
        			/* search for the node with pc == val, jumping ahead through log5 links that do not overshoot */
   349  			r1 = firstr;
   350  			while(r1 != R) {
   351  				r2 = r1->log5;
   352  				if(r2 != R && val >= r2->pc) {
   353  					r1 = r2;
   354  					continue;
   355  				}
   356  				if(r1->pc == val)
   357  					break;
   358  				r1 = r1->link;
   359  			}
   360  			if(r1 == R) {
   361  				nearln = p->lineno;
   362  				diag(Z, "ref not found\n%P", p);
   363  				continue;
   364  			}
   365  			if(r1 == r) {
   366  				nearln = p->lineno;
   367  				diag(Z, "ref to self\n%P", p);
   368  				continue;
   369  			}
        			/* s2 is the branch successor; r is pushed onto the target's p2 list via p2link */
   370  			r->s2 = r1;
   371  			r->p2link = r1->p2;
   372  			r1->p2 = r;
   373  		}
   374  	}
   375  	if(debug['R']) {
   376  		p = firstr->prog;
   377  		print("\n%L %D\n", p->lineno, &p->from);
   378  	}
   379  
   380  	/*
   381  	 * pass 2.1
   382  	 * fix jumps
   383  	 */
   384  	fixjmp(firstr);
   385  
   386  	/*
   387  	 * pass 2.5
   388  	 * find looping structure
   389  	 */
   390  	for(r = firstr; r != R; r = r->link)
   391  		r->active = 0;
   392  	change = 0;
   393  	loopit(firstr, npc);
   394  	if(debug['R'] && debug['v']) {
   395  		print("\nlooping structure:\n");
   396  		for(r = firstr; r != R; r = r->link) {
   397  			print("%d:%P", r->loop, r->prog);
   398  			for(z=0; z<BITS; z++)
   399  				bit.b[z] = r->use1.b[z] |
   400  					   r->use2.b[z] |
   401  					   r->set.b[z];
   402  			if(bany(&bit)) {
   403  				print("\t");
   404  				if(bany(&r->use1))
   405  					print(" u1=%B", r->use1);
   406  				if(bany(&r->use2))
   407  					print(" u2=%B", r->use2);
   408  				if(bany(&r->set))
   409  					print(" st=%B", r->set);
   410  			}
   411  			print("\n");
   412  		}
   413  	}
   414  
   415  	/*
   416  	 * pass 3
   417  	 * iterate propagating usage
   418  	 * 	back until flow graph is complete
   419  	 */
   420  loop1:
   421  	change = 0;
   422  	for(r = firstr; r != R; r = r->link)
   423  		r->active = 0;
   424  	for(r = firstr; r != R; r = r->link)
   425  		if(r->prog->as == ARET)
   426  			prop(r, zbits, zbits);
   427  loop11:
   428  	/* pick up unreachable code */
        	/* i is a flag here: set when some inactive node directly preceding an active one was propagated */
   429  	i = 0;
   430  	for(r = firstr; r != R; r = r1) {
   431  		r1 = r->link;
   432  		if(r1 && r1->active && !r->active) {
   433  			prop(r, zbits, zbits);
   434  			i = 1;
   435  		}
   436  	}
   437  	if(i)
   438  		goto loop11;
   439  	if(change)
   440  		goto loop1;
   441  
   442  
   443  	/*
   444  	 * pass 4
   445  	 * iterate propagating register/variable synchrony
   446  	 * 	forward until graph is complete
   447  	 */
   448  loop2:
   449  	change = 0;
   450  	for(r = firstr; r != R; r = r->link)
   451  		r->active = 0;
   452  	synch(firstr, zbits);
   453  	if(change)
   454  		goto loop2;
   455  
   456  
   457  	/*
   458  	 * pass 5
   459  	 * isolate regions
   460  	 * calculate costs (paint1)
   461  	 */
   462  	r = firstr;
   463  	if(r) {
        		/* variables referenced ahead of the first instruction, excluding externs, params, addrs and consts */
   464  		for(z=0; z<BITS; z++)
   465  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   466  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   467  		if(bany(&bit)) {
   468  			nearln = r->prog->lineno;
   469  			warn(Z, "used and not set: %B", bit);
   470  			if(debug['R'] && !debug['w'])
   471  				print("used and not set: %B\n", bit);
   472  		}
   473  	}
   474  	if(debug['R'] && debug['v'])
   475  		print("\nprop structure:\n");
   476  	for(r = firstr; r != R; r = r->link)
   477  		r->act = zbits;
   478  	rgp = region;
   479  	nregion = 0;
   480  	for(r = firstr; r != R; r = r->link) {
   481  		if(debug['R'] && debug['v']) {
   482  			print("%P\t", r->prog);
   483  			if(bany(&r->set))
   484  				print("s:%B ", r->set);
   485  			if(bany(&r->refahead))
   486  				print("ra:%B ", r->refahead);
   487  			if(bany(&r->calahead))
   488  				print("ca:%B ", r->calahead);
   489  			print("\n");
   490  		}
        		/* a variable set here that is neither referenced ahead nor address-taken: warn and excise the instruction */
   491  		for(z=0; z<BITS; z++)
   492  			bit.b[z] = r->set.b[z] &
   493  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   494  		if(bany(&bit)) {
   495  			nearln = r->prog->lineno;
   496  			warn(Z, "set and not used: %B", bit);
   497  			if(debug['R'])
   498  				print("set and not used: %B\n", bit);
   499  			excise(r);
   500  		}
   501  		for(z=0; z<BITS; z++)
   502  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
        		/* each remaining bit starts a candidate region at r; paint1 leaves its cost in `change` */
   503  		while(bany(&bit)) {
   504  			i = bnum(bit);
   505  			rgp->enter = r;
   506  			rgp->varno = i;
   507  			change = 0;
   508  			if(debug['R'] && debug['v'])
   509  				print("\n");
   510  			paint1(r, i);
   511  			bit.b[i/32] &= ~(1L<<(i%32));
        			/* regions whose cost is not positive are dropped: rgp is not advanced, so the slot is reused */
   512  			if(change <= 0) {
   513  				if(debug['R'])
   514  					print("%L$%d: %B\n",
   515  						r->prog->lineno, change, blsh(i));
   516  				continue;
   517  			}
   518  			rgp->cost = change;
   519  			nregion++;
   520  			if(nregion >= NRGN) {
   521  				fatal(Z, "too many regions");
   522  				goto brk;
   523  			}
   524  			rgp++;
   525  		}
   526  	}
   527  brk:
   528  	qsort(region, nregion, sizeof(region[0]), rcmp);
   529  
   530  	/*
   531  	 * pass 6
   532  	 * determine used registers (paint2)
   533  	 * replace code (paint3)
   534  	 */
   535  	rgp = region;
   536  	for(i=0; i<nregion; i++) {
   537  		bit = blsh(rgp->varno);
   538  		vreg = paint2(rgp->enter, rgp->varno);
   539  		vreg = allreg(vreg, rgp);
   540  		if(debug['R']) {
   541  			print("%L$%d %R: %B\n",
   542  				rgp->enter->prog->lineno,
   543  				rgp->cost,
   544  				rgp->regno,
   545  				bit);
   546  		}
        		/* allreg leaves regno == 0 when it did not assign a register */
   547  		if(rgp->regno != 0)
   548  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   549  		rgp++;
   550  	}
   551  	/*
   552  	 * pass 7
   553  	 * peep-hole on basic block
   554  	 */
   555  	if(!debug['R'] || debug['P'])
   556  		peep();
   557  
   558  	if(debug['R'] && debug['v']) {
   559  		print("after pass 7 (peep)\n");
   560  		for(r=firstr; r; r=r->link)
   561  			print("%04d %P\n", (int)r->pc, r->prog);
   562  		print("\n");
   563  	}
   564  
   565  	/*
   566  	 * pass 8
   567  	 * recalculate pc
   568  	 */
        	/*
        	 * Walk the Prog list between consecutive Reg nodes so that
        	 * instructions without a Reg node are counted as well;
        	 * ANOP and the data/name pseudo-ops do not advance val.
        	 */
   569  	val = initpc;
   570  	for(r = firstr; r != R; r = r1) {
   571  		r->pc = val;
   572  		p = r->prog;
   573  		p1 = P;
   574  		r1 = r->link;
   575  		if(r1 != R)
   576  			p1 = r1->prog;
   577  		for(; p != p1; p = p->link) {
   578  			switch(p->as) {
   579  			default:
   580  				val++;
   581  				break;
   582  
   583  			case ANOP:
   584  			case ADATA:
   585  			case AGLOBL:
   586  			case ANAME:
   587  			case ASIGNAME:
   588  			case AFUNCDATA:
   589  				break;
   590  			}
   591  		}
   592  	}
   593  	pc = val;
   594  
   595  	/*
   596  	 * fix up branches
   597  	 */
   598  	if(debug['R'])
   599  		if(bany(&addrs))
   600  			print("addrs: %B\n", addrs);
   601  
        	/* r1 ends up pointing at the last Reg node; it is used below to splice the list onto freer */
   602  	r1 = 0; /* set */
   603  	for(r = firstr; r != R; r = r->link) {
   604  		p = r->prog;
   605  		if(p->to.type == D_BRANCH) {
   606  			p->to.offset = r->s2->pc;
   607  			p->to.u.branch = r->s2->prog;
   608  		}
   609  		r1 = r;
   610  	}
   611  
   612  	/*
   613  	 * last pass
   614  	 * eliminate nops
   615  	 * free aux structures
   616  	 */
   617  	for(p = firstr->prog; p != P; p = p->link){
   618  		while(p->link && p->link->as == ANOP)
   619  			p->link = p->link->link;
   620  	}
   621  
   622  	if(debug['R'] && debug['v']) {
   623  		print("after pass 8 (fixup pc)\n");
   624  		for(p1=firstr->prog; p1!=P; p1=p1->link)
   625  			print("%P\n", p1);
   626  		print("\n");
   627  	}
   628  
        	/* hand the whole node list (firstr .. r1) back to the free list consumed by rega */
   629  	if(r1 != R) {
   630  		r1->link = freer;
   631  		freer = firstr;
   632  	}
   633  }
   634  
   635  /*
   636   * add mov b,rn
   637   * just after r
   638   */
   639  void
   640  addmove(Reg *r, int bn, int rn, int f)
   641  {
   642  	Prog *p, *p1;
   643  	Addr *a;
   644  	Var *v;
   645  
   646  	p1 = alloc(sizeof(*p1));
   647  	*p1 = zprog;
   648  	p = r->prog;
   649  
   650  	p1->link = p->link;
   651  	p->link = p1;
   652  	p1->lineno = p->lineno;
   653  
   654  	v = var + bn;
   655  
   656  	a = &p1->to;
   657  	a->sym = v->sym;
   658  	a->offset = v->offset;
   659  	a->etype = v->etype;
   660  	a->type = v->name;
   661  
   662  	p1->as = AMOVL;
   663  	if(v->etype == TCHAR || v->etype == TUCHAR)
   664  		p1->as = AMOVB;
   665  	if(v->etype == TSHORT || v->etype == TUSHORT)
   666  		p1->as = AMOVW;
   667  
   668  	p1->from.type = rn;
   669  	if(!f) {
   670  		p1->from = *a;
   671  		*a = zprog.from;
   672  		a->type = rn;
   673  		if(v->etype == TUCHAR)
   674  			p1->as = AMOVB;
   675  		if(v->etype == TUSHORT)
   676  			p1->as = AMOVW;
   677  	}
   678  	if(debug['R'])
   679  		print("%P\t.a%P\n", p, p1);
   680  }
   681  
   682  uint32
   683  doregbits(int r)
   684  {
   685  	uint32 b;
   686  
   687  	b = 0;
   688  	if(r >= D_INDIR)
   689  		r -= D_INDIR;
   690  	if(r >= D_AX && r <= D_DI)
   691  		b |= RtoB(r);
   692  	else
   693  	if(r >= D_AL && r <= D_BL)
   694  		b |= RtoB(r-D_AL+D_AX);
   695  	else
   696  	if(r >= D_AH && r <= D_BH)
   697  		b |= RtoB(r-D_AH+D_AX);
   698  	return b;
   699  }
   700  
   /*
    * mkvar records in r->regu the registers named by a->type and a->index,
    * then returns the Bits value (a single bit, from blsh) identifying the
    * variable that a refers to.
    *
    * Only D_EXTERN, D_STATIC, D_PARAM and D_AUTO operands with a non-nil
    * symbol whose name does not start with '.' are tracked; everything else
    * returns zbits.  A variable is identified by (sym, name, offset); a new
    * slot in var[] is taken when no existing slot matches, and fatal() is
    * called when nvar has reached NVAR.
    *
    * A D_ADDR operand is looked up through a->index; the variable found is
    * added to addrs and zbits is returned.
    *
    * Side effects on the file-level sets: externs (D_EXTERN/D_STATIC),
    * params (D_PARAM), addrs (etype differs from the recorded one, or
    * typechlpfd[et] is zero).
    */
   701  Bits
   702  mkvar(Reg *r, Addr *a)
   703  {
   704  	Var *v;
   705  	int i, t, n, et, z;
   706  	int32 o;
   707  	Bits bit;
   708  	LSym *s;
   709  
   710  	/*
   711  	 * mark registers used
   712  	 */
   713  	t = a->type;
   714  	r->regu |= doregbits(t);
   715  	r->regu |= doregbits(a->index);
   716  
   717  	switch(t) {
   718  	default:
   719  		goto none;
   720  	case D_ADDR:
        		/* temporarily retype the operand as its index, recurse, then restore a->type */
   721  		a->type = a->index;
   722  		bit = mkvar(r, a);
   723  		for(z=0; z<BITS; z++)
   724  			addrs.b[z] |= bit.b[z];
   725  		a->type = t;
   726  		goto none;
   727  	case D_EXTERN:
   728  	case D_STATIC:
   729  	case D_PARAM:
   730  	case D_AUTO:
   731  		n = t;
   732  		break;
   733  	}
   734  	s = a->sym;
   735  	if(s == nil)
   736  		goto none;
   737  	if(s->name[0] == '.')
   738  		goto none;
   739  	et = a->etype;
   740  	o = a->offset;
        	/* linear search of the variables recorded so far; on a match i and v identify the slot */
   741  	v = var;
   742  	for(i=0; i<nvar; i++) {
   743  		if(s == v->sym)
   744  		if(n == v->name)
   745  		if(o == v->offset)
   746  			goto out;
   747  		v++;
   748  	}
   749  	if(nvar >= NVAR)
   750  		fatal(Z, "variable not optimized: %s", s->name);
   751  	i = nvar;
   752  	nvar++;
   753  	v = &var[i];
   754  	v->sym = s;
   755  	v->offset = o;
   756  	v->name = n;
   757  	v->etype = et;
   758  	if(debug['R'])
   759  		print("bit=%2d et=%2d %D\n", i, et, a);
   760  
   761  out:
   762  	bit = blsh(i);
   763  	if(n == D_EXTERN || n == D_STATIC)
   764  		for(z=0; z<BITS; z++)
   765  			externs.b[z] |= bit.b[z];
   766  	if(n == D_PARAM)
   767  		for(z=0; z<BITS; z++)
   768  			params.b[z] |= bit.b[z];
        	/* this access uses a different etype than the one recorded for the slot, or typechlpfd[et] is zero */
   769  	if(v->etype != et || !typechlpfd[et])	/* funny punning */
   770  		for(z=0; z<BITS; z++)
   771  			addrs.b[z] |= bit.b[z];
   772  	return bit;
   773  
   774  none:
   775  	return zbits;
   776  }
   777  
   /*
    * prop pushes the sets ref and cal backwards from r along p1 links.
    *
    * At each node r1 it first merges ref/cal into r1->refahead/calahead
    * (incrementing the global `change` for every word that grew), adjusts
    * ref/cal for ACALL, ATEXT and ARET, then applies the node's own
    * set/use1/use2 bits and stores the result in r1->refbehind/calbehind.
    * The walk stops after processing a node that was already marked active,
    * or when p1 runs out; every other node visited is marked active.
    *
    * Afterwards, for each node from r up to (not including) the stopping
    * node, prop recurses into every predecessor on its p2 list, passing
    * that node's refbehind/calbehind.
    *
    * ref and cal are passed by value, so the caller's copies are untouched.
    */
   778  void
   779  prop(Reg *r, Bits ref, Bits cal)
   780  {
   781  	Reg *r1, *r2;
   782  	int z;
   783  
   784  	for(r1 = r; r1 != R; r1 = r1->p1) {
   785  		for(z=0; z<BITS; z++) {
   786  			ref.b[z] |= r1->refahead.b[z];
   787  			if(ref.b[z] != r1->refahead.b[z]) {
   788  				r1->refahead.b[z] = ref.b[z];
   789  				change++;
   790  			}
   791  			cal.b[z] |= r1->calahead.b[z];
   792  			if(cal.b[z] != r1->calahead.b[z]) {
   793  				r1->calahead.b[z] = cal.b[z];
   794  				change++;
   795  			}
   796  		}
   797  		switch(r1->prog->as) {
   798  		case ACALL:
        			/* across a call: everything in ref, plus all externs, moves into cal; ref is cleared */
   799  			for(z=0; z<BITS; z++) {
   800  				cal.b[z] |= ref.b[z] | externs.b[z];
   801  				ref.b[z] = 0;
   802  			}
   803  			break;
   804  
   805  		case ATEXT:
        			/* both sets are cleared at ATEXT */
   806  			for(z=0; z<BITS; z++) {
   807  				cal.b[z] = 0;
   808  				ref.b[z] = 0;
   809  			}
   810  			break;
   811  
   812  		case ARET:
        			/* at a return cal is reset to exactly the externs and ref is cleared */
   813  			for(z=0; z<BITS; z++) {
   814  				cal.b[z] = externs.b[z];
   815  				ref.b[z] = 0;
   816  			}
   817  		}
        		/* a set removes the bit from ref unless the node also uses it; any set or use removes it from cal */
   818  		for(z=0; z<BITS; z++) {
   819  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
   820  				r1->use1.b[z] | r1->use2.b[z];
   821  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
   822  			r1->refbehind.b[z] = ref.b[z];
   823  			r1->calbehind.b[z] = cal.b[z];
   824  		}
   825  		if(r1->active)
   826  			break;
   827  		r1->active = 1;
   828  	}
        	/* r1 is now the node the walk stopped at (or R); revisit the walked nodes for their branch predecessors */
   829  	for(; r != r1; r = r->p1)
   830  		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
   831  			prop(r2, r->refbehind, r->calbehind);
   832  }
   833  
   834  /*
   835   * find looping structure
   836   *
   837   * 1) find reverse postordering
   838   * 2) find approximate dominators,
   839   *	the actual dominators if the flow graph is reducible
   840   *	otherwise, dominators plus some other non-dominators.
   841   *	See Matthew S. Hecht and Jeffrey D. Ullman,
   842   *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
   843   *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
   844   *	Oct. 1-3, 1973, pp.  207-217.
   845   * 3) find all nodes with a predecessor dominated by the current node.
   846   *	such a node is a loop head.
   847   *	recursively, all preds with a greater rpo number are in the loop
   848   */
   849  int32
   850  postorder(Reg *r, Reg **rpo2r, int32 n)
   851  {
   852  	Reg *r1;
   853  
   854  	r->rpo = 1;
   855  	r1 = r->s1;
   856  	if(r1 && !r1->rpo)
   857  		n = postorder(r1, rpo2r, n);
   858  	r1 = r->s2;
   859  	if(r1 && !r1->rpo)
   860  		n = postorder(r1, rpo2r, n);
   861  	rpo2r[n] = r;
   862  	n++;
   863  	return n;
   864  }
   865  
   866  int32
   867  rpolca(int32 *idom, int32 rpo1, int32 rpo2)
   868  {
   869  	int32 t;
   870  
   871  	if(rpo1 == -1)
   872  		return rpo2;
   873  	while(rpo1 != rpo2){
   874  		if(rpo1 > rpo2){
   875  			t = rpo2;
   876  			rpo2 = rpo1;
   877  			rpo1 = t;
   878  		}
   879  		while(rpo1 < rpo2){
   880  			t = idom[rpo2];
   881  			if(t >= rpo2)
   882  				fatal(Z, "bad idom");
   883  			rpo2 = t;
   884  		}
   885  	}
   886  	return rpo1;
   887  }
   888  
   889  int
   890  doms(int32 *idom, int32 r, int32 s)
   891  {
   892  	while(s > r)
   893  		s = idom[s];
   894  	return s == r;
   895  }
   896  
   897  int
   898  loophead(int32 *idom, Reg *r)
   899  {
   900  	int32 src;
   901  
   902  	src = r->rpo;
   903  	if(r->p1 != R && doms(idom, src, r->p1->rpo))
   904  		return 1;
   905  	for(r = r->p2; r != R; r = r->p2link)
   906  		if(doms(idom, src, r->rpo))
   907  			return 1;
   908  	return 0;
   909  }
   910  
   911  void
   912  loopmark(Reg **rpo2r, int32 head, Reg *r)
   913  {
   914  	if(r->rpo < head || r->active == head)
   915  		return;
   916  	r->active = head;
   917  	r->loop += LOOP;
   918  	if(r->p1 != R)
   919  		loopmark(rpo2r, head, r->p1);
   920  	for(r = r->p2; r != R; r = r->p2link)
   921  		loopmark(rpo2r, head, r);
   922  }
   923  
   924  void
   925  loopit(Reg *r, int32 nr)
   926  {
   927  	Reg *r1;
   928  	int32 i, d, me;
   929  
   930  	if(nr > maxnr) {
   931  		rpo2r = alloc(nr * sizeof(Reg*));
   932  		idom = alloc(nr * sizeof(int32));
   933  		maxnr = nr;
   934  	}
   935  
   936  	d = postorder(r, rpo2r, 0);
   937  	if(d > nr)
   938  		fatal(Z, "too many reg nodes");
   939  	nr = d;
   940  	for(i = 0; i < nr / 2; i++){
   941  		r1 = rpo2r[i];
   942  		rpo2r[i] = rpo2r[nr - 1 - i];
   943  		rpo2r[nr - 1 - i] = r1;
   944  	}
   945  	for(i = 0; i < nr; i++)
   946  		rpo2r[i]->rpo = i;
   947  
   948  	idom[0] = 0;
   949  	for(i = 0; i < nr; i++){
   950  		r1 = rpo2r[i];
   951  		me = r1->rpo;
   952  		d = -1;
   953  		if(r1->p1 != R && r1->p1->rpo < me)
   954  			d = r1->p1->rpo;
   955  		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
   956  			if(r1->rpo < me)
   957  				d = rpolca(idom, d, r1->rpo);
   958  		idom[i] = d;
   959  	}
   960  
   961  	for(i = 0; i < nr; i++){
   962  		r1 = rpo2r[i];
   963  		r1->loop++;
   964  		if(r1->p2 != R && loophead(idom, r1))
   965  			loopmark(rpo2r, i, r1);
   966  	}
   967  }
   968  
   /*
    * synch pushes the set dif forward from r along s1 links.
    *
    * At each node r1, dif loses the bits that are in refahead but not in
    * refbehind, gains the node's set bits and its existing regdiff bits,
    * and the result is written back to r1->regdiff (incrementing the
    * global `change` for every word that changed).  The walk stops after
    * updating a node that was already active.  For nodes visited the
    * first time, dif then loses the bits that are in calahead but not in
    * calbehind, and synch recurses into the branch successor s2 if any.
    *
    * dif is passed by value, so the caller's copy is untouched.
    */
   969  void
   970  synch(Reg *r, Bits dif)
   971  {
   972  	Reg *r1;
   973  	int z;
   974  
   975  	for(r1 = r; r1 != R; r1 = r1->s1) {
   976  		for(z=0; z<BITS; z++) {
   977  			dif.b[z] = (dif.b[z] &
   978  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
   979  					r1->set.b[z] | r1->regdiff.b[z];
   980  			if(dif.b[z] != r1->regdiff.b[z]) {
   981  				r1->regdiff.b[z] = dif.b[z];
   982  				change++;
   983  			}
   984  		}
        		/* regdiff is updated even for an already-active node; only the onward walk is cut short */
   985  		if(r1->active)
   986  			break;
   987  		r1->active = 1;
   988  		for(z=0; z<BITS; z++)
   989  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
   990  		if(r1->s2 != R)
   991  			synch(r1->s2, dif);
   992  	}
   993  }
   994  
   995  uint32
   996  allreg(uint32 b, Rgn *r)
   997  {
   998  	Var *v;
   999  	int i;
  1000  
  1001  	v = var + r->varno;
  1002  	r->regno = 0;
  1003  	switch(v->etype) {
  1004  
  1005  	default:
  1006  		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  1007  		break;
  1008  
  1009  	case TCHAR:
  1010  	case TUCHAR:
  1011  	case TSHORT:
  1012  	case TUSHORT:
  1013  	case TINT:
  1014  	case TUINT:
  1015  	case TLONG:
  1016  	case TULONG:
  1017  	case TIND:
  1018  	case TARRAY:
  1019  		i = BtoR(~b);
  1020  		if(i && r->cost > 0) {
  1021  			r->regno = i;
  1022  			return RtoB(i);
  1023  		}
  1024  		break;
  1025  
  1026  	case TDOUBLE:
  1027  	case TFLOAT:
  1028  		break;
  1029  	}
  1030  	return 0;
  1031  }
  1032  
  /*
   * paint1 walks the region of the flow graph in which variable bn is
   * referenced, starting from r, and accumulates a cost estimate for
   * keeping it in a register in the global `change`.
   *
   * It first backs up along p1 while the variable is in refbehind of the
   * current node and in refahead of the predecessor, and the predecessor
   * is not yet painted.  It then walks forward along s1, setting the
   * variable's bit in r->act for each node, adding CREF * r->loop for
   * each use1 reference and for each use2/set reference, and subtracting
   * CLOAD * r->loop for the entry load and for each store.  Branch
   * predecessors (p2 list) and the branch successor (s2) that carry the
   * variable are painted recursively.
   *
   * For AFMOVL instructions where BtoR(bb) is not D_F0, change is forced
   * to -CINF.
   *
   * NOTE(review): LOAD and STORE are macros defined outside this file
   * section; they appear to depend on the local z -- confirm in gc.h.
   */
  1033  void
  1034  paint1(Reg *r, int bn)
  1035  {
  1036  	Reg *r1;
  1037  	Prog *p;
  1038  	int z;
  1039  	uint32 bb;
  1040  
        	/* z selects the word of the Bits array, bb the bit within it */
  1041  	z = bn/32;
  1042  	bb = 1L<<(bn%32);
  1043  	if(r->act.b[z] & bb)
  1044  		return;
  1045  	for(;;) {
  1046  		if(!(r->refbehind.b[z] & bb))
  1047  			break;
  1048  		r1 = r->p1;
  1049  		if(r1 == R)
  1050  			break;
  1051  		if(!(r1->refahead.b[z] & bb))
  1052  			break;
  1053  		if(r1->act.b[z] & bb)
  1054  			break;
  1055  		r = r1;
  1056  	}
  1057  
        	/* charge an entry load unless this node sets the variable without also using it */
  1058  	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  1059  		change -= CLOAD * r->loop;
  1060  		if(debug['R'] && debug['v'])
  1061  			print("%d%P\td %B $%d\n", r->loop,
  1062  				r->prog, blsh(bn), change);
  1063  	}
  1064  	for(;;) {
  1065  		r->act.b[z] |= bb;
  1066  		p = r->prog;
  1067  
  1068  		if(r->use1.b[z] & bb) {
  1069  			change += CREF * r->loop;
  1070  			if(p->as == AFMOVL)
  1071  				if(BtoR(bb) != D_F0)
  1072  					change = -CINF;
  1073  			if(debug['R'] && debug['v'])
  1074  				print("%d%P\tu1 %B $%d\n", r->loop,
  1075  					p, blsh(bn), change);
  1076  		}
  1077  
  1078  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1079  			change += CREF * r->loop;
  1080  			if(p->as == AFMOVL)
  1081  				if(BtoR(bb) != D_F0)
  1082  					change = -CINF;
  1083  			if(debug['R'] && debug['v'])
  1084  				print("%d%P\tu2 %B $%d\n", r->loop,
  1085  					p, blsh(bn), change);
  1086  		}
  1087  
  1088  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1089  			change -= CLOAD * r->loop;
  1090  			if(p->as == AFMOVL)
  1091  				if(BtoR(bb) != D_F0)
  1092  					change = -CINF;
  1093  			if(debug['R'] && debug['v'])
  1094  				print("%d%P\tst %B $%d\n", r->loop,
  1095  					p, blsh(bn), change);
  1096  		}
  1097  
  1098  		if(r->refbehind.b[z] & bb)
  1099  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1100  				if(r1->refahead.b[z] & bb)
  1101  					paint1(r1, bn);
  1102  
  1103  		if(!(r->refahead.b[z] & bb))
  1104  			break;
  1105  		r1 = r->s2;
  1106  		if(r1 != R)
  1107  			if(r1->refbehind.b[z] & bb)
  1108  				paint1(r1, bn);
  1109  		r = r->s1;
  1110  		if(r == R)
  1111  			break;
  1112  		if(r->act.b[z] & bb)
  1113  			break;
  1114  		if(!(r->refbehind.b[z] & bb))
  1115  			break;
  1116  	}
  1117  }
  1118  
  1119  uint32
  1120  regset(Reg *r, uint32 bb)
  1121  {
  1122  	uint32 b, set;
  1123  	Addr v;
  1124  	int c;
  1125  
  1126  	set = 0;
  1127  	v = zprog.from;
  1128  	while(b = bb & ~(bb-1)) {
  1129  		v.type = BtoR(b);
  1130  		c = copyu(r->prog, &v, A);
  1131  		if(c == 3)
  1132  			set |= b;
  1133  		bb &= ~b;
  1134  	}
  1135  	return set;
  1136  }
  1137  
  1138  uint32
  1139  reguse(Reg *r, uint32 bb)
  1140  {
  1141  	uint32 b, set;
  1142  	Addr v;
  1143  	int c;
  1144  
  1145  	set = 0;
  1146  	v = zprog.from;
  1147  	while(b = bb & ~(bb-1)) {
  1148  		v.type = BtoR(b);
  1149  		c = copyu(r->prog, &v, A);
  1150  		if(c == 1 || c == 2 || c == 4)
  1151  			set |= b;
  1152  		bb &= ~b;
  1153  	}
  1154  	return set;
  1155  }
  1156  
  /*
   * paint2 revisits the region that paint1 marked for variable bn,
   * starting from r, clears the variable's bit in r->act on every node it
   * visits, and returns the union of regbits and the regu sets of those
   * nodes, i.e. the register bits in use somewhere in the region.
   *
   * The traversal mirrors paint1 with the act test inverted: it backs up
   * along p1 while predecessors are still painted, walks forward along s1,
   * and recurses through p2 predecessors and the s2 successor.  If r is
   * not painted on entry, regbits alone is returned.
   *
   * After the walk, the remaining nodes along s1 from where the walk ended
   * are scanned: for register bits they use that are not yet in bb,
   * reguse() results are added to the returned set and regset() results
   * are added to bb.
   */
  1157  uint32
  1158  paint2(Reg *r, int bn)
  1159  {
  1160  	Reg *r1;
  1161  	int z;
  1162  	uint32 bb, vreg, x;
  1163  
  1164  	z = bn/32;
  1165  	bb = 1L << (bn%32);
  1166  	vreg = regbits;
  1167  	if(!(r->act.b[z] & bb))
  1168  		return vreg;
  1169  	for(;;) {
  1170  		if(!(r->refbehind.b[z] & bb))
  1171  			break;
  1172  		r1 = r->p1;
  1173  		if(r1 == R)
  1174  			break;
  1175  		if(!(r1->refahead.b[z] & bb))
  1176  			break;
  1177  		if(!(r1->act.b[z] & bb))
  1178  			break;
  1179  		r = r1;
  1180  	}
  1181  	for(;;) {
        		/* clearing act both unpaints the node and keeps the recursion from revisiting it */
  1182  		r->act.b[z] &= ~bb;
  1183  
  1184  		vreg |= r->regu;
  1185  
  1186  		if(r->refbehind.b[z] & bb)
  1187  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1188  				if(r1->refahead.b[z] & bb)
  1189  					vreg |= paint2(r1, bn);
  1190  
  1191  		if(!(r->refahead.b[z] & bb))
  1192  			break;
  1193  		r1 = r->s2;
  1194  		if(r1 != R)
  1195  			if(r1->refbehind.b[z] & bb)
  1196  				vreg |= paint2(r1, bn);
  1197  		r = r->s1;
  1198  		if(r == R)
  1199  			break;
  1200  		if(!(r->act.b[z] & bb))
  1201  			break;
  1202  		if(!(r->refbehind.b[z] & bb))
  1203  			break;
  1204  	}
  1205  
        	/* bb is reused from here on as a register mask (no longer the variable's bit) */
  1206  	bb = vreg;
  1207  	for(; r; r=r->s1) {
  1208  		x = r->regu & ~bb;
  1209  		if(x) {
  1210  			vreg |= reguse(r, x);
  1211  			bb |= regset(r, x);
  1212  		}
  1213  	}
  1214  	return vreg;
  1215  }
  1216  
  /*
   * paint3 rewrites the region for variable bn, starting from r, so that
   * the variable lives in register rn.
   *
   * The traversal has the same shape as paint1 (back up along p1, walk
   * forward along s1, recurse through p2 predecessors and the s2
   * successor) and again sets the variable's bit in r->act as the visited
   * mark.  On each node:
   *	- a use1 reference replaces p->from with the register (addreg);
   *	- a use2 or set reference replaces p->to with the register;
   *	- where STORE(r) & regdiff has the bit, a register-to-variable move
   *	  is inserted after the instruction (addmove with f=1);
   *	- rb is OR-ed into r->regu.
   * At the entry node a variable-to-register move is inserted (addmove
   * with f=0) under the same condition paint1 uses to charge a load.
   *
   * NOTE(review): LOAD and STORE are macros defined outside this file
   * section; they appear to depend on the local z -- confirm in gc.h.
   */
  1217  void
  1218  paint3(Reg *r, int bn, int32 rb, int rn)
  1219  {
  1220  	Reg *r1;
  1221  	Prog *p;
  1222  	int z;
  1223  	uint32 bb;
  1224  
  1225  	z = bn/32;
  1226  	bb = 1L << (bn%32);
  1227  	if(r->act.b[z] & bb)
  1228  		return;
  1229  	for(;;) {
  1230  		if(!(r->refbehind.b[z] & bb))
  1231  			break;
  1232  		r1 = r->p1;
  1233  		if(r1 == R)
  1234  			break;
  1235  		if(!(r1->refahead.b[z] & bb))
  1236  			break;
  1237  		if(r1->act.b[z] & bb)
  1238  			break;
  1239  		r = r1;
  1240  	}
  1241  
  1242  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1243  		addmove(r, bn, rn, 0);
  1244  	for(;;) {
  1245  		r->act.b[z] |= bb;
  1246  		p = r->prog;
  1247  
  1248  		if(r->use1.b[z] & bb) {
  1249  			if(debug['R'])
  1250  				print("%P", p);
  1251  			addreg(&p->from, rn);
  1252  			if(debug['R'])
  1253  				print("\t.c%P\n", p);
  1254  		}
  1255  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1256  			if(debug['R'])
  1257  				print("%P", p);
  1258  			addreg(&p->to, rn);
  1259  			if(debug['R'])
  1260  				print("\t.c%P\n", p);
  1261  		}
  1262  
  1263  		if(STORE(r) & r->regdiff.b[z] & bb)
  1264  			addmove(r, bn, rn, 1);
        		/* record the register as in use here so later regions see it through regu */
  1265  		r->regu |= rb;
  1266  
  1267  		if(r->refbehind.b[z] & bb)
  1268  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1269  				if(r1->refahead.b[z] & bb)
  1270  					paint3(r1, bn, rb, rn);
  1271  
  1272  		if(!(r->refahead.b[z] & bb))
  1273  			break;
  1274  		r1 = r->s2;
  1275  		if(r1 != R)
  1276  			if(r1->refbehind.b[z] & bb)
  1277  				paint3(r1, bn, rb, rn);
  1278  		r = r->s1;
  1279  		if(r == R)
  1280  			break;
  1281  		if(r->act.b[z] & bb)
  1282  			break;
  1283  		if(!(r->refbehind.b[z] & bb))
  1284  			break;
  1285  	}
  1286  }
  1287  
  1288  void
  1289  addreg(Addr *a, int rn)
  1290  {
  1291  
  1292  	a->sym = 0;
  1293  	a->offset = 0;
  1294  	a->type = rn;
  1295  }
  1296  
  1297  int32
  1298  RtoB(int r)
  1299  {
  1300  
  1301  	if(r < D_AX || r > D_DI)
  1302  		return 0;
  1303  	return 1L << (r-D_AX);
  1304  }
  1305  
  1306  int
  1307  BtoR(int32 b)
  1308  {
  1309  
  1310  	b &= 0xffL;
  1311  	if(b == 0)
  1312  		return 0;
  1313  	return bitno(b) + D_AX;
  1314  }
  1315  
  1316  /* what instruction does a JMP to p eventually land on? */
  1317  static Reg*
  1318  chasejmp(Reg *r, int *jmploop)
  1319  {
  1320  	int n;
  1321  
  1322  	n = 0;
  1323  	for(; r; r=r->s2) {
  1324  		if(r->prog->as != AJMP || r->prog->to.type != D_BRANCH)
  1325  			break;
  1326  		if(++n > 10) {
  1327  			*jmploop = 1;
  1328  			break;
  1329  		}
  1330  	}
  1331  	return r;
  1332  }
  1333  
  1334  /* mark all code reachable from firstp as alive */
  1335  static void
  1336  mark(Reg *firstr)
  1337  {
  1338  	Reg *r;
  1339  	Prog *p;
  1340  
  1341  	for(r=firstr; r; r=r->link) {
  1342  		if(r->active)
  1343  			break;
  1344  		r->active = 1;
  1345  		p = r->prog;
  1346  		if(p->as != ACALL && p->to.type == D_BRANCH)
  1347  			mark(r->s2);
  1348  		if(p->as == AJMP || p->as == ARET || p->as == AUNDEF)
  1349  			break;
  1350  	}
  1351  }
  1352  
  1353  /*
  1354   * the code generator depends on being able to write out JMP
  1355   * instructions that it can jump to now but fill in later.
  1356   * the linker will resolve them nicely, but they make the code
  1357   * longer and more difficult to follow during debugging.
  1358   * remove them.
  1359   */
  1360  static void
  1361  fixjmp(Reg *firstr)
  1362  {
  1363  	int jmploop;
  1364  	Reg *r;
  1365  	Prog *p;
  1366  
  1367  	if(debug['R'] && debug['v'])
  1368  		print("\nfixjmp\n");
  1369  
  1370  	// pass 1: resolve jump to AJMP, mark all code as dead.
  1371  	jmploop = 0;
  1372  	for(r=firstr; r; r=r->link) {
  1373  		p = r->prog;
  1374  		if(debug['R'] && debug['v'])
  1375  			print("%04d %P\n", (int)r->pc, p);
  1376  		if(p->as != ACALL && p->to.type == D_BRANCH && r->s2 && r->s2->prog->as == AJMP) {
  1377  			r->s2 = chasejmp(r->s2, &jmploop);
  1378  			p->to.offset = r->s2->pc;
  1379  			p->to.u.branch = r->s2->prog;
  1380  			if(debug['R'] && debug['v'])
  1381  				print("->%P\n", p);
  1382  		}
  1383  		r->active = 0;
  1384  	}
  1385  	if(debug['R'] && debug['v'])
  1386  		print("\n");
  1387  
  1388  	// pass 2: mark all reachable code alive
  1389  	mark(firstr);
  1390  
  1391  	// pass 3: delete dead code (mostly JMPs).
  1392  	for(r=firstr; r; r=r->link) {
  1393  		if(!r->active) {
  1394  			p = r->prog;
  1395  			if(p->link == P && p->as == ARET && r->p1 && r->p1->prog->as != ARET) {
  1396  				// This is the final ARET, and the code so far doesn't have one.
  1397  				// Let it stay.
  1398  			} else {
  1399  				if(debug['R'] && debug['v'])
  1400  					print("del %04d %P\n", (int)r->pc, p);
  1401  				p->as = ANOP;
  1402  			}
  1403  		}
  1404  	}
  1405  
  1406  	// pass 4: elide JMP to next instruction.
  1407  	// only safe if there are no jumps to JMPs anymore.
  1408  	if(!jmploop) {
  1409  		for(r=firstr; r; r=r->link) {
  1410  			p = r->prog;
  1411  			if(p->as == AJMP && p->to.type == D_BRANCH && r->s2 == r->link) {
  1412  				if(debug['R'] && debug['v'])
  1413  					print("del %04d %P\n", (int)r->pc, p);
  1414  				p->as = ANOP;
  1415  			}
  1416  		}
  1417  	}
  1418  
  1419  	// fix back pointers.
  1420  	for(r=firstr; r; r=r->link) {
  1421  		r->p2 = R;
  1422  		r->p2link = R;
  1423  	}
  1424  	for(r=firstr; r; r=r->link) {
  1425  		if(r->s2) {
  1426  			r->p2link = r->s2->p2;
  1427  			r->s2->p2 = r;
  1428  		}
  1429  	}
  1430  
  1431  	if(debug['R'] && debug['v']) {
  1432  		print("\n");
  1433  		for(r=firstr; r; r=r->link)
  1434  			print("%04d %P\n", (int)r->pc, r->prog);
  1435  		print("\n");
  1436  	}
  1437  }
  1438