github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/6c/reg.c (about)

     1  // Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include "gc.h"
    32  
    33  static	void	fixjmp(Reg*);
    34  
    35  Reg*
    36  rega(void)
    37  {
    38  	Reg *r;
    39  
    40  	r = freer;
    41  	if(r == R) {
    42  		r = alloc(sizeof(*r));
    43  	} else
    44  		freer = r->link;
    45  
    46  	*r = zreg;
    47  	return r;
    48  }
    49  
    50  int
    51  rcmp(const void *a1, const void *a2)
    52  {
    53  	Rgn *p1, *p2;
    54  	int c1, c2;
    55  
    56  	p1 = (Rgn*)a1;
    57  	p2 = (Rgn*)a2;
    58  	c1 = p2->cost;
    59  	c2 = p1->cost;
    60  	if(c1 -= c2)
    61  		return c1;
    62  	return p2->varno - p1->varno;
    63  }
    64  
    65  void
    66  regopt(Prog *p)
    67  {
    68  	Reg *r, *r1, *r2;
    69  	Prog *p1;
    70  	int i, z;
    71  	int32 initpc, val, npc;
    72  	uint32 vreg;
    73  	Bits bit;
    74  	struct
    75  	{
    76  		int32	m;
    77  		int32	c;
    78  		Reg*	p;
    79  	} log5[6], *lp;
    80  
    81  	firstr = R;
    82  	lastr = R;
    83  	nvar = 0;
    84  	regbits = RtoB(D_SP) | RtoB(D_AX) | RtoB(D_X0);
    85  	for(z=0; z<BITS; z++) {
    86  		externs.b[z] = 0;
    87  		params.b[z] = 0;
    88  		consts.b[z] = 0;
    89  		addrs.b[z] = 0;
    90  	}
    91  
    92  	/*
    93  	 * pass 1
    94  	 * build aux data structure
    95  	 * allocate pcs
    96  	 * find use and set of variables
    97  	 */
    98  	val = 5L * 5L * 5L * 5L * 5L;
    99  	lp = log5;
   100  	for(i=0; i<5; i++) {
   101  		lp->m = val;
   102  		lp->c = 0;
   103  		lp->p = R;
   104  		val /= 5L;
   105  		lp++;
   106  	}
   107  	val = 0;
   108  	for(; p != P; p = p->link) {
   109  		switch(p->as) {
   110  		case ADATA:
   111  		case AGLOBL:
   112  		case ANAME:
   113  		case ASIGNAME:
   114  		case AFUNCDATA:
   115  			continue;
   116  		}
   117  		r = rega();
   118  		if(firstr == R) {
   119  			firstr = r;
   120  			lastr = r;
   121  		} else {
   122  			lastr->link = r;
   123  			r->p1 = lastr;
   124  			lastr->s1 = r;
   125  			lastr = r;
   126  		}
   127  		r->prog = p;
   128  		r->pc = val;
   129  		val++;
   130  
   131  		lp = log5;
   132  		for(i=0; i<5; i++) {
   133  			lp->c--;
   134  			if(lp->c <= 0) {
   135  				lp->c = lp->m;
   136  				if(lp->p != R)
   137  					lp->p->log5 = r;
   138  				lp->p = r;
   139  				(lp+1)->c = 0;
   140  				break;
   141  			}
   142  			lp++;
   143  		}
   144  
   145  		r1 = r->p1;
   146  		if(r1 != R)
   147  		switch(r1->prog->as) {
   148  		case ARET:
   149  		case AJMP:
   150  		case AIRETL:
   151  		case AIRETQ:
   152  			r->p1 = R;
   153  			r1->s1 = R;
   154  		}
   155  
   156  		bit = mkvar(r, &p->from);
   157  		if(bany(&bit))
   158  		switch(p->as) {
   159  		/*
   160  		 * funny
   161  		 */
   162  		case ALEAL:
   163  		case ALEAQ:
   164  			for(z=0; z<BITS; z++)
   165  				addrs.b[z] |= bit.b[z];
   166  			break;
   167  
   168  		/*
   169  		 * left side read
   170  		 */
   171  		default:
   172  			for(z=0; z<BITS; z++)
   173  				r->use1.b[z] |= bit.b[z];
   174  			break;
   175  		}
   176  
   177  		bit = mkvar(r, &p->to);
   178  		if(bany(&bit))
   179  		switch(p->as) {
   180  		default:
   181  			diag(Z, "reg: unknown op: %A", p->as);
   182  			break;
   183  
   184  		/*
   185  		 * right side read
   186  		 */
   187  		case ACMPB:
   188  		case ACMPL:
   189  		case ACMPQ:
   190  		case ACMPW:
   191  		case APREFETCHT0:
   192  		case APREFETCHT1:
   193  		case APREFETCHT2:
   194  		case APREFETCHNTA:
   195  		case ACOMISS:
   196  		case ACOMISD:
   197  		case AUCOMISS:
   198  		case AUCOMISD:
   199  			for(z=0; z<BITS; z++)
   200  				r->use2.b[z] |= bit.b[z];
   201  			break;
   202  
   203  		/*
   204  		 * right side write
   205  		 */
   206  		case ANOP:
   207  		case AMOVL:
   208  		case AMOVQ:
   209  		case AMOVB:
   210  		case AMOVW:
   211  		case AMOVBLSX:
   212  		case AMOVBLZX:
   213  		case AMOVBQSX:
   214  		case AMOVBQZX:
   215  		case AMOVLQSX:
   216  		case AMOVLQZX:
   217  		case AMOVWLSX:
   218  		case AMOVWLZX:
   219  		case AMOVWQSX:
   220  		case AMOVWQZX:
   221  		case AMOVQL:
   222  
   223  		case AMOVSS:
   224  		case AMOVSD:
   225  		case ACVTSD2SL:
   226  		case ACVTSD2SQ:
   227  		case ACVTSD2SS:
   228  		case ACVTSL2SD:
   229  		case ACVTSL2SS:
   230  		case ACVTSQ2SD:
   231  		case ACVTSQ2SS:
   232  		case ACVTSS2SD:
   233  		case ACVTSS2SL:
   234  		case ACVTSS2SQ:
   235  		case ACVTTSD2SL:
   236  		case ACVTTSD2SQ:
   237  		case ACVTTSS2SL:
   238  		case ACVTTSS2SQ:
   239  			for(z=0; z<BITS; z++)
   240  				r->set.b[z] |= bit.b[z];
   241  			break;
   242  
   243  		/*
   244  		 * right side read+write
   245  		 */
   246  		case AADDB:
   247  		case AADDL:
   248  		case AADDQ:
   249  		case AADDW:
   250  		case AANDB:
   251  		case AANDL:
   252  		case AANDQ:
   253  		case AANDW:
   254  		case ASUBB:
   255  		case ASUBL:
   256  		case ASUBQ:
   257  		case ASUBW:
   258  		case AORB:
   259  		case AORL:
   260  		case AORQ:
   261  		case AORW:
   262  		case AXORB:
   263  		case AXORL:
   264  		case AXORQ:
   265  		case AXORW:
   266  		case ASALB:
   267  		case ASALL:
   268  		case ASALQ:
   269  		case ASALW:
   270  		case ASARB:
   271  		case ASARL:
   272  		case ASARQ:
   273  		case ASARW:
   274  		case AROLB:
   275  		case AROLL:
   276  		case AROLQ:
   277  		case AROLW:
   278  		case ARORB:
   279  		case ARORL:
   280  		case ARORQ:
   281  		case ARORW:
   282  		case ASHLB:
   283  		case ASHLL:
   284  		case ASHLQ:
   285  		case ASHLW:
   286  		case ASHRB:
   287  		case ASHRL:
   288  		case ASHRQ:
   289  		case ASHRW:
   290  		case AIMULL:
   291  		case AIMULQ:
   292  		case AIMULW:
   293  		case ANEGL:
   294  		case ANEGQ:
   295  		case ANOTL:
   296  		case ANOTQ:
   297  		case AADCL:
   298  		case AADCQ:
   299  		case ASBBL:
   300  		case ASBBQ:
   301  
   302  		case AADDSD:
   303  		case AADDSS:
   304  		case ACMPSD:
   305  		case ACMPSS:
   306  		case ADIVSD:
   307  		case ADIVSS:
   308  		case AMAXSD:
   309  		case AMAXSS:
   310  		case AMINSD:
   311  		case AMINSS:
   312  		case AMULSD:
   313  		case AMULSS:
   314  		case ARCPSS:
   315  		case ARSQRTSS:
   316  		case ASQRTSD:
   317  		case ASQRTSS:
   318  		case ASUBSD:
   319  		case ASUBSS:
   320  		case AXORPD:
   321  			for(z=0; z<BITS; z++) {
   322  				r->set.b[z] |= bit.b[z];
   323  				r->use2.b[z] |= bit.b[z];
   324  			}
   325  			break;
   326  
   327  		/*
   328  		 * funny
   329  		 */
   330  		case ACALL:
   331  			for(z=0; z<BITS; z++)
   332  				addrs.b[z] |= bit.b[z];
   333  			break;
   334  		}
   335  
   336  		switch(p->as) {
   337  		case AIMULL:
   338  		case AIMULQ:
   339  		case AIMULW:
   340  			if(p->to.type != D_NONE)
   341  				break;
   342  
   343  		case AIDIVB:
   344  		case AIDIVL:
   345  		case AIDIVQ:
   346  		case AIDIVW:
   347  		case AIMULB:
   348  		case ADIVB:
   349  		case ADIVL:
   350  		case ADIVQ:
   351  		case ADIVW:
   352  		case AMULB:
   353  		case AMULL:
   354  		case AMULQ:
   355  		case AMULW:
   356  
   357  		case ACWD:
   358  		case ACDQ:
   359  		case ACQO:
   360  			r->regu |= RtoB(D_AX) | RtoB(D_DX);
   361  			break;
   362  
   363  		case AREP:
   364  		case AREPN:
   365  		case ALOOP:
   366  		case ALOOPEQ:
   367  		case ALOOPNE:
   368  			r->regu |= RtoB(D_CX);
   369  			break;
   370  
   371  		case AMOVSB:
   372  		case AMOVSL:
   373  		case AMOVSQ:
   374  		case AMOVSW:
   375  		case ACMPSB:
   376  		case ACMPSL:
   377  		case ACMPSQ:
   378  		case ACMPSW:
   379  			r->regu |= RtoB(D_SI) | RtoB(D_DI);
   380  			break;
   381  
   382  		case ASTOSB:
   383  		case ASTOSL:
   384  		case ASTOSQ:
   385  		case ASTOSW:
   386  		case ASCASB:
   387  		case ASCASL:
   388  		case ASCASQ:
   389  		case ASCASW:
   390  			r->regu |= RtoB(D_AX) | RtoB(D_DI);
   391  			break;
   392  
   393  		case AINSB:
   394  		case AINSL:
   395  		case AINSW:
   396  		case AOUTSB:
   397  		case AOUTSL:
   398  		case AOUTSW:
   399  			r->regu |= RtoB(D_DI) | RtoB(D_DX);
   400  			break;
   401  		}
   402  	}
   403  	if(firstr == R)
   404  		return;
   405  	initpc = pc - val;
   406  	npc = val;
   407  
   408  	/*
   409  	 * pass 2
   410  	 * turn branch references to pointers
   411  	 * build back pointers
   412  	 */
   413  	for(r = firstr; r != R; r = r->link) {
   414  		p = r->prog;
   415  		if(p->to.type == D_BRANCH) {
   416  			val = p->to.offset - initpc;
   417  			r1 = firstr;
   418  			while(r1 != R) {
   419  				r2 = r1->log5;
   420  				if(r2 != R && val >= r2->pc) {
   421  					r1 = r2;
   422  					continue;
   423  				}
   424  				if(r1->pc == val)
   425  					break;
   426  				r1 = r1->link;
   427  			}
   428  			if(r1 == R) {
   429  				nearln = p->lineno;
   430  				diag(Z, "ref not found\n%P", p);
   431  				continue;
   432  			}
   433  			if(r1 == r) {
   434  				nearln = p->lineno;
   435  				diag(Z, "ref to self\n%P", p);
   436  				continue;
   437  			}
   438  			r->s2 = r1;
   439  			r->p2link = r1->p2;
   440  			r1->p2 = r;
   441  		}
   442  	}
   443  	if(debug['R']) {
   444  		p = firstr->prog;
   445  		print("\n%L %D\n", p->lineno, &p->from);
   446  	}
   447  
   448  	/*
   449  	 * pass 2.1
   450  	 * fix jumps
   451  	 */
   452  	fixjmp(firstr);
   453  
   454  	/*
   455  	 * pass 2.5
   456  	 * find looping structure
   457  	 */
   458  	for(r = firstr; r != R; r = r->link)
   459  		r->active = 0;
   460  	change = 0;
   461  	loopit(firstr, npc);
   462  	if(debug['R'] && debug['v']) {
   463  		print("\nlooping structure:\n");
   464  		for(r = firstr; r != R; r = r->link) {
   465  			print("%d:%P", r->loop, r->prog);
   466  			for(z=0; z<BITS; z++)
   467  				bit.b[z] = r->use1.b[z] |
   468  					   r->use2.b[z] |
   469  					   r->set.b[z];
   470  			if(bany(&bit)) {
   471  				print("\t");
   472  				if(bany(&r->use1))
   473  					print(" u1=%B", r->use1);
   474  				if(bany(&r->use2))
   475  					print(" u2=%B", r->use2);
   476  				if(bany(&r->set))
   477  					print(" st=%B", r->set);
   478  			}
   479  			print("\n");
   480  		}
   481  	}
   482  
   483  	/*
   484  	 * pass 3
   485  	 * iterate propagating usage
   486  	 * 	back until flow graph is complete
   487  	 */
   488  loop1:
   489  	change = 0;
   490  	for(r = firstr; r != R; r = r->link)
   491  		r->active = 0;
   492  	for(r = firstr; r != R; r = r->link)
   493  		if(r->prog->as == ARET)
   494  			prop(r, zbits, zbits);
   495  loop11:
   496  	/* pick up unreachable code */
   497  	i = 0;
   498  	for(r = firstr; r != R; r = r1) {
   499  		r1 = r->link;
   500  		if(r1 && r1->active && !r->active) {
   501  			prop(r, zbits, zbits);
   502  			i = 1;
   503  		}
   504  	}
   505  	if(i)
   506  		goto loop11;
   507  	if(change)
   508  		goto loop1;
   509  
   510  
   511  	/*
   512  	 * pass 4
   513  	 * iterate propagating register/variable synchrony
   514  	 * 	forward until graph is complete
   515  	 */
   516  loop2:
   517  	change = 0;
   518  	for(r = firstr; r != R; r = r->link)
   519  		r->active = 0;
   520  	synch(firstr, zbits);
   521  	if(change)
   522  		goto loop2;
   523  
   524  
   525  	/*
   526  	 * pass 5
   527  	 * isolate regions
   528  	 * calculate costs (paint1)
   529  	 */
   530  	r = firstr;
   531  	if(r) {
   532  		for(z=0; z<BITS; z++)
   533  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   534  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   535  		if(bany(&bit)) {
   536  			nearln = r->prog->lineno;
   537  			warn(Z, "used and not set: %B", bit);
   538  			if(debug['R'] && !debug['w'])
   539  				print("used and not set: %B\n", bit);
   540  		}
   541  	}
   542  	if(debug['R'] && debug['v'])
   543  		print("\nprop structure:\n");
   544  	for(r = firstr; r != R; r = r->link)
   545  		r->act = zbits;
   546  	rgp = region;
   547  	nregion = 0;
   548  	for(r = firstr; r != R; r = r->link) {
   549  		if(debug['R'] && debug['v']) {
   550  			print("%P\t", r->prog);
   551  			if(bany(&r->set))
   552  				print("s:%B ", r->set);
   553  			if(bany(&r->refahead))
   554  				print("ra:%B ", r->refahead);
   555  			if(bany(&r->calahead))
   556  				print("ca:%B ", r->calahead);
   557  			print("\n");
   558  		}
   559  		for(z=0; z<BITS; z++)
   560  			bit.b[z] = r->set.b[z] &
   561  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   562  		if(bany(&bit)) {
   563  			nearln = r->prog->lineno;
   564  			warn(Z, "set and not used: %B", bit);
   565  			if(debug['R'])
   566  				print("set and not used: %B\n", bit);
   567  			excise(r);
   568  		}
   569  		for(z=0; z<BITS; z++)
   570  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   571  		while(bany(&bit)) {
   572  			i = bnum(bit);
   573  			rgp->enter = r;
   574  			rgp->varno = i;
   575  			change = 0;
   576  			if(debug['R'] && debug['v'])
   577  				print("\n");
   578  			paint1(r, i);
   579  			bit.b[i/32] &= ~(1L<<(i%32));
   580  			if(change <= 0) {
   581  				if(debug['R'])
   582  					print("%L$%d: %B\n",
   583  						r->prog->lineno, change, blsh(i));
   584  				continue;
   585  			}
   586  			rgp->cost = change;
   587  			nregion++;
   588  			if(nregion >= NRGN) {
   589  				warn(Z, "too many regions");
   590  				goto brk;
   591  			}
   592  			rgp++;
   593  		}
   594  	}
   595  brk:
   596  	qsort(region, nregion, sizeof(region[0]), rcmp);
   597  
   598  	/*
   599  	 * pass 6
   600  	 * determine used registers (paint2)
   601  	 * replace code (paint3)
   602  	 */
   603  	rgp = region;
   604  	for(i=0; i<nregion; i++) {
   605  		bit = blsh(rgp->varno);
   606  		vreg = paint2(rgp->enter, rgp->varno);
   607  		vreg = allreg(vreg, rgp);
   608  		if(debug['R']) {
   609  			print("%L$%d %R: %B\n",
   610  				rgp->enter->prog->lineno,
   611  				rgp->cost,
   612  				rgp->regno,
   613  				bit);
   614  		}
   615  		if(rgp->regno != 0)
   616  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   617  		rgp++;
   618  	}
   619  	/*
   620  	 * pass 7
   621  	 * peep-hole on basic block
   622  	 */
   623  	if(!debug['R'] || debug['P'])
   624  		peep();
   625  
   626  	/*
   627  	 * pass 8
   628  	 * recalculate pc
   629  	 */
   630  	val = initpc;
   631  	for(r = firstr; r != R; r = r1) {
   632  		r->pc = val;
   633  		p = r->prog;
   634  		p1 = P;
   635  		r1 = r->link;
   636  		if(r1 != R)
   637  			p1 = r1->prog;
   638  		for(; p != p1; p = p->link) {
   639  			switch(p->as) {
   640  			default:
   641  				val++;
   642  				break;
   643  
   644  			case ANOP:
   645  			case ADATA:
   646  			case AGLOBL:
   647  			case ANAME:
   648  			case ASIGNAME:
   649  			case AFUNCDATA:
   650  				break;
   651  			}
   652  		}
   653  	}
   654  	pc = val;
   655  
   656  	/*
   657  	 * fix up branches
   658  	 */
   659  	if(debug['R'])
   660  		if(bany(&addrs))
   661  			print("addrs: %B\n", addrs);
   662  
   663  	r1 = 0; /* set */
   664  	for(r = firstr; r != R; r = r->link) {
   665  		p = r->prog;
   666  		if(p->to.type == D_BRANCH) {
   667  			p->to.offset = r->s2->pc;
   668  			p->to.u.branch = r->s2->prog;
   669  		}
   670  		r1 = r;
   671  	}
   672  
   673  	/*
   674  	 * last pass
   675  	 * eliminate nops
   676  	 * free aux structures
   677  	 */
   678  	for(p = firstr->prog; p != P; p = p->link){
   679  		while(p->link && p->link->as == ANOP)
   680  			p->link = p->link->link;
   681  	}
   682  	if(r1 != R) {
   683  		r1->link = freer;
   684  		freer = firstr;
   685  	}
   686  }
   687  
   688  /*
   689   * add mov b,rn
   690   * just after r
   691   */
   692  void
   693  addmove(Reg *r, int bn, int rn, int f)
   694  {
   695  	Prog *p, *p1;
   696  	Addr *a;
   697  	Var *v;
   698  
   699  	p1 = alloc(sizeof(*p1));
   700  	*p1 = zprog;
   701  	p = r->prog;
   702  
   703  	p1->link = p->link;
   704  	p->link = p1;
   705  	p1->lineno = p->lineno;
   706  
   707  	v = var + bn;
   708  
   709  	a = &p1->to;
   710  	a->sym = v->sym;
   711  	a->offset = v->offset;
   712  	a->etype = v->etype;
   713  	a->type = v->name;
   714  
   715  	p1->as = AMOVL;
   716  	if(v->etype == TCHAR || v->etype == TUCHAR)
   717  		p1->as = AMOVB;
   718  	if(v->etype == TSHORT || v->etype == TUSHORT)
   719  		p1->as = AMOVW;
   720  	if(v->etype == TVLONG || v->etype == TUVLONG || (v->etype == TIND && ewidth[TIND] == 8))
   721  		p1->as = AMOVQ;
   722  	if(v->etype == TFLOAT)
   723  		p1->as = AMOVSS;
   724  	if(v->etype == TDOUBLE)
   725  		p1->as = AMOVSD;
   726  
   727  	p1->from.type = rn;
   728  	if(!f) {
   729  		p1->from = *a;
   730  		*a = zprog.from;
   731  		a->type = rn;
   732  		if(v->etype == TUCHAR)
   733  			p1->as = AMOVB;
   734  		if(v->etype == TUSHORT)
   735  			p1->as = AMOVW;
   736  	}
   737  	if(debug['R'])
   738  		print("%P\t.a%P\n", p, p1);
   739  }
   740  
   741  uint32
   742  doregbits(int r)
   743  {
   744  	uint32 b;
   745  
   746  	b = 0;
   747  	if(r >= D_INDIR)
   748  		r -= D_INDIR;
   749  	if(r >= D_AX && r <= D_R15)
   750  		b |= RtoB(r);
   751  	else
   752  	if(r >= D_AL && r <= D_R15B)
   753  		b |= RtoB(r-D_AL+D_AX);
   754  	else
   755  	if(r >= D_AH && r <= D_BH)
   756  		b |= RtoB(r-D_AH+D_AX);
   757  	else
   758  	if(r >= D_X0 && r <= D_X0+15)
   759  		b |= FtoB(r);
   760  	return b;
   761  }
   762  
   763  Bits
   764  mkvar(Reg *r, Addr *a)
   765  {
   766  	Var *v;
   767  	int i, t, n, et, z;
   768  	int32 o;
   769  	Bits bit;
   770  	LSym *s;
   771  
   772  	/*
   773  	 * mark registers used
   774  	 */
   775  	t = a->type;
   776  	r->regu |= doregbits(t);
   777  	r->regu |= doregbits(a->index);
   778  
   779  	switch(t) {
   780  	default:
   781  		goto none;
   782  	case D_ADDR:
   783  		a->type = a->index;
   784  		bit = mkvar(r, a);
   785  		for(z=0; z<BITS; z++)
   786  			addrs.b[z] |= bit.b[z];
   787  		a->type = t;
   788  		goto none;
   789  	case D_EXTERN:
   790  	case D_STATIC:
   791  	case D_PARAM:
   792  	case D_AUTO:
   793  		n = t;
   794  		break;
   795  	}
   796  	s = a->sym;
   797  	if(s == nil)
   798  		goto none;
   799  	if(s->name[0] == '.')
   800  		goto none;
   801  	et = a->etype;
   802  	o = a->offset;
   803  	v = var;
   804  	for(i=0; i<nvar; i++) {
   805  		if(s == v->sym)
   806  		if(n == v->name)
   807  		if(o == v->offset)
   808  			goto out;
   809  		v++;
   810  	}
   811  	if(nvar >= NVAR) {
   812  		if(debug['w'] > 1 && s)
   813  			warn(Z, "variable not optimized: %s", s->name);
   814  		goto none;
   815  	}
   816  	i = nvar;
   817  	nvar++;
   818  	v = &var[i];
   819  	v->sym = s;
   820  	v->offset = o;
   821  	v->name = n;
   822  	v->etype = et;
   823  	if(debug['R'])
   824  		print("bit=%2d et=%2d %D\n", i, et, a);
   825  
   826  out:
   827  	bit = blsh(i);
   828  	if(n == D_EXTERN || n == D_STATIC)
   829  		for(z=0; z<BITS; z++)
   830  			externs.b[z] |= bit.b[z];
   831  	if(n == D_PARAM)
   832  		for(z=0; z<BITS; z++)
   833  			params.b[z] |= bit.b[z];
   834  	if(v->etype != et || !(typechlpfd[et] || typev[et]))	/* funny punning */
   835  		for(z=0; z<BITS; z++)
   836  			addrs.b[z] |= bit.b[z];
   837  	return bit;
   838  
   839  none:
   840  	return zbits;
   841  }
   842  
   843  void
   844  prop(Reg *r, Bits ref, Bits cal)
   845  {
   846  	Reg *r1, *r2;
   847  	int z;
   848  
   849  	for(r1 = r; r1 != R; r1 = r1->p1) {
   850  		for(z=0; z<BITS; z++) {
   851  			ref.b[z] |= r1->refahead.b[z];
   852  			if(ref.b[z] != r1->refahead.b[z]) {
   853  				r1->refahead.b[z] = ref.b[z];
   854  				change++;
   855  			}
   856  			cal.b[z] |= r1->calahead.b[z];
   857  			if(cal.b[z] != r1->calahead.b[z]) {
   858  				r1->calahead.b[z] = cal.b[z];
   859  				change++;
   860  			}
   861  		}
   862  		switch(r1->prog->as) {
   863  		case ACALL:
   864  			for(z=0; z<BITS; z++) {
   865  				cal.b[z] |= ref.b[z] | externs.b[z];
   866  				ref.b[z] = 0;
   867  			}
   868  			break;
   869  
   870  		case ATEXT:
   871  			for(z=0; z<BITS; z++) {
   872  				cal.b[z] = 0;
   873  				ref.b[z] = 0;
   874  			}
   875  			break;
   876  
   877  		case ARET:
   878  			for(z=0; z<BITS; z++) {
   879  				cal.b[z] = externs.b[z];
   880  				ref.b[z] = 0;
   881  			}
   882  		}
   883  		for(z=0; z<BITS; z++) {
   884  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
   885  				r1->use1.b[z] | r1->use2.b[z];
   886  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
   887  			r1->refbehind.b[z] = ref.b[z];
   888  			r1->calbehind.b[z] = cal.b[z];
   889  		}
   890  		if(r1->active)
   891  			break;
   892  		r1->active = 1;
   893  	}
   894  	for(; r != r1; r = r->p1)
   895  		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
   896  			prop(r2, r->refbehind, r->calbehind);
   897  }
   898  
   899  /*
   900   * find looping structure
   901   *
   902   * 1) find reverse postordering
   903   * 2) find approximate dominators,
   904   *	the actual dominators if the flow graph is reducible
   905   *	otherwise, dominators plus some other non-dominators.
   906   *	See Matthew S. Hecht and Jeffrey D. Ullman,
   907   *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
   908   *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
   909   *	Oct. 1-3, 1973, pp.  207-217.
   910   * 3) find all nodes with a predecessor dominated by the current node.
   911   *	such a node is a loop head.
   912   *	recursively, all preds with a greater rpo number are in the loop
   913   */
   914  int32
   915  postorder(Reg *r, Reg **rpo2r, int32 n)
   916  {
   917  	Reg *r1;
   918  
   919  	r->rpo = 1;
   920  	r1 = r->s1;
   921  	if(r1 && !r1->rpo)
   922  		n = postorder(r1, rpo2r, n);
   923  	r1 = r->s2;
   924  	if(r1 && !r1->rpo)
   925  		n = postorder(r1, rpo2r, n);
   926  	rpo2r[n] = r;
   927  	n++;
   928  	return n;
   929  }
   930  
   931  int32
   932  rpolca(int32 *idom, int32 rpo1, int32 rpo2)
   933  {
   934  	int32 t;
   935  
   936  	if(rpo1 == -1)
   937  		return rpo2;
   938  	while(rpo1 != rpo2){
   939  		if(rpo1 > rpo2){
   940  			t = rpo2;
   941  			rpo2 = rpo1;
   942  			rpo1 = t;
   943  		}
   944  		while(rpo1 < rpo2){
   945  			t = idom[rpo2];
   946  			if(t >= rpo2)
   947  				fatal(Z, "bad idom");
   948  			rpo2 = t;
   949  		}
   950  	}
   951  	return rpo1;
   952  }
   953  
   954  int
   955  doms(int32 *idom, int32 r, int32 s)
   956  {
   957  	while(s > r)
   958  		s = idom[s];
   959  	return s == r;
   960  }
   961  
   962  int
   963  loophead(int32 *idom, Reg *r)
   964  {
   965  	int32 src;
   966  
   967  	src = r->rpo;
   968  	if(r->p1 != R && doms(idom, src, r->p1->rpo))
   969  		return 1;
   970  	for(r = r->p2; r != R; r = r->p2link)
   971  		if(doms(idom, src, r->rpo))
   972  			return 1;
   973  	return 0;
   974  }
   975  
   976  void
   977  loopmark(Reg **rpo2r, int32 head, Reg *r)
   978  {
   979  	if(r->rpo < head || r->active == head)
   980  		return;
   981  	r->active = head;
   982  	r->loop += LOOP;
   983  	if(r->p1 != R)
   984  		loopmark(rpo2r, head, r->p1);
   985  	for(r = r->p2; r != R; r = r->p2link)
   986  		loopmark(rpo2r, head, r);
   987  }
   988  
   989  void
   990  loopit(Reg *r, int32 nr)
   991  {
   992  	Reg *r1;
   993  	int32 i, d, me;
   994  
   995  	if(nr > maxnr) {
   996  		rpo2r = alloc(nr * sizeof(Reg*));
   997  		idom = alloc(nr * sizeof(int32));
   998  		maxnr = nr;
   999  	}
  1000  
  1001  	d = postorder(r, rpo2r, 0);
  1002  	if(d > nr)
  1003  		fatal(Z, "too many reg nodes");
  1004  	nr = d;
  1005  	for(i = 0; i < nr / 2; i++){
  1006  		r1 = rpo2r[i];
  1007  		rpo2r[i] = rpo2r[nr - 1 - i];
  1008  		rpo2r[nr - 1 - i] = r1;
  1009  	}
  1010  	for(i = 0; i < nr; i++)
  1011  		rpo2r[i]->rpo = i;
  1012  
  1013  	idom[0] = 0;
  1014  	for(i = 0; i < nr; i++){
  1015  		r1 = rpo2r[i];
  1016  		me = r1->rpo;
  1017  		d = -1;
  1018  		if(r1->p1 != R && r1->p1->rpo < me)
  1019  			d = r1->p1->rpo;
  1020  		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  1021  			if(r1->rpo < me)
  1022  				d = rpolca(idom, d, r1->rpo);
  1023  		idom[i] = d;
  1024  	}
  1025  
  1026  	for(i = 0; i < nr; i++){
  1027  		r1 = rpo2r[i];
  1028  		r1->loop++;
  1029  		if(r1->p2 != R && loophead(idom, r1))
  1030  			loopmark(rpo2r, i, r1);
  1031  	}
  1032  }
  1033  
  1034  void
  1035  synch(Reg *r, Bits dif)
  1036  {
  1037  	Reg *r1;
  1038  	int z;
  1039  
  1040  	for(r1 = r; r1 != R; r1 = r1->s1) {
  1041  		for(z=0; z<BITS; z++) {
  1042  			dif.b[z] = (dif.b[z] &
  1043  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  1044  					r1->set.b[z] | r1->regdiff.b[z];
  1045  			if(dif.b[z] != r1->regdiff.b[z]) {
  1046  				r1->regdiff.b[z] = dif.b[z];
  1047  				change++;
  1048  			}
  1049  		}
  1050  		if(r1->active)
  1051  			break;
  1052  		r1->active = 1;
  1053  		for(z=0; z<BITS; z++)
  1054  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  1055  		if(r1->s2 != R)
  1056  			synch(r1->s2, dif);
  1057  	}
  1058  }
  1059  
  1060  uint32
  1061  allreg(uint32 b, Rgn *r)
  1062  {
  1063  	Var *v;
  1064  	int i;
  1065  
  1066  	v = var + r->varno;
  1067  	r->regno = 0;
  1068  	switch(v->etype) {
  1069  
  1070  	default:
  1071  		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  1072  		break;
  1073  
  1074  	case TCHAR:
  1075  	case TUCHAR:
  1076  	case TSHORT:
  1077  	case TUSHORT:
  1078  	case TINT:
  1079  	case TUINT:
  1080  	case TLONG:
  1081  	case TULONG:
  1082  	case TVLONG:
  1083  	case TUVLONG:
  1084  	case TIND:
  1085  	case TARRAY:
  1086  		i = BtoR(~b);
  1087  		if(i && r->cost > 0) {
  1088  			r->regno = i;
  1089  			return RtoB(i);
  1090  		}
  1091  		break;
  1092  
  1093  	case TDOUBLE:
  1094  	case TFLOAT:
  1095  		i = BtoF(~b);
  1096  		if(i && r->cost > 0) {
  1097  			r->regno = i;
  1098  			return FtoB(i);
  1099  		}
  1100  		break;
  1101  	}
  1102  	return 0;
  1103  }
  1104  
  1105  void
  1106  paint1(Reg *r, int bn)
  1107  {
  1108  	Reg *r1;
  1109  	Prog *p;
  1110  	int z;
  1111  	uint32 bb;
  1112  
  1113  	z = bn/32;
  1114  	bb = 1L<<(bn%32);
  1115  	if(r->act.b[z] & bb)
  1116  		return;
  1117  	for(;;) {
  1118  		if(!(r->refbehind.b[z] & bb))
  1119  			break;
  1120  		r1 = r->p1;
  1121  		if(r1 == R)
  1122  			break;
  1123  		if(!(r1->refahead.b[z] & bb))
  1124  			break;
  1125  		if(r1->act.b[z] & bb)
  1126  			break;
  1127  		r = r1;
  1128  	}
  1129  
  1130  	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  1131  		change -= CLOAD * r->loop;
  1132  		if(debug['R'] && debug['v'])
  1133  			print("%d%P\td %B $%d\n", r->loop,
  1134  				r->prog, blsh(bn), change);
  1135  	}
  1136  	for(;;) {
  1137  		r->act.b[z] |= bb;
  1138  		p = r->prog;
  1139  
  1140  		if(r->use1.b[z] & bb) {
  1141  			change += CREF * r->loop;
  1142  			if(debug['R'] && debug['v'])
  1143  				print("%d%P\tu1 %B $%d\n", r->loop,
  1144  					p, blsh(bn), change);
  1145  		}
  1146  
  1147  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1148  			change += CREF * r->loop;
  1149  			if(debug['R'] && debug['v'])
  1150  				print("%d%P\tu2 %B $%d\n", r->loop,
  1151  					p, blsh(bn), change);
  1152  		}
  1153  
  1154  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1155  			change -= CLOAD * r->loop;
  1156  			if(debug['R'] && debug['v'])
  1157  				print("%d%P\tst %B $%d\n", r->loop,
  1158  					p, blsh(bn), change);
  1159  		}
  1160  
  1161  		if(r->refbehind.b[z] & bb)
  1162  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1163  				if(r1->refahead.b[z] & bb)
  1164  					paint1(r1, bn);
  1165  
  1166  		if(!(r->refahead.b[z] & bb))
  1167  			break;
  1168  		r1 = r->s2;
  1169  		if(r1 != R)
  1170  			if(r1->refbehind.b[z] & bb)
  1171  				paint1(r1, bn);
  1172  		r = r->s1;
  1173  		if(r == R)
  1174  			break;
  1175  		if(r->act.b[z] & bb)
  1176  			break;
  1177  		if(!(r->refbehind.b[z] & bb))
  1178  			break;
  1179  	}
  1180  }
  1181  
  1182  uint32
  1183  regset(Reg *r, uint32 bb)
  1184  {
  1185  	uint32 b, set;
  1186  	Addr v;
  1187  	int c;
  1188  
  1189  	set = 0;
  1190  	v = zprog.from;
  1191  	while(b = bb & ~(bb-1)) {
  1192  		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
  1193  		if(v.type == 0)
  1194  			diag(Z, "zero v.type for %#ux", b);
  1195  		c = copyu(r->prog, &v, A);
  1196  		if(c == 3)
  1197  			set |= b;
  1198  		bb &= ~b;
  1199  	}
  1200  	return set;
  1201  }
  1202  
  1203  uint32
  1204  reguse(Reg *r, uint32 bb)
  1205  {
  1206  	uint32 b, set;
  1207  	Addr v;
  1208  	int c;
  1209  
  1210  	set = 0;
  1211  	v = zprog.from;
  1212  	while(b = bb & ~(bb-1)) {
  1213  		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
  1214  		c = copyu(r->prog, &v, A);
  1215  		if(c == 1 || c == 2 || c == 4)
  1216  			set |= b;
  1217  		bb &= ~b;
  1218  	}
  1219  	return set;
  1220  }
  1221  
  1222  uint32
  1223  paint2(Reg *r, int bn)
  1224  {
  1225  	Reg *r1;
  1226  	int z;
  1227  	uint32 bb, vreg, x;
  1228  
  1229  	z = bn/32;
  1230  	bb = 1L << (bn%32);
  1231  	vreg = regbits;
  1232  	if(!(r->act.b[z] & bb))
  1233  		return vreg;
  1234  	for(;;) {
  1235  		if(!(r->refbehind.b[z] & bb))
  1236  			break;
  1237  		r1 = r->p1;
  1238  		if(r1 == R)
  1239  			break;
  1240  		if(!(r1->refahead.b[z] & bb))
  1241  			break;
  1242  		if(!(r1->act.b[z] & bb))
  1243  			break;
  1244  		r = r1;
  1245  	}
  1246  	for(;;) {
  1247  		r->act.b[z] &= ~bb;
  1248  
  1249  		vreg |= r->regu;
  1250  
  1251  		if(r->refbehind.b[z] & bb)
  1252  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1253  				if(r1->refahead.b[z] & bb)
  1254  					vreg |= paint2(r1, bn);
  1255  
  1256  		if(!(r->refahead.b[z] & bb))
  1257  			break;
  1258  		r1 = r->s2;
  1259  		if(r1 != R)
  1260  			if(r1->refbehind.b[z] & bb)
  1261  				vreg |= paint2(r1, bn);
  1262  		r = r->s1;
  1263  		if(r == R)
  1264  			break;
  1265  		if(!(r->act.b[z] & bb))
  1266  			break;
  1267  		if(!(r->refbehind.b[z] & bb))
  1268  			break;
  1269  	}
  1270  
  1271  	bb = vreg;
  1272  	for(; r; r=r->s1) {
  1273  		x = r->regu & ~bb;
  1274  		if(x) {
  1275  			vreg |= reguse(r, x);
  1276  			bb |= regset(r, x);
  1277  		}
  1278  	}
  1279  	return vreg;
  1280  }
  1281  
  1282  void
  1283  paint3(Reg *r, int bn, int32 rb, int rn)
  1284  {
  1285  	Reg *r1;
  1286  	Prog *p;
  1287  	int z;
  1288  	uint32 bb;
  1289  
  1290  	z = bn/32;
  1291  	bb = 1L << (bn%32);
  1292  	if(r->act.b[z] & bb)
  1293  		return;
  1294  	for(;;) {
  1295  		if(!(r->refbehind.b[z] & bb))
  1296  			break;
  1297  		r1 = r->p1;
  1298  		if(r1 == R)
  1299  			break;
  1300  		if(!(r1->refahead.b[z] & bb))
  1301  			break;
  1302  		if(r1->act.b[z] & bb)
  1303  			break;
  1304  		r = r1;
  1305  	}
  1306  
  1307  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1308  		addmove(r, bn, rn, 0);
  1309  	for(;;) {
  1310  		r->act.b[z] |= bb;
  1311  		p = r->prog;
  1312  
  1313  		if(r->use1.b[z] & bb) {
  1314  			if(debug['R'])
  1315  				print("%P", p);
  1316  			addreg(&p->from, rn);
  1317  			if(debug['R'])
  1318  				print("\t.c%P\n", p);
  1319  		}
  1320  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1321  			if(debug['R'])
  1322  				print("%P", p);
  1323  			addreg(&p->to, rn);
  1324  			if(debug['R'])
  1325  				print("\t.c%P\n", p);
  1326  		}
  1327  
  1328  		if(STORE(r) & r->regdiff.b[z] & bb)
  1329  			addmove(r, bn, rn, 1);
  1330  		r->regu |= rb;
  1331  
  1332  		if(r->refbehind.b[z] & bb)
  1333  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1334  				if(r1->refahead.b[z] & bb)
  1335  					paint3(r1, bn, rb, rn);
  1336  
  1337  		if(!(r->refahead.b[z] & bb))
  1338  			break;
  1339  		r1 = r->s2;
  1340  		if(r1 != R)
  1341  			if(r1->refbehind.b[z] & bb)
  1342  				paint3(r1, bn, rb, rn);
  1343  		r = r->s1;
  1344  		if(r == R)
  1345  			break;
  1346  		if(r->act.b[z] & bb)
  1347  			break;
  1348  		if(!(r->refbehind.b[z] & bb))
  1349  			break;
  1350  	}
  1351  }
  1352  
  1353  void
  1354  addreg(Addr *a, int rn)
  1355  {
  1356  
  1357  	a->sym = 0;
  1358  	a->offset = 0;
  1359  	a->type = rn;
  1360  }
  1361  
  1362  int32
  1363  RtoB(int r)
  1364  {
  1365  
  1366  	if(r < D_AX || r > D_R15)
  1367  		return 0;
  1368  	return 1L << (r-D_AX);
  1369  }
  1370  
  1371  int
  1372  BtoR(int32 b)
  1373  {
  1374  
  1375  	b &= 0xffffL;
  1376  	if(nacl)
  1377  		b &= ~((1<<(D_BP-D_AX)) | (1<<(D_R15-D_AX)));
  1378  	if(b == 0)
  1379  		return 0;
  1380  	return bitno(b) + D_AX;
  1381  }
  1382  
  1383  /*
  1384   *	bit	reg
  1385   *	16	X5
  1386   *	17	X6
  1387   *	18	X7
  1388   */
  1389  int32
  1390  FtoB(int f)
  1391  {
  1392  	if(f < FREGMIN || f > FREGEXT)
  1393  		return 0;
  1394  	return 1L << (f - FREGMIN + 16);
  1395  }
  1396  
  1397  int
  1398  BtoF(int32 b)
  1399  {
  1400  
  1401  	b &= 0x70000L;
  1402  	if(b == 0)
  1403  		return 0;
  1404  	return bitno(b) - 16 + FREGMIN;
  1405  }
  1406  
  1407  /* what instruction does a JMP to p eventually land on? */
  1408  static Reg*
  1409  chasejmp(Reg *r, int *jmploop)
  1410  {
  1411  	int n;
  1412  
  1413  	n = 0;
  1414  	for(; r; r=r->s2) {
  1415  		if(r->prog->as != AJMP || r->prog->to.type != D_BRANCH)
  1416  			break;
  1417  		if(++n > 10) {
  1418  			*jmploop = 1;
  1419  			break;
  1420  		}
  1421  	}
  1422  	return r;
  1423  }
  1424  
  1425  /* mark all code reachable from firstp as alive */
  1426  static void
  1427  mark(Reg *firstr)
  1428  {
  1429  	Reg *r;
  1430  	Prog *p;
  1431  
  1432  	for(r=firstr; r; r=r->link) {
  1433  		if(r->active)
  1434  			break;
  1435  		r->active = 1;
  1436  		p = r->prog;
  1437  		if(p->as != ACALL && p->to.type == D_BRANCH)
  1438  			mark(r->s2);
  1439  		if(p->as == AJMP || p->as == ARET || p->as == AUNDEF)
  1440  			break;
  1441  	}
  1442  }
  1443  
  1444  /*
  1445   * the code generator depends on being able to write out JMP
  1446   * instructions that it can jump to now but fill in later.
  1447   * the linker will resolve them nicely, but they make the code
  1448   * longer and more difficult to follow during debugging.
  1449   * remove them.
  1450   */
  1451  static void
  1452  fixjmp(Reg *firstr)
  1453  {
  1454  	int jmploop;
  1455  	Reg *r;
  1456  	Prog *p;
  1457  
  1458  	if(debug['R'] && debug['v'])
  1459  		print("\nfixjmp\n");
  1460  
  1461  	// pass 1: resolve jump to AJMP, mark all code as dead.
  1462  	jmploop = 0;
  1463  	for(r=firstr; r; r=r->link) {
  1464  		p = r->prog;
  1465  		if(debug['R'] && debug['v'])
  1466  			print("%04d %P\n", (int)r->pc, p);
  1467  		if(p->as != ACALL && p->to.type == D_BRANCH && r->s2 && r->s2->prog->as == AJMP) {
  1468  			r->s2 = chasejmp(r->s2, &jmploop);
  1469  			p->to.offset = r->s2->pc;
  1470  			p->to.u.branch = r->s2->prog;
  1471  			if(debug['R'] && debug['v'])
  1472  				print("->%P\n", p);
  1473  		}
  1474  		r->active = 0;
  1475  	}
  1476  	if(debug['R'] && debug['v'])
  1477  		print("\n");
  1478  
  1479  	// pass 2: mark all reachable code alive
  1480  	mark(firstr);
  1481  
  1482  	// pass 3: delete dead code (mostly JMPs).
  1483  	for(r=firstr; r; r=r->link) {
  1484  		if(!r->active) {
  1485  			p = r->prog;
  1486  			if(p->link == P && p->as == ARET && r->p1 && r->p1->prog->as != ARET) {
  1487  				// This is the final ARET, and the code so far doesn't have one.
  1488  				// Let it stay.
  1489  			} else {
  1490  				if(debug['R'] && debug['v'])
  1491  					print("del %04d %P\n", (int)r->pc, p);
  1492  				p->as = ANOP;
  1493  			}
  1494  		}
  1495  	}
  1496  
  1497  	// pass 4: elide JMP to next instruction.
  1498  	// only safe if there are no jumps to JMPs anymore.
  1499  	if(!jmploop) {
  1500  		for(r=firstr; r; r=r->link) {
  1501  			p = r->prog;
  1502  			if(p->as == AJMP && p->to.type == D_BRANCH && r->s2 == r->link) {
  1503  				if(debug['R'] && debug['v'])
  1504  					print("del %04d %P\n", (int)r->pc, p);
  1505  				p->as = ANOP;
  1506  			}
  1507  		}
  1508  	}
  1509  
  1510  	// fix back pointers.
  1511  	for(r=firstr; r; r=r->link) {
  1512  		r->p2 = R;
  1513  		r->p2link = R;
  1514  	}
  1515  	for(r=firstr; r; r=r->link) {
  1516  		if(r->s2) {
  1517  			r->p2link = r->s2->p2;
  1518  			r->s2->p2 = r;
  1519  		}
  1520  	}
  1521  
  1522  	if(debug['R'] && debug['v']) {
  1523  		print("\n");
  1524  		for(r=firstr; r; r=r->link)
  1525  			print("%04d %P\n", (int)r->pc, r->prog);
  1526  		print("\n");
  1527  	}
  1528  }
  1529