github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/6g/reg.c (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include "gg.h"
    34  #include "opt.h"
    35  
    36  #define	NREGVAR	32	/* 16 general + 16 floating */
    37  #define	REGBITS	((uint32)0xffffffff)
    38  #define	P2R(p)	(Reg*)(p->reg)
    39  
    40  static	int	first	= 1;
    41  
    42  Reg*
    43  rega(void)
    44  {
    45  	Reg *r;
    46  
    47  	r = freer;
    48  	if(r == R) {
    49  		r = mal(sizeof(*r));
    50  	} else
    51  		freer = r->link;
    52  
    53  	*r = zreg;
    54  	return r;
    55  }
    56  
    57  int
    58  rcmp(const void *a1, const void *a2)
    59  {
    60  	Rgn *p1, *p2;
    61  	int c1, c2;
    62  
    63  	p1 = (Rgn*)a1;
    64  	p2 = (Rgn*)a2;
    65  	c1 = p2->cost;
    66  	c2 = p1->cost;
    67  	if(c1 -= c2)
    68  		return c1;
    69  	return p2->varno - p1->varno;
    70  }
    71  
    72  static void
    73  setoutvar(void)
    74  {
    75  	Type *t;
    76  	Node *n;
    77  	Addr a;
    78  	Iter save;
    79  	Bits bit;
    80  	int z;
    81  
    82  	t = structfirst(&save, getoutarg(curfn->type));
    83  	while(t != T) {
    84  		n = nodarg(t, 1);
    85  		a = zprog.from;
    86  		naddr(n, &a, 0);
    87  		bit = mkvar(R, &a);
    88  		for(z=0; z<BITS; z++)
    89  			ovar.b[z] |= bit.b[z];
    90  		t = structnext(&save);
    91  	}
    92  //if(bany(&ovar))
    93  //print("ovars = %Q\n", ovar);
    94  }
    95  
    96  static void
    97  setaddrs(Bits bit)
    98  {
    99  	int i, n;
   100  	Var *v;
   101  	Node *node;
   102  
   103  	while(bany(&bit)) {
   104  		// convert each bit to a variable
   105  		i = bnum(bit);
   106  		node = var[i].node;
   107  		n = var[i].name;
   108  		bit.b[i/32] &= ~(1L<<(i%32));
   109  
   110  		// disable all pieces of that variable
   111  		for(i=0; i<nvar; i++) {
   112  			v = var+i;
   113  			if(v->node == node && v->name == n)
   114  				v->addr = 2;
   115  		}
   116  	}
   117  }
   118  
   119  static char* regname[] = {
   120  	".AX",
   121  	".CX",
   122  	".DX",
   123  	".BX",
   124  	".SP",
   125  	".BP",
   126  	".SI",
   127  	".DI",
   128  	".R8",
   129  	".R9",
   130  	".R10",
   131  	".R11",
   132  	".R12",
   133  	".R13",
   134  	".R14",
   135  	".R15",
   136  	".X0",
   137  	".X1",
   138  	".X2",
   139  	".X3",
   140  	".X4",
   141  	".X5",
   142  	".X6",
   143  	".X7",
   144  	".X8",
   145  	".X9",
   146  	".X10",
   147  	".X11",
   148  	".X12",
   149  	".X13",
   150  	".X14",
   151  	".X15",
   152  };
   153  
/*
 * Nodes for the register pseudo-variables.  regopt fills each slot
 * the first time it runs (from the matching regname entry) and
 * reuses it on later calls.
 */
static Node* regnodes[NREGVAR];

/* called by regopt on the instruction list before any analysis; not defined in this part of the file */
static void fixjmp(Prog*);
   157  
   158  void
   159  regopt(Prog *firstp)
   160  {
   161  	Reg *r, *r1;
   162  	Prog *p;
   163  	int i, z, nr;
   164  	uint32 vreg;
   165  	Bits bit;
   166  
   167  	if(first) {
   168  		fmtinstall('Q', Qconv);
   169  		exregoffset = D_R15;
   170  		first = 0;
   171  	}
   172  
   173  	fixjmp(firstp);
   174  
   175  	// count instructions
   176  	nr = 0;
   177  	for(p=firstp; p!=P; p=p->link)
   178  		nr++;
   179  	// if too big dont bother
   180  	if(nr >= 10000) {
   181  //		print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
   182  		return;
   183  	}
   184  
   185  	firstr = R;
   186  	lastr = R;
   187  
   188  	/*
   189  	 * control flow is more complicated in generated go code
   190  	 * than in generated c code.  define pseudo-variables for
   191  	 * registers, so we have complete register usage information.
   192  	 */
   193  	nvar = NREGVAR;
   194  	memset(var, 0, NREGVAR*sizeof var[0]);
   195  	for(i=0; i<NREGVAR; i++) {
   196  		if(regnodes[i] == N)
   197  			regnodes[i] = newname(lookup(regname[i]));
   198  		var[i].node = regnodes[i];
   199  	}
   200  
   201  	regbits = RtoB(D_SP);
   202  	for(z=0; z<BITS; z++) {
   203  		externs.b[z] = 0;
   204  		params.b[z] = 0;
   205  		consts.b[z] = 0;
   206  		addrs.b[z] = 0;
   207  		ovar.b[z] = 0;
   208  	}
   209  
   210  	// build list of return variables
   211  	setoutvar();
   212  
   213  	/*
   214  	 * pass 1
   215  	 * build aux data structure
   216  	 * allocate pcs
   217  	 * find use and set of variables
   218  	 */
   219  	nr = 0;
   220  	for(p=firstp; p!=P; p=p->link) {
   221  		switch(p->as) {
   222  		case ADATA:
   223  		case AGLOBL:
   224  		case ANAME:
   225  		case ASIGNAME:
   226  		case ALOCALS:
   227  		case ATYPE:
   228  			continue;
   229  		}
   230  		r = rega();
   231  		nr++;
   232  		if(firstr == R) {
   233  			firstr = r;
   234  			lastr = r;
   235  		} else {
   236  			lastr->link = r;
   237  			r->p1 = lastr;
   238  			lastr->s1 = r;
   239  			lastr = r;
   240  		}
   241  		r->prog = p;
   242  		p->reg = r;
   243  
   244  		r1 = r->p1;
   245  		if(r1 != R) {
   246  			switch(r1->prog->as) {
   247  			case ARET:
   248  			case AJMP:
   249  			case AIRETL:
   250  			case AIRETQ:
   251  				r->p1 = R;
   252  				r1->s1 = R;
   253  			}
   254  		}
   255  
   256  		// Avoid making variables for direct-called functions.
   257  		if(p->as == ACALL && p->to.type == D_EXTERN)
   258  			continue;
   259  
   260  		// Addressing makes some registers used.
   261  		if(p->from.type >= D_INDIR)
   262  			r->use1.b[0] |= RtoB(p->from.type-D_INDIR);
   263  		if(p->from.index != D_NONE)
   264  			r->use1.b[0] |= RtoB(p->from.index);
   265  		if(p->to.type >= D_INDIR)
   266  			r->use2.b[0] |= RtoB(p->to.type-D_INDIR);
   267  		if(p->to.index != D_NONE)
   268  			r->use2.b[0] |= RtoB(p->to.index);
   269  
   270  		bit = mkvar(r, &p->from);
   271  		if(bany(&bit))
   272  		switch(p->as) {
   273  		/*
   274  		 * funny
   275  		 */
   276  		case ALEAL:
   277  		case ALEAQ:
   278  			setaddrs(bit);
   279  			break;
   280  
   281  		/*
   282  		 * left side read
   283  		 */
   284  		default:
   285  			for(z=0; z<BITS; z++)
   286  				r->use1.b[z] |= bit.b[z];
   287  			break;
   288  
   289  		/*
   290  		 * left side read+write
   291  		 */
   292  		case AXCHGB:
   293  		case AXCHGW:
   294  		case AXCHGL:
   295  		case AXCHGQ:
   296  			for(z=0; z<BITS; z++) {
   297  				r->use1.b[z] |= bit.b[z];
   298  				r->set.b[z] |= bit.b[z];
   299  			}
   300  			break;
   301  		}
   302  
   303  		bit = mkvar(r, &p->to);
   304  		if(bany(&bit))
   305  		switch(p->as) {
   306  		default:
   307  			yyerror("reg: unknown op: %A", p->as);
   308  			break;
   309  
   310  		/*
   311  		 * right side read
   312  		 */
   313  		case ACMPB:
   314  		case ACMPL:
   315  		case ACMPQ:
   316  		case ACMPW:
   317  		case ACOMISS:
   318  		case ACOMISD:
   319  		case AUCOMISS:
   320  		case AUCOMISD:
   321  		case ATESTB:
   322  		case ATESTL:
   323  		case ATESTQ:
   324  			for(z=0; z<BITS; z++)
   325  				r->use2.b[z] |= bit.b[z];
   326  			break;
   327  
   328  		/*
   329  		 * right side write
   330  		 */
   331  		case ALEAQ:
   332  		case ANOP:
   333  		case AMOVL:
   334  		case AMOVQ:
   335  		case AMOVB:
   336  		case AMOVW:
   337  		case AMOVBLSX:
   338  		case AMOVBLZX:
   339  		case AMOVBWSX:
   340  		case AMOVBWZX:
   341  		case AMOVBQSX:
   342  		case AMOVBQZX:
   343  		case AMOVLQSX:
   344  		case AMOVLQZX:
   345  		case AMOVWLSX:
   346  		case AMOVWLZX:
   347  		case AMOVWQSX:
   348  		case AMOVWQZX:
   349  		case AMOVQL:
   350  		case APOPQ:
   351  
   352  		case AMOVSS:
   353  		case AMOVSD:
   354  		case ACVTSD2SL:
   355  		case ACVTSD2SQ:
   356  		case ACVTSD2SS:
   357  		case ACVTSL2SD:
   358  		case ACVTSL2SS:
   359  		case ACVTSQ2SD:
   360  		case ACVTSQ2SS:
   361  		case ACVTSS2SD:
   362  		case ACVTSS2SL:
   363  		case ACVTSS2SQ:
   364  		case ACVTTSD2SL:
   365  		case ACVTTSD2SQ:
   366  		case ACVTTSS2SL:
   367  		case ACVTTSS2SQ:
   368  			for(z=0; z<BITS; z++)
   369  				r->set.b[z] |= bit.b[z];
   370  			break;
   371  
   372  		/*
   373  		 * right side read+write
   374  		 */
   375  		case AINCB:
   376  		case AINCL:
   377  		case AINCQ:
   378  		case AINCW:
   379  		case ADECB:
   380  		case ADECL:
   381  		case ADECQ:
   382  		case ADECW:
   383  
   384  		case AADDB:
   385  		case AADDL:
   386  		case AADDQ:
   387  		case AADDW:
   388  		case AANDB:
   389  		case AANDL:
   390  		case AANDQ:
   391  		case AANDW:
   392  		case ASUBB:
   393  		case ASUBL:
   394  		case ASUBQ:
   395  		case ASUBW:
   396  		case AORB:
   397  		case AORL:
   398  		case AORQ:
   399  		case AORW:
   400  		case AXORB:
   401  		case AXORL:
   402  		case AXORQ:
   403  		case AXORW:
   404  		case ASALB:
   405  		case ASALL:
   406  		case ASALQ:
   407  		case ASALW:
   408  		case ASARB:
   409  		case ASARL:
   410  		case ASARQ:
   411  		case ASARW:
   412  		case ARCLB:
   413  		case ARCLL:
   414  		case ARCLQ:
   415  		case ARCLW:
   416  		case ARCRB:
   417  		case ARCRL:
   418  		case ARCRQ:
   419  		case ARCRW:
   420  		case AROLB:
   421  		case AROLL:
   422  		case AROLQ:
   423  		case AROLW:
   424  		case ARORB:
   425  		case ARORL:
   426  		case ARORQ:
   427  		case ARORW:
   428  		case ASHLB:
   429  		case ASHLL:
   430  		case ASHLQ:
   431  		case ASHLW:
   432  		case ASHRB:
   433  		case ASHRL:
   434  		case ASHRQ:
   435  		case ASHRW:
   436  		case AIMULL:
   437  		case AIMULQ:
   438  		case AIMULW:
   439  		case ANEGB:
   440  		case ANEGW:
   441  		case ANEGL:
   442  		case ANEGQ:
   443  		case ANOTL:
   444  		case ANOTQ:
   445  		case AADCL:
   446  		case AADCQ:
   447  		case ASBBL:
   448  		case ASBBQ:
   449  
   450  		case ASETCC:
   451  		case ASETCS:
   452  		case ASETEQ:
   453  		case ASETGE:
   454  		case ASETGT:
   455  		case ASETHI:
   456  		case ASETLE:
   457  		case ASETLS:
   458  		case ASETLT:
   459  		case ASETMI:
   460  		case ASETNE:
   461  		case ASETOC:
   462  		case ASETOS:
   463  		case ASETPC:
   464  		case ASETPL:
   465  		case ASETPS:
   466  
   467  		case AXCHGB:
   468  		case AXCHGW:
   469  		case AXCHGL:
   470  		case AXCHGQ:
   471  
   472  		case AADDSD:
   473  		case AADDSS:
   474  		case ACMPSD:
   475  		case ACMPSS:
   476  		case ADIVSD:
   477  		case ADIVSS:
   478  		case AMAXSD:
   479  		case AMAXSS:
   480  		case AMINSD:
   481  		case AMINSS:
   482  		case AMULSD:
   483  		case AMULSS:
   484  		case ARCPSS:
   485  		case ARSQRTSS:
   486  		case ASQRTSD:
   487  		case ASQRTSS:
   488  		case ASUBSD:
   489  		case ASUBSS:
   490  		case AXORPD:
   491  			for(z=0; z<BITS; z++) {
   492  				r->set.b[z] |= bit.b[z];
   493  				r->use2.b[z] |= bit.b[z];
   494  			}
   495  			break;
   496  
   497  		/*
   498  		 * funny
   499  		 */
   500  		case ACALL:
   501  			setaddrs(bit);
   502  			break;
   503  		}
   504  
   505  		switch(p->as) {
   506  		case AIMULL:
   507  		case AIMULQ:
   508  		case AIMULW:
   509  			if(p->to.type != D_NONE)
   510  				break;
   511  
   512  		case AIDIVL:
   513  		case AIDIVW:
   514  		case AIDIVQ:
   515  		case ADIVL:
   516  		case ADIVW:
   517  		case ADIVQ:
   518  		case AMULL:
   519  		case AMULW:
   520  		case AMULQ:
   521  			r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
   522  			r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX);
   523  			break;
   524  
   525  		case AIDIVB:
   526  		case AIMULB:
   527  		case ADIVB:
   528   		case AMULB:
   529  			r->set.b[0] |= RtoB(D_AX);
   530  			r->use1.b[0] |= RtoB(D_AX);
   531  			break;
   532  
   533  		case ACWD:
   534  			r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
   535  			r->use1.b[0] |= RtoB(D_AX);
   536  			break;
   537  
   538  		case ACDQ:
   539  			r->set.b[0] |= RtoB(D_DX);
   540  			r->use1.b[0] |= RtoB(D_AX);
   541   			break;
   542  
   543  		case AREP:
   544  		case AREPN:
   545  		case ALOOP:
   546  		case ALOOPEQ:
   547  		case ALOOPNE:
   548  			r->set.b[0] |= RtoB(D_CX);
   549  			r->use1.b[0] |= RtoB(D_CX);
   550  			break;
   551  
   552  		case AMOVSB:
   553  		case AMOVSL:
   554  		case AMOVSQ:
   555  		case AMOVSW:
   556  		case ACMPSB:
   557  		case ACMPSL:
   558  		case ACMPSQ:
   559  		case ACMPSW:
   560  			r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI);
   561  			r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI);
   562  			break;
   563  
   564  		case ASTOSB:
   565  		case ASTOSL:
   566  		case ASTOSQ:
   567  		case ASTOSW:
   568  		case ASCASB:
   569  		case ASCASL:
   570  		case ASCASQ:
   571  		case ASCASW:
   572  			r->set.b[0] |= RtoB(D_DI);
   573  			r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI);
   574  			break;
   575  
   576  		case AINSB:
   577  		case AINSL:
   578  		case AINSW:
   579  			r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI);
   580  			r->use1.b[0] |= RtoB(D_DI);
   581  			break;
   582  
   583  		case AOUTSB:
   584  		case AOUTSL:
   585  		case AOUTSW:
   586  			r->set.b[0] |= RtoB(D_DI);
   587  			r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI);
   588  			break;
   589  		}
   590  	}
   591  	if(firstr == R)
   592  		return;
   593  
   594  	for(i=0; i<nvar; i++) {
   595  		Var *v = var+i;
   596  		if(v->addr) {
   597  			bit = blsh(i);
   598  			for(z=0; z<BITS; z++)
   599  				addrs.b[z] |= bit.b[z];
   600  		}
   601  
   602  		if(debug['R'] && debug['v'])
   603  			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
   604  				i, v->addr, v->etype, v->width, v->node, v->offset);
   605  	}
   606  
   607  	if(debug['R'] && debug['v'])
   608  		dumpit("pass1", firstr);
   609  
   610  	/*
   611  	 * pass 2
   612  	 * turn branch references to pointers
   613  	 * build back pointers
   614  	 */
   615  	for(r=firstr; r!=R; r=r->link) {
   616  		p = r->prog;
   617  		if(p->to.type == D_BRANCH) {
   618  			if(p->to.u.branch == P)
   619  				fatal("pnil %P", p);
   620  			r1 = p->to.u.branch->reg;
   621  			if(r1 == R)
   622  				fatal("rnil %P", p);
   623  			if(r1 == r) {
   624  				//fatal("ref to self %P", p);
   625  				continue;
   626  			}
   627  			r->s2 = r1;
   628  			r->p2link = r1->p2;
   629  			r1->p2 = r;
   630  		}
   631  	}
   632  
   633  	if(debug['R'] && debug['v'])
   634  		dumpit("pass2", firstr);
   635  
   636  	/*
   637  	 * pass 2.5
   638  	 * find looping structure
   639  	 */
   640  	for(r = firstr; r != R; r = r->link)
   641  		r->active = 0;
   642  	change = 0;
   643  	loopit(firstr, nr);
   644  
   645  	if(debug['R'] && debug['v'])
   646  		dumpit("pass2.5", firstr);
   647  
   648  	/*
   649  	 * pass 3
   650  	 * iterate propagating usage
   651  	 * 	back until flow graph is complete
   652  	 */
   653  loop1:
   654  	change = 0;
   655  	for(r = firstr; r != R; r = r->link)
   656  		r->active = 0;
   657  	for(r = firstr; r != R; r = r->link)
   658  		if(r->prog->as == ARET)
   659  			prop(r, zbits, zbits);
   660  loop11:
   661  	/* pick up unreachable code */
   662  	i = 0;
   663  	for(r = firstr; r != R; r = r1) {
   664  		r1 = r->link;
   665  		if(r1 && r1->active && !r->active) {
   666  			prop(r, zbits, zbits);
   667  			i = 1;
   668  		}
   669  	}
   670  	if(i)
   671  		goto loop11;
   672  	if(change)
   673  		goto loop1;
   674  
   675  	if(debug['R'] && debug['v'])
   676  		dumpit("pass3", firstr);
   677  
   678  	/*
   679  	 * pass 4
   680  	 * iterate propagating register/variable synchrony
   681  	 * 	forward until graph is complete
   682  	 */
   683  loop2:
   684  	change = 0;
   685  	for(r = firstr; r != R; r = r->link)
   686  		r->active = 0;
   687  	synch(firstr, zbits);
   688  	if(change)
   689  		goto loop2;
   690  
   691  	if(debug['R'] && debug['v'])
   692  		dumpit("pass4", firstr);
   693  
   694  	/*
   695  	 * pass 4.5
   696  	 * move register pseudo-variables into regu.
   697  	 */
   698  	for(r = firstr; r != R; r = r->link) {
   699  		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   700  
   701  		r->set.b[0] &= ~REGBITS;
   702  		r->use1.b[0] &= ~REGBITS;
   703  		r->use2.b[0] &= ~REGBITS;
   704  		r->refbehind.b[0] &= ~REGBITS;
   705  		r->refahead.b[0] &= ~REGBITS;
   706  		r->calbehind.b[0] &= ~REGBITS;
   707  		r->calahead.b[0] &= ~REGBITS;
   708  		r->regdiff.b[0] &= ~REGBITS;
   709  		r->act.b[0] &= ~REGBITS;
   710  	}
   711  
   712  	/*
   713  	 * pass 5
   714  	 * isolate regions
   715  	 * calculate costs (paint1)
   716  	 */
   717  	r = firstr;
   718  	if(r) {
   719  		for(z=0; z<BITS; z++)
   720  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   721  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   722  		if(bany(&bit) && !r->refset) {
   723  			// should never happen - all variables are preset
   724  			if(debug['w'])
   725  				print("%L: used and not set: %Q\n", r->prog->lineno, bit);
   726  			r->refset = 1;
   727  		}
   728  	}
   729  	for(r = firstr; r != R; r = r->link)
   730  		r->act = zbits;
   731  	rgp = region;
   732  	nregion = 0;
   733  	for(r = firstr; r != R; r = r->link) {
   734  		for(z=0; z<BITS; z++)
   735  			bit.b[z] = r->set.b[z] &
   736  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   737  		if(bany(&bit) && !r->refset) {
   738  			if(debug['w'])
   739  				print("%L: set and not used: %Q\n", r->prog->lineno, bit);
   740  			r->refset = 1;
   741  			excise(r);
   742  		}
   743  		for(z=0; z<BITS; z++)
   744  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   745  		while(bany(&bit)) {
   746  			i = bnum(bit);
   747  			rgp->enter = r;
   748  			rgp->varno = i;
   749  			change = 0;
   750  			paint1(r, i);
   751  			bit.b[i/32] &= ~(1L<<(i%32));
   752  			if(change <= 0)
   753  				continue;
   754  			rgp->cost = change;
   755  			nregion++;
   756  			if(nregion >= NRGN) {
   757  				if(debug['R'] && debug['v'])
   758  					print("too many regions\n");
   759  				goto brk;
   760  			}
   761  			rgp++;
   762  		}
   763  	}
   764  brk:
   765  	qsort(region, nregion, sizeof(region[0]), rcmp);
   766  
   767  	if(debug['R'] && debug['v'])
   768  		dumpit("pass5", firstr);
   769  
   770  	/*
   771  	 * pass 6
   772  	 * determine used registers (paint2)
   773  	 * replace code (paint3)
   774  	 */
   775  	rgp = region;
   776  	for(i=0; i<nregion; i++) {
   777  		bit = blsh(rgp->varno);
   778  		vreg = paint2(rgp->enter, rgp->varno);
   779  		vreg = allreg(vreg, rgp);
   780  		if(rgp->regno != 0) {
   781  			if(debug['R'] && debug['v']) {
   782  				Var *v;
   783  
   784  				v = var + rgp->varno;
   785  				print("registerize %N+%lld (bit=%2d et=%2E) in %R\n",
   786  						v->node, v->offset, rgp->varno, v->etype, rgp->regno);
   787  			}
   788  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   789  		}
   790  		rgp++;
   791  	}
   792  
   793  	if(debug['R'] && debug['v'])
   794  		dumpit("pass6", firstr);
   795  
   796  	/*
   797  	 * pass 7
   798  	 * peep-hole on basic block
   799  	 */
   800  	if(!debug['R'] || debug['P']) {
   801  		peep();
   802  	}
   803  
   804  	/*
   805  	 * eliminate nops
   806  	 * free aux structures
   807  	 */
   808  	for(p=firstp; p!=P; p=p->link) {
   809  		while(p->link != P && p->link->as == ANOP)
   810  			p->link = p->link->link;
   811  		if(p->to.type == D_BRANCH)
   812  			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
   813  				p->to.u.branch = p->to.u.branch->link;
   814  	}
   815  
   816  	if(lastr != R) {
   817  		lastr->link = freer;
   818  		freer = firstr;
   819  	}
   820  
   821  	if(debug['R']) {
   822  		if(ostats.ncvtreg ||
   823  		   ostats.nspill ||
   824  		   ostats.nreload ||
   825  		   ostats.ndelmov ||
   826  		   ostats.nvar ||
   827  		   ostats.naddr ||
   828  		   0)
   829  			print("\nstats\n");
   830  
   831  		if(ostats.ncvtreg)
   832  			print("	%4d cvtreg\n", ostats.ncvtreg);
   833  		if(ostats.nspill)
   834  			print("	%4d spill\n", ostats.nspill);
   835  		if(ostats.nreload)
   836  			print("	%4d reload\n", ostats.nreload);
   837  		if(ostats.ndelmov)
   838  			print("	%4d delmov\n", ostats.ndelmov);
   839  		if(ostats.nvar)
   840  			print("	%4d var\n", ostats.nvar);
   841  		if(ostats.naddr)
   842  			print("	%4d addr\n", ostats.naddr);
   843  
   844  		memset(&ostats, 0, sizeof(ostats));
   845  	}
   846  }
   847  
   848  /*
   849   * add mov b,rn
   850   * just after r
   851   */
   852  void
   853  addmove(Reg *r, int bn, int rn, int f)
   854  {
   855  	Prog *p, *p1;
   856  	Adr *a;
   857  	Var *v;
   858  
   859  	p1 = mal(sizeof(*p1));
   860  	clearp(p1);
   861  	p1->loc = 9999;
   862  
   863  	p = r->prog;
   864  	p1->link = p->link;
   865  	p->link = p1;
   866  	p1->lineno = p->lineno;
   867  
   868  	v = var + bn;
   869  
   870  	a = &p1->to;
   871  	a->offset = v->offset;
   872  	a->etype = v->etype;
   873  	a->type = v->name;
   874  	a->node = v->node;
   875  	a->sym = v->node->sym;
   876  
   877  	// need to clean this up with wptr and
   878  	// some of the defaults
   879  	p1->as = AMOVL;
   880  	switch(v->etype) {
   881  	default:
   882  		fatal("unknown type %E", v->etype);
   883  	case TINT8:
   884  	case TUINT8:
   885  	case TBOOL:
   886  		p1->as = AMOVB;
   887  		break;
   888  	case TINT16:
   889  	case TUINT16:
   890  		p1->as = AMOVW;
   891  		break;
   892  	case TINT64:
   893  	case TUINT64:
   894  	case TUINTPTR:
   895  	case TPTR64:
   896  		p1->as = AMOVQ;
   897  		break;
   898  	case TFLOAT32:
   899  		p1->as = AMOVSS;
   900  		break;
   901  	case TFLOAT64:
   902  		p1->as = AMOVSD;
   903  		break;
   904  	case TINT:
   905  	case TUINT:
   906  	case TINT32:
   907  	case TUINT32:
   908  	case TPTR32:
   909  		break;
   910  	}
   911  
   912  	p1->from.type = rn;
   913  	if(!f) {
   914  		p1->from = *a;
   915  		*a = zprog.from;
   916  		a->type = rn;
   917  		if(v->etype == TUINT8)
   918  			p1->as = AMOVB;
   919  		if(v->etype == TUINT16)
   920  			p1->as = AMOVW;
   921  	}
   922  	if(debug['R'] && debug['v'])
   923  		print("%P ===add=== %P\n", p, p1);
   924  	ostats.nspill++;
   925  }
   926  
   927  uint32
   928  doregbits(int r)
   929  {
   930  	uint32 b;
   931  
   932  	b = 0;
   933  	if(r >= D_INDIR)
   934  		r -= D_INDIR;
   935  	if(r >= D_AX && r <= D_R15)
   936  		b |= RtoB(r);
   937  	else
   938  	if(r >= D_AL && r <= D_R15B)
   939  		b |= RtoB(r-D_AL+D_AX);
   940  	else
   941  	if(r >= D_AH && r <= D_BH)
   942  		b |= RtoB(r-D_AH+D_AX);
   943  	else
   944  	if(r >= D_X0 && r <= D_X0+15)
   945  		b |= FtoB(r);
   946  	return b;
   947  }
   948  
   949  static int
   950  overlap(int64 o1, int w1, int64 o2, int w2)
   951  {
   952  	int64 t1, t2;
   953  
   954  	t1 = o1+w1;
   955  	t2 = o2+w2;
   956  
   957  	if(!(t1 > o2 && t2 > o1))
   958  		return 0;
   959  
   960  	return 1;
   961  }
   962  
   963  Bits
   964  mkvar(Reg *r, Adr *a)
   965  {
   966  	Var *v;
   967  	int i, t, n, et, z, flag;
   968  	int64 w;
   969  	uint32 regu;
   970  	int64 o;
   971  	Bits bit;
   972  	Node *node;
   973  
   974  	/*
   975  	 * mark registers used
   976  	 */
   977  	t = a->type;
   978  	if(t == D_NONE)
   979  		goto none;
   980  
   981  	if(r != R)
   982  		r->use1.b[0] |= doregbits(a->index);
   983  
   984  	switch(t) {
   985  	default:
   986  		regu = doregbits(t);
   987  		if(regu == 0)
   988  			goto none;
   989  		bit = zbits;
   990  		bit.b[0] = regu;
   991  		return bit;
   992  
   993  	case D_ADDR:
   994  		a->type = a->index;
   995  		bit = mkvar(r, a);
   996  		setaddrs(bit);
   997  		a->type = t;
   998  		ostats.naddr++;
   999  		goto none;
  1000  
  1001  	case D_EXTERN:
  1002  	case D_STATIC:
  1003  	case D_PARAM:
  1004  	case D_AUTO:
  1005  		n = t;
  1006  		break;
  1007  	}
  1008  
  1009  	node = a->node;
  1010  	if(node == N || node->op != ONAME || node->orig == N)
  1011  		goto none;
  1012  	node = node->orig;
  1013  	if(node->orig != node)
  1014  		fatal("%D: bad node", a);
  1015  	if(node->sym == S || node->sym->name[0] == '.')
  1016  		goto none;
  1017  	et = a->etype;
  1018  	o = a->offset;
  1019  	w = a->width;
  1020  	if(w < 0)
  1021  		fatal("bad width %lld for %D", w, a);
  1022  
  1023  	flag = 0;
  1024  	for(i=0; i<nvar; i++) {
  1025  		v = var+i;
  1026  		if(v->node == node && v->name == n) {
  1027  			if(v->offset == o)
  1028  			if(v->etype == et)
  1029  			if(v->width == w)
  1030  				return blsh(i);
  1031  
  1032  			// if they overlaps, disable both
  1033  			if(overlap(v->offset, v->width, o, w)) {
  1034  //				print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
  1035  				v->addr = 1;
  1036  				flag = 1;
  1037  			}
  1038  		}
  1039  	}
  1040  	switch(et) {
  1041  	case 0:
  1042  	case TFUNC:
  1043  		goto none;
  1044  	}
  1045  
  1046  	if(nvar >= NVAR) {
  1047  		if(debug['w'] > 1 && node != N)
  1048  			fatal("variable not optimized: %#N", node);
  1049  		goto none;
  1050  	}
  1051  
  1052  	i = nvar;
  1053  	nvar++;
  1054  	v = var+i;
  1055  	v->offset = o;
  1056  	v->name = n;
  1057  	v->etype = et;
  1058  	v->width = w;
  1059  	v->addr = flag;		// funny punning
  1060  	v->node = node;
  1061  
  1062  	if(debug['R'])
  1063  		print("bit=%2d et=%2E w=%d+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
  1064  
  1065  	ostats.nvar++;
  1066  
  1067  	bit = blsh(i);
  1068  	if(n == D_EXTERN || n == D_STATIC)
  1069  		for(z=0; z<BITS; z++)
  1070  			externs.b[z] |= bit.b[z];
  1071  	if(n == D_PARAM)
  1072  		for(z=0; z<BITS; z++)
  1073  			params.b[z] |= bit.b[z];
  1074  
  1075  	return bit;
  1076  
  1077  none:
  1078  	return zbits;
  1079  }
  1080  
  1081  void
  1082  prop(Reg *r, Bits ref, Bits cal)
  1083  {
  1084  	Reg *r1, *r2;
  1085  	int z;
  1086  
  1087  	for(r1 = r; r1 != R; r1 = r1->p1) {
  1088  		for(z=0; z<BITS; z++) {
  1089  			ref.b[z] |= r1->refahead.b[z];
  1090  			if(ref.b[z] != r1->refahead.b[z]) {
  1091  				r1->refahead.b[z] = ref.b[z];
  1092  				change++;
  1093  			}
  1094  			cal.b[z] |= r1->calahead.b[z];
  1095  			if(cal.b[z] != r1->calahead.b[z]) {
  1096  				r1->calahead.b[z] = cal.b[z];
  1097  				change++;
  1098  			}
  1099  		}
  1100  		switch(r1->prog->as) {
  1101  		case ACALL:
  1102  			if(noreturn(r1->prog))
  1103  				break;
  1104  			for(z=0; z<BITS; z++) {
  1105  				cal.b[z] |= ref.b[z] | externs.b[z];
  1106  				ref.b[z] = 0;
  1107  			}
  1108  			break;
  1109  
  1110  		case ATEXT:
  1111  			for(z=0; z<BITS; z++) {
  1112  				cal.b[z] = 0;
  1113  				ref.b[z] = 0;
  1114  			}
  1115  			break;
  1116  
  1117  		case ARET:
  1118  			for(z=0; z<BITS; z++) {
  1119  				cal.b[z] = externs.b[z] | ovar.b[z];
  1120  				ref.b[z] = 0;
  1121  			}
  1122  			break;
  1123  
  1124  		default:
  1125  			// Work around for issue 1304:
  1126  			// flush modified globals before each instruction.
  1127  			for(z=0; z<BITS; z++) {
  1128  				cal.b[z] |= externs.b[z];
  1129  				// issue 4066: flush modified return variables in case of panic
  1130  				if(hasdefer)
  1131  					cal.b[z] |= ovar.b[z];
  1132  			}
  1133  			break;
  1134  		}
  1135  		for(z=0; z<BITS; z++) {
  1136  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  1137  				r1->use1.b[z] | r1->use2.b[z];
  1138  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  1139  			r1->refbehind.b[z] = ref.b[z];
  1140  			r1->calbehind.b[z] = cal.b[z];
  1141  		}
  1142  		if(r1->active)
  1143  			break;
  1144  		r1->active = 1;
  1145  	}
  1146  	for(; r != r1; r = r->p1)
  1147  		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  1148  			prop(r2, r->refbehind, r->calbehind);
  1149  }
  1150  
  1151  /*
  1152   * find looping structure
  1153   *
  1154   * 1) find reverse postordering
  1155   * 2) find approximate dominators,
  1156   *	the actual dominators if the flow graph is reducible
  1157   *	otherwise, dominators plus some other non-dominators.
  1158   *	See Matthew S. Hecht and Jeffrey D. Ullman,
  1159   *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
  1160   *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  1161   *	Oct. 1-3, 1973, pp.  207-217.
  1162   * 3) find all nodes with a predecessor dominated by the current node.
  1163   *	such a node is a loop head.
  1164   *	recursively, all preds with a greater rpo number are in the loop
  1165   */
  1166  int32
  1167  postorder(Reg *r, Reg **rpo2r, int32 n)
  1168  {
  1169  	Reg *r1;
  1170  
  1171  	r->rpo = 1;
  1172  	r1 = r->s1;
  1173  	if(r1 && !r1->rpo)
  1174  		n = postorder(r1, rpo2r, n);
  1175  	r1 = r->s2;
  1176  	if(r1 && !r1->rpo)
  1177  		n = postorder(r1, rpo2r, n);
  1178  	rpo2r[n] = r;
  1179  	n++;
  1180  	return n;
  1181  }
  1182  
  1183  int32
  1184  rpolca(int32 *idom, int32 rpo1, int32 rpo2)
  1185  {
  1186  	int32 t;
  1187  
  1188  	if(rpo1 == -1)
  1189  		return rpo2;
  1190  	while(rpo1 != rpo2){
  1191  		if(rpo1 > rpo2){
  1192  			t = rpo2;
  1193  			rpo2 = rpo1;
  1194  			rpo1 = t;
  1195  		}
  1196  		while(rpo1 < rpo2){
  1197  			t = idom[rpo2];
  1198  			if(t >= rpo2)
  1199  				fatal("bad idom");
  1200  			rpo2 = t;
  1201  		}
  1202  	}
  1203  	return rpo1;
  1204  }
  1205  
  1206  int
  1207  doms(int32 *idom, int32 r, int32 s)
  1208  {
  1209  	while(s > r)
  1210  		s = idom[s];
  1211  	return s == r;
  1212  }
  1213  
  1214  int
  1215  loophead(int32 *idom, Reg *r)
  1216  {
  1217  	int32 src;
  1218  
  1219  	src = r->rpo;
  1220  	if(r->p1 != R && doms(idom, src, r->p1->rpo))
  1221  		return 1;
  1222  	for(r = r->p2; r != R; r = r->p2link)
  1223  		if(doms(idom, src, r->rpo))
  1224  			return 1;
  1225  	return 0;
  1226  }
  1227  
  1228  void
  1229  loopmark(Reg **rpo2r, int32 head, Reg *r)
  1230  {
  1231  	if(r->rpo < head || r->active == head)
  1232  		return;
  1233  	r->active = head;
  1234  	r->loop += LOOP;
  1235  	if(r->p1 != R)
  1236  		loopmark(rpo2r, head, r->p1);
  1237  	for(r = r->p2; r != R; r = r->p2link)
  1238  		loopmark(rpo2r, head, r);
  1239  }
  1240  
  1241  void
  1242  loopit(Reg *r, int32 nr)
  1243  {
  1244  	Reg *r1;
  1245  	int32 i, d, me;
  1246  
  1247  	if(nr > maxnr) {
  1248  		rpo2r = mal(nr * sizeof(Reg*));
  1249  		idom = mal(nr * sizeof(int32));
  1250  		maxnr = nr;
  1251  	}
  1252  
  1253  	d = postorder(r, rpo2r, 0);
  1254  	if(d > nr)
  1255  		fatal("too many reg nodes %d %d", d, nr);
  1256  	nr = d;
  1257  	for(i = 0; i < nr / 2; i++) {
  1258  		r1 = rpo2r[i];
  1259  		rpo2r[i] = rpo2r[nr - 1 - i];
  1260  		rpo2r[nr - 1 - i] = r1;
  1261  	}
  1262  	for(i = 0; i < nr; i++)
  1263  		rpo2r[i]->rpo = i;
  1264  
  1265  	idom[0] = 0;
  1266  	for(i = 0; i < nr; i++) {
  1267  		r1 = rpo2r[i];
  1268  		me = r1->rpo;
  1269  		d = -1;
  1270  		// rpo2r[r->rpo] == r protects against considering dead code,
  1271  		// which has r->rpo == 0.
  1272  		if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
  1273  			d = r1->p1->rpo;
  1274  		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  1275  			if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
  1276  				d = rpolca(idom, d, r1->rpo);
  1277  		idom[i] = d;
  1278  	}
  1279  
  1280  	for(i = 0; i < nr; i++) {
  1281  		r1 = rpo2r[i];
  1282  		r1->loop++;
  1283  		if(r1->p2 != R && loophead(idom, r1))
  1284  			loopmark(rpo2r, i, r1);
  1285  	}
  1286  }
  1287  
  1288  void
  1289  synch(Reg *r, Bits dif)
  1290  {
  1291  	Reg *r1;
  1292  	int z;
  1293  
  1294  	for(r1 = r; r1 != R; r1 = r1->s1) {
  1295  		for(z=0; z<BITS; z++) {
  1296  			dif.b[z] = (dif.b[z] &
  1297  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  1298  					r1->set.b[z] | r1->regdiff.b[z];
  1299  			if(dif.b[z] != r1->regdiff.b[z]) {
  1300  				r1->regdiff.b[z] = dif.b[z];
  1301  				change++;
  1302  			}
  1303  		}
  1304  		if(r1->active)
  1305  			break;
  1306  		r1->active = 1;
  1307  		for(z=0; z<BITS; z++)
  1308  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  1309  		if(r1->s2 != R)
  1310  			synch(r1->s2, dif);
  1311  	}
  1312  }
  1313  
  1314  uint32
  1315  allreg(uint32 b, Rgn *r)
  1316  {
  1317  	Var *v;
  1318  	int i;
  1319  
  1320  	v = var + r->varno;
  1321  	r->regno = 0;
  1322  	switch(v->etype) {
  1323  
  1324  	default:
  1325  		fatal("unknown etype %d/%E", bitno(b), v->etype);
  1326  		break;
  1327  
  1328  	case TINT8:
  1329  	case TUINT8:
  1330  	case TINT16:
  1331  	case TUINT16:
  1332  	case TINT32:
  1333  	case TUINT32:
  1334  	case TINT64:
  1335  	case TUINT64:
  1336  	case TINT:
  1337  	case TUINT:
  1338  	case TUINTPTR:
  1339  	case TBOOL:
  1340  	case TPTR32:
  1341  	case TPTR64:
  1342  		i = BtoR(~b);
  1343  		if(i && r->cost > 0) {
  1344  			r->regno = i;
  1345  			return RtoB(i);
  1346  		}
  1347  		break;
  1348  
  1349  	case TFLOAT32:
  1350  	case TFLOAT64:
  1351  		i = BtoF(~b);
  1352  		if(i && r->cost > 0) {
  1353  			r->regno = i;
  1354  			return FtoB(i);
  1355  		}
  1356  		break;
  1357  	}
  1358  	return 0;
  1359  }
  1360  
  1361  void
  1362  paint1(Reg *r, int bn)
  1363  {
  1364  	Reg *r1;
  1365  	int z;
  1366  	uint32 bb;
  1367  
  1368  	z = bn/32;
  1369  	bb = 1L<<(bn%32);
  1370  	if(r->act.b[z] & bb)
  1371  		return;
  1372  	for(;;) {
  1373  		if(!(r->refbehind.b[z] & bb))
  1374  			break;
  1375  		r1 = r->p1;
  1376  		if(r1 == R)
  1377  			break;
  1378  		if(!(r1->refahead.b[z] & bb))
  1379  			break;
  1380  		if(r1->act.b[z] & bb)
  1381  			break;
  1382  		r = r1;
  1383  	}
  1384  
  1385  	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  1386  		change -= CLOAD * r->loop;
  1387  	}
  1388  	for(;;) {
  1389  		r->act.b[z] |= bb;
  1390  
  1391  		if(r->use1.b[z] & bb) {
  1392  			change += CREF * r->loop;
  1393  		}
  1394  
  1395  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1396  			change += CREF * r->loop;
  1397  		}
  1398  
  1399  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1400  			change -= CLOAD * r->loop;
  1401  		}
  1402  
  1403  		if(r->refbehind.b[z] & bb)
  1404  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1405  				if(r1->refahead.b[z] & bb)
  1406  					paint1(r1, bn);
  1407  
  1408  		if(!(r->refahead.b[z] & bb))
  1409  			break;
  1410  		r1 = r->s2;
  1411  		if(r1 != R)
  1412  			if(r1->refbehind.b[z] & bb)
  1413  				paint1(r1, bn);
  1414  		r = r->s1;
  1415  		if(r == R)
  1416  			break;
  1417  		if(r->act.b[z] & bb)
  1418  			break;
  1419  		if(!(r->refbehind.b[z] & bb))
  1420  			break;
  1421  	}
  1422  }
  1423  
  1424  uint32
  1425  regset(Reg *r, uint32 bb)
  1426  {
  1427  	uint32 b, set;
  1428  	Adr v;
  1429  	int c;
  1430  
  1431  	set = 0;
  1432  	v = zprog.from;
  1433  	while(b = bb & ~(bb-1)) {
  1434  		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
  1435  		if(v.type == 0)
  1436  			fatal("zero v.type for %#ux", b);
  1437  		c = copyu(r->prog, &v, A);
  1438  		if(c == 3)
  1439  			set |= b;
  1440  		bb &= ~b;
  1441  	}
  1442  	return set;
  1443  }
  1444  
  1445  uint32
  1446  reguse(Reg *r, uint32 bb)
  1447  {
  1448  	uint32 b, set;
  1449  	Adr v;
  1450  	int c;
  1451  
  1452  	set = 0;
  1453  	v = zprog.from;
  1454  	while(b = bb & ~(bb-1)) {
  1455  		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
  1456  		c = copyu(r->prog, &v, A);
  1457  		if(c == 1 || c == 2 || c == 4)
  1458  			set |= b;
  1459  		bb &= ~b;
  1460  	}
  1461  	return set;
  1462  }
  1463  
  1464  uint32
  1465  paint2(Reg *r, int bn)
  1466  {
  1467  	Reg *r1;
  1468  	int z;
  1469  	uint32 bb, vreg, x;
  1470  
  1471  	z = bn/32;
  1472  	bb = 1L << (bn%32);
  1473  	vreg = regbits;
  1474  	if(!(r->act.b[z] & bb))
  1475  		return vreg;
  1476  	for(;;) {
  1477  		if(!(r->refbehind.b[z] & bb))
  1478  			break;
  1479  		r1 = r->p1;
  1480  		if(r1 == R)
  1481  			break;
  1482  		if(!(r1->refahead.b[z] & bb))
  1483  			break;
  1484  		if(!(r1->act.b[z] & bb))
  1485  			break;
  1486  		r = r1;
  1487  	}
  1488  	for(;;) {
  1489  		r->act.b[z] &= ~bb;
  1490  
  1491  		vreg |= r->regu;
  1492  
  1493  		if(r->refbehind.b[z] & bb)
  1494  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1495  				if(r1->refahead.b[z] & bb)
  1496  					vreg |= paint2(r1, bn);
  1497  
  1498  		if(!(r->refahead.b[z] & bb))
  1499  			break;
  1500  		r1 = r->s2;
  1501  		if(r1 != R)
  1502  			if(r1->refbehind.b[z] & bb)
  1503  				vreg |= paint2(r1, bn);
  1504  		r = r->s1;
  1505  		if(r == R)
  1506  			break;
  1507  		if(!(r->act.b[z] & bb))
  1508  			break;
  1509  		if(!(r->refbehind.b[z] & bb))
  1510  			break;
  1511  	}
  1512  
  1513  	bb = vreg;
  1514  	for(; r; r=r->s1) {
  1515  		x = r->regu & ~bb;
  1516  		if(x) {
  1517  			vreg |= reguse(r, x);
  1518  			bb |= regset(r, x);
  1519  		}
  1520  	}
  1521  	return vreg;
  1522  }
  1523  
  1524  void
  1525  paint3(Reg *r, int bn, int32 rb, int rn)
  1526  {
  1527  	Reg *r1;
  1528  	Prog *p;
  1529  	int z;
  1530  	uint32 bb;
  1531  
  1532  	z = bn/32;
  1533  	bb = 1L << (bn%32);
  1534  	if(r->act.b[z] & bb)
  1535  		return;
  1536  	for(;;) {
  1537  		if(!(r->refbehind.b[z] & bb))
  1538  			break;
  1539  		r1 = r->p1;
  1540  		if(r1 == R)
  1541  			break;
  1542  		if(!(r1->refahead.b[z] & bb))
  1543  			break;
  1544  		if(r1->act.b[z] & bb)
  1545  			break;
  1546  		r = r1;
  1547  	}
  1548  
  1549  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1550  		addmove(r, bn, rn, 0);
  1551  	for(;;) {
  1552  		r->act.b[z] |= bb;
  1553  		p = r->prog;
  1554  
  1555  		if(r->use1.b[z] & bb) {
  1556  			if(debug['R'] && debug['v'])
  1557  				print("%P", p);
  1558  			addreg(&p->from, rn);
  1559  			if(debug['R'] && debug['v'])
  1560  				print(" ===change== %P\n", p);
  1561  		}
  1562  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1563  			if(debug['R'] && debug['v'])
  1564  				print("%P", p);
  1565  			addreg(&p->to, rn);
  1566  			if(debug['R'] && debug['v'])
  1567  				print(" ===change== %P\n", p);
  1568  		}
  1569  
  1570  		if(STORE(r) & r->regdiff.b[z] & bb)
  1571  			addmove(r, bn, rn, 1);
  1572  		r->regu |= rb;
  1573  
  1574  		if(r->refbehind.b[z] & bb)
  1575  			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1576  				if(r1->refahead.b[z] & bb)
  1577  					paint3(r1, bn, rb, rn);
  1578  
  1579  		if(!(r->refahead.b[z] & bb))
  1580  			break;
  1581  		r1 = r->s2;
  1582  		if(r1 != R)
  1583  			if(r1->refbehind.b[z] & bb)
  1584  				paint3(r1, bn, rb, rn);
  1585  		r = r->s1;
  1586  		if(r == R)
  1587  			break;
  1588  		if(r->act.b[z] & bb)
  1589  			break;
  1590  		if(!(r->refbehind.b[z] & bb))
  1591  			break;
  1592  	}
  1593  }
  1594  
  1595  void
  1596  addreg(Adr *a, int rn)
  1597  {
  1598  
  1599  	a->sym = 0;
  1600  	a->offset = 0;
  1601  	a->type = rn;
  1602  
  1603  	ostats.ncvtreg++;
  1604  }
  1605  
  1606  int32
  1607  RtoB(int r)
  1608  {
  1609  
  1610  	if(r < D_AX || r > D_R15)
  1611  		return 0;
  1612  	return 1L << (r-D_AX);
  1613  }
  1614  
  1615  int
  1616  BtoR(int32 b)
  1617  {
  1618  	b &= 0xffffL;
  1619  	if(b == 0)
  1620  		return 0;
  1621  	return bitno(b) + D_AX;
  1622  }
  1623  
  1624  /*
  1625   *	bit	reg
  1626   *	16	X0
  1627   *	...
  1628   *	31	X15
  1629   */
  1630  int32
  1631  FtoB(int f)
  1632  {
  1633  	if(f < D_X0 || f > D_X15)
  1634  		return 0;
  1635  	return 1L << (f - D_X0 + 16);
  1636  }
  1637  
  1638  int
  1639  BtoF(int32 b)
  1640  {
  1641  
  1642  	b &= 0xFFFF0000L;
  1643  	if(b == 0)
  1644  		return 0;
  1645  	return bitno(b) - 16 + D_X0;
  1646  }
  1647  
  1648  void
  1649  dumpone(Reg *r)
  1650  {
  1651  	int z;
  1652  	Bits bit;
  1653  
  1654  	print("%d:%P", r->loop, r->prog);
  1655  	for(z=0; z<BITS; z++)
  1656  		bit.b[z] =
  1657  			r->set.b[z] |
  1658  			r->use1.b[z] |
  1659  			r->use2.b[z] |
  1660  			r->refbehind.b[z] |
  1661  			r->refahead.b[z] |
  1662  			r->calbehind.b[z] |
  1663  			r->calahead.b[z] |
  1664  			r->regdiff.b[z] |
  1665  			r->act.b[z] |
  1666  				0;
  1667  	if(bany(&bit)) {
  1668  		print("\t");
  1669  		if(bany(&r->set))
  1670  			print(" s:%Q", r->set);
  1671  		if(bany(&r->use1))
  1672  			print(" u1:%Q", r->use1);
  1673  		if(bany(&r->use2))
  1674  			print(" u2:%Q", r->use2);
  1675  		if(bany(&r->refbehind))
  1676  			print(" rb:%Q ", r->refbehind);
  1677  		if(bany(&r->refahead))
  1678  			print(" ra:%Q ", r->refahead);
  1679  		if(bany(&r->calbehind))
  1680  			print(" cb:%Q ", r->calbehind);
  1681  		if(bany(&r->calahead))
  1682  			print(" ca:%Q ", r->calahead);
  1683  		if(bany(&r->regdiff))
  1684  			print(" d:%Q ", r->regdiff);
  1685  		if(bany(&r->act))
  1686  			print(" a:%Q ", r->act);
  1687  	}
  1688  	print("\n");
  1689  }
  1690  
  1691  void
  1692  dumpit(char *str, Reg *r0)
  1693  {
  1694  	Reg *r, *r1;
  1695  
  1696  	print("\n%s\n", str);
  1697  	for(r = r0; r != R; r = r->link) {
  1698  		dumpone(r);
  1699  		r1 = r->p2;
  1700  		if(r1 != R) {
  1701  			print("	pred:");
  1702  			for(; r1 != R; r1 = r1->p2link)
  1703  				print(" %.4ud", r1->prog->loc);
  1704  			print("\n");
  1705  		}
  1706  //		r1 = r->s1;
  1707  //		if(r1 != R) {
  1708  //			print("	succ:");
  1709  //			for(; r1 != R; r1 = r1->s1)
  1710  //				print(" %.4ud", r1->prog->loc);
  1711  //			print("\n");
  1712  //		}
  1713  	}
  1714  }
  1715  
  1716  static Sym*	symlist[10];
  1717  
  1718  int
  1719  noreturn(Prog *p)
  1720  {
  1721  	Sym *s;
  1722  	int i;
  1723  
  1724  	if(symlist[0] == S) {
  1725  		symlist[0] = pkglookup("panicindex", runtimepkg);
  1726  		symlist[1] = pkglookup("panicslice", runtimepkg);
  1727  		symlist[2] = pkglookup("throwinit", runtimepkg);
  1728  		symlist[3] = pkglookup("panic", runtimepkg);
  1729  		symlist[4] = pkglookup("panicwrap", runtimepkg);
  1730  	}
  1731  
  1732  	s = p->to.sym;
  1733  	if(s == S)
  1734  		return 0;
  1735  	for(i=0; symlist[i]!=S; i++)
  1736  		if(s == symlist[i])
  1737  			return 1;
  1738  	return 0;
  1739  }
  1740  
  1741  /*
  1742   * the code generator depends on being able to write out JMP
  1743   * instructions that it can jump to now but fill in later.
  1744   * the linker will resolve them nicely, but they make the code
  1745   * longer and more difficult to follow during debugging.
  1746   * remove them.
  1747   */
  1748  
  1749  /* what instruction does a JMP to p eventually land on? */
  1750  static Prog*
  1751  chasejmp(Prog *p, int *jmploop)
  1752  {
  1753  	int n;
  1754  
  1755  	n = 0;
  1756  	while(p != P && p->as == AJMP && p->to.type == D_BRANCH) {
  1757  		if(++n > 10) {
  1758  			*jmploop = 1;
  1759  			break;
  1760  		}
  1761  		p = p->to.u.branch;
  1762  	}
  1763  	return p;
  1764  }
  1765  
  1766  /*
  1767   * reuse reg pointer for mark/sweep state.
  1768   * leave reg==nil at end because alive==nil.
  1769   */
  1770  #define alive ((void*)0)
  1771  #define dead ((void*)1)
  1772  
  1773  /* mark all code reachable from firstp as alive */
  1774  static void
  1775  mark(Prog *firstp)
  1776  {
  1777  	Prog *p;
  1778  	
  1779  	for(p=firstp; p; p=p->link) {
  1780  		if(p->reg != dead)
  1781  			break;
  1782  		p->reg = alive;
  1783  		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch)
  1784  			mark(p->to.u.branch);
  1785  		if(p->as == AJMP || p->as == ARET || p->as == AUNDEF)
  1786  			break;
  1787  	}
  1788  }
  1789  
  1790  static void
  1791  fixjmp(Prog *firstp)
  1792  {
  1793  	int jmploop;
  1794  	Prog *p, *last;
  1795  	
  1796  	if(debug['R'] && debug['v'])
  1797  		print("\nfixjmp\n");
  1798  
  1799  	// pass 1: resolve jump to AJMP, mark all code as dead.
  1800  	jmploop = 0;
  1801  	for(p=firstp; p; p=p->link) {
  1802  		if(debug['R'] && debug['v'])
  1803  			print("%P\n", p);
  1804  		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) {
  1805  			p->to.u.branch = chasejmp(p->to.u.branch, &jmploop);
  1806  			if(debug['R'] && debug['v'])
  1807  				print("->%P\n", p);
  1808  		}
  1809  		p->reg = dead;
  1810  	}
  1811  	if(debug['R'] && debug['v'])
  1812  		print("\n");
  1813  
  1814  	// pass 2: mark all reachable code alive
  1815  	mark(firstp);
  1816  	
  1817  	// pass 3: delete dead code (mostly JMPs).
  1818  	last = nil;
  1819  	for(p=firstp; p; p=p->link) {
  1820  		if(p->reg == dead) {
  1821  			if(p->link == P && p->as == ARET && last && last->as != ARET) {
  1822  				// This is the final ARET, and the code so far doesn't have one.
  1823  				// Let it stay.
  1824  			} else {
  1825  				if(debug['R'] && debug['v'])
  1826  					print("del %P\n", p);
  1827  				continue;
  1828  			}
  1829  		}
  1830  		if(last)
  1831  			last->link = p;
  1832  		last = p;
  1833  	}
  1834  	last->link = P;
  1835  	
  1836  	// pass 4: elide JMP to next instruction.
  1837  	// only safe if there are no jumps to JMPs anymore.
  1838  	if(!jmploop) {
  1839  		last = nil;
  1840  		for(p=firstp; p; p=p->link) {
  1841  			if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) {
  1842  				if(debug['R'] && debug['v'])
  1843  					print("del %P\n", p);
  1844  				continue;
  1845  			}
  1846  			if(last)
  1847  				last->link = p;
  1848  			last = p;
  1849  		}
  1850  		last->link = P;
  1851  	}
  1852  	
  1853  	if(debug['R'] && debug['v']) {
  1854  		print("\n");
  1855  		for(p=firstp; p; p=p->link)
  1856  			print("%P\n", p);
  1857  		print("\n");
  1858  	}
  1859  }