github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/5g/reg.c (about)

     1  // Inferno utils/5c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  
    32  #include <u.h>
    33  #include <libc.h>
    34  #include "gg.h"
    35  #include "opt.h"
    36  
    37  #define	NREGVAR	32
    38  #define	REGBITS	((uint32)0xffffffff)
    39  
    40  	void	addsplits(void);
    41  static	Reg*	firstr;
    42  static	int	first	= 1;
    43  
    44  int
    45  rcmp(const void *a1, const void *a2)
    46  {
    47  	Rgn *p1, *p2;
    48  	int c1, c2;
    49  
    50  	p1 = (Rgn*)a1;
    51  	p2 = (Rgn*)a2;
    52  	c1 = p2->cost;
    53  	c2 = p1->cost;
    54  	if(c1 -= c2)
    55  		return c1;
    56  	return p2->varno - p1->varno;
    57  }
    58  
    59  static void
    60  setoutvar(void)
    61  {
    62  	Type *t;
    63  	Node *n;
    64  	Addr a;
    65  	Iter save;
    66  	Bits bit;
    67  	int z;
    68  
    69  	t = structfirst(&save, getoutarg(curfn->type));
    70  	while(t != T) {
    71  		n = nodarg(t, 1);
    72  		a = zprog.from;
    73  		naddr(n, &a, 0);
    74  		bit = mkvar(R, &a);
    75  		for(z=0; z<BITS; z++)
    76  			ovar.b[z] |= bit.b[z];
    77  		t = structnext(&save);
    78  	}
    79  //if(bany(&ovar))
    80  //print("ovar = %Q\n", ovar);
    81  }
    82  
    83  void
    84  excise(Flow *r)
    85  {
    86  	Prog *p;
    87  
    88  	p = r->prog;
    89  	p->as = ANOP;
    90  	p->scond = zprog.scond;
    91  	p->from = zprog.from;
    92  	p->to = zprog.to;
    93  	p->reg = zprog.reg;
    94  }
    95  
    96  static void
    97  setaddrs(Bits bit)
    98  {
    99  	int i, n;
   100  	Var *v;
   101  	Node *node;
   102  
   103  	while(bany(&bit)) {
   104  		// convert each bit to a variable
   105  		i = bnum(bit);
   106  		node = var[i].node;
   107  		n = var[i].name;
   108  		bit.b[i/32] &= ~(1L<<(i%32));
   109  
   110  		// disable all pieces of that variable
   111  		for(i=0; i<nvar; i++) {
   112  			v = var+i;
   113  			if(v->node == node && v->name == n)
   114  				v->addr = 2;
   115  		}
   116  	}
   117  }
   118  
   119  static char* regname[] = {
   120  	".R0",
   121  	".R1",
   122  	".R2",
   123  	".R3",
   124  	".R4",
   125  	".R5",
   126  	".R6",
   127  	".R7",
   128  	".R8",
   129  	".R9",
   130  	".R10",
   131  	".R11",
   132  	".R12",
   133  	".R13",
   134  	".R14",
   135  	".R15",
   136  	".F0",
   137  	".F1",
   138  	".F2",
   139  	".F3",
   140  	".F4",
   141  	".F5",
   142  	".F6",
   143  	".F7",
   144  	".F8",
   145  	".F9",
   146  	".F10",
   147  	".F11",
   148  	".F12",
   149  	".F13",
   150  	".F14",
   151  	".F15",
   152  };
   153  
   154  static Node* regnodes[NREGVAR];
   155  
   156  void
   157  regopt(Prog *firstp)
   158  {
   159  	Reg *r, *r1;
   160  	Prog *p;
   161  	Graph *g;
   162  	int i, z;
   163  	uint32 vreg;
   164  	Bits bit;
   165  	ProgInfo info;
   166  
   167  	if(first) {
   168  		fmtinstall('Q', Qconv);
   169  		first = 0;
   170  	}
   171  	
   172  	fixjmp(firstp);
   173  	mergetemp(firstp);
   174  
   175  	/*
   176  	 * control flow is more complicated in generated go code
   177  	 * than in generated c code.  define pseudo-variables for
   178  	 * registers, so we have complete register usage information.
   179  	 */
   180  	nvar = NREGVAR;
   181  	memset(var, 0, NREGVAR*sizeof var[0]);
   182  	for(i=0; i<NREGVAR; i++) {
   183  		if(regnodes[i] == N)
   184  			regnodes[i] = newname(lookup(regname[i]));
   185  		var[i].node = regnodes[i];
   186  	}
   187  
   188  	regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
   189  	for(z=0; z<BITS; z++) {
   190  		externs.b[z] = 0;
   191  		params.b[z] = 0;
   192  		consts.b[z] = 0;
   193  		addrs.b[z] = 0;
   194  		ovar.b[z] = 0;
   195  	}
   196  
   197  	// build list of return variables
   198  	setoutvar();
   199  
   200  	/*
   201  	 * pass 1
   202  	 * build aux data structure
   203  	 * allocate pcs
   204  	 * find use and set of variables
   205  	 */
   206  	g = flowstart(firstp, sizeof(Reg));
   207  	if(g == nil)
   208  		return;
   209  	firstr = (Reg*)g->start;
   210  
   211  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   212  		p = r->f.prog;
   213  		proginfo(&info, p);
   214  
   215  		// Avoid making variables for direct-called functions.
   216  		if(p->as == ABL && p->to.type == D_EXTERN)
   217  			continue;
   218  
   219  		bit = mkvar(r, &p->from);
   220  		if(info.flags & LeftRead)
   221  			for(z=0; z<BITS; z++)
   222  				r->use1.b[z] |= bit.b[z];
   223  		if(info.flags & LeftAddr)
   224  			setaddrs(bit);
   225  
   226  		if(info.flags & RegRead) {	
   227  			if(p->from.type != D_FREG)
   228  				r->use1.b[0] |= RtoB(p->reg);
   229  			else
   230  				r->use1.b[0] |= FtoB(p->reg);
   231  		}
   232  
   233  		if(info.flags & (RightAddr | RightRead | RightWrite)) {
   234  			bit = mkvar(r, &p->to);
   235  			if(info.flags & RightAddr)
   236  				setaddrs(bit);
   237  			if(info.flags & RightRead)
   238  				for(z=0; z<BITS; z++)
   239  					r->use2.b[z] |= bit.b[z];
   240  			if(info.flags & RightWrite)
   241  				for(z=0; z<BITS; z++)
   242  					r->set.b[z] |= bit.b[z];
   243  		}
   244  	}
   245  	if(firstr == R)
   246  		return;
   247  
   248  	for(i=0; i<nvar; i++) {
   249  		Var *v = var+i;
   250  		if(v->addr) {
   251  			bit = blsh(i);
   252  			for(z=0; z<BITS; z++)
   253  				addrs.b[z] |= bit.b[z];
   254  		}
   255  
   256  		if(debug['R'] && debug['v'])
   257  			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
   258  				i, v->addr, v->etype, v->width, v->node, v->offset);
   259  	}
   260  
   261  	if(debug['R'] && debug['v'])
   262  		dumpit("pass1", &firstr->f, 1);
   263  
   264  	/*
   265  	 * pass 2
   266  	 * find looping structure
   267  	 */
   268  	flowrpo(g);
   269  
   270  	if(debug['R'] && debug['v'])
   271  		dumpit("pass2", &firstr->f, 1);
   272  
   273  	/*
   274  	 * pass 3
   275  	 * iterate propagating usage
   276  	 * 	back until flow graph is complete
   277  	 */
   278  loop1:
   279  	change = 0;
   280  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   281  		r->f.active = 0;
   282  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   283  		if(r->f.prog->as == ARET)
   284  			prop(r, zbits, zbits);
   285  loop11:
   286  	/* pick up unreachable code */
   287  	i = 0;
   288  	for(r = firstr; r != R; r = r1) {
   289  		r1 = (Reg*)r->f.link;
   290  		if(r1 && r1->f.active && !r->f.active) {
   291  			prop(r, zbits, zbits);
   292  			i = 1;
   293  		}
   294  	}
   295  	if(i)
   296  		goto loop11;
   297  	if(change)
   298  		goto loop1;
   299  
   300  	if(debug['R'] && debug['v'])
   301  		dumpit("pass3", &firstr->f, 1);
   302  
   303  
   304  	/*
   305  	 * pass 4
   306  	 * iterate propagating register/variable synchrony
   307  	 * 	forward until graph is complete
   308  	 */
   309  loop2:
   310  	change = 0;
   311  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   312  		r->f.active = 0;
   313  	synch(firstr, zbits);
   314  	if(change)
   315  		goto loop2;
   316  
   317  	addsplits();
   318  
   319  	if(debug['R'] && debug['v'])
   320  		dumpit("pass4", &firstr->f, 1);
   321  
   322  	if(debug['R'] > 1) {
   323  		print("\nprop structure:\n");
   324  		for(r = firstr; r != R; r = (Reg*)r->f.link) {
   325  			print("%d:%P", r->f.loop, r->f.prog);
   326  			for(z=0; z<BITS; z++) {
   327  				bit.b[z] = r->set.b[z] |
   328  					r->refahead.b[z] | r->calahead.b[z] |
   329  					r->refbehind.b[z] | r->calbehind.b[z] |
   330  					r->use1.b[z] | r->use2.b[z];
   331  				bit.b[z] &= ~addrs.b[z];
   332  			}
   333  
   334  			if(bany(&bit)) {
   335  				print("\t");
   336  				if(bany(&r->use1))
   337  					print(" u1=%Q", r->use1);
   338  				if(bany(&r->use2))
   339  					print(" u2=%Q", r->use2);
   340  				if(bany(&r->set))
   341  					print(" st=%Q", r->set);
   342  				if(bany(&r->refahead))
   343  					print(" ra=%Q", r->refahead);
   344  				if(bany(&r->calahead))
   345  					print(" ca=%Q", r->calahead);
   346  				if(bany(&r->refbehind))
   347  					print(" rb=%Q", r->refbehind);
   348  				if(bany(&r->calbehind))
   349  					print(" cb=%Q", r->calbehind);
   350  			}
   351  			print("\n");
   352  		}
   353  	}
   354  
   355  	/*
   356  	 * pass 4.5
   357  	 * move register pseudo-variables into regu.
   358  	 */
   359  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   360  		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   361  
   362  		r->set.b[0] &= ~REGBITS;
   363  		r->use1.b[0] &= ~REGBITS;
   364  		r->use2.b[0] &= ~REGBITS;
   365  		r->refbehind.b[0] &= ~REGBITS;
   366  		r->refahead.b[0] &= ~REGBITS;
   367  		r->calbehind.b[0] &= ~REGBITS;
   368  		r->calahead.b[0] &= ~REGBITS;
   369  		r->regdiff.b[0] &= ~REGBITS;
   370  		r->act.b[0] &= ~REGBITS;
   371  	}
   372  
   373  	if(debug['R'] && debug['v'])
   374  		dumpit("pass4.5", &firstr->f, 1);
   375  
   376  	/*
   377  	 * pass 5
   378  	 * isolate regions
   379  	 * calculate costs (paint1)
   380  	 */
   381  	r = firstr;
   382  	if(r) {
   383  		for(z=0; z<BITS; z++)
   384  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   385  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   386  		if(bany(&bit) & !r->f.refset) {
   387  			// should never happen - all variables are preset
   388  			if(debug['w'])
   389  				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
   390  			r->f.refset = 1;
   391  		}
   392  	}
   393  
   394  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   395  		r->act = zbits;
   396  	rgp = region;
   397  	nregion = 0;
   398  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   399  		for(z=0; z<BITS; z++)
   400  			bit.b[z] = r->set.b[z] &
   401  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   402  		if(bany(&bit) && !r->f.refset) {
   403  			if(debug['w'])
   404  				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
   405  			r->f.refset = 1;
   406  			excise(&r->f);
   407  		}
   408  		for(z=0; z<BITS; z++)
   409  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   410  		while(bany(&bit)) {
   411  			i = bnum(bit);
   412  			rgp->enter = r;
   413  			rgp->varno = i;
   414  			change = 0;
   415  			if(debug['R'] > 1)
   416  				print("\n");
   417  			paint1(r, i);
   418  			bit.b[i/32] &= ~(1L<<(i%32));
   419  			if(change <= 0) {
   420  				if(debug['R'])
   421  					print("%L $%d: %Q\n",
   422  						r->f.prog->lineno, change, blsh(i));
   423  				continue;
   424  			}
   425  			rgp->cost = change;
   426  			nregion++;
   427  			if(nregion >= NRGN) {
   428  				if(debug['R'] > 1)
   429  					print("too many regions\n");
   430  				goto brk;
   431  			}
   432  			rgp++;
   433  		}
   434  	}
   435  brk:
   436  	qsort(region, nregion, sizeof(region[0]), rcmp);
   437  
   438  	if(debug['R'] && debug['v'])
   439  		dumpit("pass5", &firstr->f, 1);
   440  
   441  	/*
   442  	 * pass 6
   443  	 * determine used registers (paint2)
   444  	 * replace code (paint3)
   445  	 */
   446  	rgp = region;
   447  	for(i=0; i<nregion; i++) {
   448  		bit = blsh(rgp->varno);
   449  		vreg = paint2(rgp->enter, rgp->varno);
   450  		vreg = allreg(vreg, rgp);
   451  		if(debug['R']) {
   452  			if(rgp->regno >= NREG)
   453  				print("%L $%d F%d: %Q\n",
   454  					rgp->enter->f.prog->lineno,
   455  					rgp->cost,
   456  					rgp->regno-NREG,
   457  					bit);
   458  			else
   459  				print("%L $%d R%d: %Q\n",
   460  					rgp->enter->f.prog->lineno,
   461  					rgp->cost,
   462  					rgp->regno,
   463  					bit);
   464  		}
   465  		if(rgp->regno != 0)
   466  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   467  		rgp++;
   468  	}
   469  
   470  	if(debug['R'] && debug['v'])
   471  		dumpit("pass6", &firstr->f, 1);
   472  
   473  	/*
   474  	 * pass 7
   475  	 * peep-hole on basic block
   476  	 */
   477  	if(!debug['R'] || debug['P']) {
   478  		peep(firstp);
   479  	}
   480  
   481  	if(debug['R'] && debug['v'])
   482  		dumpit("pass7", &firstr->f, 1);
   483  
   484  	/*
   485  	 * last pass
   486  	 * eliminate nops
   487  	 * free aux structures
   488  	 * adjust the stack pointer
   489  	 *	MOVW.W 	R1,-12(R13)			<<- start
   490  	 *	MOVW   	R0,R1
   491  	 *	MOVW   	R1,8(R13)
   492  	 *	MOVW   	$0,R1
   493  	 *	MOVW   	R1,4(R13)
   494  	 *	BL     	,runtime.newproc+0(SB)
   495  	 *	MOVW   	&ft+-32(SP),R7			<<- adjust
   496  	 *	MOVW   	&j+-40(SP),R6			<<- adjust
   497  	 *	MOVW   	autotmp_0003+-24(SP),R5		<<- adjust
   498  	 *	MOVW   	$12(R13),R13			<<- finish
   499  	 */
   500  	vreg = 0;
   501  	for(p = firstp; p != P; p = p->link) {
   502  		while(p->link != P && p->link->as == ANOP)
   503  			p->link = p->link->link;
   504  		if(p->to.type == D_BRANCH)
   505  			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
   506  				p->to.u.branch = p->to.u.branch->link;
   507  		if(p->as == AMOVW && p->to.reg == 13) {
   508  			if(p->scond & C_WBIT) {
   509  				vreg = -p->to.offset;		// in adjust region
   510  //				print("%P adjusting %d\n", p, vreg);
   511  				continue;
   512  			}
   513  			if(p->from.type == D_CONST && p->to.type == D_REG) {
   514  				if(p->from.offset != vreg)
   515  					print("in and out different\n");
   516  //				print("%P finish %d\n", p, vreg);
   517  				vreg = 0;	// done adjust region
   518  				continue;
   519  			}
   520  
   521  //			print("%P %d %d from type\n", p, p->from.type, D_CONST);
   522  //			print("%P %d %d to type\n\n", p, p->to.type, D_REG);
   523  		}
   524  
   525  		if(p->as == AMOVW && vreg != 0) {
   526  			if(p->from.sym != S)
   527  			if(p->from.name == D_AUTO || p->from.name == D_PARAM) {
   528  				p->from.offset += vreg;
   529  //				print("%P adjusting from %d %d\n", p, vreg, p->from.type);
   530  			}
   531  			if(p->to.sym != S)
   532  			if(p->to.name == D_AUTO || p->to.name == D_PARAM) {
   533  				p->to.offset += vreg;
   534  //				print("%P adjusting to %d %d\n", p, vreg, p->from.type);
   535  			}
   536  		}
   537  	}
   538  
   539  	flowend(g);
   540  }
   541  
   542  void
   543  addsplits(void)
   544  {
   545  	Reg *r, *r1;
   546  	int z, i;
   547  	Bits bit;
   548  
   549  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   550  		if(r->f.loop > 1)
   551  			continue;
   552  		if(r->f.prog->as == ABL)
   553  			continue;
   554  		for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
   555  			if(r1->f.loop <= 1)
   556  				continue;
   557  			for(z=0; z<BITS; z++)
   558  				bit.b[z] = r1->calbehind.b[z] &
   559  					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
   560  					~(r->calahead.b[z] & addrs.b[z]);
   561  			while(bany(&bit)) {
   562  				i = bnum(bit);
   563  				bit.b[i/32] &= ~(1L << (i%32));
   564  			}
   565  		}
   566  	}
   567  }
   568  
   569  /*
   570   * add mov b,rn
   571   * just after r
   572   */
   573  void
   574  addmove(Reg *r, int bn, int rn, int f)
   575  {
   576  	Prog *p, *p1, *p2;
   577  	Adr *a;
   578  	Var *v;
   579  
   580  	p1 = mal(sizeof(*p1));
   581  	*p1 = zprog;
   582  	p = r->f.prog;
   583  	
   584  	// If there's a stack fixup coming (after BL newproc or BL deferproc),
   585  	// delay the load until after the fixup.
   586  	p2 = p->link;
   587  	if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG)
   588  		p = p2;
   589  
   590  	p1->link = p->link;
   591  	p->link = p1;
   592  	p1->lineno = p->lineno;
   593  
   594  	v = var + bn;
   595  
   596  	a = &p1->to;
   597  	a->name = v->name;
   598  	a->node = v->node;
   599  	a->sym = v->node->sym;
   600  	a->offset = v->offset;
   601  	a->etype = v->etype;
   602  	a->type = D_OREG;
   603  	if(a->etype == TARRAY || a->sym == S)
   604  		a->type = D_CONST;
   605  
   606  	if(v->addr)
   607  		fatal("addmove: shouldn't be doing this %A\n", a);
   608  
   609  	switch(v->etype) {
   610  	default:
   611  		print("What is this %E\n", v->etype);
   612  
   613  	case TINT8:
   614  		p1->as = AMOVBS;
   615  		break;
   616  	case TBOOL:
   617  	case TUINT8:
   618  //print("movbu %E %d %S\n", v->etype, bn, v->sym);
   619  		p1->as = AMOVBU;
   620  		break;
   621  	case TINT16:
   622  		p1->as = AMOVHS;
   623  		break;
   624  	case TUINT16:
   625  		p1->as = AMOVHU;
   626  		break;
   627  	case TINT32:
   628  	case TUINT32:
   629  	case TPTR32:
   630  		p1->as = AMOVW;
   631  		break;
   632  	case TFLOAT32:
   633  		p1->as = AMOVF;
   634  		break;
   635  	case TFLOAT64:
   636  		p1->as = AMOVD;
   637  		break;
   638  	}
   639  
   640  	p1->from.type = D_REG;
   641  	p1->from.reg = rn;
   642  	if(rn >= NREG) {
   643  		p1->from.type = D_FREG;
   644  		p1->from.reg = rn-NREG;
   645  	}
   646  	if(!f) {
   647  		p1->from = *a;
   648  		*a = zprog.from;
   649  		a->type = D_REG;
   650  		a->reg = rn;
   651  		if(rn >= NREG) {
   652  			a->type = D_FREG;
   653  			a->reg = rn-NREG;
   654  		}
   655  		if(v->etype == TUINT8 || v->etype == TBOOL)
   656  			p1->as = AMOVBU;
   657  		if(v->etype == TUINT16)
   658  			p1->as = AMOVHU;
   659  	}
   660  	if(debug['R'])
   661  		print("%P\t.a%P\n", p, p1);
   662  }
   663  
   664  static int
   665  overlap(int32 o1, int w1, int32 o2, int w2)
   666  {
   667  	int32 t1, t2;
   668  
   669  	t1 = o1+w1;
   670  	t2 = o2+w2;
   671  
   672  	if(!(t1 > o2 && t2 > o1))
   673  		return 0;
   674  
   675  	return 1;
   676  }
   677  
   678  Bits
   679  mkvar(Reg *r, Adr *a)
   680  {
   681  	Var *v;
   682  	int i, t, n, et, z, w, flag;
   683  	int32 o;
   684  	Bits bit;
   685  	Node *node;
   686  
   687  	// mark registers used
   688  	t = a->type;
   689  
   690  	flag = 0;
   691  	switch(t) {
   692  	default:
   693  		print("type %d %d %D\n", t, a->name, a);
   694  		goto none;
   695  
   696  	case D_NONE:
   697  	case D_FCONST:
   698  	case D_BRANCH:
   699  		break;
   700  
   701  
   702  	case D_REGREG:
   703  	case D_REGREG2:
   704  		bit = zbits;
   705  		if(a->offset != NREG)
   706  			bit.b[0] |= RtoB(a->offset);
   707  		if(a->reg != NREG)
   708  			bit.b[0] |= RtoB(a->reg);
   709  		return bit;
   710  
   711  	case D_CONST:
   712  	case D_REG:
   713  	case D_SHIFT:
   714  		if(a->reg != NREG) {
   715  			bit = zbits;
   716  			bit.b[0] = RtoB(a->reg);
   717  			return bit;
   718  		}
   719  		break;
   720  
   721  	case D_OREG:
   722  		if(a->reg != NREG) {
   723  			if(a == &r->f.prog->from)
   724  				r->use1.b[0] |= RtoB(a->reg);
   725  			else
   726  				r->use2.b[0] |= RtoB(a->reg);
   727  			if(r->f.prog->scond & (C_PBIT|C_WBIT))
   728  				r->set.b[0] |= RtoB(a->reg);
   729  		}
   730  		break;
   731  
   732  	case D_FREG:
   733  		if(a->reg != NREG) {
   734  			bit = zbits;
   735  			bit.b[0] = FtoB(a->reg);
   736  			return bit;
   737  		}
   738  		break;
   739  	}
   740  
   741  	switch(a->name) {
   742  	default:
   743  		goto none;
   744  
   745  	case D_EXTERN:
   746  	case D_STATIC:
   747  	case D_AUTO:
   748  	case D_PARAM:
   749  		n = a->name;
   750  		break;
   751  	}
   752  
   753  	node = a->node;
   754  	if(node == N || node->op != ONAME || node->orig == N)
   755  		goto none;
   756  	node = node->orig;
   757  	if(node->orig != node)
   758  		fatal("%D: bad node", a);
   759  	if(node->sym == S || node->sym->name[0] == '.')
   760  		goto none;
   761  	et = a->etype;
   762  	o = a->offset;
   763  	w = a->width;
   764  	if(w < 0)
   765  		fatal("bad width %d for %D", w, a);
   766  
   767  	for(i=0; i<nvar; i++) {
   768  		v = var+i;
   769  		if(v->node == node && v->name == n) {
   770  			if(v->offset == o)
   771  			if(v->etype == et)
   772  			if(v->width == w)
   773  				if(!flag)
   774  					return blsh(i);
   775  
   776  			// if they overlap, disable both
   777  			if(overlap(v->offset, v->width, o, w)) {
   778  				v->addr = 1;
   779  				flag = 1;
   780  			}
   781  		}
   782  	}
   783  
   784  	switch(et) {
   785  	case 0:
   786  	case TFUNC:
   787  		goto none;
   788  	}
   789  
   790  	if(nvar >= NVAR) {
   791  		if(debug['w'] > 1 && node)
   792  			fatal("variable not optimized: %D", a);
   793  		goto none;
   794  	}
   795  
   796  	i = nvar;
   797  	nvar++;
   798  //print("var %d %E %D %S\n", i, et, a, s);
   799  	v = var+i;
   800  	v->offset = o;
   801  	v->name = n;
   802  	v->etype = et;
   803  	v->width = w;
   804  	v->addr = flag;		// funny punning
   805  	v->node = node;
   806  	
   807  	if(debug['R'])
   808  		print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
   809  
   810  	bit = blsh(i);
   811  	if(n == D_EXTERN || n == D_STATIC)
   812  		for(z=0; z<BITS; z++)
   813  			externs.b[z] |= bit.b[z];
   814  	if(n == D_PARAM)
   815  		for(z=0; z<BITS; z++)
   816  			params.b[z] |= bit.b[z];
   817  
   818  	return bit;
   819  
   820  none:
   821  	return zbits;
   822  }
   823  
   824  void
   825  prop(Reg *r, Bits ref, Bits cal)
   826  {
   827  	Reg *r1, *r2;
   828  	int z;
   829  
   830  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
   831  		for(z=0; z<BITS; z++) {
   832  			ref.b[z] |= r1->refahead.b[z];
   833  			if(ref.b[z] != r1->refahead.b[z]) {
   834  				r1->refahead.b[z] = ref.b[z];
   835  				change++;
   836  			}
   837  			cal.b[z] |= r1->calahead.b[z];
   838  			if(cal.b[z] != r1->calahead.b[z]) {
   839  				r1->calahead.b[z] = cal.b[z];
   840  				change++;
   841  			}
   842  		}
   843  		switch(r1->f.prog->as) {
   844  		case ABL:
   845  			if(noreturn(r1->f.prog))
   846  				break;
   847  			for(z=0; z<BITS; z++) {
   848  				cal.b[z] |= ref.b[z] | externs.b[z];
   849  				ref.b[z] = 0;
   850  			}
   851  			break;
   852  
   853  		case ATEXT:
   854  			for(z=0; z<BITS; z++) {
   855  				cal.b[z] = 0;
   856  				ref.b[z] = 0;
   857  			}
   858  			break;
   859  
   860  		case ARET:
   861  			for(z=0; z<BITS; z++) {
   862  				cal.b[z] = externs.b[z] | ovar.b[z];
   863  				ref.b[z] = 0;
   864  			}
   865  			break;
   866  
   867  		default:
   868  			// Work around for issue 1304:
   869  			// flush modified globals before each instruction.
   870  			for(z=0; z<BITS; z++) {
   871  				cal.b[z] |= externs.b[z];
   872  				// issue 4066: flush modified return variables in case of panic
   873  				if(hasdefer)
   874  					cal.b[z] |= ovar.b[z];
   875  			}
   876  			break;
   877  		}
   878  		for(z=0; z<BITS; z++) {
   879  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
   880  				r1->use1.b[z] | r1->use2.b[z];
   881  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
   882  			r1->refbehind.b[z] = ref.b[z];
   883  			r1->calbehind.b[z] = cal.b[z];
   884  		}
   885  		if(r1->f.active)
   886  			break;
   887  		r1->f.active = 1;
   888  	}
   889  	for(; r != r1; r = (Reg*)r->f.p1)
   890  		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
   891  			prop(r2, r->refbehind, r->calbehind);
   892  }
   893  
   894  void
   895  synch(Reg *r, Bits dif)
   896  {
   897  	Reg *r1;
   898  	int z;
   899  
   900  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
   901  		for(z=0; z<BITS; z++) {
   902  			dif.b[z] = (dif.b[z] &
   903  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
   904  					r1->set.b[z] | r1->regdiff.b[z];
   905  			if(dif.b[z] != r1->regdiff.b[z]) {
   906  				r1->regdiff.b[z] = dif.b[z];
   907  				change++;
   908  			}
   909  		}
   910  		if(r1->f.active)
   911  			break;
   912  		r1->f.active = 1;
   913  		for(z=0; z<BITS; z++)
   914  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
   915  		if(r1->f.s2 != nil)
   916  			synch((Reg*)r1->f.s2, dif);
   917  	}
   918  }
   919  
   920  uint32
   921  allreg(uint32 b, Rgn *r)
   922  {
   923  	Var *v;
   924  	int i;
   925  
   926  	v = var + r->varno;
   927  	r->regno = 0;
   928  	switch(v->etype) {
   929  
   930  	default:
   931  		fatal("unknown etype %d/%E", bitno(b), v->etype);
   932  		break;
   933  
   934  	case TINT8:
   935  	case TUINT8:
   936  	case TINT16:
   937  	case TUINT16:
   938  	case TINT32:
   939  	case TUINT32:
   940  	case TINT:
   941  	case TUINT:
   942  	case TUINTPTR:
   943  	case TBOOL:
   944  	case TPTR32:
   945  		i = BtoR(~b);
   946  		if(i && r->cost >= 0) {
   947  			r->regno = i;
   948  			return RtoB(i);
   949  		}
   950  		break;
   951  
   952  	case TFLOAT32:
   953  	case TFLOAT64:
   954  		i = BtoF(~b);
   955  		if(i && r->cost >= 0) {
   956  			r->regno = i+NREG;
   957  			return FtoB(i);
   958  		}
   959  		break;
   960  
   961  	case TINT64:
   962  	case TUINT64:
   963  	case TPTR64:
   964  	case TINTER:
   965  	case TSTRUCT:
   966  	case TARRAY:
   967  		break;
   968  	}
   969  	return 0;
   970  }
   971  
   972  void
   973  paint1(Reg *r, int bn)
   974  {
   975  	Reg *r1;
   976  	Prog *p;
   977  	int z;
   978  	uint32 bb;
   979  
   980  	z = bn/32;
   981  	bb = 1L<<(bn%32);
   982  	if(r->act.b[z] & bb)
   983  		return;
   984  	for(;;) {
   985  		if(!(r->refbehind.b[z] & bb))
   986  			break;
   987  		r1 = (Reg*)r->f.p1;
   988  		if(r1 == R)
   989  			break;
   990  		if(!(r1->refahead.b[z] & bb))
   991  			break;
   992  		if(r1->act.b[z] & bb)
   993  			break;
   994  		r = r1;
   995  	}
   996  
   997  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
   998  		change -= CLOAD * r->f.loop;
   999  		if(debug['R'] > 1)
  1000  			print("%d%P\td %Q $%d\n", r->f.loop,
  1001  				r->f.prog, blsh(bn), change);
  1002  	}
  1003  	for(;;) {
  1004  		r->act.b[z] |= bb;
  1005  		p = r->f.prog;
  1006  
  1007  		if(r->use1.b[z] & bb) {
  1008  			change += CREF * r->f.loop;
  1009  			if(debug['R'] > 1)
  1010  				print("%d%P\tu1 %Q $%d\n", r->f.loop,
  1011  					p, blsh(bn), change);
  1012  		}
  1013  
  1014  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1015  			change += CREF * r->f.loop;
  1016  			if(debug['R'] > 1)
  1017  				print("%d%P\tu2 %Q $%d\n", r->f.loop,
  1018  					p, blsh(bn), change);
  1019  		}
  1020  
  1021  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1022  			change -= CLOAD * r->f.loop;
  1023  			if(debug['R'] > 1)
  1024  				print("%d%P\tst %Q $%d\n", r->f.loop,
  1025  					p, blsh(bn), change);
  1026  		}
  1027  
  1028  		if(r->refbehind.b[z] & bb)
  1029  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1030  				if(r1->refahead.b[z] & bb)
  1031  					paint1(r1, bn);
  1032  
  1033  		if(!(r->refahead.b[z] & bb))
  1034  			break;
  1035  		r1 = (Reg*)r->f.s2;
  1036  		if(r1 != R)
  1037  			if(r1->refbehind.b[z] & bb)
  1038  				paint1(r1, bn);
  1039  		r = (Reg*)r->f.s1;
  1040  		if(r == R)
  1041  			break;
  1042  		if(r->act.b[z] & bb)
  1043  			break;
  1044  		if(!(r->refbehind.b[z] & bb))
  1045  			break;
  1046  	}
  1047  }
  1048  
  1049  uint32
  1050  paint2(Reg *r, int bn)
  1051  {
  1052  	Reg *r1;
  1053  	int z;
  1054  	uint32 bb, vreg;
  1055  
  1056  	z = bn/32;
  1057  	bb = 1L << (bn%32);
  1058  	vreg = regbits;
  1059  	if(!(r->act.b[z] & bb))
  1060  		return vreg;
  1061  	for(;;) {
  1062  		if(!(r->refbehind.b[z] & bb))
  1063  			break;
  1064  		r1 = (Reg*)r->f.p1;
  1065  		if(r1 == R)
  1066  			break;
  1067  		if(!(r1->refahead.b[z] & bb))
  1068  			break;
  1069  		if(!(r1->act.b[z] & bb))
  1070  			break;
  1071  		r = r1;
  1072  	}
  1073  	for(;;) {
  1074  		r->act.b[z] &= ~bb;
  1075  
  1076  		vreg |= r->regu;
  1077  
  1078  		if(r->refbehind.b[z] & bb)
  1079  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1080  				if(r1->refahead.b[z] & bb)
  1081  					vreg |= paint2(r1, bn);
  1082  
  1083  		if(!(r->refahead.b[z] & bb))
  1084  			break;
  1085  		r1 = (Reg*)r->f.s2;
  1086  		if(r1 != R)
  1087  			if(r1->refbehind.b[z] & bb)
  1088  				vreg |= paint2(r1, bn);
  1089  		r = (Reg*)r->f.s1;
  1090  		if(r == R)
  1091  			break;
  1092  		if(!(r->act.b[z] & bb))
  1093  			break;
  1094  		if(!(r->refbehind.b[z] & bb))
  1095  			break;
  1096  	}
  1097  	return vreg;
  1098  }
  1099  
  1100  void
  1101  paint3(Reg *r, int bn, int32 rb, int rn)
  1102  {
  1103  	Reg *r1;
  1104  	Prog *p;
  1105  	int z;
  1106  	uint32 bb;
  1107  
  1108  	z = bn/32;
  1109  	bb = 1L << (bn%32);
  1110  	if(r->act.b[z] & bb)
  1111  		return;
  1112  	for(;;) {
  1113  		if(!(r->refbehind.b[z] & bb))
  1114  			break;
  1115  		r1 = (Reg*)r->f.p1;
  1116  		if(r1 == R)
  1117  			break;
  1118  		if(!(r1->refahead.b[z] & bb))
  1119  			break;
  1120  		if(r1->act.b[z] & bb)
  1121  			break;
  1122  		r = r1;
  1123  	}
  1124  
  1125  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1126  		addmove(r, bn, rn, 0);
  1127  
  1128  	for(;;) {
  1129  		r->act.b[z] |= bb;
  1130  		p = r->f.prog;
  1131  
  1132  		if(r->use1.b[z] & bb) {
  1133  			if(debug['R'])
  1134  				print("%P", p);
  1135  			addreg(&p->from, rn);
  1136  			if(debug['R'])
  1137  				print("\t.c%P\n", p);
  1138  		}
  1139  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1140  			if(debug['R'])
  1141  				print("%P", p);
  1142  			addreg(&p->to, rn);
  1143  			if(debug['R'])
  1144  				print("\t.c%P\n", p);
  1145  		}
  1146  
  1147  		if(STORE(r) & r->regdiff.b[z] & bb)
  1148  			addmove(r, bn, rn, 1);
  1149  		r->regu |= rb;
  1150  
  1151  		if(r->refbehind.b[z] & bb)
  1152  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1153  				if(r1->refahead.b[z] & bb)
  1154  					paint3(r1, bn, rb, rn);
  1155  
  1156  		if(!(r->refahead.b[z] & bb))
  1157  			break;
  1158  		r1 = (Reg*)r->f.s2;
  1159  		if(r1 != R)
  1160  			if(r1->refbehind.b[z] & bb)
  1161  				paint3(r1, bn, rb, rn);
  1162  		r = (Reg*)r->f.s1;
  1163  		if(r == R)
  1164  			break;
  1165  		if(r->act.b[z] & bb)
  1166  			break;
  1167  		if(!(r->refbehind.b[z] & bb))
  1168  			break;
  1169  	}
  1170  }
  1171  
  1172  void
  1173  addreg(Adr *a, int rn)
  1174  {
  1175  	a->sym = 0;
  1176  	a->name = D_NONE;
  1177  	a->type = D_REG;
  1178  	a->reg = rn;
  1179  	if(rn >= NREG) {
  1180  		a->type = D_FREG;
  1181  		a->reg = rn-NREG;
  1182  	}
  1183  }
  1184  
  1185  /*
  1186   *	bit	reg
  1187   *	0	R0
  1188   *	1	R1
  1189   *	...	...
  1190   *	10	R10
  1191   *	12  R12
  1192   */
  1193  int32
  1194  RtoB(int r)
  1195  {
  1196  	if(r >= REGTMP-2 && r != 12)	// excluded R9 and R10 for m and g, but not R12
  1197  		return 0;
  1198  	return 1L << r;
  1199  }
  1200  
  1201  int
  1202  BtoR(int32 b)
  1203  {
  1204  	b &= 0x11fcL;	// excluded R9 and R10 for m and g, but not R12
  1205  	if(b == 0)
  1206  		return 0;
  1207  	return bitno(b);
  1208  }
  1209  
  1210  /*
  1211   *	bit	reg
  1212   *	18	F2
  1213   *	19	F3
  1214   *	...	...
  1215   *	31	F15
  1216   */
  1217  int32
  1218  FtoB(int f)
  1219  {
  1220  
  1221  	if(f < 2 || f > NFREG-1)
  1222  		return 0;
  1223  	return 1L << (f + 16);
  1224  }
  1225  
  1226  int
  1227  BtoF(int32 b)
  1228  {
  1229  
  1230  	b &= 0xfffc0000L;
  1231  	if(b == 0)
  1232  		return 0;
  1233  	return bitno(b) - 16;
  1234  }
  1235  
  1236  void
  1237  dumpone(Flow *f, int isreg)
  1238  {
  1239  	int z;
  1240  	Bits bit;
  1241  	Reg *r;
  1242  
  1243  	print("%d:%P", f->loop, f->prog);
  1244  	if(isreg) {
  1245  		r = (Reg*)f;
  1246  		for(z=0; z<BITS; z++)
  1247  			bit.b[z] =
  1248  				r->set.b[z] |
  1249  				r->use1.b[z] |
  1250  				r->use2.b[z] |
  1251  				r->refbehind.b[z] |
  1252  				r->refahead.b[z] |
  1253  				r->calbehind.b[z] |
  1254  				r->calahead.b[z] |
  1255  				r->regdiff.b[z] |
  1256  				r->act.b[z] |
  1257  					0;
  1258  		if(bany(&bit)) {
  1259  			print("\t");
  1260  			if(bany(&r->set))
  1261  				print(" s:%Q", r->set);
  1262  			if(bany(&r->use1))
  1263  				print(" u1:%Q", r->use1);
  1264  			if(bany(&r->use2))
  1265  				print(" u2:%Q", r->use2);
  1266  			if(bany(&r->refbehind))
  1267  				print(" rb:%Q ", r->refbehind);
  1268  			if(bany(&r->refahead))
  1269  				print(" ra:%Q ", r->refahead);
  1270  			if(bany(&r->calbehind))
  1271  				print(" cb:%Q ", r->calbehind);
  1272  			if(bany(&r->calahead))
  1273  				print(" ca:%Q ", r->calahead);
  1274  			if(bany(&r->regdiff))
  1275  				print(" d:%Q ", r->regdiff);
  1276  			if(bany(&r->act))
  1277  				print(" a:%Q ", r->act);
  1278  		}
  1279  	}
  1280  	print("\n");
  1281  }
  1282  
  1283  void
  1284  dumpit(char *str, Flow *r0, int isreg)
  1285  {
  1286  	Flow *r, *r1;
  1287  
  1288  	print("\n%s\n", str);
  1289  	for(r = r0; r != nil; r = r->link) {
  1290  		dumpone(r, isreg);
  1291  		r1 = r->p2;
  1292  		if(r1 != nil) {
  1293  			print("	pred:");
  1294  			for(; r1 != nil; r1 = r1->p2link)
  1295  				print(" %.4ud", r1->prog->loc);
  1296  			if(r->p1 != nil)
  1297  				print(" (and %.4ud)", r->p1->prog->loc);
  1298  			else
  1299  				print(" (only)");
  1300  			print("\n");
  1301  		}
  1302  //		r1 = r->s1;
  1303  //		if(r1 != nil) {
  1304  //			print("	succ:");
  1305  //			for(; r1 != R; r1 = r1->s1)
  1306  //				print(" %.4ud", r1->prog->loc);
  1307  //			print("\n");
  1308  //		}
  1309  	}
  1310  }