github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/cmd/9g/reg.c (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include "gg.h"
    34  #include "opt.h"
    35  
        // NREGVAR is the number of register pseudo-variables; regopt sets
        // nvar = NREGVAR and fills var[0..NREGVAR-1] before scanning the program.
    36  #define	NREGVAR	64	/* 32 general + 32 floating */
        // REGBITS is the mask regopt applies to word 0 of a Bits value to
        // select (or, inverted, to clear) the register pseudo-variable bits.
    37  #define	REGBITS	((uint64)0xffffffffffffffffull)
    38  /*c2go enum {
    39  	NREGVAR = 64,
    40  	REGBITS = 0xffffffffffffffff,
    41  };
    42  */
    43  
        // firstr is the first Reg of the flow graph being optimized; regopt
        // sets it from g->start and resets it to R once the graph is freed.
    44  static	Reg*	firstr;
        // first is nonzero until regopt has installed the 'Q' format verb.
    45  static	int	first	= 1;
    46  
    47  int
    48  rcmp(const void *a1, const void *a2)
    49  {
    50  	Rgn *p1, *p2;
    51  	int c1, c2;
    52  
    53  	p1 = (Rgn*)a1;
    54  	p2 = (Rgn*)a2;
    55  	c1 = p2->cost;
    56  	c2 = p1->cost;
    57  	if(c1 -= c2)
    58  		return c1;
    59  	return p2->varno - p1->varno;
    60  }
    61  
    62  static void
    63  setaddrs(Bits bit)
    64  {
    65  	int i, n;
    66  	Var *v;
    67  	Node *node;
    68  
    69  	while(bany(&bit)) {
    70  		// convert each bit to a variable
    71  		i = bnum(bit);
    72  		node = var[i].node;
    73  		n = var[i].name;
    74  		biclr(&bit, i);
    75  
    76  		// disable all pieces of that variable
    77  		for(i=0; i<nvar; i++) {
    78  			v = var+i;
    79  			if(v->node == node && v->name == n)
    80  				v->addr = 2;
    81  		}
    82  	}
    83  }
    84  
    85  static char* regname[] = {
    86  	".R0",
    87  	".R1",
    88  	".R2",
    89  	".R3",
    90  	".R4",
    91  	".R5",
    92  	".R6",
    93  	".R7",
    94  	".R8",
    95  	".R9",
    96  	".R10",
    97  	".R11",
    98  	".R12",
    99  	".R13",
   100  	".R14",
   101  	".R15",
   102  	".R16",
   103  	".R17",
   104  	".R18",
   105  	".R19",
   106  	".R20",
   107  	".R21",
   108  	".R22",
   109  	".R23",
   110  	".R24",
   111  	".R25",
   112  	".R26",
   113  	".R27",
   114  	".R28",
   115  	".R29",
   116  	".R30",
   117  	".R31",
   118  	".F0",
   119  	".F1",
   120  	".F2",
   121  	".F3",
   122  	".F4",
   123  	".F5",
   124  	".F6",
   125  	".F7",
   126  	".F8",
   127  	".F9",
   128  	".F10",
   129  	".F11",
   130  	".F12",
   131  	".F13",
   132  	".F14",
   133  	".F15",
   134  	".F16",
   135  	".F17",
   136  	".F18",
   137  	".F19",
   138  	".F20",
   139  	".F21",
   140  	".F22",
   141  	".F23",
   142  	".F24",
   143  	".F25",
   144  	".F26",
   145  	".F27",
   146  	".F28",
   147  	".F29",
   148  	".F30",
   149  	".F31",
   150  };
   151  
   152  static Node* regnodes[NREGVAR];
   153  
   154  static void walkvardef(Node *n, Reg *r, int active);
   155  
   156  void
   157  regopt(Prog *firstp)
   158  {
   159  	Reg *r, *r1;
   160  	Prog *p;
   161  	Graph *g;
   162  	ProgInfo info;
   163  	int i, z, active;
   164  	uint64 vreg, usedreg;
   165  	Bits bit;
   166  
   167  	if(first) {
   168  		fmtinstall('Q', Qconv);
   169  		first = 0;
   170  	}
   171  
   172  	mergetemp(firstp);
   173  
   174  	/*
   175  	 * control flow is more complicated in generated go code
   176  	 * than in generated c code.  define pseudo-variables for
   177  	 * registers, so we have complete register usage information.
   178  	 */
   179  	nvar = NREGVAR;
   180  	memset(var, 0, NREGVAR*sizeof var[0]);
   181  	for(i=0; i<NREGVAR; i++) {
   182  		if(regnodes[i] == N)
   183  			regnodes[i] = newname(lookup(regname[i]));
   184  		var[i].node = regnodes[i];
   185  	}
   186  
   187  	// Exclude registers with fixed functions
   188  	regbits = (1<<D_R0)|RtoB(REGSP)|RtoB(REGG)|RtoB(REGTLS);
   189  	// Also exclude floating point registers with fixed constants
   190  	regbits |= FtoB(D_F0+27)|FtoB(D_F0+28)|FtoB(D_F0+29)|FtoB(D_F0+30)|FtoB(D_F0+31);
   191  	externs = zbits;
   192  	params = zbits;
   193  	consts = zbits;
   194  	addrs = zbits;
   195  	ivar = zbits;
   196  	ovar = zbits;
   197  
   198  	/*
   199  	 * pass 1
   200  	 * build aux data structure
   201  	 * allocate pcs
   202  	 * find use and set of variables
   203  	 */
   204  	g = flowstart(firstp, sizeof(Reg));
   205  	if(g == nil) {
   206  		for(i=0; i<nvar; i++)
   207  			var[i].node->opt = nil;
   208  		return;
   209  	}
   210  
   211  	firstr = (Reg*)g->start;
   212  
   213  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   214  		p = r->f.prog;
   215  		if(p->as == AVARDEF || p->as == AVARKILL)
   216  			continue;
   217  		proginfo(&info, p);
   218  
   219  		// Avoid making variables for direct-called functions.
   220  		if(p->as == ABL && p->to.name == D_EXTERN)
   221  			continue;
   222  
   223  		// from vs to doesn't matter for registers
   224  		r->use1.b[0] |= info.reguse | info.regindex;
   225  		r->set.b[0] |= info.regset;
   226  
   227  		// Compute used register for from
   228  		bit = mkvar(r, &p->from);
   229  		if(info.flags & LeftAddr)
   230  			setaddrs(bit);
   231  		if(info.flags & LeftRead)
   232  			for(z=0; z<BITS; z++)
   233  				r->use1.b[z] |= bit.b[z];
   234  
   235  		// Compute used register for reg
   236  		if(info.flags & RegRead) {
   237  			if(p->from.type != D_FREG)
   238  				r->use1.b[0] |= RtoB(p->reg);
   239  			else
   240  				r->use1.b[0] |= FtoB(D_F0+p->reg);
   241  		}
   242  
   243  		// Currently we never generate three register forms.
   244  		// If we do, this will need to change.
   245  		if(p->from3.type != D_NONE)
   246  			fatal("regopt not implemented for from3");
   247  
   248  		// Compute used register for to
   249  		bit = mkvar(r, &p->to);
   250  		if(info.flags & RightAddr)
   251  			setaddrs(bit);
   252  		if(info.flags & RightRead)
   253  			for(z=0; z<BITS; z++)
   254  				r->use2.b[z] |= bit.b[z];
   255  		if(info.flags & RightWrite)
   256  			for(z=0; z<BITS; z++)
   257  				r->set.b[z] |= bit.b[z];
   258  	}
   259  
   260  	for(i=0; i<nvar; i++) {
   261  		Var *v = var+i;
   262  		if(v->addr) {
   263  			bit = blsh(i);
   264  			for(z=0; z<BITS; z++)
   265  				addrs.b[z] |= bit.b[z];
   266  		}
   267  
   268  		if(debug['R'] && debug['v'])
   269  			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
   270  				i, v->addr, v->etype, v->width, v->node, v->offset);
   271  	}
   272  
   273  	if(debug['R'] && debug['v'])
   274  		dumpit("pass1", &firstr->f, 1);
   275  
   276  	/*
   277  	 * pass 2
   278  	 * find looping structure
   279  	 */
   280  	flowrpo(g);
   281  
   282  	if(debug['R'] && debug['v'])
   283  		dumpit("pass2", &firstr->f, 1);
   284  
   285  	/*
   286  	 * pass 2.5
   287  	 * iterate propagating fat vardef covering forward
   288  	 * r->act records vars with a VARDEF since the last CALL.
   289  	 * (r->act will be reused in pass 5 for something else,
   290  	 * but we'll be done with it by then.)
   291  	 */
   292  	active = 0;
   293  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   294  		r->f.active = 0;
   295  		r->act = zbits;
   296  	}
   297  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   298  		p = r->f.prog;
   299  		if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
   300  			active++;
   301  			walkvardef(p->to.node, r, active);
   302  		}
   303  	}
   304  
   305  	/*
   306  	 * pass 3
   307  	 * iterate propagating usage
   308  	 * 	back until flow graph is complete
   309  	 */
   310  loop1:
   311  	change = 0;
   312  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   313  		r->f.active = 0;
   314  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   315  		if(r->f.prog->as == ARET)
   316  			prop(r, zbits, zbits);
   317  loop11:
   318  	/* pick up unreachable code */
   319  	i = 0;
   320  	for(r = firstr; r != R; r = r1) {
   321  		r1 = (Reg*)r->f.link;
   322  		if(r1 && r1->f.active && !r->f.active) {
   323  			prop(r, zbits, zbits);
   324  			i = 1;
   325  		}
   326  	}
   327  	if(i)
   328  		goto loop11;
   329  	if(change)
   330  		goto loop1;
   331  
   332  	if(debug['R'] && debug['v'])
   333  		dumpit("pass3", &firstr->f, 1);
   334  
   335  	/*
   336  	 * pass 4
   337  	 * iterate propagating register/variable synchrony
   338  	 * 	forward until graph is complete
   339  	 */
   340  loop2:
   341  	change = 0;
   342  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   343  		r->f.active = 0;
   344  	synch(firstr, zbits);
   345  	if(change)
   346  		goto loop2;
   347  
   348  	if(debug['R'] && debug['v'])
   349  		dumpit("pass4", &firstr->f, 1);
   350  
   351  	/*
   352  	 * pass 4.5
   353  	 * move register pseudo-variables into regu.
   354  	 */
   355  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   356  		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   357  
   358  		r->set.b[0] &= ~REGBITS;
   359  		r->use1.b[0] &= ~REGBITS;
   360  		r->use2.b[0] &= ~REGBITS;
   361  		r->refbehind.b[0] &= ~REGBITS;
   362  		r->refahead.b[0] &= ~REGBITS;
   363  		r->calbehind.b[0] &= ~REGBITS;
   364  		r->calahead.b[0] &= ~REGBITS;
   365  		r->regdiff.b[0] &= ~REGBITS;
   366  		r->act.b[0] &= ~REGBITS;
   367  	}
   368  
   369  	if(debug['R'] && debug['v'])
   370  		dumpit("pass4.5", &firstr->f, 1);
   371  
   372  	/*
   373  	 * pass 5
   374  	 * isolate regions
   375  	 * calculate costs (paint1)
   376  	 */
   377  	r = firstr;
   378  	if(r) {
   379  		for(z=0; z<BITS; z++)
   380  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   381  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   382  		if(bany(&bit) && !r->f.refset) {
   383  			// should never happen - all variables are preset
   384  			if(debug['w'])
   385  				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
   386  			r->f.refset = 1;
   387  		}
   388  	}
   389  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   390  		r->act = zbits;
   391  	rgp = region;
   392  	nregion = 0;
   393  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   394  		for(z=0; z<BITS; z++)
   395  			bit.b[z] = r->set.b[z] &
   396  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   397  		if(bany(&bit) && !r->f.refset) {
   398  			if(debug['w'])
   399  				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
   400  			r->f.refset = 1;
   401  			excise(&r->f);
   402  		}
   403  		for(z=0; z<BITS; z++)
   404  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   405  		while(bany(&bit)) {
   406  			i = bnum(bit);
   407  			rgp->enter = r;
   408  			rgp->varno = i;
   409  			change = 0;
   410  			paint1(r, i);
   411  			biclr(&bit, i);
   412  			if(change <= 0)
   413  				continue;
   414  			rgp->cost = change;
   415  			nregion++;
   416  			if(nregion >= NRGN) {
   417  				if(debug['R'] && debug['v'])
   418  					print("too many regions\n");
   419  				goto brk;
   420  			}
   421  			rgp++;
   422  		}
   423  	}
   424  brk:
   425  	qsort(region, nregion, sizeof(region[0]), rcmp);
   426  
   427  	if(debug['R'] && debug['v'])
   428  		dumpit("pass5", &firstr->f, 1);
   429  
   430  	/*
   431  	 * pass 6
   432  	 * determine used registers (paint2)
   433  	 * replace code (paint3)
   434  	 */
   435  	rgp = region;
   436  	if(debug['R'] && debug['v'])
   437  		print("\nregisterizing\n");
   438  	for(i=0; i<nregion; i++) {
   439  		if(debug['R'] && debug['v'])
   440  			print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
   441  		bit = blsh(rgp->varno);
   442  		usedreg = paint2(rgp->enter, rgp->varno, 0);
   443  		vreg = allreg(usedreg, rgp);
   444  		if(rgp->regno != 0) {
   445  			if(debug['R'] && debug['v']) {
   446  				Var *v;
   447  
   448  				v = var + rgp->varno;
   449  				print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n",
   450  						v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg);
   451  			}
   452  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   453  		}
   454  		rgp++;
   455  	}
   456  
   457  	/*
   458  	 * free aux structures. peep allocates new ones.
   459  	 */
   460  	for(i=0; i<nvar; i++)
   461  		var[i].node->opt = nil;
   462  	flowend(g);
   463  	firstr = R;
   464  
   465  	if(debug['R'] && debug['v']) {
   466  		// Rebuild flow graph, since we inserted instructions
   467  		g = flowstart(firstp, sizeof(Reg));
   468  		firstr = (Reg*)g->start;
   469  		dumpit("pass6", &firstr->f, 1);
   470  		flowend(g);
   471  		firstr = R;
   472  	}
   473  
   474  	/*
   475  	 * pass 7
   476  	 * peep-hole on basic block
   477  	 */
   478  	if(!debug['R'] || debug['P'])
   479  		peep(firstp);
   480  
   481  	/*
   482  	 * eliminate nops
   483  	 */
   484  	for(p=firstp; p!=P; p=p->link) {
   485  		while(p->link != P && p->link->as == ANOP)
   486  			p->link = p->link->link;
   487  		if(p->to.type == D_BRANCH)
   488  			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
   489  				p->to.u.branch = p->to.u.branch->link;
   490  	}
   491  
   492  	if(debug['R']) {
   493  		if(ostats.ncvtreg ||
   494  		   ostats.nspill ||
   495  		   ostats.ndelmov ||
   496  		   ostats.nvar ||
   497  		   0)
   498  			print("\nstats\n");
   499  
   500  		if(ostats.ncvtreg)
   501  			print("	%4d cvtreg\n", ostats.ncvtreg);
   502  		if(ostats.nspill)
   503  			print("	%4d spill\n", ostats.nspill);
   504  		if(ostats.ndelmov)
   505  			print("	%4d delmov\n", ostats.ndelmov);
   506  		if(ostats.nvar)
   507  			print("	%4d var\n", ostats.nvar);
   508  
   509  		memset(&ostats, 0, sizeof(ostats));
   510  	}
   511  
   512  	return;
   513  }
   514  
   515  static void
   516  walkvardef(Node *n, Reg *r, int active)
   517  {
   518  	Reg *r1, *r2;
   519  	int bn;
   520  	Var *v;
   521  	
   522  	for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
   523  		if(r1->f.active == active)
   524  			break;
   525  		r1->f.active = active;
   526  		if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
   527  			break;
   528  		for(v=n->opt; v!=nil; v=v->nextinnode) {
   529  			bn = v - var;
   530  			biset(&r1->act, bn);
   531  		}
   532  		if(r1->f.prog->as == ABL)
   533  			break;
   534  	}
   535  
   536  	for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
   537  		if(r2->f.s2 != nil)
   538  			walkvardef(n, (Reg*)r2->f.s2, active);
   539  }
   540  
   541  /*
   542   * add mov b,rn
   543   * just after r
   544   */
   545  void
   546  addmove(Reg *r, int bn, int rn, int f)
   547  {
   548  	Prog *p, *p1, *p2;
   549  	Adr *a;
   550  	Var *v;
   551  
   552  	p1 = mal(sizeof(*p1));
   553  	*p1 = zprog;
   554  	p = r->f.prog;
   555  	
   556  	// If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc),
   557  	// delay the load until after the fixup.
   558  	p2 = p->link;
   559  	if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == D_REG)
   560  		p = p2;
   561  
   562  	p1->link = p->link;
   563  	p->link = p1;
   564  	p1->lineno = p->lineno;
   565  
   566  	v = var + bn;
   567  
   568  	a = &p1->to;
   569  	a->name = v->name;
   570  	a->node = v->node;
   571  	a->sym = linksym(v->node->sym);
   572  	a->offset = v->offset;
   573  	a->etype = v->etype;
   574  	a->type = D_OREG;
   575  	if(a->etype == TARRAY || a->sym == nil)
   576  		a->type = D_CONST;
   577  
   578  	if(v->addr)
   579  		fatal("addmove: shouldn't be doing this %A\n", a);
   580  
   581  	switch(v->etype) {
   582  	default:
   583  		print("What is this %E\n", v->etype);
   584  
   585  	case TINT8:
   586  		p1->as = AMOVB;
   587  		break;
   588  	case TBOOL:
   589  	case TUINT8:
   590  //print("movbu %E %d %S\n", v->etype, bn, v->sym);
   591  		p1->as = AMOVBZ;
   592  		break;
   593  	case TINT16:
   594  		p1->as = AMOVH;
   595  		break;
   596  	case TUINT16:
   597  		p1->as = AMOVHZ;
   598  		break;
   599  	case TINT32:
   600  		p1->as = AMOVW;
   601  		break;
   602  	case TUINT32:
   603  	case TPTR32:
   604  		p1->as = AMOVWZ;
   605  		break;
   606  	case TINT64:
   607  	case TUINT64:
   608  	case TPTR64:
   609  		p1->as = AMOVD;
   610  		break;
   611  	case TFLOAT32:
   612  		p1->as = AFMOVS;
   613  		break;
   614  	case TFLOAT64:
   615  		p1->as = AFMOVD;
   616  		break;
   617  	}
   618  
   619  	p1->from.type = D_REG;
   620  	p1->from.reg = rn;
   621  	if(rn >= NREG) {
   622  		p1->from.type = D_FREG;
   623  		p1->from.reg = rn-NREG;
   624  	}
   625  	if(!f) {
   626  		p1->from = *a;
   627  		*a = zprog.from;
   628  		a->type = D_REG;
   629  		a->reg = rn;
   630  		if(rn >= NREG) {
   631  			a->type = D_FREG;
   632  			a->reg = rn-NREG;
   633  		}
   634  		if(v->etype == TUINT8 || v->etype == TBOOL)
   635  			p1->as = AMOVBZ;
   636  		if(v->etype == TUINT16)
   637  			p1->as = AMOVHZ;
   638  	}
   639  	if(debug['R'])
   640  		print("%P\t.a%P\n", p, p1);
   641  	ostats.nspill++;
   642  }
   643  
   644  static int
   645  overlap(int64 o1, int w1, int64 o2, int w2)
   646  {
   647  	int64 t1, t2;
   648  
   649  	t1 = o1+w1;
   650  	t2 = o2+w2;
   651  
   652  	if(!(t1 > o2 && t2 > o1))
   653  		return 0;
   654  
   655  	return 1;
   656  }
   657  
   658  Bits
   659  mkvar(Reg *r, Adr *a)
   660  {
   661  	USED(r);
   662  	Var *v;
   663  	int i, t, n, et, z, flag;
   664  	int64 w;
   665  	int64 o;
   666  	Bits bit;
   667  	Node *node;
   668  
   669  	// mark registers used
   670  	t = a->type;
   671  	switch(t) {
   672  	default:
   673  		print("type %d %d %D\n", t, a->name, a);
   674  		goto none;
   675  
   676  	case D_NONE:
   677  		goto none;
   678  
   679  	case D_BRANCH:
   680  	case D_CONST:
   681  	case D_FCONST:
   682  	case D_SCONST:
   683  	case D_SPR:
   684  	case D_OREG:
   685  		break;
   686  
   687  	case D_REG:
   688  		if(a->reg != NREG) {
   689  			bit = zbits;
   690  			bit.b[0] = RtoB(a->reg);
   691  			return bit;
   692  		}
   693  		break;
   694  
   695  	case D_FREG:
   696  		if(a->reg != NREG) {
   697  			bit = zbits;
   698  			bit.b[0] = FtoB(D_F0+a->reg);
   699  			return bit;
   700  		}
   701  		break;
   702  	}
   703  
   704  	switch(a->name) {
   705  	default:
   706  		goto none;
   707  
   708  	case D_EXTERN:
   709  	case D_STATIC:
   710  	case D_AUTO:
   711  	case D_PARAM:
   712  		n = a->name;
   713  		break;
   714  	}
   715  
   716  	node = a->node;
   717  	if(node == N || node->op != ONAME || node->orig == N)
   718  		goto none;
   719  	node = node->orig;
   720  	if(node->orig != node)
   721  		fatal("%D: bad node", a);
   722  	if(node->sym == S || node->sym->name[0] == '.')
   723  		goto none;
   724  	et = a->etype;
   725  	o = a->offset;
   726  	w = a->width;
   727  	if(w < 0)
   728  		fatal("bad width %lld for %D", w, a);
   729  
   730  	flag = 0;
   731  	for(i=0; i<nvar; i++) {
   732  		v = var+i;
   733  		if(v->node == node && v->name == n) {
   734  			if(v->offset == o)
   735  			if(v->etype == et)
   736  			if(v->width == w)
   737  				return blsh(i);
   738  
   739  			// if they overlap, disable both
   740  			if(overlap(v->offset, v->width, o, w)) {
   741  				v->addr = 1;
   742  				flag = 1;
   743  			}
   744  		}
   745  	}
   746  
   747  	switch(et) {
   748  	case 0:
   749  	case TFUNC:
   750  		goto none;
   751  	}
   752  
   753  	if(nvar >= NVAR) {
   754  		if(debug['w'] > 1 && node != N)
   755  			fatal("variable not optimized: %#N", node);
   756  		
   757  		// If we're not tracking a word in a variable, mark the rest as
   758  		// having its address taken, so that we keep the whole thing
   759  		// live at all calls. otherwise we might optimize away part of
   760  		// a variable but not all of it.
   761  		for(i=0; i<nvar; i++) {
   762  			v = var+i;
   763  			if(v->node == node)
   764  				v->addr = 1;
   765  		}
   766  		goto none;
   767  	}
   768  
   769  	i = nvar;
   770  	nvar++;
   771  	v = var+i;
   772  	v->offset = o;
   773  	v->name = n;
   774  	v->etype = et;
   775  	v->width = w;
   776  	v->addr = flag;		// funny punning
   777  	v->node = node;
   778  	
   779  	// node->opt is the head of a linked list
   780  	// of Vars within the given Node, so that
   781  	// we can start at a Var and find all the other
   782  	// Vars in the same Go variable.
   783  	v->nextinnode = node->opt;
   784  	node->opt = v;
   785  
   786  	bit = blsh(i);
   787  	if(n == D_EXTERN || n == D_STATIC)
   788  		for(z=0; z<BITS; z++)
   789  			externs.b[z] |= bit.b[z];
   790  	if(n == D_PARAM)
   791  		for(z=0; z<BITS; z++)
   792  			params.b[z] |= bit.b[z];
   793  
   794  	if(node->class == PPARAM)
   795  		for(z=0; z<BITS; z++)
   796  			ivar.b[z] |= bit.b[z];
   797  	if(node->class == PPARAMOUT)
   798  		for(z=0; z<BITS; z++)
   799  			ovar.b[z] |= bit.b[z];
   800  
   801  	// Treat values with their address taken as live at calls,
   802  	// because the garbage collector's liveness analysis in ../gc/plive.c does.
   803  	// These must be consistent or else we will elide stores and the garbage
   804  	// collector will see uninitialized data.
   805  	// The typical case where our own analysis is out of sync is when the
   806  	// node appears to have its address taken but that code doesn't actually
   807  	// get generated and therefore doesn't show up as an address being
   808  	// taken when we analyze the instruction stream.
   809  	// One instance of this case is when a closure uses the same name as
   810  	// an outer variable for one of its own variables declared with :=.
   811  	// The parser flags the outer variable as possibly shared, and therefore
   812  	// sets addrtaken, even though it ends up not being actually shared.
   813  	// If we were better about _ elision, _ = &x would suffice too.
   814  	// The broader := in a closure problem is mentioned in a comment in
   815  	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
   816  	if(node->addrtaken)
   817  		v->addr = 1;
   818  
   819  	// Disable registerization for globals, because:
   820  	// (1) we might panic at any time and we want the recovery code
   821  	// to see the latest values (issue 1304).
   822  	// (2) we don't know what pointers might point at them and we want
   823  	// loads via those pointers to see updated values and vice versa (issue 7995).
   824  	//
   825  	// Disable registerization for results if using defer, because the deferred func
   826  	// might recover and return, causing the current values to be used.
   827  	if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
   828  		v->addr = 1;
   829  
   830  	if(debug['R'])
   831  		print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
   832  	ostats.nvar++;
   833  
   834  	return bit;
   835  
   836  none:
   837  	return zbits;
   838  }
   839  
   840  void
   841  prop(Reg *r, Bits ref, Bits cal)
   842  {
   843  	Reg *r1, *r2;
   844  	int z, i, j;
   845  	Var *v, *v1;
   846  
   847  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
   848  		for(z=0; z<BITS; z++) {
   849  			ref.b[z] |= r1->refahead.b[z];
   850  			if(ref.b[z] != r1->refahead.b[z]) {
   851  				r1->refahead.b[z] = ref.b[z];
   852  				change++;
   853  			}
   854  			cal.b[z] |= r1->calahead.b[z];
   855  			if(cal.b[z] != r1->calahead.b[z]) {
   856  				r1->calahead.b[z] = cal.b[z];
   857  				change++;
   858  			}
   859  		}
   860  		switch(r1->f.prog->as) {
   861  		case ABL:
   862  			if(noreturn(r1->f.prog))
   863  				break;
   864  
   865  			// Mark all input variables (ivar) as used, because that's what the
   866  			// liveness bitmaps say. The liveness bitmaps say that so that a
   867  			// panic will not show stale values in the parameter dump.
   868  			// Mark variables with a recent VARDEF (r1->act) as used,
   869  			// so that the optimizer flushes initializations to memory,
   870  			// so that if a garbage collection happens during this CALL,
   871  			// the collector will see initialized memory. Again this is to
   872  			// match what the liveness bitmaps say.
   873  			for(z=0; z<BITS; z++) {
   874  				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
   875  				ref.b[z] = 0;
   876  			}
   877  			
   878  			// cal.b is the current approximation of what's live across the call.
   879  			// Every bit in cal.b is a single stack word. For each such word,
   880  			// find all the other tracked stack words in the same Go variable
   881  			// (struct/slice/string/interface) and mark them live too.
   882  			// This is necessary because the liveness analysis for the garbage
   883  			// collector works at variable granularity, not at word granularity.
   884  			// It is fundamental for slice/string/interface: the garbage collector
   885  			// needs the whole value, not just some of the words, in order to
   886  			// interpret the other bits correctly. Specifically, slice needs a consistent
   887  			// ptr and cap, string needs a consistent ptr and len, and interface
   888  			// needs a consistent type word and data word.
   889  			for(z=0; z<BITS; z++) {
   890  				if(cal.b[z] == 0)
   891  					continue;
   892  				for(i=0; i<64; i++) {
   893  					if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
   894  						continue;
   895  					v = var+z*64+i;
   896  					if(v->node->opt == nil) // v represents fixed register, not Go variable
   897  						continue;
   898  
   899  					// v->node->opt is the head of a linked list of Vars
   900  					// corresponding to tracked words from the Go variable v->node.
   901  					// Walk the list and set all the bits.
   902  					// For a large struct this could end up being quadratic:
   903  					// after the first setting, the outer loop (for z, i) would see a 1 bit
   904  					// for all of the remaining words in the struct, and for each such
   905  					// word would go through and turn on all the bits again.
   906  					// To avoid the quadratic behavior, we only turn on the bits if
   907  					// v is the head of the list or if the head's bit is not yet turned on.
   908  					// This will set the bits at most twice, keeping the overall loop linear.
   909  					v1 = v->node->opt;
   910  					j = v1 - var;
   911  					if(v == v1 || !btest(&cal, j)) {
   912  						for(; v1 != nil; v1 = v1->nextinnode) {
   913  							j = v1 - var;
   914  							biset(&cal, j);
   915  						}
   916  					}
   917  				}
   918  			}
   919  			break;
   920  
   921  		case ATEXT:
   922  			for(z=0; z<BITS; z++) {
   923  				cal.b[z] = 0;
   924  				ref.b[z] = 0;
   925  			}
   926  			break;
   927  
   928  		case ARET:
   929  			for(z=0; z<BITS; z++) {
   930  				cal.b[z] = externs.b[z] | ovar.b[z];
   931  				ref.b[z] = 0;
   932  			}
   933  			break;
   934  		}
   935  		for(z=0; z<BITS; z++) {
   936  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
   937  				r1->use1.b[z] | r1->use2.b[z];
   938  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
   939  			r1->refbehind.b[z] = ref.b[z];
   940  			r1->calbehind.b[z] = cal.b[z];
   941  		}
   942  		if(r1->f.active)
   943  			break;
   944  		r1->f.active = 1;
   945  	}
   946  	for(; r != r1; r = (Reg*)r->f.p1)
   947  		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
   948  			prop(r2, r->refbehind, r->calbehind);
   949  }
   950  
   951  void
   952  synch(Reg *r, Bits dif)
   953  {
   954  	Reg *r1;
   955  	int z;
   956  
   957  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
   958  		for(z=0; z<BITS; z++) {
   959  			dif.b[z] = (dif.b[z] &
   960  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
   961  					r1->set.b[z] | r1->regdiff.b[z];
   962  			if(dif.b[z] != r1->regdiff.b[z]) {
   963  				r1->regdiff.b[z] = dif.b[z];
   964  				change++;
   965  			}
   966  		}
   967  		if(r1->f.active)
   968  			break;
   969  		r1->f.active = 1;
   970  		for(z=0; z<BITS; z++)
   971  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
   972  		if(r1->f.s2 != nil)
   973  			synch((Reg*)r1->f.s2, dif);
   974  	}
   975  }
   976  
   977  uint64
   978  allreg(uint64 b, Rgn *r)
   979  {
   980  	Var *v;
   981  	int i;
   982  
   983  	v = var + r->varno;
   984  	r->regno = 0;
   985  	switch(v->etype) {
   986  
   987  	default:
   988  		fatal("unknown etype %d/%E", bitno(b), v->etype);
   989  		break;
   990  
   991  	case TINT8:
   992  	case TUINT8:
   993  	case TINT16:
   994  	case TUINT16:
   995  	case TINT32:
   996  	case TUINT32:
   997  	case TINT64:
   998  	case TUINT64:
   999  	case TINT:
  1000  	case TUINT:
  1001  	case TUINTPTR:
  1002  	case TBOOL:
  1003  	case TPTR32:
  1004  	case TPTR64:
  1005  		i = BtoR(~b);
  1006  		if(i && r->cost > 0) {
  1007  			r->regno = i;
  1008  			return RtoB(i);
  1009  		}
  1010  		break;
  1011  
  1012  	case TFLOAT32:
  1013  	case TFLOAT64:
  1014  		i = BtoF(~b);
  1015  		if(i && r->cost > 0) {
  1016  			r->regno = i;
  1017  			return FtoB(i);
  1018  		}
  1019  		break;
  1020  	}
  1021  	return 0;
  1022  }
  1023  
  1024  void
  1025  paint1(Reg *r, int bn)
  1026  {
  1027  	Reg *r1;
  1028  	int z;
  1029  	uint64 bb;
  1030  
  1031  	z = bn/64;
  1032  	bb = 1LL<<(bn%64);
  1033  	if(r->act.b[z] & bb)
  1034  		return;
  1035  	for(;;) {
  1036  		if(!(r->refbehind.b[z] & bb))
  1037  			break;
  1038  		r1 = (Reg*)r->f.p1;
  1039  		if(r1 == R)
  1040  			break;
  1041  		if(!(r1->refahead.b[z] & bb))
  1042  			break;
  1043  		if(r1->act.b[z] & bb)
  1044  			break;
  1045  		r = r1;
  1046  	}
  1047  
  1048  	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  1049  		change -= CLOAD * r->f.loop;
  1050  	}
  1051  	for(;;) {
  1052  		r->act.b[z] |= bb;
  1053  
  1054  		if(r->f.prog->as != ANOP) { // don't give credit for NOPs
  1055  			if(r->use1.b[z] & bb)
  1056  				change += CREF * r->f.loop;
  1057  			if((r->use2.b[z]|r->set.b[z]) & bb)
  1058  				change += CREF * r->f.loop;
  1059  		}
  1060  
  1061  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1062  			change -= CLOAD * r->f.loop;
  1063  		}
  1064  
  1065  		if(r->refbehind.b[z] & bb)
  1066  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1067  				if(r1->refahead.b[z] & bb)
  1068  					paint1(r1, bn);
  1069  
  1070  		if(!(r->refahead.b[z] & bb))
  1071  			break;
  1072  		r1 = (Reg*)r->f.s2;
  1073  		if(r1 != R)
  1074  			if(r1->refbehind.b[z] & bb)
  1075  				paint1(r1, bn);
  1076  		r = (Reg*)r->f.s1;
  1077  		if(r == R)
  1078  			break;
  1079  		if(r->act.b[z] & bb)
  1080  			break;
  1081  		if(!(r->refbehind.b[z] & bb))
  1082  			break;
  1083  	}
  1084  }
  1085  
  1086  uint64
  1087  paint2(Reg *r, int bn, int depth)
  1088  {
  1089  	Reg *r1;
  1090  	int z;
  1091  	uint64 bb, vreg;
  1092  
  1093  	z = bn/64;
  1094  	bb = 1LL << (bn%64);
  1095  	vreg = regbits;
  1096  	if(!(r->act.b[z] & bb))
  1097  		return vreg;
  1098  	for(;;) {
  1099  		if(!(r->refbehind.b[z] & bb))
  1100  			break;
  1101  		r1 = (Reg*)r->f.p1;
  1102  		if(r1 == R)
  1103  			break;
  1104  		if(!(r1->refahead.b[z] & bb))
  1105  			break;
  1106  		if(!(r1->act.b[z] & bb))
  1107  			break;
  1108  		r = r1;
  1109  	}
  1110  	for(;;) {
  1111  		if(debug['R'] && debug['v'])
  1112  			print("  paint2 %d %P\n", depth, r->f.prog);
  1113  
  1114  		r->act.b[z] &= ~bb;
  1115  
  1116  		vreg |= r->regu;
  1117  
  1118  		if(r->refbehind.b[z] & bb)
  1119  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1120  				if(r1->refahead.b[z] & bb)
  1121  					vreg |= paint2(r1, bn, depth+1);
  1122  
  1123  		if(!(r->refahead.b[z] & bb))
  1124  			break;
  1125  		r1 = (Reg*)r->f.s2;
  1126  		if(r1 != R)
  1127  			if(r1->refbehind.b[z] & bb)
  1128  				vreg |= paint2(r1, bn, depth+1);
  1129  		r = (Reg*)r->f.s1;
  1130  		if(r == R)
  1131  			break;
  1132  		if(!(r->act.b[z] & bb))
  1133  			break;
  1134  		if(!(r->refbehind.b[z] & bb))
  1135  			break;
  1136  	}
  1137  	return vreg;
  1138  }
  1139  
  1140  void
  1141  paint3(Reg *r, int bn, uint64 rb, int rn)
  1142  {
  1143  	Reg *r1;
  1144  	Prog *p;
  1145  	int z;
  1146  	uint64 bb;
  1147  
  1148  	z = bn/64;
  1149  	bb = 1LL << (bn%64);
  1150  	if(r->act.b[z] & bb)
  1151  		return;
  1152  	for(;;) {
  1153  		if(!(r->refbehind.b[z] & bb))
  1154  			break;
  1155  		r1 = (Reg*)r->f.p1;
  1156  		if(r1 == R)
  1157  			break;
  1158  		if(!(r1->refahead.b[z] & bb))
  1159  			break;
  1160  		if(r1->act.b[z] & bb)
  1161  			break;
  1162  		r = r1;
  1163  	}
  1164  
  1165  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1166  		addmove(r, bn, rn, 0);
  1167  	for(;;) {
  1168  		r->act.b[z] |= bb;
  1169  		p = r->f.prog;
  1170  
  1171  		if(r->use1.b[z] & bb) {
  1172  			if(debug['R'] && debug['v'])
  1173  				print("%P", p);
  1174  			addreg(&p->from, rn);
  1175  			if(debug['R'] && debug['v'])
  1176  				print(" ===change== %P\n", p);
  1177  		}
  1178  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1179  			if(debug['R'] && debug['v'])
  1180  				print("%P", p);
  1181  			addreg(&p->to, rn);
  1182  			if(debug['R'] && debug['v'])
  1183  				print(" ===change== %P\n", p);
  1184  		}
  1185  
  1186  		if(STORE(r) & r->regdiff.b[z] & bb)
  1187  			addmove(r, bn, rn, 1);
  1188  		r->regu |= rb;
  1189  
  1190  		if(r->refbehind.b[z] & bb)
  1191  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1192  				if(r1->refahead.b[z] & bb)
  1193  					paint3(r1, bn, rb, rn);
  1194  
  1195  		if(!(r->refahead.b[z] & bb))
  1196  			break;
  1197  		r1 = (Reg*)r->f.s2;
  1198  		if(r1 != R)
  1199  			if(r1->refbehind.b[z] & bb)
  1200  				paint3(r1, bn, rb, rn);
  1201  		r = (Reg*)r->f.s1;
  1202  		if(r == R)
  1203  			break;
  1204  		if(r->act.b[z] & bb)
  1205  			break;
  1206  		if(!(r->refbehind.b[z] & bb))
  1207  			break;
  1208  	}
  1209  }
  1210  
  1211  void
  1212  addreg(Adr *a, int rn)
  1213  {
  1214  	a->sym = nil;
  1215  	a->node = nil;
  1216  	a->name = D_NONE;
  1217  	a->type = D_REG;
  1218  	a->reg = rn;
  1219  	if(rn >= NREG) {
  1220  		a->type = D_FREG;
  1221  		a->reg = rn-NREG;
  1222  	}
  1223  
  1224  	ostats.ncvtreg++;
  1225  }
  1226  
/*
 * track register variables including external registers:
 *	bit	reg
 *	0	R0
 *	1	R1
 *	...	...
 *	31	R31
 *	32+0	F0
 *	32+1	F1
 *	...	...
 *	32+31	F31
 *
 * Note: RtoB accepts only registers strictly above D_R0, so it returns 0
 * for R0 itself and never produces bit 0.
 */
  1239  uint64
  1240  RtoB(int r)
  1241  {
  1242  	if(r > D_R0 && r <= D_R0+31)
  1243  		return 1ULL << (r - D_R0);
  1244  	return 0;
  1245  }
  1246  
  1247  int
  1248  BtoR(uint64 b)
  1249  {
  1250  	b &= 0xffffffffull;
  1251  	if(b == 0)
  1252  		return 0;
  1253  	return bitno(b) + D_R0;
  1254  }
  1255  
  1256  uint64
  1257  FtoB(int r)
  1258  {
  1259  	if(r >= D_F0 && r <= D_F0+31)
  1260  		return 1ULL << (32 + r - D_F0);
  1261  	return 0;
  1262  }
  1263  
  1264  int
  1265  BtoF(uint64 b)
  1266  {
  1267  	b >>= 32;
  1268  	if(b == 0)
  1269  		return 0;
  1270  	return bitno(b) + D_F0;
  1271  }
  1272  
  1273  void
  1274  dumpone(Flow *f, int isreg)
  1275  {
  1276  	int z;
  1277  	Bits bit;
  1278  	Reg *r;
  1279  
  1280  	print("%d:%P", f->loop, f->prog);
  1281  	if(isreg) {	
  1282  		r = (Reg*)f;
  1283  		for(z=0; z<BITS; z++)
  1284  			bit.b[z] =
  1285  				r->set.b[z] |
  1286  				r->use1.b[z] |
  1287  				r->use2.b[z] |
  1288  				r->refbehind.b[z] |
  1289  				r->refahead.b[z] |
  1290  				r->calbehind.b[z] |
  1291  				r->calahead.b[z] |
  1292  				r->regdiff.b[z] |
  1293  				r->act.b[z] |
  1294  					0;
  1295  		if(bany(&bit)) {
  1296  			print("\t");
  1297  			if(bany(&r->set))
  1298  				print(" s:%Q", r->set);
  1299  			if(bany(&r->use1))
  1300  				print(" u1:%Q", r->use1);
  1301  			if(bany(&r->use2))
  1302  				print(" u2:%Q", r->use2);
  1303  			if(bany(&r->refbehind))
  1304  				print(" rb:%Q ", r->refbehind);
  1305  			if(bany(&r->refahead))
  1306  				print(" ra:%Q ", r->refahead);
  1307  			if(bany(&r->calbehind))
  1308  				print(" cb:%Q ", r->calbehind);
  1309  			if(bany(&r->calahead))
  1310  				print(" ca:%Q ", r->calahead);
  1311  			if(bany(&r->regdiff))
  1312  				print(" d:%Q ", r->regdiff);
  1313  			if(bany(&r->act))
  1314  				print(" a:%Q ", r->act);
  1315  		}
  1316  	}
  1317  	print("\n");
  1318  }
  1319  
  1320  
  1321  void
  1322  dumpit(char *str, Flow *r0, int isreg)
  1323  {
  1324  	Flow *r, *r1;
  1325  
  1326  	print("\n%s\n", str);
  1327  	for(r = r0; r != nil; r = r->link) {
  1328  		dumpone(r, isreg);
  1329  		r1 = r->p2;
  1330  		if(r1 != nil) {
  1331  			print("	pred:");
  1332  			for(; r1 != nil; r1 = r1->p2link)
  1333  				print(" %.4ud", (int)r1->prog->pc);
  1334  			print("\n");
  1335  		}
  1336  		// Print successors if it's not just the next one
  1337  		if(r->s1 != r->link || r->s2 != nil) {
  1338  			print("	succ:");
  1339  			if(r->s1 != nil)
  1340  				print(" %.4ud", (int)r->s1->prog->pc);
  1341  			if(r->s2 != nil)
  1342  				print(" %.4ud", (int)r->s2->prog->pc);
  1343  			print("\n");
  1344  		}
  1345  	}
  1346  }