github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/cmd/6g/reg.c (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include "gg.h"
    34  #include "opt.h"
    35  
    36  #define	NREGVAR	32	/* 16 general + 16 floating; regopt reserves var[0..NREGVAR) for these register pseudo-variables */
    37  #define	REGBITS	((uint64)0xffffffffull)	/* low 32 bits of Bits word 0; regopt pass 4.5 moves these bits into r->regu and clears them */
    38  /*c2go enum {
    39  	NREGVAR = 32,
    40  	REGBITS = 0xffffffff,
    41  };
    42  */
    43  
    44  static	Reg*	firstr;	/* start of the current flow graph (g->start); reset to R once regopt releases the graph */
    45  static	int	first	= 1;	/* nonzero until regopt has done its one-time setup (fmtinstall, exregoffset) */
    46  
    47  int
    48  rcmp(const void *a1, const void *a2)
    49  {
    50  	Rgn *p1, *p2;
    51  	int c1, c2;
    52  
    53  	p1 = (Rgn*)a1;
    54  	p2 = (Rgn*)a2;
    55  	c1 = p2->cost;
    56  	c2 = p1->cost;
    57  	if(c1 -= c2)
    58  		return c1;
    59  	return p2->varno - p1->varno;
    60  }
    61  
    62  static void
    63  setaddrs(Bits bit)
    64  {
    65  	int i, n;
    66  	Var *v;
    67  	Node *node;
    68  
    69  	while(bany(&bit)) {
    70  		// convert each bit to a variable
    71  		i = bnum(bit);
    72  		node = var[i].node;
    73  		n = var[i].name;
    74  		biclr(&bit, i);
    75  
    76  		// disable all pieces of that variable
    77  		for(i=0; i<nvar; i++) {
    78  			v = var+i;
    79  			if(v->node == node && v->name == n)
    80  				v->addr = 2;
    81  		}
    82  	}
    83  }
    84  
    85  static char* regname[] = {
    86  	".AX",
    87  	".CX",
    88  	".DX",
    89  	".BX",
    90  	".SP",
    91  	".BP",
    92  	".SI",
    93  	".DI",
    94  	".R8",
    95  	".R9",
    96  	".R10",
    97  	".R11",
    98  	".R12",
    99  	".R13",
   100  	".R14",
   101  	".R15",
   102  	".X0",
   103  	".X1",
   104  	".X2",
   105  	".X3",
   106  	".X4",
   107  	".X5",
   108  	".X6",
   109  	".X7",
   110  	".X8",
   111  	".X9",
   112  	".X10",
   113  	".X11",
   114  	".X12",
   115  	".X13",
   116  	".X14",
   117  	".X15",
   118  };
   119  
   120  static Node* regnodes[NREGVAR];	/* node for regname[i]; regopt creates each entry once (when it is still N) and reuses it on later calls */
   121  
   122  static void walkvardef(Node *n, Reg *r, int active);	/* forward declaration: used by regopt pass 2.5, defined after it */
   123  
   124  void
   125  regopt(Prog *firstp)
   126  {
   127  	Reg *r, *r1;
   128  	Prog *p;
   129  	Graph *g;
   130  	ProgInfo info;
   131  	int i, z, active;
   132  	uint32 vreg;
   133  	Bits bit;
   134  
   135  	if(first) {
   136  		fmtinstall('Q', Qconv);
   137  		exregoffset = D_R15;
   138  		first = 0;
   139  	}
   140  
   141  	mergetemp(firstp);
   142  
   143  	/*
   144  	 * control flow is more complicated in generated go code
   145  	 * than in generated c code.  define pseudo-variables for
   146  	 * registers, so we have complete register usage information.
   147  	 */
   148  	nvar = NREGVAR;
   149  	memset(var, 0, NREGVAR*sizeof var[0]);
   150  	for(i=0; i<NREGVAR; i++) {
   151  		if(regnodes[i] == N)
   152  			regnodes[i] = newname(lookup(regname[i]));
   153  		var[i].node = regnodes[i];
   154  	}
   155  
   156  	regbits = RtoB(D_SP);
   157  	for(z=0; z<BITS; z++) {
   158  		externs.b[z] = 0;
   159  		params.b[z] = 0;
   160  		consts.b[z] = 0;
   161  		addrs.b[z] = 0;
   162  		ivar.b[z] = 0;
   163  		ovar.b[z] = 0;
   164  	}
   165  
   166  	/*
   167  	 * pass 1
   168  	 * build aux data structure
   169  	 * allocate pcs
   170  	 * find use and set of variables
   171  	 */
   172  	g = flowstart(firstp, sizeof(Reg));
   173  	if(g == nil) {
   174  		for(i=0; i<nvar; i++)
   175  			var[i].node->opt = nil;
   176  		return;
   177  	}
   178  
   179  	firstr = (Reg*)g->start;
   180  
   181  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   182  		p = r->f.prog;
   183  		if(p->as == AVARDEF || p->as == AVARKILL)
   184  			continue;
   185  		proginfo(&info, p);
   186  
   187  		// Avoid making variables for direct-called functions.
   188  		if(p->as == ACALL && p->to.type == D_EXTERN)
   189  			continue;
   190  
   191  		r->use1.b[0] |= info.reguse | info.regindex;
   192  		r->set.b[0] |= info.regset;
   193  
   194  		bit = mkvar(r, &p->from);
   195  		if(bany(&bit)) {
   196  			if(info.flags & LeftAddr)
   197  				setaddrs(bit);
   198  			if(info.flags & LeftRead)
   199  				for(z=0; z<BITS; z++)
   200  					r->use1.b[z] |= bit.b[z];
   201  			if(info.flags & LeftWrite)
   202  				for(z=0; z<BITS; z++)
   203  					r->set.b[z] |= bit.b[z];
   204  		}
   205  
   206  		bit = mkvar(r, &p->to);
   207  		if(bany(&bit)) {	
   208  			if(info.flags & RightAddr)
   209  				setaddrs(bit);
   210  			if(info.flags & RightRead)
   211  				for(z=0; z<BITS; z++)
   212  					r->use2.b[z] |= bit.b[z];
   213  			if(info.flags & RightWrite)
   214  				for(z=0; z<BITS; z++)
   215  					r->set.b[z] |= bit.b[z];
   216  		}
   217  	}
   218  
   219  	for(i=0; i<nvar; i++) {
   220  		Var *v = var+i;
   221  		if(v->addr) {
   222  			bit = blsh(i);
   223  			for(z=0; z<BITS; z++)
   224  				addrs.b[z] |= bit.b[z];
   225  		}
   226  
   227  		if(debug['R'] && debug['v'])
   228  			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
   229  				i, v->addr, v->etype, v->width, v->node, v->offset);
   230  	}
   231  
   232  	if(debug['R'] && debug['v'])
   233  		dumpit("pass1", &firstr->f, 1);
   234  
   235  	/*
   236  	 * pass 2
   237  	 * find looping structure
   238  	 */
   239  	flowrpo(g);
   240  
   241  	if(debug['R'] && debug['v'])
   242  		dumpit("pass2", &firstr->f, 1);
   243  
   244  	/*
   245  	 * pass 2.5
   246  	 * iterate propagating fat vardef covering forward
   247  	 * r->act records vars with a VARDEF since the last CALL.
   248  	 * (r->act will be reused in pass 5 for something else,
   249  	 * but we'll be done with it by then.)
   250  	 */
   251  	active = 0;
   252  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   253  		r->f.active = 0;
   254  		r->act = zbits;
   255  	}
   256  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   257  		p = r->f.prog;
   258  		if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
   259  			active++;
   260  			walkvardef(p->to.node, r, active);
   261  		}
   262  	}
   263  
   264  	/*
   265  	 * pass 3
   266  	 * iterate propagating usage
   267  	 * 	back until flow graph is complete
   268  	 */
   269  loop1:
   270  	change = 0;
   271  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   272  		r->f.active = 0;
   273  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   274  		if(r->f.prog->as == ARET)
   275  			prop(r, zbits, zbits);
   276  loop11:
   277  	/* pick up unreachable code */
   278  	i = 0;
   279  	for(r = firstr; r != R; r = r1) {
   280  		r1 = (Reg*)r->f.link;
   281  		if(r1 && r1->f.active && !r->f.active) {
   282  			prop(r, zbits, zbits);
   283  			i = 1;
   284  		}
   285  	}
   286  	if(i)
   287  		goto loop11;
   288  	if(change)
   289  		goto loop1;
   290  
   291  	if(debug['R'] && debug['v'])
   292  		dumpit("pass3", &firstr->f, 1);
   293  
   294  	/*
   295  	 * pass 4
   296  	 * iterate propagating register/variable synchrony
   297  	 * 	forward until graph is complete
   298  	 */
   299  loop2:
   300  	change = 0;
   301  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   302  		r->f.active = 0;
   303  	synch(firstr, zbits);
   304  	if(change)
   305  		goto loop2;
   306  
   307  	if(debug['R'] && debug['v'])
   308  		dumpit("pass4", &firstr->f, 1);
   309  
   310  	/*
   311  	 * pass 4.5
   312  	 * move register pseudo-variables into regu.
   313  	 */
   314  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   315  		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   316  
   317  		r->set.b[0] &= ~REGBITS;
   318  		r->use1.b[0] &= ~REGBITS;
   319  		r->use2.b[0] &= ~REGBITS;
   320  		r->refbehind.b[0] &= ~REGBITS;
   321  		r->refahead.b[0] &= ~REGBITS;
   322  		r->calbehind.b[0] &= ~REGBITS;
   323  		r->calahead.b[0] &= ~REGBITS;
   324  		r->regdiff.b[0] &= ~REGBITS;
   325  		r->act.b[0] &= ~REGBITS;
   326  	}
   327  
   328  	/*
   329  	 * pass 5
   330  	 * isolate regions
   331  	 * calculate costs (paint1)
   332  	 */
   333  	r = firstr;
   334  	if(r) {
   335  		for(z=0; z<BITS; z++)
   336  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   337  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   338  		if(bany(&bit) && !r->f.refset) {
   339  			// should never happen - all variables are preset
   340  			if(debug['w'])
   341  				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
   342  			r->f.refset = 1;
   343  		}
   344  	}
   345  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   346  		r->act = zbits;
   347  	rgp = region;
   348  	nregion = 0;
   349  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   350  		for(z=0; z<BITS; z++)
   351  			bit.b[z] = r->set.b[z] &
   352  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   353  		if(bany(&bit) && !r->f.refset) {
   354  			if(debug['w'])
   355  				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
   356  			r->f.refset = 1;
   357  			excise(&r->f);
   358  		}
   359  		for(z=0; z<BITS; z++)
   360  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   361  		while(bany(&bit)) {
   362  			i = bnum(bit);
   363  			rgp->enter = r;
   364  			rgp->varno = i;
   365  			change = 0;
   366  			paint1(r, i);
   367  			biclr(&bit, i);
   368  			if(change <= 0)
   369  				continue;
   370  			rgp->cost = change;
   371  			nregion++;
   372  			if(nregion >= NRGN) {
   373  				if(debug['R'] && debug['v'])
   374  					print("too many regions\n");
   375  				goto brk;
   376  			}
   377  			rgp++;
   378  		}
   379  	}
   380  brk:
   381  	qsort(region, nregion, sizeof(region[0]), rcmp);
   382  
   383  	if(debug['R'] && debug['v'])
   384  		dumpit("pass5", &firstr->f, 1);
   385  
   386  	/*
   387  	 * pass 6
   388  	 * determine used registers (paint2)
   389  	 * replace code (paint3)
   390  	 */
   391  	rgp = region;
   392  	if(debug['R'] && debug['v'])
   393  		print("\nregisterizing\n");
   394  	for(i=0; i<nregion; i++) {
   395  		if(debug['R'] && debug['v'])
   396  			print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
   397  		bit = blsh(rgp->varno);
   398  		vreg = paint2(rgp->enter, rgp->varno, 0);
   399  		vreg = allreg(vreg, rgp);
   400  		if(rgp->regno != 0) {
   401  			if(debug['R'] && debug['v']) {
   402  				Var *v;
   403  
   404  				v = var + rgp->varno;
   405  				print("registerize %N+%lld (bit=%2d et=%2E) in %R\n",
   406  						v->node, v->offset, rgp->varno, v->etype, rgp->regno);
   407  			}
   408  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   409  		}
   410  		rgp++;
   411  	}
   412  
   413  	/*
   414  	 * free aux structures. peep allocates new ones.
   415  	 */
   416  	for(i=0; i<nvar; i++)
   417  		var[i].node->opt = nil;
   418  	flowend(g);
   419  	firstr = R;
   420  
   421  	if(debug['R'] && debug['v']) {
   422  		// Rebuild flow graph, since we inserted instructions
   423  		g = flowstart(firstp, sizeof(Reg));
   424  		firstr = (Reg*)g->start;
   425  		dumpit("pass6", &firstr->f, 1);
   426  		flowend(g);
   427  		firstr = R;
   428  	}
   429  
   430  	/*
   431  	 * pass 7
   432  	 * peep-hole on basic block
   433  	 */
   434  	if(!debug['R'] || debug['P'])
   435  		peep(firstp);
   436  
   437  	/*
   438  	 * eliminate nops
   439  	 */
   440  	for(p=firstp; p!=P; p=p->link) {
   441  		while(p->link != P && p->link->as == ANOP)
   442  			p->link = p->link->link;
   443  		if(p->to.type == D_BRANCH)
   444  			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
   445  				p->to.u.branch = p->to.u.branch->link;
   446  	}
   447  
   448  	if(debug['R']) {
   449  		if(ostats.ncvtreg ||
   450  		   ostats.nspill ||
   451  		   ostats.nreload ||
   452  		   ostats.ndelmov ||
   453  		   ostats.nvar ||
   454  		   ostats.naddr ||
   455  		   0)
   456  			print("\nstats\n");
   457  
   458  		if(ostats.ncvtreg)
   459  			print("	%4d cvtreg\n", ostats.ncvtreg);
   460  		if(ostats.nspill)
   461  			print("	%4d spill\n", ostats.nspill);
   462  		if(ostats.nreload)
   463  			print("	%4d reload\n", ostats.nreload);
   464  		if(ostats.ndelmov)
   465  			print("	%4d delmov\n", ostats.ndelmov);
   466  		if(ostats.nvar)
   467  			print("	%4d var\n", ostats.nvar);
   468  		if(ostats.naddr)
   469  			print("	%4d addr\n", ostats.naddr);
   470  
   471  		memset(&ostats, 0, sizeof(ostats));
   472  	}
   473  }
   474  
   475  static void
   476  walkvardef(Node *n, Reg *r, int active)
   477  {
   478  	Reg *r1, *r2;
   479  	int bn;
   480  	Var *v;
   481  	
   482  	for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
   483  		if(r1->f.active == active)
   484  			break;
   485  		r1->f.active = active;
   486  		if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
   487  			break;
   488  		for(v=n->opt; v!=nil; v=v->nextinnode) {
   489  			bn = v - var;
   490  			biset(&r1->act, bn);
   491  		}
   492  		if(r1->f.prog->as == ACALL)
   493  			break;
   494  	}
   495  
   496  	for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
   497  		if(r2->f.s2 != nil)
   498  			walkvardef(n, (Reg*)r2->f.s2, active);
   499  }
   500  
   501  /*
   502   * add mov b,rn
   503   * just after r
   504   */
   505  void
   506  addmove(Reg *r, int bn, int rn, int f)
   507  {
   508  	Prog *p, *p1;
   509  	Adr *a;
   510  	Var *v;
   511  
   512  	p1 = mal(sizeof(*p1));
   513  	clearp(p1);
   514  	p1->pc = 9999;
   515  
   516  	p = r->f.prog;
   517  	p1->link = p->link;
   518  	p->link = p1;
   519  	p1->lineno = p->lineno;
   520  
   521  	v = var + bn;
   522  
   523  	a = &p1->to;
   524  	a->offset = v->offset;
   525  	a->etype = v->etype;
   526  	a->type = v->name;
   527  	a->node = v->node;
   528  	a->sym = linksym(v->node->sym);
   529  
   530  	// need to clean this up with wptr and
   531  	// some of the defaults
   532  	p1->as = AMOVL;
   533  	switch(simtype[(uchar)v->etype]) {
   534  	default:
   535  		fatal("unknown type %E", v->etype);
   536  	case TINT8:
   537  	case TUINT8:
   538  	case TBOOL:
   539  		p1->as = AMOVB;
   540  		break;
   541  	case TINT16:
   542  	case TUINT16:
   543  		p1->as = AMOVW;
   544  		break;
   545  	case TINT64:
   546  	case TUINT64:
   547  	case TPTR64:
   548  		p1->as = AMOVQ;
   549  		break;
   550  	case TFLOAT32:
   551  		p1->as = AMOVSS;
   552  		break;
   553  	case TFLOAT64:
   554  		p1->as = AMOVSD;
   555  		break;
   556  	case TINT32:
   557  	case TUINT32:
   558  	case TPTR32:
   559  		break;
   560  	}
   561  
   562  	p1->from.type = rn;
   563  	if(!f) {
   564  		p1->from = *a;
   565  		*a = zprog.from;
   566  		a->type = rn;
   567  		if(v->etype == TUINT8)
   568  			p1->as = AMOVB;
   569  		if(v->etype == TUINT16)
   570  			p1->as = AMOVW;
   571  	}
   572  	if(debug['R'] && debug['v'])
   573  		print("%P ===add=== %P\n", p, p1);
   574  	ostats.nspill++;
   575  }
   576  
   577  uint32
   578  doregbits(int r)
   579  {
   580  	uint32 b;
   581  
   582  	b = 0;
   583  	if(r >= D_INDIR)
   584  		r -= D_INDIR;
   585  	if(r >= D_AX && r <= D_R15)
   586  		b |= RtoB(r);
   587  	else
   588  	if(r >= D_AL && r <= D_R15B)
   589  		b |= RtoB(r-D_AL+D_AX);
   590  	else
   591  	if(r >= D_AH && r <= D_BH)
   592  		b |= RtoB(r-D_AH+D_AX);
   593  	else
   594  	if(r >= D_X0 && r <= D_X0+15)
   595  		b |= FtoB(r);
   596  	return b;
   597  }
   598  
   599  static int
   600  overlap(int64 o1, int w1, int64 o2, int w2)
   601  {
   602  	int64 t1, t2;
   603  
   604  	t1 = o1+w1;
   605  	t2 = o2+w2;
   606  
   607  	if(!(t1 > o2 && t2 > o1))
   608  		return 0;
   609  
   610  	return 1;
   611  }
   612  
   613  Bits
   614  mkvar(Reg *r, Adr *a)
   615  {
   616  	Var *v;
   617  	int i, t, n, et, z, flag;
   618  	int64 w;
   619  	uint32 regu;
   620  	int64 o;
   621  	Bits bit;
   622  	Node *node;
   623  
   624  	/*
   625  	 * mark registers used
   626  	 */
   627  	t = a->type;
   628  	if(t == D_NONE)
   629  		goto none;
   630  
   631  	if(r != R)
   632  		r->use1.b[0] |= doregbits(a->index);
   633  
   634  	if(t >= D_INDIR && t < 2*D_INDIR)
   635  		goto none;
   636  
   637  	switch(t) {
   638  	default:
   639  		regu = doregbits(t);
   640  		if(regu == 0)
   641  			goto none;
   642  		bit = zbits;
   643  		bit.b[0] = regu;
   644  		return bit;
   645  
   646  	case D_ADDR:
   647  		a->type = a->index;
   648  		bit = mkvar(r, a);
   649  		setaddrs(bit);
   650  		a->type = t;
   651  		ostats.naddr++;
   652  		goto none;
   653  
   654  	case D_EXTERN:
   655  	case D_STATIC:
   656  	case D_PARAM:
   657  	case D_AUTO:
   658  		n = t;
   659  		break;
   660  	}
   661  
   662  	node = a->node;
   663  	if(node == N || node->op != ONAME || node->orig == N)
   664  		goto none;
   665  	node = node->orig;
   666  	if(node->orig != node)
   667  		fatal("%D: bad node", a);
   668  	if(node->sym == S || node->sym->name[0] == '.')
   669  		goto none;
   670  	et = a->etype;
   671  	o = a->offset;
   672  	w = a->width;
   673  	if(w < 0)
   674  		fatal("bad width %lld for %D", w, a);
   675  
   676  	flag = 0;
   677  	for(i=0; i<nvar; i++) {
   678  		v = var+i;
   679  		if(v->node == node && v->name == n) {
   680  			if(v->offset == o)
   681  			if(v->etype == et)
   682  			if(v->width == w)
   683  				return blsh(i);
   684  
   685  			// if they overlaps, disable both
   686  			if(overlap(v->offset, v->width, o, w)) {
   687  //				print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
   688  				v->addr = 1;
   689  				flag = 1;
   690  			}
   691  		}
   692  	}
   693  	switch(et) {
   694  	case 0:
   695  	case TFUNC:
   696  		goto none;
   697  	}
   698  
   699  	if(nvar >= NVAR) {
   700  		if(debug['w'] > 1 && node != N)
   701  			fatal("variable not optimized: %#N", node);
   702  		
   703  		// If we're not tracking a word in a variable, mark the rest as
   704  		// having its address taken, so that we keep the whole thing
   705  		// live at all calls. otherwise we might optimize away part of
   706  		// a variable but not all of it.
   707  		for(i=0; i<nvar; i++) {
   708  			v = var+i;
   709  			if(v->node == node)
   710  				v->addr = 1;
   711  		}
   712  		goto none;
   713  	}
   714  
   715  	i = nvar;
   716  	nvar++;
   717  	v = var+i;
   718  	v->offset = o;
   719  	v->name = n;
   720  	v->etype = et;
   721  	v->width = w;
   722  	v->addr = flag;		// funny punning
   723  	v->node = node;
   724  	
   725  	// node->opt is the head of a linked list
   726  	// of Vars within the given Node, so that
   727  	// we can start at a Var and find all the other
   728  	// Vars in the same Go variable.
   729  	v->nextinnode = node->opt;
   730  	node->opt = v;
   731  
   732  	bit = blsh(i);
   733  	if(n == D_EXTERN || n == D_STATIC)
   734  		for(z=0; z<BITS; z++)
   735  			externs.b[z] |= bit.b[z];
   736  	if(n == D_PARAM)
   737  		for(z=0; z<BITS; z++)
   738  			params.b[z] |= bit.b[z];
   739  
   740  	if(node->class == PPARAM)
   741  		for(z=0; z<BITS; z++)
   742  			ivar.b[z] |= bit.b[z];
   743  	if(node->class == PPARAMOUT)
   744  		for(z=0; z<BITS; z++)
   745  			ovar.b[z] |= bit.b[z];
   746  
   747  	// Treat values with their address taken as live at calls,
   748  	// because the garbage collector's liveness analysis in ../gc/plive.c does.
   749  	// These must be consistent or else we will elide stores and the garbage
   750  	// collector will see uninitialized data.
   751  	// The typical case where our own analysis is out of sync is when the
   752  	// node appears to have its address taken but that code doesn't actually
   753  	// get generated and therefore doesn't show up as an address being
   754  	// taken when we analyze the instruction stream.
   755  	// One instance of this case is when a closure uses the same name as
   756  	// an outer variable for one of its own variables declared with :=.
   757  	// The parser flags the outer variable as possibly shared, and therefore
   758  	// sets addrtaken, even though it ends up not being actually shared.
   759  	// If we were better about _ elision, _ = &x would suffice too.
   760  	// The broader := in a closure problem is mentioned in a comment in
   761  	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
   762  	if(node->addrtaken)
   763  		v->addr = 1;
   764  
   765  	// Disable registerization for globals, because:
   766  	// (1) we might panic at any time and we want the recovery code
   767  	// to see the latest values (issue 1304).
   768  	// (2) we don't know what pointers might point at them and we want
   769  	// loads via those pointers to see updated values and vice versa (issue 7995).
   770  	//
   771  	// Disable registerization for results if using defer, because the deferred func
   772  	// might recover and return, causing the current values to be used.
   773  	if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
   774  		v->addr = 1;
   775  
   776  	if(debug['R'])
   777  		print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
   778  	ostats.nvar++;
   779  
   780  	return bit;
   781  
   782  none:
   783  	return zbits;
   784  }
   785  
   786  void
   787  prop(Reg *r, Bits ref, Bits cal)
   788  {
   789  	Reg *r1, *r2;
   790  	int z, i, j;
   791  	Var *v, *v1;
   792  
   793  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
   794  		for(z=0; z<BITS; z++) {
   795  			ref.b[z] |= r1->refahead.b[z];
   796  			if(ref.b[z] != r1->refahead.b[z]) {
   797  				r1->refahead.b[z] = ref.b[z];
   798  				change++;
   799  			}
   800  			cal.b[z] |= r1->calahead.b[z];
   801  			if(cal.b[z] != r1->calahead.b[z]) {
   802  				r1->calahead.b[z] = cal.b[z];
   803  				change++;
   804  			}
   805  		}
   806  		switch(r1->f.prog->as) {
   807  		case ACALL:
   808  			if(noreturn(r1->f.prog))
   809  				break;
   810  
   811  			// Mark all input variables (ivar) as used, because that's what the
   812  			// liveness bitmaps say. The liveness bitmaps say that so that a
   813  			// panic will not show stale values in the parameter dump.
   814  			// Mark variables with a recent VARDEF (r1->act) as used,
   815  			// so that the optimizer flushes initializations to memory,
   816  			// so that if a garbage collection happens during this CALL,
   817  			// the collector will see initialized memory. Again this is to
   818  			// match what the liveness bitmaps say.
   819  			for(z=0; z<BITS; z++) {
   820  				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
   821  				ref.b[z] = 0;
   822  			}
   823  			
   824  			// cal.b is the current approximation of what's live across the call.
   825  			// Every bit in cal.b is a single stack word. For each such word,
   826  			// find all the other tracked stack words in the same Go variable
   827  			// (struct/slice/string/interface) and mark them live too.
   828  			// This is necessary because the liveness analysis for the garbage
   829  			// collector works at variable granularity, not at word granularity.
   830  			// It is fundamental for slice/string/interface: the garbage collector
   831  			// needs the whole value, not just some of the words, in order to
   832  			// interpret the other bits correctly. Specifically, slice needs a consistent
   833  			// ptr and cap, string needs a consistent ptr and len, and interface
   834  			// needs a consistent type word and data word.
   835  			for(z=0; z<BITS; z++) {
   836  				if(cal.b[z] == 0)
   837  					continue;
   838  				for(i=0; i<64; i++) {
   839  					if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
   840  						continue;
   841  					v = var+z*64+i;
   842  					if(v->node->opt == nil) // v represents fixed register, not Go variable
   843  						continue;
   844  
   845  					// v->node->opt is the head of a linked list of Vars
   846  					// corresponding to tracked words from the Go variable v->node.
   847  					// Walk the list and set all the bits.
   848  					// For a large struct this could end up being quadratic:
   849  					// after the first setting, the outer loop (for z, i) would see a 1 bit
   850  					// for all of the remaining words in the struct, and for each such
   851  					// word would go through and turn on all the bits again.
   852  					// To avoid the quadratic behavior, we only turn on the bits if
   853  					// v is the head of the list or if the head's bit is not yet turned on.
   854  					// This will set the bits at most twice, keeping the overall loop linear.
   855  					v1 = v->node->opt;
   856  					j = v1 - var;
   857  					if(v == v1 || !btest(&cal, j)) {
   858  						for(; v1 != nil; v1 = v1->nextinnode) {
   859  							j = v1 - var;
   860  							biset(&cal, j);
   861  						}
   862  					}
   863  				}
   864  			}
   865  			break;
   866  
   867  		case ATEXT:
   868  			for(z=0; z<BITS; z++) {
   869  				cal.b[z] = 0;
   870  				ref.b[z] = 0;
   871  			}
   872  			break;
   873  
   874  		case ARET:
   875  			for(z=0; z<BITS; z++) {
   876  				cal.b[z] = externs.b[z] | ovar.b[z];
   877  				ref.b[z] = 0;
   878  			}
   879  			break;
   880  		}
   881  		for(z=0; z<BITS; z++) {
   882  			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
   883  				r1->use1.b[z] | r1->use2.b[z];
   884  			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
   885  			r1->refbehind.b[z] = ref.b[z];
   886  			r1->calbehind.b[z] = cal.b[z];
   887  		}
   888  		if(r1->f.active)
   889  			break;
   890  		r1->f.active = 1;
   891  	}
   892  	for(; r != r1; r = (Reg*)r->f.p1)
   893  		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
   894  			prop(r2, r->refbehind, r->calbehind);
   895  }
   896  
   897  void
   898  synch(Reg *r, Bits dif)
   899  {
   900  	Reg *r1;
   901  	int z;
   902  
   903  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
   904  		for(z=0; z<BITS; z++) {
   905  			dif.b[z] = (dif.b[z] &
   906  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
   907  					r1->set.b[z] | r1->regdiff.b[z];
   908  			if(dif.b[z] != r1->regdiff.b[z]) {
   909  				r1->regdiff.b[z] = dif.b[z];
   910  				change++;
   911  			}
   912  		}
   913  		if(r1->f.active)
   914  			break;
   915  		r1->f.active = 1;
   916  		for(z=0; z<BITS; z++)
   917  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
   918  		if(r1->f.s2 != nil)
   919  			synch((Reg*)r1->f.s2, dif);
   920  	}
   921  }
   922  
   923  uint32
   924  allreg(uint32 b, Rgn *r)
   925  {
   926  	Var *v;
   927  	int i;
   928  
   929  	v = var + r->varno;
   930  	r->regno = 0;
   931  	switch(v->etype) {
   932  
   933  	default:
   934  		fatal("unknown etype %d/%E", bitno(b), v->etype);
   935  		break;
   936  
   937  	case TINT8:
   938  	case TUINT8:
   939  	case TINT16:
   940  	case TUINT16:
   941  	case TINT32:
   942  	case TUINT32:
   943  	case TINT64:
   944  	case TUINT64:
   945  	case TINT:
   946  	case TUINT:
   947  	case TUINTPTR:
   948  	case TBOOL:
   949  	case TPTR32:
   950  	case TPTR64:
   951  		i = BtoR(~b);
   952  		if(i && r->cost > 0) {
   953  			r->regno = i;
   954  			return RtoB(i);
   955  		}
   956  		break;
   957  
   958  	case TFLOAT32:
   959  	case TFLOAT64:
   960  		i = BtoF(~b);
   961  		if(i && r->cost > 0) {
   962  			r->regno = i;
   963  			return FtoB(i);
   964  		}
   965  		break;
   966  	}
   967  	return 0;
   968  }
   969  
   970  void
   971  paint1(Reg *r, int bn)
   972  {
   973  	Reg *r1;
   974  	int z;
   975  	uint64 bb;
   976  
   977  	z = bn/64;
   978  	bb = 1LL<<(bn%64);
   979  	if(r->act.b[z] & bb)
   980  		return;
   981  	for(;;) {
   982  		if(!(r->refbehind.b[z] & bb))
   983  			break;
   984  		r1 = (Reg*)r->f.p1;
   985  		if(r1 == R)
   986  			break;
   987  		if(!(r1->refahead.b[z] & bb))
   988  			break;
   989  		if(r1->act.b[z] & bb)
   990  			break;
   991  		r = r1;
   992  	}
   993  
   994  	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
   995  		change -= CLOAD * r->f.loop;
   996  	}
   997  	for(;;) {
   998  		r->act.b[z] |= bb;
   999  
  1000  		if(r->f.prog->as != ANOP) { // don't give credit for NOPs
  1001  			if(r->use1.b[z] & bb)
  1002  				change += CREF * r->f.loop;
  1003  			if((r->use2.b[z]|r->set.b[z]) & bb)
  1004  				change += CREF * r->f.loop;
  1005  		}
  1006  
  1007  		if(STORE(r) & r->regdiff.b[z] & bb) {
  1008  			change -= CLOAD * r->f.loop;
  1009  		}
  1010  
  1011  		if(r->refbehind.b[z] & bb)
  1012  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1013  				if(r1->refahead.b[z] & bb)
  1014  					paint1(r1, bn);
  1015  
  1016  		if(!(r->refahead.b[z] & bb))
  1017  			break;
  1018  		r1 = (Reg*)r->f.s2;
  1019  		if(r1 != R)
  1020  			if(r1->refbehind.b[z] & bb)
  1021  				paint1(r1, bn);
  1022  		r = (Reg*)r->f.s1;
  1023  		if(r == R)
  1024  			break;
  1025  		if(r->act.b[z] & bb)
  1026  			break;
  1027  		if(!(r->refbehind.b[z] & bb))
  1028  			break;
  1029  	}
  1030  }
  1031  
  1032  uint32
  1033  paint2(Reg *r, int bn, int depth)
  1034  {
  1035  	Reg *r1;
  1036  	int z;
  1037  	uint64 bb, vreg;
  1038  
  1039  	z = bn/64;
  1040  	bb = 1LL << (bn%64);
  1041  	vreg = regbits;
  1042  	if(!(r->act.b[z] & bb))
  1043  		return vreg;
  1044  	for(;;) {
  1045  		if(!(r->refbehind.b[z] & bb))
  1046  			break;
  1047  		r1 = (Reg*)r->f.p1;
  1048  		if(r1 == R)
  1049  			break;
  1050  		if(!(r1->refahead.b[z] & bb))
  1051  			break;
  1052  		if(!(r1->act.b[z] & bb))
  1053  			break;
  1054  		r = r1;
  1055  	}
  1056  	for(;;) {
  1057  		if(debug['R'] && debug['v'])
  1058  			print("  paint2 %d %P\n", depth, r->f.prog);
  1059  
  1060  		r->act.b[z] &= ~bb;
  1061  
  1062  		vreg |= r->regu;
  1063  
  1064  		if(r->refbehind.b[z] & bb)
  1065  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1066  				if(r1->refahead.b[z] & bb)
  1067  					vreg |= paint2(r1, bn, depth+1);
  1068  
  1069  		if(!(r->refahead.b[z] & bb))
  1070  			break;
  1071  		r1 = (Reg*)r->f.s2;
  1072  		if(r1 != R)
  1073  			if(r1->refbehind.b[z] & bb)
  1074  				vreg |= paint2(r1, bn, depth+1);
  1075  		r = (Reg*)r->f.s1;
  1076  		if(r == R)
  1077  			break;
  1078  		if(!(r->act.b[z] & bb))
  1079  			break;
  1080  		if(!(r->refbehind.b[z] & bb))
  1081  			break;
  1082  	}
  1083  
  1084  	return vreg;
  1085  }
  1086  
  1087  void
  1088  paint3(Reg *r, int bn, uint32 rb, int rn)
  1089  {
  1090  	Reg *r1;
  1091  	Prog *p;
  1092  	int z;
  1093  	uint64 bb;
  1094  
  1095  	z = bn/64;
  1096  	bb = 1LL << (bn%64);
  1097  	if(r->act.b[z] & bb)
  1098  		return;
  1099  	for(;;) {
  1100  		if(!(r->refbehind.b[z] & bb))
  1101  			break;
  1102  		r1 = (Reg*)r->f.p1;
  1103  		if(r1 == R)
  1104  			break;
  1105  		if(!(r1->refahead.b[z] & bb))
  1106  			break;
  1107  		if(r1->act.b[z] & bb)
  1108  			break;
  1109  		r = r1;
  1110  	}
  1111  
  1112  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1113  		addmove(r, bn, rn, 0);
  1114  	for(;;) {
  1115  		r->act.b[z] |= bb;
  1116  		p = r->f.prog;
  1117  
  1118  		if(r->use1.b[z] & bb) {
  1119  			if(debug['R'] && debug['v'])
  1120  				print("%P", p);
  1121  			addreg(&p->from, rn);
  1122  			if(debug['R'] && debug['v'])
  1123  				print(" ===change== %P\n", p);
  1124  		}
  1125  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1126  			if(debug['R'] && debug['v'])
  1127  				print("%P", p);
  1128  			addreg(&p->to, rn);
  1129  			if(debug['R'] && debug['v'])
  1130  				print(" ===change== %P\n", p);
  1131  		}
  1132  
  1133  		if(STORE(r) & r->regdiff.b[z] & bb)
  1134  			addmove(r, bn, rn, 1);
  1135  		r->regu |= rb;
  1136  
  1137  		if(r->refbehind.b[z] & bb)
  1138  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1139  				if(r1->refahead.b[z] & bb)
  1140  					paint3(r1, bn, rb, rn);
  1141  
  1142  		if(!(r->refahead.b[z] & bb))
  1143  			break;
  1144  		r1 = (Reg*)r->f.s2;
  1145  		if(r1 != R)
  1146  			if(r1->refbehind.b[z] & bb)
  1147  				paint3(r1, bn, rb, rn);
  1148  		r = (Reg*)r->f.s1;
  1149  		if(r == R)
  1150  			break;
  1151  		if(r->act.b[z] & bb)
  1152  			break;
  1153  		if(!(r->refbehind.b[z] & bb))
  1154  			break;
  1155  	}
  1156  }
  1157  
  1158  void
  1159  addreg(Adr *a, int rn)
  1160  {
  1161  	a->sym = nil;
  1162  	a->node = nil;
  1163  	a->offset = 0;
  1164  	a->type = rn;
  1165  
  1166  	ostats.ncvtreg++;
  1167  }
  1168  
  1169  uint32
  1170  RtoB(int r)
  1171  {
  1172  
  1173  	if(r < D_AX || r > D_R15)
  1174  		return 0;
  1175  	return 1L << (r-D_AX);
  1176  }
  1177  
  1178  int
  1179  BtoR(uint32 b)
  1180  {
  1181  	b &= 0xffffL;
  1182  	if(nacl)
  1183  		b &= ~((1<<(D_BP-D_AX)) | (1<<(D_R15-D_AX)));
  1184  	if(b == 0)
  1185  		return 0;
  1186  	return bitno(b) + D_AX;
  1187  }
  1188  
  1189  /*
  1190   *	bit	reg
  1191   *	16	X0
  1192   *	...
  1193   *	31	X15
  1194   */
  1195  uint32
  1196  FtoB(int f)
  1197  {
  1198  	if(f < D_X0 || f > D_X15)
  1199  		return 0;
  1200  	return 1L << (f - D_X0 + 16);
  1201  }
  1202  
  1203  int
  1204  BtoF(uint32 b)
  1205  {
  1206  
  1207  	b &= 0xFFFF0000L;
  1208  	if(b == 0)
  1209  		return 0;
  1210  	return bitno(b) - 16 + D_X0;
  1211  }
  1212  
  1213  void
  1214  dumpone(Flow *f, int isreg)
  1215  {
  1216  	int z;
  1217  	Bits bit;
  1218  	Reg *r;
  1219  
  1220  	print("%d:%P", f->loop, f->prog);
  1221  	if(isreg) {	
  1222  		r = (Reg*)f;
  1223  		for(z=0; z<BITS; z++)
  1224  			bit.b[z] =
  1225  				r->set.b[z] |
  1226  				r->use1.b[z] |
  1227  				r->use2.b[z] |
  1228  				r->refbehind.b[z] |
  1229  				r->refahead.b[z] |
  1230  				r->calbehind.b[z] |
  1231  				r->calahead.b[z] |
  1232  				r->regdiff.b[z] |
  1233  				r->act.b[z] |
  1234  					0;
  1235  		if(bany(&bit)) {
  1236  			print("\t");
  1237  			if(bany(&r->set))
  1238  				print(" s:%Q", r->set);
  1239  			if(bany(&r->use1))
  1240  				print(" u1:%Q", r->use1);
  1241  			if(bany(&r->use2))
  1242  				print(" u2:%Q", r->use2);
  1243  			if(bany(&r->refbehind))
  1244  				print(" rb:%Q ", r->refbehind);
  1245  			if(bany(&r->refahead))
  1246  				print(" ra:%Q ", r->refahead);
  1247  			if(bany(&r->calbehind))
  1248  				print(" cb:%Q ", r->calbehind);
  1249  			if(bany(&r->calahead))
  1250  				print(" ca:%Q ", r->calahead);
  1251  			if(bany(&r->regdiff))
  1252  				print(" d:%Q ", r->regdiff);
  1253  			if(bany(&r->act))
  1254  				print(" a:%Q ", r->act);
  1255  		}
  1256  	}
  1257  	print("\n");
  1258  }
  1259  
  1260  void
  1261  dumpit(char *str, Flow *r0, int isreg)
  1262  {
  1263  	Flow *r, *r1;
  1264  
  1265  	print("\n%s\n", str);
  1266  	for(r = r0; r != nil; r = r->link) {
  1267  		dumpone(r, isreg);
  1268  		r1 = r->p2;
  1269  		if(r1 != nil) {
  1270  			print("	pred:");
  1271  			for(; r1 != nil; r1 = r1->p2link)
  1272  				print(" %.4ud", (int)r1->prog->pc);
  1273  			print("\n");
  1274  		}
  1275  		// Print successors if it's not just the next one
  1276  		if(r->s1 != r->link || r->s2 != nil) {
  1277  			print("	succ:");
  1278  			if(r->s1 != nil)
  1279  				print(" %.4ud", (int)r->s1->prog->pc);
  1280  			if(r->s2 != nil)
  1281  				print(" %.4ud", (int)r->s2->prog->pc);
  1282  			print("\n");
  1283  		}
  1284  	}
  1285  }