github.com/golang-haiku/go-1.4.3@v0.0.0-20190609233734-1f5ae41cc308/src/cmd/6g/reg.c (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include "gg.h"
    34  #include "opt.h"
    35  
    36  #define	NREGVAR	32	/* 16 general + 16 floating */
    37  #define	REGBITS	((uint32)0xffffffff)
    38  /*c2go enum {
    39  	NREGVAR = 32,
    40  	REGBITS = 0xffffffff,
    41  };
    42  */
    43  
    44  static	Reg*	firstr;
    45  static	int	first	= 1;
    46  
    47  int
    48  rcmp(const void *a1, const void *a2)
    49  {
    50  	Rgn *p1, *p2;
    51  	int c1, c2;
    52  
    53  	p1 = (Rgn*)a1;
    54  	p2 = (Rgn*)a2;
    55  	c1 = p2->cost;
    56  	c2 = p1->cost;
    57  	if(c1 -= c2)
    58  		return c1;
    59  	return p2->varno - p1->varno;
    60  }
    61  
    62  static void
    63  setaddrs(Bits bit)
    64  {
    65  	int i, n;
    66  	Var *v;
    67  	Node *node;
    68  
    69  	while(bany(&bit)) {
    70  		// convert each bit to a variable
    71  		i = bnum(bit);
    72  		node = var[i].node;
    73  		n = var[i].name;
    74  		bit.b[i/32] &= ~(1L<<(i%32));
    75  
    76  		// disable all pieces of that variable
    77  		for(i=0; i<nvar; i++) {
    78  			v = var+i;
    79  			if(v->node == node && v->name == n)
    80  				v->addr = 2;
    81  		}
    82  	}
    83  }
    84  
// regname holds the symbol names given to the register pseudo-variables
// that regopt installs in var[0..NREGVAR): entry i names the pseudo-variable
// for bit i.  The first 16 entries are the general registers and the last 16
// are the floating (X) registers.
static char* regname[] = {
	".AX",
	".CX",
	".DX",
	".BX",
	".SP",
	".BP",
	".SI",
	".DI",
	".R8",
	".R9",
	".R10",
	".R11",
	".R12",
	".R13",
	".R14",
	".R15",
	".X0",
	".X1",
	".X2",
	".X3",
	".X4",
	".X5",
	".X6",
	".X7",
	".X8",
	".X9",
	".X10",
	".X11",
	".X12",
	".X13",
	".X14",
	".X15",
};
   119  
   120  static Node* regnodes[NREGVAR];
   121  
   122  static void walkvardef(Node *n, Reg *r, int active);
   123  
   124  void
   125  regopt(Prog *firstp)
   126  {
   127  	Reg *r, *r1;
   128  	Prog *p;
   129  	Graph *g;
   130  	ProgInfo info;
   131  	int i, z, active;
   132  	uint32 vreg;
   133  	Bits bit;
   134  
   135  	if(first) {
   136  		fmtinstall('Q', Qconv);
   137  		exregoffset = D_R15;
   138  		first = 0;
   139  	}
   140  
   141  	mergetemp(firstp);
   142  
   143  	/*
   144  	 * control flow is more complicated in generated go code
   145  	 * than in generated c code.  define pseudo-variables for
   146  	 * registers, so we have complete register usage information.
   147  	 */
   148  	nvar = NREGVAR;
   149  	memset(var, 0, NREGVAR*sizeof var[0]);
   150  	for(i=0; i<NREGVAR; i++) {
   151  		if(regnodes[i] == N)
   152  			regnodes[i] = newname(lookup(regname[i]));
   153  		var[i].node = regnodes[i];
   154  	}
   155  
   156  	regbits = RtoB(D_SP);
   157  	for(z=0; z<BITS; z++) {
   158  		externs.b[z] = 0;
   159  		params.b[z] = 0;
   160  		consts.b[z] = 0;
   161  		addrs.b[z] = 0;
   162  		ivar.b[z] = 0;
   163  		ovar.b[z] = 0;
   164  	}
   165  
   166  	/*
   167  	 * pass 1
   168  	 * build aux data structure
   169  	 * allocate pcs
   170  	 * find use and set of variables
   171  	 */
   172  	g = flowstart(firstp, sizeof(Reg));
   173  	if(g == nil) {
   174  		for(i=0; i<nvar; i++)
   175  			var[i].node->opt = nil;
   176  		return;
   177  	}
   178  
   179  	firstr = (Reg*)g->start;
   180  
   181  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   182  		p = r->f.prog;
   183  		if(p->as == AVARDEF || p->as == AVARKILL)
   184  			continue;
   185  		proginfo(&info, p);
   186  
   187  		// Avoid making variables for direct-called functions.
   188  		if(p->as == ACALL && p->to.type == D_EXTERN)
   189  			continue;
   190  
   191  		r->use1.b[0] |= info.reguse | info.regindex;
   192  		r->set.b[0] |= info.regset;
   193  
   194  		bit = mkvar(r, &p->from);
   195  		if(bany(&bit)) {
   196  			if(info.flags & LeftAddr)
   197  				setaddrs(bit);
   198  			if(info.flags & LeftRead)
   199  				for(z=0; z<BITS; z++)
   200  					r->use1.b[z] |= bit.b[z];
   201  			if(info.flags & LeftWrite)
   202  				for(z=0; z<BITS; z++)
   203  					r->set.b[z] |= bit.b[z];
   204  		}
   205  
   206  		bit = mkvar(r, &p->to);
   207  		if(bany(&bit)) {	
   208  			if(info.flags & RightAddr)
   209  				setaddrs(bit);
   210  			if(info.flags & RightRead)
   211  				for(z=0; z<BITS; z++)
   212  					r->use2.b[z] |= bit.b[z];
   213  			if(info.flags & RightWrite)
   214  				for(z=0; z<BITS; z++)
   215  					r->set.b[z] |= bit.b[z];
   216  		}
   217  	}
   218  
   219  	for(i=0; i<nvar; i++) {
   220  		Var *v = var+i;
   221  		if(v->addr) {
   222  			bit = blsh(i);
   223  			for(z=0; z<BITS; z++)
   224  				addrs.b[z] |= bit.b[z];
   225  		}
   226  
   227  		if(debug['R'] && debug['v'])
   228  			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
   229  				i, v->addr, v->etype, v->width, v->node, v->offset);
   230  	}
   231  
   232  	if(debug['R'] && debug['v'])
   233  		dumpit("pass1", &firstr->f, 1);
   234  
   235  	/*
   236  	 * pass 2
   237  	 * find looping structure
   238  	 */
   239  	flowrpo(g);
   240  
   241  	if(debug['R'] && debug['v'])
   242  		dumpit("pass2", &firstr->f, 1);
   243  
   244  	/*
   245  	 * pass 2.5
   246  	 * iterate propagating fat vardef covering forward
   247  	 * r->act records vars with a VARDEF since the last CALL.
   248  	 * (r->act will be reused in pass 5 for something else,
   249  	 * but we'll be done with it by then.)
   250  	 */
   251  	active = 0;
   252  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   253  		r->f.active = 0;
   254  		r->act = zbits;
   255  	}
   256  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   257  		p = r->f.prog;
   258  		if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
   259  			active++;
   260  			walkvardef(p->to.node, r, active);
   261  		}
   262  	}
   263  
   264  	/*
   265  	 * pass 3
   266  	 * iterate propagating usage
   267  	 * 	back until flow graph is complete
   268  	 */
   269  loop1:
   270  	change = 0;
   271  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   272  		r->f.active = 0;
   273  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   274  		if(r->f.prog->as == ARET)
   275  			prop(r, zbits, zbits);
   276  loop11:
   277  	/* pick up unreachable code */
   278  	i = 0;
   279  	for(r = firstr; r != R; r = r1) {
   280  		r1 = (Reg*)r->f.link;
   281  		if(r1 && r1->f.active && !r->f.active) {
   282  			prop(r, zbits, zbits);
   283  			i = 1;
   284  		}
   285  	}
   286  	if(i)
   287  		goto loop11;
   288  	if(change)
   289  		goto loop1;
   290  
   291  	if(debug['R'] && debug['v'])
   292  		dumpit("pass3", &firstr->f, 1);
   293  
   294  	/*
   295  	 * pass 4
   296  	 * iterate propagating register/variable synchrony
   297  	 * 	forward until graph is complete
   298  	 */
   299  loop2:
   300  	change = 0;
   301  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   302  		r->f.active = 0;
   303  	synch(firstr, zbits);
   304  	if(change)
   305  		goto loop2;
   306  
   307  	if(debug['R'] && debug['v'])
   308  		dumpit("pass4", &firstr->f, 1);
   309  
   310  	/*
   311  	 * pass 4.5
   312  	 * move register pseudo-variables into regu.
   313  	 */
   314  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   315  		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   316  
   317  		r->set.b[0] &= ~REGBITS;
   318  		r->use1.b[0] &= ~REGBITS;
   319  		r->use2.b[0] &= ~REGBITS;
   320  		r->refbehind.b[0] &= ~REGBITS;
   321  		r->refahead.b[0] &= ~REGBITS;
   322  		r->calbehind.b[0] &= ~REGBITS;
   323  		r->calahead.b[0] &= ~REGBITS;
   324  		r->regdiff.b[0] &= ~REGBITS;
   325  		r->act.b[0] &= ~REGBITS;
   326  	}
   327  
   328  	/*
   329  	 * pass 5
   330  	 * isolate regions
   331  	 * calculate costs (paint1)
   332  	 */
   333  	r = firstr;
   334  	if(r) {
   335  		for(z=0; z<BITS; z++)
   336  			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   337  			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   338  		if(bany(&bit) && !r->f.refset) {
   339  			// should never happen - all variables are preset
   340  			if(debug['w'])
   341  				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
   342  			r->f.refset = 1;
   343  		}
   344  	}
   345  	for(r = firstr; r != R; r = (Reg*)r->f.link)
   346  		r->act = zbits;
   347  	rgp = region;
   348  	nregion = 0;
   349  	for(r = firstr; r != R; r = (Reg*)r->f.link) {
   350  		for(z=0; z<BITS; z++)
   351  			bit.b[z] = r->set.b[z] &
   352  			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   353  		if(bany(&bit) && !r->f.refset) {
   354  			if(debug['w'])
   355  				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
   356  			r->f.refset = 1;
   357  			excise(&r->f);
   358  		}
   359  		for(z=0; z<BITS; z++)
   360  			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   361  		while(bany(&bit)) {
   362  			i = bnum(bit);
   363  			rgp->enter = r;
   364  			rgp->varno = i;
   365  			change = 0;
   366  			paint1(r, i);
   367  			bit.b[i/32] &= ~(1L<<(i%32));
   368  			if(change <= 0)
   369  				continue;
   370  			rgp->cost = change;
   371  			nregion++;
   372  			if(nregion >= NRGN) {
   373  				if(debug['R'] && debug['v'])
   374  					print("too many regions\n");
   375  				goto brk;
   376  			}
   377  			rgp++;
   378  		}
   379  	}
   380  brk:
   381  	qsort(region, nregion, sizeof(region[0]), rcmp);
   382  
   383  	if(debug['R'] && debug['v'])
   384  		dumpit("pass5", &firstr->f, 1);
   385  
   386  	/*
   387  	 * pass 6
   388  	 * determine used registers (paint2)
   389  	 * replace code (paint3)
   390  	 */
   391  	rgp = region;
   392  	for(i=0; i<nregion; i++) {
   393  		bit = blsh(rgp->varno);
   394  		vreg = paint2(rgp->enter, rgp->varno);
   395  		vreg = allreg(vreg, rgp);
   396  		if(rgp->regno != 0) {
   397  			if(debug['R'] && debug['v']) {
   398  				Var *v;
   399  
   400  				v = var + rgp->varno;
   401  				print("registerize %N+%lld (bit=%2d et=%2E) in %R\n",
   402  						v->node, v->offset, rgp->varno, v->etype, rgp->regno);
   403  			}
   404  			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   405  		}
   406  		rgp++;
   407  	}
   408  
   409  	if(debug['R'] && debug['v'])
   410  		dumpit("pass6", &firstr->f, 1);
   411  	
   412  	/*
   413  	 * free aux structures. peep allocates new ones.
   414  	 */
   415  	for(i=0; i<nvar; i++)
   416  		var[i].node->opt = nil;
   417  	flowend(g);
   418  	firstr = R;
   419  
   420  	/*
   421  	 * pass 7
   422  	 * peep-hole on basic block
   423  	 */
   424  	if(!debug['R'] || debug['P'])
   425  		peep(firstp);
   426  
   427  	/*
   428  	 * eliminate nops
   429  	 */
   430  	for(p=firstp; p!=P; p=p->link) {
   431  		while(p->link != P && p->link->as == ANOP)
   432  			p->link = p->link->link;
   433  		if(p->to.type == D_BRANCH)
   434  			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
   435  				p->to.u.branch = p->to.u.branch->link;
   436  	}
   437  
   438  	if(debug['R']) {
   439  		if(ostats.ncvtreg ||
   440  		   ostats.nspill ||
   441  		   ostats.nreload ||
   442  		   ostats.ndelmov ||
   443  		   ostats.nvar ||
   444  		   ostats.naddr ||
   445  		   0)
   446  			print("\nstats\n");
   447  
   448  		if(ostats.ncvtreg)
   449  			print("	%4d cvtreg\n", ostats.ncvtreg);
   450  		if(ostats.nspill)
   451  			print("	%4d spill\n", ostats.nspill);
   452  		if(ostats.nreload)
   453  			print("	%4d reload\n", ostats.nreload);
   454  		if(ostats.ndelmov)
   455  			print("	%4d delmov\n", ostats.ndelmov);
   456  		if(ostats.nvar)
   457  			print("	%4d var\n", ostats.nvar);
   458  		if(ostats.naddr)
   459  			print("	%4d addr\n", ostats.naddr);
   460  
   461  		memset(&ostats, 0, sizeof(ostats));
   462  	}
   463  }
   464  
   465  static void
   466  walkvardef(Node *n, Reg *r, int active)
   467  {
   468  	Reg *r1, *r2;
   469  	int bn;
   470  	Var *v;
   471  	
   472  	for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
   473  		if(r1->f.active == active)
   474  			break;
   475  		r1->f.active = active;
   476  		if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
   477  			break;
   478  		for(v=n->opt; v!=nil; v=v->nextinnode) {
   479  			bn = v - var;
   480  			r1->act.b[bn/32] |= 1L << (bn%32);
   481  		}
   482  		if(r1->f.prog->as == ACALL)
   483  			break;
   484  	}
   485  
   486  	for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
   487  		if(r2->f.s2 != nil)
   488  			walkvardef(n, (Reg*)r2->f.s2, active);
   489  }
   490  
   491  /*
   492   * add mov b,rn
   493   * just after r
   494   */
   495  void
   496  addmove(Reg *r, int bn, int rn, int f)
   497  {
   498  	Prog *p, *p1;
   499  	Adr *a;
   500  	Var *v;
   501  
   502  	p1 = mal(sizeof(*p1));
   503  	clearp(p1);
   504  	p1->pc = 9999;
   505  
   506  	p = r->f.prog;
   507  	p1->link = p->link;
   508  	p->link = p1;
   509  	p1->lineno = p->lineno;
   510  
   511  	v = var + bn;
   512  
   513  	a = &p1->to;
   514  	a->offset = v->offset;
   515  	a->etype = v->etype;
   516  	a->type = v->name;
   517  	a->node = v->node;
   518  	a->sym = linksym(v->node->sym);
   519  
   520  	// need to clean this up with wptr and
   521  	// some of the defaults
   522  	p1->as = AMOVL;
   523  	switch(simtype[(uchar)v->etype]) {
   524  	default:
   525  		fatal("unknown type %E", v->etype);
   526  	case TINT8:
   527  	case TUINT8:
   528  	case TBOOL:
   529  		p1->as = AMOVB;
   530  		break;
   531  	case TINT16:
   532  	case TUINT16:
   533  		p1->as = AMOVW;
   534  		break;
   535  	case TINT64:
   536  	case TUINT64:
   537  	case TPTR64:
   538  		p1->as = AMOVQ;
   539  		break;
   540  	case TFLOAT32:
   541  		p1->as = AMOVSS;
   542  		break;
   543  	case TFLOAT64:
   544  		p1->as = AMOVSD;
   545  		break;
   546  	case TINT32:
   547  	case TUINT32:
   548  	case TPTR32:
   549  		break;
   550  	}
   551  
   552  	p1->from.type = rn;
   553  	if(!f) {
   554  		p1->from = *a;
   555  		*a = zprog.from;
   556  		a->type = rn;
   557  		if(v->etype == TUINT8)
   558  			p1->as = AMOVB;
   559  		if(v->etype == TUINT16)
   560  			p1->as = AMOVW;
   561  	}
   562  	if(debug['R'] && debug['v'])
   563  		print("%P ===add=== %P\n", p, p1);
   564  	ostats.nspill++;
   565  }
   566  
   567  uint32
   568  doregbits(int r)
   569  {
   570  	uint32 b;
   571  
   572  	b = 0;
   573  	if(r >= D_INDIR)
   574  		r -= D_INDIR;
   575  	if(r >= D_AX && r <= D_R15)
   576  		b |= RtoB(r);
   577  	else
   578  	if(r >= D_AL && r <= D_R15B)
   579  		b |= RtoB(r-D_AL+D_AX);
   580  	else
   581  	if(r >= D_AH && r <= D_BH)
   582  		b |= RtoB(r-D_AH+D_AX);
   583  	else
   584  	if(r >= D_X0 && r <= D_X0+15)
   585  		b |= FtoB(r);
   586  	return b;
   587  }
   588  
   589  static int
   590  overlap(int64 o1, int w1, int64 o2, int w2)
   591  {
   592  	int64 t1, t2;
   593  
   594  	t1 = o1+w1;
   595  	t2 = o2+w2;
   596  
   597  	if(!(t1 > o2 && t2 > o1))
   598  		return 0;
   599  
   600  	return 1;
   601  }
   602  
// mkvar returns the bit set that identifies what operand a refers to.
// For a register operand the result carries that register's bit in word 0.
// For a D_EXTERN/D_STATIC/D_PARAM/D_AUTO operand naming a variable it
// returns the bit of the matching var[] entry (same node, name class,
// offset, etype and width), creating a new entry when none matches and
// there is room.  It returns zbits when the operand is not tracked.
// When r is not R, the register bit for a->index is also added to r->use1.
// Side effects: may set addr on existing entries that overlap the operand,
// extends the externs/params/ivar/ovar sets for a new entry, and updates
// ostats.
Bits
mkvar(Reg *r, Adr *a)
{
	Var *v;
	int i, t, n, et, z, flag;
	int64 w;
	uint32 regu;
	int64 o;
	Bits bit;
	Node *node;

	/*
	 * mark registers used
	 */
	t = a->type;
	if(t == D_NONE)
		goto none;

	if(r != R)
		r->use1.b[0] |= doregbits(a->index);

	switch(t) {
	default:
		// any other operand type: tracked only if it maps to a register bit
		regu = doregbits(t);
		if(regu == 0)
			goto none;
		bit = zbits;
		bit.b[0] = regu;
		return bit;

	case D_ADDR:
		// evaluate the operand as if its type were a->index, flag the
		// resulting variables via setaddrs, then restore the type;
		// the D_ADDR operand itself yields no bits
		a->type = a->index;
		bit = mkvar(r, a);
		setaddrs(bit);
		a->type = t;
		ostats.naddr++;
		goto none;

	case D_EXTERN:
	case D_STATIC:
	case D_PARAM:
	case D_AUTO:
		n = t;
		break;
	}

	node = a->node;
	if(node == N || node->op != ONAME || node->orig == N)
		goto none;
	node = node->orig;
	if(node->orig != node)
		fatal("%D: bad node", a);
	// nodes without a symbol, or whose name begins with '.', are not tracked
	if(node->sym == S || node->sym->name[0] == '.')
		goto none;
	et = a->etype;
	o = a->offset;
	w = a->width;
	if(w < 0)
		fatal("bad width %lld for %D", w, a);

	// look for an existing entry; flag remembers whether the operand
	// overlaps any entry of the same variable
	flag = 0;
	for(i=0; i<nvar; i++) {
		v = var+i;
		if(v->node == node && v->name == n) {
			if(v->offset == o)
			if(v->etype == et)
			if(v->width == w)
				return blsh(i);

			// if they overlap, disable both
			if(overlap(v->offset, v->width, o, w)) {
//				print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
				v->addr = 1;
				flag = 1;
			}
		}
	}
	switch(et) {
	case 0:
	case TFUNC:
		goto none;
	}

	if(nvar >= NVAR) {
		if(debug['w'] > 1 && node != N)
			fatal("variable not optimized: %#N", node);

		// If we're not tracking a word in a variable, mark the rest as
		// having its address taken, so that we keep the whole thing
		// live at all calls. otherwise we might optimize away part of
		// a variable but not all of it.
		for(i=0; i<nvar; i++) {
			v = var+i;
			if(v->node == node)
				v->addr = 1;
		}
		goto none;
	}

	// allocate and fill in a new var[] entry
	i = nvar;
	nvar++;
	v = var+i;
	v->offset = o;
	v->name = n;
	v->etype = et;
	v->width = w;
	v->addr = flag;		// funny punning
	v->node = node;

	// node->opt is the head of a linked list
	// of Vars within the given Node, so that
	// we can start at a Var and find all the other
	// Vars in the same Go variable.
	v->nextinnode = node->opt;
	node->opt = v;

	// record the new bit in the classification sets it belongs to
	bit = blsh(i);
	if(n == D_EXTERN || n == D_STATIC)
		for(z=0; z<BITS; z++)
			externs.b[z] |= bit.b[z];
	if(n == D_PARAM)
		for(z=0; z<BITS; z++)
			params.b[z] |= bit.b[z];

	if(node->class == PPARAM)
		for(z=0; z<BITS; z++)
			ivar.b[z] |= bit.b[z];
	if(node->class == PPARAMOUT)
		for(z=0; z<BITS; z++)
			ovar.b[z] |= bit.b[z];

	// Treat values with their address taken as live at calls,
	// because the garbage collector's liveness analysis in ../gc/plive.c does.
	// These must be consistent or else we will elide stores and the garbage
	// collector will see uninitialized data.
	// The typical case where our own analysis is out of sync is when the
	// node appears to have its address taken but that code doesn't actually
	// get generated and therefore doesn't show up as an address being
	// taken when we analyze the instruction stream.
	// One instance of this case is when a closure uses the same name as
	// an outer variable for one of its own variables declared with :=.
	// The parser flags the outer variable as possibly shared, and therefore
	// sets addrtaken, even though it ends up not being actually shared.
	// If we were better about _ elision, _ = &x would suffice too.
	// The broader := in a closure problem is mentioned in a comment in
	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
	if(node->addrtaken)
		v->addr = 1;

	// Disable registerization for globals, because:
	// (1) we might panic at any time and we want the recovery code
	// to see the latest values (issue 1304).
	// (2) we don't know what pointers might point at them and we want
	// loads via those pointers to see updated values and vice versa (issue 7995).
	//
	// Disable registerization for results if using defer, because the deferred func
	// might recover and return, causing the current values to be used.
	if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
		v->addr = 1;

	if(debug['R'])
		print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
	ostats.nvar++;

	return bit;

none:
	return zbits;
}
   772  
// prop propagates the bit sets ref and cal backward from r along the p1
// predecessor links.  At each instruction it merges the incoming sets
// into refahead/calahead (incrementing change whenever one of them grows),
// applies the instruction's own effect (special handling for CALL, TEXT
// and RET, then its set/use1/use2 bits), and stores the result in
// refbehind/calbehind.  The walk stops after processing an instruction
// that was already marked active.  Finally, for every instruction visited
// before the stopping point, it recurses into that instruction's other
// predecessors (the p2/p2link list) with its behind sets.
void
prop(Reg *r, Bits ref, Bits cal)
{
	Reg *r1, *r2;
	int z, i, j;
	Var *v, *v1;

	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
		// merge what flows in from the successor into the ahead sets
		for(z=0; z<BITS; z++) {
			ref.b[z] |= r1->refahead.b[z];
			if(ref.b[z] != r1->refahead.b[z]) {
				r1->refahead.b[z] = ref.b[z];
				change++;
			}
			cal.b[z] |= r1->calahead.b[z];
			if(cal.b[z] != r1->calahead.b[z]) {
				r1->calahead.b[z] = cal.b[z];
				change++;
			}
		}
		switch(r1->f.prog->as) {
		case ACALL:
			if(noreturn(r1->f.prog))
				break;

			// Mark all input variables (ivar) as used, because that's what the
			// liveness bitmaps say. The liveness bitmaps say that so that a
			// panic will not show stale values in the parameter dump.
			// Mark variables with a recent VARDEF (r1->act) as used,
			// so that the optimizer flushes initializations to memory,
			// so that if a garbage collection happens during this CALL,
			// the collector will see initialized memory. Again this is to
			// match what the liveness bitmaps say.
			for(z=0; z<BITS; z++) {
				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
				ref.b[z] = 0;
			}

			// cal.b is the current approximation of what's live across the call.
			// Every bit in cal.b is a single stack word. For each such word,
			// find all the other tracked stack words in the same Go variable
			// (struct/slice/string/interface) and mark them live too.
			// This is necessary because the liveness analysis for the garbage
			// collector works at variable granularity, not at word granularity.
			// It is fundamental for slice/string/interface: the garbage collector
			// needs the whole value, not just some of the words, in order to
			// interpret the other bits correctly. Specifically, slice needs a consistent
			// ptr and cap, string needs a consistent ptr and len, and interface
			// needs a consistent type word and data word.
			for(z=0; z<BITS; z++) {
				if(cal.b[z] == 0)
					continue;
				for(i=0; i<32; i++) {
					// skip bits beyond the last variable and bits that are clear
					if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
						continue;
					v = var+z*32+i;
					if(v->node->opt == nil) // v represents fixed register, not Go variable
						continue;

					// v->node->opt is the head of a linked list of Vars
					// corresponding to tracked words from the Go variable v->node.
					// Walk the list and set all the bits.
					// For a large struct this could end up being quadratic:
					// after the first setting, the outer loop (for z, i) would see a 1 bit
					// for all of the remaining words in the struct, and for each such
					// word would go through and turn on all the bits again.
					// To avoid the quadratic behavior, we only turn on the bits if
					// v is the head of the list or if the head's bit is not yet turned on.
					// This will set the bits at most twice, keeping the overall loop linear.
					v1 = v->node->opt;
					j = v1 - var;
					if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
						for(; v1 != nil; v1 = v1->nextinnode) {
							j = v1 - var;
							cal.b[j/32] |= 1UL<<(j&31);
						}
					}
				}
			}
			break;

		case ATEXT:
			// nothing is carried backward past the TEXT instruction
			for(z=0; z<BITS; z++) {
				cal.b[z] = 0;
				ref.b[z] = 0;
			}
			break;

		case ARET:
			// at a return, cal restarts from externs and output variables
			for(z=0; z<BITS; z++) {
				cal.b[z] = externs.b[z] | ovar.b[z];
				ref.b[z] = 0;
			}
			break;
		}
		// apply this instruction: sets remove bits from ref, uses add them;
		// anything the instruction sets or uses leaves cal
		for(z=0; z<BITS; z++) {
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
				r1->use1.b[z] | r1->use2.b[z];
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
			r1->refbehind.b[z] = ref.b[z];
			r1->calbehind.b[z] = cal.b[z];
		}
		if(r1->f.active)
			break;
		r1->f.active = 1;
	}
	// r1 is where the walk stopped; follow the other predecessors of
	// every instruction visited before it
	for(; r != r1; r = (Reg*)r->f.p1)
		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
			prop(r2, r->refbehind, r->calbehind);
}
   883  
   884  void
   885  synch(Reg *r, Bits dif)
   886  {
   887  	Reg *r1;
   888  	int z;
   889  
   890  	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
   891  		for(z=0; z<BITS; z++) {
   892  			dif.b[z] = (dif.b[z] &
   893  				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
   894  					r1->set.b[z] | r1->regdiff.b[z];
   895  			if(dif.b[z] != r1->regdiff.b[z]) {
   896  				r1->regdiff.b[z] = dif.b[z];
   897  				change++;
   898  			}
   899  		}
   900  		if(r1->f.active)
   901  			break;
   902  		r1->f.active = 1;
   903  		for(z=0; z<BITS; z++)
   904  			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
   905  		if(r1->f.s2 != nil)
   906  			synch((Reg*)r1->f.s2, dif);
   907  	}
   908  }
   909  
   910  uint32
   911  allreg(uint32 b, Rgn *r)
   912  {
   913  	Var *v;
   914  	int i;
   915  
   916  	v = var + r->varno;
   917  	r->regno = 0;
   918  	switch(v->etype) {
   919  
   920  	default:
   921  		fatal("unknown etype %d/%E", bitno(b), v->etype);
   922  		break;
   923  
   924  	case TINT8:
   925  	case TUINT8:
   926  	case TINT16:
   927  	case TUINT16:
   928  	case TINT32:
   929  	case TUINT32:
   930  	case TINT64:
   931  	case TUINT64:
   932  	case TINT:
   933  	case TUINT:
   934  	case TUINTPTR:
   935  	case TBOOL:
   936  	case TPTR32:
   937  	case TPTR64:
   938  		i = BtoR(~b);
   939  		if(i && r->cost > 0) {
   940  			r->regno = i;
   941  			return RtoB(i);
   942  		}
   943  		break;
   944  
   945  	case TFLOAT32:
   946  	case TFLOAT64:
   947  		i = BtoF(~b);
   948  		if(i && r->cost > 0) {
   949  			r->regno = i;
   950  			return FtoB(i);
   951  		}
   952  		break;
   953  	}
   954  	return 0;
   955  }
   956  
   957  void
   958  paint1(Reg *r, int bn)
   959  {
   960  	Reg *r1;
   961  	int z;
   962  	uint32 bb;
   963  
   964  	z = bn/32;
   965  	bb = 1L<<(bn%32);
   966  	if(r->act.b[z] & bb)
   967  		return;
   968  	for(;;) {
   969  		if(!(r->refbehind.b[z] & bb))
   970  			break;
   971  		r1 = (Reg*)r->f.p1;
   972  		if(r1 == R)
   973  			break;
   974  		if(!(r1->refahead.b[z] & bb))
   975  			break;
   976  		if(r1->act.b[z] & bb)
   977  			break;
   978  		r = r1;
   979  	}
   980  
   981  	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
   982  		change -= CLOAD * r->f.loop;
   983  	}
   984  	for(;;) {
   985  		r->act.b[z] |= bb;
   986  
   987  		if(r->f.prog->as != ANOP) { // don't give credit for NOPs
   988  			if(r->use1.b[z] & bb)
   989  				change += CREF * r->f.loop;
   990  			if((r->use2.b[z]|r->set.b[z]) & bb)
   991  				change += CREF * r->f.loop;
   992  		}
   993  
   994  		if(STORE(r) & r->regdiff.b[z] & bb) {
   995  			change -= CLOAD * r->f.loop;
   996  		}
   997  
   998  		if(r->refbehind.b[z] & bb)
   999  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1000  				if(r1->refahead.b[z] & bb)
  1001  					paint1(r1, bn);
  1002  
  1003  		if(!(r->refahead.b[z] & bb))
  1004  			break;
  1005  		r1 = (Reg*)r->f.s2;
  1006  		if(r1 != R)
  1007  			if(r1->refbehind.b[z] & bb)
  1008  				paint1(r1, bn);
  1009  		r = (Reg*)r->f.s1;
  1010  		if(r == R)
  1011  			break;
  1012  		if(r->act.b[z] & bb)
  1013  			break;
  1014  		if(!(r->refbehind.b[z] & bb))
  1015  			break;
  1016  	}
  1017  }
  1018  
  1019  uint32
  1020  regset(Reg *r, uint32 bb)
  1021  {
  1022  	uint32 b, set;
  1023  	Adr v;
  1024  	int c;
  1025  
  1026  	set = 0;
  1027  	v = zprog.from;
  1028  	while(b = bb & ~(bb-1)) {
  1029  		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
  1030  		if(v.type == 0)
  1031  			fatal("zero v.type for %#ux", b);
  1032  		c = copyu(r->f.prog, &v, nil);
  1033  		if(c == 3)
  1034  			set |= b;
  1035  		bb &= ~b;
  1036  	}
  1037  	return set;
  1038  }
  1039  
  1040  uint32
  1041  reguse(Reg *r, uint32 bb)
  1042  {
  1043  	uint32 b, set;
  1044  	Adr v;
  1045  	int c;
  1046  
  1047  	set = 0;
  1048  	v = zprog.from;
  1049  	while(b = bb & ~(bb-1)) {
  1050  		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
  1051  		c = copyu(r->f.prog, &v, nil);
  1052  		if(c == 1 || c == 2 || c == 4)
  1053  			set |= b;
  1054  		bb &= ~b;
  1055  	}
  1056  	return set;
  1057  }
  1058  
  1059  uint32
  1060  paint2(Reg *r, int bn)
  1061  {
  1062  	Reg *r1;
  1063  	int z;
  1064  	uint32 bb, vreg, x;
  1065  
  1066  	z = bn/32;
  1067  	bb = 1L << (bn%32);
  1068  	vreg = regbits;
  1069  	if(!(r->act.b[z] & bb))
  1070  		return vreg;
  1071  	for(;;) {
  1072  		if(!(r->refbehind.b[z] & bb))
  1073  			break;
  1074  		r1 = (Reg*)r->f.p1;
  1075  		if(r1 == R)
  1076  			break;
  1077  		if(!(r1->refahead.b[z] & bb))
  1078  			break;
  1079  		if(!(r1->act.b[z] & bb))
  1080  			break;
  1081  		r = r1;
  1082  	}
  1083  	for(;;) {
  1084  		r->act.b[z] &= ~bb;
  1085  
  1086  		vreg |= r->regu;
  1087  
  1088  		if(r->refbehind.b[z] & bb)
  1089  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1090  				if(r1->refahead.b[z] & bb)
  1091  					vreg |= paint2(r1, bn);
  1092  
  1093  		if(!(r->refahead.b[z] & bb))
  1094  			break;
  1095  		r1 = (Reg*)r->f.s2;
  1096  		if(r1 != R)
  1097  			if(r1->refbehind.b[z] & bb)
  1098  				vreg |= paint2(r1, bn);
  1099  		r = (Reg*)r->f.s1;
  1100  		if(r == R)
  1101  			break;
  1102  		if(!(r->act.b[z] & bb))
  1103  			break;
  1104  		if(!(r->refbehind.b[z] & bb))
  1105  			break;
  1106  	}
  1107  
  1108  	bb = vreg;
  1109  	for(; r; r=(Reg*)r->f.s1) {
  1110  		x = r->regu & ~bb;
  1111  		if(x) {
  1112  			vreg |= reguse(r, x);
  1113  			bb |= regset(r, x);
  1114  		}
  1115  	}
  1116  	return vreg;
  1117  }
  1118  
  1119  void
  1120  paint3(Reg *r, int bn, int32 rb, int rn)
  1121  {
  1122  	Reg *r1;
  1123  	Prog *p;
  1124  	int z;
  1125  	uint32 bb;
  1126  
  1127  	z = bn/32;
  1128  	bb = 1L << (bn%32);
  1129  	if(r->act.b[z] & bb)
  1130  		return;
  1131  	for(;;) {
  1132  		if(!(r->refbehind.b[z] & bb))
  1133  			break;
  1134  		r1 = (Reg*)r->f.p1;
  1135  		if(r1 == R)
  1136  			break;
  1137  		if(!(r1->refahead.b[z] & bb))
  1138  			break;
  1139  		if(r1->act.b[z] & bb)
  1140  			break;
  1141  		r = r1;
  1142  	}
  1143  
  1144  	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1145  		addmove(r, bn, rn, 0);
  1146  	for(;;) {
  1147  		r->act.b[z] |= bb;
  1148  		p = r->f.prog;
  1149  
  1150  		if(r->use1.b[z] & bb) {
  1151  			if(debug['R'] && debug['v'])
  1152  				print("%P", p);
  1153  			addreg(&p->from, rn);
  1154  			if(debug['R'] && debug['v'])
  1155  				print(" ===change== %P\n", p);
  1156  		}
  1157  		if((r->use2.b[z]|r->set.b[z]) & bb) {
  1158  			if(debug['R'] && debug['v'])
  1159  				print("%P", p);
  1160  			addreg(&p->to, rn);
  1161  			if(debug['R'] && debug['v'])
  1162  				print(" ===change== %P\n", p);
  1163  		}
  1164  
  1165  		if(STORE(r) & r->regdiff.b[z] & bb)
  1166  			addmove(r, bn, rn, 1);
  1167  		r->regu |= rb;
  1168  
  1169  		if(r->refbehind.b[z] & bb)
  1170  			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
  1171  				if(r1->refahead.b[z] & bb)
  1172  					paint3(r1, bn, rb, rn);
  1173  
  1174  		if(!(r->refahead.b[z] & bb))
  1175  			break;
  1176  		r1 = (Reg*)r->f.s2;
  1177  		if(r1 != R)
  1178  			if(r1->refbehind.b[z] & bb)
  1179  				paint3(r1, bn, rb, rn);
  1180  		r = (Reg*)r->f.s1;
  1181  		if(r == R)
  1182  			break;
  1183  		if(r->act.b[z] & bb)
  1184  			break;
  1185  		if(!(r->refbehind.b[z] & bb))
  1186  			break;
  1187  	}
  1188  }
  1189  
  1190  void
  1191  addreg(Adr *a, int rn)
  1192  {
  1193  	a->sym = nil;
  1194  	a->node = nil;
  1195  	a->offset = 0;
  1196  	a->type = rn;
  1197  
  1198  	ostats.ncvtreg++;
  1199  }
  1200  
  1201  int32
  1202  RtoB(int r)
  1203  {
  1204  
  1205  	if(r < D_AX || r > D_R15)
  1206  		return 0;
  1207  	return 1L << (r-D_AX);
  1208  }
  1209  
  1210  int
  1211  BtoR(int32 b)
  1212  {
  1213  	b &= 0xffffL;
  1214  	if(nacl)
  1215  		b &= ~((1<<(D_BP-D_AX)) | (1<<(D_R15-D_AX)));
  1216  	if(b == 0)
  1217  		return 0;
  1218  	return bitno(b) + D_AX;
  1219  }
  1220  
  1221  /*
  1222   *	bit	reg
  1223   *	16	X0
  1224   *	...
  1225   *	31	X15
  1226   */
  1227  int32
  1228  FtoB(int f)
  1229  {
  1230  	if(f < D_X0 || f > D_X15)
  1231  		return 0;
  1232  	return 1L << (f - D_X0 + 16);
  1233  }
  1234  
  1235  int
  1236  BtoF(int32 b)
  1237  {
  1238  
  1239  	b &= 0xFFFF0000L;
  1240  	if(b == 0)
  1241  		return 0;
  1242  	return bitno(b) - 16 + D_X0;
  1243  }
  1244  
  1245  void
  1246  dumpone(Flow *f, int isreg)
  1247  {
  1248  	int z;
  1249  	Bits bit;
  1250  	Reg *r;
  1251  
  1252  	print("%d:%P", f->loop, f->prog);
  1253  	if(isreg) {	
  1254  		r = (Reg*)f;
  1255  		for(z=0; z<BITS; z++)
  1256  			bit.b[z] =
  1257  				r->set.b[z] |
  1258  				r->use1.b[z] |
  1259  				r->use2.b[z] |
  1260  				r->refbehind.b[z] |
  1261  				r->refahead.b[z] |
  1262  				r->calbehind.b[z] |
  1263  				r->calahead.b[z] |
  1264  				r->regdiff.b[z] |
  1265  				r->act.b[z] |
  1266  					0;
  1267  		if(bany(&bit)) {
  1268  			print("\t");
  1269  			if(bany(&r->set))
  1270  				print(" s:%Q", r->set);
  1271  			if(bany(&r->use1))
  1272  				print(" u1:%Q", r->use1);
  1273  			if(bany(&r->use2))
  1274  				print(" u2:%Q", r->use2);
  1275  			if(bany(&r->refbehind))
  1276  				print(" rb:%Q ", r->refbehind);
  1277  			if(bany(&r->refahead))
  1278  				print(" ra:%Q ", r->refahead);
  1279  			if(bany(&r->calbehind))
  1280  				print(" cb:%Q ", r->calbehind);
  1281  			if(bany(&r->calahead))
  1282  				print(" ca:%Q ", r->calahead);
  1283  			if(bany(&r->regdiff))
  1284  				print(" d:%Q ", r->regdiff);
  1285  			if(bany(&r->act))
  1286  				print(" a:%Q ", r->act);
  1287  		}
  1288  	}
  1289  	print("\n");
  1290  }
  1291  
  1292  void
  1293  dumpit(char *str, Flow *r0, int isreg)
  1294  {
  1295  	Flow *r, *r1;
  1296  
  1297  	print("\n%s\n", str);
  1298  	for(r = r0; r != nil; r = r->link) {
  1299  		dumpone(r, isreg);
  1300  		r1 = r->p2;
  1301  		if(r1 != nil) {
  1302  			print("	pred:");
  1303  			for(; r1 != nil; r1 = r1->p2link)
  1304  				print(" %.4ud", (int)r1->prog->pc);
  1305  			print("\n");
  1306  		}
  1307  //		r1 = r->s1;
  1308  //		if(r1 != R) {
  1309  //			print("	succ:");
  1310  //			for(; r1 != R; r1 = r1->s1)
  1311  //				print(" %.4ud", (int)r1->prog->pc);
  1312  //			print("\n");
  1313  //		}
  1314  	}
  1315  }