github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/cmd/gc/popt.c

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // "Portable" optimizations.
    32  // Compiled separately for 5g, 6g, and 8g, so allowed to use gg.h, opt.h.
    33  // Must code to the intersection of the three back ends.
    34  
    35  #include	<u.h>
    36  #include	<libc.h>
    37  #include	"gg.h"
    38  #include	"opt.h"
    39  
    40  // p is a call instruction. Does the call fail to return?
    41  int
    42  noreturn(Prog *p)
    43  {
    44  	Sym *s;
    45  	int i;
    46  	static Sym*	symlist[10];
    47  
    48  	if(symlist[0] == S) {
    49  		symlist[0] = pkglookup("panicindex", runtimepkg);
    50  		symlist[1] = pkglookup("panicslice", runtimepkg);
    51  		symlist[2] = pkglookup("throwinit", runtimepkg);
    52  		symlist[3] = pkglookup("gopanic", runtimepkg);
    53  		symlist[4] = pkglookup("panicwrap", runtimepkg);
    54  		symlist[5] = pkglookup("throwreturn", runtimepkg);
    55  		symlist[6] = pkglookup("selectgo", runtimepkg);
    56  		symlist[7] = pkglookup("block", runtimepkg);
    57  	}
    58  
    59  	if(p->to.node == nil)
    60  		return 0;
    61  	s = p->to.node->sym;
    62  	if(s == S)
    63  		return 0;
    64  	for(i=0; symlist[i]!=S; i++)
    65  		if(s == symlist[i])
    66  			return 1;
    67  	return 0;
    68  }
    69  
    70  // JMP chasing and removal.
    71  //
    72  // The code generator depends on being able to write out jump
    73  // instructions that it can jump to now but fill in later.
     74  // The linker will resolve them nicely, but they make the code
    75  // longer and more difficult to follow during debugging.
    76  // Remove them.
    77  
    78  /* what instruction does a JMP to p eventually land on? */
    79  static Prog*
    80  chasejmp(Prog *p, int *jmploop)
    81  {
    82  	int n;
    83  
    84  	n = 0;
    85  	while(p != P && p->as == AJMP && p->to.type == D_BRANCH) {
    86  		if(++n > 10) {
    87  			*jmploop = 1;
    88  			break;
    89  		}
    90  		p = p->to.u.branch;
    91  	}
    92  	return p;
    93  }
    94  
    95  /*
     96   * reuse the Prog opt pointer for mark/sweep state.
     97   * leave opt==nil at end because alive==nil.
    98   */
    99  #define alive ((void*)0)
   100  #define dead ((void*)1)
   101  /*c2go
   102  extern void *alive;
   103  extern void *dead;
   104  */
   105  
   106  /* mark all code reachable from firstp as alive */
   107  static void
   108  mark(Prog *firstp)
   109  {
   110  	Prog *p;
   111  	
   112  	for(p=firstp; p; p=p->link) {
   113  		if(p->opt != dead)
   114  			break;
   115  		p->opt = alive;
   116  		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch)
   117  			mark(p->to.u.branch);
   118  		if(p->as == AJMP || p->as == ARET || p->as == AUNDEF)
   119  			break;
   120  	}
   121  }
   122  
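         // fixjmp rewrites branches that target JMP instructions to point at the
         // final destination, deletes unreachable instructions, and elides JMPs
         // to the immediately following instruction, as described above.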
   123  void
   124  fixjmp(Prog *firstp)
   125  {
   126  	int jmploop;
   127  	Prog *p, *last;
   128  	
   129  	if(debug['R'] && debug['v'])
   130  		print("\nfixjmp\n");
   131  
   132  	// pass 1: resolve jump to jump, mark all code as dead.
   133  	jmploop = 0;
   134  	for(p=firstp; p; p=p->link) {
   135  		if(debug['R'] && debug['v'])
   136  			print("%P\n", p);
   137  		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) {
   138  			p->to.u.branch = chasejmp(p->to.u.branch, &jmploop);
   139  			if(debug['R'] && debug['v'])
   140  				print("->%P\n", p);
   141  		}
   142  		p->opt = dead;
   143  	}
   144  	if(debug['R'] && debug['v'])
   145  		print("\n");
   146  
   147  	// pass 2: mark all reachable code alive
   148  	mark(firstp);
   149  	
   150  	// pass 3: delete dead code (mostly JMPs).
   151  	last = nil;
   152  	for(p=firstp; p; p=p->link) {
   153  		if(p->opt == dead) {
   154  			if(p->link == P && p->as == ARET && last && last->as != ARET) {
   155  				// This is the final ARET, and the code so far doesn't have one.
   156  				// Let it stay. The register allocator assumes that all live code in
   157  				// the function can be traversed by starting at all the RET instructions
   158  				// and following predecessor links. If we remove the final RET,
   159  				// this assumption will not hold in the case of an infinite loop
   160  				// at the end of a function.
   161  				// Keep the RET but mark it dead for the liveness analysis.
   162  				p->mode = 1;
   163  			} else {
   164  				if(debug['R'] && debug['v'])
   165  					print("del %P\n", p);
   166  				continue;
   167  			}
   168  		}
   169  		if(last)
   170  			last->link = p;
   171  		last = p;
   172  	}
   173  	last->link = P;
   174  	
   175  	// pass 4: elide JMP to next instruction.
   176  	// only safe if there are no jumps to JMPs anymore.
   177  	if(!jmploop) {
   178  		last = nil;
   179  		for(p=firstp; p; p=p->link) {
   180  			if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) {
   181  				if(debug['R'] && debug['v'])
   182  					print("del %P\n", p);
   183  				continue;
   184  			}
   185  			if(last)
   186  				last->link = p;
   187  			last = p;
   188  		}
   189  		last->link = P;
   190  	}
   191  	
   192  	if(debug['R'] && debug['v']) {
   193  		print("\n");
   194  		for(p=firstp; p; p=p->link)
   195  			print("%P\n", p);
   196  		print("\n");
   197  	}
   198  }
   199  
   200  #undef alive
   201  #undef dead
   202  
   203  // Control flow analysis. The Flow structures hold predecessor and successor
   204  // information as well as basic loop analysis.
   205  //
   206  //	graph = flowstart(firstp, sizeof(Flow));
   207  //	... use flow graph ...
   208  //	flowend(graph); // free graph
   209  //
   210  // Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
   211  //
   212  //	for(f = graph->start; f != nil; f = f->link)
   213  //
    214  // or, given an instruction f, to iterate over all the predecessors, which are
   215  // f->p1 and this list:
   216  //
   217  //	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
   218  //	
   219  // Often the Flow struct is embedded as the first field inside a larger struct S.
   220  // In that case casts are needed to convert Flow* to S* in many places but the
   221  // idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart.
   222  
   223  Graph*
   224  flowstart(Prog *firstp, int size)
   225  {
   226  	int nf;
   227  	Flow *f, *f1, *start, *last;
   228  	Graph *graph;
   229  	Prog *p;
   230  	ProgInfo info;
   231  
   232  	// Count and mark instructions to annotate.
   233  	nf = 0;
   234  	for(p = firstp; p != P; p = p->link) {
   235  		p->opt = nil; // should be already, but just in case
   236  		proginfo(&info, p);
   237  		if(info.flags & Skip)
   238  			continue;
   239  		p->opt = (void*)1;
   240  		nf++;
   241  	}
   242  	
   243  	if(nf == 0)
   244  		return nil;
   245  
   246  	if(nf >= 20000) {
   247  		// fatal("%S is too big (%d instructions)", curfn->nname->sym, nf);
   248  		return nil;
   249  	}
   250  
   251  	// Allocate annotations and assign to instructions.
   252  	graph = calloc(sizeof *graph + size*nf, 1);
   253  	if(graph == nil)
   254  		fatal("out of memory");
   255  	start = (Flow*)(graph+1);
   256  	last = nil;
   257  	f = start;
   258  	for(p = firstp; p != P; p = p->link) {
   259  		if(p->opt == nil)
   260  			continue;
   261  		p->opt = f;
   262  		f->prog = p;
   263  		if(last)
   264  			last->link = f;
   265  		last = f;
   266  		
   267  		f = (Flow*)((uchar*)f + size);
   268  	}
   269  
   270  	// Fill in pred/succ information.
   271  	for(f = start; f != nil; f = f->link) {
   272  		p = f->prog;
   273  		proginfo(&info, p);
   274  		if(!(info.flags & Break)) {
   275  			f1 = f->link;
   276  			f->s1 = f1;
   277  			f1->p1 = f;
   278  		}
   279  		if(p->to.type == D_BRANCH) {
   280  			if(p->to.u.branch == P)
   281  				fatal("pnil %P", p);
   282  			f1 = p->to.u.branch->opt;
   283  			if(f1 == nil)
   284  				fatal("fnil %P / %P", p, p->to.u.branch);
   285  			if(f1 == f) {
   286  				//fatal("self loop %P", p);
   287  				continue;
   288  			}
   289  			f->s2 = f1;
   290  			f->p2link = f1->p2;
   291  			f1->p2 = f;
   292  		}
   293  	}
   294  	
   295  	graph->start = start;
   296  	graph->num = nf;
   297  	return graph;
   298  }
   299  
   300  void
   301  flowend(Graph *graph)
   302  {
   303  	Flow *f;
   304  	
   305  	for(f = graph->start; f != nil; f = f->link)
   306  		f->prog->opt = nil;
   307  	free(graph);
   308  }
   309  
   310  /*
   311   * find looping structure
   312   *
   313   * 1) find reverse postordering
   314   * 2) find approximate dominators,
   315   *	the actual dominators if the flow graph is reducible
   316   *	otherwise, dominators plus some other non-dominators.
   317   *	See Matthew S. Hecht and Jeffrey D. Ullman,
   318   *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
   319   *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
   320   *	Oct. 1-3, 1973, pp.  207-217.
   321   * 3) find all nodes with a predecessor dominated by the current node.
   322   *	such a node is a loop head.
   323   *	recursively, all preds with a greater rpo number are in the loop
   324   */
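         // postorder walks the successor graph depth-first, using Flow.rpo as a
         // visited mark, appends each node to rpo2r in postorder, and returns
         // the updated count n.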
   325  static int32
   326  postorder(Flow *r, Flow **rpo2r, int32 n)
   327  {
   328  	Flow *r1;
   329  
   330  	r->rpo = 1;
   331  	r1 = r->s1;
   332  	if(r1 && !r1->rpo)
   333  		n = postorder(r1, rpo2r, n);
   334  	r1 = r->s2;
   335  	if(r1 && !r1->rpo)
   336  		n = postorder(r1, rpo2r, n);
   337  	rpo2r[n] = r;
   338  	n++;
   339  	return n;
   340  }
   341  
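         // rpolca returns the closest common ancestor, in the idom tree built so
         // far, of the nodes numbered rpo1 and rpo2; rpo1 == -1 acts as an
         // identity value so callers can fold over a predecessor list.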
   342  static int32
   343  rpolca(int32 *idom, int32 rpo1, int32 rpo2)
   344  {
   345  	int32 t;
   346  
   347  	if(rpo1 == -1)
   348  		return rpo2;
   349  	while(rpo1 != rpo2){
   350  		if(rpo1 > rpo2){
   351  			t = rpo2;
   352  			rpo2 = rpo1;
   353  			rpo1 = t;
   354  		}
   355  		while(rpo1 < rpo2){
   356  			t = idom[rpo2];
   357  			if(t >= rpo2)
   358  				fatal("bad idom");
   359  			rpo2 = t;
   360  		}
   361  	}
   362  	return rpo1;
   363  }
   364  
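         // doms reports whether the node numbered r dominates the node numbered s
         // according to the idom approximation.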
   365  static int
   366  doms(int32 *idom, int32 r, int32 s)
   367  {
   368  	while(s > r)
   369  		s = idom[s];
   370  	return s == r;
   371  }
   372  
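         // loophead reports whether r is a loop head: some predecessor of r is
         // dominated by r, i.e. there is a back edge into r.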
   373  static int
   374  loophead(int32 *idom, Flow *r)
   375  {
   376  	int32 src;
   377  
   378  	src = r->rpo;
   379  	if(r->p1 != nil && doms(idom, src, r->p1->rpo))
   380  		return 1;
   381  	for(r = r->p2; r != nil; r = r->p2link)
   382  		if(doms(idom, src, r->rpo))
   383  			return 1;
   384  	return 0;
   385  }
   386  
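         // loopmark floods backward through predecessors from r, adding LOOP to
         // the loop weight of every node in the loop headed by the block with
         // rpo number head; active==head marks nodes already visited.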
   387  static void
   388  loopmark(Flow **rpo2r, int32 head, Flow *r)
   389  {
   390  	if(r->rpo < head || r->active == head)
   391  		return;
   392  	r->active = head;
   393  	r->loop += LOOP;
   394  	if(r->p1 != nil)
   395  		loopmark(rpo2r, head, r->p1);
   396  	for(r = r->p2; r != nil; r = r->p2link)
   397  		loopmark(rpo2r, head, r);
   398  }
   399  
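         // flowrpo computes reverse postorder numbers, approximate dominators,
         // and loop nesting weights (Flow.loop) for the graph, following the
         // procedure described in the comment above.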
   400  void
   401  flowrpo(Graph *g)
   402  {
   403  	Flow *r1;
   404  	int32 i, d, me, nr, *idom;
   405  	Flow **rpo2r;
   406  
   407  	free(g->rpo);
   408  	g->rpo = calloc(g->num*sizeof g->rpo[0], 1);
   409  	idom = calloc(g->num*sizeof idom[0], 1);
   410  	if(g->rpo == nil || idom == nil)
   411  		fatal("out of memory");
   412  
   413  	for(r1 = g->start; r1 != nil; r1 = r1->link)
   414  		r1->active = 0;
   415  
   416  	rpo2r = g->rpo;
   417  	d = postorder(g->start, rpo2r, 0);
   418  	nr = g->num;
   419  	if(d > nr)
   420  		fatal("too many reg nodes %d %d", d, nr);
   421  	nr = d;
   422  	for(i = 0; i < nr / 2; i++) {
   423  		r1 = rpo2r[i];
   424  		rpo2r[i] = rpo2r[nr - 1 - i];
   425  		rpo2r[nr - 1 - i] = r1;
   426  	}
   427  	for(i = 0; i < nr; i++)
   428  		rpo2r[i]->rpo = i;
   429  
   430  	idom[0] = 0;
   431  	for(i = 0; i < nr; i++) {
   432  		r1 = rpo2r[i];
   433  		me = r1->rpo;
   434  		d = -1;
   435  		// rpo2r[r->rpo] == r protects against considering dead code,
   436  		// which has r->rpo == 0.
   437  		if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
   438  			d = r1->p1->rpo;
   439  		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
   440  			if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
   441  				d = rpolca(idom, d, r1->rpo);
   442  		idom[i] = d;
   443  	}
   444  
   445  	for(i = 0; i < nr; i++) {
   446  		r1 = rpo2r[i];
   447  		r1->loop++;
   448  		if(r1->p2 != nil && loophead(idom, r1))
   449  			loopmark(rpo2r, i, r1);
   450  	}
   451  	free(idom);
   452  
   453  	for(r1 = g->start; r1 != nil; r1 = r1->link)
   454  		r1->active = 0;
   455  }
   456  
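         // uniqp returns the unique predecessor of r, or nil if r has zero
         // predecessors or more than one.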
   457  Flow*
   458  uniqp(Flow *r)
   459  {
   460  	Flow *r1;
   461  
   462  	r1 = r->p1;
   463  	if(r1 == nil) {
   464  		r1 = r->p2;
   465  		if(r1 == nil || r1->p2link != nil)
   466  			return nil;
   467  	} else
   468  		if(r->p2 != nil)
   469  			return nil;
   470  	return r1;
   471  }
   472  
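         // uniqs returns the unique successor of r, or nil if r has zero
         // successors or more than one.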
   473  Flow*
   474  uniqs(Flow *r)
   475  {
   476  	Flow *r1;
   477  
   478  	r1 = r->s1;
   479  	if(r1 == nil) {
   480  		r1 = r->s2;
   481  		if(r1 == nil)
   482  			return nil;
   483  	} else
   484  		if(r->s2 != nil)
   485  			return nil;
   486  	return r1;
   487  }
   488  
   489  // The compilers assume they can generate temporary variables
   490  // as needed to preserve the right semantics or simplify code
   491  // generation and the back end will still generate good code.
   492  // This results in a large number of ephemeral temporary variables.
   493  // Merge temps with non-overlapping lifetimes and equal types using the
   494  // greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation",
   495  // ACM TOPLAS 1999.
   496  
   497  typedef struct TempVar TempVar;
   498  typedef struct TempFlow TempFlow;
   499  
   500  struct TempVar
   501  {
   502  	Node *node;
   503  	TempFlow *def; // definition of temp var
   504  	TempFlow *use; // use list, chained through TempFlow.uselink
   505  	TempVar *freelink; // next free temp in Type.opt list
   506  	TempVar *merge; // merge var with this one
   507  	vlong start; // smallest Prog.pc in live range
   508  	vlong end; // largest Prog.pc in live range
   509  	uchar addr; // address taken - no accurate end
   510  	uchar removed; // removed from program
   511  };
   512  
   513  struct TempFlow
   514  {
   515  	Flow	f;
   516  	TempFlow *uselink;
   517  };
   518  
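         // startcmp is the qsort comparison function ordering TempVar pointers
         // by increasing start of live range.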
   519  static int
   520  startcmp(const void *va, const void *vb)
   521  {
   522  	TempVar *a, *b;
   523  	
   524  	a = *(TempVar**)va;
   525  	b = *(TempVar**)vb;
   526  
   527  	if(a->start < b->start)
   528  		return -1;
   529  	if(a->start > b->start)
   530  		return +1;
   531  	return 0;
   532  }
   533  
   534  // Is n available for merging?
   535  static int
   536  canmerge(Node *n)
   537  {
   538  	return n->class == PAUTO && strncmp(n->sym->name, "autotmp", 7) == 0;
   539  }
   540  
   541  static void mergewalk(TempVar*, TempFlow*, uint32);
   542  static void varkillwalk(TempVar*, TempFlow*, uint32);
   543  
   544  void
   545  mergetemp(Prog *firstp)
   546  {
   547  	int i, j, nvar, ninuse, nfree, nkill;
   548  	TempVar *var, *v, *v1, **bystart, **inuse;
   549  	TempFlow *r;
   550  	NodeList *l, **lp;
   551  	Node *n;
   552  	Prog *p, *p1;
   553  	Type *t;
   554  	ProgInfo info, info1;
   555  	int32 gen;
   556  	Graph *g;
   557  
   558  	enum { Debug = 0 };
   559  
   560  	g = flowstart(firstp, sizeof(TempFlow));
   561  	if(g == nil)
   562  		return;
   563  	
   564  	// Build list of all mergeable variables.
   565  	nvar = 0;
   566  	for(l = curfn->dcl; l != nil; l = l->next)
   567  		if(canmerge(l->n))
   568  			nvar++;
   569  	
   570  	var = calloc(nvar*sizeof var[0], 1);
   571  	nvar = 0;
   572  	for(l = curfn->dcl; l != nil; l = l->next) {
   573  		n = l->n;
   574  		if(canmerge(n)) {
   575  			v = &var[nvar++];
   576  			n->opt = v;
   577  			v->node = n;
   578  		}
   579  	}
   580  	
   581  	// Build list of uses.
   582  	// We assume that the earliest reference to a temporary is its definition.
   583  	// This is not true of variables in general but our temporaries are all
   584  	// single-use (that's why we have so many!).
   585  	for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
   586  		p = r->f.prog;
   587  		proginfo(&info, p);
   588  
   589  		if(p->from.node != N && p->from.node->opt && p->to.node != N && p->to.node->opt)
   590  			fatal("double node %P", p);
   591  		if((n = p->from.node) != N && (v = n->opt) != nil ||
   592  		   (n = p->to.node) != N && (v = n->opt) != nil) {
   593  		   	if(v->def == nil)
   594  		   		v->def = r;
   595  			r->uselink = v->use;
   596  			v->use = r;
   597  			if(n == p->from.node && (info.flags & LeftAddr))
   598  				v->addr = 1;
   599  		}
   600  	}
   601  	
   602  	if(Debug > 1)
   603  		dumpit("before", g->start, 0);
   604  	
   605  	nkill = 0;
   606  
   607  	// Special case.
   608  	for(v = var; v < var+nvar; v++) {
   609  		if(v->addr)
   610  			continue;
   611  		// Used in only one instruction, which had better be a write.
   612  		if((r = v->use) != nil && r->uselink == nil) {
   613  			p = r->f.prog;
   614  			proginfo(&info, p);
   615  			if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) {
   616  				p->as = ANOP;
   617  				p->to = zprog.to;
   618  				v->removed = 1;
   619  				if(Debug)
   620  					print("drop write-only %S\n", v->node->sym);
   621  			} else
   622  				fatal("temp used and not set: %P", p);
   623  			nkill++;
   624  			continue;
   625  		}
   626  		
   627  		// Written in one instruction, read in the next, otherwise unused,
   628  		// no jumps to the next instruction. Happens mainly in 386 compiler.
   629  		if((r = v->use) != nil && r->f.link == &r->uselink->f && r->uselink->uselink == nil && uniqp(r->f.link) == &r->f) {
   630  			p = r->f.prog;
   631  			proginfo(&info, p);
   632  			p1 = r->f.link->prog;
   633  			proginfo(&info1, p1);
   634  			enum {
   635  				SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD,
   636  			};
   637  			if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) &&
   638  			   !((info.flags|info1.flags) & (LeftAddr|RightAddr)) &&
   639  			   (info.flags & SizeAny) == (info1.flags & SizeAny)) {
   640  				p1->from = p->from;
   641  				excise(&r->f);
   642  				v->removed = 1;
   643  				if(Debug)
   644  					print("drop immediate-use %S\n", v->node->sym);
   645  			}
   646  			nkill++;
   647  			continue;
    648  		}
   649  	}
   650  
   651  	// Traverse live range of each variable to set start, end.
   652  	// Each flood uses a new value of gen so that we don't have
   653  	// to clear all the r->f.active words after each variable.
   654  	gen = 0;
   655  	for(v = var; v < var+nvar; v++) {
   656  		gen++;
   657  		for(r = v->use; r != nil; r = r->uselink)
   658  			mergewalk(v, r, gen);
   659  		if(v->addr) {
   660  			gen++;
   661  			for(r = v->use; r != nil; r = r->uselink)
   662  				varkillwalk(v, r, gen);
   663  		}
   664  	}
   665  
   666  	// Sort variables by start.
   667  	bystart = malloc(nvar*sizeof bystart[0]);
   668  	for(i=0; i<nvar; i++)
   669  		bystart[i] = &var[i];
   670  	qsort(bystart, nvar, sizeof bystart[0], startcmp);
   671  
   672  	// List of in-use variables, sorted by end, so that the ones that
   673  	// will last the longest are the earliest ones in the array.
   674  	// The tail inuse[nfree:] holds no-longer-used variables.
   675  	// In theory we should use a sorted tree so that insertions are
   676  	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
   677  	// In practice, it doesn't really matter.
   678  	inuse = malloc(nvar*sizeof inuse[0]);
   679  	ninuse = 0;
   680  	nfree = nvar;
   681  	for(i=0; i<nvar; i++) {
   682  		v = bystart[i];
   683  		if(v->removed)
   684  			continue;
   685  
   686  		// Expire no longer in use.
   687  		while(ninuse > 0 && inuse[ninuse-1]->end < v->start) {
   688  			v1 = inuse[--ninuse];
   689  			inuse[--nfree] = v1;
   690  		}
   691  
   692  		// Find old temp to reuse if possible.
   693  		t = v->node->type;
   694  		for(j=nfree; j<nvar; j++) {
   695  			v1 = inuse[j];
   696  			// Require the types to match but also require the addrtaken bits to match.
   697  			// If a variable's address is taken, that disables registerization for the individual
   698  			// words of the variable (for example, the base,len,cap of a slice).
   699  			// We don't want to merge a non-addressed var with an addressed one and
   700  			// inhibit registerization of the former.
   701  			if(eqtype(t, v1->node->type) && v->node->addrtaken == v1->node->addrtaken) {
   702  				inuse[j] = inuse[nfree++];
   703  				if(v1->merge)
   704  					v->merge = v1->merge;
   705  				else
   706  					v->merge = v1;
   707  				nkill++;
   708  				break;
   709  			}
   710  		}
   711  
   712  		// Sort v into inuse.
   713  		j = ninuse++;
   714  		while(j > 0 && inuse[j-1]->end < v->end) {
   715  			inuse[j] = inuse[j-1];
   716  			j--;
   717  		}
   718  		inuse[j] = v;
   719  	}
   720  
   721  	if(Debug) {
   722  		print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill);
   723  		for(v=var; v<var+nvar; v++) {
   724  			print("var %#N %T %lld-%lld", v->node, v->node->type, v->start, v->end);
   725  			if(v->addr)
   726  				print(" addr=1");
   727  			if(v->removed)
   728  				print(" dead=1");
   729  			if(v->merge)
   730  				print(" merge %#N", v->merge->node);
   731  			if(v->start == v->end)
   732  				print(" %P", v->def->f.prog);
   733  			print("\n");
   734  		}
   735  	
   736  		if(Debug > 1)
   737  			dumpit("after", g->start, 0);
   738  	}
   739  
   740  	// Update node references to use merged temporaries.
   741  	for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
   742  		p = r->f.prog;
   743  		if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil)
   744  			p->from.node = v->merge->node;
   745  		if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil)
   746  			p->to.node = v->merge->node;
   747  	}
   748  
   749  	// Delete merged nodes from declaration list.
   750  	for(lp = &curfn->dcl; (l = *lp); ) {
   751  		curfn->dcl->end = l;
   752  		n = l->n;
   753  		v = n->opt;
   754  		if(v && (v->merge || v->removed)) {
   755  			*lp = l->next;
   756  			continue;
   757  		}
   758  		lp = &l->next;
   759  	}
   760  
   761  	// Clear aux structures.
   762  	for(v=var; v<var+nvar; v++)
   763  		v->node->opt = nil;
   764  	free(var);
   765  	free(bystart);
   766  	free(inuse);
   767  	flowend(g);
   768  }
   769  
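         // mergewalk floods backward from the use r0 toward the definition of v,
         // extending v's live range [start, end] with each Prog.pc encountered;
         // gen marks flow nodes already visited for this variable.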
   770  static void
   771  mergewalk(TempVar *v, TempFlow *r0, uint32 gen)
   772  {
   773  	Prog *p;
   774  	TempFlow *r1, *r, *r2;
   775  	
   776  	for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.p1) {
   777  		if(r1->f.active == gen)
   778  			break;
   779  		r1->f.active = gen;
   780  		p = r1->f.prog;
   781  		if(v->end < p->pc)
   782  			v->end = p->pc;
   783  		if(r1 == v->def) {
   784  			v->start = p->pc;
   785  			break;
   786  		}
   787  	}
   788  	
   789  	for(r = r0; r != r1; r = (TempFlow*)r->f.p1)
   790  		for(r2 = (TempFlow*)r->f.p2; r2 != nil; r2 = (TempFlow*)r2->f.p2link)
   791  			mergewalk(v, r2, gen);
   792  }
   793  
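         // varkillwalk floods forward from r0 until a RET or a VARKILL of v,
         // extending v's live range; it is used for address-taken temporaries,
         // whose final use cannot be determined from explicit reads alone.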
   794  static void
   795  varkillwalk(TempVar *v, TempFlow *r0, uint32 gen)
   796  {
   797  	Prog *p;
   798  	TempFlow *r1, *r;
   799  	
   800  	for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.s1) {
   801  		if(r1->f.active == gen)
   802  			break;
   803  		r1->f.active = gen;
   804  		p = r1->f.prog;
   805  		if(v->end < p->pc)
   806  			v->end = p->pc;
   807  		if(v->start > p->pc)
   808  			v->start = p->pc;
   809  		if(p->as == ARET || (p->as == AVARKILL && p->to.node == v->node))
   810  			break;
   811  	}
   812  	
   813  	for(r = r0; r != r1; r = (TempFlow*)r->f.s1)
   814  		varkillwalk(v, (TempFlow*)r->f.s2, gen);
   815  }
   816  
   817  // Eliminate redundant nil pointer checks.
   818  //
   819  // The code generation pass emits a CHECKNIL for every possibly nil pointer.
   820  // This pass removes a CHECKNIL if every predecessor path has already
   821  // checked this value for nil.
   822  //
   823  // Simple backwards flood from check to definition.
   824  // Run prog loop backward from end of program to beginning to avoid quadratic
   825  // behavior removing a run of checks.
   826  //
   827  // Assume that stack variables with address not taken can be loaded multiple times
   828  // from memory without being rechecked. Other variables need to be checked on
   829  // each load.
   830  	
   831  typedef struct NilVar NilVar;
   832  typedef struct NilFlow NilFlow;
   833  
   834  struct NilFlow {
   835  	Flow f;
   836  	int kill;
   837  };
   838  
   839  static void nilwalkback(NilFlow *rcheck);
   840  static void nilwalkfwd(NilFlow *rcheck);
   841  
   842  void
   843  nilopt(Prog *firstp)
   844  {
   845  	NilFlow *r;
   846  	Prog *p;
   847  	Graph *g;
   848  	int ncheck, nkill;
   849  
   850  	g = flowstart(firstp, sizeof(NilFlow));
   851  	if(g == nil)
   852  		return;
   853  
   854  	if(debug_checknil > 1 /* || strcmp(curfn->nname->sym->name, "f1") == 0 */)
   855  		dumpit("nilopt", g->start, 0);
   856  
   857  	ncheck = 0;
   858  	nkill = 0;
   859  	for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) {
   860  		p = r->f.prog;
   861  		if(p->as != ACHECKNIL || !regtyp(&p->from))
   862  			continue;
   863  		ncheck++;
   864  		if(stackaddr(&p->from)) {
   865  			if(debug_checknil && p->lineno > 1)
   866  				warnl(p->lineno, "removed nil check of SP address");
   867  			r->kill = 1;
   868  			continue;
   869  		}
   870  		nilwalkfwd(r);
   871  		if(r->kill) {
   872  			if(debug_checknil && p->lineno > 1)
   873  				warnl(p->lineno, "removed nil check before indirect");
   874  			continue;
   875  		}
   876  		nilwalkback(r);
   877  		if(r->kill) {
   878  			if(debug_checknil && p->lineno > 1)
   879  				warnl(p->lineno, "removed repeated nil check");
   880  			continue;
   881  		}
   882  	}
   883  	
   884  	for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) {
   885  		if(r->kill) {
   886  			nkill++;
   887  			excise(&r->f);
   888  		}
   889  	}
   890  
   891  	flowend(g);
   892  	
   893  	if(debug_checknil > 1)
   894  		print("%S: removed %d of %d nil checks\n", curfn->nname->sym, nkill, ncheck);
   895  }
   896  
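         // nilwalkback scans backward through unique predecessors of the check.
         // If it finds an earlier identical nil check before any write to the
         // checked value, the check is redundant and is marked for removal.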
   897  static void
   898  nilwalkback(NilFlow *rcheck)
   899  {
   900  	Prog *p;
   901  	ProgInfo info;
   902  	NilFlow *r;
   903  	
   904  	for(r = rcheck; r != nil; r = (NilFlow*)uniqp(&r->f)) {
   905  		p = r->f.prog;
   906  		proginfo(&info, p);
   907  		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) {
    908  			// Found initialization of value we're checking for nil,
   909  			// without first finding the check, so this one is unchecked.
   910  			return;
   911  		}
   912  		if(r != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from)) {
   913  			rcheck->kill = 1;
   914  			return;
   915  		}
   916  	}
   917  
   918  	// Here is a more complex version that scans backward across branches.
   919  	// It assumes rcheck->kill = 1 has been set on entry, and its job is to find a reason
   920  	// to keep the check (setting rcheck->kill = 0).
   921  	// It doesn't handle copying of aggregates as well as I would like,
   922  	// nor variables with their address taken,
   923  	// and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3.
   924  	/*
   925  	for(r1 = r0; r1 != nil; r1 = (NilFlow*)r1->f.p1) {
   926  		if(r1->f.active == gen)
   927  			break;
   928  		r1->f.active = gen;
   929  		p = r1->f.prog;
   930  		
   931  		// If same check, stop this loop but still check
   932  		// alternate predecessors up to this point.
   933  		if(r1 != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from))
   934  			break;
   935  
   936  		proginfo(&info, p);
   937  		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) {
    938  			// Found initialization of value we're checking for nil,
   939  			// without first finding the check, so this one is unchecked.
   940  			rcheck->kill = 0;
   941  			return;
   942  		}
   943  		
   944  		if(r1->f.p1 == nil && r1->f.p2 == nil) {
   945  			print("lost pred for %P\n", rcheck->f.prog);
   946  			for(r1=r0; r1!=nil; r1=(NilFlow*)r1->f.p1) {
   947  				proginfo(&info, r1->f.prog);
   948  				print("\t%P %d %d %D %D\n", r1->f.prog, info.flags&RightWrite, sameaddr(&r1->f.prog->to, &rcheck->f.prog->from), &r1->f.prog->to, &rcheck->f.prog->from);
   949  			}
   950  			fatal("lost pred trail");
   951  		}
   952  	}
   953  
   954  	for(r = r0; r != r1; r = (NilFlow*)r->f.p1)
   955  		for(r2 = (NilFlow*)r->f.p2; r2 != nil; r2 = (NilFlow*)r2->f.p2link)
   956  			nilwalkback(rcheck, r2, gen);
   957  	*/
   958  }
   959  
   960  static void
   961  nilwalkfwd(NilFlow *rcheck)
   962  {
   963  	NilFlow *r, *last;
   964  	Prog *p;
   965  	ProgInfo info;
   966  	
   967  	// If the path down from rcheck dereferences the address
   968  	// (possibly with a small offset) before writing to memory
   969  	// and before any subsequent checks, it's okay to wait for
   970  	// that implicit check. Only consider this basic block to
   971  	// avoid problems like:
   972  	//	_ = *x // should panic
   973  	//	for {} // no writes but infinite loop may be considered visible
   974  	last = nil;
   975  	for(r = (NilFlow*)uniqs(&rcheck->f); r != nil; r = (NilFlow*)uniqs(&r->f)) {
   976  		p = r->f.prog;
   977  		proginfo(&info, p);
   978  		
   979  		if((info.flags & LeftRead) && smallindir(&p->from, &rcheck->f.prog->from)) {
   980  			rcheck->kill = 1;
   981  			return;
   982  		}
   983  		if((info.flags & (RightRead|RightWrite)) && smallindir(&p->to, &rcheck->f.prog->from)) {
   984  			rcheck->kill = 1;
   985  			return;
   986  		}
   987  		
   988  		// Stop if another nil check happens.
   989  		if(p->as == ACHECKNIL)
   990  			return;
   991  		// Stop if value is lost.
   992  		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from))
   993  			return;
   994  		// Stop if memory write.
   995  		if((info.flags & RightWrite) && !regtyp(&p->to))
   996  			return;
   997  		// Stop if we jump backward.
   998  		// This test is valid because all the NilFlow* are pointers into
   999  		// a single contiguous array. We will need to add an explicit
  1000  		// numbering when the code is converted to Go.
  1001  		if(last != nil && r <= last)
  1002  			return;
  1003  		last = r;
  1004  	}
  1005  }