github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/gc/popt.c (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // "Portable" optimizations.
    32  // Compiled separately for 5g, 6g, and 8g, so allowed to use gg.h, opt.h.
    33  // Must code to the intersection of the three back ends.
    34  
    35  #include	<u.h>
    36  #include	<libc.h>
    37  #include	"gg.h"
    38  #include	"opt.h"
    39  
    40  // p is a call instruction. Does the call fail to return?
    41  int
    42  noreturn(Prog *p)
    43  {
    44  	Sym *s;
    45  	int i;
    46  	static Sym*	symlist[10];
    47  
    48  	if(symlist[0] == S) {
    49  		symlist[0] = pkglookup("panicindex", runtimepkg);
    50  		symlist[1] = pkglookup("panicslice", runtimepkg);
    51  		symlist[2] = pkglookup("throwinit", runtimepkg);
    52  		symlist[3] = pkglookup("panic", runtimepkg);
    53  		symlist[4] = pkglookup("panicwrap", runtimepkg);
    54  		symlist[5] = pkglookup("throwreturn", runtimepkg);
    55  		symlist[6] = pkglookup("selectgo", runtimepkg);
    56  		symlist[7] = pkglookup("block", runtimepkg);
    57  	}
    58  
    59  	if(p->to.node == nil)
    60  		return 0;
    61  	s = p->to.node->sym;
    62  	if(s == S)
    63  		return 0;
    64  	for(i=0; symlist[i]!=S; i++)
    65  		if(s == symlist[i])
    66  			return 1;
    67  	return 0;
    68  }
    69  
    70  // JMP chasing and removal.
    71  //
    72  // The code generator depends on being able to write out jump
    73  // instructions that it can jump to now but fill in later.
// The linker will resolve them nicely, but they make the code
    75  // longer and more difficult to follow during debugging.
    76  // Remove them.
    77  
    78  /* what instruction does a JMP to p eventually land on? */
    79  static Prog*
    80  chasejmp(Prog *p, int *jmploop)
    81  {
    82  	int n;
    83  
    84  	n = 0;
    85  	while(p != P && p->as == AJMP && p->to.type == D_BRANCH) {
    86  		if(++n > 10) {
    87  			*jmploop = 1;
    88  			break;
    89  		}
    90  		p = p->to.u.branch;
    91  	}
    92  	return p;
    93  }
    94  
/*
 * mark/sweep state used by fixjmp and mark, stored in each Prog's opt pointer.
 * alive is the nil pointer, so every instruction that survives the sweep
 * is left with opt==nil and needs no separate cleanup pass.
 */
#define alive ((void*)0)
#define dead ((void*)1)
   101  
   102  /* mark all code reachable from firstp as alive */
   103  static void
   104  mark(Prog *firstp)
   105  {
   106  	Prog *p;
   107  	
   108  	for(p=firstp; p; p=p->link) {
   109  		if(p->opt != dead)
   110  			break;
   111  		p->opt = alive;
   112  		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch)
   113  			mark(p->to.u.branch);
   114  		if(p->as == AJMP || p->as == ARET || p->as == AUNDEF)
   115  			break;
   116  	}
   117  }
   118  
   119  void
   120  fixjmp(Prog *firstp)
   121  {
   122  	int jmploop;
   123  	Prog *p, *last;
   124  	
   125  	if(debug['R'] && debug['v'])
   126  		print("\nfixjmp\n");
   127  
   128  	// pass 1: resolve jump to jump, mark all code as dead.
   129  	jmploop = 0;
   130  	for(p=firstp; p; p=p->link) {
   131  		if(debug['R'] && debug['v'])
   132  			print("%P\n", p);
   133  		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) {
   134  			p->to.u.branch = chasejmp(p->to.u.branch, &jmploop);
   135  			if(debug['R'] && debug['v'])
   136  				print("->%P\n", p);
   137  		}
   138  		p->opt = dead;
   139  	}
   140  	if(debug['R'] && debug['v'])
   141  		print("\n");
   142  
   143  	// pass 2: mark all reachable code alive
   144  	mark(firstp);
   145  	
   146  	// pass 3: delete dead code (mostly JMPs).
   147  	last = nil;
   148  	for(p=firstp; p; p=p->link) {
   149  		if(p->opt == dead) {
   150  			if(p->link == P && p->as == ARET && last && last->as != ARET) {
   151  				// This is the final ARET, and the code so far doesn't have one.
   152  				// Let it stay. The register allocator assumes that all live code in
   153  				// the function can be traversed by starting at all the RET instructions
   154  				// and following predecessor links. If we remove the final RET,
   155  				// this assumption will not hold in the case of an infinite loop
   156  				// at the end of a function.
   157  				// Keep the RET but mark it dead for the liveness analysis.
   158  				p->mode = 1;
   159  			} else {
   160  				if(debug['R'] && debug['v'])
   161  					print("del %P\n", p);
   162  				continue;
   163  			}
   164  		}
   165  		if(last)
   166  			last->link = p;
   167  		last = p;
   168  	}
   169  	last->link = P;
   170  	
   171  	// pass 4: elide JMP to next instruction.
   172  	// only safe if there are no jumps to JMPs anymore.
   173  	if(!jmploop) {
   174  		last = nil;
   175  		for(p=firstp; p; p=p->link) {
   176  			if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) {
   177  				if(debug['R'] && debug['v'])
   178  					print("del %P\n", p);
   179  				continue;
   180  			}
   181  			if(last)
   182  				last->link = p;
   183  			last = p;
   184  		}
   185  		last->link = P;
   186  	}
   187  	
   188  	if(debug['R'] && debug['v']) {
   189  		print("\n");
   190  		for(p=firstp; p; p=p->link)
   191  			print("%P\n", p);
   192  		print("\n");
   193  	}
   194  }
   195  
   196  #undef alive
   197  #undef dead
   198  
   199  // Control flow analysis. The Flow structures hold predecessor and successor
   200  // information as well as basic loop analysis.
   201  //
   202  //	graph = flowstart(firstp, sizeof(Flow));
   203  //	... use flow graph ...
   204  //	flowend(graph); // free graph
   205  //
   206  // Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
   207  //
   208  //	for(f = graph->start; f != nil; f = f->link)
   209  //
   210  // or, given an instruction f, to iterate over all the predecessors, which is
   211  // f->p1 and this list:
   212  //
   213  //	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
   214  //	
   215  // Often the Flow struct is embedded as the first field inside a larger struct S.
   216  // In that case casts are needed to convert Flow* to S* in many places but the
   217  // idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart.
   218  
   219  Graph*
   220  flowstart(Prog *firstp, int size)
   221  {
   222  	int nf;
   223  	Flow *f, *f1, *start, *last;
   224  	Graph *graph;
   225  	Prog *p;
   226  	ProgInfo info;
   227  
   228  	// Count and mark instructions to annotate.
   229  	nf = 0;
   230  	for(p = firstp; p != P; p = p->link) {
   231  		p->opt = nil; // should be already, but just in case
   232  		proginfo(&info, p);
   233  		if(info.flags & Skip)
   234  			continue;
   235  		p->opt = (void*)1;
   236  		nf++;
   237  	}
   238  	
   239  	if(nf == 0)
   240  		return nil;
   241  
   242  	if(nf >= 20000) {
   243  		// fatal("%S is too big (%d instructions)", curfn->nname->sym, nf);
   244  		return nil;
   245  	}
   246  
   247  	// Allocate annotations and assign to instructions.
   248  	graph = calloc(sizeof *graph + size*nf, 1);
   249  	if(graph == nil)
   250  		fatal("out of memory");
   251  	start = (Flow*)(graph+1);
   252  	last = nil;
   253  	f = start;
   254  	for(p = firstp; p != P; p = p->link) {
   255  		if(p->opt == nil)
   256  			continue;
   257  		p->opt = f;
   258  		f->prog = p;
   259  		if(last)
   260  			last->link = f;
   261  		last = f;
   262  		
   263  		f = (Flow*)((uchar*)f + size);
   264  	}
   265  
   266  	// Fill in pred/succ information.
   267  	for(f = start; f != nil; f = f->link) {
   268  		p = f->prog;
   269  		proginfo(&info, p);
   270  		if(!(info.flags & Break)) {
   271  			f1 = f->link;
   272  			f->s1 = f1;
   273  			f1->p1 = f;
   274  		}
   275  		if(p->to.type == D_BRANCH) {
   276  			if(p->to.u.branch == P)
   277  				fatal("pnil %P", p);
   278  			f1 = p->to.u.branch->opt;
   279  			if(f1 == nil)
   280  				fatal("fnil %P / %P", p, p->to.u.branch);
   281  			if(f1 == f) {
   282  				//fatal("self loop %P", p);
   283  				continue;
   284  			}
   285  			f->s2 = f1;
   286  			f->p2link = f1->p2;
   287  			f1->p2 = f;
   288  		}
   289  	}
   290  	
   291  	graph->start = start;
   292  	graph->num = nf;
   293  	return graph;
   294  }
   295  
   296  void
   297  flowend(Graph *graph)
   298  {
   299  	Flow *f;
   300  	
   301  	for(f = graph->start; f != nil; f = f->link)
   302  		f->prog->opt = nil;
   303  	free(graph);
   304  }
   305  
   306  /*
   307   * find looping structure
   308   *
   309   * 1) find reverse postordering
   310   * 2) find approximate dominators,
   311   *	the actual dominators if the flow graph is reducible
   312   *	otherwise, dominators plus some other non-dominators.
   313   *	See Matthew S. Hecht and Jeffrey D. Ullman,
   314   *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
   315   *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
   316   *	Oct. 1-3, 1973, pp.  207-217.
   317   * 3) find all nodes with a predecessor dominated by the current node.
   318   *	such a node is a loop head.
   319   *	recursively, all preds with a greater rpo number are in the loop
   320   */
   321  static int32
   322  postorder(Flow *r, Flow **rpo2r, int32 n)
   323  {
   324  	Flow *r1;
   325  
   326  	r->rpo = 1;
   327  	r1 = r->s1;
   328  	if(r1 && !r1->rpo)
   329  		n = postorder(r1, rpo2r, n);
   330  	r1 = r->s2;
   331  	if(r1 && !r1->rpo)
   332  		n = postorder(r1, rpo2r, n);
   333  	rpo2r[n] = r;
   334  	n++;
   335  	return n;
   336  }
   337  
   338  static int32
   339  rpolca(int32 *idom, int32 rpo1, int32 rpo2)
   340  {
   341  	int32 t;
   342  
   343  	if(rpo1 == -1)
   344  		return rpo2;
   345  	while(rpo1 != rpo2){
   346  		if(rpo1 > rpo2){
   347  			t = rpo2;
   348  			rpo2 = rpo1;
   349  			rpo1 = t;
   350  		}
   351  		while(rpo1 < rpo2){
   352  			t = idom[rpo2];
   353  			if(t >= rpo2)
   354  				fatal("bad idom");
   355  			rpo2 = t;
   356  		}
   357  	}
   358  	return rpo1;
   359  }
   360  
   361  static int
   362  doms(int32 *idom, int32 r, int32 s)
   363  {
   364  	while(s > r)
   365  		s = idom[s];
   366  	return s == r;
   367  }
   368  
   369  static int
   370  loophead(int32 *idom, Flow *r)
   371  {
   372  	int32 src;
   373  
   374  	src = r->rpo;
   375  	if(r->p1 != nil && doms(idom, src, r->p1->rpo))
   376  		return 1;
   377  	for(r = r->p2; r != nil; r = r->p2link)
   378  		if(doms(idom, src, r->rpo))
   379  			return 1;
   380  	return 0;
   381  }
   382  
   383  static void
   384  loopmark(Flow **rpo2r, int32 head, Flow *r)
   385  {
   386  	if(r->rpo < head || r->active == head)
   387  		return;
   388  	r->active = head;
   389  	r->loop += LOOP;
   390  	if(r->p1 != nil)
   391  		loopmark(rpo2r, head, r->p1);
   392  	for(r = r->p2; r != nil; r = r->p2link)
   393  		loopmark(rpo2r, head, r);
   394  }
   395  
   396  void
   397  flowrpo(Graph *g)
   398  {
   399  	Flow *r1;
   400  	int32 i, d, me, nr, *idom;
   401  	Flow **rpo2r;
   402  
   403  	free(g->rpo);
   404  	g->rpo = calloc(g->num*sizeof g->rpo[0], 1);
   405  	idom = calloc(g->num*sizeof idom[0], 1);
   406  	if(g->rpo == nil || idom == nil)
   407  		fatal("out of memory");
   408  
   409  	for(r1 = g->start; r1 != nil; r1 = r1->link)
   410  		r1->active = 0;
   411  
   412  	rpo2r = g->rpo;
   413  	d = postorder(g->start, rpo2r, 0);
   414  	nr = g->num;
   415  	if(d > nr)
   416  		fatal("too many reg nodes %d %d", d, nr);
   417  	nr = d;
   418  	for(i = 0; i < nr / 2; i++) {
   419  		r1 = rpo2r[i];
   420  		rpo2r[i] = rpo2r[nr - 1 - i];
   421  		rpo2r[nr - 1 - i] = r1;
   422  	}
   423  	for(i = 0; i < nr; i++)
   424  		rpo2r[i]->rpo = i;
   425  
   426  	idom[0] = 0;
   427  	for(i = 0; i < nr; i++) {
   428  		r1 = rpo2r[i];
   429  		me = r1->rpo;
   430  		d = -1;
   431  		// rpo2r[r->rpo] == r protects against considering dead code,
   432  		// which has r->rpo == 0.
   433  		if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
   434  			d = r1->p1->rpo;
   435  		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
   436  			if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
   437  				d = rpolca(idom, d, r1->rpo);
   438  		idom[i] = d;
   439  	}
   440  
   441  	for(i = 0; i < nr; i++) {
   442  		r1 = rpo2r[i];
   443  		r1->loop++;
   444  		if(r1->p2 != nil && loophead(idom, r1))
   445  			loopmark(rpo2r, i, r1);
   446  	}
   447  	free(idom);
   448  
   449  	for(r1 = g->start; r1 != nil; r1 = r1->link)
   450  		r1->active = 0;
   451  }
   452  
   453  Flow*
   454  uniqp(Flow *r)
   455  {
   456  	Flow *r1;
   457  
   458  	r1 = r->p1;
   459  	if(r1 == nil) {
   460  		r1 = r->p2;
   461  		if(r1 == nil || r1->p2link != nil)
   462  			return nil;
   463  	} else
   464  		if(r->p2 != nil)
   465  			return nil;
   466  	return r1;
   467  }
   468  
   469  Flow*
   470  uniqs(Flow *r)
   471  {
   472  	Flow *r1;
   473  
   474  	r1 = r->s1;
   475  	if(r1 == nil) {
   476  		r1 = r->s2;
   477  		if(r1 == nil)
   478  			return nil;
   479  	} else
   480  		if(r->s2 != nil)
   481  			return nil;
   482  	return r1;
   483  }
   484  
   485  // The compilers assume they can generate temporary variables
   486  // as needed to preserve the right semantics or simplify code
   487  // generation and the back end will still generate good code.
   488  // This results in a large number of ephemeral temporary variables.
   489  // Merge temps with non-overlapping lifetimes and equal types using the
   490  // greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation",
   491  // ACM TOPLAS 1999.
   492  
   493  typedef struct TempVar TempVar;
   494  typedef struct TempFlow TempFlow;
   495  
// TempVar is mergetemp's bookkeeping for one mergeable temporary.
// mergetemp allocates one per candidate in curfn->dcl and links it
// from the declaration node through node->opt for the duration of the pass.
struct TempVar
{
	Node *node; // declaration node of the temporary
	TempFlow *def; // definition of temp var
	TempFlow *use; // use list, chained through TempFlow.uselink
	TempVar *freelink; // next free temp in Type.opt list
	TempVar *merge; // merge var with this one
	vlong start; // smallest Prog.pc in live range
	vlong end; // largest Prog.pc in live range
	uchar addr; // address taken - no accurate end
	uchar removed; // removed from program
};
   508  
// TempFlow is the per-instruction annotation used by mergetemp.
// The Flow must stay the first field: flowstart hands out Flow pointers
// and mergetemp casts them to TempFlow.
struct TempFlow
{
	Flow	f;
	TempFlow *uselink; // next entry on the owning TempVar's use list
};
   514  
   515  static int
   516  startcmp(const void *va, const void *vb)
   517  {
   518  	TempVar *a, *b;
   519  	
   520  	a = *(TempVar**)va;
   521  	b = *(TempVar**)vb;
   522  
   523  	if(a->start < b->start)
   524  		return -1;
   525  	if(a->start > b->start)
   526  		return +1;
   527  	return 0;
   528  }
   529  
   530  // Is n available for merging?
   531  static int
   532  canmerge(Node *n)
   533  {
   534  	return n->class == PAUTO && strncmp(n->sym->name, "autotmp", 7) == 0;
   535  }
   536  
   537  static void mergewalk(TempVar*, TempFlow*, uint32);
   538  static void varkillwalk(TempVar*, TempFlow*, uint32);
   539  
   540  void
   541  mergetemp(Prog *firstp)
   542  {
   543  	int i, j, nvar, ninuse, nfree, nkill;
   544  	TempVar *var, *v, *v1, **bystart, **inuse;
   545  	TempFlow *r;
   546  	NodeList *l, **lp;
   547  	Node *n;
   548  	Prog *p, *p1;
   549  	Type *t;
   550  	ProgInfo info, info1;
   551  	int32 gen;
   552  	Graph *g;
   553  
   554  	enum { Debug = 0 };
   555  
   556  	g = flowstart(firstp, sizeof(TempFlow));
   557  	if(g == nil)
   558  		return;
   559  	
   560  	// Build list of all mergeable variables.
   561  	nvar = 0;
   562  	for(l = curfn->dcl; l != nil; l = l->next)
   563  		if(canmerge(l->n))
   564  			nvar++;
   565  	
   566  	var = calloc(nvar*sizeof var[0], 1);
   567  	nvar = 0;
   568  	for(l = curfn->dcl; l != nil; l = l->next) {
   569  		n = l->n;
   570  		if(canmerge(n)) {
   571  			v = &var[nvar++];
   572  			n->opt = v;
   573  			v->node = n;
   574  		}
   575  	}
   576  	
   577  	// Build list of uses.
   578  	// We assume that the earliest reference to a temporary is its definition.
   579  	// This is not true of variables in general but our temporaries are all
   580  	// single-use (that's why we have so many!).
   581  	for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
   582  		p = r->f.prog;
   583  		proginfo(&info, p);
   584  
   585  		if(p->from.node != N && p->from.node->opt && p->to.node != N && p->to.node->opt)
   586  			fatal("double node %P", p);
   587  		if((n = p->from.node) != N && (v = n->opt) != nil ||
   588  		   (n = p->to.node) != N && (v = n->opt) != nil) {
   589  		   	if(v->def == nil)
   590  		   		v->def = r;
   591  			r->uselink = v->use;
   592  			v->use = r;
   593  			if(n == p->from.node && (info.flags & LeftAddr))
   594  				v->addr = 1;
   595  		}
   596  	}
   597  	
   598  	if(Debug > 1)
   599  		dumpit("before", g->start, 0);
   600  	
   601  	nkill = 0;
   602  
   603  	// Special case.
   604  	for(v = var; v < var+nvar; v++) {
   605  		if(v->addr)
   606  			continue;
   607  		// Used in only one instruction, which had better be a write.
   608  		if((r = v->use) != nil && r->uselink == nil) {
   609  			p = r->f.prog;
   610  			proginfo(&info, p);
   611  			if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) {
   612  				p->as = ANOP;
   613  				p->to = zprog.to;
   614  				v->removed = 1;
   615  				if(Debug)
   616  					print("drop write-only %S\n", v->node->sym);
   617  			} else
   618  				fatal("temp used and not set: %P", p);
   619  			nkill++;
   620  			continue;
   621  		}
   622  		
   623  		// Written in one instruction, read in the next, otherwise unused,
   624  		// no jumps to the next instruction. Happens mainly in 386 compiler.
   625  		if((r = v->use) != nil && r->f.link == &r->uselink->f && r->uselink->uselink == nil && uniqp(r->f.link) == &r->f) {
   626  			p = r->f.prog;
   627  			proginfo(&info, p);
   628  			p1 = r->f.link->prog;
   629  			proginfo(&info1, p1);
   630  			enum {
   631  				SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD,
   632  			};
   633  			if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) &&
   634  			   !((info.flags|info1.flags) & (LeftAddr|RightAddr)) &&
   635  			   (info.flags & SizeAny) == (info1.flags & SizeAny)) {
   636  				p1->from = p->from;
   637  				excise(&r->f);
   638  				v->removed = 1;
   639  				if(Debug)
   640  					print("drop immediate-use %S\n", v->node->sym);
   641  			}
   642  			nkill++;
   643  			continue;
   644  		}			   
   645  	}
   646  
   647  	// Traverse live range of each variable to set start, end.
   648  	// Each flood uses a new value of gen so that we don't have
   649  	// to clear all the r->f.active words after each variable.
   650  	gen = 0;
   651  	for(v = var; v < var+nvar; v++) {
   652  		gen++;
   653  		for(r = v->use; r != nil; r = r->uselink)
   654  			mergewalk(v, r, gen);
   655  		if(v->addr) {
   656  			gen++;
   657  			for(r = v->use; r != nil; r = r->uselink)
   658  				varkillwalk(v, r, gen);
   659  		}
   660  	}
   661  
   662  	// Sort variables by start.
   663  	bystart = malloc(nvar*sizeof bystart[0]);
   664  	for(i=0; i<nvar; i++)
   665  		bystart[i] = &var[i];
   666  	qsort(bystart, nvar, sizeof bystart[0], startcmp);
   667  
   668  	// List of in-use variables, sorted by end, so that the ones that
   669  	// will last the longest are the earliest ones in the array.
   670  	// The tail inuse[nfree:] holds no-longer-used variables.
   671  	// In theory we should use a sorted tree so that insertions are
   672  	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
   673  	// In practice, it doesn't really matter.
   674  	inuse = malloc(nvar*sizeof inuse[0]);
   675  	ninuse = 0;
   676  	nfree = nvar;
   677  	for(i=0; i<nvar; i++) {
   678  		v = bystart[i];
   679  		if(v->removed)
   680  			continue;
   681  
   682  		// Expire no longer in use.
   683  		while(ninuse > 0 && inuse[ninuse-1]->end < v->start) {
   684  			v1 = inuse[--ninuse];
   685  			inuse[--nfree] = v1;
   686  		}
   687  
   688  		// Find old temp to reuse if possible.
   689  		t = v->node->type;
   690  		for(j=nfree; j<nvar; j++) {
   691  			v1 = inuse[j];
   692  			// Require the types to match but also require the addrtaken bits to match.
   693  			// If a variable's address is taken, that disables registerization for the individual
   694  			// words of the variable (for example, the base,len,cap of a slice).
   695  			// We don't want to merge a non-addressed var with an addressed one and
   696  			// inhibit registerization of the former.
   697  			if(eqtype(t, v1->node->type) && v->node->addrtaken == v1->node->addrtaken) {
   698  				inuse[j] = inuse[nfree++];
   699  				if(v1->merge)
   700  					v->merge = v1->merge;
   701  				else
   702  					v->merge = v1;
   703  				nkill++;
   704  				break;
   705  			}
   706  		}
   707  
   708  		// Sort v into inuse.
   709  		j = ninuse++;
   710  		while(j > 0 && inuse[j-1]->end < v->end) {
   711  			inuse[j] = inuse[j-1];
   712  			j--;
   713  		}
   714  		inuse[j] = v;
   715  	}
   716  
   717  	if(Debug) {
   718  		print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill);
   719  		for(v=var; v<var+nvar; v++) {
   720  			print("var %#N %T %lld-%lld", v->node, v->node->type, v->start, v->end);
   721  			if(v->addr)
   722  				print(" addr=1");
   723  			if(v->removed)
   724  				print(" dead=1");
   725  			if(v->merge)
   726  				print(" merge %#N", v->merge->node);
   727  			if(v->start == v->end)
   728  				print(" %P", v->def->f.prog);
   729  			print("\n");
   730  		}
   731  	
   732  		if(Debug > 1)
   733  			dumpit("after", g->start, 0);
   734  	}
   735  
   736  	// Update node references to use merged temporaries.
   737  	for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
   738  		p = r->f.prog;
   739  		if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil)
   740  			p->from.node = v->merge->node;
   741  		if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil)
   742  			p->to.node = v->merge->node;
   743  	}
   744  
   745  	// Delete merged nodes from declaration list.
   746  	for(lp = &curfn->dcl; (l = *lp); ) {
   747  		curfn->dcl->end = l;
   748  		n = l->n;
   749  		v = n->opt;
   750  		if(v && (v->merge || v->removed)) {
   751  			*lp = l->next;
   752  			continue;
   753  		}
   754  		lp = &l->next;
   755  	}
   756  
   757  	// Clear aux structures.
   758  	for(v=var; v<var+nvar; v++)
   759  		v->node->opt = nil;
   760  	free(var);
   761  	free(bystart);
   762  	free(inuse);
   763  	flowend(g);
   764  }
   765  
   766  static void
   767  mergewalk(TempVar *v, TempFlow *r0, uint32 gen)
   768  {
   769  	Prog *p;
   770  	TempFlow *r1, *r, *r2;
   771  	
   772  	for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.p1) {
   773  		if(r1->f.active == gen)
   774  			break;
   775  		r1->f.active = gen;
   776  		p = r1->f.prog;
   777  		if(v->end < p->pc)
   778  			v->end = p->pc;
   779  		if(r1 == v->def) {
   780  			v->start = p->pc;
   781  			break;
   782  		}
   783  	}
   784  	
   785  	for(r = r0; r != r1; r = (TempFlow*)r->f.p1)
   786  		for(r2 = (TempFlow*)r->f.p2; r2 != nil; r2 = (TempFlow*)r2->f.p2link)
   787  			mergewalk(v, r2, gen);
   788  }
   789  
   790  static void
   791  varkillwalk(TempVar *v, TempFlow *r0, uint32 gen)
   792  {
   793  	Prog *p;
   794  	TempFlow *r1, *r;
   795  	
   796  	for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.s1) {
   797  		if(r1->f.active == gen)
   798  			break;
   799  		r1->f.active = gen;
   800  		p = r1->f.prog;
   801  		if(v->end < p->pc)
   802  			v->end = p->pc;
   803  		if(v->start > p->pc)
   804  			v->start = p->pc;
   805  		if(p->as == ARET || (p->as == AVARKILL && p->to.node == v->node))
   806  			break;
   807  	}
   808  	
   809  	for(r = r0; r != r1; r = (TempFlow*)r->f.s1)
   810  		varkillwalk(v, (TempFlow*)r->f.s2, gen);
   811  }
   812  
   813  // Eliminate redundant nil pointer checks.
   814  //
   815  // The code generation pass emits a CHECKNIL for every possibly nil pointer.
   816  // This pass removes a CHECKNIL if every predecessor path has already
   817  // checked this value for nil.
   818  //
   819  // Simple backwards flood from check to definition.
   820  // Run prog loop backward from end of program to beginning to avoid quadratic
   821  // behavior removing a run of checks.
   822  //
   823  // Assume that stack variables with address not taken can be loaded multiple times
   824  // from memory without being rechecked. Other variables need to be checked on
   825  // each load.
   826  	
   827  typedef struct NilVar NilVar;
   828  typedef struct NilFlow NilFlow;
   829  
// NilFlow is the per-instruction annotation used by nilopt.
// The Flow must stay the first field so that Flow* and NilFlow* can be
// cast to each other.
struct NilFlow {
	Flow f;
	int kill; // set on a CHECKNIL that nilopt decided to remove
};
   834  
   835  static void nilwalkback(NilFlow *rcheck);
   836  static void nilwalkfwd(NilFlow *rcheck);
   837  
   838  void
   839  nilopt(Prog *firstp)
   840  {
   841  	NilFlow *r;
   842  	Prog *p;
   843  	Graph *g;
   844  	int ncheck, nkill;
   845  
   846  	g = flowstart(firstp, sizeof(NilFlow));
   847  	if(g == nil)
   848  		return;
   849  
   850  	if(debug_checknil > 1 /* || strcmp(curfn->nname->sym->name, "f1") == 0 */)
   851  		dumpit("nilopt", g->start, 0);
   852  
   853  	ncheck = 0;
   854  	nkill = 0;
   855  	for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) {
   856  		p = r->f.prog;
   857  		if(p->as != ACHECKNIL || !regtyp(&p->from))
   858  			continue;
   859  		ncheck++;
   860  		if(stackaddr(&p->from)) {
   861  			if(debug_checknil && p->lineno > 1)
   862  				warnl(p->lineno, "removed nil check of SP address");
   863  			r->kill = 1;
   864  			continue;
   865  		}
   866  		nilwalkfwd(r);
   867  		if(r->kill) {
   868  			if(debug_checknil && p->lineno > 1)
   869  				warnl(p->lineno, "removed nil check before indirect");
   870  			continue;
   871  		}
   872  		nilwalkback(r);
   873  		if(r->kill) {
   874  			if(debug_checknil && p->lineno > 1)
   875  				warnl(p->lineno, "removed repeated nil check");
   876  			continue;
   877  		}
   878  	}
   879  	
   880  	for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) {
   881  		if(r->kill) {
   882  			nkill++;
   883  			excise(&r->f);
   884  		}
   885  	}
   886  
   887  	flowend(g);
   888  	
   889  	if(debug_checknil > 1)
   890  		print("%S: removed %d of %d nil checks\n", curfn->nname->sym, nkill, ncheck);
   891  }
   892  
   893  static void
   894  nilwalkback(NilFlow *rcheck)
   895  {
   896  	Prog *p;
   897  	ProgInfo info;
   898  	NilFlow *r;
   899  	
   900  	for(r = rcheck; r != nil; r = (NilFlow*)uniqp(&r->f)) {
   901  		p = r->f.prog;
   902  		proginfo(&info, p);
   903  		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) {
   904  			// Found initialization of value we're checking for nil.
   905  			// without first finding the check, so this one is unchecked.
   906  			return;
   907  		}
   908  		if(r != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from)) {
   909  			rcheck->kill = 1;
   910  			return;
   911  		}
   912  	}
   913  
   914  	// Here is a more complex version that scans backward across branches.
   915  	// It assumes rcheck->kill = 1 has been set on entry, and its job is to find a reason
   916  	// to keep the check (setting rcheck->kill = 0).
   917  	// It doesn't handle copying of aggregates as well as I would like,
   918  	// nor variables with their address taken,
   919  	// and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3.
   920  	/*
   921  	for(r1 = r0; r1 != nil; r1 = (NilFlow*)r1->f.p1) {
   922  		if(r1->f.active == gen)
   923  			break;
   924  		r1->f.active = gen;
   925  		p = r1->f.prog;
   926  		
   927  		// If same check, stop this loop but still check
   928  		// alternate predecessors up to this point.
   929  		if(r1 != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from))
   930  			break;
   931  
   932  		proginfo(&info, p);
   933  		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) {
   934  			// Found initialization of value we're checking for nil.
   935  			// without first finding the check, so this one is unchecked.
   936  			rcheck->kill = 0;
   937  			return;
   938  		}
   939  		
   940  		if(r1->f.p1 == nil && r1->f.p2 == nil) {
   941  			print("lost pred for %P\n", rcheck->f.prog);
   942  			for(r1=r0; r1!=nil; r1=(NilFlow*)r1->f.p1) {
   943  				proginfo(&info, r1->f.prog);
   944  				print("\t%P %d %d %D %D\n", r1->f.prog, info.flags&RightWrite, sameaddr(&r1->f.prog->to, &rcheck->f.prog->from), &r1->f.prog->to, &rcheck->f.prog->from);
   945  			}
   946  			fatal("lost pred trail");
   947  		}
   948  	}
   949  
   950  	for(r = r0; r != r1; r = (NilFlow*)r->f.p1)
   951  		for(r2 = (NilFlow*)r->f.p2; r2 != nil; r2 = (NilFlow*)r2->f.p2link)
   952  			nilwalkback(rcheck, r2, gen);
   953  	*/
   954  }
   955  
   956  static void
   957  nilwalkfwd(NilFlow *rcheck)
   958  {
   959  	NilFlow *r, *last;
   960  	Prog *p;
   961  	ProgInfo info;
   962  	
   963  	// If the path down from rcheck dereferences the address
   964  	// (possibly with a small offset) before writing to memory
   965  	// and before any subsequent checks, it's okay to wait for
   966  	// that implicit check. Only consider this basic block to
   967  	// avoid problems like:
   968  	//	_ = *x // should panic
   969  	//	for {} // no writes but infinite loop may be considered visible
   970  	last = nil;
   971  	for(r = (NilFlow*)uniqs(&rcheck->f); r != nil; r = (NilFlow*)uniqs(&r->f)) {
   972  		p = r->f.prog;
   973  		proginfo(&info, p);
   974  		
   975  		if((info.flags & LeftRead) && smallindir(&p->from, &rcheck->f.prog->from)) {
   976  			rcheck->kill = 1;
   977  			return;
   978  		}
   979  		if((info.flags & (RightRead|RightWrite)) && smallindir(&p->to, &rcheck->f.prog->from)) {
   980  			rcheck->kill = 1;
   981  			return;
   982  		}
   983  		
   984  		// Stop if another nil check happens.
   985  		if(p->as == ACHECKNIL)
   986  			return;
   987  		// Stop if value is lost.
   988  		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from))
   989  			return;
   990  		// Stop if memory write.
   991  		if((info.flags & RightWrite) && !regtyp(&p->to))
   992  			return;
   993  		// Stop if we jump backward.
   994  		// This test is valid because all the NilFlow* are pointers into
   995  		// a single contiguous array. We will need to add an explicit
   996  		// numbering when the code is converted to Go.
   997  		if(last != nil && r <= last)
   998  			return;
   999  		last = r;
  1000  	}
  1001  }