github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/compile/internal/gc/popt.go (about)

     1  // Derived from Inferno utils/6c/gc.h
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // "Portable" optimizations.
    32  
    33  package gc
    34  
    35  import (
    36  	"cmd/internal/obj"
    37  	"fmt"
    38  	"sort"
    39  	"strings"
    40  )
    41  
// OptStats holds counters describing the work performed by the
// optimization passes. The fields are incremented elsewhere in the
// package (presumably by the register allocator and peephole passes —
// the updates are not visible in this file) and are dumped for
// compiler-debugging statistics.
type OptStats struct {
	Ncvtreg int32
	Nspill  int32
	Nreload int32
	Ndelmov int32
	Nvar    int32
	Naddr   int32
}
    50  
// Ostats is the package-level accumulator for optimizer statistics.
var Ostats OptStats
    52  
// noreturn_symlist caches the symbols of runtime functions that never
// return. It is filled lazily by Noreturn; the trailing nil entries
// act as a terminator for the lookup loop.
var noreturn_symlist [10]*Sym
    54  
    55  // p is a call instruction. Does the call fail to return?
    56  func Noreturn(p *obj.Prog) bool {
    57  	if noreturn_symlist[0] == nil {
    58  		noreturn_symlist[0] = Pkglookup("panicindex", Runtimepkg)
    59  		noreturn_symlist[1] = Pkglookup("panicslice", Runtimepkg)
    60  		noreturn_symlist[2] = Pkglookup("throwinit", Runtimepkg)
    61  		noreturn_symlist[3] = Pkglookup("gopanic", Runtimepkg)
    62  		noreturn_symlist[4] = Pkglookup("panicwrap", Runtimepkg)
    63  		noreturn_symlist[5] = Pkglookup("throwreturn", Runtimepkg)
    64  		noreturn_symlist[6] = Pkglookup("selectgo", Runtimepkg)
    65  		noreturn_symlist[7] = Pkglookup("block", Runtimepkg)
    66  	}
    67  
    68  	if p.To.Node == nil {
    69  		return false
    70  	}
    71  	s := ((p.To.Node).(*Node)).Sym
    72  	if s == nil {
    73  		return false
    74  	}
    75  	for i := 0; noreturn_symlist[i] != nil; i++ {
    76  		if s == noreturn_symlist[i] {
    77  			return true
    78  		}
    79  	}
    80  	return false
    81  }
    82  
    83  // JMP chasing and removal.
    84  //
    85  // The code generator depends on being able to write out jump
    86  // instructions that it can jump to now but fill in later.
    87  // the linker will resolve them nicely, but they make the code
    88  // longer and more difficult to follow during debugging.
    89  // Remove them.
    90  
    91  // what instruction does a JMP to p eventually land on?
    92  func chasejmp(p *obj.Prog, jmploop *int) *obj.Prog {
    93  	n := 0
    94  	for p != nil && p.As == obj.AJMP && p.To.Type == obj.TYPE_BRANCH {
    95  		n++
    96  		if n > 10 {
    97  			*jmploop = 1
    98  			break
    99  		}
   100  
   101  		p = p.To.Val.(*obj.Prog)
   102  	}
   103  
   104  	return p
   105  }
   106  
// reuse reg pointer for mark/sweep state.
// leave reg==nil at end because alive==nil.
//
// Sentinel values stored in Prog.Opt during fixjmp's mark/sweep:
// Opt == dead marks an instruction as a deletion candidate;
// Opt == alive (nil) marks it reachable.
var alive interface{} = nil
var dead interface{} = 1
   111  
// mark all code reachable from firstp as alive.
// The walk follows fallthrough (Link) and recurses into branch targets;
// it stops at instructions already marked alive (Opt != dead) and does
// not fall through past unconditional transfers (JMP/RET/UNDEF).
func mark(firstp *obj.Prog) {
	for p := firstp; p != nil; p = p.Link {
		if p.Opt != dead {
			break
		}
		p.Opt = alive
		// Recurse into branch targets. Calls return, so ACALL does not
		// need its target marked here.
		if p.As != obj.ACALL && p.To.Type == obj.TYPE_BRANCH && p.To.Val.(*obj.Prog) != nil {
			mark(p.To.Val.(*obj.Prog))
		}
		if p.As == obj.AJMP || p.As == obj.ARET || p.As == obj.AUNDEF {
			break
		}
	}
}
   127  
// fixjmp simplifies the jump structure of the instruction list starting
// at firstp: it resolves jump-to-jump chains, deletes unreachable code,
// and elides JMPs to the immediately following instruction.
// Note: it assumes firstp is non-nil (last is dereferenced after pass 3).
func fixjmp(firstp *obj.Prog) {
	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\nfixjmp\n")
	}

	// pass 1: resolve jump to jump, mark all code as dead.
	jmploop := 0

	for p := firstp; p != nil; p = p.Link {
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("%v\n", p)
		}
		if p.As != obj.ACALL && p.To.Type == obj.TYPE_BRANCH && p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.AJMP {
			p.To.Val = chasejmp(p.To.Val.(*obj.Prog), &jmploop)
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf("->%v\n", p)
			}
		}

		p.Opt = dead
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\n")
	}

	// pass 2: mark all reachable code alive
	mark(firstp)

	// pass 3: delete dead code (mostly JMPs).
	// Unreachable instructions are unlinked from the Link chain.
	var last *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.Opt == dead {
			if p.Link == nil && p.As == obj.ARET && last != nil && last.As != obj.ARET {
				// This is the final ARET, and the code so far doesn't have one.
				// Let it stay. The register allocator assumes that all live code in
				// the function can be traversed by starting at all the RET instructions
				// and following predecessor links. If we remove the final RET,
				// this assumption will not hold in the case of an infinite loop
				// at the end of a function.
				// Keep the RET but mark it dead for the liveness analysis.
				p.Mode = 1
			} else {
				if Debug['R'] != 0 && Debug['v'] != 0 {
					fmt.Printf("del %v\n", p)
				}
				continue
			}
		}

		if last != nil {
			last.Link = p
		}
		last = p
	}

	last.Link = nil

	// pass 4: elide JMP to next instruction.
	// only safe if there are no jumps to JMPs anymore.
	if jmploop == 0 {
		var last *obj.Prog
		for p := firstp; p != nil; p = p.Link {
			if p.As == obj.AJMP && p.To.Type == obj.TYPE_BRANCH && p.To.Val == p.Link {
				if Debug['R'] != 0 && Debug['v'] != 0 {
					fmt.Printf("del %v\n", p)
				}
				continue
			}

			if last != nil {
				last.Link = p
			}
			last = p
		}

		last.Link = nil
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\n")
		for p := firstp; p != nil; p = p.Link {
			fmt.Printf("%v\n", p)
		}
		fmt.Printf("\n")
	}
}
   216  
   217  // Control flow analysis. The Flow structures hold predecessor and successor
   218  // information as well as basic loop analysis.
   219  //
   220  //	graph = flowstart(firstp, 0);
   221  //	... use flow graph ...
   222  //	flowend(graph); // free graph
   223  //
   224  // Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
   225  //
   226  //	for(f = graph->start; f != nil; f = f->link)
   227  //
   228  // or, given an instruction f, to iterate over all the predecessors, which is
   229  // f->p1 and this list:
   230  //
   231  //	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
   232  //
   233  // The size argument to flowstart specifies an amount of zeroed memory
   234  // to allocate in every f->data field, for use by the client.
   235  // If size == 0, f->data will be nil.
   236  
// flowmark is a sentinel: Flowstart's counting pass stores &flowmark in
// Prog.Opt to mark instructions that will receive a Flow annotation.
var flowmark int

// MaxFlowProg is the maximum size program (counted in instructions)
// for which the flow code will build a graph. Functions larger than this limit
// will not have flow graphs and consequently will not be optimized.
const MaxFlowProg = 50000
   243  
// Flowstart builds and returns the control flow graph for the
// instruction list starting at firstp, or nil if there is nothing to
// annotate or the function exceeds MaxFlowProg instructions.
// Each flow-relevant instruction gets a Flow annotation reachable via
// Prog.Opt; if newData is non-nil it is called once per Flow to
// initialize its Data field. Release the graph with Flowend.
func Flowstart(firstp *obj.Prog, newData func() interface{}) *Graph {
	// Count and mark instructions to annotate.
	nf := 0

	for p := firstp; p != nil; p = p.Link {
		p.Opt = nil // should be already, but just in case
		Thearch.Proginfo(p)
		if p.Info.Flags&Skip != 0 {
			continue
		}
		p.Opt = &flowmark
		nf++
	}

	if nf == 0 {
		return nil
	}

	if nf >= MaxFlowProg {
		if Debug['v'] != 0 {
			Warn("%v is too big (%d instructions)", Curfn.Func.Nname.Sym, nf)
		}
		return nil
	}

	// Allocate annotations and assign to instructions.
	// All Flow nodes come from one preallocated slab (ff); each loop
	// iteration peels the next element off the front.
	graph := new(Graph)
	ff := make([]Flow, nf)
	start := &ff[0]
	id := 0
	var last *Flow
	for p := firstp; p != nil; p = p.Link {
		if p.Opt == nil {
			continue
		}
		f := &ff[0]
		ff = ff[1:]
		p.Opt = f
		f.Prog = p
		if last != nil {
			last.Link = f
		}
		last = f
		if newData != nil {
			f.Data = newData()
		}
		f.Id = int32(id)
		id++
	}

	// Fill in pred/succ information.
	var f1 *Flow
	var p *obj.Prog
	for f := start; f != nil; f = f.Link {
		p = f.Prog
		// Fallthrough edge, unless the instruction unconditionally
		// transfers control elsewhere (Break flag).
		if p.Info.Flags&Break == 0 {
			f1 = f.Link
			f.S1 = f1
			f1.P1 = f
		}

		// Branch edge: secondary successor S2, with the predecessor
		// side chained through P2/P2link on the target.
		if p.To.Type == obj.TYPE_BRANCH {
			if p.To.Val == nil {
				Fatalf("pnil %v", p)
			}
			f1 = p.To.Val.(*obj.Prog).Opt.(*Flow)
			if f1 == nil {
				Fatalf("fnil %v / %v", p, p.To.Val.(*obj.Prog))
			}
			if f1 == f {
				//fatal("self loop %v", p);
				continue
			}

			f.S2 = f1
			f.P2link = f1.P2
			f1.P2 = f
		}
	}

	graph.Start = start
	graph.Num = nf
	return graph
}
   328  
   329  func Flowend(graph *Graph) {
   330  	for f := graph.Start; f != nil; f = f.Link {
   331  		f.Prog.Info.Flags = 0 // drop cached proginfo
   332  		f.Prog.Opt = nil
   333  	}
   334  }
   335  
// find looping structure
//
// 1) find reverse postordering
// 2) find approximate dominators,
//	the actual dominators if the flow graph is reducible
//	otherwise, dominators plus some other non-dominators.
//	See Matthew S. Hecht and Jeffrey D. Ullman,
//	"Analysis of a Simple Algorithm for Global Data Flow Problems",
//	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
//	Oct. 1-3, 1973, pp.  207-217.
// 3) find all nodes with a predecessor dominated by the current node.
//	such a node is a loop head.
//	recursively, all preds with a greater rpo number are in the loop
//
// postorder performs the depth-first walk for step 1: it stores the
// nodes reachable from r into rpo2r in postorder, using n as the next
// free index, and returns the updated count. Rpo serves as the
// visited flag (set to 1) during the walk; flowrpo overwrites it with
// the real rpo numbers afterwards.
func postorder(r *Flow, rpo2r []*Flow, n int32) int32 {
	r.Rpo = 1
	r1 := r.S1
	if r1 != nil && r1.Rpo == 0 {
		n = postorder(r1, rpo2r, n)
	}
	r1 = r.S2
	if r1 != nil && r1.Rpo == 0 {
		n = postorder(r1, rpo2r, n)
	}
	rpo2r[n] = r
	n++
	return n
}
   363  
   364  func rpolca(idom []int32, rpo1 int32, rpo2 int32) int32 {
   365  	if rpo1 == -1 {
   366  		return rpo2
   367  	}
   368  	var t int32
   369  	for rpo1 != rpo2 {
   370  		if rpo1 > rpo2 {
   371  			t = rpo2
   372  			rpo2 = rpo1
   373  			rpo1 = t
   374  		}
   375  
   376  		for rpo1 < rpo2 {
   377  			t = idom[rpo2]
   378  			if t >= rpo2 {
   379  				Fatalf("bad idom")
   380  			}
   381  			rpo2 = t
   382  		}
   383  	}
   384  
   385  	return rpo1
   386  }
   387  
   388  func doms(idom []int32, r int32, s int32) bool {
   389  	for s > r {
   390  		s = idom[s]
   391  	}
   392  	return s == r
   393  }
   394  
// loophead reports whether r is a loop head: some predecessor of r is
// dominated by r itself (i.e. there is a back edge into r), judged by
// the approximate dominator relation in idom.
func loophead(idom []int32, r *Flow) bool {
	src := r.Rpo
	if r.P1 != nil && doms(idom, src, r.P1.Rpo) {
		return true
	}
	// The remaining predecessors are chained through P2/P2link.
	for r = r.P2; r != nil; r = r.P2link {
		if doms(idom, src, r.Rpo) {
			return true
		}
	}
	return false
}
   407  
// loopmark walks predecessor edges backward from r, adding LOOP to the
// Loop weight of every node belonging to the loop headed at rpo number
// head. Nodes with Rpo < head are outside the loop; Active == head
// marks nodes already visited for this head.
func loopmark(rpo2r **Flow, head int32, r *Flow) {
	if r.Rpo < head || r.Active == head {
		return
	}
	r.Active = head
	r.Loop += LOOP
	if r.P1 != nil {
		loopmark(rpo2r, head, r.P1)
	}
	for r = r.P2; r != nil; r = r.P2link {
		loopmark(rpo2r, head, r)
	}
}
   421  
// flowrpo computes the reverse postorder numbering (g.Rpo, Flow.Rpo),
// approximate dominators, and loop nesting weights (Flow.Loop) for
// graph g. See the comment above postorder for algorithm references.
func flowrpo(g *Graph) {
	g.Rpo = make([]*Flow, g.Num)
	idom := make([]int32, g.Num)

	for r1 := g.Start; r1 != nil; r1 = r1.Link {
		r1.Active = 0
	}

	rpo2r := g.Rpo
	d := postorder(g.Start, rpo2r, 0)
	nr := int32(g.Num)
	if d > nr {
		Fatalf("too many reg nodes %d %d", d, nr)
	}
	// d may be < g.Num when some nodes are unreachable; only the first
	// d entries are meaningful from here on.
	nr = d
	// Reverse the postorder in place to obtain reverse postorder.
	var r1 *Flow
	for i := int32(0); i < nr/2; i++ {
		r1 = rpo2r[i]
		rpo2r[i] = rpo2r[nr-1-i]
		rpo2r[nr-1-i] = r1
	}

	// Overwrite the visited flags with the final rpo numbers.
	for i := int32(0); i < nr; i++ {
		rpo2r[i].Rpo = i
	}

	// Approximate dominators: idom[i] is the rpolca of all
	// already-numbered predecessors of node i.
	idom[0] = 0
	var me int32
	for i := int32(0); i < nr; i++ {
		r1 = rpo2r[i]
		me = r1.Rpo
		d = -1

		// rpo2r[r->rpo] == r protects against considering dead code,
		// which has r->rpo == 0.
		if r1.P1 != nil && rpo2r[r1.P1.Rpo] == r1.P1 && r1.P1.Rpo < me {
			d = r1.P1.Rpo
		}
		for r1 = r1.P2; r1 != nil; r1 = r1.P2link {
			if rpo2r[r1.Rpo] == r1 && r1.Rpo < me {
				d = rpolca(idom, d, r1.Rpo)
			}
		}
		idom[i] = d
	}

	// Every node gets a base weight of 1; loop heads raise the weight
	// of their loop body by LOOP per nesting level.
	for i := int32(0); i < nr; i++ {
		r1 = rpo2r[i]
		r1.Loop++
		if r1.P2 != nil && loophead(idom, r1) {
			loopmark(&rpo2r[0], i, r1)
		}
	}

	for r1 := g.Start; r1 != nil; r1 = r1.Link {
		r1.Active = 0
	}
}
   480  
   481  func Uniqp(r *Flow) *Flow {
   482  	r1 := r.P1
   483  	if r1 == nil {
   484  		r1 = r.P2
   485  		if r1 == nil || r1.P2link != nil {
   486  			return nil
   487  		}
   488  	} else if r.P2 != nil {
   489  		return nil
   490  	}
   491  	return r1
   492  }
   493  
   494  func Uniqs(r *Flow) *Flow {
   495  	r1 := r.S1
   496  	if r1 == nil {
   497  		r1 = r.S2
   498  		if r1 == nil {
   499  			return nil
   500  		}
   501  	} else if r.S2 != nil {
   502  		return nil
   503  	}
   504  	return r1
   505  }
   506  
// The compilers assume they can generate temporary variables
// as needed to preserve the right semantics or simplify code
// generation and the back end will still generate good code.
// This results in a large number of ephemeral temporary variables.
// Merge temps with non-overlapping lifetimes and equal types using the
// greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation",
// ACM TOPLAS 1999.

// A TempVar tracks one mergeable temporary during mergetemp: its
// defining and using instructions, its live range in Prog.pc units,
// and the variable it was merged into, if any.
type TempVar struct {
	node    *Node
	def     *Flow    // definition of temp var
	use     *Flow    // use list, chained through Flow.data
	merge   *TempVar // merge var with this one
	start   int64    // smallest Prog.pc in live range
	end     int64    // largest Prog.pc in live range
	addr    bool     // address taken - no accurate end
	removed bool     // removed from program
}
   525  
   526  // startcmp sorts TempVars by start, then id, then symbol name.
   527  type startcmp []*TempVar
   528  
   529  func (x startcmp) Len() int      { return len(x) }
   530  func (x startcmp) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
   531  func (x startcmp) Less(i, j int) bool {
   532  	a := x[i]
   533  	b := x[j]
   534  
   535  	if a.start < b.start {
   536  		return true
   537  	}
   538  	if a.start > b.start {
   539  		return false
   540  	}
   541  
   542  	// Order what's left by id or symbol name,
   543  	// just so that sort is forced into a specific ordering,
   544  	// so that the result of the sort does not depend on
   545  	// the sort implementation.
   546  	if a.def != b.def {
   547  		return int(a.def.Id-b.def.Id) < 0
   548  	}
   549  	if a.node != b.node {
   550  		return a.node.Sym.Name < b.node.Sym.Name
   551  	}
   552  	return false
   553  }
   554  
   555  // Is n available for merging?
   556  func canmerge(n *Node) bool {
   557  	return n.Class == PAUTO && strings.HasPrefix(n.Sym.Name, "autotmp")
   558  }
   559  
// mergetemp merges compiler-generated temporaries with non-overlapping
// lifetimes and equal types, and deletes write-only or immediately
// consumed temporaries outright. It implements the greedy linear-scan
// algorithm referenced above TempVar. Merged/removed temporaries are
// dropped from Curfn's declaration list and all instruction operands
// are rewritten to refer to the surviving variable.
func mergetemp(firstp *obj.Prog) {
	const (
		debugmerge = 0
	)

	g := Flowstart(firstp, nil)
	if g == nil {
		return
	}

	// Build list of all mergeable variables.
	// Each candidate node gets a TempVar hung off n.Opt().
	var vars []*TempVar
	for l := Curfn.Func.Dcl; l != nil; l = l.Next {
		if n := l.N; canmerge(n) {
			v := &TempVar{}
			vars = append(vars, v)
			n.SetOpt(v)
			v.node = n
		}
	}

	// Build list of uses.
	// We assume that the earliest reference to a temporary is its definition.
	// This is not true of variables in general but our temporaries are all
	// single-use (that's why we have so many!).
	for f := g.Start; f != nil; f = f.Link {
		p := f.Prog
		if p.From.Node != nil && ((p.From.Node).(*Node)).Opt() != nil && p.To.Node != nil && ((p.To.Node).(*Node)).Opt() != nil {
			Fatalf("double node %v", p)
		}
		var v *TempVar
		n, _ := p.From.Node.(*Node)
		if n != nil {
			v, _ = n.Opt().(*TempVar)
		}
		if v == nil {
			n, _ = p.To.Node.(*Node)
			if n != nil {
				v, _ = n.Opt().(*TempVar)
			}
		}
		if v != nil {
			if v.def == nil {
				v.def = f
			}
			// Push f onto v's use list, chained through Flow.Data.
			f.Data = v.use
			v.use = f
			if n == p.From.Node && (p.Info.Flags&LeftAddr != 0) {
				v.addr = true
			}
		}
	}

	if debugmerge > 1 && Debug['v'] != 0 {
		Dumpit("before", g.Start, 0)
	}

	nkill := 0

	// Special case.
	for _, v := range vars {
		if v.addr {
			continue
		}

		// Used in only one instruction, which had better be a write.
		f := v.use
		if f != nil && f.Data.(*Flow) == nil {
			p := f.Prog
			if p.To.Node == v.node && (p.Info.Flags&RightWrite != 0) && p.Info.Flags&RightRead == 0 {
				p.As = obj.ANOP
				p.To = obj.Addr{}
				v.removed = true
				if debugmerge > 0 && Debug['v'] != 0 {
					fmt.Printf("drop write-only %v\n", v.node.Sym)
				}
			} else {
				Fatalf("temp used and not set: %v", p)
			}
			nkill++
			continue
		}

		// Written in one instruction, read in the next, otherwise unused,
		// no jumps to the next instruction. Happens mainly in 386 compiler.
		f = v.use
		if f != nil && f.Link == f.Data.(*Flow) && (f.Data.(*Flow)).Data.(*Flow) == nil && Uniqp(f.Link) == f {
			p := f.Prog
			p1 := f.Link.Prog
			const (
				SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD
			)
			if p.From.Node == v.node && p1.To.Node == v.node && (p.Info.Flags&Move != 0) && (p.Info.Flags|p1.Info.Flags)&(LeftAddr|RightAddr) == 0 && p.Info.Flags&SizeAny == p1.Info.Flags&SizeAny {
				p1.From = p.From
				Thearch.Excise(f)
				v.removed = true
				if debugmerge > 0 && Debug['v'] != 0 {
					fmt.Printf("drop immediate-use %v\n", v.node.Sym)
				}
			}

			// NOTE(review): nkill is incremented even when the inner
			// condition fails and nothing was removed; it is only a
			// debug statistic, so the over-count appears harmless.
			nkill++
			continue
		}
	}

	// Traverse live range of each variable to set start, end.
	// Each flood uses a new value of gen so that we don't have
	// to clear all the r->active words after each variable.
	gen := uint32(0)

	for _, v := range vars {
		gen++
		for f := v.use; f != nil; f = f.Data.(*Flow) {
			mergewalk(v, f, gen)
		}
		if v.addr {
			gen++
			for f := v.use; f != nil; f = f.Data.(*Flow) {
				varkillwalk(v, f, gen)
			}
		}
	}

	// Sort variables by start.
	bystart := make([]*TempVar, len(vars))
	copy(bystart, vars)
	sort.Sort(startcmp(bystart))

	// List of in-use variables, sorted by end, so that the ones that
	// will last the longest are the earliest ones in the array.
	// The tail inuse[nfree:] holds no-longer-used variables.
	// In theory we should use a sorted tree so that insertions are
	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
	// In practice, it doesn't really matter.
	inuse := make([]*TempVar, len(bystart))

	ninuse := 0
	nfree := len(bystart)
	for _, v := range bystart {
		if debugmerge > 0 && Debug['v'] != 0 {
			fmt.Printf("consider %v: removed=%t\n", Nconv(v.node, obj.FmtSharp), v.removed)
		}

		if v.removed {
			continue
		}

		// Expire no longer in use.
		for ninuse > 0 && inuse[ninuse-1].end < v.start {
			ninuse--
			nfree--
			inuse[nfree] = inuse[ninuse]
		}

		if debugmerge > 0 && Debug['v'] != 0 {
			fmt.Printf("consider %v: removed=%t nfree=%d nvar=%d\n", Nconv(v.node, obj.FmtSharp), v.removed, nfree, len(bystart))
		}

		// Find old temp to reuse if possible.
		t := v.node.Type

		for j := nfree; j < len(inuse); j++ {
			v1 := inuse[j]
			if debugmerge > 0 && Debug['v'] != 0 {
				fmt.Printf("consider %v: maybe %v: type=%v,%v addrtaken=%v,%v\n", Nconv(v.node, obj.FmtSharp), Nconv(v1.node, obj.FmtSharp), t, v1.node.Type, v.node.Addrtaken, v1.node.Addrtaken)
			}

			// Require the types to match but also require the addrtaken bits to match.
			// If a variable's address is taken, that disables registerization for the individual
			// words of the variable (for example, the base,len,cap of a slice).
			// We don't want to merge a non-addressed var with an addressed one and
			// inhibit registerization of the former.
			if Eqtype(t, v1.node.Type) && v.node.Addrtaken == v1.node.Addrtaken {
				inuse[j] = inuse[nfree]
				nfree++
				if v1.merge != nil {
					v.merge = v1.merge
				} else {
					v.merge = v1
				}
				nkill++
				break
			}
		}

		// Sort v into inuse.
		j := ninuse
		ninuse++

		for j > 0 && inuse[j-1].end < v.end {
			inuse[j] = inuse[j-1]
			j--
		}

		inuse[j] = v
	}

	if debugmerge > 0 && Debug['v'] != 0 {
		fmt.Printf("%v [%d - %d]\n", Curfn.Func.Nname.Sym, len(vars), nkill)
		for _, v := range vars {
			fmt.Printf("var %v %v %d-%d", Nconv(v.node, obj.FmtSharp), v.node.Type, v.start, v.end)
			if v.addr {
				fmt.Printf(" addr=true")
			}
			if v.removed {
				fmt.Printf(" removed=true")
			}
			if v.merge != nil {
				fmt.Printf(" merge %v", Nconv(v.merge.node, obj.FmtSharp))
			}
			if v.start == v.end && v.def != nil {
				fmt.Printf(" %v", v.def.Prog)
			}
			fmt.Printf("\n")
		}

		if debugmerge > 1 && Debug['v'] != 0 {
			Dumpit("after", g.Start, 0)
		}
	}

	// Update node references to use merged temporaries.
	for f := g.Start; f != nil; f = f.Link {
		p := f.Prog
		n, _ := p.From.Node.(*Node)
		if n != nil {
			v, _ := n.Opt().(*TempVar)
			if v != nil && v.merge != nil {
				p.From.Node = v.merge.node
			}
		}
		n, _ = p.To.Node.(*Node)
		if n != nil {
			v, _ := n.Opt().(*TempVar)
			if v != nil && v.merge != nil {
				p.To.Node = v.merge.node
			}
		}
	}

	// Delete merged nodes from declaration list.
	for lp := &Curfn.Func.Dcl; ; {
		l := *lp
		if l == nil {
			break
		}

		// Presumably this keeps the list's End pointer valid while
		// entries are being unlinked — TODO confirm against NodeList.
		Curfn.Func.Dcl.End = l
		n := l.N
		v, _ := n.Opt().(*TempVar)
		if v != nil && (v.merge != nil || v.removed) {
			*lp = l.Next
			continue
		}

		lp = &l.Next
	}

	// Clear aux structures.
	for _, v := range vars {
		v.node.SetOpt(nil)
	}

	Flowend(g)
}
   826  
// mergewalk floods backward from use f0 along unique-predecessor (P1)
// chains, extending v.end to cover every pc reached, and stopping at
// v's defining instruction (which sets v.start) or at nodes already
// visited in this generation (Active == gen). It then recurses into
// the branch predecessors (P2/P2link) of each node walked.
func mergewalk(v *TempVar, f0 *Flow, gen uint32) {
	var p *obj.Prog
	var f1 *Flow

	for f1 = f0; f1 != nil; f1 = f1.P1 {
		if uint32(f1.Active) == gen {
			break
		}
		f1.Active = int32(gen)
		p = f1.Prog
		if v.end < p.Pc {
			v.end = p.Pc
		}
		if f1 == v.def {
			v.start = p.Pc
			break
		}
	}

	// Branch predecessors of the P1 chain just walked (f0 up to, but
	// not including, the stopping point f1).
	var f2 *Flow
	for f := f0; f != f1; f = f.P1 {
		for f2 = f.P2; f2 != nil; f2 = f2.P2link {
			mergewalk(v, f2, gen)
		}
	}
}
   853  
// varkillwalk floods forward from f0 along fallthrough (S1) chains,
// widening v's live range to every pc reached, until the function
// returns (ARET) or the variable is explicitly killed (AVARKILL of
// v.node). It then recurses into the branch successors (S2) of each
// node walked. Used for address-taken temporaries, whose ranges cannot
// be bounded accurately by mergewalk alone.
func varkillwalk(v *TempVar, f0 *Flow, gen uint32) {
	var p *obj.Prog
	var f1 *Flow

	for f1 = f0; f1 != nil; f1 = f1.S1 {
		if uint32(f1.Active) == gen {
			break
		}
		f1.Active = int32(gen)
		p = f1.Prog
		if v.end < p.Pc {
			v.end = p.Pc
		}
		if v.start > p.Pc {
			v.start = p.Pc
		}
		if p.As == obj.ARET || (p.As == obj.AVARKILL && p.To.Node == v.node) {
			break
		}
	}

	// Branch successors of the S1 chain just walked.
	for f := f0; f != f1; f = f.S1 {
		varkillwalk(v, f.S2, gen)
	}
}
   879  
   880  // Eliminate redundant nil pointer checks.
   881  //
   882  // The code generation pass emits a CHECKNIL for every possibly nil pointer.
   883  // This pass removes a CHECKNIL if every predecessor path has already
   884  // checked this value for nil.
   885  //
   886  // Simple backwards flood from check to definition.
   887  // Run prog loop backward from end of program to beginning to avoid quadratic
   888  // behavior removing a run of checks.
   889  //
   890  // Assume that stack variables with address not taken can be loaded multiple times
   891  // from memory without being rechecked. Other variables need to be checked on
   892  // each load.
   893  
var killed int // f->data is either nil or &killed; &killed marks a CHECKNIL as removable
   895  
// nilopt removes redundant ACHECKNIL instructions from the function
// starting at firstp. A check is marked removable (f.Data = &killed)
// when the checked value is an SP-relative address, when code ahead in
// the same block would fault on the same address anyway (nilwalkfwd),
// or when the unique-predecessor path already performed the identical
// check (nilwalkback). Marked checks are excised in a second pass.
func nilopt(firstp *obj.Prog) {
	g := Flowstart(firstp, nil)
	if g == nil {
		return
	}

	if Debug_checknil > 1 { // || strcmp(curfn->nname->sym->name, "f1") == 0
		Dumpit("nilopt", g.Start, 0)
	}

	ncheck := 0
	nkill := 0
	var p *obj.Prog
	for f := g.Start; f != nil; f = f.Link {
		p = f.Prog
		if p.As != obj.ACHECKNIL || !Thearch.Regtyp(&p.From) {
			continue
		}
		ncheck++
		if Thearch.Stackaddr(&p.From) {
			if Debug_checknil != 0 && p.Lineno > 1 {
				Warnl(int(p.Lineno), "removed nil check of SP address")
			}
			f.Data = &killed
			continue
		}

		nilwalkfwd(f)
		if f.Data != nil {
			if Debug_checknil != 0 && p.Lineno > 1 {
				Warnl(int(p.Lineno), "removed nil check before indirect")
			}
			continue
		}

		nilwalkback(f)
		if f.Data != nil {
			if Debug_checknil != 0 && p.Lineno > 1 {
				Warnl(int(p.Lineno), "removed repeated nil check")
			}
			continue
		}
	}

	// Excise all checks marked removable above.
	for f := g.Start; f != nil; f = f.Link {
		if f.Data != nil {
			nkill++
			Thearch.Excise(f)
		}
	}

	Flowend(g)

	if Debug_checknil > 1 {
		fmt.Printf("%v: removed %d of %d nil checks\n", Curfn.Func.Nname.Sym, nkill, ncheck)
	}
}
   953  
// nilwalkback scans backward from fcheck along unique-predecessor
// links. If it finds an earlier identical ACHECKNIL before any write
// to the checked value, the check at fcheck is redundant and is marked
// killed; a write found first means the value is freshly set here and
// the check must stay.
func nilwalkback(fcheck *Flow) {
	for f := fcheck; f != nil; f = Uniqp(f) {
		p := f.Prog
		if (p.Info.Flags&RightWrite != 0) && Thearch.Sameaddr(&p.To, &fcheck.Prog.From) {
			// Found initialization of value we're checking for nil.
			// without first finding the check, so this one is unchecked.
			return
		}

		if f != fcheck && p.As == obj.ACHECKNIL && Thearch.Sameaddr(&p.From, &fcheck.Prog.From) {
			fcheck.Data = &killed
			return
		}
	}
}
   969  
   970  // Here is a more complex version that scans backward across branches.
   971  // It assumes fcheck->kill = 1 has been set on entry, and its job is to find a reason
   972  // to keep the check (setting fcheck->kill = 0).
   973  // It doesn't handle copying of aggregates as well as I would like,
   974  // nor variables with their address taken,
   975  // and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3.
   976  /*
   977  for(f1 = f0; f1 != nil; f1 = f1->p1) {
   978  	if(f1->active == gen)
   979  		break;
   980  	f1->active = gen;
   981  	p = f1->prog;
   982  
   983  	// If same check, stop this loop but still check
   984  	// alternate predecessors up to this point.
   985  	if(f1 != fcheck && p->as == ACHECKNIL && thearch.sameaddr(&p->from, &fcheck->prog->from))
   986  		break;
   987  
   988  	if((p.Info.flags & RightWrite) && thearch.sameaddr(&p->to, &fcheck->prog->from)) {
   989  		// Found initialization of value we're checking for nil.
   990  		// without first finding the check, so this one is unchecked.
   991  		fcheck->kill = 0;
   992  		return;
   993  	}
   994  
   995  	if(f1->p1 == nil && f1->p2 == nil) {
   996  		print("lost pred for %v\n", fcheck->prog);
   997  		for(f1=f0; f1!=nil; f1=f1->p1) {
   998  			thearch.proginfo(&info, f1->prog);
   999  			print("\t%v %d %d %D %D\n", r1->prog, info.flags&RightWrite, thearch.sameaddr(&f1->prog->to, &fcheck->prog->from), &f1->prog->to, &fcheck->prog->from);
  1000  		}
  1001  		fatal("lost pred trail");
  1002  	}
  1003  }
  1004  
  1005  for(f = f0; f != f1; f = f->p1)
  1006  	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
  1007  		nilwalkback(fcheck, f2, gen);
  1008  */
  1009  
// nilwalkfwd scans forward from fcheck along unique-successor links
// looking for an implicit dereference of the checked address that
// would fault anyway; if one is found before anything invalidates the
// reasoning, the explicit check is marked killed.
func nilwalkfwd(fcheck *Flow) {
	// If the path down from rcheck dereferences the address
	// (possibly with a small offset) before writing to memory
	// and before any subsequent checks, it's okay to wait for
	// that implicit check. Only consider this basic block to
	// avoid problems like:
	//	_ = *x // should panic
	//	for {} // no writes but infinite loop may be considered visible
	var last *Flow
	for f := Uniqs(fcheck); f != nil; f = Uniqs(f) {
		p := f.Prog
		// A small-offset load through the checked pointer faults if nil.
		if (p.Info.Flags&LeftRead != 0) && Thearch.Smallindir(&p.From, &fcheck.Prog.From) {
			fcheck.Data = &killed
			return
		}

		if (p.Info.Flags&(RightRead|RightWrite) != 0) && Thearch.Smallindir(&p.To, &fcheck.Prog.From) {
			fcheck.Data = &killed
			return
		}

		// Stop if another nil check happens.
		if p.As == obj.ACHECKNIL {
			return
		}

		// Stop if value is lost.
		if (p.Info.Flags&RightWrite != 0) && Thearch.Sameaddr(&p.To, &fcheck.Prog.From) {
			return
		}

		// Stop if memory write.
		if (p.Info.Flags&RightWrite != 0) && !Thearch.Regtyp(&p.To) {
			return
		}

		// Stop if we jump backward.
		// (Id increases monotonically along the instruction list, so a
		// smaller Id means we left the straight-line block.)
		if last != nil && f.Id <= last.Id {
			return
		}
		last = f
	}
}
  1053  }