github.com/euank/go@v0.0.0-20160829210321-495514729181/src/cmd/compile/internal/gc/popt.go (about)

     1  // Derived from Inferno utils/6c/gc.h
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6c/gc.h
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  // "Portable" optimizations.
    32  
    33  package gc
    34  
    35  import (
    36  	"cmd/internal/obj"
    37  	"fmt"
    38  	"sort"
    39  	"strings"
    40  )
    41  
// OptStats collects counters describing the work done by the
// optimization passes. The fields are incremented by the back ends
// (not in this file); the names indicate what each counts
// (e.g. Nspill — spills, Ndelmov — deleted moves).
type OptStats struct {
	Ncvtreg int32
	Nspill  int32
	Nreload int32
	Ndelmov int32
	Nvar    int32
	Naddr   int32
}
    50  
// Ostats is the global optimizer statistics instance.
var Ostats OptStats

// noreturn_symlist caches runtime symbols whose calls never return.
// It is populated lazily on the first call to Noreturn; unused tail
// entries stay nil and act as the list terminator.
var noreturn_symlist [10]*Sym
    54  
    55  // p is a call instruction. Does the call fail to return?
    56  func Noreturn(p *obj.Prog) bool {
    57  	if noreturn_symlist[0] == nil {
    58  		noreturn_symlist[0] = Pkglookup("panicindex", Runtimepkg)
    59  		noreturn_symlist[1] = Pkglookup("panicslice", Runtimepkg)
    60  		noreturn_symlist[2] = Pkglookup("throwinit", Runtimepkg)
    61  		noreturn_symlist[3] = Pkglookup("gopanic", Runtimepkg)
    62  		noreturn_symlist[4] = Pkglookup("panicwrap", Runtimepkg)
    63  		noreturn_symlist[5] = Pkglookup("throwreturn", Runtimepkg)
    64  		noreturn_symlist[6] = Pkglookup("selectgo", Runtimepkg)
    65  		noreturn_symlist[7] = Pkglookup("block", Runtimepkg)
    66  	}
    67  
    68  	if p.To.Node == nil {
    69  		return false
    70  	}
    71  	s := ((p.To.Node).(*Node)).Sym
    72  	if s == nil {
    73  		return false
    74  	}
    75  	for i := 0; noreturn_symlist[i] != nil; i++ {
    76  		if s == noreturn_symlist[i] {
    77  			return true
    78  		}
    79  	}
    80  	return false
    81  }
    82  
    83  // JMP chasing and removal.
    84  //
    85  // The code generator depends on being able to write out jump
    86  // instructions that it can jump to now but fill in later.
    87  // the linker will resolve them nicely, but they make the code
    88  // longer and more difficult to follow during debugging.
    89  // Remove them.
    90  
    91  // what instruction does a JMP to p eventually land on?
    92  func chasejmp(p *obj.Prog, jmploop *int) *obj.Prog {
    93  	n := 0
    94  	for p != nil && p.As == obj.AJMP && p.To.Type == obj.TYPE_BRANCH {
    95  		n++
    96  		if n > 10 {
    97  			*jmploop = 1
    98  			break
    99  		}
   100  
   101  		p = p.To.Val.(*obj.Prog)
   102  	}
   103  
   104  	return p
   105  }
   106  
// reuse reg pointer for mark/sweep state.
// leave reg==nil at end because alive==nil.
// fixjmp pass 1 sets every Prog.Opt to dead (non-nil); mark then
// resets Opt to alive (nil) for each reachable instruction.
var alive interface{} = nil
var dead interface{} = 1
   111  
   112  // mark all code reachable from firstp as alive
   113  func mark(firstp *obj.Prog) {
   114  	for p := firstp; p != nil; p = p.Link {
   115  		if p.Opt != dead {
   116  			break
   117  		}
   118  		p.Opt = alive
   119  		if p.As != obj.ACALL && p.To.Type == obj.TYPE_BRANCH && p.To.Val.(*obj.Prog) != nil {
   120  			mark(p.To.Val.(*obj.Prog))
   121  		}
   122  		if p.As == obj.AJMP || p.As == obj.ARET || p.As == obj.AUNDEF {
   123  			break
   124  		}
   125  	}
   126  }
   127  
// fixjmp cleans up the jump structure of the instruction list rooted
// at firstp: it short-circuits jump-to-jump chains, removes
// unreachable code, and elides JMPs that merely branch to the next
// instruction. With -N (Debug['N']) the rewriting steps are skipped.
func fixjmp(firstp *obj.Prog) {
	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\nfixjmp\n")
	}

	// pass 1: resolve jump to jump, mark all code as dead.
	jmploop := 0

	for p := firstp; p != nil; p = p.Link {
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("%v\n", p)
		}
		// A branch (other than CALL) whose target is itself a JMP:
		// retarget it to the end of the JMP chain.
		if p.As != obj.ACALL && p.To.Type == obj.TYPE_BRANCH && p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.AJMP {
			if Debug['N'] == 0 {
				p.To.Val = chasejmp(p.To.Val.(*obj.Prog), &jmploop)
				if Debug['R'] != 0 && Debug['v'] != 0 {
					fmt.Printf("->%v\n", p)
				}
			}
		}

		p.Opt = dead
	}
	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\n")
	}

	// pass 2: mark all reachable code alive
	mark(firstp)

	// pass 3: delete dead code (mostly JMPs).
	var last *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.Opt == dead {
			if p.Link == nil && p.As == obj.ARET && last != nil && last.As != obj.ARET {
				// This is the final ARET, and the code so far doesn't have one.
				// Let it stay. The register allocator assumes that all live code in
				// the function can be traversed by starting at all the RET instructions
				// and following predecessor links. If we remove the final RET,
				// this assumption will not hold in the case of an infinite loop
				// at the end of a function.
				// Keep the RET but mark it dead for the liveness analysis.
				p.Mode = 1
			} else {
				if Debug['R'] != 0 && Debug['v'] != 0 {
					fmt.Printf("del %v\n", p)
				}
				continue
			}
		}

		if last != nil {
			last.Link = p
		}
		last = p
	}

	// mark always leaves firstp alive, so last is non-nil here.
	last.Link = nil

	// pass 4: elide JMP to next instruction.
	// only safe if there are no jumps to JMPs anymore.
	if jmploop == 0 && Debug['N'] == 0 {
		var last *obj.Prog
		for p := firstp; p != nil; p = p.Link {
			if p.As == obj.AJMP && p.To.Type == obj.TYPE_BRANCH && p.To.Val == p.Link {
				if Debug['R'] != 0 && Debug['v'] != 0 {
					fmt.Printf("del %v\n", p)
				}
				continue
			}

			if last != nil {
				last.Link = p
			}
			last = p
		}

		last.Link = nil
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\n")
		for p := firstp; p != nil; p = p.Link {
			fmt.Printf("%v\n", p)
		}
		fmt.Printf("\n")
	}
}
   217  
   218  // Control flow analysis. The Flow structures hold predecessor and successor
   219  // information as well as basic loop analysis.
   220  //
   221  //	graph = Flowstart(firstp, nil)
   222  //	... use flow graph ...
   223  //	Flowend(graph) // free graph
   224  //
   225  // Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
   226  //
   227  //	for f := graph.Start; f != nil; f = f.Link {}
   228  //
   229  // or, given an instruction f, to iterate over all the predecessors, which is
   230  // f.P1 and this list:
   231  //
   232  //	for f2 := f.P2; f2 != nil; f2 = f2.P2link {}
   233  //
   234  // The second argument (newData) to Flowstart specifies a func to create object
   235  // for every f.Data field, for use by the client.
   236  // If newData is nil, f.Data will be nil.
   237  
// A Graph is the control flow graph of one function, as built by
// Flowstart: Start heads the linked list of Flow nodes (in code
// order) and Num is the number of nodes.
type Graph struct {
	Start *Flow
	Num   int

	// After calling flowrpo, rpo lists the flow nodes in reverse postorder,
	// and each non-dead Flow node f has g->rpo[f->rpo] == f.
	Rpo []*Flow
}
   246  
// A Flow annotates one flow-relevant instruction with its position in
// the control flow graph: one direct predecessor/successor pair
// (P1/S1) plus a branch successor (S2), with additional predecessors
// chained from P2 through P2link.
type Flow struct {
	Prog   *obj.Prog // actual instruction
	P1     *Flow     // predecessors of this instruction: p1,
	P2     *Flow     // and then p2 linked though p2link.
	P2link *Flow
	S1     *Flow // successors of this instruction (at most two: s1 and s2).
	S2     *Flow
	Link   *Flow // next instruction in function code

	Active int32 // usable by client

	Id     int32  // sequence number in flow graph
	Rpo    int32  // reverse post ordering
	Loop   uint16 // x5 for every loop
	Refset bool   // diagnostic generated

	Data interface{} // for use by client
}
   265  
// flowmark is a sentinel: Flowstart temporarily points Prog.Opt at it
// to mark the instructions that will receive a Flow annotation.
var flowmark int

// MaxFlowProg is the maximum size program (counted in instructions)
// for which the flow code will build a graph. Functions larger than this limit
// will not have flow graphs and consequently will not be optimized.
const MaxFlowProg = 50000

var ffcache []Flow // reusable []Flow, to reduce allocation
   274  
   275  func growffcache(n int) {
   276  	if n > cap(ffcache) {
   277  		n = (n * 5) / 4
   278  		if n > MaxFlowProg {
   279  			n = MaxFlowProg
   280  		}
   281  		ffcache = make([]Flow, n)
   282  	}
   283  	ffcache = ffcache[:n]
   284  }
   285  
// Flowstart builds the control flow graph for the instructions
// starting at firstp and returns it. It returns nil when there is
// nothing to annotate or the function has MaxFlowProg or more
// instructions. If newData is non-nil it is called once per Flow node
// to populate f.Data. Each annotated Prog's Opt field is pointed at
// its Flow node until Flowend is called.
func Flowstart(firstp *obj.Prog, newData func() interface{}) *Graph {
	// Count and mark instructions to annotate.
	nf := 0

	for p := firstp; p != nil; p = p.Link {
		p.Opt = nil // should be already, but just in case
		Thearch.Proginfo(p)
		if p.Info.Flags&Skip != 0 {
			continue
		}
		p.Opt = &flowmark
		nf++
	}

	if nf == 0 {
		return nil
	}

	if nf >= MaxFlowProg {
		if Debug['v'] != 0 {
			Warn("%v is too big (%d instructions)", Curfn.Func.Nname.Sym, nf)
		}
		return nil
	}

	// Allocate annotations and assign to instructions.
	graph := new(Graph)

	growffcache(nf)
	ff := ffcache
	start := &ff[0]
	id := 0
	var last *Flow
	for p := firstp; p != nil; p = p.Link {
		if p.Opt == nil {
			continue
		}
		// Take the next node from the cache and link it to p.
		f := &ff[0]
		ff = ff[1:]
		p.Opt = f
		f.Prog = p
		if last != nil {
			last.Link = f
		}
		last = f
		if newData != nil {
			f.Data = newData()
		}
		f.Id = int32(id)
		id++
	}

	// Fill in pred/succ information.
	var f1 *Flow
	var p *obj.Prog
	for f := start; f != nil; f = f.Link {
		p = f.Prog
		if p.Info.Flags&Break == 0 {
			// Fall-through edge to the next annotated instruction.
			f1 = f.Link
			f.S1 = f1
			f1.P1 = f
		}

		if p.To.Type == obj.TYPE_BRANCH {
			if p.To.Val == nil {
				Fatalf("pnil %v", p)
			}
			f1 = p.To.Val.(*obj.Prog).Opt.(*Flow)
			if f1 == nil {
				Fatalf("fnil %v / %v", p, p.To.Val.(*obj.Prog))
			}
			if f1 == f {
				//fatal("self loop %v", p);
				continue
			}

			// Branch edge: f's second successor; f joins the target's
			// P2 predecessor chain.
			f.S2 = f1
			f.P2link = f1.P2
			f1.P2 = f
		}
	}

	graph.Start = start
	graph.Num = nf
	return graph
}
   372  
   373  func Flowend(graph *Graph) {
   374  	for f := graph.Start; f != nil; f = f.Link {
   375  		f.Prog.Info.Flags = 0 // drop cached proginfo
   376  		f.Prog.Opt = nil
   377  	}
   378  	clear := ffcache[:graph.Num]
   379  	for i := range clear {
   380  		clear[i] = Flow{}
   381  	}
   382  }
   383  
   384  // find looping structure
   385  //
   386  // 1) find reverse postordering
   387  // 2) find approximate dominators,
   388  //	the actual dominators if the flow graph is reducible
   389  //	otherwise, dominators plus some other non-dominators.
   390  //	See Matthew S. Hecht and Jeffrey D. Ullman,
   391  //	"Analysis of a Simple Algorithm for Global Data Flow Problems",
   392  //	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
   393  //	Oct. 1-3, 1973, pp.  207-217.
   394  // 3) find all nodes with a predecessor dominated by the current node.
   395  //	such a node is a loop head.
   396  //	recursively, all preds with a greater rpo number are in the loop
   397  func postorder(r *Flow, rpo2r []*Flow, n int32) int32 {
   398  	r.Rpo = 1
   399  	r1 := r.S1
   400  	if r1 != nil && r1.Rpo == 0 {
   401  		n = postorder(r1, rpo2r, n)
   402  	}
   403  	r1 = r.S2
   404  	if r1 != nil && r1.Rpo == 0 {
   405  		n = postorder(r1, rpo2r, n)
   406  	}
   407  	rpo2r[n] = r
   408  	n++
   409  	return n
   410  }
   411  
   412  func rpolca(idom []int32, rpo1 int32, rpo2 int32) int32 {
   413  	if rpo1 == -1 {
   414  		return rpo2
   415  	}
   416  	var t int32
   417  	for rpo1 != rpo2 {
   418  		if rpo1 > rpo2 {
   419  			t = rpo2
   420  			rpo2 = rpo1
   421  			rpo1 = t
   422  		}
   423  
   424  		for rpo1 < rpo2 {
   425  			t = idom[rpo2]
   426  			if t >= rpo2 {
   427  				Fatalf("bad idom")
   428  			}
   429  			rpo2 = t
   430  		}
   431  	}
   432  
   433  	return rpo1
   434  }
   435  
   436  func doms(idom []int32, r int32, s int32) bool {
   437  	for s > r {
   438  		s = idom[s]
   439  	}
   440  	return s == r
   441  }
   442  
   443  func loophead(idom []int32, r *Flow) bool {
   444  	src := r.Rpo
   445  	if r.P1 != nil && doms(idom, src, r.P1.Rpo) {
   446  		return true
   447  	}
   448  	for r = r.P2; r != nil; r = r.P2link {
   449  		if doms(idom, src, r.Rpo) {
   450  			return true
   451  		}
   452  	}
   453  	return false
   454  }
   455  
   456  func loopmark(rpo2r **Flow, head int32, r *Flow) {
   457  	if r.Rpo < head || r.Active == head {
   458  		return
   459  	}
   460  	r.Active = head
   461  	r.Loop += LOOP
   462  	if r.P1 != nil {
   463  		loopmark(rpo2r, head, r.P1)
   464  	}
   465  	for r = r.P2; r != nil; r = r.P2link {
   466  		loopmark(rpo2r, head, r)
   467  	}
   468  }
   469  
// flowrpo numbers the nodes of g in reverse postorder, computes
// approximate immediate dominators (see the comment block above
// postorder for references), and assigns loop nesting weights to
// Flow.Loop. Active is used as scratch and cleared on entry and exit.
func flowrpo(g *Graph) {
	g.Rpo = make([]*Flow, g.Num)
	idom := make([]int32, g.Num)

	for r1 := g.Start; r1 != nil; r1 = r1.Link {
		r1.Active = 0
	}

	// Collect reachable nodes in postorder, then reverse the prefix
	// in place to obtain reverse postorder.
	rpo2r := g.Rpo
	d := postorder(g.Start, rpo2r, 0)
	nr := int32(g.Num)
	if d > nr {
		Fatalf("too many reg nodes %d %d", d, nr)
	}
	nr = d
	var r1 *Flow
	for i := int32(0); i < nr/2; i++ {
		r1 = rpo2r[i]
		rpo2r[i] = rpo2r[nr-1-i]
		rpo2r[nr-1-i] = r1
	}

	for i := int32(0); i < nr; i++ {
		rpo2r[i].Rpo = i
	}

	// Approximate idom of each node: the LCA (in the tree built so
	// far) of all predecessors with smaller rpo numbers.
	idom[0] = 0
	var me int32
	for i := int32(0); i < nr; i++ {
		r1 = rpo2r[i]
		me = r1.Rpo
		d = -1

		// rpo2r[r.Rpo] == r protects against considering dead code,
		// which has r.Rpo == 0.
		if r1.P1 != nil && rpo2r[r1.P1.Rpo] == r1.P1 && r1.P1.Rpo < me {
			d = r1.P1.Rpo
		}
		for r1 = r1.P2; r1 != nil; r1 = r1.P2link {
			if rpo2r[r1.Rpo] == r1 && r1.Rpo < me {
				d = rpolca(idom, d, r1.Rpo)
			}
		}
		idom[i] = d
	}

	// Detect loop heads and weight every node inside each loop.
	for i := int32(0); i < nr; i++ {
		r1 = rpo2r[i]
		r1.Loop++
		if r1.P2 != nil && loophead(idom, r1) {
			loopmark(&rpo2r[0], i, r1)
		}
	}

	for r1 := g.Start; r1 != nil; r1 = r1.Link {
		r1.Active = 0
	}
}
   528  
   529  func Uniqp(r *Flow) *Flow {
   530  	r1 := r.P1
   531  	if r1 == nil {
   532  		r1 = r.P2
   533  		if r1 == nil || r1.P2link != nil {
   534  			return nil
   535  		}
   536  	} else if r.P2 != nil {
   537  		return nil
   538  	}
   539  	return r1
   540  }
   541  
   542  func Uniqs(r *Flow) *Flow {
   543  	r1 := r.S1
   544  	if r1 == nil {
   545  		r1 = r.S2
   546  		if r1 == nil {
   547  			return nil
   548  		}
   549  	} else if r.S2 != nil {
   550  		return nil
   551  	}
   552  	return r1
   553  }
   554  
   555  // The compilers assume they can generate temporary variables
   556  // as needed to preserve the right semantics or simplify code
   557  // generation and the back end will still generate good code.
   558  // This results in a large number of ephemeral temporary variables.
   559  // Merge temps with non-overlapping lifetimes and equal types using the
   560  // greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation",
   561  // ACM TOPLAS 1999.
   562  
// A TempVar tracks one mergeable compiler temporary during mergetemp:
// its defining instruction, its use list, the computed live range,
// and the variable it was merged into, if any.
type TempVar struct {
	node    *Node
	def     *Flow    // definition of temp var
	use     *Flow    // use list, chained through Flow.data
	merge   *TempVar // merge var with this one
	start   int64    // smallest Prog.pc in live range
	end     int64    // largest Prog.pc in live range
	addr    bool     // address taken - no accurate end
	removed bool     // removed from program
}
   573  
   574  // startcmp sorts TempVars by start, then id, then symbol name.
   575  type startcmp []*TempVar
   576  
   577  func (x startcmp) Len() int      { return len(x) }
   578  func (x startcmp) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
   579  func (x startcmp) Less(i, j int) bool {
   580  	a := x[i]
   581  	b := x[j]
   582  
   583  	if a.start < b.start {
   584  		return true
   585  	}
   586  	if a.start > b.start {
   587  		return false
   588  	}
   589  
   590  	// Order what's left by id or symbol name,
   591  	// just so that sort is forced into a specific ordering,
   592  	// so that the result of the sort does not depend on
   593  	// the sort implementation.
   594  	if a.def != b.def {
   595  		return int(a.def.Id-b.def.Id) < 0
   596  	}
   597  	if a.node != b.node {
   598  		return a.node.Sym.Name < b.node.Sym.Name
   599  	}
   600  	return false
   601  }
   602  
   603  // Is n available for merging?
   604  func canmerge(n *Node) bool {
   605  	return n.Class == PAUTO && strings.HasPrefix(n.Sym.Name, "autotmp")
   606  }
   607  
// mergetemp reduces the number of compiler temporaries in the current
// function: it deletes write-only temporaries, collapses temporaries
// that are written and immediately read by the next instruction, and
// merges autotmp variables with non-overlapping live ranges and equal
// types using the greedy linear-scan algorithm referenced above.
// Merged/removed temporaries are dropped from Curfn.Func.Dcl.
func mergetemp(firstp *obj.Prog) {
	const (
		debugmerge = 0
	)

	g := Flowstart(firstp, nil)
	if g == nil {
		return
	}

	// Build list of all mergeable variables.
	var vars []*TempVar
	for _, n := range Curfn.Func.Dcl {
		if canmerge(n) {
			v := &TempVar{}
			vars = append(vars, v)
			n.SetOpt(v) // n.Opt() links the node back to its TempVar
			v.node = n
		}
	}

	// Build list of uses.
	// We assume that the earliest reference to a temporary is its definition.
	// This is not true of variables in general but our temporaries are all
	// single-use (that's why we have so many!).
	for f := g.Start; f != nil; f = f.Link {
		p := f.Prog
		if p.From.Node != nil && ((p.From.Node).(*Node)).Opt() != nil && p.To.Node != nil && ((p.To.Node).(*Node)).Opt() != nil {
			Fatalf("double node %v", p)
		}
		var v *TempVar
		n, _ := p.From.Node.(*Node)
		if n != nil {
			v, _ = n.Opt().(*TempVar)
		}
		if v == nil {
			n, _ = p.To.Node.(*Node)
			if n != nil {
				v, _ = n.Opt().(*TempVar)
			}
		}
		if v != nil {
			if v.def == nil {
				v.def = f
			}
			f.Data = v.use // prepend f to v's use list, threaded through Flow.Data
			v.use = f
			if n == p.From.Node && (p.Info.Flags&LeftAddr != 0) {
				v.addr = true
			}
		}
	}

	if debugmerge > 1 && Debug['v'] != 0 {
		Dumpit("before", g.Start, 0)
	}

	nkill := 0

	// Special case.
	for _, v := range vars {
		if v.addr {
			continue
		}

		// Used in only one instruction, which had better be a write.
		f := v.use
		if f != nil && f.Data.(*Flow) == nil {
			p := f.Prog
			if p.To.Node == v.node && (p.Info.Flags&RightWrite != 0) && p.Info.Flags&RightRead == 0 {
				// Dead store: turn it into a NOP.
				p.As = obj.ANOP
				p.To = obj.Addr{}
				v.removed = true
				if debugmerge > 0 && Debug['v'] != 0 {
					fmt.Printf("drop write-only %v\n", v.node.Sym)
				}
			} else {
				Fatalf("temp used and not set: %v", p)
			}
			nkill++
			continue
		}

		// Written in one instruction, read in the next, otherwise unused,
		// no jumps to the next instruction. Happens mainly in 386 compiler.
		f = v.use
		if f != nil && f.Link == f.Data.(*Flow) && (f.Data.(*Flow)).Data.(*Flow) == nil && Uniqp(f.Link) == f {
			p := f.Prog
			p1 := f.Link.Prog
			const (
				SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD
			)
			// Forward the source of the move into the reader and
			// delete the move.
			if p.From.Node == v.node && p1.To.Node == v.node && (p.Info.Flags&Move != 0) && (p.Info.Flags|p1.Info.Flags)&(LeftAddr|RightAddr) == 0 && p.Info.Flags&SizeAny == p1.Info.Flags&SizeAny {
				p1.From = p.From
				Thearch.Excise(f)
				v.removed = true
				if debugmerge > 0 && Debug['v'] != 0 {
					fmt.Printf("drop immediate-use %v\n", v.node.Sym)
				}
			}

			nkill++
			continue
		}
	}

	// Traverse live range of each variable to set start, end.
	// Each flood uses a new value of gen so that we don't have
	// to clear all the r.Active words after each variable.
	gen := uint32(0)

	for _, v := range vars {
		gen++
		for f := v.use; f != nil; f = f.Data.(*Flow) {
			mergewalk(v, f, gen)
		}
		if v.addr {
			gen++
			for f := v.use; f != nil; f = f.Data.(*Flow) {
				varkillwalk(v, f, gen)
			}
		}
	}

	// Sort variables by start.
	bystart := make([]*TempVar, len(vars))
	copy(bystart, vars)
	sort.Sort(startcmp(bystart))

	// List of in-use variables, sorted by end, so that the ones that
	// will last the longest are the earliest ones in the array.
	// The tail inuse[nfree:] holds no-longer-used variables.
	// In theory we should use a sorted tree so that insertions are
	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
	// In practice, it doesn't really matter.
	inuse := make([]*TempVar, len(bystart))

	ninuse := 0
	nfree := len(bystart)
	for _, v := range bystart {
		if debugmerge > 0 && Debug['v'] != 0 {
			fmt.Printf("consider %v: removed=%t\n", Nconv(v.node, FmtSharp), v.removed)
		}

		if v.removed {
			continue
		}

		// Expire no longer in use.
		for ninuse > 0 && inuse[ninuse-1].end < v.start {
			ninuse--
			nfree--
			inuse[nfree] = inuse[ninuse]
		}

		if debugmerge > 0 && Debug['v'] != 0 {
			fmt.Printf("consider %v: removed=%t nfree=%d nvar=%d\n", Nconv(v.node, FmtSharp), v.removed, nfree, len(bystart))
		}

		// Find old temp to reuse if possible.
		t := v.node.Type

		for j := nfree; j < len(inuse); j++ {
			v1 := inuse[j]
			if debugmerge > 0 && Debug['v'] != 0 {
				fmt.Printf("consider %v: maybe %v: type=%v,%v addrtaken=%v,%v\n", Nconv(v.node, FmtSharp), Nconv(v1.node, FmtSharp), t, v1.node.Type, v.node.Addrtaken, v1.node.Addrtaken)
			}

			// Require the types to match but also require the addrtaken bits to match.
			// If a variable's address is taken, that disables registerization for the individual
			// words of the variable (for example, the base,len,cap of a slice).
			// We don't want to merge a non-addressed var with an addressed one and
			// inhibit registerization of the former.
			if Eqtype(t, v1.node.Type) && v.node.Addrtaken == v1.node.Addrtaken {
				inuse[j] = inuse[nfree]
				nfree++
				if v1.merge != nil {
					v.merge = v1.merge
				} else {
					v.merge = v1
				}
				nkill++
				break
			}
		}

		// Sort v into inuse.
		j := ninuse
		ninuse++

		for j > 0 && inuse[j-1].end < v.end {
			inuse[j] = inuse[j-1]
			j--
		}

		inuse[j] = v
	}

	if debugmerge > 0 && Debug['v'] != 0 {
		fmt.Printf("%v [%d - %d]\n", Curfn.Func.Nname.Sym, len(vars), nkill)
		for _, v := range vars {
			fmt.Printf("var %v %v %d-%d", Nconv(v.node, FmtSharp), v.node.Type, v.start, v.end)
			if v.addr {
				fmt.Printf(" addr=true")
			}
			if v.removed {
				fmt.Printf(" removed=true")
			}
			if v.merge != nil {
				fmt.Printf(" merge %v", Nconv(v.merge.node, FmtSharp))
			}
			if v.start == v.end && v.def != nil {
				fmt.Printf(" %v", v.def.Prog)
			}
			fmt.Printf("\n")
		}

		if debugmerge > 1 && Debug['v'] != 0 {
			Dumpit("after", g.Start, 0)
		}
	}

	// Update node references to use merged temporaries.
	for f := g.Start; f != nil; f = f.Link {
		p := f.Prog
		n, _ := p.From.Node.(*Node)
		if n != nil {
			v, _ := n.Opt().(*TempVar)
			if v != nil && v.merge != nil {
				p.From.Node = v.merge.node
			}
		}
		n, _ = p.To.Node.(*Node)
		if n != nil {
			v, _ := n.Opt().(*TempVar)
			if v != nil && v.merge != nil {
				p.To.Node = v.merge.node
			}
		}
	}

	// Delete merged nodes from declaration list.
	dcl := make([]*Node, 0, len(Curfn.Func.Dcl)-nkill)
	for _, n := range Curfn.Func.Dcl {
		v, _ := n.Opt().(*TempVar)
		if v != nil && (v.merge != nil || v.removed) {
			continue
		}
		dcl = append(dcl, n)
	}
	Curfn.Func.Dcl = dcl

	// Clear aux structures.
	for _, v := range vars {
		v.node.SetOpt(nil)
	}

	Flowend(g)
}
   867  
// mergewalk floods backward from f0 along predecessor edges,
// extending v's live range: v.end is raised to the largest Pc seen,
// and the walk stops at v's defining instruction (setting v.start) or
// at nodes already visited in this flood (Active == gen).
func mergewalk(v *TempVar, f0 *Flow, gen uint32) {
	var p *obj.Prog
	var f1 *Flow

	// Walk the P1 chain first, remembering in f1 where it stopped.
	for f1 = f0; f1 != nil; f1 = f1.P1 {
		if uint32(f1.Active) == gen {
			break
		}
		f1.Active = int32(gen)
		p = f1.Prog
		if v.end < p.Pc {
			v.end = p.Pc
		}
		if f1 == v.def {
			v.start = p.Pc
			break
		}
	}

	// Recurse into the alternate (P2) predecessors of every node just
	// visited on the P1 chain.
	var f2 *Flow
	for f := f0; f != f1; f = f.P1 {
		for f2 = f.P2; f2 != nil; f2 = f2.P2link {
			mergewalk(v, f2, gen)
		}
	}
}
   894  
// varkillwalk floods forward from f0 along successor edges, widening
// v's live range (both start and end) over every reachable Pc until
// the walk reaches a RET or a VARKILL of v.node. It is used for
// address-taken temporaries, whose last use cannot be observed
// directly.
func varkillwalk(v *TempVar, f0 *Flow, gen uint32) {
	var p *obj.Prog
	var f1 *Flow

	// Walk the S1 (fall-through) chain, remembering in f1 where it
	// stopped; Active marks nodes visited in this flood.
	for f1 = f0; f1 != nil; f1 = f1.S1 {
		if uint32(f1.Active) == gen {
			break
		}
		f1.Active = int32(gen)
		p = f1.Prog
		if v.end < p.Pc {
			v.end = p.Pc
		}
		if v.start > p.Pc {
			v.start = p.Pc
		}
		if p.As == obj.ARET || (p.As == obj.AVARKILL && p.To.Node == v.node) {
			break
		}
	}

	// Recurse into the branch (S2) successors of the visited nodes.
	// A nil S2 is handled by the nil check in the loop above.
	for f := f0; f != f1; f = f.S1 {
		varkillwalk(v, f.S2, gen)
	}
}
   920  
   921  // Eliminate redundant nil pointer checks.
   922  //
   923  // The code generation pass emits a CHECKNIL for every possibly nil pointer.
   924  // This pass removes a CHECKNIL if every predecessor path has already
   925  // checked this value for nil.
   926  //
   927  // Simple backwards flood from check to definition.
   928  // Run prog loop backward from end of program to beginning to avoid quadratic
   929  // behavior removing a run of checks.
   930  //
   931  // Assume that stack variables with address not taken can be loaded multiple times
   932  // from memory without being rechecked. Other variables need to be checked on
   933  // each load.
   934  
   935  var killed int // f.Data is either nil or &killed
   936  
// nilopt removes redundant CHECKNIL instructions from the function:
// checks of stack addresses (never nil), checks made unnecessary by
// an upcoming dereference in the same basic block, and checks already
// performed on the unique predecessor path. Doomed checks are marked
// by pointing their Flow.Data at killed, then excised in one pass.
func nilopt(firstp *obj.Prog) {
	g := Flowstart(firstp, nil)
	if g == nil {
		return
	}

	if Debug_checknil > 1 { // || strcmp(curfn->nname->sym->name, "f1") == 0
		Dumpit("nilopt", g.Start, 0)
	}

	ncheck := 0
	nkill := 0
	var p *obj.Prog
	for f := g.Start; f != nil; f = f.Link {
		p = f.Prog
		if p.As != obj.ACHECKNIL || !Thearch.Regtyp(&p.From) {
			continue
		}
		ncheck++
		if Thearch.Stackaddr(&p.From) {
			// A stack address can never be nil.
			if Debug_checknil != 0 && p.Lineno > 1 {
				Warnl(p.Lineno, "removed nil check of SP address")
			}
			f.Data = &killed
			continue
		}

		// A dereference just ahead will fault anyway; see nilwalkfwd.
		nilwalkfwd(f)
		if f.Data != nil {
			if Debug_checknil != 0 && p.Lineno > 1 {
				Warnl(p.Lineno, "removed nil check before indirect")
			}
			continue
		}

		// The same value may already be checked upstream; see nilwalkback.
		nilwalkback(f)
		if f.Data != nil {
			if Debug_checknil != 0 && p.Lineno > 1 {
				Warnl(p.Lineno, "removed repeated nil check")
			}
			continue
		}
	}

	// Excise every check marked killed.
	for f := g.Start; f != nil; f = f.Link {
		if f.Data != nil {
			nkill++
			Thearch.Excise(f)
		}
	}

	Flowend(g)

	if Debug_checknil > 1 {
		fmt.Printf("%v: removed %d of %d nil checks\n", Curfn.Func.Nname.Sym, nkill, ncheck)
	}
}
   994  
   995  func nilwalkback(fcheck *Flow) {
   996  	for f := fcheck; f != nil; f = Uniqp(f) {
   997  		p := f.Prog
   998  		if (p.Info.Flags&RightWrite != 0) && Thearch.Sameaddr(&p.To, &fcheck.Prog.From) {
   999  			// Found initialization of value we're checking for nil.
  1000  			// without first finding the check, so this one is unchecked.
  1001  			return
  1002  		}
  1003  
  1004  		if f != fcheck && p.As == obj.ACHECKNIL && Thearch.Sameaddr(&p.From, &fcheck.Prog.From) {
  1005  			fcheck.Data = &killed
  1006  			return
  1007  		}
  1008  	}
  1009  }
  1010  
  1011  // Here is a more complex version that scans backward across branches.
  1012  // It assumes fcheck->kill = 1 has been set on entry, and its job is to find a reason
  1013  // to keep the check (setting fcheck->kill = 0).
  1014  // It doesn't handle copying of aggregates as well as I would like,
  1015  // nor variables with their address taken,
  1016  // and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3.
  1017  /*
  1018  for(f1 = f0; f1 != nil; f1 = f1->p1) {
  1019  	if(f1->active == gen)
  1020  		break;
  1021  	f1->active = gen;
  1022  	p = f1->prog;
  1023  
  1024  	// If same check, stop this loop but still check
  1025  	// alternate predecessors up to this point.
  1026  	if(f1 != fcheck && p->as == ACHECKNIL && thearch.sameaddr(&p->from, &fcheck->prog->from))
  1027  		break;
  1028  
  1029  	if((p.Info.flags & RightWrite) && thearch.sameaddr(&p->to, &fcheck->prog->from)) {
  1030  		// Found initialization of value we're checking for nil.
  1031  		// without first finding the check, so this one is unchecked.
  1032  		fcheck->kill = 0;
  1033  		return;
  1034  	}
  1035  
  1036  	if(f1->p1 == nil && f1->p2 == nil) {
  1037  		print("lost pred for %v\n", fcheck->prog);
  1038  		for(f1=f0; f1!=nil; f1=f1->p1) {
  1039  			thearch.proginfo(&info, f1->prog);
  1040  			print("\t%v %d %d %D %D\n", r1->prog, info.flags&RightWrite, thearch.sameaddr(&f1->prog->to, &fcheck->prog->from), &f1->prog->to, &fcheck->prog->from);
  1041  		}
  1042  		fatal("lost pred trail");
  1043  	}
  1044  }
  1045  
  1046  for(f = f0; f != f1; f = f->p1)
  1047  	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
  1048  		nilwalkback(fcheck, f2, gen);
  1049  */
  1050  
  1051  func nilwalkfwd(fcheck *Flow) {
  1052  	// If the path down from rcheck dereferences the address
  1053  	// (possibly with a small offset) before writing to memory
  1054  	// and before any subsequent checks, it's okay to wait for
  1055  	// that implicit check. Only consider this basic block to
  1056  	// avoid problems like:
  1057  	//	_ = *x // should panic
  1058  	//	for {} // no writes but infinite loop may be considered visible
  1059  
  1060  	var last *Flow
  1061  	for f := Uniqs(fcheck); f != nil; f = Uniqs(f) {
  1062  		p := f.Prog
  1063  		if (p.Info.Flags&LeftRead != 0) && Thearch.Smallindir(&p.From, &fcheck.Prog.From) {
  1064  			fcheck.Data = &killed
  1065  			return
  1066  		}
  1067  
  1068  		if (p.Info.Flags&(RightRead|RightWrite) != 0) && Thearch.Smallindir(&p.To, &fcheck.Prog.From) {
  1069  			fcheck.Data = &killed
  1070  			return
  1071  		}
  1072  
  1073  		// Stop if another nil check happens.
  1074  		if p.As == obj.ACHECKNIL {
  1075  			return
  1076  		}
  1077  
  1078  		// Stop if value is lost.
  1079  		if (p.Info.Flags&RightWrite != 0) && Thearch.Sameaddr(&p.To, &fcheck.Prog.From) {
  1080  			return
  1081  		}
  1082  
  1083  		// Stop if memory write.
  1084  		if (p.Info.Flags&RightWrite != 0) && !Thearch.Regtyp(&p.To) {
  1085  			return
  1086  		}
  1087  
  1088  		// Stop if we jump backward.
  1089  		if last != nil && f.Id <= last.Id {
  1090  			return
  1091  		}
  1092  		last = f
  1093  	}
  1094  }