github.com/FenixAra/go@v0.0.0-20170127160404-96ea0918e670/src/cmd/compile/internal/ssa/loopreschedchecks.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import "fmt"
     8  
     9  // an edgeMemCtr records a backedge, together with the memory and
    10  // counter phi functions at the target of the backedge that must
    11  // be updated when a rescheduling check replaces the backedge.
    12  type edgeMemCtr struct {
    13  	e Edge
    14  	m *Value // phi for memory at dest of e
    15  	c *Value // phi for counter at dest of e
    16  }
    17  
    18  // a rewriteTarget is a value-argindex pair indicating
    19  // where a rewrite is applied.  Note that this is for values,
    20  // not for block controls, because block controls are not targets
    21  // for the rewrites performed in inserting rescheduling checks.
    22  type rewriteTarget struct {
    23  	v *Value
    24  	i int
    25  }
    26  
    27  type rewrite struct {
    28  	before, after *Value          // before is the expected value before rewrite, after is the new value installed.
    29  	rewrites      []rewriteTarget // all the targets for this rewrite.
    30  }
    31  
    32  func (r *rewrite) String() string {
    33  	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
    34  	for _, rw := range r.rewrites {
    35  		s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
    36  	}
    37  	s += "\n"
    38  	return s
    39  }
    40  
    41  const initialRescheduleCounterValue = 1021 // Largest 10-bit prime. 97 nSec loop bodies will check every 100 uSec.
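        // (Arithmetic behind the figure above: 1021 iterations of a ~97 nSec loop body
        // is 1021 * 97 nSec ≈ 99 uSec between rescheduling checks.)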
    42  
    43  // insertLoopReschedChecks inserts rescheduling checks on loop backedges.
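        // For intuition, an illustrative (hypothetical) example: a call-free loop such as
        //
        //	for i := 0; i < n; i++ {
        //		sum += a[i]
        //	}
        //
        // compiles with no preemption points in its body. The backedge of such a loop is
        // the kind of edge this pass rewrites to decrement a shared counter and, when the
        // counter becomes non-positive, call the runtime's rescheduling check.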
    44  func insertLoopReschedChecks(f *Func) {
    45  	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.
    46  
    47  	// Loop reschedule checks decrement a per-function counter
    48  	// shared by all loops, and when the counter becomes non-positive
    49  	// a call is made to a rescheduling check in the runtime.
    50  	//
    51  	// Steps:
    52  	// 1. Locate backedges.
    53  	// 2. Record memory definitions at block end so that
    54  	//    the SSA graph for mem can be properly modified.
    55  	// 3. Define a counter and record its future uses (at backedges).
    56  	//    (Same process as 2, applied to the single definition of the counter.
    57  	//     The difference from mem is that there are zero-to-many existing mem
    58  	//     definitions, versus exactly one for the new counter.)
    59  	// 4. Ensure that the phi functions that will be needed for mem and counter
    60  	//    are present in the graph, initially with trivial inputs.
    61  	// 5. Record all to-be-modified uses of mem and counter;
    62  	//    apply the modifications (split into two steps to simplify and
    63  	//    avoid nagging order dependences).
    64  	// 6. Rewrite backedges to include counter check, reschedule check,
    65  	//    and modify destination phi function appropriately with new
    66  	//    definitions for mem and counter.
    67  
    68  	if f.NoSplit { // nosplit functions don't reschedule.
    69  		return
    70  	}
    71  
    72  	backedges := backedges(f)
    73  	if len(backedges) == 0 { // no backedges means no rescheduling checks.
    74  		return
    75  	}
    76  
    77  	lastMems := findLastMems(f)
    78  
    79  	idom := f.Idom()
    80  	sdom := f.sdom()
    81  
    82  	if f.pass.debug > 2 {
    83  		fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
    84  	}
    85  
    86  	tofixBackedges := []edgeMemCtr{}
    87  
    88  	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
    89  		tofixBackedges = append(tofixBackedges, edgeMemCtr{e, nil, nil})
    90  	}
    91  
    92  	// It's possible that there is no memory state (no global/pointer loads/stores or calls)
    93  	if lastMems[f.Entry.ID] == nil {
    94  		lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Line, OpInitMem, TypeMem)
    95  	}
    96  
    97  	memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
    98  
    99  	// Propagate last mem definitions forward through successor blocks.
   100  	po := f.postorder()
   101  	for i := len(po) - 1; i >= 0; i-- {
   102  		b := po[i]
   103  		mem := lastMems[b.ID]
   104  		for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
   105  			// loop because there might be backedges that haven't been visited yet.
   106  			mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
   107  		}
   108  		memDefsAtBlockEnds[b.ID] = mem
   109  	}
   110  
   111  	// Set up counter.  There are no phis etc pre-existing for it.
   112  	counter0 := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), initialRescheduleCounterValue)
   113  	ctrDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, def visible at its end, if that def will be used.
   114  
   115  	// There's a minor difference between memDefsAtBlockEnds and ctrDefsAtBlockEnds:
   116  	// because the counter only matters for loops and the code that reaches them, it is nil for blocks where the ctr is no
   117  	// longer live.  This avoids the creation of dead phi functions.  This optimization is skipped for the mem variable
   118  	// because it is harder and also less likely to be helpful, though dead code elimination ought to clean this out anyhow.
   119  
   120  	for _, emc := range tofixBackedges {
   121  		e := emc.e
   122  		// Set the initial uses of counter0 (note: available-at-bottom and use are the same thing initially).
   123  		// Each backedge will be rewritten to include a reschedule check, and that check will use the counter.
   124  		src := e.b.Preds[e.i].b
   125  		ctrDefsAtBlockEnds[src.ID] = counter0
   126  	}
   127  
   128  	// Push uses towards root
   129  	for _, b := range f.postorder() {
   130  		bd := ctrDefsAtBlockEnds[b.ID]
   131  		if bd == nil {
   132  			continue
   133  		}
   134  		for _, e := range b.Preds {
   135  			p := e.b
   136  			if ctrDefsAtBlockEnds[p.ID] == nil {
   137  				ctrDefsAtBlockEnds[p.ID] = bd
   138  			}
   139  		}
   140  	}
   141  
   142  	// Maps from block to newly-inserted phi function in block.
   143  	newmemphis := make(map[*Block]rewrite)
   144  	newctrphis := make(map[*Block]rewrite)
   145  
   146  	// Insert phi functions as necessary for future changes to flow graph.
   147  	for i, emc := range tofixBackedges {
   148  		e := emc.e
   149  		h := e.b
   150  
   151  		// find the phi function for the memory input at "h", if there is one.
   152  		var headerMemPhi *Value // look for header mem phi
   153  
   154  		for _, v := range h.Values {
   155  			if v.Op == OpPhi && v.Type.IsMemory() {
   156  				headerMemPhi = v
   157  			}
   158  		}
   159  
   160  		if headerMemPhi == nil {
   161  			// if there is no header mem phi, make a trivial phi using the mem def at the end of the dominator
   162  			mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
   163  			headerMemPhi = newPhiFor(h, mem0)
   164  			newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
   165  			addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis)
   166  
   167  		}
   168  		tofixBackedges[i].m = headerMemPhi
   169  
   170  		var headerCtrPhi *Value
   171  		rw, ok := newctrphis[h]
   172  		if !ok {
   173  			headerCtrPhi = newPhiFor(h, counter0)
   174  			newctrphis[h] = rewrite{before: counter0, after: headerCtrPhi}
   175  			addDFphis(counter0, h, h, f, ctrDefsAtBlockEnds, newctrphis)
   176  		} else {
   177  			headerCtrPhi = rw.after
   178  		}
   179  		tofixBackedges[i].c = headerCtrPhi
   180  	}
   181  
   182  	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis)
   183  	rewriteNewPhis(f.Entry, f.Entry, f, ctrDefsAtBlockEnds, newctrphis)
   184  
   185  	if f.pass.debug > 0 {
   186  		for b, r := range newmemphis {
   187  			fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
   188  		}
   189  
   190  		for b, r := range newctrphis {
   191  			fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
   192  		}
   193  	}
   194  
   195  	// Apply collected rewrites.
   196  	for _, r := range newmemphis {
   197  		for _, rw := range r.rewrites {
   198  			rw.v.SetArg(rw.i, r.after)
   199  		}
   200  	}
   201  
   202  	for _, r := range newctrphis {
   203  		for _, rw := range r.rewrites {
   204  			rw.v.SetArg(rw.i, r.after)
   205  		}
   206  	}
   207  
   208  	zero := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), 0)
   209  	one := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), 1)
   210  
   211  	// Rewrite backedges to include reschedule checks.
   212  	for _, emc := range tofixBackedges {
   213  		e := emc.e
   214  		headerMemPhi := emc.m
   215  		headerCtrPhi := emc.c
   216  		h := e.b
   217  		i := e.i
   218  		p := h.Preds[i]
   219  		bb := p.b
   220  		mem0 := headerMemPhi.Args[i]
   221  		ctr0 := headerCtrPhi.Args[i]
   222  		// The backedge being rewritten runs from bb (== h.Preds[i].b) to the header h.
   223  		// Because we're going to insert a rarely-taken call, make sure the
   224  		// looping edge still looks likely.
   225  		likely := BranchLikely
   226  		if p.i != 0 {
   227  			likely = BranchUnlikely
   228  		}
   229  		bb.Likely = likely
   230  
   231  		// rewrite edge to include reschedule check
   232  		// existing edges:
   233  		//
   234  		// bb.Succs[p.i] == Edge{h, i}
   235  		// h.Preds[i] == p == Edge{bb,p.i}
   236  		//
   237  		// new block(s):
   238  		// test:
   239  		//    ctr1 := ctr0 - 1
   240  		//    if ctr1 <= 0 { goto sched }
   241  		//    goto join
   242  		// sched:
   243  		//    mem1 := call resched (mem0)
   244  		//    goto join
   245  		// join:
   246  		//    ctr2 := phi(ctr1, counter0) // counter0 is the constant
   247  		//    mem2 := phi(mem0, mem1)
   248  		//    goto h
   249  		//
   250  		// and correct arg i of headerMemPhi and headerCtrPhi
   251  		//
   252  		// EXCEPT: a block containing only phi functions is bad
   253  		// for the register allocator.  Therefore, there is no
   254  		// join; branches that would target join instead target
   255  		// the header, and the other phi functions within the header
   256  		// are adjusted for the additional input.
   257  
   258  		test := f.NewBlock(BlockIf)
   259  		sched := f.NewBlock(BlockPlain)
   260  
   261  		test.Line = bb.Line
   262  		sched.Line = bb.Line
   263  
   264  		//    ctr1 := ctr0 - 1
   265  		//    if ctr1 <= 0 { goto sched }
   266  		//    goto header
   267  		ctr1 := test.NewValue2(bb.Line, OpSub32, f.Config.fe.TypeInt32(), ctr0, one)
   268  		cmp := test.NewValue2(bb.Line, OpLeq32, f.Config.fe.TypeBool(), ctr1, zero)
   269  		test.SetControl(cmp)
   270  		test.AddEdgeTo(sched) // if true
   271  		// if false -- rewrite edge to header.
   272  		// do NOT remove+add, because that will perturb all the other phi functions
   273  		// as well as mess up other edges to the header.
   274  		test.Succs = append(test.Succs, Edge{h, i})
   275  		h.Preds[i] = Edge{test, 1}
   276  		headerMemPhi.SetArg(i, mem0)
   277  		headerCtrPhi.SetArg(i, ctr1)
   278  
   279  		test.Likely = BranchUnlikely
   280  
   281  		// sched:
   282  		//    mem1 := call resched (mem0)
   283  		//    goto header
   284  		resched := f.Config.fe.Syslook("goschedguarded")
   285  		mem1 := sched.NewValue1A(bb.Line, OpStaticCall, TypeMem, resched, mem0)
   286  		sched.AddEdgeTo(h)
   287  		headerMemPhi.AddArg(mem1)
   288  		headerCtrPhi.AddArg(counter0)
   289  
   290  		bb.Succs[p.i] = Edge{test, 0}
   291  		test.Preds = append(test.Preds, Edge{bb, p.i})
   292  
   293  		// Must correct all the other phi functions in the header for the new incoming edge.
   294  		// Except for mem and counter phis, it will be the same value seen on the original
   295  		// backedge at index i.
   296  		for _, v := range h.Values {
   297  			if v.Op == OpPhi && v != headerMemPhi && v != headerCtrPhi {
   298  				v.AddArg(v.Args[i])
   299  			}
   300  		}
   301  	}
   302  
   303  	f.invalidateCFG()
   304  
   305  	if f.pass.debug > 2 {
   306  		sdom = newSparseTree(f, f.Idom())
   307  		fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
   308  	}
   309  
   310  	return
   311  }
   312  
   313  // newPhiFor inserts a new Phi function into b,
   314  // with all inputs set to v.
   315  func newPhiFor(b *Block, v *Value) *Value {
   316  	phiV := b.NewValue0(b.Line, OpPhi, v.Type)
   317  
   318  	for range b.Preds {
   319  		phiV.AddArg(v)
   320  	}
   321  	return phiV
   322  }
   323  
   324  // rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
   325  // in block h will replace a previous definition.  Block b is the block currently being processed;
   326  // if b has its own phi definition then it takes the place of h.
   327  // defsForUses provides information about other definitions of the variable that are present
   328  // (and if nil, indicates that the variable is no longer live)
   329  func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite) {
   330  	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
   331  	if _, ok := newphis[b]; ok {
   332  		h = b
   333  	}
   334  	change := newphis[h]
   335  	x := change.before
   336  	y := change.after
   337  
   338  	// Apply rewrites to this block
   339  	if x != nil { // don't waste time on the common case of no definition.
   340  		p := &change.rewrites
   341  		for _, v := range b.Values {
   342  			if v == y { // don't rewrite self -- phi inputs are handled below.
   343  				continue
   344  			}
   345  			for i, w := range v.Args {
   346  				if w != x {
   347  					continue
   348  				}
   349  				*p = append(*p, rewriteTarget{v, i})
   350  			}
   351  		}
   352  
   353  		// Rewrite appropriate inputs of phis reached in successors
   354  		// in dominance frontier, self, and dominated.
   355  		// If the variable def reaching uses in b is itself defined in b, then the new phi function
   356  		// does not reach the successors of b.  (This assumes a bit about the structure of the
   357  		// phi use-def graph, but it's true for memory and the inserted counter.)
   358  		if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
   359  			for _, e := range b.Succs {
   360  				s := e.b
   361  				if sphi, ok := newphis[s]; ok { // saves time to find the phi this way.
   362  					*p = append(*p, rewriteTarget{sphi.after, e.i})
   363  					continue
   364  				}
   365  				for _, v := range s.Values {
   366  					if v.Op == OpPhi && v.Args[e.i] == x {
   367  						*p = append(*p, rewriteTarget{v, e.i})
   368  						break
   369  					}
   370  				}
   371  			}
   372  		}
   373  		newphis[h] = change
   374  	}
   375  
   376  	sdom := f.sdom()
   377  
   378  	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
   379  		rewriteNewPhis(h, c, f, defsForUses, newphis) // TODO: convert to explicit stack from recursion.
   380  	}
   381  }
   382  
   383  // addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
   384  // a new definition for variable "x" inserted at h (usually but not necessarily a phi).
   385  // These new phis can only occur at the dominance frontier of h; block s is in the dominance
   386  // frontier of h if h does not strictly dominate s and if s is a successor of a block b where
   387  // either b = h or h strictly dominates b.
   388  // These newly created phis are themselves new definitions that may require addition of their
   389  // own trivial phi functions in their own dominance frontier, and this is handled recursively.
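        // For intuition, a sketch over a hypothetical diamond CFG (not taken from this pass):
        //
        //	    e
        //	   / \
        //	  h   c
        //	   \ /
        //	    s
        //
        // A new definition of x placed in h reaches s along one edge, while the old value still
        // reaches s from c. Since h does not strictly dominate s, s is in h's dominance
        // frontier, and a trivial phi merging the two values must be created at s.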
   390  func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite) {
   391  	oldv := defForUses[b.ID]
   392  	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
   393  		return
   394  	}
   395  	sdom := f.sdom()
   396  	idom := f.Idom()
   397  outer:
   398  	for _, e := range b.Succs {
   399  		s := e.b
   400  		// check phi functions in the dominance frontier
   401  		if sdom.isAncestor(h, s) {
   402  			continue // h dominates s, successor of b, therefore s is not in the frontier.
   403  		}
   404  		if _, ok := newphis[s]; ok {
   405  			continue // successor s of b already has a new phi function, so there is no need to add another.
   406  		}
   407  		if x != nil {
   408  			for _, v := range s.Values {
   409  				if v.Op == OpPhi && v.Args[e.i] == x {
   410  					continue outer // successor s of b has an old phi function, so there is no need to add another.
   411  				}
   412  			}
   413  		}
   414  
   415  		old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
   416  		headerPhi := newPhiFor(s, old)
   417  		// the new phi will replace "old" in block s and all blocks dominated by s.
   418  		newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
   419  		addDFphis(old, s, s, f, defForUses, newphis)        // the new definition may also create new phi functions.
   420  	}
   421  	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
   422  		addDFphis(x, h, c, f, defForUses, newphis) // TODO: convert to explicit stack from recursion.
   423  	}
   424  }
   425  
   426  // findLastMems maps block IDs to the last memory-output op in each block, if any.
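        // For example (illustrative): if a block's memory-typed values form the chain
        // mem0 -> Store1 -> Store2, with each op consuming the previous memory, Store2 is
        // the value recorded for that block; a block whose only memory value is a mem phi
        // records the phi itself.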
   427  func findLastMems(f *Func) []*Value {
   428  
   429  	var stores []*Value
   430  	lastMems := make([]*Value, f.NumBlocks())
   431  	storeUse := f.newSparseSet(f.NumValues())
   432  	defer f.retSparseSet(storeUse)
   433  	for _, b := range f.Blocks {
   434  		// Find all the stores in this block. Categorize their uses:
   435  		//  storeUse contains stores which are used by a subsequent store.
   436  		storeUse.clear()
   437  		stores = stores[:0]
   438  		var memPhi *Value
   439  		for _, v := range b.Values {
   440  			if v.Op == OpPhi {
   441  				if v.Type.IsMemory() {
   442  					memPhi = v
   443  				}
   444  				continue
   445  			}
   446  			if v.Type.IsMemory() {
   447  				stores = append(stores, v)
   448  				if v.Op == OpSelect1 {
   449  					// Use the arg of the tuple-generating op.
   450  					v = v.Args[0]
   451  				}
   452  				for _, a := range v.Args {
   453  					if a.Block == b && a.Type.IsMemory() {
   454  						storeUse.add(a.ID)
   455  					}
   456  				}
   457  			}
   458  		}
   459  		if len(stores) == 0 {
   460  			lastMems[b.ID] = memPhi
   461  			continue
   462  		}
   463  
   464  		// find last store in the block
   465  		var last *Value
   466  		for _, v := range stores {
   467  			if storeUse.contains(v.ID) {
   468  				continue
   469  			}
   470  			if last != nil {
   471  				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
   472  			}
   473  			last = v
   474  		}
   475  		if last == nil {
   476  			b.Fatalf("no last store found - cycle?")
   477  		}
   478  		lastMems[b.ID] = last
   479  	}
   480  	return lastMems
   481  }
   482  
   483  type backedgesState struct {
   484  	b *Block
   485  	i int
   486  }
   487  
   488  // backedges returns a slice of successor edges that are back
   489  // edges.  For reducible loops, edge.b is the header.
   490  func backedges(f *Func) []Edge {
   491  	edges := []Edge{}
   492  	mark := make([]markKind, f.NumBlocks())
   493  	stack := []backedgesState{}
   494  
   495  	mark[f.Entry.ID] = notExplored
   496  	stack = append(stack, backedgesState{f.Entry, 0})
   497  
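        	// Iterative DFS. The marks act as the usual three-color scheme:
        	// notFound = not yet visited, notExplored = pushed but not finished
        	// (still on the stack), done = fully explored. A successor edge whose
        	// destination is still notExplored leads back to a block on the current
        	// DFS path, so it is recorded as a backedge.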
   498  	for len(stack) > 0 {
   499  		l := len(stack)
   500  		x := stack[l-1]
   501  		if x.i < len(x.b.Succs) {
   502  			e := x.b.Succs[x.i]
   503  			stack[l-1].i++
   504  			s := e.b
   505  			if mark[s.ID] == notFound {
   506  				mark[s.ID] = notExplored
   507  				stack = append(stack, backedgesState{s, 0})
   508  			} else if mark[s.ID] == notExplored {
   509  				edges = append(edges, e)
   510  			}
   511  		} else {
   512  			mark[x.b.ID] = done
   513  			stack = stack[0 : l-1]
   514  		}
   515  	}
   516  	return edges
   517  }