github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/ssa/loopreschedchecks.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"fmt"

	"github.com/go-asm/go/cmd/compile/types"
)

// An edgeMem records a backedge, together with the memory
// phi function at the target of the backedge that must
// be updated when a rescheduling check replaces the backedge.
type edgeMem struct {
	e Edge
	m *Value // phi for memory at dest of e
}

// A rewriteTarget is a value and arg-index pair indicating
// where a rewrite is applied.  Note that this is for values,
// not for block controls, because block controls are not targets
// for the rewrites performed when inserting rescheduling checks.
type rewriteTarget struct {
	v *Value
	i int
}

type rewrite struct {
	before, after *Value          // before is the expected value before rewrite, after is the new value installed.
	rewrites      []rewriteTarget // all the targets for this rewrite.
}

func (r *rewrite) String() string {
	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
	for _, rw := range r.rewrites {
		s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
	}
	s += "\n"
	return s
}

// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
func insertLoopReschedChecks(f *Func) {
	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.

	// Loop reschedule checks compare the stack pointer with
	// the per-g stack bound.  If the pointer appears to be below
	// the bound, a call into the runtime to reschedule is needed.
	//
	// Steps:
	// 1. Locate backedges.
	// 2. Record memory definitions at block end so that
	//    the SSA graph for mem can be properly modified.
	// 3. Ensure that phi functions that will be needed for mem
	//    are present in the graph, initially with trivial inputs.
	// 4. Record all to-be-modified uses of mem;
	//    apply modifications (split into two steps to simplify and
	//    avoid nagging order-dependencies).
	// 5. Rewrite backedges to include the reschedule check,
	//    and modify the destination phi function appropriately with new
	//    definitions for mem.
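	//
	// For example (illustrative only), in a loop such as
	//	for i := 0; i < n; i++ { sum += a[i] }
	// the body's backedge to the header is redirected through a new test
	// block that compares SP against the goroutine's stack bound and, in
	// the unlikely case SP is below the bound (as happens when the runtime
	// poisons the bound to force preemption), calls goschedguarded before
	// continuing to the header.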

	if f.NoSplit { // nosplit functions don't reschedule.
		return
	}

	backedges := backedges(f)
	if len(backedges) == 0 { // no backedges means no rescheduling checks.
		return
	}

	lastMems := findLastMems(f)
	// findLastMems allocates the slice from the Func cache; free it when done.
	defer f.Cache.freeValueSlice(lastMems)

	idom := f.Idom()
	po := f.postorder()
	// The ordering in the dominator tree matters; it's important that
	// the walk of the dominator tree also be a preorder (i.e., a node is
	// visited only after all its non-backedge predecessors have been visited).
	sdom := newSparseOrderedTree(f, idom, po)

	if f.pass.debug > 1 {
		fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	tofixBackedges := []edgeMem{}

	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
		tofixBackedges = append(tofixBackedges, edgeMem{e, nil})
	}

	// It's possible that there is no memory state (no global/pointer loads/stores or calls)
	if lastMems[f.Entry.ID] == nil {
		lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem)
	}

	memDefsAtBlockEnds := f.Cache.allocValueSlice(f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
	defer f.Cache.freeValueSlice(memDefsAtBlockEnds)

	// Propagate last mem definitions forward through successor blocks.
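	// Iterating the postorder in reverse visits each block after all of its
	// non-backedge predecessors, so a predecessor's end-of-block mem is
	// already recorded by the time it is needed below.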
	for i := len(po) - 1; i >= 0; i-- {
		b := po[i]
		mem := lastMems[b.ID]
		for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
			// loop because there might be backedges that haven't been visited yet.
			mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
		}
		memDefsAtBlockEnds[b.ID] = mem
		if f.pass.debug > 2 {
			fmt.Printf("memDefsAtBlockEnds[%s] = %s\n", b, mem)
		}
	}

	// Maps from block to newly-inserted phi function in block.
	newmemphis := make(map[*Block]rewrite)

	// Insert phi functions as necessary for future changes to flow graph.
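	// Each backedge target (a loop header) needs a mem phi so that the
	// reschedule path added below can contribute its own memory state to
	// the loop; if the header doesn't already have one, a trivial phi is
	// created here and propagated to the dominance frontier as needed.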
	for i, emc := range tofixBackedges {
		e := emc.e
		h := e.b

		// find the phi function for the memory input at "h", if there is one.
		var headerMemPhi *Value // look for header mem phi

		for _, v := range h.Values {
			if v.Op == OpPhi && v.Type.IsMemory() {
				headerMemPhi = v
			}
		}

		if headerMemPhi == nil {
			// The header has no mem phi; make a trivial one whose inputs are
			// all the mem def reaching the header from its immediate dominator.
			mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
			headerMemPhi = newPhiFor(h, mem0)
			newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
			addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis, sdom)

		}
		tofixBackedges[i].m = headerMemPhi

	}
	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("before b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// dfPhiTargets notes inputs to phis in dominance frontiers that should not
	// be rewritten as part of the dominated children of some outer rewrite.
	dfPhiTargets := make(map[rewriteTarget]bool)

	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis, dfPhiTargets, sdom)

	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("after b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// Apply collected rewrites.
	for _, r := range newmemphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	// Rewrite backedges to include reschedule checks.
	for _, emc := range tofixBackedges {
		e := emc.e
		headerMemPhi := emc.m
		h := e.b
		i := e.i
		p := h.Preds[i]
		bb := p.b
		mem0 := headerMemPhi.Args[i]
		// The backedge e runs from bb to the header h; p = h.Preds[i] = Edge{bb, p.i}.
		// Because we're going to insert a rare-call, make sure the
		// looping edge still looks likely.
		likely := BranchLikely
		if p.i != 0 {
			likely = BranchUnlikely
		}
		if bb.Kind != BlockPlain { // backedges can be unconditional. e.g., if x { something; continue }
			bb.Likely = likely
		}

		// rewrite edge to include reschedule check
		// existing edges:
		//
		// bb.Succs[p.i] == Edge{h, i}
		// h.Preds[i] == p == Edge{bb,p.i}
		//
		// new block(s):
		// test:
		//    if sp < g.limit { goto sched }
		//    goto join
		// sched:
		//    mem1 := call resched (mem0)
		//    goto join
		// join:
		//    mem2 := phi(mem0, mem1)
		//    goto h
		//
		// and correct arg i of headerMemPhi
		//
		// EXCEPT: join block containing only phi functions is bad
		// for the register allocator.  Therefore, there is no
		// join, and branches targeting join must instead target
		// the header, and the other phi functions within header are
		// adjusted for the additional input.

		test := f.NewBlock(BlockIf)
		sched := f.NewBlock(BlockPlain)

		test.Pos = bb.Pos
		sched.Pos = bb.Pos

		// if sp < g.limit { goto sched }
		// goto header

		cfgtypes := &f.Config.Types
		pt := cfgtypes.Uintptr
		g := test.NewValue1(bb.Pos, OpGetG, pt, mem0)
		sp := test.NewValue0(bb.Pos, OpSP, pt)
		cmpOp := OpLess64U
		if pt.Size() == 4 {
			cmpOp = OpLess32U
		}
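		// The per-g stack bound (stackguard0) lives at a fixed offset inside
		// the g struct: runtime.g starts with stack.lo and stack.hi, so the
		// bound is two pointer-sizes from g, hence the 2*pt.Size() offset below.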
		limaddr := test.NewValue1I(bb.Pos, OpOffPtr, pt, 2*pt.Size(), g)
		lim := test.NewValue2(bb.Pos, OpLoad, pt, limaddr, mem0)
		cmp := test.NewValue2(bb.Pos, cmpOp, cfgtypes.Bool, sp, lim)
		test.SetControl(cmp)

		// if true, goto sched
		test.AddEdgeTo(sched)

		// if false, rewrite edge to header.
		// do NOT remove+add, because that will perturb all the other phi functions
		// as well as messing up other edges to the header.
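		// (test.AddEdgeTo(sched) above made sched successor 0 of test, so the
		// retargeted edge to the header becomes successor 1.)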
		test.Succs = append(test.Succs, Edge{h, i})
		h.Preds[i] = Edge{test, 1}
		headerMemPhi.SetArg(i, mem0)

		test.Likely = BranchUnlikely

		// sched:
		//    mem1 := call resched (mem0)
		//    goto header
		resched := f.fe.Syslook("goschedguarded")
		call := sched.NewValue1A(bb.Pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(resched, bb.Func.ABIDefault.ABIAnalyzeTypes(nil, nil)), mem0)
		mem1 := sched.NewValue1I(bb.Pos, OpSelectN, types.TypeMem, 0, call)
		sched.AddEdgeTo(h)
		headerMemPhi.AddArg(mem1)

		bb.Succs[p.i] = Edge{test, 0}
		test.Preds = append(test.Preds, Edge{bb, p.i})

		// Must correct all the other phi functions in the header for new incoming edge.
		// Except for mem phis, it will be the same value seen on the original
		// backedge at index i.
		for _, v := range h.Values {
			if v.Op == OpPhi && v != headerMemPhi {
				v.AddArg(v.Args[i])
			}
		}
	}

	f.invalidateCFG()

	if f.pass.debug > 1 {
		sdom = newSparseTree(f, f.Idom())
		fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}
}

// newPhiFor inserts a new Phi function into b,
// with all inputs set to v.
func newPhiFor(b *Block, v *Value) *Value {
	phiV := b.NewValue0(b.Pos, OpPhi, v.Type)

	for range b.Preds {
		phiV.AddArg(v)
	}
	return phiV
}

// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
// in block h will replace a previous definition.  Block b is the block currently being processed;
// if b has its own phi definition then it takes the place of h.
// defsForUses provides information about other definitions of the variable that are present
// (and if nil, indicates that the variable is no longer live).
// sdom must yield a preorder of the flow graph if recursively walked, root-to-children.
// The result of newSparseOrderedTree with order supplied by a DFS postorder satisfies this
// requirement.
func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite, dfPhiTargets map[rewriteTarget]bool, sdom SparseTree) {
	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
	if _, ok := newphis[b]; ok {
		h = b
	}
	change := newphis[h]
	x := change.before
	y := change.after

	// Apply rewrites to this block
	if x != nil { // don't waste time on the common case of no definition.
		p := &change.rewrites
		for _, v := range b.Values {
			if v == y { // don't rewrite self -- phi inputs are handled below.
				continue
			}
			for i, w := range v.Args {
				if w != x {
					continue
				}
				tgt := rewriteTarget{v, i}

				// It's possible dominated control flow will rewrite this instead.
				// Visiting in preorder (a property of how sdom was constructed)
				// ensures that these are seen in the proper order.
				if dfPhiTargets[tgt] {
					continue
				}
				*p = append(*p, tgt)
				if f.pass.debug > 1 {
					fmt.Printf("added block target for h=%v, b=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
						h, b, x, y, v, i)
				}
			}
		}

		// Rewrite appropriate inputs of phis reached in successors
		// in dominance frontier, self, and dominated.
		// If the variable def reaching uses in b is itself defined in b, then the new phi function
		// does not reach the successors of b.  (This assumes a bit about the structure of the
		// phi use-def graph, but it's true for memory.)
		if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
			for _, e := range b.Succs {
				s := e.b

				for _, v := range s.Values {
					if v.Op == OpPhi && v.Args[e.i] == x {
						tgt := rewriteTarget{v, e.i}
						*p = append(*p, tgt)
						dfPhiTargets[tgt] = true
						if f.pass.debug > 1 {
							fmt.Printf("added phi target for h=%v, b=%v, s=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
								h, b, s, x, y, v.LongString(), e.i)
						}
						break
					}
				}
			}
		}
		newphis[h] = change
	}

	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		rewriteNewPhis(h, c, f, defsForUses, newphis, dfPhiTargets, sdom) // TODO: convert to explicit stack from recursion.
	}
}

// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
// These new phis can only occur at the dominance frontier of h; block s is in the dominance
// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
// either b = h or h strictly dominates b.
// These newly created phis are themselves new definitions that may require addition of their
// own trivial phi functions in their own dominance frontier, and this is handled recursively.
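// For example, in an if/else diamond where one arm redefines x, the join block
// is in the dominance frontier of that arm and needs a trivial phi merging the
// old and new definitions.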
func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite, sdom SparseTree) {
	oldv := defForUses[b.ID]
	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
		return
	}
	idom := f.Idom()
outer:
	for _, e := range b.Succs {
		s := e.b
		// check phi functions in the dominance frontier
		if sdom.isAncestor(h, s) {
			continue // h dominates s, successor of b, therefore s is not in the frontier.
		}
		if _, ok := newphis[s]; ok {
			continue // successor s of b already has a new phi function, so there is no need to add another.
		}
		if x != nil {
			for _, v := range s.Values {
				if v.Op == OpPhi && v.Args[e.i] == x {
					continue outer // successor s of b has an old phi function, so there is no need to add another.
				}
			}
		}

		old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
		headerPhi := newPhiFor(s, old)
		// the new phi will replace "old" in block s and all blocks dominated by s.
		newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
		addDFphis(old, s, s, f, defForUses, newphis, sdom)  // the new definition may also create new phi functions.
	}
	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		addDFphis(x, h, c, f, defForUses, newphis, sdom) // TODO: convert to explicit stack from recursion.
	}
}

// findLastMems maps block IDs to the last memory-output op in each block, if any.
func findLastMems(f *Func) []*Value {

	var stores []*Value
	// The result is allocated from the Func cache; the caller is responsible
	// for returning it via f.Cache.freeValueSlice once it is done with it.
	lastMems := f.Cache.allocValueSlice(f.NumBlocks())
	storeUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(storeUse)
	for _, b := range f.Blocks {
		// Find all the stores in this block. Categorize their uses:
		//  storeUse contains stores which are used by a subsequent store.
		storeUse.clear()
		stores = stores[:0]
		var memPhi *Value
		for _, v := range b.Values {
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					memPhi = v
				}
				continue
			}
			if v.Type.IsMemory() {
				stores = append(stores, v)
				for _, a := range v.Args {
					if a.Block == b && a.Type.IsMemory() {
						storeUse.add(a.ID)
					}
				}
			}
		}
		if len(stores) == 0 {
			lastMems[b.ID] = memPhi
			continue
		}

		// find last store in the block
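		// (i.e., the one store whose mem result is not consumed by another
		// store in this block; within a block the memory chain is linear.)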
		var last *Value
		for _, v := range stores {
			if storeUse.contains(v.ID) {
				continue
			}
			if last != nil {
				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
			}
			last = v
		}
		if last == nil {
			b.Fatalf("no last store found - cycle?")
		}

		// If this is a tuple containing a mem, select just
		// the mem. This will generate ops we don't need, but
		// it's the easiest thing to do.
		if last.Type.IsTuple() {
			last = b.NewValue1(last.Pos, OpSelect1, types.TypeMem, last)
		} else if last.Type.IsResults() {
			last = b.NewValue1I(last.Pos, OpSelectN, types.TypeMem, int64(last.Type.NumFields()-1), last)
		}

		lastMems[b.ID] = last
	}
	return lastMems
}

// markKind tracks the state of a block during the depth-first search for backedges.
type markKind uint8

const (
	notFound    markKind = iota // block has not been discovered yet
	notExplored                 // discovered and in queue, outedges not processed yet
	explored                    // discovered and in queue, outedges processed
	done                        // all done, in output ordering
)

type backedgesState struct {
	b *Block
	i int
}

// backedges returns a slice of successor edges that are back
// edges.  For reducible loops, edge.b is the header.
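// Backedges are found with an iterative depth-first search: an edge whose
// destination is still on the DFS stack (marked notExplored) closes a cycle
// and is recorded as a backedge.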
func backedges(f *Func) []Edge {
	edges := []Edge{}
	mark := make([]markKind, f.NumBlocks())
	stack := []backedgesState{}

	mark[f.Entry.ID] = notExplored
	stack = append(stack, backedgesState{f.Entry, 0})

	for len(stack) > 0 {
		l := len(stack)
		x := stack[l-1]
		if x.i < len(x.b.Succs) {
			e := x.b.Succs[x.i]
			stack[l-1].i++
			s := e.b
			if mark[s.ID] == notFound {
				mark[s.ID] = notExplored
				stack = append(stack, backedgesState{s, 0})
			} else if mark[s.ID] == notExplored {
				edges = append(edges, e)
			}
		} else {
			mark[x.b.ID] = done
			stack = stack[0 : l-1]
		}
	}
	return edges
}