github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/ssa/pass_blk_layouts.go (about)

     1  package ssa
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  
     7  	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
     8  )
     9  
    10  // passLayoutBlocks implements Builder.LayoutBlocks. This re-organizes builder.reversePostOrderedBasicBlocks.
    11  //
    12  // TODO: there are tons of room for improvement here. e.g. LLVM has BlockPlacementPass using BlockFrequencyInfo,
    13  // BranchProbabilityInfo, and LoopInfo to do a much better job. Also, if we have the profiling instrumentation
    14  // like ball-larus algorithm, then we could do profile-guided optimization. Basically all of them are trying
    15  // to maximize the fall-through opportunities which is most efficient.
    16  //
    17  // Here, fallthrough happens when a block ends with jump instruction whose target is the right next block in the
    18  // builder.reversePostOrderedBasicBlocks.
    19  //
    20  // Currently, we just place blocks using the DFS reverse post-order of the dominator tree with the heuristics:
    21  //  1. a split edge trampoline towards a loop header will be placed as a fallthrough.
    22  //  2. we invert the brz and brnz if it makes the fallthrough more likely.
    23  //
    24  // This heuristic is done in maybeInvertBranches function.
    25  func passLayoutBlocks(b *builder) {
    26  	b.clearBlkVisited()
    27  
    28  	// We might end up splitting critical edges which adds more basic blocks,
    29  	// so we store the currently existing basic blocks in nonSplitBlocks temporarily.
    30  	// That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks.
    31  	nonSplitBlocks := b.blkStack[:0]
    32  	for i, blk := range b.reversePostOrderedBasicBlocks {
    33  		if !blk.Valid() {
    34  			continue
    35  		}
    36  		nonSplitBlocks = append(nonSplitBlocks, blk)
    37  		if i != len(b.reversePostOrderedBasicBlocks)-1 {
    38  			_ = maybeInvertBranches(blk, b.reversePostOrderedBasicBlocks[i+1])
    39  		}
    40  	}
    41  
    42  	var trampolines []*basicBlock
    43  
    44  	// Reset the order slice since we update on the fly by splitting critical edges.
    45  	b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0]
    46  	uninsertedTrampolines := b.blkStack2[:0]
    47  	for _, blk := range nonSplitBlocks {
    48  		for i := range blk.preds {
    49  			pred := blk.preds[i].blk
    50  			if _, ok := b.blkVisited[pred]; ok || !pred.Valid() {
    51  				continue
    52  			} else if pred.reversePostOrder < blk.reversePostOrder {
    53  				// This means the edge is critical, and this pred is the trampoline and yet to be inserted.
    54  				// Split edge trampolines must come before the destination in reverse post-order.
    55  				b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred)
    56  				b.blkVisited[pred] = 0 // mark as inserted, the value is not used.
    57  			}
    58  		}
    59  
    60  		// Now that we've already added all the potential trampoline blocks incoming to this block,
    61  		// we can add this block itself.
    62  		b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk)
    63  		b.blkVisited[blk] = 0 // mark as inserted, the value is not used.
    64  
    65  		if len(blk.success) < 2 {
    66  			// There won't be critical edge originating from this block.
    67  			continue
    68  		} else if blk.currentInstr.opcode == OpcodeBrTable {
    69  			// We don't split critical edges here, because at the construction site of BrTable, we already split the edges.
    70  			continue
    71  		}
    72  
    73  		for sidx, succ := range blk.success {
    74  			if !succ.ReturnBlock() && // If the successor is a return block, we need to split the edge any way because we need "epilogue" to be inserted.
    75  				// Plus if there's no multiple incoming edges to this successor, (pred, succ) is not critical.
    76  				len(succ.preds) < 2 {
    77  				continue
    78  			}
    79  
    80  			// Otherwise, we are sure this is a critical edge. To modify the CFG, we need to find the predecessor info
    81  			// from the successor.
    82  			var predInfo *basicBlockPredecessorInfo
    83  			for i := range succ.preds { // This linear search should not be a problem since the number of predecessors should almost always small.
    84  				pred := &succ.preds[i]
    85  				if pred.blk == blk {
    86  					predInfo = pred
    87  					break
    88  				}
    89  			}
    90  
    91  			if predInfo == nil {
    92  				// This must be a bug in somewhere around branch manipulation.
    93  				panic("BUG: predecessor info not found while the successor exists in successors list")
    94  			}
    95  
    96  			if wazevoapi.SSALoggingEnabled {
    97  				fmt.Printf("trying to split edge from %d->%d at %s\n",
    98  					blk.ID(), succ.ID(), predInfo.branch.Format(b))
    99  			}
   100  
   101  			trampoline := b.splitCriticalEdge(blk, succ, predInfo)
   102  			// Update the successors slice because the target is no longer the original `succ`.
   103  			blk.success[sidx] = trampoline
   104  
   105  			if wazevoapi.SSAValidationEnabled {
   106  				trampolines = append(trampolines, trampoline)
   107  			}
   108  
   109  			if wazevoapi.SSALoggingEnabled {
   110  				fmt.Printf("edge split from %d->%d at %s as %d->%d->%d \n",
   111  					blk.ID(), succ.ID(), predInfo.branch.Format(b),
   112  					blk.ID(), trampoline.ID(), succ.ID())
   113  			}
   114  
   115  			fallthroughBranch := blk.currentInstr
   116  			if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline {
   117  				// This can be lowered as fallthrough at the end of the block.
   118  				b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
   119  				b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
   120  			} else {
   121  				uninsertedTrampolines = append(uninsertedTrampolines, trampoline)
   122  			}
   123  		}
   124  
   125  		for _, trampoline := range uninsertedTrampolines {
   126  			if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself.
   127  				// This means the critical edge was backward, so we insert after the current block immediately.
   128  				b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
   129  				b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
   130  			} // If the target is forward, we can wait to insert until the target is inserted.
   131  		}
   132  		uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block.
   133  	}
   134  
   135  	if wazevoapi.SSALoggingEnabled {
   136  		var bs []string
   137  		for _, blk := range b.reversePostOrderedBasicBlocks {
   138  			bs = append(bs, blk.Name())
   139  		}
   140  		fmt.Println("ordered blocks: ", strings.Join(bs, ", "))
   141  	}
   142  
   143  	if wazevoapi.SSAValidationEnabled {
   144  		for _, trampoline := range trampolines {
   145  			if _, ok := b.blkVisited[trampoline]; !ok {
   146  				panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b))
   147  			}
   148  			trampoline.validate(b)
   149  		}
   150  	}
   151  
   152  	// Reuse the stack for the next iteration.
   153  	b.blkStack2 = uninsertedTrampolines[:0]
   154  }
   155  
   156  // markFallthroughJumps finds the fallthrough jumps and marks them as such.
   157  func (b *builder) markFallthroughJumps() {
   158  	l := len(b.reversePostOrderedBasicBlocks) - 1
   159  	for i, blk := range b.reversePostOrderedBasicBlocks {
   160  		if i < l {
   161  			cur := blk.currentInstr
   162  			if cur.opcode == OpcodeJump && cur.blk == b.reversePostOrderedBasicBlocks[i+1] {
   163  				cur.AsFallthroughJump()
   164  			}
   165  		}
   166  	}
   167  }
   168  
   169  // maybeInvertBranches inverts the branch instructions if it is likely possible to the fallthrough more likely with simple heuristics.
   170  // nextInRPO is the next block in the reverse post-order.
   171  //
   172  // Returns true if the branch is inverted for testing purpose.
   173  func maybeInvertBranches(now *basicBlock, nextInRPO *basicBlock) bool {
   174  	fallthroughBranch := now.currentInstr
   175  	if fallthroughBranch.opcode == OpcodeBrTable {
   176  		return false
   177  	}
   178  
   179  	condBranch := fallthroughBranch.prev
   180  	if condBranch == nil || (condBranch.opcode != OpcodeBrnz && condBranch.opcode != OpcodeBrz) {
   181  		return false
   182  	}
   183  
   184  	if len(fallthroughBranch.vs.View()) != 0 || len(condBranch.vs.View()) != 0 {
   185  		// If either one of them has arguments, we don't invert the branches.
   186  		return false
   187  	}
   188  
   189  	// So this block has two branches (a conditional branch followed by an unconditional branch) at the end.
   190  	// We can invert the condition of the branch if it makes the fallthrough more likely.
   191  
   192  	fallthroughTarget, condTarget := fallthroughBranch.blk.(*basicBlock), condBranch.blk.(*basicBlock)
   193  
   194  	if fallthroughTarget.loopHeader {
   195  		// First, if the tail's target is loopHeader, we don't need to do anything here,
   196  		// because the edge is likely to be critical edge for complex loops (e.g. loop with branches inside it).
   197  		// That means, we will split the edge in the end of LayoutBlocks function, and insert the trampoline block
   198  		// right after this block, which will be fallthrough in any way.
   199  		return false
   200  	} else if condTarget.loopHeader {
   201  		// On the other hand, if the condBranch's target is loopHeader, we invert the condition of the branch
   202  		// so that we could get the fallthrough to the trampoline block.
   203  		goto invert
   204  	}
   205  
   206  	if fallthroughTarget == nextInRPO {
   207  		// Also, if the tail's target is the next block in the reverse post-order, we don't need to do anything here,
   208  		// because if this is not critical edge, we would end up placing these two blocks adjacent to each other.
   209  		// Even if it is the critical edge, we place the trampoline block right after this block, which will be fallthrough in any way.
   210  		return false
   211  	} else if condTarget == nextInRPO {
   212  		// If the condBranch's target is the next block in the reverse post-order, we invert the condition of the branch
   213  		// so that we could get the fallthrough to the block.
   214  		goto invert
   215  	} else {
   216  		return false
   217  	}
   218  
   219  invert:
   220  	for i := range fallthroughTarget.preds {
   221  		pred := &fallthroughTarget.preds[i]
   222  		if pred.branch == fallthroughBranch {
   223  			pred.branch = condBranch
   224  			break
   225  		}
   226  	}
   227  	for i := range condTarget.preds {
   228  		pred := &condTarget.preds[i]
   229  		if pred.branch == condBranch {
   230  			pred.branch = fallthroughBranch
   231  			break
   232  		}
   233  	}
   234  
   235  	condBranch.InvertBrx()
   236  	condBranch.blk = fallthroughTarget
   237  	fallthroughBranch.blk = condTarget
   238  	if wazevoapi.SSALoggingEnabled {
   239  		fmt.Printf("inverting branches at %d->%d and %d->%d\n",
   240  			now.ID(), fallthroughTarget.ID(), now.ID(), condTarget.ID())
   241  	}
   242  
   243  	return true
   244  }
   245  
   246  // splitCriticalEdge splits the critical edge between the given predecessor (`pred`) and successor (owning `predInfo`).
   247  //
   248  // - `pred` is the source of the critical edge,
   249  // - `succ` is the destination of the critical edge,
   250  // - `predInfo` is the predecessor info in the succ.preds slice which represents the critical edge.
   251  //
   252  // Why splitting critical edges is important? See following links:
   253  //
   254  //   - https://en.wikipedia.org/wiki/Control-flow_graph
   255  //   - https://nickdesaulniers.github.io/blog/2023/01/27/critical-edge-splitting/
   256  //
   257  // The returned basic block is the trampoline block which is inserted to split the critical edge.
   258  func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlockPredecessorInfo) *basicBlock {
   259  	// In the following, we convert the following CFG:
   260  	//
   261  	//     pred --(originalBranch)--> succ
   262  	//
   263  	// to the following CFG:
   264  	//
   265  	//     pred --(newBranch)--> trampoline --(originalBranch)-> succ
   266  	//
   267  	// where trampoline is a new basic block which is created to split the critical edge.
   268  
   269  	trampoline := b.allocateBasicBlock()
   270  	if int(trampoline.id) >= len(b.dominators) {
   271  		b.dominators = append(b.dominators, make([]*basicBlock, trampoline.id+1)...)
   272  	}
   273  	b.dominators[trampoline.id] = pred
   274  
   275  	originalBranch := predInfo.branch
   276  
   277  	// Replace originalBranch with the newBranch.
   278  	newBranch := b.AllocateInstruction()
   279  	newBranch.opcode = originalBranch.opcode
   280  	newBranch.blk = trampoline
   281  	switch originalBranch.opcode {
   282  	case OpcodeJump:
   283  	case OpcodeBrz, OpcodeBrnz:
   284  		originalBranch.opcode = OpcodeJump // Trampoline consists of one unconditional branch.
   285  		newBranch.v = originalBranch.v
   286  		originalBranch.v = ValueInvalid
   287  	default:
   288  		panic("BUG: critical edge shouldn't be originated from br_table")
   289  	}
   290  	swapInstruction(pred, originalBranch, newBranch)
   291  
   292  	// Replace the original branch with the new branch.
   293  	trampoline.rootInstr = originalBranch
   294  	trampoline.currentInstr = originalBranch
   295  	trampoline.success = append(trampoline.success, succ) // Do not use []*basicBlock{pred} because we might have already allocated the slice.
   296  	trampoline.preds = append(trampoline.preds,           // same as ^.
   297  		basicBlockPredecessorInfo{blk: pred, branch: newBranch})
   298  	b.Seal(trampoline)
   299  
   300  	// Update the original branch to point to the trampoline.
   301  	predInfo.blk = trampoline
   302  	predInfo.branch = originalBranch
   303  
   304  	if wazevoapi.SSAValidationEnabled {
   305  		trampoline.validate(b)
   306  	}
   307  
   308  	if len(trampoline.params) > 0 {
   309  		panic("trampoline should not have params")
   310  	}
   311  
   312  	// Assign the same order as the original block so that this will be placed before the actual destination.
   313  	trampoline.reversePostOrder = pred.reversePostOrder
   314  	return trampoline
   315  }
   316  
   317  // swapInstruction replaces `old` in the block `blk` with `New`.
   318  func swapInstruction(blk *basicBlock, old, New *Instruction) {
   319  	if blk.rootInstr == old {
   320  		blk.rootInstr = New
   321  		next := old.next
   322  		New.next = next
   323  		next.prev = New
   324  	} else {
   325  		if blk.currentInstr == old {
   326  			blk.currentInstr = New
   327  		}
   328  		prev := old.prev
   329  		prev.next, New.prev = New, prev
   330  		if next := old.next; next != nil {
   331  			New.next, next.prev = next, New
   332  		}
   333  	}
   334  	old.prev, old.next = nil, nil
   335  }