github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/ssa/pass_blk_layouts.go (about) 1 package ssa 2 3 import ( 4 "fmt" 5 "strings" 6 7 "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" 8 ) 9 10 // passLayoutBlocks implements Builder.LayoutBlocks. This re-organizes builder.reversePostOrderedBasicBlocks. 11 // 12 // TODO: there are tons of room for improvement here. e.g. LLVM has BlockPlacementPass using BlockFrequencyInfo, 13 // BranchProbabilityInfo, and LoopInfo to do a much better job. Also, if we have the profiling instrumentation 14 // like ball-larus algorithm, then we could do profile-guided optimization. Basically all of them are trying 15 // to maximize the fall-through opportunities which is most efficient. 16 // 17 // Here, fallthrough happens when a block ends with jump instruction whose target is the right next block in the 18 // builder.reversePostOrderedBasicBlocks. 19 // 20 // Currently, we just place blocks using the DFS reverse post-order of the dominator tree with the heuristics: 21 // 1. a split edge trampoline towards a loop header will be placed as a fallthrough. 22 // 2. we invert the brz and brnz if it makes the fallthrough more likely. 23 // 24 // This heuristic is done in maybeInvertBranches function. 25 func passLayoutBlocks(b *builder) { 26 b.clearBlkVisited() 27 28 // We might end up splitting critical edges which adds more basic blocks, 29 // so we store the currently existing basic blocks in nonSplitBlocks temporarily. 30 // That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks. 31 nonSplitBlocks := b.blkStack[:0] 32 for i, blk := range b.reversePostOrderedBasicBlocks { 33 if !blk.Valid() { 34 continue 35 } 36 nonSplitBlocks = append(nonSplitBlocks, blk) 37 if i != len(b.reversePostOrderedBasicBlocks)-1 { 38 _ = maybeInvertBranches(blk, b.reversePostOrderedBasicBlocks[i+1]) 39 } 40 } 41 42 var trampolines []*basicBlock 43 44 // Reset the order slice since we update on the fly by splitting critical edges. 45 b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0] 46 uninsertedTrampolines := b.blkStack2[:0] 47 for _, blk := range nonSplitBlocks { 48 for i := range blk.preds { 49 pred := blk.preds[i].blk 50 if _, ok := b.blkVisited[pred]; ok || !pred.Valid() { 51 continue 52 } else if pred.reversePostOrder < blk.reversePostOrder { 53 // This means the edge is critical, and this pred is the trampoline and yet to be inserted. 54 // Split edge trampolines must come before the destination in reverse post-order. 55 b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred) 56 b.blkVisited[pred] = 0 // mark as inserted, the value is not used. 57 } 58 } 59 60 // Now that we've already added all the potential trampoline blocks incoming to this block, 61 // we can add this block itself. 62 b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk) 63 b.blkVisited[blk] = 0 // mark as inserted, the value is not used. 64 65 if len(blk.success) < 2 { 66 // There won't be critical edge originating from this block. 67 continue 68 } else if blk.currentInstr.opcode == OpcodeBrTable { 69 // We don't split critical edges here, because at the construction site of BrTable, we already split the edges. 70 continue 71 } 72 73 for sidx, succ := range blk.success { 74 if !succ.ReturnBlock() && // If the successor is a return block, we need to split the edge any way because we need "epilogue" to be inserted. 75 // Plus if there's no multiple incoming edges to this successor, (pred, succ) is not critical. 76 len(succ.preds) < 2 { 77 continue 78 } 79 80 // Otherwise, we are sure this is a critical edge. To modify the CFG, we need to find the predecessor info 81 // from the successor. 82 var predInfo *basicBlockPredecessorInfo 83 for i := range succ.preds { // This linear search should not be a problem since the number of predecessors should almost always small. 84 pred := &succ.preds[i] 85 if pred.blk == blk { 86 predInfo = pred 87 break 88 } 89 } 90 91 if predInfo == nil { 92 // This must be a bug in somewhere around branch manipulation. 93 panic("BUG: predecessor info not found while the successor exists in successors list") 94 } 95 96 if wazevoapi.SSALoggingEnabled { 97 fmt.Printf("trying to split edge from %d->%d at %s\n", 98 blk.ID(), succ.ID(), predInfo.branch.Format(b)) 99 } 100 101 trampoline := b.splitCriticalEdge(blk, succ, predInfo) 102 // Update the successors slice because the target is no longer the original `succ`. 103 blk.success[sidx] = trampoline 104 105 if wazevoapi.SSAValidationEnabled { 106 trampolines = append(trampolines, trampoline) 107 } 108 109 if wazevoapi.SSALoggingEnabled { 110 fmt.Printf("edge split from %d->%d at %s as %d->%d->%d \n", 111 blk.ID(), succ.ID(), predInfo.branch.Format(b), 112 blk.ID(), trampoline.ID(), succ.ID()) 113 } 114 115 fallthroughBranch := blk.currentInstr 116 if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline { 117 // This can be lowered as fallthrough at the end of the block. 118 b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) 119 b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. 120 } else { 121 uninsertedTrampolines = append(uninsertedTrampolines, trampoline) 122 } 123 } 124 125 for _, trampoline := range uninsertedTrampolines { 126 if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself. 127 // This means the critical edge was backward, so we insert after the current block immediately. 128 b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) 129 b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. 130 } // If the target is forward, we can wait to insert until the target is inserted. 131 } 132 uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block. 133 } 134 135 if wazevoapi.SSALoggingEnabled { 136 var bs []string 137 for _, blk := range b.reversePostOrderedBasicBlocks { 138 bs = append(bs, blk.Name()) 139 } 140 fmt.Println("ordered blocks: ", strings.Join(bs, ", ")) 141 } 142 143 if wazevoapi.SSAValidationEnabled { 144 for _, trampoline := range trampolines { 145 if _, ok := b.blkVisited[trampoline]; !ok { 146 panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b)) 147 } 148 trampoline.validate(b) 149 } 150 } 151 152 // Reuse the stack for the next iteration. 153 b.blkStack2 = uninsertedTrampolines[:0] 154 } 155 156 // markFallthroughJumps finds the fallthrough jumps and marks them as such. 157 func (b *builder) markFallthroughJumps() { 158 l := len(b.reversePostOrderedBasicBlocks) - 1 159 for i, blk := range b.reversePostOrderedBasicBlocks { 160 if i < l { 161 cur := blk.currentInstr 162 if cur.opcode == OpcodeJump && cur.blk == b.reversePostOrderedBasicBlocks[i+1] { 163 cur.AsFallthroughJump() 164 } 165 } 166 } 167 } 168 169 // maybeInvertBranches inverts the branch instructions if it is likely possible to the fallthrough more likely with simple heuristics. 170 // nextInRPO is the next block in the reverse post-order. 171 // 172 // Returns true if the branch is inverted for testing purpose. 173 func maybeInvertBranches(now *basicBlock, nextInRPO *basicBlock) bool { 174 fallthroughBranch := now.currentInstr 175 if fallthroughBranch.opcode == OpcodeBrTable { 176 return false 177 } 178 179 condBranch := fallthroughBranch.prev 180 if condBranch == nil || (condBranch.opcode != OpcodeBrnz && condBranch.opcode != OpcodeBrz) { 181 return false 182 } 183 184 if len(fallthroughBranch.vs.View()) != 0 || len(condBranch.vs.View()) != 0 { 185 // If either one of them has arguments, we don't invert the branches. 186 return false 187 } 188 189 // So this block has two branches (a conditional branch followed by an unconditional branch) at the end. 190 // We can invert the condition of the branch if it makes the fallthrough more likely. 191 192 fallthroughTarget, condTarget := fallthroughBranch.blk.(*basicBlock), condBranch.blk.(*basicBlock) 193 194 if fallthroughTarget.loopHeader { 195 // First, if the tail's target is loopHeader, we don't need to do anything here, 196 // because the edge is likely to be critical edge for complex loops (e.g. loop with branches inside it). 197 // That means, we will split the edge in the end of LayoutBlocks function, and insert the trampoline block 198 // right after this block, which will be fallthrough in any way. 199 return false 200 } else if condTarget.loopHeader { 201 // On the other hand, if the condBranch's target is loopHeader, we invert the condition of the branch 202 // so that we could get the fallthrough to the trampoline block. 203 goto invert 204 } 205 206 if fallthroughTarget == nextInRPO { 207 // Also, if the tail's target is the next block in the reverse post-order, we don't need to do anything here, 208 // because if this is not critical edge, we would end up placing these two blocks adjacent to each other. 209 // Even if it is the critical edge, we place the trampoline block right after this block, which will be fallthrough in any way. 210 return false 211 } else if condTarget == nextInRPO { 212 // If the condBranch's target is the next block in the reverse post-order, we invert the condition of the branch 213 // so that we could get the fallthrough to the block. 214 goto invert 215 } else { 216 return false 217 } 218 219 invert: 220 for i := range fallthroughTarget.preds { 221 pred := &fallthroughTarget.preds[i] 222 if pred.branch == fallthroughBranch { 223 pred.branch = condBranch 224 break 225 } 226 } 227 for i := range condTarget.preds { 228 pred := &condTarget.preds[i] 229 if pred.branch == condBranch { 230 pred.branch = fallthroughBranch 231 break 232 } 233 } 234 235 condBranch.InvertBrx() 236 condBranch.blk = fallthroughTarget 237 fallthroughBranch.blk = condTarget 238 if wazevoapi.SSALoggingEnabled { 239 fmt.Printf("inverting branches at %d->%d and %d->%d\n", 240 now.ID(), fallthroughTarget.ID(), now.ID(), condTarget.ID()) 241 } 242 243 return true 244 } 245 246 // splitCriticalEdge splits the critical edge between the given predecessor (`pred`) and successor (owning `predInfo`). 247 // 248 // - `pred` is the source of the critical edge, 249 // - `succ` is the destination of the critical edge, 250 // - `predInfo` is the predecessor info in the succ.preds slice which represents the critical edge. 251 // 252 // Why splitting critical edges is important? See following links: 253 // 254 // - https://en.wikipedia.org/wiki/Control-flow_graph 255 // - https://nickdesaulniers.github.io/blog/2023/01/27/critical-edge-splitting/ 256 // 257 // The returned basic block is the trampoline block which is inserted to split the critical edge. 258 func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlockPredecessorInfo) *basicBlock { 259 // In the following, we convert the following CFG: 260 // 261 // pred --(originalBranch)--> succ 262 // 263 // to the following CFG: 264 // 265 // pred --(newBranch)--> trampoline --(originalBranch)-> succ 266 // 267 // where trampoline is a new basic block which is created to split the critical edge. 268 269 trampoline := b.allocateBasicBlock() 270 if int(trampoline.id) >= len(b.dominators) { 271 b.dominators = append(b.dominators, make([]*basicBlock, trampoline.id+1)...) 272 } 273 b.dominators[trampoline.id] = pred 274 275 originalBranch := predInfo.branch 276 277 // Replace originalBranch with the newBranch. 278 newBranch := b.AllocateInstruction() 279 newBranch.opcode = originalBranch.opcode 280 newBranch.blk = trampoline 281 switch originalBranch.opcode { 282 case OpcodeJump: 283 case OpcodeBrz, OpcodeBrnz: 284 originalBranch.opcode = OpcodeJump // Trampoline consists of one unconditional branch. 285 newBranch.v = originalBranch.v 286 originalBranch.v = ValueInvalid 287 default: 288 panic("BUG: critical edge shouldn't be originated from br_table") 289 } 290 swapInstruction(pred, originalBranch, newBranch) 291 292 // Replace the original branch with the new branch. 293 trampoline.rootInstr = originalBranch 294 trampoline.currentInstr = originalBranch 295 trampoline.success = append(trampoline.success, succ) // Do not use []*basicBlock{pred} because we might have already allocated the slice. 296 trampoline.preds = append(trampoline.preds, // same as ^. 297 basicBlockPredecessorInfo{blk: pred, branch: newBranch}) 298 b.Seal(trampoline) 299 300 // Update the original branch to point to the trampoline. 301 predInfo.blk = trampoline 302 predInfo.branch = originalBranch 303 304 if wazevoapi.SSAValidationEnabled { 305 trampoline.validate(b) 306 } 307 308 if len(trampoline.params) > 0 { 309 panic("trampoline should not have params") 310 } 311 312 // Assign the same order as the original block so that this will be placed before the actual destination. 313 trampoline.reversePostOrder = pred.reversePostOrder 314 return trampoline 315 } 316 317 // swapInstruction replaces `old` in the block `blk` with `New`. 318 func swapInstruction(blk *basicBlock, old, New *Instruction) { 319 if blk.rootInstr == old { 320 blk.rootInstr = New 321 next := old.next 322 New.next = next 323 next.prev = New 324 } else { 325 if blk.currentInstr == old { 326 blk.currentInstr = New 327 } 328 prev := old.prev 329 prev.next, New.prev = New, prev 330 if next := old.next; next != nil { 331 New.next, next.prev = next, New 332 } 333 } 334 old.prev, old.next = nil, nil 335 }