github.com/bir3/gocompiler@v0.9.2202/src/cmd/compile/internal/ssa/branchelim.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package ssa 6 7 import "github.com/bir3/gocompiler/src/cmd/internal/src" 8 9 // branchelim tries to eliminate branches by 10 // generating CondSelect instructions. 11 // 12 // Search for basic blocks that look like 13 // 14 // bb0 bb0 15 // | \ / \ 16 // | bb1 or bb1 bb2 <- trivial if/else blocks 17 // | / \ / 18 // bb2 bb3 19 // 20 // where the intermediate blocks are mostly empty (with no side-effects); 21 // rewrite Phis in the postdominator as CondSelects. 22 func branchelim(f *Func) { 23 // FIXME: add support for lowering CondSelects on more architectures 24 switch f.Config.arch { 25 case "arm64", "ppc64le", "ppc64", "amd64", "wasm", "loong64": 26 // implemented 27 default: 28 return 29 } 30 31 // Find all the values used in computing the address of any load. 32 // Typically these values have operations like AddPtr, Lsh64x64, etc. 33 loadAddr := f.newSparseSet(f.NumValues()) 34 defer f.retSparseSet(loadAddr) 35 for _, b := range f.Blocks { 36 for _, v := range b.Values { 37 switch v.Op { 38 case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64: 39 loadAddr.add(v.Args[0].ID) 40 case OpMove: 41 loadAddr.add(v.Args[1].ID) 42 } 43 } 44 } 45 po := f.postorder() 46 for { 47 n := loadAddr.size() 48 for _, b := range po { 49 for i := len(b.Values) - 1; i >= 0; i-- { 50 v := b.Values[i] 51 if !loadAddr.contains(v.ID) { 52 continue 53 } 54 for _, a := range v.Args { 55 if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() { 56 loadAddr.add(a.ID) 57 } 58 } 59 } 60 } 61 if loadAddr.size() == n { 62 break 63 } 64 } 65 66 change := true 67 for change { 68 change = false 69 for _, b := range f.Blocks { 70 change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change 71 } 72 } 73 } 74 75 func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool { 76 if loadAddr.contains(v.ID) { 77 // The result of the soon-to-be conditional move is used to compute a load address. 78 // We want to avoid generating a conditional move in this case 79 // because the load address would now be data-dependent on the condition. 80 // Previously it would only be control-dependent on the condition, which is faster 81 // if the branch predicts well (or possibly even if it doesn't, if the load will 82 // be an expensive cache miss). 83 // See issue #26306. 84 return false 85 } 86 if arch == "loong64" { 87 // We should not generate conditional moves if neither of the arguments is constant zero, 88 // because it requires three instructions (OR, MASKEQZ, MASKNEZ) and will increase the 89 // register pressure. 90 if !(v.Args[0].isGenericIntConst() && v.Args[0].AuxInt == 0) && 91 !(v.Args[1].isGenericIntConst() && v.Args[1].AuxInt == 0) { 92 return false 93 } 94 } 95 // For now, stick to simple scalars that fit in registers 96 switch { 97 case v.Type.Size() > v.Block.Func.Config.RegSize: 98 return false 99 case v.Type.IsPtrShaped(): 100 return true 101 case v.Type.IsInteger(): 102 if arch == "amd64" && v.Type.Size() < 2 { 103 // amd64 doesn't support CMOV with byte registers 104 return false 105 } 106 return true 107 default: 108 return false 109 } 110 } 111 112 // elimIf converts the one-way branch starting at dom in f to a conditional move if possible. 113 // loadAddr is a set of values which are used to compute the address of a load. 114 // Those values are exempt from CMOV generation. 115 func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool { 116 // See if dom is an If with one arm that 117 // is trivial and succeeded by the other 118 // successor of dom. 119 if dom.Kind != BlockIf || dom.Likely != BranchUnknown { 120 return false 121 } 122 var simple, post *Block 123 for i := range dom.Succs { 124 bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block() 125 if isLeafPlain(bb) && bb.Succs[0].Block() == other { 126 simple = bb 127 post = other 128 break 129 } 130 } 131 if simple == nil || len(post.Preds) != 2 || post == dom { 132 return false 133 } 134 135 // We've found our diamond CFG of blocks. 136 // Now decide if fusing 'simple' into dom+post 137 // looks profitable. 138 139 // Check that there are Phis, and that all of them 140 // can be safely rewritten to CondSelect. 141 hasphis := false 142 for _, v := range post.Values { 143 if v.Op == OpPhi { 144 hasphis = true 145 if !canCondSelect(v, f.Config.arch, loadAddr) { 146 return false 147 } 148 } 149 } 150 if !hasphis { 151 return false 152 } 153 154 // Pick some upper bound for the number of instructions 155 // we'd be willing to execute just to generate a dead 156 // argument to CondSelect. In the worst case, this is 157 // the number of useless instructions executed. 158 const maxfuseinsts = 2 159 160 if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) { 161 return false 162 } 163 164 // Replace Phi instructions in b with CondSelect instructions 165 swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post) 166 for _, v := range post.Values { 167 if v.Op != OpPhi { 168 continue 169 } 170 v.Op = OpCondSelect 171 if swap { 172 v.Args[0], v.Args[1] = v.Args[1], v.Args[0] 173 } 174 v.AddArg(dom.Controls[0]) 175 } 176 177 // Put all of the instructions into 'dom' 178 // and update the CFG appropriately. 179 dom.Kind = post.Kind 180 dom.CopyControls(post) 181 dom.Aux = post.Aux 182 dom.Succs = append(dom.Succs[:0], post.Succs...) 183 for i := range dom.Succs { 184 e := dom.Succs[i] 185 e.b.Preds[e.i].b = dom 186 } 187 188 // Try really hard to preserve statement marks attached to blocks. 189 simplePos := simple.Pos 190 postPos := post.Pos 191 simpleStmt := simplePos.IsStmt() == src.PosIsStmt 192 postStmt := postPos.IsStmt() == src.PosIsStmt 193 194 for _, v := range simple.Values { 195 v.Block = dom 196 } 197 for _, v := range post.Values { 198 v.Block = dom 199 } 200 201 // findBlockPos determines if b contains a stmt-marked value 202 // that has the same line number as the Pos for b itself. 203 // (i.e. is the position on b actually redundant?) 204 findBlockPos := func(b *Block) bool { 205 pos := b.Pos 206 for _, v := range b.Values { 207 // See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos) 208 if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt { 209 return true 210 } 211 } 212 return false 213 } 214 if simpleStmt { 215 simpleStmt = !findBlockPos(simple) 216 if !simpleStmt && simplePos.SameFileAndLine(postPos) { 217 postStmt = false 218 } 219 220 } 221 if postStmt { 222 postStmt = !findBlockPos(post) 223 } 224 225 // If simpleStmt and/or postStmt are still true, then try harder 226 // to find the corresponding statement marks new homes. 227 228 // setBlockPos determines if b contains a can-be-statement value 229 // that has the same line number as the Pos for b itself, and 230 // puts a statement mark on it, and returns whether it succeeded 231 // in this operation. 232 setBlockPos := func(b *Block) bool { 233 pos := b.Pos 234 for _, v := range b.Values { 235 if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) { 236 v.Pos = v.Pos.WithIsStmt() 237 return true 238 } 239 } 240 return false 241 } 242 // If necessary and possible, add a mark to a value in simple 243 if simpleStmt { 244 if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) { 245 postStmt = false 246 } 247 } 248 // If necessary and possible, add a mark to a value in post 249 if postStmt { 250 postStmt = !setBlockPos(post) 251 } 252 253 // Before giving up (this was added because it helps), try the end of "dom", and if that is not available, 254 // try the values in the successor block if it is uncomplicated. 255 if postStmt { 256 if dom.Pos.IsStmt() != src.PosIsStmt { 257 dom.Pos = postPos 258 } else { 259 // Try the successor block 260 if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 { 261 succ := dom.Succs[0].Block() 262 for _, v := range succ.Values { 263 if isPoorStatementOp(v.Op) { 264 continue 265 } 266 if postPos.SameFileAndLine(v.Pos) { 267 v.Pos = v.Pos.WithIsStmt() 268 } 269 postStmt = false 270 break 271 } 272 // If postStmt still true, tag the block itself if possible 273 if postStmt && succ.Pos.IsStmt() != src.PosIsStmt { 274 succ.Pos = postPos 275 } 276 } 277 } 278 } 279 280 dom.Values = append(dom.Values, simple.Values...) 281 dom.Values = append(dom.Values, post.Values...) 282 283 // Trash 'post' and 'simple' 284 clobberBlock(post) 285 clobberBlock(simple) 286 287 f.invalidateCFG() 288 return true 289 } 290 291 // is this a BlockPlain with one predecessor? 292 func isLeafPlain(b *Block) bool { 293 return b.Kind == BlockPlain && len(b.Preds) == 1 294 } 295 296 func clobberBlock(b *Block) { 297 b.Values = nil 298 b.Preds = nil 299 b.Succs = nil 300 b.Aux = nil 301 b.ResetControls() 302 b.Likely = BranchUnknown 303 b.Kind = BlockInvalid 304 } 305 306 // elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible. 307 // loadAddr is a set of values which are used to compute the address of a load. 308 // Those values are exempt from CMOV generation. 309 func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool { 310 // See if 'b' ends in an if/else: it should 311 // have two successors, both of which are BlockPlain 312 // and succeeded by the same block. 313 if b.Kind != BlockIf || b.Likely != BranchUnknown { 314 return false 315 } 316 yes, no := b.Succs[0].Block(), b.Succs[1].Block() 317 if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) { 318 return false 319 } 320 if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) { 321 return false 322 } 323 if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() { 324 return false 325 } 326 // block that postdominates the if/else 327 post := b.Succs[0].Block().Succs[0].Block() 328 if len(post.Preds) != 2 || post == b { 329 return false 330 } 331 hasphis := false 332 for _, v := range post.Values { 333 if v.Op == OpPhi { 334 hasphis = true 335 if !canCondSelect(v, f.Config.arch, loadAddr) { 336 return false 337 } 338 } 339 } 340 if !hasphis { 341 return false 342 } 343 344 // Don't generate CondSelects if branch is cheaper. 345 if !shouldElimIfElse(no, yes, post, f.Config.arch) { 346 return false 347 } 348 349 // now we're committed: rewrite each Phi as a CondSelect 350 swap := post.Preds[0].Block() != b.Succs[0].Block() 351 for _, v := range post.Values { 352 if v.Op != OpPhi { 353 continue 354 } 355 v.Op = OpCondSelect 356 if swap { 357 v.Args[0], v.Args[1] = v.Args[1], v.Args[0] 358 } 359 v.AddArg(b.Controls[0]) 360 } 361 362 // Move the contents of all of these 363 // blocks into 'b' and update CFG edges accordingly 364 b.Kind = post.Kind 365 b.CopyControls(post) 366 b.Aux = post.Aux 367 b.Succs = append(b.Succs[:0], post.Succs...) 368 for i := range b.Succs { 369 e := b.Succs[i] 370 e.b.Preds[e.i].b = b 371 } 372 for i := range post.Values { 373 post.Values[i].Block = b 374 } 375 for i := range yes.Values { 376 yes.Values[i].Block = b 377 } 378 for i := range no.Values { 379 no.Values[i].Block = b 380 } 381 b.Values = append(b.Values, yes.Values...) 382 b.Values = append(b.Values, no.Values...) 383 b.Values = append(b.Values, post.Values...) 384 385 // trash post, yes, and no 386 clobberBlock(yes) 387 clobberBlock(no) 388 clobberBlock(post) 389 390 f.invalidateCFG() 391 return true 392 } 393 394 // shouldElimIfElse reports whether estimated cost of eliminating branch 395 // is lower than threshold. 396 func shouldElimIfElse(no, yes, post *Block, arch string) bool { 397 switch arch { 398 default: 399 return true 400 case "amd64": 401 const maxcost = 2 402 phi := 0 403 other := 0 404 for _, v := range post.Values { 405 if v.Op == OpPhi { 406 // Each phi results in CondSelect, which lowers into CMOV, 407 // CMOV has latency >1 on most CPUs. 408 phi++ 409 } 410 for _, x := range v.Args { 411 if x.Block == no || x.Block == yes { 412 other++ 413 } 414 } 415 } 416 cost := phi * 1 417 if phi > 1 { 418 // If we have more than 1 phi and some values in post have args 419 // in yes or no blocks, we may have to recalculate condition, because 420 // those args may clobber flags. For now assume that all operations clobber flags. 421 cost += other * 1 422 } 423 return cost < maxcost 424 } 425 } 426 427 // canSpeculativelyExecute reports whether every value in the block can 428 // be evaluated without causing any observable side effects (memory 429 // accesses, panics and so on) except for execution time changes. It 430 // also ensures that the block does not contain any phis which we can't 431 // speculatively execute. 432 // Warning: this function cannot currently detect values that represent 433 // instructions the execution of which need to be guarded with CPU 434 // hardware feature checks. See issue #34950. 435 func canSpeculativelyExecute(b *Block) bool { 436 // don't fuse memory ops, Phi ops, divides (can panic), 437 // or anything else with side-effects 438 for _, v := range b.Values { 439 if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() || 440 v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects { 441 return false 442 } 443 } 444 return true 445 } 446 447 func isDivMod(op Op) bool { 448 switch op { 449 case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u, 450 OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u, 451 OpDiv32F, OpDiv64F, 452 OpMod8, OpMod8u, OpMod16, OpMod16u, 453 OpMod32, OpMod32u, OpMod64, OpMod64u: 454 return true 455 default: 456 return false 457 } 458 } 459 460 func isPtrArithmetic(op Op) bool { 461 // Pointer arithmetic can't be speculatively executed because the result 462 // may be an invalid pointer (if, for example, the condition is that the 463 // base pointer is not nil). See issue 56990. 464 switch op { 465 case OpOffPtr, OpAddPtr, OpSubPtr: 466 return true 467 default: 468 return false 469 } 470 }