github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/ssa/loopreschedchecks.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"fmt"

	"github.com/go-asm/go/cmd/compile/types"
)

// an edgeMem records a backedge, together with the memory
// phi functions at the target of the backedge that must
// be updated when a rescheduling check replaces the backedge.
type edgeMem struct {
	e Edge
	m *Value // phi for memory at dest of e
}

// a rewriteTarget is a value-argindex pair indicating
// where a rewrite is applied. Note that this is for values,
// not for block controls, because block controls are not targets
// for the rewrites performed in inserting rescheduling checks.
type rewriteTarget struct {
	v *Value
	i int
}

type rewrite struct {
	before, after *Value          // before is the expected value before rewrite, after is the new value installed.
	rewrites      []rewriteTarget // all the targets for this rewrite.
}

func (r *rewrite) String() string {
	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
	for _, rw := range r.rewrites {
		s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
	}
	s += "\n"
	return s
}
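// Applying a rewrite later in the pass is just a matter of pointing each
// recorded use at the new value; a minimal sketch of that step (it appears
// verbatim in insertLoopReschedChecks below):
//
//	for _, rw := range r.rewrites {
//		rw.v.SetArg(rw.i, r.after)
//	}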
// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
func insertLoopReschedChecks(f *Func) {
	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.

	// Loop reschedule checks compare the stack pointer with
	// the per-g stack bound. If the stack pointer falls below
	// the bound, the backedge is routed through a call to the
	// rescheduling helper.
	//
	// Steps:
	// 1. locate backedges.
	// 2. Record memory definitions at block end so that
	//    the SSA graph for mem can be properly modified.
	// 3. Ensure that phi functions that will be needed for mem
	//    are present in the graph, initially with trivial inputs.
	// 4. Record all to-be-modified uses of mem;
	//    apply modifications (split into two steps to simplify and
	//    avoid nagging order-dependencies).
	// 5. Rewrite backedges to include reschedule check,
	//    and modify destination phi function appropriately with new
	//    definitions for mem.

	if f.NoSplit { // nosplit functions don't reschedule.
		return
	}

	backedges := backedges(f)
	if len(backedges) == 0 { // no backedges means no rescheduling checks.
		return
	}

	lastMems := findLastMems(f)

	idom := f.Idom()
	po := f.postorder()
	// The ordering in the dominator tree matters; it's important that
	// the walk of the dominator tree also be a preorder (i.e., a node is
	// visited only after all its non-backedge predecessors have been visited).
	sdom := newSparseOrderedTree(f, idom, po)

	if f.pass.debug > 1 {
		fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	tofixBackedges := []edgeMem{}

	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
		tofixBackedges = append(tofixBackedges, edgeMem{e, nil})
	}

	// It's possible that there is no memory state (no global/pointer loads/stores or calls).
	if lastMems[f.Entry.ID] == nil {
		lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem)
	}

	memDefsAtBlockEnds := f.Cache.allocValueSlice(f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
	defer f.Cache.freeValueSlice(memDefsAtBlockEnds)

	// Propagate last mem definitions forward through successor blocks.
	for i := len(po) - 1; i >= 0; i-- {
		b := po[i]
		mem := lastMems[b.ID]
		for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
			// loop because there might be backedges that haven't been visited yet.
			mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
		}
		memDefsAtBlockEnds[b.ID] = mem
		if f.pass.debug > 2 {
			fmt.Printf("memDefsAtBlockEnds[%s] = %s\n", b, mem)
		}
	}

	// Maps from block to newly-inserted phi function in block.
	newmemphis := make(map[*Block]rewrite)

	// Insert phi functions as necessary for future changes to flow graph.
	for i, emc := range tofixBackedges {
		e := emc.e
		h := e.b

		// find the phi function for the memory input at "h", if there is one.
		var headerMemPhi *Value // look for header mem phi

		for _, v := range h.Values {
			if v.Op == OpPhi && v.Type.IsMemory() {
				headerMemPhi = v
			}
		}

		if headerMemPhi == nil {
			// if the header has no mem phi, make a trivial one whose inputs
			// are the mem def flowing out of the immediate dominator.
			mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
			headerMemPhi = newPhiFor(h, mem0)
			newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
			addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis, sdom)

		}
		tofixBackedges[i].m = headerMemPhi

	}
	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("before b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// dfPhiTargets notes inputs to phis in dominance frontiers that should not
	// be rewritten as part of the dominated children of some outer rewrite.
	dfPhiTargets := make(map[rewriteTarget]bool)

	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis, dfPhiTargets, sdom)

	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("after b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// Apply collected rewrites.
	for _, r := range newmemphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	// Rewrite backedges to include reschedule checks.
	for _, emc := range tofixBackedges {
		e := emc.e
		headerMemPhi := emc.m
		h := e.b
		i := e.i
		p := h.Preds[i]
		bb := p.b
		mem0 := headerMemPhi.Args[i]
		// The backedge e runs from bb to the header h; p is the matching
		// predecessor edge stored in h. Because we're going to insert a
		// rarely taken call on this path, make sure the looping edge still
		// looks likely.
		likely := BranchLikely
		if p.i != 0 {
			likely = BranchUnlikely
		}
		if bb.Kind != BlockPlain { // backedges can be unconditional. e.g., if x { something; continue }
			bb.Likely = likely
		}

		// rewrite edge to include reschedule check
		// existing edges:
		//
		// bb.Succs[p.i] == Edge{h, i}
		// h.Preds[i] == p == Edge{bb,p.i}
		//
		// new block(s):
		// test:
		//    if sp < g.limit { goto sched }
		//    goto join
		// sched:
		//    mem1 := call resched (mem0)
		//    goto join
		// join:
		//    mem2 := phi(mem0, mem1)
		//    goto h
		//
		// and correct arg i of headerMemPhi
		//
		// EXCEPT: a join block containing only phi functions is bad
		// for the register allocator. Therefore, there is no
		// join, and branches targeting join must instead target
		// the header, and the other phi functions within header are
		// adjusted for the additional input.

		test := f.NewBlock(BlockIf)
		sched := f.NewBlock(BlockPlain)

		test.Pos = bb.Pos
		sched.Pos = bb.Pos

		// if sp < g.limit { goto sched }
		// goto header
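		// A note on the limit used below (a sketch of the assumed runtime
		// layout, not something this file defines): the word loaded from
		// offset 2*ptrSize into the g struct is g.stackguard0, which sits
		// just after stack.lo and stack.hi. The runtime can set stackguard0
		// to a poison value so that the SP-below-limit comparison succeeds
		// and the loop takes the rarely taken path through goschedguarded,
		// yielding the processor.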
		cfgtypes := &f.Config.Types
		pt := cfgtypes.Uintptr
		g := test.NewValue1(bb.Pos, OpGetG, pt, mem0)
		sp := test.NewValue0(bb.Pos, OpSP, pt)
		cmpOp := OpLess64U
		if pt.Size() == 4 {
			cmpOp = OpLess32U
		}
		limaddr := test.NewValue1I(bb.Pos, OpOffPtr, pt, 2*pt.Size(), g)
		lim := test.NewValue2(bb.Pos, OpLoad, pt, limaddr, mem0)
		cmp := test.NewValue2(bb.Pos, cmpOp, cfgtypes.Bool, sp, lim)
		test.SetControl(cmp)

		// if true, goto sched
		test.AddEdgeTo(sched)

		// if false, rewrite edge to header.
		// do NOT remove+add, because that will perturb all the other phi functions
		// as well as mess up other edges to the header.
		test.Succs = append(test.Succs, Edge{h, i})
		h.Preds[i] = Edge{test, 1}
		headerMemPhi.SetArg(i, mem0)

		test.Likely = BranchUnlikely

		// sched:
		//    mem1 := call resched (mem0)
		//    goto header
		resched := f.fe.Syslook("goschedguarded")
		call := sched.NewValue1A(bb.Pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(resched, bb.Func.ABIDefault.ABIAnalyzeTypes(nil, nil)), mem0)
		mem1 := sched.NewValue1I(bb.Pos, OpSelectN, types.TypeMem, 0, call)
		sched.AddEdgeTo(h)
		headerMemPhi.AddArg(mem1)

		bb.Succs[p.i] = Edge{test, 0}
		test.Preds = append(test.Preds, Edge{bb, p.i})

		// Must correct all the other phi functions in the header for the new incoming edge.
		// Except for mem phis, it will be the same value seen on the original
		// backedge at index i.
		for _, v := range h.Values {
			if v.Op == OpPhi && v != headerMemPhi {
				v.AddArg(v.Args[i])
			}
		}
	}

	f.invalidateCFG()

	if f.pass.debug > 1 {
		sdom = newSparseTree(f, f.Idom())
		fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}
}

// newPhiFor inserts a new Phi function into b,
// with all inputs set to v.
func newPhiFor(b *Block, v *Value) *Value {
	phiV := b.NewValue0(b.Pos, OpPhi, v.Type)

	for range b.Preds {
		phiV.AddArg(v)
	}
	return phiV
}
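// A small worked example of how the pieces above fit together (a sketch,
// not tied to any particular function): suppose loop header h acquires a
// new memory phi y whose inputs all start out as x, the memory value
// flowing in from h's immediate dominator. Every use of x in h and in the
// blocks h dominates must then be redirected to y, and any phi in h's
// dominance frontier that received x along an edge from a dominated block
// must receive y on that edge instead. rewriteNewPhis collects exactly
// those (value, argument-index) pairs as rewriteTargets; the SetArg calls
// happen afterwards, back in insertLoopReschedChecks.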
// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
// in block h will replace a previous definition. Block b is the block currently being processed;
// if b has its own phi definition then it takes the place of h.
// defsForUses provides information about other definitions of the variable that are present
// (a nil entry indicates that the variable is no longer live).
// sdom must yield a preorder of the flow graph if recursively walked, root-to-children.
// The result of newSparseOrderedTree with order supplied by a dfs-postorder satisfies this
// requirement.
func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite, dfPhiTargets map[rewriteTarget]bool, sdom SparseTree) {
	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
	if _, ok := newphis[b]; ok {
		h = b
	}
	change := newphis[h]
	x := change.before
	y := change.after

	// Apply rewrites to this block
	if x != nil { // don't waste time on the common case of no definition.
		p := &change.rewrites
		for _, v := range b.Values {
			if v == y { // don't rewrite self -- phi inputs are handled below.
				continue
			}
			for i, w := range v.Args {
				if w != x {
					continue
				}
				tgt := rewriteTarget{v, i}

				// It's possible dominated control flow will rewrite this instead.
				// Visiting in preorder (a property of how sdom was constructed)
				// ensures that these are seen in the proper order.
				if dfPhiTargets[tgt] {
					continue
				}
				*p = append(*p, tgt)
				if f.pass.debug > 1 {
					fmt.Printf("added block target for h=%v, b=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
						h, b, x, y, v, i)
				}
			}
		}

		// Rewrite appropriate inputs of phis reached in successors
		// in dominance frontier, self, and dominated.
		// If the variable def reaching uses in b is itself defined in b, then the new phi function
		// does not reach the successors of b. (This assumes a bit about the structure of the
		// phi use-def graph, but it's true for memory.)
		if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
			for _, e := range b.Succs {
				s := e.b

				for _, v := range s.Values {
					if v.Op == OpPhi && v.Args[e.i] == x {
						tgt := rewriteTarget{v, e.i}
						*p = append(*p, tgt)
						dfPhiTargets[tgt] = true
						if f.pass.debug > 1 {
							fmt.Printf("added phi target for h=%v, b=%v, s=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
								h, b, s, x, y, v.LongString(), e.i)
						}
						break
					}
				}
			}
		}
		newphis[h] = change
	}

	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		rewriteNewPhis(h, c, f, defsForUses, newphis, dfPhiTargets, sdom) // TODO: convert to explicit stack from recursion.
	}
}
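// To make the dominance-frontier rule used below concrete, here is a tiny
// example CFG (a sketch only):
//
//	   entry
//	   /   \
//	  h     c
//	   \   /
//	    join
//
// A new definition placed in h reaches join, but h does not strictly
// dominate join (join is also reachable through c), so join lies in h's
// dominance frontier and needs a trivial phi merging the new definition
// with whatever value arrives from c.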
// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
// These new phis can only occur at the dominance frontier of h; block s is in the dominance
// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
// either b = h or h strictly dominates b.
// These newly created phis are themselves new definitions that may require addition of their
// own trivial phi functions in their own dominance frontier, and this is handled recursively.
func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite, sdom SparseTree) {
	oldv := defForUses[b.ID]
	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b.
		return
	}
	idom := f.Idom()
outer:
	for _, e := range b.Succs {
		s := e.b
		// check phi functions in the dominance frontier
		if sdom.isAncestor(h, s) {
			continue // h dominates s, successor of b, therefore s is not in the frontier.
		}
		if _, ok := newphis[s]; ok {
			continue // successor s of b already has a new phi function, so there is no need to add another.
		}
		if x != nil {
			for _, v := range s.Values {
				if v.Op == OpPhi && v.Args[e.i] == x {
					continue outer // successor s of b has an old phi function, so there is no need to add another.
				}
			}
		}

		old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
		headerPhi := newPhiFor(s, old)
		// the new phi will replace "old" in block s and all blocks dominated by s.
		newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
		addDFphis(old, s, s, f, defForUses, newphis, sdom)  // the new definition may also create new phi functions.
	}
	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		addDFphis(x, h, c, f, defForUses, newphis, sdom) // TODO: convert to explicit stack from recursion.
	}
}
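// Memory in this SSA form is threaded through the function as an explicit
// chain of values: each op that can affect memory takes the previous
// memory state as an argument and produces the next one. Schematically
// (a sketch, not real opcode syntax):
//
//	mem1 = Store ptr1 val1 mem0
//	mem2 = Store ptr2 val2 mem1   // no later op in this block uses mem2,
//	                              // so mem2 is the memory live out of it
//
// findLastMems below picks out, per block, exactly that final
// memory-producing value (or the block's memory phi, if the block has no
// memory ops of its own).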
// findLastMems maps block IDs to the last memory-output op in each block, if any.
func findLastMems(f *Func) []*Value {

	var stores []*Value
	lastMems := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(lastMems)
	storeUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(storeUse)
	for _, b := range f.Blocks {
		// Find all the stores in this block. Categorize their uses:
		//  storeUse contains stores which are used by a subsequent store.
		storeUse.clear()
		stores = stores[:0]
		var memPhi *Value
		for _, v := range b.Values {
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					memPhi = v
				}
				continue
			}
			if v.Type.IsMemory() {
				stores = append(stores, v)
				for _, a := range v.Args {
					if a.Block == b && a.Type.IsMemory() {
						storeUse.add(a.ID)
					}
				}
			}
		}
		if len(stores) == 0 {
			lastMems[b.ID] = memPhi
			continue
		}

		// find last store in the block
		var last *Value
		for _, v := range stores {
			if storeUse.contains(v.ID) {
				continue
			}
			if last != nil {
				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
			}
			last = v
		}
		if last == nil {
			b.Fatalf("no last store found - cycle?")
		}

		// If this is a tuple containing a mem, select just
		// the mem. This will generate ops we don't need, but
		// it's the easiest thing to do.
		if last.Type.IsTuple() {
			last = b.NewValue1(last.Pos, OpSelect1, types.TypeMem, last)
		} else if last.Type.IsResults() {
			last = b.NewValue1I(last.Pos, OpSelectN, types.TypeMem, int64(last.Type.NumFields()-1), last)
		}

		lastMems[b.ID] = last
	}
	return lastMems
}

// markKind records the DFS state of a block during the backedge search.
type markKind uint8

const (
	notFound    markKind = iota // block has not been discovered yet
	notExplored                 // discovered and in queue, outedges not processed yet
	explored                    // discovered and in queue, outedges processed
	done                        // all done, in output ordering
)

type backedgesState struct {
	b *Block
	i int
}

// backedges returns a slice of successor edges that are back
// edges. For reducible loops, edge.b is the header.
func backedges(f *Func) []Edge {
	edges := []Edge{}
	mark := make([]markKind, f.NumBlocks())
	stack := []backedgesState{}

	mark[f.Entry.ID] = notExplored
	stack = append(stack, backedgesState{f.Entry, 0})

	for len(stack) > 0 {
		l := len(stack)
		x := stack[l-1]
		if x.i < len(x.b.Succs) {
			e := x.b.Succs[x.i]
			stack[l-1].i++
			s := e.b
			if mark[s.ID] == notFound {
				mark[s.ID] = notExplored
				stack = append(stack, backedgesState{s, 0})
			} else if mark[s.ID] == notExplored {
				// s is still on the DFS stack, so this edge closes a cycle: record it as a back edge.
				edges = append(edges, e)
			}
		} else {
			mark[x.b.ID] = done
			stack = stack[0 : l-1]
		}
	}
	return edges
}