github.com/rakyll/go@v0.0.0-20170216000551-64c02460d703/src/cmd/compile/internal/ssa/loopreschedchecks.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "fmt"

// an edgeMemCtr records a backedge, together with the memory and
// counter phi functions at the target of the backedge that must
// be updated when a rescheduling check replaces the backedge.
type edgeMemCtr struct {
	e Edge
	m *Value // phi for memory at dest of e
	c *Value // phi for counter at dest of e
}

// a rewriteTarget is a value-argindex pair indicating
// where a rewrite is applied. Note that this is for values,
// not for block controls, because block controls are not targets
// for the rewrites performed in inserting rescheduling checks.
type rewriteTarget struct {
	v *Value
	i int
}

type rewrite struct {
	before, after *Value          // before is the expected value before rewrite, after is the new value installed.
	rewrites      []rewriteTarget // all the targets for this rewrite.
}

func (r *rewrite) String() string {
	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
	for _, rw := range r.rewrites {
		s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
	}
	s += "\n"
	return s
}

const initialRescheduleCounterValue = 1021 // Largest 10-bit prime. Loop bodies of 97ns will check about every 100µs.

// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
func insertLoopReschedChecks(f *Func) {
	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.

	// Loop reschedule checks decrement a per-function counter
	// shared by all loops, and when the counter becomes non-positive
	// a call is made to a rescheduling check in the runtime.
	//
	// Steps:
	// 1. Locate backedges.
	// 2. Record memory definitions at block end so that
	//    the SSA graph for mem can be properly modified.
	// 3. Define a counter and record its future uses (at backedges).
	//    (Same process as 2, applied to a single definition of the counter.
	//    The difference for mem is that there are zero-to-many existing mem
	//    definitions, versus exactly one for the new counter.)
	// 4. Ensure that the phi functions that will be needed for mem and counter
	//    are present in the graph, initially with trivial inputs.
	// 5. Record all to-be-modified uses of mem and counter;
	//    apply modifications (split into two steps to simplify and
	//    avoid nagging order dependences).
	// 6. Rewrite backedges to include the counter check and reschedule check,
	//    and modify the destination phi functions appropriately with new
	//    definitions for mem and counter.

	if f.NoSplit { // nosplit functions don't reschedule.
		return
	}

	backedges := backedges(f)
	if len(backedges) == 0 { // no backedges means no rescheduling checks.
		return
	}

	lastMems := findLastMems(f)

	idom := f.Idom()
	sdom := f.sdom()

	if f.pass.debug > 2 {
		fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	tofixBackedges := []edgeMemCtr{}

	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
		tofixBackedges = append(tofixBackedges, edgeMemCtr{e, nil, nil})
	}

	// It's possible that there is no memory state (no global/pointer loads/stores or calls).
	if lastMems[f.Entry.ID] == nil {
		lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, TypeMem)
	}

	memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from an earlier block.

	// Propagate last mem definitions forward through successor blocks.
	po := f.postorder()
	for i := len(po) - 1; i >= 0; i-- {
		b := po[i]
		mem := lastMems[b.ID]
		for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
			// loop because there might be backedges that haven't been visited yet.
			mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
		}
		memDefsAtBlockEnds[b.ID] = mem
	}

	// Set up the counter. There are no phis etc. pre-existing for it.
	counter0 := f.Entry.NewValue0I(f.Entry.Pos, OpConst32, f.Config.fe.TypeInt32(), initialRescheduleCounterValue)
	ctrDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the def visible at its end, if that def will be used.

	// There's a minor difference between memDefsAtBlockEnds and ctrDefsAtBlockEnds;
	// because the counter only matters for loops and code that reaches them, it is nil for blocks where the ctr is no
	// longer live. This avoids creation of dead phi functions. This optimization is skipped for the mem variable
	// because it is harder and also less likely to be helpful, though dead code elimination ought to clean this out anyhow.

	for _, emc := range tofixBackedges {
		e := emc.e
		// Record the initial use of counter0 at the backedge's source block
		// (note: available-at-bottom and use are the same thing initially).
		// Each backedge will be rewritten to include a reschedule check, and that will use the counter.
		src := e.b.Preds[e.i].b
		ctrDefsAtBlockEnds[src.ID] = counter0
	}

	// Push uses towards the root.
	for _, b := range f.postorder() {
		bd := ctrDefsAtBlockEnds[b.ID]
		if bd == nil {
			continue
		}
		for _, e := range b.Preds {
			p := e.b
			if ctrDefsAtBlockEnds[p.ID] == nil {
				ctrDefsAtBlockEnds[p.ID] = bd
			}
		}
	}

	// Maps from block to newly-inserted phi function in that block.
	newmemphis := make(map[*Block]rewrite)
	newctrphis := make(map[*Block]rewrite)

	// Insert phi functions as necessary for future changes to the flow graph.
	for i, emc := range tofixBackedges {
		e := emc.e
		h := e.b

		// find the phi function for the memory input at "h", if there is one.
		var headerMemPhi *Value // look for header mem phi

		for _, v := range h.Values {
			if v.Op == OpPhi && v.Type.IsMemory() {
				headerMemPhi = v
			}
		}

		if headerMemPhi == nil {
			// if the header has no mem phi, make a trivial one whose input is the mem def from the dominator.
			mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
			headerMemPhi = newPhiFor(h, mem0)
			newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
			addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis)

		}
		tofixBackedges[i].m = headerMemPhi

		var headerCtrPhi *Value
		rw, ok := newctrphis[h]
		if !ok {
			headerCtrPhi = newPhiFor(h, counter0)
			newctrphis[h] = rewrite{before: counter0, after: headerCtrPhi}
			addDFphis(counter0, h, h, f, ctrDefsAtBlockEnds, newctrphis)
		} else {
			headerCtrPhi = rw.after
		}
		tofixBackedges[i].c = headerCtrPhi
	}

	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis)
	rewriteNewPhis(f.Entry, f.Entry, f, ctrDefsAtBlockEnds, newctrphis)

	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
		}

		for b, r := range newctrphis {
			fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// Apply collected rewrites.
	for _, r := range newmemphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	for _, r := range newctrphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	zero := f.Entry.NewValue0I(f.Entry.Pos, OpConst32, f.Config.fe.TypeInt32(), 0)
	one := f.Entry.NewValue0I(f.Entry.Pos, OpConst32, f.Config.fe.TypeInt32(), 1)

	// Rewrite backedges to include reschedule checks.
	for _, emc := range tofixBackedges {
		e := emc.e
		headerMemPhi := emc.m
		headerCtrPhi := emc.c
		h := e.b
		i := e.i
		p := h.Preds[i]
		bb := p.b
		mem0 := headerMemPhi.Args[i]
		ctr0 := headerCtrPhi.Args[i]
		// The backedge runs from bb to the header h (p == h.Preds[i] == Edge{bb, p.i}).
		// Because we're going to insert a rarely taken call, make sure the
		// looping edge still looks likely.
		likely := BranchLikely
		if p.i != 0 {
			likely = BranchUnlikely
		}
		bb.Likely = likely

		// Rewrite the edge to include a reschedule check.
		// Existing edges:
		//
		//   bb.Succs[p.i] == Edge{h, i}
		//   h.Preds[i] == p == Edge{bb, p.i}
		//
		// New block(s):
		// test:
		//    ctr1 := ctr0 - 1
		//    if ctr1 <= 0 { goto sched }
		//    goto join
		// sched:
		//    mem1 := call resched (mem0)
		//    goto join
		// join:
		//    ctr2 := phi(ctr1, counter0) // counter0 is the constant
		//    mem2 := phi(mem0, mem1)
		//    goto h
		//
		// and correct arg i of headerMemPhi and headerCtrPhi.
		//
		// EXCEPT: a block containing only phi functions is bad
		// for the register allocator. Therefore, there is no
		// join; branches targeting join instead target
		// the header, and the other phi functions within the header are
		// adjusted for the additional input.

		test := f.NewBlock(BlockIf)
		sched := f.NewBlock(BlockPlain)

		test.Pos = bb.Pos
		sched.Pos = bb.Pos

		// ctr1 := ctr0 - 1
		// if ctr1 <= 0 { goto sched }
		// goto header
		ctr1 := test.NewValue2(bb.Pos, OpSub32, f.Config.fe.TypeInt32(), ctr0, one)
		cmp := test.NewValue2(bb.Pos, OpLeq32, f.Config.fe.TypeBool(), ctr1, zero)
		test.SetControl(cmp)
		test.AddEdgeTo(sched) // if true
		// if false -- rewrite the edge to the header.
		// do NOT remove+add, because that would perturb all the other phi functions
		// as well as mess up the other edges to the header.
		test.Succs = append(test.Succs, Edge{h, i})
		h.Preds[i] = Edge{test, 1}
		headerMemPhi.SetArg(i, mem0)
		headerCtrPhi.SetArg(i, ctr1)

		test.Likely = BranchUnlikely

		// sched:
		//    mem1 := call resched (mem0)
		//    goto header
		resched := f.Config.fe.Syslook("goschedguarded")
		mem1 := sched.NewValue1A(bb.Pos, OpStaticCall, TypeMem, resched, mem0)
		sched.AddEdgeTo(h)
		headerMemPhi.AddArg(mem1)
		headerCtrPhi.AddArg(counter0)

		bb.Succs[p.i] = Edge{test, 0}
		test.Preds = append(test.Preds, Edge{bb, p.i})

		// Must correct all the other phi functions in the header for the new incoming edge.
		// Except for the mem and counter phis, it will be the same value seen on the original
		// backedge at index i.
		for _, v := range h.Values {
			if v.Op == OpPhi && v != headerMemPhi && v != headerCtrPhi {
				v.AddArg(v.Args[i])
			}
		}
	}

	f.invalidateCFG()

	if f.pass.debug > 2 {
		sdom = newSparseTree(f, f.Idom())
		fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	return
}

// newPhiFor inserts a new Phi function into b,
// with all inputs set to v.
func newPhiFor(b *Block, v *Value) *Value {
	phiV := b.NewValue0(b.Pos, OpPhi, v.Type)

	for range b.Preds {
		phiV.AddArg(v)
	}
	return phiV
}

// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
// in block h will replace a previous definition. Block b is the block currently being processed;
// if b has its own phi definition then it takes the place of h.
// defsForUses provides information about other definitions of the variable that are present
// (and if nil, indicates that the variable is no longer live).
func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite) {
	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
	if _, ok := newphis[b]; ok {
		h = b
	}
	change := newphis[h]
	x := change.before
	y := change.after

	// Apply rewrites to this block.
	if x != nil { // don't waste time on the common case of no definition.
		p := &change.rewrites
		for _, v := range b.Values {
			if v == y { // don't rewrite self -- phi inputs are handled below.
				continue
			}
			for i, w := range v.Args {
				if w != x {
					continue
				}
				*p = append(*p, rewriteTarget{v, i})
			}
		}

		// Rewrite appropriate inputs of phis reached in successors
		// in dominance frontier, self, and dominated.
		// If the variable def reaching uses in b is itself defined in b, then the new phi function
		// does not reach the successors of b. (This assumes a bit about the structure of the
		// phi use-def graph, but it's true for memory and the inserted counter.)
		if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
			for _, e := range b.Succs {
				s := e.b
				if sphi, ok := newphis[s]; ok { // saves time to find the phi this way.
					*p = append(*p, rewriteTarget{sphi.after, e.i})
					continue
				}
				for _, v := range s.Values {
					if v.Op == OpPhi && v.Args[e.i] == x {
						*p = append(*p, rewriteTarget{v, e.i})
						break
					}
				}
			}
		}
		newphis[h] = change
	}

	sdom := f.sdom()

	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		rewriteNewPhis(h, c, f, defsForUses, newphis) // TODO: convert to explicit stack from recursion.
	}
}

// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
// These new phis can only occur at the dominance frontier of h; block s is in the dominance
// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
// either b = h or h strictly dominates b.
// These newly created phis are themselves new definitions that may require addition of their
// own trivial phi functions in their own dominance frontier, and this is handled recursively.
func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite) {
	oldv := defForUses[b.ID]
	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b.
		return
	}
	sdom := f.sdom()
	idom := f.Idom()
outer:
	for _, e := range b.Succs {
		s := e.b
		// check phi functions in the dominance frontier
		if sdom.isAncestor(h, s) {
			continue // h dominates s, successor of b, therefore s is not in the frontier.
		}
		if _, ok := newphis[s]; ok {
			continue // successor s of b already has a new phi function, so there is no need to add another.
		}
		if x != nil {
			for _, v := range s.Values {
				if v.Op == OpPhi && v.Args[e.i] == x {
					continue outer // successor s of b has an old phi function, so there is no need to add another.
				}
			}
		}

		old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
		headerPhi := newPhiFor(s, old)
		// the new phi will replace "old" in block s and all blocks dominated by s.
		newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
		addDFphis(old, s, s, f, defForUses, newphis)        // the new definition may also create new phi functions.
	}
	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		addDFphis(x, h, c, f, defForUses, newphis) // TODO: convert to explicit stack from recursion.
	}
}

// findLastMems maps block ids to last memory-output op in a block, if any.
func findLastMems(f *Func) []*Value {

	var stores []*Value
	lastMems := make([]*Value, f.NumBlocks())
	storeUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(storeUse)
	for _, b := range f.Blocks {
		// Find all the stores in this block. Categorize their uses:
		//  storeUse contains stores which are used by a subsequent store.
		storeUse.clear()
		stores = stores[:0]
		var memPhi *Value
		for _, v := range b.Values {
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					memPhi = v
				}
				continue
			}
			if v.Type.IsMemory() {
				stores = append(stores, v)
				if v.Op == OpSelect1 {
					// Use the arg of the tuple-generating op.
					v = v.Args[0]
				}
				for _, a := range v.Args {
					if a.Block == b && a.Type.IsMemory() {
						storeUse.add(a.ID)
					}
				}
			}
		}
		if len(stores) == 0 {
			lastMems[b.ID] = memPhi
			continue
		}

		// find last store in the block
		var last *Value
		for _, v := range stores {
			if storeUse.contains(v.ID) {
				continue
			}
			if last != nil {
				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
			}
			last = v
		}
		if last == nil {
			b.Fatalf("no last store found - cycle?")
		}
		lastMems[b.ID] = last
	}
	return lastMems
}

type backedgesState struct {
	b *Block
	i int
}

// backedges returns a slice of successor edges that are back
// edges. For reducible loops, edge.b is the header.
func backedges(f *Func) []Edge {
	edges := []Edge{}
	mark := make([]markKind, f.NumBlocks())
	stack := []backedgesState{}

	mark[f.Entry.ID] = notExplored
	stack = append(stack, backedgesState{f.Entry, 0})

	for len(stack) > 0 {
		l := len(stack)
		x := stack[l-1]
		if x.i < len(x.b.Succs) {
			e := x.b.Succs[x.i]
			stack[l-1].i++
			s := e.b
			if mark[s.ID] == notFound {
				mark[s.ID] = notExplored
				stack = append(stack, backedgesState{s, 0})
			} else if mark[s.ID] == notExplored {
				edges = append(edges, e)
			}
		} else {
			mark[x.b.ID] = done
			stack = stack[0 : l-1]
		}
	}
	return edges
}
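
// The sketch below is an editorial addition, not part of the original file:
// it is a minimal reimplementation of the same iterative-DFS backedge
// detection used by backedges above, written against a plain adjacency list
// so the three-state marking (unvisited / on the current DFS path / done)
// can be read without the ssa.Block and Edge machinery. All names here
// (toyEdge, toyBackedges, the toy* constants) are hypothetical and chosen to
// avoid clashing with existing package identifiers.

// toyEdge records a successor edge by block index; when the edge is a
// backedge, to is the loop header.
type toyEdge struct{ from, to int }

// toyBackedges mirrors backedges: depth-first search with an explicit stack,
// reporting every edge whose destination is still on the current DFS path.
// For example, with succs = [][]int{{1}, {2, 3}, {1}, {}} and entry 0,
// it returns []toyEdge{{2, 1}}, identifying block 1 as the loop header.
func toyBackedges(succs [][]int, entry int) []toyEdge {
	const (
		toyUnvisited = iota // corresponds to notFound
		toyOnPath           // corresponds to notExplored
		toyDone             // corresponds to done
	)
	state := make([]int, len(succs))
	type frame struct{ b, i int } // block index, next successor index to try
	stack := []frame{{entry, 0}}
	state[entry] = toyOnPath
	var edges []toyEdge
	for len(stack) > 0 {
		top := &stack[len(stack)-1]
		if top.i < len(succs[top.b]) {
			s := succs[top.b][top.i]
			top.i++
			switch state[s] {
			case toyUnvisited:
				state[s] = toyOnPath
				stack = append(stack, frame{s, 0})
			case toyOnPath:
				// s is an ancestor still being explored, so top.b -> s is a backedge.
				edges = append(edges, toyEdge{top.b, s})
			}
			// If state[s] == toyDone it is a forward or cross edge; nothing to record.
		} else {
			state[top.b] = toyDone
			stack = stack[:len(stack)-1]
		}
	}
	return edges
}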