github.com/jd-ly/tools@v0.5.7/go/ssa/lift.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ-nodes as needed.

// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)

// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Also see many other "TODO: opt" suggestions in the code.

import (
	"fmt"
	"go/token"
	"go/types"
	"math/big"
	"os"
)

// If true, show diagnostic information at each step of lifting.
// Very verbose.
const debugLifting = false

// domFrontier maps each block to the set of blocks in its dominance
// frontier. The outer slice is conceptually a map keyed by
// Block.Index. The inner slice is conceptually a set, possibly
// containing duplicates.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
//
type domFrontier [][]*BasicBlock

func (df domFrontier) add(u, v *BasicBlock) {
	p := &df[u.Index]
	*p = append(*p, v)
}

// build builds the dominance frontier df for the dominator (sub)tree
// rooted at u, using the Cytron et al. algorithm.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
func (df domFrontier) build(u *BasicBlock) {
	// Encounter each node u in postorder of dom tree.
	for _, child := range u.dom.children {
		df.build(child)
	}
	for _, vb := range u.Succs {
		if v := vb.dom; v.idom != u {
			df.add(u, vb)
		}
	}
	for _, w := range u.dom.children {
		for _, vb := range df[w.Index] {
			// TODO(adonovan): opt: use word-parallel bitwise union.
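
			// DF_up step (Cytron et al.): vb lies in the frontier
			// of a dominated child w, so it also belongs to DF(u)
			// unless u is vb's immediate dominator.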
			if v := vb.dom; v.idom != u {
				df.add(u, vb)
			}
		}
	}
}

func buildDomFrontier(fn *Function) domFrontier {
	df := make(domFrontier, len(fn.Blocks))
	df.build(fn.Blocks[0])
	if fn.Recover != nil {
		df.build(fn.Recover)
	}
	return df
}

func removeInstr(refs []Instruction, instr Instruction) []Instruction {
	i := 0
	for _, ref := range refs {
		if ref == instr {
			continue
		}
		refs[i] = ref
		i++
	}
	for j := i; j != len(refs); j++ {
		refs[j] = nil // aid GC
	}
	return refs[:i]
}

// lift replaces local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
// - The dominator tree is up-to-date.
//
func lift(fn *Function) {
	// TODO(adonovan): opt: lots of little optimizations may be
	// worthwhile here, especially if they cause us to avoid
	// buildDomFrontier. For example:
	//
	// - Alloc never loaded? Eliminate.
	// - Alloc never stored? Replace all loads with a zero constant.
	// - Alloc stored once? Replace loads with dominating store;
	//   don't forget that an Alloc is itself an effective store
	//   of zero.
	// - Alloc used only within a single block?
	//   Use degenerate algorithm avoiding φ-nodes.
	// - Consider synergy with scalar replacement of aggregates (SRA).
	//   e.g. *(&x.f) where x is an Alloc.
	//   Perhaps we'd get better results if we generated this as x.f
	//   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
	//   Unclear.
	//
	// But we will start with the simplest correct code.
	df := buildDomFrontier(fn)

	if debugLifting {
		title := false
		for i, blocks := range df {
			if blocks != nil {
				if !title {
					fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
					title = true
				}
				fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
			}
		}
	}

	newPhis := make(newPhiMap)

	// During this pass we will replace some BasicBlock.Instrs
	// (allocs, loads and stores) with nil, keeping a count in
	// BasicBlock.gaps. At the end we will reset Instrs to the
	// concatenation of all non-dead newPhis and non-nil Instrs
	// for the block, reusing the original array if space permits.

	// While we're here, we also eliminate 'rundefers'
	// instructions in functions that contain no 'defer'
	// instructions.
	usesDefer := false

	// A counter used to generate ~unique ids for Phi nodes, as an
	// aid to debugging. We use large numbers to make them highly
	// visible. All nodes are renumbered later.
	fresh := 1000

	// Determine which allocs we can lift and number them densely.
	// The renaming phase uses this numbering for compact maps.
	numAllocs := 0
	for _, b := range fn.Blocks {
		b.gaps = 0
		b.rundefers = 0
		for _, instr := range b.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				index := -1
				if liftAlloc(df, instr, newPhis, &fresh) {
					index = numAllocs
					numAllocs++
				}
				instr.index = index
			case *Defer:
				usesDefer = true
			case *RunDefers:
				b.rundefers++
			}
		}
	}
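
	// For example, a straight-line body that stores into a lifted
	// local x and then loads it,
	//
	//	t0 = local int (x)
	//	*t0 = 1:int
	//	t1 = *t0
	//
	// ends up with no Alloc, Store or Load at all: every use of
	// the load t1 refers directly to the constant 1.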

	// renaming maps an alloc (keyed by index) to its replacement
	// value. Initially the renaming contains nil, signifying the
	// zero constant of the appropriate type; we construct the
	// Const lazily at most once on each path through the domtree.
	// TODO(adonovan): opt: cache per-function not per subtree.
	renaming := make([]Value, numAllocs)

	// Renaming.
	rename(fn.Blocks[0], renaming, newPhis)

	// Eliminate dead φ-nodes.
	removeDeadPhis(fn.Blocks, newPhis)

	// Prepend remaining live φ-nodes to each block.
	for _, b := range fn.Blocks {
		nps := newPhis[b]
		j := len(nps)

		rundefersToKill := b.rundefers
		if usesDefer {
			rundefersToKill = 0
		}

		if j+b.gaps+rundefersToKill == 0 {
			continue // fast path: no new phis or gaps
		}

		// Compact nps + non-nil Instrs into a new slice.
		// TODO(adonovan): opt: compact in situ (rightwards)
		// if Instrs has sufficient space or slack.
		dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
		for i, np := range nps {
			dst[i] = np.phi
		}
		for _, instr := range b.Instrs {
			if instr == nil {
				continue
			}
			if !usesDefer {
				if _, ok := instr.(*RunDefers); ok {
					continue
				}
			}
			dst[j] = instr
			j++
		}
		b.Instrs = dst
	}

	// Remove any fn.Locals that were lifted.
	j := 0
	for _, l := range fn.Locals {
		if l.index < 0 {
			fn.Locals[j] = l
			j++
		}
	}
	// Nil out fn.Locals[j:] to aid GC.
	for i := j; i < len(fn.Locals); i++ {
		fn.Locals[i] = nil
	}
	fn.Locals = fn.Locals[:j]
}

// removeDeadPhis removes φ-nodes not transitively needed by a
// non-Phi, non-DebugRef instruction.
func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
	// First pass: find the set of "live" φ-nodes: those reachable
	// from some non-Phi instruction.
	//
	// We compute reachability in reverse, starting from each φ,
	// rather than forwards, starting from each live non-Phi
	// instruction, because this way visits much less of the
	// Value graph.
	livePhis := make(map[*Phi]bool)
	for _, npList := range newPhis {
		for _, np := range npList {
			phi := np.phi
			if !livePhis[phi] && phiHasDirectReferrer(phi) {
				markLivePhi(livePhis, phi)
			}
		}
	}

	// Existing φ-nodes due to && and || operators
	// are all considered live (see Go issue 19622).
	for _, b := range blocks {
		for _, phi := range b.phis() {
			markLivePhi(livePhis, phi.(*Phi))
		}
	}

	// Second pass: eliminate unused phis from newPhis.
	for block, npList := range newPhis {
		j := 0
		for _, np := range npList {
			if livePhis[np.phi] {
				npList[j] = np
				j++
			} else {
				// discard it, first removing it from referrers
				for _, val := range np.phi.Edges {
					if refs := val.Referrers(); refs != nil {
						*refs = removeInstr(*refs, np.phi)
					}
				}
				np.phi.block = nil
			}
		}
		newPhis[block] = npList[:j]
	}
}

// markLivePhi marks phi, and all φ-nodes transitively reachable via
// its Operands, live.
func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
	livePhis[phi] = true
	for _, rand := range phi.Operands(nil) {
		if q, ok := (*rand).(*Phi); ok {
			if !livePhis[q] {
				markLivePhi(livePhis, q)
			}
		}
	}
}
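
// Because liveness propagates only from roots with a direct non-Phi
// referrer, a cycle of φ-nodes such as
//
//	t1 = phi [...: t2]
//	t2 = phi [...: t1]
//
// whose members are referenced only by each other is dead and is
// removed entirely.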

// phiHasDirectReferrer reports whether phi is directly referred to by
// a non-Phi instruction. Such instructions are the
// roots of the liveness traversal.
func phiHasDirectReferrer(phi *Phi) bool {
	for _, instr := range *phi.Referrers() {
		if _, ok := instr.(*Phi); !ok {
			return true
		}
	}
	return false
}

type blockSet struct{ big.Int } // (inherit methods from Int)

// add adds b to the set and returns true if the set changed.
func (s *blockSet) add(b *BasicBlock) bool {
	i := b.Index
	if s.Bit(i) != 0 {
		return false
	}
	s.SetBit(&s.Int, i, 1)
	return true
}

// take removes an arbitrary element from a set s and
// returns its index, or returns -1 if empty.
func (s *blockSet) take() int {
	l := s.BitLen()
	for i := 0; i < l; i++ {
		if s.Bit(i) == 1 {
			s.SetBit(&s.Int, i, 0)
			return i
		}
	}
	return -1
}

// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct {
	phi   *Phi
	alloc *Alloc
}

// newPhiMap records for each basic block, the set of newPhis that
// must be prepended to the block.
type newPhiMap map[*BasicBlock][]newPhi

// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
// fresh is a source of fresh ids for phi nodes.
//
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
	// Don't lift aggregates into registers, because we don't have
	// a way to express their zero-constants.
	switch deref(alloc.Type()).Underlying().(type) {
	case *types.Array, *types.Struct:
		return false
	}

	// Don't lift named return values in functions that defer
	// calls that may recover from panic.
	if fn := alloc.Parent(); fn.Recover != nil {
		for _, nr := range fn.namedResults {
			if nr == alloc {
				return false
			}
		}
	}

	// Compute defblocks, the set of blocks containing a
	// definition of the alloc cell.
	var defblocks blockSet
	for _, instr := range *alloc.Referrers() {
		// Bail out if we discover the alloc is not liftable;
		// the only operations permitted to use the alloc are
		// loads/stores into the cell, and DebugRef.
		switch instr := instr.(type) {
		case *Store:
			if instr.Val == alloc {
				return false // address used as value
			}
			if instr.Addr != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
			defblocks.add(instr.Block())
		case *UnOp:
			if instr.Op != token.MUL {
				return false // not a load
			}
			if instr.X != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
		case *DebugRef:
			// ok
		default:
			return false // some other instruction
		}
	}
	// The Alloc itself counts as a (zero) definition of the cell.
	defblocks.add(alloc.Block())

	if debugLifting {
		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
	}

	fn := alloc.Parent()

	// Φ-insertion.
	//
	// What follows is the body of the main loop of the insert-φ
	// function described by Cytron et al, but instead of using
	// counter tricks, we just reset the 'hasAlready' and 'work'
	// sets each iteration. These are bitmaps so it's pretty cheap.
	//
	// TODO(adonovan): opt: recycle slice storage for W,
	// hasAlready, defBlocks across liftAlloc calls.
	var hasAlready blockSet
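
	// Each φ-node inserted at a frontier block is itself a new
	// definition of the cell, so its own dominance frontier must be
	// processed in turn; the worklist below therefore computes the
	// iterated dominance frontier (IDF) of defblocks.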

	// Initialize W and work to defblocks.
	var work blockSet = defblocks // blocks seen
	var W blockSet                // blocks to do
	W.Set(&defblocks.Int)

	// Traverse iterated dominance frontier, inserting φ-nodes.
	for i := W.take(); i != -1; i = W.take() {
		u := fn.Blocks[i]
		for _, v := range df[u.Index] {
			if hasAlready.add(v) {
				// Create φ-node.
				// It will be prepended to v.Instrs later, if needed.
				phi := &Phi{
					Edges:   make([]Value, len(v.Preds)),
					Comment: alloc.Comment,
				}
				// This is merely a debugging aid:
				phi.setNum(*fresh)
				*fresh++

				phi.pos = alloc.Pos()
				phi.setType(deref(alloc.Type()))
				phi.block = v
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
				}
				newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

				if work.add(v) {
					W.add(v)
				}
			}
		}
	}

	return true
}

// replaceAll replaces all intraprocedural uses of x with y,
// updating x.Referrers and y.Referrers.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
//
func replaceAll(x, y Value) {
	var rands []*Value
	pxrefs := x.Referrers()
	pyrefs := y.Referrers()
	for _, instr := range *pxrefs {
		rands = instr.Operands(rands[:0]) // recycle storage
		for _, rand := range rands {
			if *rand != nil {
				if *rand == x {
					*rand = y
				}
			}
		}
		if pyrefs != nil {
			*pyrefs = append(*pyrefs, instr) // dups ok
		}
	}
	*pxrefs = nil // x is now unreferenced
}

// renamed returns the value to which alloc is being renamed,
// constructing it lazily if it's the implicit zero initialization.
//
func renamed(renaming []Value, alloc *Alloc) Value {
	v := renaming[alloc.index]
	if v == nil {
		v = zeroConst(deref(alloc.Type()))
		renaming[alloc.index] = v
	}
	return v
}

// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction. For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
//
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
	// Each φ-node becomes the new name for its associated Alloc.
	for _, np := range newPhis[u] {
		phi := np.phi
		alloc := np.alloc
		renaming[alloc.index] = phi
	}

	// Rename loads and stores of allocs.
	for i, instr := range u.Instrs {
		switch instr := instr.(type) {
		case *Alloc:
			if instr.index >= 0 { // store of zero to Alloc cell
				// Replace dominated loads by the zero value.
				renaming[instr.index] = nil
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
				}
				// Delete the Alloc.
				u.Instrs[i] = nil
				u.gaps++
			}
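
		// e.g. a store to a lifted cell,
		//
		//	*t0 = v
		//	t1 = *t0
		//
		// records v as t0's dominating value and is deleted; the
		// subsequent load t1 is then replaced by v everywhere.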
		case *Store:
			if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
				// Replace dominated loads by the stored value.
				renaming[alloc.index] = instr.Val
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
						instr, instr.Val.Name())
				}
				// Remove the store from the referrer list of the stored value.
				if refs := instr.Val.Referrers(); refs != nil {
					*refs = removeInstr(*refs, instr)
				}
				// Delete the Store.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *UnOp:
			if instr.Op == token.MUL {
				if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
					newval := renamed(renaming, alloc)
					if debugLifting {
						fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
							instr.Name(), instr, newval.Name())
					}
					// Replace all references to
					// the loaded value by the
					// dominating stored value.
					replaceAll(instr, newval)
					// Delete the Load.
					u.Instrs[i] = nil
					u.gaps++
				}
			}

		case *DebugRef:
			if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
				if instr.IsAddr {
					instr.X = renamed(renaming, alloc)
					instr.IsAddr = false

					// Add DebugRef to instr.X's referrers.
					if refs := instr.X.Referrers(); refs != nil {
						*refs = append(*refs, instr)
					}
				} else {
					// A source expression denotes the address
					// of an Alloc that was optimized away.
					instr.X = nil

					// Delete the DebugRef.
					u.Instrs[i] = nil
					u.gaps++
				}
			}
		}
	}

	// For each φ-node in a CFG successor, rename the edge.
	for _, v := range u.Succs {
		phis := newPhis[v]
		if len(phis) == 0 {
			continue
		}
		i := v.predIndex(u)
		for _, np := range phis {
			phi := np.phi
			alloc := np.alloc
			newval := renamed(renaming, alloc)
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
					phi.Name(), u, v, i, alloc.Name(), newval.Name())
			}
			phi.Edges[i] = newval
			if prefs := newval.Referrers(); prefs != nil {
				*prefs = append(*prefs, phi)
			}
		}
	}

	// Continue depth-first recursion over domtree, pushing a
	// fresh copy of the renaming map for each subtree.
	for i, v := range u.dom.children {
		r := renaming
		if i < len(u.dom.children)-1 {
			// On all but the final iteration, we must make
			// a copy to avoid destructive update.
			r = make([]Value, len(renaming))
			copy(r, renaming)
		}
		rename(v, r, newPhis)
	}
}
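
// The whole pass, schematically (names and exact printed form vary).
// Given the source
//
//	func f(c bool) int {
//		x := 0
//		if c {
//			x = 1
//		}
//		return x
//	}
//
// naive SSA construction uses an Alloc cell for x:
//
//	0:
//		t0 = local int (x)
//		*t0 = 0:int
//		if c goto 1 else 2
//	1:
//		*t0 = 1:int
//		jump 2
//	2:
//		t1 = *t0
//		return t1
//
// After lift, the Alloc, Stores and Load are gone, and block 2
// instead begins with a φ-node merging the two reaching definitions:
//
//	2:
//		t2 = phi [0: 0:int, 1: 1:int] #x
//		return t2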