golang.org/x/tools@v0.21.0/go/ssa/lift.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ-nodes as needed.

// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)

// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Also see many other "TODO: opt" suggestions in the code.

import (
	"fmt"
	"go/token"
	"math/big"
	"os"

	"golang.org/x/tools/internal/typeparams"
)

// If true, show diagnostic information at each step of lifting.
// Very verbose.
const debugLifting = false

// domFrontier maps each block to the set of blocks in its dominance
// frontier. The outer slice is conceptually a map keyed by
// Block.Index. The inner slice is conceptually a set, possibly
// containing duplicates.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
type domFrontier [][]*BasicBlock

func (df domFrontier) add(u, v *BasicBlock) {
	p := &df[u.Index]
	*p = append(*p, v)
}

// build builds the dominance frontier df for the dominator (sub)tree
// rooted at u, using the Cytron et al. algorithm.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
func (df domFrontier) build(u *BasicBlock) {
	// Encounter each node u in postorder of dom tree.
	for _, child := range u.dom.children {
		df.build(child)
	}
	for _, vb := range u.Succs {
		if v := vb.dom; v.idom != u {
			df.add(u, vb)
		}
	}
	for _, w := range u.dom.children {
		for _, vb := range df[w.Index] {
			// TODO(adonovan): opt: use word-parallel bitwise union.
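			// Propagate DF(w) up to u: a block vb in DF(w) also belongs
			// to DF(u) unless u is vb's immediate dominator.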
			if v := vb.dom; v.idom != u {
				df.add(u, vb)
			}
		}
	}
}

func buildDomFrontier(fn *Function) domFrontier {
	df := make(domFrontier, len(fn.Blocks))
	df.build(fn.Blocks[0])
	if fn.Recover != nil {
		df.build(fn.Recover)
	}
	return df
}

func removeInstr(refs []Instruction, instr Instruction) []Instruction {
	return removeInstrsIf(refs, func(i Instruction) bool { return i == instr })
}

func removeInstrsIf(refs []Instruction, p func(Instruction) bool) []Instruction {
	// TODO(taking): replace with go1.22 slices.DeleteFunc.
	i := 0
	for _, ref := range refs {
		if p(ref) {
			continue
		}
		refs[i] = ref
		i++
	}
	for j := i; j != len(refs); j++ {
		refs[j] = nil // aid GC
	}
	return refs[:i]
}

// lift replaces local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
// - The dominator tree is up-to-date.
func lift(fn *Function) {
	// TODO(adonovan): opt: lots of little optimizations may be
	// worthwhile here, especially if they cause us to avoid
	// buildDomFrontier. For example:
	//
	// - Alloc never loaded? Eliminate.
	// - Alloc never stored? Replace all loads with a zero constant.
	// - Alloc stored once? Replace loads with dominating store;
	//   don't forget that an Alloc is itself an effective store
	//   of zero.
	// - Alloc used only within a single block?
	//   Use degenerate algorithm avoiding φ-nodes.
	// - Consider synergy with scalar replacement of aggregates (SRA).
	//   e.g. *(&x.f) where x is an Alloc.
	//   Perhaps we'd get better results if we generated this as x.f
	//   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
	//   Unclear.
	//
	// But we will start with the simplest correct code.
	df := buildDomFrontier(fn)

	if debugLifting {
		title := false
		for i, blocks := range df {
			if blocks != nil {
				if !title {
					fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
					title = true
				}
				fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
			}
		}
	}

	newPhis := make(newPhiMap)

	// During this pass we will replace some BasicBlock.Instrs
	// (allocs, loads and stores) with nil, keeping a count in
	// BasicBlock.gaps. At the end we will reset Instrs to the
	// concatenation of all non-dead newPhis and non-nil Instrs
	// for the block, reusing the original array if space permits.

	// While we're here, we also eliminate 'rundefers'
	// instructions in functions that contain no 'defer'
	// instructions.
	usesDefer := false

	// A counter used to generate ~unique ids for Phi nodes, as an
	// aid to debugging. We use large numbers to make them highly
	// visible. All nodes are renumbered later.
	fresh := 1000

	// Determine which allocs we can lift and number them densely.
	// The renaming phase uses this numbering for compact maps.
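	// For example, if only the first and third of a function's Allocs
	// prove liftable, they receive indices 0 and 1; the remaining
	// Alloc keeps index -1, meaning "not lifted".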
	numAllocs := 0
	for _, b := range fn.Blocks {
		b.gaps = 0
		b.rundefers = 0
		for _, instr := range b.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				index := -1
				if liftAlloc(df, instr, newPhis, &fresh) {
					index = numAllocs
					numAllocs++
				}
				instr.index = index
			case *Defer:
				usesDefer = true
			case *RunDefers:
				b.rundefers++
			}
		}
	}

	// renaming maps an alloc (keyed by index) to its replacement
	// value. Initially the renaming contains nil, signifying the
	// zero constant of the appropriate type; we construct the
	// Const lazily at most once on each path through the domtree.
	// TODO(adonovan): opt: cache per-function not per subtree.
	renaming := make([]Value, numAllocs)

	// Renaming.
	rename(fn.Blocks[0], renaming, newPhis)

	// Eliminate dead φ-nodes.
	removeDeadPhis(fn.Blocks, newPhis)

	// Prepend remaining live φ-nodes to each block.
	for _, b := range fn.Blocks {
		nps := newPhis[b]
		j := len(nps)

		rundefersToKill := b.rundefers
		if usesDefer {
			rundefersToKill = 0
		}

		if j+b.gaps+rundefersToKill == 0 {
			continue // fast path: no new phis or gaps
		}

		// Compact nps + non-nil Instrs into a new slice.
		// TODO(adonovan): opt: compact in situ (rightwards)
		// if Instrs has sufficient space or slack.
		dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
		for i, np := range nps {
			dst[i] = np.phi
		}
		for _, instr := range b.Instrs {
			if instr == nil {
				continue
			}
			if !usesDefer {
				if _, ok := instr.(*RunDefers); ok {
					continue
				}
			}
			dst[j] = instr
			j++
		}
		b.Instrs = dst
	}

	// Remove any fn.Locals that were lifted.
	j := 0
	for _, l := range fn.Locals {
		if l.index < 0 {
			fn.Locals[j] = l
			j++
		}
	}
	// Nil out fn.Locals[j:] to aid GC.
	for i := j; i < len(fn.Locals); i++ {
		fn.Locals[i] = nil
	}
	fn.Locals = fn.Locals[:j]
}

// removeDeadPhis removes φ-nodes not transitively needed by a
// non-Phi, non-DebugRef instruction.
func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
	// First pass: find the set of "live" φ-nodes: those reachable
	// from some non-Phi instruction.
	//
	// We compute reachability in reverse, starting from each φ,
	// rather than forwards, starting from each live non-Phi
	// instruction, because this way visits much less of the
	// Value graph.
	livePhis := make(map[*Phi]bool)
	for _, npList := range newPhis {
		for _, np := range npList {
			phi := np.phi
			if !livePhis[phi] && phiHasDirectReferrer(phi) {
				markLivePhi(livePhis, phi)
			}
		}
	}

	// Existing φ-nodes due to && and || operators
	// are all considered live (see Go issue 19622).
	for _, b := range blocks {
		for _, phi := range b.phis() {
			markLivePhi(livePhis, phi.(*Phi))
		}
	}

	// Second pass: eliminate unused phis from newPhis.
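	// Each list is compacted in place: live φ-nodes are kept, and each
	// dead φ-node is first unlinked from the referrer lists of its edge
	// values so that def/use information stays consistent.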
	for block, npList := range newPhis {
		j := 0
		for _, np := range npList {
			if livePhis[np.phi] {
				npList[j] = np
				j++
			} else {
				// discard it, first removing it from referrers
				for _, val := range np.phi.Edges {
					if refs := val.Referrers(); refs != nil {
						*refs = removeInstr(*refs, np.phi)
					}
				}
				np.phi.block = nil
			}
		}
		newPhis[block] = npList[:j]
	}
}

// markLivePhi marks phi, and all φ-nodes transitively reachable via
// its Operands, live.
func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
	livePhis[phi] = true
	for _, rand := range phi.Operands(nil) {
		if q, ok := (*rand).(*Phi); ok {
			if !livePhis[q] {
				markLivePhi(livePhis, q)
			}
		}
	}
}

// phiHasDirectReferrer reports whether phi is directly referred to by
// a non-Phi instruction. Such instructions are the
// roots of the liveness traversal.
func phiHasDirectReferrer(phi *Phi) bool {
	for _, instr := range *phi.Referrers() {
		if _, ok := instr.(*Phi); !ok {
			return true
		}
	}
	return false
}

type blockSet struct{ big.Int } // (inherit methods from Int)

// add adds b to the set and returns true if the set changed.
func (s *blockSet) add(b *BasicBlock) bool {
	i := b.Index
	if s.Bit(i) != 0 {
		return false
	}
	s.SetBit(&s.Int, i, 1)
	return true
}

// take removes an arbitrary element from a set s and
// returns its index, or returns -1 if empty.
func (s *blockSet) take() int {
	l := s.BitLen()
	for i := 0; i < l; i++ {
		if s.Bit(i) == 1 {
			s.SetBit(&s.Int, i, 0)
			return i
		}
	}
	return -1
}

// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct {
	phi   *Phi
	alloc *Alloc
}

// newPhiMap records for each basic block, the set of newPhis that
// must be prepended to the block.
type newPhiMap map[*BasicBlock][]newPhi

// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
// fresh is a source of fresh ids for phi nodes.
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
	// Don't lift named return values in functions that defer
	// calls that may recover from panic.
	if fn := alloc.Parent(); fn.Recover != nil {
		for _, nr := range fn.namedResults {
			if nr == alloc {
				return false
			}
		}
	}

	// Compute defblocks, the set of blocks containing a
	// definition of the alloc cell.
	var defblocks blockSet
	for _, instr := range *alloc.Referrers() {
		// Bail out if we discover the alloc is not liftable;
		// the only operations permitted to use the alloc are
		// loads/stores into the cell, and DebugRef.
		switch instr := instr.(type) {
		case *Store:
			if instr.Val == alloc {
				return false // address used as value
			}
			if instr.Addr != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
			defblocks.add(instr.Block())
		case *UnOp:
			if instr.Op != token.MUL {
				return false // not a load
			}
			if instr.X != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
		case *DebugRef:
			// ok
		default:
			return false // some other instruction
		}
	}
	// The Alloc itself counts as a (zero) definition of the cell.
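	// For instance (hypothetical source), in
	//
	//	var x int        // Alloc in the entry block: a zero definition
	//	if c { x = 1 }   // Store in the 'then' block
	//	print(x)         // load at the join block
	//
	// defblocks becomes {entry, then}, and the φ-insertion below places
	// a single φ-node for x at the join block, their iterated dominance
	// frontier.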
	defblocks.add(alloc.Block())

	if debugLifting {
		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
	}

	fn := alloc.Parent()

	// Φ-insertion.
	//
	// What follows is the body of the main loop of the insert-φ
	// function described by Cytron et al, but instead of using
	// counter tricks, we just reset the 'hasAlready' and 'work'
	// sets each iteration. These are bitmaps so it's pretty cheap.
	//
	// TODO(adonovan): opt: recycle slice storage for W,
	// hasAlready, defBlocks across liftAlloc calls.
	var hasAlready blockSet

	// Initialize W and work to defblocks.
	var work blockSet = defblocks // blocks seen
	var W blockSet                // blocks to do
	W.Set(&defblocks.Int)

	// Traverse iterated dominance frontier, inserting φ-nodes.
	for i := W.take(); i != -1; i = W.take() {
		u := fn.Blocks[i]
		for _, v := range df[u.Index] {
			if hasAlready.add(v) {
				// Create φ-node.
				// It will be prepended to v.Instrs later, if needed.
				phi := &Phi{
					Edges:   make([]Value, len(v.Preds)),
					Comment: alloc.Comment,
				}
				// This is merely a debugging aid:
				phi.setNum(*fresh)
				*fresh++

				phi.pos = alloc.Pos()
				phi.setType(typeparams.MustDeref(alloc.Type()))
				phi.block = v
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
				}
				newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

				if work.add(v) {
					W.add(v)
				}
			}
		}
	}

	return true
}

// replaceAll replaces all intraprocedural uses of x with y,
// updating x.Referrers and y.Referrers.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
func replaceAll(x, y Value) {
	var rands []*Value
	pxrefs := x.Referrers()
	pyrefs := y.Referrers()
	for _, instr := range *pxrefs {
		rands = instr.Operands(rands[:0]) // recycle storage
		for _, rand := range rands {
			if *rand != nil {
				if *rand == x {
					*rand = y
				}
			}
		}
		if pyrefs != nil {
			*pyrefs = append(*pyrefs, instr) // dups ok
		}
	}
	*pxrefs = nil // x is now unreferenced
}

// renamed returns the value to which alloc is being renamed,
// constructing it lazily if it's the implicit zero initialization.
func renamed(renaming []Value, alloc *Alloc) Value {
	v := renaming[alloc.index]
	if v == nil {
		v = zeroConst(typeparams.MustDeref(alloc.Type()))
		renaming[alloc.index] = v
	}
	return v
}

// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction. For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
	// Each φ-node becomes the new name for its associated Alloc.
	for _, np := range newPhis[u] {
		phi := np.phi
		alloc := np.alloc
		renaming[alloc.index] = phi
	}

	// Rename loads and stores of allocs.
	for i, instr := range u.Instrs {
		switch instr := instr.(type) {
		case *Alloc:
			if instr.index >= 0 { // store of zero to Alloc cell
				// Replace dominated loads by the zero value.
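				// A nil renaming entry stands for the implicit zero
				// value; renamed() materializes the zero Const lazily
				// on first use.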
				renaming[instr.index] = nil
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
				}
				// Delete the Alloc.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *Store:
			if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
				// Replace dominated loads by the stored value.
				renaming[alloc.index] = instr.Val
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
						instr, instr.Val.Name())
				}
				// Remove the store from the referrer list of the stored value.
				if refs := instr.Val.Referrers(); refs != nil {
					*refs = removeInstr(*refs, instr)
				}
				// Delete the Store.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *UnOp:
			if instr.Op == token.MUL {
				if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
					newval := renamed(renaming, alloc)
					if debugLifting {
						fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
							instr.Name(), instr, newval.Name())
					}
					// Replace all references to
					// the loaded value by the
					// dominating stored value.
					replaceAll(instr, newval)
					// Delete the Load.
					u.Instrs[i] = nil
					u.gaps++
				}
			}

		case *DebugRef:
			if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
				if instr.IsAddr {
					instr.X = renamed(renaming, alloc)
					instr.IsAddr = false

					// Add DebugRef to instr.X's referrers.
					if refs := instr.X.Referrers(); refs != nil {
						*refs = append(*refs, instr)
					}
				} else {
					// A source expression denotes the address
					// of an Alloc that was optimized away.
					instr.X = nil

					// Delete the DebugRef.
					u.Instrs[i] = nil
					u.gaps++
				}
			}
		}
	}

	// For each φ-node in a CFG successor, rename the edge.
	for _, v := range u.Succs {
		phis := newPhis[v]
		if len(phis) == 0 {
			continue
		}
		i := v.predIndex(u)
		for _, np := range phis {
			phi := np.phi
			alloc := np.alloc
			newval := renamed(renaming, alloc)
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
					phi.Name(), u, v, i, alloc.Name(), newval.Name())
			}
			phi.Edges[i] = newval
			if prefs := newval.Referrers(); prefs != nil {
				*prefs = append(*prefs, phi)
			}
		}
	}

	// Continue depth-first recursion over domtree, pushing a
	// fresh copy of the renaming map for each subtree.
	for i, v := range u.dom.children {
		r := renaming
		if i < len(u.dom.children)-1 {
			// On all but the final iteration, we must make
			// a copy to avoid destructive update.
			r = make([]Value, len(renaming))
			copy(r, renaming)
		}
		rename(v, r, newPhis)
	}
}
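// Illustration (schematic; not exact ssa printer output): for a source
// function such as
//
//	func f(c bool) int {
//		x := 0
//		if c {
//			x = 1
//		}
//		return x
//	}
//
// naive SSA form keeps an Alloc cell for x and accesses it via loads
// and stores, roughly:
//
//	t0 = local int (x)
//	*t0 = 0:int
//	if c goto 1 else 2
//	1: *t0 = 1:int; jump 2
//	2: t1 = *t0; return t1
//
// After lifting, the cell and its loads and stores are gone, and a
// φ-node at the join block selects the dominating stored value:
//
//	2: t2 = phi [0: 0:int, 1: 1:int] #x; return t2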