golang.org/x/tools@v0.21.1-0.20240520172518-788d39e776b1/go/ssa/lift.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ-nodes as needed.

// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)

// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Also see many other "TODO: opt" suggestions in the code.

import (
	"fmt"
	"go/token"
	"math/big"
	"os"

	"golang.org/x/tools/internal/typeparams"
)

// If true, show diagnostic information at each step of lifting.
// Very verbose.
const debugLifting = false

// domFrontier maps each block to the set of blocks in its dominance
// frontier. The outer slice is conceptually a map keyed by
// Block.Index. The inner slice is conceptually a set, possibly
// containing duplicates.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
type domFrontier [][]*BasicBlock

func (df domFrontier) add(u, v *BasicBlock) {
	p := &df[u.Index]
	*p = append(*p, v)
}

// build builds the dominance frontier df for the dominator (sub)tree
// rooted at u, using the Cytron et al. algorithm.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
func (df domFrontier) build(u *BasicBlock) {
	// Encounter each node u in postorder of dom tree.
	for _, child := range u.dom.children {
		df.build(child)
	}
	for _, vb := range u.Succs {
		if v := vb.dom; v.idom != u {
			df.add(u, vb)
		}
	}
	for _, w := range u.dom.children {
		for _, vb := range df[w.Index] {
			// TODO(adonovan): opt: use word-parallel bitwise union.
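			// This is the DF_up step of Cytron et al.: a block vb in the
			// frontier of a dominated child w also belongs to u's
			// frontier unless u is vb's immediate dominator.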
			if v := vb.dom; v.idom != u {
				df.add(u, vb)
			}
		}
	}
}

func buildDomFrontier(fn *Function) domFrontier {
	df := make(domFrontier, len(fn.Blocks))
	df.build(fn.Blocks[0])
	if fn.Recover != nil {
		df.build(fn.Recover)
	}
	return df
}

func removeInstr(refs []Instruction, instr Instruction) []Instruction {
	return removeInstrsIf(refs, func(i Instruction) bool { return i == instr })
}

func removeInstrsIf(refs []Instruction, p func(Instruction) bool) []Instruction {
	// TODO(taking): replace with go1.22 slices.DeleteFunc.
	i := 0
	for _, ref := range refs {
		if p(ref) {
			continue
		}
		refs[i] = ref
		i++
	}
	for j := i; j != len(refs); j++ {
		refs[j] = nil // aid GC
	}
	return refs[:i]
}

// lift replaces local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
// - The dominator tree is up-to-date.
func lift(fn *Function) {
	// TODO(adonovan): opt: lots of little optimizations may be
	// worthwhile here, especially if they cause us to avoid
	// buildDomFrontier. For example:
	//
	// - Alloc never loaded? Eliminate.
	// - Alloc never stored? Replace all loads with a zero constant.
	// - Alloc stored once? Replace loads with dominating store;
	//   don't forget that an Alloc is itself an effective store
	//   of zero.
	// - Alloc used only within a single block?
	//   Use degenerate algorithm avoiding φ-nodes.
	// - Consider synergy with scalar replacement of aggregates (SRA).
	//   e.g. *(&x.f) where x is an Alloc.
	//   Perhaps we'd get better results if we generated this as x.f
	//   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
	//   Unclear.
	//
	// But we will start with the simplest correct code.
	df := buildDomFrontier(fn)

	if debugLifting {
		title := false
		for i, blocks := range df {
			if blocks != nil {
				if !title {
					fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
					title = true
				}
				fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
			}
		}
	}

	newPhis := make(newPhiMap)

	// During this pass we will replace some BasicBlock.Instrs
	// (allocs, loads and stores) with nil, keeping a count in
	// BasicBlock.gaps. At the end we will reset Instrs to the
	// concatenation of all non-dead newPhis and non-nil Instrs
	// for the block, reusing the original array if space permits.

	// While we're here, we also eliminate 'rundefers' instructions
	// in functions that contain no 'defer' instructions, and the
	// ssa:deferstack() call when the deferstack allocation does not
	// escape.
	usesDefer := false
	deferstackAlloc, deferstackCall := deferstackPreamble(fn)
	eliminateDeferStack := deferstackAlloc != nil && !deferstackAlloc.Heap

	// A counter used to generate ~unique ids for Phi nodes, as an
	// aid to debugging. We use large numbers to make them highly
	// visible. All nodes are renumbered later.
	fresh := 1000

	// Determine which allocs we can lift and number them densely.
	// The renaming phase uses this numbering for compact maps.
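	// Each liftable Alloc is given a small dense index (Alloc.index) so
	// that the renaming state can be a plain slice indexed by that
	// number rather than a map; unliftable Allocs keep index -1.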
	numAllocs := 0
	for _, b := range fn.Blocks {
		b.gaps = 0
		b.rundefers = 0
		for _, instr := range b.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				index := -1
				if liftAlloc(df, instr, newPhis, &fresh) {
					index = numAllocs
					numAllocs++
				}
				instr.index = index
			case *Defer:
				usesDefer = true
				if eliminateDeferStack {
					// Clear _DeferStack and remove this Defer from the
					// referrers of the deferstack load.
					if instr._DeferStack != nil {
						if refs := instr._DeferStack.Referrers(); refs != nil {
							*refs = removeInstr(*refs, instr)
						}
						instr._DeferStack = nil
					}
				}
			case *RunDefers:
				b.rundefers++
			}
		}
	}

	// renaming maps an alloc (keyed by index) to its replacement
	// value. Initially the renaming contains nil, signifying the
	// zero constant of the appropriate type; we construct the
	// Const lazily at most once on each path through the domtree.
	// TODO(adonovan): opt: cache per-function not per subtree.
	renaming := make([]Value, numAllocs)

	// Renaming.
	rename(fn.Blocks[0], renaming, newPhis)

	// Eliminate dead φ-nodes.
	removeDeadPhis(fn.Blocks, newPhis)

	// Eliminate ssa:deferstack() call.
	if eliminateDeferStack {
		b := deferstackCall.block
		for i, instr := range b.Instrs {
			if instr == deferstackCall {
				b.Instrs[i] = nil
				b.gaps++
				break
			}
		}
	}

	// Prepend remaining live φ-nodes to each block.
	for _, b := range fn.Blocks {
		nps := newPhis[b]
		j := len(nps)

		rundefersToKill := b.rundefers
		if usesDefer {
			rundefersToKill = 0
		}

		if j+b.gaps+rundefersToKill == 0 {
			continue // fast path: no new phis or gaps
		}

		// Compact nps + non-nil Instrs into a new slice.
		// TODO(adonovan): opt: compact in situ (rightwards)
		// if Instrs has sufficient space or slack.
		dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
		for i, np := range nps {
			dst[i] = np.phi
		}
		for _, instr := range b.Instrs {
			if instr == nil {
				continue
			}
			if !usesDefer {
				if _, ok := instr.(*RunDefers); ok {
					continue
				}
			}
			dst[j] = instr
			j++
		}
		b.Instrs = dst
	}

	// Remove any fn.Locals that were lifted.
	j := 0
	for _, l := range fn.Locals {
		if l.index < 0 {
			fn.Locals[j] = l
			j++
		}
	}
	// Nil out fn.Locals[j:] to aid GC.
	for i := j; i < len(fn.Locals); i++ {
		fn.Locals[i] = nil
	}
	fn.Locals = fn.Locals[:j]
}

// removeDeadPhis removes φ-nodes not transitively needed by a
// non-Phi, non-DebugRef instruction.
func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
	// First pass: find the set of "live" φ-nodes: those reachable
	// from some non-Phi instruction.
	//
	// We compute reachability in reverse, starting from each φ,
	// rather than forwards, starting from each live non-Phi
	// instruction, because this way visits much less of the
	// Value graph.
	livePhis := make(map[*Phi]bool)
	for _, npList := range newPhis {
		for _, np := range npList {
			phi := np.phi
			if !livePhis[phi] && phiHasDirectReferrer(phi) {
				markLivePhi(livePhis, phi)
			}
		}
	}

	// Existing φ-nodes due to && and || operators
	// are all considered live (see Go issue 19622).
	for _, b := range blocks {
		for _, phi := range b.phis() {
			markLivePhi(livePhis, phi.(*Phi))
		}
	}

	// Second pass: eliminate unused phis from newPhis.
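	// Live φ-nodes are compacted in place to the front of each
	// per-block slice; dead ones are unlinked from the referrer lists
	// of their edge values so that def/use info stays consistent.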
	for block, npList := range newPhis {
		j := 0
		for _, np := range npList {
			if livePhis[np.phi] {
				npList[j] = np
				j++
			} else {
				// discard it, first removing it from referrers
				for _, val := range np.phi.Edges {
					if refs := val.Referrers(); refs != nil {
						*refs = removeInstr(*refs, np.phi)
					}
				}
				np.phi.block = nil
			}
		}
		newPhis[block] = npList[:j]
	}
}

// markLivePhi marks phi, and all φ-nodes transitively reachable via
// its Operands, live.
func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
	livePhis[phi] = true
	for _, rand := range phi.Operands(nil) {
		if q, ok := (*rand).(*Phi); ok {
			if !livePhis[q] {
				markLivePhi(livePhis, q)
			}
		}
	}
}

// phiHasDirectReferrer reports whether phi is directly referred to by
// a non-Phi instruction. Such instructions are the
// roots of the liveness traversal.
func phiHasDirectReferrer(phi *Phi) bool {
	for _, instr := range *phi.Referrers() {
		if _, ok := instr.(*Phi); !ok {
			return true
		}
	}
	return false
}

type blockSet struct{ big.Int } // (inherit methods from Int)

// add adds b to the set and returns true if the set changed.
func (s *blockSet) add(b *BasicBlock) bool {
	i := b.Index
	if s.Bit(i) != 0 {
		return false
	}
	s.SetBit(&s.Int, i, 1)
	return true
}

// take removes an arbitrary element from the set and
// returns its index, or returns -1 if empty.
func (s *blockSet) take() int {
	l := s.BitLen()
	for i := 0; i < l; i++ {
		if s.Bit(i) == 1 {
			s.SetBit(&s.Int, i, 0)
			return i
		}
	}
	return -1
}

// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct {
	phi   *Phi
	alloc *Alloc
}

// newPhiMap records, for each basic block, the set of newPhis that
// must be prepended to the block.
type newPhiMap map[*BasicBlock][]newPhi

// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
// fresh is a source of fresh ids for phi nodes.
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
	// Don't lift result values in functions that defer
	// calls that may recover from panic.
	if fn := alloc.Parent(); fn.Recover != nil {
		for _, nr := range fn.results {
			if nr == alloc {
				return false
			}
		}
	}

	// Compute defblocks, the set of blocks containing a
	// definition of the alloc cell.
	var defblocks blockSet
	for _, instr := range *alloc.Referrers() {
		// Bail out if we discover the alloc is not liftable;
		// the only operations permitted to use the alloc are
		// loads/stores into the cell, and DebugRef.
		switch instr := instr.(type) {
		case *Store:
			if instr.Val == alloc {
				return false // address used as value
			}
			if instr.Addr != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
			defblocks.add(instr.Block())
		case *UnOp:
			if instr.Op != token.MUL {
				return false // not a load
			}
			if instr.X != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
		case *DebugRef:
			// ok
		default:
			return false // some other instruction
		}
	}
	// The Alloc itself counts as a (zero) definition of the cell.
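	// (Without it, a load not dominated by any explicit Store would have
	// no defining value; the rename pass materializes the zero constant
	// lazily for such loads.)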
	defblocks.add(alloc.Block())

	if debugLifting {
		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
	}

	fn := alloc.Parent()

	// Φ-insertion.
	//
	// What follows is the body of the main loop of the insert-φ
	// function described by Cytron et al, but instead of using
	// counter tricks, we just reset the 'hasAlready' and 'work'
	// sets each iteration. These are bitmaps so it's pretty cheap.
	//
	// TODO(adonovan): opt: recycle slice storage for W,
	// hasAlready, defBlocks across liftAlloc calls.
	var hasAlready blockSet

	// Initialize W and work to defblocks.
	var work blockSet = defblocks // blocks seen
	var W blockSet                // blocks to do
	W.Set(&defblocks.Int)

	// Traverse iterated dominance frontier, inserting φ-nodes.
	for i := W.take(); i != -1; i = W.take() {
		u := fn.Blocks[i]
		for _, v := range df[u.Index] {
			if hasAlready.add(v) {
				// Create φ-node.
				// It will be prepended to v.Instrs later, if needed.
				phi := &Phi{
					Edges:   make([]Value, len(v.Preds)),
					Comment: alloc.Comment,
				}
				// This is merely a debugging aid:
				phi.setNum(*fresh)
				*fresh++

				phi.pos = alloc.Pos()
				phi.setType(typeparams.MustDeref(alloc.Type()))
				phi.block = v
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
				}
				newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

				if work.add(v) {
					W.add(v)
				}
			}
		}
	}

	return true
}

// replaceAll replaces all intraprocedural uses of x with y,
// updating x.Referrers and y.Referrers.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
func replaceAll(x, y Value) {
	var rands []*Value
	pxrefs := x.Referrers()
	pyrefs := y.Referrers()
	for _, instr := range *pxrefs {
		rands = instr.Operands(rands[:0]) // recycle storage
		for _, rand := range rands {
			if *rand != nil {
				if *rand == x {
					*rand = y
				}
			}
		}
		if pyrefs != nil {
			*pyrefs = append(*pyrefs, instr) // dups ok
		}
	}
	*pxrefs = nil // x is now unreferenced
}

// renamed returns the value to which alloc is being renamed,
// constructing it lazily if it's the implicit zero initialization.
func renamed(renaming []Value, alloc *Alloc) Value {
	v := renaming[alloc.index]
	if v == nil {
		v = zeroConst(typeparams.MustDeref(alloc.Type()))
		renaming[alloc.index] = v
	}
	return v
}

// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction. For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
	// Each φ-node becomes the new name for its associated Alloc.
	for _, np := range newPhis[u] {
		phi := np.phi
		alloc := np.alloc
		renaming[alloc.index] = phi
	}

	// Rename loads and stores of allocs.
	for i, instr := range u.Instrs {
		switch instr := instr.(type) {
		case *Alloc:
			if instr.index >= 0 { // store of zero to Alloc cell
				// Replace dominated loads by the zero value.
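				// A nil renaming entry denotes that zero value; renamed
				// constructs the Const lazily only if some dominated
				// load actually observes it.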
				renaming[instr.index] = nil
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
				}
				// Delete the Alloc.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *Store:
			if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
				// Replace dominated loads by the stored value.
				renaming[alloc.index] = instr.Val
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
						instr, instr.Val.Name())
				}
				// Remove the store from the referrer list of the stored value.
				if refs := instr.Val.Referrers(); refs != nil {
					*refs = removeInstr(*refs, instr)
				}
				// Delete the Store.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *UnOp:
			if instr.Op == token.MUL {
				if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
					newval := renamed(renaming, alloc)
					if debugLifting {
						fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
							instr.Name(), instr, newval.Name())
					}
					// Replace all references to
					// the loaded value by the
					// dominating stored value.
					replaceAll(instr, newval)
					// Delete the Load.
					u.Instrs[i] = nil
					u.gaps++
				}
			}

		case *DebugRef:
			if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
				if instr.IsAddr {
					instr.X = renamed(renaming, alloc)
					instr.IsAddr = false

					// Add DebugRef to instr.X's referrers.
					if refs := instr.X.Referrers(); refs != nil {
						*refs = append(*refs, instr)
					}
				} else {
					// A source expression denotes the address
					// of an Alloc that was optimized away.
					instr.X = nil

					// Delete the DebugRef.
					u.Instrs[i] = nil
					u.gaps++
				}
			}
		}
	}

	// For each φ-node in a CFG successor, rename the edge.
	for _, v := range u.Succs {
		phis := newPhis[v]
		if len(phis) == 0 {
			continue
		}
		i := v.predIndex(u)
		for _, np := range phis {
			phi := np.phi
			alloc := np.alloc
			newval := renamed(renaming, alloc)
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
					phi.Name(), u, v, i, alloc.Name(), newval.Name())
			}
			phi.Edges[i] = newval
			if prefs := newval.Referrers(); prefs != nil {
				*prefs = append(*prefs, phi)
			}
		}
	}

	// Continue depth-first recursion over domtree, pushing a
	// fresh copy of the renaming map for each subtree.
	for i, v := range u.dom.children {
		r := renaming
		if i < len(u.dom.children)-1 {
			// On all but the final iteration, we must make
			// a copy to avoid destructive update.
			r = make([]Value, len(renaming))
			copy(r, renaming)
		}
		rename(v, r, newPhis)
	}
}

// deferstackPreamble returns the *Alloc and ssa:deferstack() call for fn.deferstack.
func deferstackPreamble(fn *Function) (*Alloc, *Call) {
	if alloc, _ := fn.vars[fn.deferstack].(*Alloc); alloc != nil {
		for _, ref := range *alloc.Referrers() {
			if ref, _ := ref.(*Store); ref != nil && ref.Addr == alloc {
				if call, _ := ref.Val.(*Call); call != nil {
					return alloc, call
				}
			}
		}
	}
	return nil, nil
}
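
// The following is a minimal illustrative sketch, not used by the
// lifting pass: it shows the Cooper, Harvey & Kennedy dominance
// frontier algorithm suggested in the TODO near the top of this file,
// phrased over plain integer block indices rather than *BasicBlock
// values. The names dfSketch, preds and idom are hypothetical; it
// assumes every block is reachable, idom[entry] == entry, and the
// entry block has no predecessors. Like domFrontier, the resulting
// per-block sets may contain duplicates.
func dfSketch(preds [][]int, idom []int) [][]int {
	df := make([][]int, len(preds))
	for b, ps := range preds {
		if len(ps) < 2 {
			continue // per CHK, only join points (>=2 preds) contribute
		}
		for _, p := range ps {
			// Walk up the dominator tree from each predecessor of b
			// until reaching b's immediate dominator; b lies in the
			// dominance frontier of every block visited on the way.
			for runner := p; runner != idom[b]; runner = idom[runner] {
				df[runner] = append(df[runner], b)
			}
		}
	}
	return df
}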