// Source: github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/go/ir/lift.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ir

// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ- and σ-nodes as needed.

// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// https://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// https://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// https://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)
//
// C. Scott Ananian. 1997. The static single information form.
//
// Jeremy Singer. 2006. Static program analysis based on virtual register renaming.

// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Also see many other "TODO: opt" suggestions in the code.

import (
	"encoding/binary"
	"fmt"
	"os"
)

// If true, show diagnostic information at each step of lifting.
// Very verbose.
const debugLifting = false

// domFrontier maps each block to the set of blocks in its dominance
// frontier. The outer slice is conceptually a map keyed by
// Block.Index. The inner slice is conceptually a set, possibly
// containing duplicates.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
type domFrontier BlockMap[[]*BasicBlock]

// add records v as a member of u's dominance frontier. No
// deduplication is performed, so v may end up listed more than once.
func (df domFrontier) add(u, v *BasicBlock) {
	df[u.Index] = append(df[u.Index], v)
}

// build builds the dominance frontier df for the dominator tree of
// fn, using the algorithm found in A Simple, Fast Dominance
// Algorithm, Figure 5.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
79 func (df domFrontier) build(fn *Function) { 80 for _, b := range fn.Blocks { 81 preds := b.Preds[0:len(b.Preds):len(b.Preds)] 82 if b == fn.Exit { 83 for i, v := range fn.fakeExits.values { 84 if v { 85 preds = append(preds, fn.Blocks[i]) 86 } 87 } 88 } 89 if len(preds) >= 2 { 90 for _, p := range preds { 91 runner := p 92 for runner != b.dom.idom { 93 df.add(runner, b) 94 runner = runner.dom.idom 95 } 96 } 97 } 98 } 99 } 100 101 func buildDomFrontier(fn *Function) domFrontier { 102 df := make(domFrontier, len(fn.Blocks)) 103 df.build(fn) 104 return df 105 } 106 107 type postDomFrontier BlockMap[[]*BasicBlock] 108 109 func (rdf postDomFrontier) add(u, v *BasicBlock) { 110 rdf[u.Index] = append(rdf[u.Index], v) 111 } 112 113 func (rdf postDomFrontier) build(fn *Function) { 114 for _, b := range fn.Blocks { 115 succs := b.Succs[0:len(b.Succs):len(b.Succs)] 116 if fn.fakeExits.Has(b) { 117 succs = append(succs, fn.Exit) 118 } 119 if len(succs) >= 2 { 120 for _, s := range succs { 121 runner := s 122 for runner != b.pdom.idom { 123 rdf.add(runner, b) 124 runner = runner.pdom.idom 125 } 126 } 127 } 128 } 129 } 130 131 func buildPostDomFrontier(fn *Function) postDomFrontier { 132 rdf := make(postDomFrontier, len(fn.Blocks)) 133 rdf.build(fn) 134 return rdf 135 } 136 137 func removeInstr(refs []Instruction, instr Instruction) []Instruction { 138 i := 0 139 for _, ref := range refs { 140 if ref == instr { 141 continue 142 } 143 refs[i] = ref 144 i++ 145 } 146 for j := i; j != len(refs); j++ { 147 refs[j] = nil // aid GC 148 } 149 return refs[:i] 150 } 151 152 func clearInstrs(instrs []Instruction) { 153 for i := range instrs { 154 instrs[i] = nil 155 } 156 } 157 158 func numberNodesPerBlock(f *Function) { 159 for _, b := range f.Blocks { 160 var base ID 161 for _, instr := range b.Instrs { 162 if instr == nil { 163 continue 164 } 165 instr.setID(base) 166 base++ 167 } 168 } 169 } 170 171 // lift replaces local and new Allocs accessed only with 172 // load/store by IR 
registers, inserting φ- and σ-nodes where necessary. 173 // The result is a program in pruned SSI form. 174 // 175 // Preconditions: 176 // - fn has no dead blocks (blockopt has run). 177 // - Def/use info (Operands and Referrers) is up-to-date. 178 // - The dominator tree is up-to-date. 179 func lift(fn *Function) bool { 180 // TODO(adonovan): opt: lots of little optimizations may be 181 // worthwhile here, especially if they cause us to avoid 182 // buildDomFrontier. For example: 183 // 184 // - Alloc never loaded? Eliminate. 185 // - Alloc never stored? Replace all loads with a zero constant. 186 // - Alloc stored once? Replace loads with dominating store; 187 // don't forget that an Alloc is itself an effective store 188 // of zero. 189 // - Alloc used only within a single block? 190 // Use degenerate algorithm avoiding φ-nodes. 191 // - Consider synergy with scalar replacement of aggregates (SRA). 192 // e.g. *(&x.f) where x is an Alloc. 193 // Perhaps we'd get better results if we generated this as x.f 194 // i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)). 195 // Unclear. 196 // 197 // But we will start with the simplest correct code. 198 var df domFrontier 199 var rdf postDomFrontier 200 var closure *closure 201 var newPhis BlockMap[[]newPhi] 202 var newSigmas BlockMap[[]newSigma] 203 204 // During this pass we will replace some BasicBlock.Instrs 205 // (allocs, loads and stores) with nil, keeping a count in 206 // BasicBlock.gaps. At the end we will reset Instrs to the 207 // concatenation of all non-dead newPhis and non-nil Instrs 208 // for the block, reusing the original array if space permits. 209 210 // While we're here, we also eliminate 'rundefers' 211 // instructions in functions that contain no 'defer' 212 // instructions. 213 usesDefer := false 214 215 // Determine which allocs we can lift and number them densely. 216 // The renaming phase uses this numbering for compact maps. 
217 numAllocs := 0 218 219 instructions := make(BlockMap[liftInstructions], len(fn.Blocks)) 220 for i := range instructions { 221 instructions[i].insertInstructions = map[Instruction][]Instruction{} 222 } 223 224 // Number nodes, for liftable 225 numberNodesPerBlock(fn) 226 227 for _, b := range fn.Blocks { 228 b.gaps = 0 229 b.rundefers = 0 230 231 for _, instr := range b.Instrs { 232 switch instr := instr.(type) { 233 case *Alloc: 234 if !liftable(instr, instructions) { 235 instr.index = -1 236 continue 237 } 238 239 if numAllocs == 0 { 240 df = buildDomFrontier(fn) 241 rdf = buildPostDomFrontier(fn) 242 if len(fn.Blocks) > 2 { 243 closure = transitiveClosure(fn) 244 } 245 newPhis = make(BlockMap[[]newPhi], len(fn.Blocks)) 246 newSigmas = make(BlockMap[[]newSigma], len(fn.Blocks)) 247 248 if debugLifting { 249 title := false 250 for i, blocks := range df { 251 if blocks != nil { 252 if !title { 253 fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn) 254 title = true 255 } 256 fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks) 257 } 258 } 259 } 260 } 261 instr.index = numAllocs 262 numAllocs++ 263 case *Defer: 264 usesDefer = true 265 case *RunDefers: 266 b.rundefers++ 267 } 268 } 269 } 270 271 if numAllocs > 0 { 272 for _, b := range fn.Blocks { 273 work := instructions[b.Index] 274 for _, rename := range work.renameAllocs { 275 for _, instr_ := range b.Instrs[rename.startingAt:] { 276 replace(instr_, rename.from, rename.to) 277 } 278 } 279 } 280 281 for _, b := range fn.Blocks { 282 work := instructions[b.Index] 283 if len(work.insertInstructions) != 0 { 284 newInstrs := make([]Instruction, 0, len(fn.Blocks)+len(work.insertInstructions)*3) 285 for _, instr := range b.Instrs { 286 if add, ok := work.insertInstructions[instr]; ok { 287 newInstrs = append(newInstrs, add...) 288 } 289 newInstrs = append(newInstrs, instr) 290 } 291 b.Instrs = newInstrs 292 } 293 } 294 295 // TODO(dh): remove inserted allocs that end up unused after lifting. 
296 297 for _, b := range fn.Blocks { 298 for _, instr := range b.Instrs { 299 if instr, ok := instr.(*Alloc); ok && instr.index >= 0 { 300 liftAlloc(closure, df, rdf, instr, newPhis, newSigmas) 301 } 302 } 303 } 304 305 // renaming maps an alloc (keyed by index) to its replacement 306 // value. Initially the renaming contains nil, signifying the 307 // zero constant of the appropriate type; we construct the 308 // Const lazily at most once on each path through the domtree. 309 // TODO(adonovan): opt: cache per-function not per subtree. 310 renaming := make([]Value, numAllocs) 311 312 // Renaming. 313 rename(fn.Blocks[0], renaming, newPhis, newSigmas) 314 315 simplifyPhisAndSigmas(newPhis, newSigmas) 316 317 // Eliminate dead φ- and σ-nodes. 318 markLiveNodes(fn.Blocks, newPhis, newSigmas) 319 } 320 321 // Prepend remaining live φ-nodes to each block and possibly kill rundefers. 322 for _, b := range fn.Blocks { 323 var head []Instruction 324 if numAllocs > 0 { 325 nps := newPhis[b.Index] 326 head = make([]Instruction, 0, len(nps)) 327 for _, pred := range b.Preds { 328 nss := newSigmas[pred.Index] 329 idx := pred.succIndex(b) 330 for _, newSigma := range nss { 331 if sigma := newSigma.sigmas[idx]; sigma != nil && sigma.live { 332 head = append(head, sigma) 333 334 // we didn't populate referrers before, as most 335 // sigma nodes will be killed 336 if refs := sigma.X.Referrers(); refs != nil { 337 *refs = append(*refs, sigma) 338 } 339 } else if sigma != nil { 340 sigma.block = nil 341 } 342 } 343 } 344 for _, np := range nps { 345 if np.phi.live { 346 head = append(head, np.phi) 347 } else { 348 for _, edge := range np.phi.Edges { 349 if refs := edge.Referrers(); refs != nil { 350 *refs = removeInstr(*refs, np.phi) 351 } 352 } 353 np.phi.block = nil 354 } 355 } 356 } 357 358 rundefersToKill := b.rundefers 359 if usesDefer { 360 rundefersToKill = 0 361 } 362 363 j := len(head) 364 if j+b.gaps+rundefersToKill == 0 { 365 continue // fast path: no new phis or gaps 
366 } 367 368 // We could do straight copies instead of element-wise copies 369 // when both b.gaps and rundefersToKill are zero. However, 370 // that seems to only be the case ~1% of the time, which 371 // doesn't seem worth the extra branch. 372 373 // Remove dead instructions, add phis and sigmas 374 ns := len(b.Instrs) + j - b.gaps - rundefersToKill 375 if ns <= cap(b.Instrs) { 376 // b.Instrs has enough capacity to store all instructions 377 378 // OPT(dh): check cap vs the actually required space; if 379 // there is a big enough difference, it may be worth 380 // allocating a new slice, to avoid pinning memory. 381 dst := b.Instrs[:cap(b.Instrs)] 382 i := len(dst) - 1 383 for n := len(b.Instrs) - 1; n >= 0; n-- { 384 instr := dst[n] 385 if instr == nil { 386 continue 387 } 388 if !usesDefer { 389 if _, ok := instr.(*RunDefers); ok { 390 continue 391 } 392 } 393 dst[i] = instr 394 i-- 395 } 396 off := i + 1 - len(head) 397 // aid GC 398 clearInstrs(dst[:off]) 399 dst = dst[off:] 400 copy(dst, head) 401 b.Instrs = dst 402 } else { 403 // not enough space, so allocate a new slice and copy 404 // over. 405 dst := make([]Instruction, ns) 406 copy(dst, head) 407 408 for _, instr := range b.Instrs { 409 if instr == nil { 410 continue 411 } 412 if !usesDefer { 413 if _, ok := instr.(*RunDefers); ok { 414 continue 415 } 416 } 417 dst[j] = instr 418 j++ 419 } 420 b.Instrs = dst 421 } 422 } 423 424 // Remove any fn.Locals that were lifted. 425 j := 0 426 for _, l := range fn.Locals { 427 if l.index < 0 { 428 fn.Locals[j] = l 429 j++ 430 } 431 } 432 // Nil out fn.Locals[j:] to aid GC. 
433 for i := j; i < len(fn.Locals); i++ { 434 fn.Locals[i] = nil 435 } 436 fn.Locals = fn.Locals[:j] 437 438 return numAllocs > 0 439 } 440 441 func hasDirectReferrer(instr Instruction) bool { 442 for _, instr := range *instr.Referrers() { 443 switch instr.(type) { 444 case *Phi, *Sigma: 445 // ignore 446 default: 447 return true 448 } 449 } 450 return false 451 } 452 453 func markLiveNodes(blocks []*BasicBlock, newPhis BlockMap[[]newPhi], newSigmas BlockMap[[]newSigma]) { 454 // Phis and sigmas may become dead due to optimization passes. We may also insert more nodes than strictly 455 // necessary, e.g. sigma nodes for constants, which will never be used. 456 457 // Phi and sigma nodes are considered live if a non-phi, non-sigma 458 // node uses them. Once we find a node that is live, we mark all 459 // of its operands as used, too. 460 for _, npList := range newPhis { 461 for _, np := range npList { 462 phi := np.phi 463 if !phi.live && hasDirectReferrer(phi) { 464 markLivePhi(phi) 465 } 466 } 467 } 468 for _, npList := range newSigmas { 469 for _, np := range npList { 470 for _, sigma := range np.sigmas { 471 if sigma != nil && !sigma.live && hasDirectReferrer(sigma) { 472 markLiveSigma(sigma) 473 } 474 } 475 } 476 } 477 // Existing φ-nodes due to && and || operators 478 // are all considered live (see Go issue 19622). 
479 for _, b := range blocks { 480 for _, phi := range b.phis() { 481 markLivePhi(phi.(*Phi)) 482 } 483 } 484 } 485 486 func markLivePhi(phi *Phi) { 487 phi.live = true 488 for _, rand := range phi.Edges { 489 switch rand := rand.(type) { 490 case *Phi: 491 if !rand.live { 492 markLivePhi(rand) 493 } 494 case *Sigma: 495 if !rand.live { 496 markLiveSigma(rand) 497 } 498 } 499 } 500 } 501 502 func markLiveSigma(sigma *Sigma) { 503 sigma.live = true 504 switch rand := sigma.X.(type) { 505 case *Phi: 506 if !rand.live { 507 markLivePhi(rand) 508 } 509 case *Sigma: 510 if !rand.live { 511 markLiveSigma(rand) 512 } 513 } 514 } 515 516 // simplifyPhisAndSigmas removes duplicate phi and sigma nodes, 517 // and replaces trivial phis with non-phi alternatives. Phi 518 // nodes where all edges are identical, or consist of only the phi 519 // itself and one other value, may be replaced with the value. 520 func simplifyPhisAndSigmas(newPhis BlockMap[[]newPhi], newSigmas BlockMap[[]newSigma]) { 521 // temporary numbering of values used in phis so that we can build map keys 522 var id ID 523 for _, npList := range newPhis { 524 for _, np := range npList { 525 for _, edge := range np.phi.Edges { 526 edge.setID(id) 527 id++ 528 } 529 } 530 } 531 // find all phis that are trivial and can be replaced with a 532 // non-phi value. run until we reach a fixpoint, because replacing 533 // a phi may make other phis trivial. 534 for changed := true; changed; { 535 changed = false 536 for _, npList := range newPhis { 537 for _, np := range npList { 538 if np.phi.live { 539 // we're reusing 'live' to mean 'dead' in the context of simplifyPhisAndSigmas 540 continue 541 } 542 if r, ok := isUselessPhi(np.phi); ok { 543 // useless phi, replace its uses with the 544 // replacement value. the dead phi pass will clean 545 // up the phi afterwards. 546 replaceAll(np.phi, r) 547 np.phi.live = true 548 changed = true 549 } 550 } 551 } 552 553 // Replace duplicate sigma nodes with a single node. 
These nodes exist when multiple allocs get replaced with the 554 // same dominating store. 555 for _, sigmaList := range newSigmas { 556 primarySigmas := map[struct { 557 succ int 558 v Value 559 }]*Sigma{} 560 for _, sigmas := range sigmaList { 561 for succ, sigma := range sigmas.sigmas { 562 if sigma == nil { 563 continue 564 } 565 if sigma.live { 566 // we're reusing 'live' to mean 'dead' in the context of simplifyPhisAndSigmas 567 continue 568 } 569 key := struct { 570 succ int 571 v Value 572 }{succ, sigma.X} 573 if alt, ok := primarySigmas[key]; ok { 574 replaceAll(sigma, alt) 575 sigma.live = true 576 changed = true 577 } else { 578 primarySigmas[key] = sigma 579 } 580 } 581 } 582 } 583 584 // Replace duplicate phi nodes with a single node. As far as we know, these duplicate nodes only ever exist 585 // because of the previous sigma deduplication. 586 keyb := make([]byte, 0, 4*8) 587 for _, npList := range newPhis { 588 primaryPhis := map[string]*Phi{} 589 for _, np := range npList { 590 if np.phi.live { 591 continue 592 } 593 if n := len(np.phi.Edges) * 8; cap(keyb) >= n { 594 keyb = keyb[:n] 595 } else { 596 keyb = make([]byte, n, n*2) 597 } 598 for i, e := range np.phi.Edges { 599 binary.LittleEndian.PutUint64(keyb[i*8:i*8+8], uint64(e.ID())) 600 } 601 if alt, ok := primaryPhis[string(keyb)]; ok { 602 replaceAll(np.phi, alt) 603 np.phi.live = true 604 changed = true 605 } else { 606 primaryPhis[string(keyb)] = np.phi 607 } 608 } 609 } 610 611 } 612 613 for _, npList := range newPhis { 614 for _, np := range npList { 615 np.phi.live = false 616 for _, edge := range np.phi.Edges { 617 edge.setID(0) 618 } 619 } 620 } 621 622 for _, sigmaList := range newSigmas { 623 for _, sigmas := range sigmaList { 624 for _, sigma := range sigmas.sigmas { 625 if sigma != nil { 626 sigma.live = false 627 } 628 } 629 } 630 } 631 } 632 633 type BlockSet struct { 634 idx int 635 values []bool 636 count int 637 } 638 639 func NewBlockSet(size int) *BlockSet { 640 return 
&BlockSet{values: make([]bool, size)} 641 } 642 643 func (s *BlockSet) Set(s2 *BlockSet) { 644 copy(s.values, s2.values) 645 s.count = 0 646 for _, v := range s.values { 647 if v { 648 s.count++ 649 } 650 } 651 } 652 653 func (s *BlockSet) Num() int { 654 return s.count 655 } 656 657 func (s *BlockSet) Has(b *BasicBlock) bool { 658 if b.Index >= len(s.values) { 659 return false 660 } 661 return s.values[b.Index] 662 } 663 664 // add adds b to the set and returns true if the set changed. 665 func (s *BlockSet) Add(b *BasicBlock) bool { 666 if s.values[b.Index] { 667 return false 668 } 669 s.count++ 670 s.values[b.Index] = true 671 s.idx = b.Index 672 673 return true 674 } 675 676 func (s *BlockSet) Clear() { 677 for j := range s.values { 678 s.values[j] = false 679 } 680 s.count = 0 681 } 682 683 // take removes an arbitrary element from a set s and 684 // returns its index, or returns -1 if empty. 685 func (s *BlockSet) Take() int { 686 // [i, end] 687 for i := s.idx; i < len(s.values); i++ { 688 if s.values[i] { 689 s.values[i] = false 690 s.idx = i 691 s.count-- 692 return i 693 } 694 } 695 696 // [start, i) 697 for i := 0; i < s.idx; i++ { 698 if s.values[i] { 699 s.values[i] = false 700 s.idx = i 701 s.count-- 702 return i 703 } 704 } 705 706 return -1 707 } 708 709 type closure struct { 710 span []uint32 711 reachables BlockMap[interval] 712 } 713 714 type interval uint32 715 716 const ( 717 flagMask = 1 << 31 718 numBits = 20 719 lengthBits = 32 - numBits - 1 720 lengthMask = (1<<lengthBits - 1) << numBits 721 numMask = 1<<numBits - 1 722 ) 723 724 func (c closure) has(s, v *BasicBlock) bool { 725 idx := uint32(v.Index) 726 if idx == 1 || s.Dominates(v) { 727 return true 728 } 729 r := c.reachable(s.Index) 730 for i := 0; i < len(r); i++ { 731 inv := r[i] 732 var start, end uint32 733 if inv&flagMask == 0 { 734 // small interval 735 start = uint32(inv & numMask) 736 end = start + uint32(inv&lengthMask)>>numBits 737 } else { 738 // large interval 739 i++ 740 
start = uint32(inv & numMask) 741 end = uint32(r[i]) 742 } 743 if idx >= start && idx <= end { 744 return true 745 } 746 } 747 return false 748 } 749 750 func (c closure) reachable(id int) []interval { 751 return c.reachables[c.span[id]:c.span[id+1]] 752 } 753 754 func (c closure) walk(current *BasicBlock, b *BasicBlock, visited []bool) { 755 // TODO(dh): the 'current' argument seems to be unused 756 // TODO(dh): there's no reason for this to be a method 757 visited[b.Index] = true 758 for _, succ := range b.Succs { 759 if visited[succ.Index] { 760 continue 761 } 762 visited[succ.Index] = true 763 c.walk(current, succ, visited) 764 } 765 } 766 767 func transitiveClosure(fn *Function) *closure { 768 reachable := make(BlockMap[bool], len(fn.Blocks)) 769 c := &closure{} 770 c.span = make([]uint32, len(fn.Blocks)+1) 771 772 addInterval := func(start, end uint32) { 773 if l := end - start; l <= 1<<lengthBits-1 { 774 n := interval(l<<numBits | start) 775 c.reachables = append(c.reachables, n) 776 } else { 777 n1 := interval(1<<31 | start) 778 n2 := interval(end) 779 c.reachables = append(c.reachables, n1, n2) 780 } 781 } 782 783 for i, b := range fn.Blocks[1:] { 784 for i := range reachable { 785 reachable[i] = false 786 } 787 788 c.walk(b, b, reachable) 789 start := ^uint32(0) 790 for id, isReachable := range reachable { 791 if !isReachable { 792 if start != ^uint32(0) { 793 end := uint32(id) - 1 794 addInterval(start, end) 795 start = ^uint32(0) 796 } 797 continue 798 } else if start == ^uint32(0) { 799 start = uint32(id) 800 } 801 } 802 if start != ^uint32(0) { 803 addInterval(start, uint32(len(reachable))-1) 804 } 805 806 c.span[i+2] = uint32(len(c.reachables)) 807 } 808 809 return c 810 } 811 812 // newPhi is a pair of a newly introduced φ-node and the lifted Alloc 813 // it replaces. 
type newPhi struct {
	phi   *Phi   // the φ-node created for alloc
	alloc *Alloc // the Alloc being lifted
}

// newSigma groups the σ-nodes created for one lifted Alloc at one
// branching block.
type newSigma struct {
	alloc *Alloc
	// sigmas has one entry per successor of the branching block, in
	// successor order; an entry is nil if no σ-node was created for
	// that successor.
	sigmas []*Sigma
}

// liftInstructions collects, for one basic block, the edits that
// liftable requests and that lift applies before lifting proper.
type liftInstructions struct {
	// insertInstructions maps an existing instruction to the
	// instructions that are to be inserted directly before it.
	insertInstructions map[Instruction][]Instruction
	// renameAllocs lists alloc substitutions: in every instruction of
	// the block from index startingAt onwards, from is replaced by to.
	renameAllocs []struct {
		from       *Alloc
		to         *Alloc
		startingAt int
	}
}

// liftable determines if alloc can be lifted, and records instructions to split partially liftable allocs.
//
// In the trivial case, all uses of the alloc can be lifted. This is the case when it is only used for storing into and
// loading from. In that case, no instructions are recorded.
//
// In the more complex case, the alloc is used for storing into and loading from, but it is also used as a value, for
// example because it gets passed to a function, e.g. fn(&x). In this case, uses of the alloc fall into one of two
// categories: those that can be lifted and those that can't. A boundary forms between these two categories in the
// function's control flow: Once an unliftable use is encountered, the alloc is no longer liftable for the remainder of
// the basic block the use is in, nor in any blocks reachable from it.
//
// We record instructions that split the alloc into two allocs: one that is used in liftable uses, and one that is used
// in unliftable uses. Whenever we encounter a boundary between liftable and unliftable uses or blocks, we emit a pair
// of Load and Store that copy the value from the liftable alloc into the unliftable alloc. Taking these instructions
// into account, the normal lifting machinery will completely lift the liftable alloc, store the correct lifted values
// into the unliftable alloc, and will not at all lift the unliftable alloc.
849 // 850 // In Go syntax, the transformation looks somewhat like this: 851 // 852 // func foo() { 853 // x := 32 854 // if cond { 855 // println(x) 856 // escape(&x) 857 // println(x) 858 // } else { 859 // println(x) 860 // } 861 // println(x) 862 // } 863 // 864 // transforms into 865 // 866 // func fooSplitAlloc() { 867 // x := 32 868 // var x_ int 869 // if cond { 870 // println(x) 871 // x_ = x 872 // escape(&x_) 873 // println(x_) 874 // } else { 875 // println(x) 876 // x_ = x 877 // } 878 // println(x_) 879 // } 880 func liftable(alloc *Alloc, instructions BlockMap[liftInstructions]) bool { 881 fn := alloc.block.parent 882 883 // Don't lift named return values in functions that defer 884 // calls that may recover from panic. 885 if fn.hasDefer { 886 for _, nr := range fn.namedResults { 887 if nr == alloc { 888 return false 889 } 890 } 891 } 892 893 type blockDesc struct { 894 // is the block (partially) unliftable, because it contains unliftable instructions or is reachable by an unliftable block 895 isUnliftable bool 896 hasLiftableLoad bool 897 hasLiftableOther bool 898 // we need to emit stores in predecessors because the unliftable use is in a phi 899 storeInPreds bool 900 901 lastLiftable int 902 firstUnliftable int 903 } 904 blocks := make(BlockMap[blockDesc], len(fn.Blocks)) 905 for _, b := range fn.Blocks { 906 blocks[b.Index].lastLiftable = -1 907 blocks[b.Index].firstUnliftable = len(b.Instrs) + 1 908 } 909 910 // Look at all uses of the alloc and deduce which blocks have liftable or unliftable instructions. 
911 for _, instr := range alloc.referrers { 912 // Find the first unliftable use 913 914 desc := &blocks[instr.Block().Index] 915 hasUnliftable := false 916 inHead := false 917 switch instr := instr.(type) { 918 case *Store: 919 if instr.Val == alloc { 920 hasUnliftable = true 921 } 922 case *Load: 923 case *DebugRef: 924 case *Phi, *Sigma: 925 inHead = true 926 hasUnliftable = true 927 default: 928 hasUnliftable = true 929 } 930 931 if hasUnliftable { 932 desc.isUnliftable = true 933 if int(instr.ID()) < desc.firstUnliftable { 934 desc.firstUnliftable = int(instr.ID()) 935 } 936 if inHead { 937 desc.storeInPreds = true 938 desc.firstUnliftable = 0 939 } 940 } 941 } 942 943 for _, instr := range alloc.referrers { 944 // Find the last liftable use, taking the previously calculated firstUnliftable into consideration 945 946 desc := &blocks[instr.Block().Index] 947 if int(instr.ID()) >= desc.firstUnliftable { 948 continue 949 } 950 hasLiftable := false 951 switch instr := instr.(type) { 952 case *Store: 953 if instr.Val != alloc { 954 desc.hasLiftableOther = true 955 hasLiftable = true 956 } 957 case *Load: 958 desc.hasLiftableLoad = true 959 hasLiftable = true 960 case *DebugRef: 961 desc.hasLiftableOther = true 962 } 963 if hasLiftable { 964 if int(instr.ID()) > desc.lastLiftable { 965 desc.lastLiftable = int(instr.ID()) 966 } 967 } 968 } 969 970 for i := range blocks { 971 // Update firstUnliftable to be one after lastLiftable. We do this to include the unliftable's preceding 972 // DebugRefs in the renaming. 973 if blocks[i].lastLiftable == -1 && !blocks[i].storeInPreds { 974 // There are no liftable instructions (for this alloc) in this block. Set firstUnliftable to the 975 // first non-head instruction to avoid inserting the store before phi instructions, which would 976 // fail validation. 
977 first := -1 978 instrLoop: 979 for i, instr := range fn.Blocks[i].Instrs { 980 switch instr.(type) { 981 case *Phi, *Sigma: 982 default: 983 first = i 984 break instrLoop 985 } 986 } 987 blocks[i].firstUnliftable = first 988 } else { 989 blocks[i].firstUnliftable = blocks[i].lastLiftable + 1 990 } 991 } 992 993 // If a block is reachable by a (partially) unliftable block, then the entirety of the block is unliftable. In that 994 // case, stores have to be inserted in the predecessors. 995 // 996 // TODO(dh): this isn't always necessary. If the block is reachable by itself, i.e. part of a loop, then if the 997 // Alloc instruction is itself part of that loop, then there is a subset of instructions in the loop that can be 998 // lifted. For example: 999 // 1000 // for { 1001 // x := 42 1002 // println(x) 1003 // escape(&x) 1004 // } 1005 // 1006 // The x that escapes in one iteration of the loop isn't the same x that we read from on the next iteration. 1007 seen := make(BlockMap[bool], len(fn.Blocks)) 1008 var dfs func(b *BasicBlock) 1009 dfs = func(b *BasicBlock) { 1010 if seen[b.Index] { 1011 return 1012 } 1013 seen[b.Index] = true 1014 desc := &blocks[b.Index] 1015 desc.hasLiftableLoad = false 1016 desc.hasLiftableOther = false 1017 desc.isUnliftable = true 1018 desc.firstUnliftable = 0 1019 desc.storeInPreds = true 1020 for _, succ := range b.Succs { 1021 dfs(succ) 1022 } 1023 } 1024 for _, b := range fn.Blocks { 1025 if blocks[b.Index].isUnliftable { 1026 for _, succ := range b.Succs { 1027 dfs(succ) 1028 } 1029 } 1030 } 1031 1032 hasLiftableLoad := false 1033 hasLiftableOther := false 1034 hasUnliftable := false 1035 for _, b := range fn.Blocks { 1036 desc := blocks[b.Index] 1037 hasLiftableLoad = hasLiftableLoad || desc.hasLiftableLoad 1038 hasLiftableOther = hasLiftableOther || desc.hasLiftableOther 1039 if desc.isUnliftable { 1040 hasUnliftable = true 1041 } 1042 } 1043 if !hasLiftableLoad && !hasLiftableOther { 1044 // There are no liftable uses 1045 
return false 1046 } else if !hasUnliftable { 1047 // The alloc is entirely liftable without splitting 1048 return true 1049 } else if !hasLiftableLoad { 1050 // The alloc is not entirely liftable, and the only liftable uses are stores. While some of those stores could 1051 // get lifted away, it would also lead to an infinite loop when lifting to a fixpoint, because the newly created 1052 // allocs also get stored into repeatable and that's their only liftable uses. 1053 return false 1054 } 1055 1056 // We need to insert stores for the new alloc. If a (partially) unliftable block has no unliftable 1057 // predecessors and the use isn't in a phi node, then the store can be inserted right before the unliftable use. 1058 // Otherwise, stores have to be inserted at the end of all liftable predecessors. 1059 1060 newAlloc := &Alloc{Heap: true} 1061 newAlloc.setBlock(alloc.block) 1062 newAlloc.setType(alloc.typ) 1063 newAlloc.setSource(alloc.source) 1064 newAlloc.index = -1 1065 newAlloc.comment = "split alloc" 1066 1067 { 1068 work := instructions[alloc.block.Index] 1069 work.insertInstructions[alloc] = append(work.insertInstructions[alloc], newAlloc) 1070 } 1071 1072 predHasStore := make(BlockMap[bool], len(fn.Blocks)) 1073 for _, b := range fn.Blocks { 1074 desc := &blocks[b.Index] 1075 bWork := &instructions[b.Index] 1076 1077 if desc.isUnliftable { 1078 bWork.renameAllocs = append(bWork.renameAllocs, struct { 1079 from *Alloc 1080 to *Alloc 1081 startingAt int 1082 }{ 1083 alloc, newAlloc, int(desc.firstUnliftable), 1084 }) 1085 } 1086 1087 if !desc.isUnliftable { 1088 continue 1089 } 1090 1091 propagate := func(in *BasicBlock, before Instruction) { 1092 load := &Load{ 1093 X: alloc, 1094 } 1095 store := &Store{ 1096 Addr: newAlloc, 1097 Val: load, 1098 } 1099 load.setType(deref(alloc.typ)) 1100 load.setBlock(in) 1101 load.comment = "split alloc" 1102 store.setBlock(in) 1103 updateOperandReferrers(load) 1104 updateOperandReferrers(store) 1105 store.comment = "split 
alloc" 1106 1107 entry := &instructions[in.Index] 1108 entry.insertInstructions[before] = append(entry.insertInstructions[before], load, store) 1109 } 1110 1111 if desc.storeInPreds { 1112 // emit stores at the end of liftable preds 1113 for _, pred := range b.Preds { 1114 if blocks[pred.Index].isUnliftable { 1115 continue 1116 } 1117 1118 if !alloc.block.Dominates(pred) { 1119 // Consider this cfg: 1120 // 1121 // 1 1122 // /| 1123 // / | 1124 // ↙ ↓ 1125 // 2--→3 1126 // 1127 // with an Alloc in block 2. It doesn't make sense to insert a store in block 1 for the jump to 1128 // block 3, because 1 can never see the Alloc in the first place. 1129 // 1130 // Ignoring phi nodes, an Alloc always dominates all of its uses, and phi nodes don't matter here, 1131 // because for the incoming edges that do matter, we do emit the stores. 1132 1133 continue 1134 } 1135 1136 if predHasStore[pred.Index] { 1137 // Don't generate redundant propagations. Not only is it unnecessary, it can lead to infinite loops 1138 // when trying to lift to a fix point, because redundant stores are liftable. 1139 continue 1140 } 1141 1142 predHasStore[pred.Index] = true 1143 1144 before := pred.Instrs[len(pred.Instrs)-1] 1145 propagate(pred, before) 1146 } 1147 } else { 1148 // emit store before the first unliftable use 1149 before := b.Instrs[desc.firstUnliftable] 1150 propagate(b, before) 1151 } 1152 } 1153 1154 return true 1155 }

// liftAlloc computes where φ- and σ-nodes are needed in order to lift alloc into registers and records them in
// newPhis and newSigmas. It only creates the nodes; it does not rewrite any existing instruction.
//
// newPhis is keyed by the index of the block a φ-node belongs to. newSigmas is keyed by the index of the block
// whose outgoing edges carry the σ-nodes; each entry holds one slot per successor, nil where no σ-node was created.
// df and rdf are indexed by Block.Index and drive φ- and σ-placement respectively.
//
// If closure is nil, every candidate node is considered live. Otherwise closure.has is consulted to skip nodes from
// which no relevant referrer of alloc can be reached (presumably a reachability query; confirm against the closure
// type).
func liftAlloc(closure *closure, df domFrontier, rdf postDomFrontier, alloc *Alloc, newPhis BlockMap[[]newPhi], newSigmas BlockMap[[]newSigma]) {
	fn := alloc.Parent()

	// Five distinct block sets obtained from fn, selected by index.
	defblocks := fn.blockset(0)
	useblocks := fn.blockset(1)
	Aphi := fn.blockset(2)   // blocks already considered for a φ-node
	Asigma := fn.blockset(3) // blocks already considered for σ-nodes
	W := fn.blockset(4)      // worklist

	// Compute defblocks, the set of blocks containing a definition of the alloc cell, and useblocks, the set of
	// blocks containing a load of the cell or an instruction that uses such a load.
	for _, instr := range *alloc.Referrers() {
		switch instr := instr.(type) {
		case *Store:
			defblocks.Add(instr.Block())
		case *Load:
			useblocks.Add(instr.Block())
			for _, ref := range *instr.Referrers() {
				useblocks.Add(ref.Block())
			}
		}
	}
	// The Alloc itself counts as a (zero) definition of the cell.
	defblocks.Add(alloc.Block())

	if debugLifting {
		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
	}

	// Φ-insertion.
	//
	// What follows is the body of the main loop of the insert-φ
	// function described by Cytron et al, but instead of using
	// counter tricks, we just reset the 'hasAlready' and 'work'
	// sets each iteration. These are bitmaps so it's pretty cheap.

	// φ- and σ-placement feed each other: a new φ-node adds its predecessors to useblocks, and new σ-nodes add
	// their successors to defblocks. Repeat both passes until neither adds a node. W is re-seeded at the start of
	// each pass.
	for change := true; change; {
		change = false
		{
			// Traverse iterated dominance frontier, inserting φ-nodes.
			W.Set(defblocks)

			// The loop ends when Take yields -1 (presumably meaning that W is empty).
			for i := W.Take(); i != -1; i = W.Take() {
				n := fn.Blocks[i]
				for _, y := range df[n.Index] {
					// Each frontier block is examined at most once (assuming Add reports whether y was newly
					// added); a block rejected below is not reconsidered.
					if Aphi.Add(y) {
						if len(*alloc.Referrers()) == 0 {
							continue
						}
						// With a closure, only place the φ-node if closure.has holds between y and the block of
						// at least one Load of alloc.
						live := false
						if closure == nil {
							live = true
						} else {
							for _, ref := range *alloc.Referrers() {
								if _, ok := ref.(*Load); ok {
									if closure.has(y, ref.Block()) {
										live = true
										break
									}
								}
							}
						}
						if !live {
							continue
						}

						// Create φ-node with one (still unset) edge per predecessor of y.
						// It will be prepended to y.Instrs later, if needed.
						phi := &Phi{
							Edges: make([]Value, len(y.Preds)),
						}

						phi.source = alloc.source
						phi.setType(deref(alloc.Type()))
						phi.block = y
						if debugLifting {
							fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, y)
						}
						newPhis[y.Index] = append(newPhis[y.Index], newPhi{phi, alloc})

						// The φ-node reads the cell's value at the end of every predecessor.
						for _, p := range y.Preds {
							useblocks.Add(p)
						}
						change = true
						// The φ-node is itself a definition, so y's own frontier has to be visited as well.
						if defblocks.Add(y) {
							W.Add(y)
						}
					}
				}
			}
		}

		{
			// Same traversal for σ-nodes, driven by useblocks and rdf.
			W.Set(useblocks)
			for i := W.Take(); i != -1; i = W.Take() {
				n := fn.Blocks[i]
				for _, y := range rdf[n.Index] {
					if Asigma.Add(y) {
						// sigmas is parallel to y.Succs; a nil slot means no σ-node on that edge.
						sigmas := make([]*Sigma, 0, len(y.Succs))
						anyLive := false
						for _, succ := range y.Succs {
							// Unlike the φ case, any referrer of alloc (not only Loads) makes the edge live.
							live := false
							for _, ref := range *alloc.Referrers() {
								if closure == nil || closure.has(succ, ref.Block()) {
									live = true
									anyLive = true
									break
								}
							}
							if live {
								// X is provisionally the alloc itself; rename later replaces it with the
								// dominating value.
								sigma := &Sigma{
									From: y,
									X:    alloc,
								}
								sigma.source = alloc.source
								sigma.setType(deref(alloc.Type()))
								sigma.block = succ
								sigmas = append(sigmas, sigma)
							} else {
								sigmas = append(sigmas, nil)
							}
						}

						if anyLive {
							newSigmas[y.Index] = append(newSigmas[y.Index], newSigma{alloc, sigmas})
							// Every successor of y is recorded as containing a definition.
							for _, s := range y.Succs {
								defblocks.Add(s)
							}
							change = true
							// The σ-nodes read the cell's value in y.
							if useblocks.Add(y) {
								W.Add(y)
							}
						}
					}
				}
			}
		}
	}
}

// replaceAll replaces all intraprocedural uses of x with y,
// updating x.Referrers and y.Referrers.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
1303 func replaceAll(x, y Value) { 1304 var rands []*Value 1305 pxrefs := x.Referrers() 1306 pyrefs := y.Referrers() 1307 for _, instr := range *pxrefs { 1308 switch instr := instr.(type) { 1309 case *CompositeValue: 1310 // Special case CompositeValue because it might have very large lists of operands 1311 // 1312 // OPT(dh): this loop is still expensive for large composite values 1313 for i, rand := range instr.Values { 1314 if rand == x { 1315 instr.Values[i] = y 1316 } 1317 } 1318 default: 1319 rands = instr.Operands(rands[:0]) // recycle storage 1320 for _, rand := range rands { 1321 if *rand != nil { 1322 if *rand == x { 1323 *rand = y 1324 } 1325 } 1326 } 1327 } 1328 if pyrefs != nil { 1329 *pyrefs = append(*pyrefs, instr) // dups ok 1330 } 1331 } 1332 *pxrefs = nil // x is now unreferenced 1333 } 1334 1335 func replace(instr Instruction, x, y Value) { 1336 args := instr.Operands(nil) 1337 matched := false 1338 for _, arg := range args { 1339 if *arg == x { 1340 *arg = y 1341 matched = true 1342 } 1343 } 1344 if matched { 1345 yrefs := y.Referrers() 1346 if yrefs != nil { 1347 *yrefs = append(*yrefs, instr) 1348 } 1349 1350 xrefs := x.Referrers() 1351 if xrefs != nil { 1352 *xrefs = removeInstr(*xrefs, instr) 1353 } 1354 } 1355 } 1356 1357 // renamed returns the value to which alloc is being renamed, 1358 // constructing it lazily if it's the implicit zero initialization. 
1359 func renamed(fn *Function, renaming []Value, alloc *Alloc) Value { 1360 v := renaming[alloc.index] 1361 if v == nil { 1362 v = emitConst(fn, zeroConst(deref(alloc.Type()), alloc.source)) 1363 renaming[alloc.index] = v 1364 } 1365 return v 1366 } 1367 1368 func copyValue(v Value, why Instruction, info CopyInfo) *Copy { 1369 c := &Copy{ 1370 X: v, 1371 Why: why, 1372 Info: info, 1373 } 1374 if refs := v.Referrers(); refs != nil { 1375 *refs = append(*refs, c) 1376 } 1377 c.setType(v.Type()) 1378 c.setSource(v.Source()) 1379 return c 1380 } 1381 1382 func splitOnNewInformation(u *BasicBlock, renaming *StackMap) { 1383 renaming.Push() 1384 defer renaming.Pop() 1385 1386 rename := func(v Value, why Instruction, info CopyInfo, i int) { 1387 c := copyValue(v, why, info) 1388 c.setBlock(u) 1389 renaming.Set(v, c) 1390 u.Instrs = append(u.Instrs, nil) 1391 copy(u.Instrs[i+2:], u.Instrs[i+1:]) 1392 u.Instrs[i+1] = c 1393 } 1394 1395 replacement := func(v Value) (Value, bool) { 1396 r, ok := renaming.Get(v) 1397 if !ok { 1398 return nil, false 1399 } 1400 for { 1401 rr, ok := renaming.Get(r) 1402 if !ok { 1403 // Store replacement in the map so that future calls to replacement(v) don't have to go through the 1404 // iterative process again. 
1405 renaming.Set(v, r) 1406 return r, true 1407 } 1408 r = rr 1409 } 1410 } 1411 1412 var hasInfo func(v Value, info CopyInfo) bool 1413 hasInfo = func(v Value, info CopyInfo) bool { 1414 switch v := v.(type) { 1415 case *Copy: 1416 return (v.Info&info) == info || hasInfo(v.X, info) 1417 case *FieldAddr, *IndexAddr, *TypeAssert, *MakeChan, *MakeMap, *MakeSlice, *Alloc: 1418 return info == CopyInfoNotNil 1419 case Member, *Builtin: 1420 return info == CopyInfoNotNil 1421 case *Sigma: 1422 return hasInfo(v.X, info) 1423 default: 1424 return false 1425 } 1426 } 1427 1428 var args []*Value 1429 for i := 0; i < len(u.Instrs); i++ { 1430 instr := u.Instrs[i] 1431 if instr == nil { 1432 continue 1433 } 1434 args = instr.Operands(args[:0]) 1435 for _, arg := range args { 1436 if *arg == nil { 1437 continue 1438 } 1439 if r, ok := replacement(*arg); ok { 1440 *arg = r 1441 replace(instr, *arg, r) 1442 } 1443 } 1444 1445 // TODO write some bits on why we copy values instead of encoding the actual control flow and panics 1446 1447 switch instr := instr.(type) { 1448 case *IndexAddr: 1449 // Note that we rename instr.Index and instr.X even if they're already copies, because unique combinations 1450 // of X and Index may lead to unique information. 1451 1452 // OPT we should rename both variables at once and avoid one memmove 1453 rename(instr.Index, instr, CopyInfoNotNegative, i) 1454 rename(instr.X, instr, CopyInfoNotNil, i) 1455 i += 2 // skip over instructions we just inserted 1456 case *FieldAddr: 1457 if !hasInfo(instr.X, CopyInfoNotNil) { 1458 rename(instr.X, instr, CopyInfoNotNil, i) 1459 i++ 1460 } 1461 case *TypeAssert: 1462 // If we've already type asserted instr.X without comma-ok before, then it can only contain a single type, 1463 // and successive type assertions, no matter the type, don't tell us anything new. 
1464 if !hasInfo(instr.X, CopyInfoNotNil|CopyInfoSingleConcreteType) { 1465 rename(instr.X, instr, CopyInfoNotNil|CopyInfoSingleConcreteType, i) 1466 i++ // skip over instruction we just inserted 1467 } 1468 case *Load: 1469 if !hasInfo(instr.X, CopyInfoNotNil) { 1470 rename(instr.X, instr, CopyInfoNotNil, i) 1471 i++ 1472 } 1473 case *Store: 1474 if !hasInfo(instr.Addr, CopyInfoNotNil) { 1475 rename(instr.Addr, instr, CopyInfoNotNil, i) 1476 i++ 1477 } 1478 case *MapUpdate: 1479 if !hasInfo(instr.Map, CopyInfoNotNil) { 1480 rename(instr.Map, instr, CopyInfoNotNil, i) 1481 i++ 1482 } 1483 case CallInstruction: 1484 off := 0 1485 if !instr.Common().IsInvoke() && !hasInfo(instr.Common().Value, CopyInfoNotNil) { 1486 rename(instr.Common().Value, instr, CopyInfoNotNil, i) 1487 off++ 1488 } 1489 if f, ok := instr.Common().Value.(*Builtin); ok { 1490 switch f.name { 1491 case "close": 1492 arg := instr.Common().Args[0] 1493 if !hasInfo(arg, CopyInfoNotNil|CopyInfoClosed) { 1494 rename(arg, instr, CopyInfoNotNil|CopyInfoClosed, i) 1495 off++ 1496 } 1497 } 1498 } 1499 i += off 1500 case *SliceToArrayPointer: 1501 // A slice to array pointer conversion tells us the minimum length of the slice 1502 rename(instr.X, instr, CopyInfoUnspecified, i) 1503 i++ 1504 case *SliceToArray: 1505 // A slice to array conversion tells us the minimum length of the slice 1506 rename(instr.X, instr, CopyInfoUnspecified, i) 1507 i++ 1508 case *Slice: 1509 // Slicing tells us about some of the bounds 1510 off := 0 1511 if instr.Low == nil && instr.High == nil && instr.Max == nil { 1512 // If all indices are unspecified, then we can only learn something about instr.X if it might've been 1513 // nil. 
1514 if !hasInfo(instr.X, CopyInfoNotNil) { 1515 rename(instr.X, instr, CopyInfoUnspecified, i) 1516 off++ 1517 } 1518 } else { 1519 rename(instr.X, instr, CopyInfoUnspecified, i) 1520 off++ 1521 } 1522 // We copy the indices even if we already know they are not negative, because we can associate numeric 1523 // ranges with them. 1524 if instr.Low != nil { 1525 rename(instr.Low, instr, CopyInfoNotNegative, i) 1526 off++ 1527 } 1528 if instr.High != nil { 1529 rename(instr.High, instr, CopyInfoNotNegative, i) 1530 off++ 1531 } 1532 if instr.Max != nil { 1533 rename(instr.Max, instr, CopyInfoNotNegative, i) 1534 off++ 1535 } 1536 i += off 1537 case *StringLookup: 1538 rename(instr.X, instr, CopyInfoUnspecified, i) 1539 rename(instr.Index, instr, CopyInfoNotNegative, i) 1540 i += 2 1541 case *Recv: 1542 if !hasInfo(instr.Chan, CopyInfoNotNil) { 1543 // Receiving from a nil channel never completes 1544 rename(instr.Chan, instr, CopyInfoNotNil, i) 1545 i++ 1546 } 1547 case *Send: 1548 if !hasInfo(instr.Chan, CopyInfoNotNil) { 1549 // Sending to a nil channel never completes. Sending to a closed channel panics, but whether a channel 1550 // is closed isn't local to this function, so we didn't learn anything. 1551 rename(instr.Chan, instr, CopyInfoNotNil, i) 1552 i++ 1553 } 1554 } 1555 } 1556 1557 for _, v := range u.dom.children { 1558 splitOnNewInformation(v, renaming) 1559 } 1560 } 1561 1562 // rename implements the Cytron et al-based SSI renaming algorithm, a 1563 // preorder traversal of the dominator tree replacing all loads of 1564 // Alloc cells with the value stored to that cell by the dominating 1565 // store instruction. 1566 // 1567 // renaming is a map from *Alloc (keyed by index number) to its 1568 // dominating stored value; newPhis[x] is the set of new φ-nodes to be 1569 // prepended to block x. 
1570 func rename(u *BasicBlock, renaming []Value, newPhis BlockMap[[]newPhi], newSigmas BlockMap[[]newSigma]) { 1571 // Each φ-node becomes the new name for its associated Alloc. 1572 for _, np := range newPhis[u.Index] { 1573 phi := np.phi 1574 alloc := np.alloc 1575 renaming[alloc.index] = phi 1576 } 1577 1578 // Rename loads and stores of allocs. 1579 for i, instr := range u.Instrs { 1580 switch instr := instr.(type) { 1581 case *Alloc: 1582 if instr.index >= 0 { // store of zero to Alloc cell 1583 // Replace dominated loads by the zero value. 1584 renaming[instr.index] = nil 1585 if debugLifting { 1586 fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr) 1587 } 1588 // Delete the Alloc. 1589 u.Instrs[i] = nil 1590 u.gaps++ 1591 } 1592 1593 case *Store: 1594 if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell 1595 // Replace dominated loads by the stored value. 1596 renaming[alloc.index] = instr.Val 1597 if debugLifting { 1598 fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n", 1599 instr, instr.Val.Name()) 1600 } 1601 if refs := instr.Addr.Referrers(); refs != nil { 1602 *refs = removeInstr(*refs, instr) 1603 } 1604 if refs := instr.Val.Referrers(); refs != nil { 1605 *refs = removeInstr(*refs, instr) 1606 } 1607 // Delete the Store. 1608 u.Instrs[i] = nil 1609 u.gaps++ 1610 } 1611 1612 case *Load: 1613 if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell 1614 // In theory, we wouldn't be able to replace loads directly, because a loaded value could be used in 1615 // different branches, in which case it should be replaced with different sigma nodes. But we can't 1616 // simply defer replacement, either, because then later stores might incorrectly affect this load. 1617 // 1618 // To avoid doing renaming on _all_ values (instead of just loads and stores like we're doing), we make 1619 // sure during code generation that each load is only used in one block. 
For example, in constant switch 1620 // statements, where the tag is only evaluated once, we store it in a temporary and load it for each 1621 // comparison, so that we have individual loads to replace. 1622 // 1623 // Because we only rename stores and loads, the end result will not contain sigma nodes for all 1624 // constants. Some constants may be used directly, e.g. in comparisons such as 'x == 5'. We may still 1625 // end up inserting dead sigma nodes in branches, but these will never get used in renaming and will be 1626 // cleaned up when we remove dead phis and sigmas. 1627 newval := renamed(u.Parent(), renaming, alloc) 1628 if debugLifting { 1629 fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n", 1630 instr.Name(), instr, newval) 1631 } 1632 replaceAll(instr, newval) 1633 u.Instrs[i] = nil 1634 u.gaps++ 1635 } 1636 1637 case *DebugRef: 1638 if x, ok := instr.X.(*Alloc); ok && x.index >= 0 { 1639 if instr.IsAddr { 1640 instr.X = renamed(u.Parent(), renaming, x) 1641 instr.IsAddr = false 1642 1643 // Add DebugRef to instr.X's referrers. 1644 if refs := instr.X.Referrers(); refs != nil { 1645 *refs = append(*refs, instr) 1646 } 1647 } else { 1648 // A source expression denotes the address 1649 // of an Alloc that was optimized away. 1650 instr.X = nil 1651 1652 // Delete the DebugRef. 1653 u.Instrs[i] = nil 1654 u.gaps++ 1655 } 1656 } 1657 } 1658 } 1659 1660 // update all outgoing sigma nodes with the dominating store 1661 for _, sigmas := range newSigmas[u.Index] { 1662 for _, sigma := range sigmas.sigmas { 1663 if sigma == nil { 1664 continue 1665 } 1666 sigma.X = renamed(u.Parent(), renaming, sigmas.alloc) 1667 } 1668 } 1669 1670 // For each φ-node in a CFG successor, rename the edge. 
1671 for succi, v := range u.Succs { 1672 phis := newPhis[v.Index] 1673 if len(phis) == 0 { 1674 continue 1675 } 1676 i := v.predIndex(u) 1677 for _, np := range phis { 1678 phi := np.phi 1679 alloc := np.alloc 1680 // if there's a sigma node, use it, else use the dominating value 1681 var newval Value 1682 for _, sigmas := range newSigmas[u.Index] { 1683 if sigmas.alloc == alloc && sigmas.sigmas[succi] != nil { 1684 newval = sigmas.sigmas[succi] 1685 break 1686 } 1687 } 1688 if newval == nil { 1689 newval = renamed(u.Parent(), renaming, alloc) 1690 } 1691 if debugLifting { 1692 fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n", 1693 phi.Name(), u, v, i, alloc.Name(), newval.Name()) 1694 } 1695 phi.Edges[i] = newval 1696 if prefs := newval.Referrers(); prefs != nil { 1697 *prefs = append(*prefs, phi) 1698 } 1699 } 1700 } 1701 1702 // Continue depth-first recursion over domtree, pushing a 1703 // fresh copy of the renaming map for each subtree. 1704 r := make([]Value, len(renaming)) 1705 for _, v := range u.dom.children { 1706 copy(r, renaming) 1707 1708 // on entry to a block, the incoming sigma nodes become the new values for their alloc 1709 if idx := u.succIndex(v); idx != -1 { 1710 for _, sigma := range newSigmas[u.Index] { 1711 if sigma.sigmas[idx] != nil { 1712 r[sigma.alloc.index] = sigma.sigmas[idx] 1713 } 1714 } 1715 } 1716 rename(v, r, newPhis, newSigmas) 1717 } 1718 1719 } 1720 1721 func simplifyConstantCompositeValues(fn *Function) bool { 1722 changed := false 1723 1724 for _, b := range fn.Blocks { 1725 n := 0 1726 for _, instr := range b.Instrs { 1727 replaced := false 1728 1729 if cv, ok := instr.(*CompositeValue); ok { 1730 ac := &AggregateConst{} 1731 ac.typ = cv.typ 1732 replaced = true 1733 for _, v := range cv.Values { 1734 if c, ok := v.(Constant); ok { 1735 ac.Values = append(ac.Values, c) 1736 } else { 1737 replaced = false 1738 break 1739 } 1740 } 1741 if replaced { 1742 replaceAll(cv, emitConst(fn, ac)) 1743 
killInstruction(cv) 1744 } 1745 1746 } 1747 1748 if replaced { 1749 changed = true 1750 } else { 1751 b.Instrs[n] = instr 1752 n++ 1753 } 1754 } 1755 1756 clearInstrs(b.Instrs[n:]) 1757 b.Instrs = b.Instrs[:n] 1758 } 1759 1760 return changed 1761 } 1762 1763 func updateOperandReferrers(instr Instruction) { 1764 for _, op := range instr.Operands(nil) { 1765 refs := (*op).Referrers() 1766 if refs != nil { 1767 *refs = append(*refs, instr) 1768 } 1769 } 1770 }