github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/go/ir/lift.go

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ir
     6  
     7  // This file defines the lifting pass which tries to "lift" Alloc
     8  // cells (new/local variables) into SSA registers, replacing loads
     9  // with the dominating stored value, eliminating loads and stores, and
    10  // inserting φ- and σ-nodes as needed.
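         //
         // As an illustrative sketch (the exact printed IR differs), a
         // body such as
         //
         //	x = Alloc int
         //	Store x 1
         //	v = Load x
         //	println(v)
         //
         // lifts to
         //
         //	println(1)
         //
         // with the Alloc, Store and Load all eliminated.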
    11  
    12  // Cited papers and resources:
    13  //
    14  // Ron Cytron et al. 1991. Efficiently computing SSA form...
    15  // https://doi.acm.org/10.1145/115372.115320
    16  //
    17  // Cooper, Harvey, Kennedy.  2001.  A Simple, Fast Dominance Algorithm.
    18  // Software Practice and Experience 2001, 4:1-10.
    19  // https://www.hipersoft.rice.edu/grads/publications/dom14.pdf
    20  //
    21  // Daniel Berlin, llvmdev mailing list, 2012.
    22  // https://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
    23  // (Be sure to expand the whole thread.)
    24  //
    25  // C. Scott Ananian. 1997. The static single information form.
    26  //
    27  // Jeremy Singer. 2006. Static program analysis based on virtual register renaming.
    28  
    29  // TODO(adonovan): opt: there are many optimizations worth evaluating, and
    30  // the conventional wisdom for SSA construction is that a simple
    31  // algorithm well engineered often beats those of better asymptotic
    32  // complexity on all but the most egregious inputs.
    33  //
    34  // Danny Berlin suggests that the Cooper et al. algorithm for
    35  // computing the dominance frontier is superior to Cytron et al.
    36  // Furthermore he recommends that rather than computing the DF for the
    37  // whole function then renaming all alloc cells, it may be cheaper to
    38  // compute the DF for each alloc cell separately and throw it away.
    39  //
    40  // Consider exploiting liveness information to avoid creating dead
    41  // φ-nodes which we then immediately remove.
    42  //
    43  // Also see many other "TODO: opt" suggestions in the code.
    44  
    45  import (
    46  	"encoding/binary"
    47  	"fmt"
    48  	"os"
    49  )
    50  
    51  // If true, show diagnostic information at each step of lifting.
    52  // Very verbose.
    53  const debugLifting = false
    54  
    55  // domFrontier maps each block to the set of blocks in its dominance
    56  // frontier.  The outer slice is conceptually a map keyed by
    57  // Block.Index.  The inner slice is conceptually a set, possibly
    58  // containing duplicates.
    59  //
    60  // TODO(adonovan): opt: measure impact of dups; consider a packed bit
    61  // representation, e.g. big.Int, and bitwise parallel operations for
    62  // the union step in the Children loop.
    63  //
    64  // domFrontier's methods mutate the slice's elements but not its
    65  // length, so their receivers needn't be pointers.
    66  type domFrontier BlockMap[[]*BasicBlock]
    67  
    68  func (df domFrontier) add(u, v *BasicBlock) {
    69  	df[u.Index] = append(df[u.Index], v)
    70  }
    71  
    72  // build builds the dominance frontier df for the dominator tree of
    73  // fn, using the algorithm found in A Simple, Fast Dominance
    74  // Algorithm, Figure 5.
    75  //
    76  // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
    77  // by pruning the entire IDF computation, rather than merely pruning
    78  // the DF -> IDF step.
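         //
         // To sketch what a dominance frontier is: in the diamond CFG
         //
         //	    b0
         //	   /  \
         //	  b1  b2
         //	   \  /
         //	    b3
         //
         // b3 is in the frontier of both b1 and b2, because each dominates
         // a predecessor of b3 without strictly dominating b3 itself; a
         // definition reaching b3 along either edge therefore needs a
         // φ-node there.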
    79  func (df domFrontier) build(fn *Function) {
    80  	for _, b := range fn.Blocks {
    81  		preds := b.Preds[0:len(b.Preds):len(b.Preds)]
    82  		if b == fn.Exit {
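         			// Treat the fake exit edges recorded in fn.fakeExits
         			// as real predecessors of the exit block, so that the
         			// frontier accounts for them as well.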
    83  			for i, v := range fn.fakeExits.values {
    84  				if v {
    85  					preds = append(preds, fn.Blocks[i])
    86  				}
    87  			}
    88  		}
    89  		if len(preds) >= 2 {
    90  			for _, p := range preds {
    91  				runner := p
    92  				for runner != b.dom.idom {
    93  					df.add(runner, b)
    94  					runner = runner.dom.idom
    95  				}
    96  			}
    97  		}
    98  	}
    99  }
   100  
   101  func buildDomFrontier(fn *Function) domFrontier {
   102  	df := make(domFrontier, len(fn.Blocks))
   103  	df.build(fn)
   104  	return df
   105  }
   106  
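         // postDomFrontier is the mirror image of domFrontier for the
         // post-dominator tree: it maps each block to the set of blocks in
         // its post-dominance frontier. It is used to place σ-nodes, just
         // as the dominance frontier is used to place φ-nodes.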
   107  type postDomFrontier BlockMap[[]*BasicBlock]
   108  
   109  func (rdf postDomFrontier) add(u, v *BasicBlock) {
   110  	rdf[u.Index] = append(rdf[u.Index], v)
   111  }
   112  
   113  func (rdf postDomFrontier) build(fn *Function) {
   114  	for _, b := range fn.Blocks {
   115  		succs := b.Succs[0:len(b.Succs):len(b.Succs)]
   116  		if fn.fakeExits.Has(b) {
   117  			succs = append(succs, fn.Exit)
   118  		}
   119  		if len(succs) >= 2 {
   120  			for _, s := range succs {
   121  				runner := s
   122  				for runner != b.pdom.idom {
   123  					rdf.add(runner, b)
   124  					runner = runner.pdom.idom
   125  				}
   126  			}
   127  		}
   128  	}
   129  }
   130  
   131  func buildPostDomFrontier(fn *Function) postDomFrontier {
   132  	rdf := make(postDomFrontier, len(fn.Blocks))
   133  	rdf.build(fn)
   134  	return rdf
   135  }
   136  
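         // removeInstr removes all occurrences of instr from refs in place,
         // nilling the tail to aid GC, and returns the shortened slice.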
   137  func removeInstr(refs []Instruction, instr Instruction) []Instruction {
   138  	i := 0
   139  	for _, ref := range refs {
   140  		if ref == instr {
   141  			continue
   142  		}
   143  		refs[i] = ref
   144  		i++
   145  	}
   146  	for j := i; j != len(refs); j++ {
   147  		refs[j] = nil // aid GC
   148  	}
   149  	return refs[:i]
   150  }
   151  
   152  func clearInstrs(instrs []Instruction) {
   153  	for i := range instrs {
   154  		instrs[i] = nil
   155  	}
   156  }
   157  
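         // numberNodesPerBlock assigns each instruction an ID that reflects
         // its position within its block (skipping nil slots). liftable
         // relies on these IDs to order a block's instructions, e.g. when
         // locating the first unliftable use of an alloc.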
   158  func numberNodesPerBlock(f *Function) {
   159  	for _, b := range f.Blocks {
   160  		var base ID
   161  		for _, instr := range b.Instrs {
   162  			if instr == nil {
   163  				continue
   164  			}
   165  			instr.setID(base)
   166  			base++
   167  		}
   168  	}
   169  }
   170  
   171  // lift replaces local and new Allocs accessed only with
   172  // load/store by IR registers, inserting φ- and σ-nodes where necessary.
   173  // The result is a program in pruned SSI form.
   174  //
   175  // Preconditions:
   176  // - fn has no dead blocks (blockopt has run).
   177  // - Def/use info (Operands and Referrers) is up-to-date.
   178  // - The dominator tree is up-to-date.
   179  func lift(fn *Function) bool {
   180  	// TODO(adonovan): opt: lots of little optimizations may be
   181  	// worthwhile here, especially if they cause us to avoid
   182  	// buildDomFrontier.  For example:
   183  	//
   184  	// - Alloc never loaded?  Eliminate.
   185  	// - Alloc never stored?  Replace all loads with a zero constant.
   186  	// - Alloc stored once?  Replace loads with dominating store;
   187  	//   don't forget that an Alloc is itself an effective store
   188  	//   of zero.
   189  	// - Alloc used only within a single block?
   190  	//   Use degenerate algorithm avoiding φ-nodes.
   191  	// - Consider synergy with scalar replacement of aggregates (SRA).
   192  	//   e.g. *(&x.f) where x is an Alloc.
   193  	//   Perhaps we'd get better results if we generated this as x.f
   194  	//   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
   195  	//   Unclear.
   196  	//
   197  	// But we will start with the simplest correct code.
   198  	var df domFrontier
   199  	var rdf postDomFrontier
   200  	var closure *closure
   201  	var newPhis BlockMap[[]newPhi]
   202  	var newSigmas BlockMap[[]newSigma]
   203  
   204  	// During this pass we will replace some BasicBlock.Instrs
   205  	// (allocs, loads and stores) with nil, keeping a count in
   206  	// BasicBlock.gaps.  At the end we will reset Instrs to the
   207  	// concatenation of all non-dead newPhis and non-nil Instrs
   208  	// for the block, reusing the original array if space permits.
   209  
   210  	// While we're here, we also eliminate 'rundefers'
   211  	// instructions in functions that contain no 'defer'
   212  	// instructions.
   213  	usesDefer := false
   214  
   215  	// Determine which allocs we can lift and number them densely.
   216  	// The renaming phase uses this numbering for compact maps.
   217  	numAllocs := 0
   218  
   219  	instructions := make(BlockMap[liftInstructions], len(fn.Blocks))
   220  	for i := range instructions {
   221  		instructions[i].insertInstructions = map[Instruction][]Instruction{}
   222  	}
   223  
    224  	// Number the nodes in each block, for use by liftable.
   225  	numberNodesPerBlock(fn)
   226  
   227  	for _, b := range fn.Blocks {
   228  		b.gaps = 0
   229  		b.rundefers = 0
   230  
   231  		for _, instr := range b.Instrs {
   232  			switch instr := instr.(type) {
   233  			case *Alloc:
   234  				if !liftable(instr, instructions) {
   235  					instr.index = -1
   236  					continue
   237  				}
   238  
   239  				if numAllocs == 0 {
   240  					df = buildDomFrontier(fn)
   241  					rdf = buildPostDomFrontier(fn)
   242  					if len(fn.Blocks) > 2 {
   243  						closure = transitiveClosure(fn)
   244  					}
   245  					newPhis = make(BlockMap[[]newPhi], len(fn.Blocks))
   246  					newSigmas = make(BlockMap[[]newSigma], len(fn.Blocks))
   247  
   248  					if debugLifting {
   249  						title := false
   250  						for i, blocks := range df {
   251  							if blocks != nil {
   252  								if !title {
   253  									fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
   254  									title = true
   255  								}
   256  								fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
   257  							}
   258  						}
   259  					}
   260  				}
   261  				instr.index = numAllocs
   262  				numAllocs++
   263  			case *Defer:
   264  				usesDefer = true
   265  			case *RunDefers:
   266  				b.rundefers++
   267  			}
   268  		}
   269  	}
   270  
   271  	if numAllocs > 0 {
   272  		for _, b := range fn.Blocks {
   273  			work := instructions[b.Index]
   274  			for _, rename := range work.renameAllocs {
   275  				for _, instr_ := range b.Instrs[rename.startingAt:] {
   276  					replace(instr_, rename.from, rename.to)
   277  				}
   278  			}
   279  		}
   280  
   281  		for _, b := range fn.Blocks {
   282  			work := instructions[b.Index]
   283  			if len(work.insertInstructions) != 0 {
   284  				newInstrs := make([]Instruction, 0, len(fn.Blocks)+len(work.insertInstructions)*3)
   285  				for _, instr := range b.Instrs {
   286  					if add, ok := work.insertInstructions[instr]; ok {
   287  						newInstrs = append(newInstrs, add...)
   288  					}
   289  					newInstrs = append(newInstrs, instr)
   290  				}
   291  				b.Instrs = newInstrs
   292  			}
   293  		}
   294  
   295  		// TODO(dh): remove inserted allocs that end up unused after lifting.
   296  
   297  		for _, b := range fn.Blocks {
   298  			for _, instr := range b.Instrs {
   299  				if instr, ok := instr.(*Alloc); ok && instr.index >= 0 {
   300  					liftAlloc(closure, df, rdf, instr, newPhis, newSigmas)
   301  				}
   302  			}
   303  		}
   304  
   305  		// renaming maps an alloc (keyed by index) to its replacement
   306  		// value.  Initially the renaming contains nil, signifying the
   307  		// zero constant of the appropriate type; we construct the
   308  		// Const lazily at most once on each path through the domtree.
   309  		// TODO(adonovan): opt: cache per-function not per subtree.
   310  		renaming := make([]Value, numAllocs)
   311  
   312  		// Renaming.
   313  		rename(fn.Blocks[0], renaming, newPhis, newSigmas)
   314  
   315  		simplifyPhisAndSigmas(newPhis, newSigmas)
   316  
   317  		// Eliminate dead φ- and σ-nodes.
   318  		markLiveNodes(fn.Blocks, newPhis, newSigmas)
   319  	}
   320  
   321  	// Prepend remaining live φ-nodes to each block and possibly kill rundefers.
   322  	for _, b := range fn.Blocks {
   323  		var head []Instruction
   324  		if numAllocs > 0 {
   325  			nps := newPhis[b.Index]
   326  			head = make([]Instruction, 0, len(nps))
   327  			for _, pred := range b.Preds {
   328  				nss := newSigmas[pred.Index]
   329  				idx := pred.succIndex(b)
   330  				for _, newSigma := range nss {
   331  					if sigma := newSigma.sigmas[idx]; sigma != nil && sigma.live {
   332  						head = append(head, sigma)
   333  
   334  						// we didn't populate referrers before, as most
   335  						// sigma nodes will be killed
   336  						if refs := sigma.X.Referrers(); refs != nil {
   337  							*refs = append(*refs, sigma)
   338  						}
   339  					} else if sigma != nil {
   340  						sigma.block = nil
   341  					}
   342  				}
   343  			}
   344  			for _, np := range nps {
   345  				if np.phi.live {
   346  					head = append(head, np.phi)
   347  				} else {
   348  					for _, edge := range np.phi.Edges {
   349  						if refs := edge.Referrers(); refs != nil {
   350  							*refs = removeInstr(*refs, np.phi)
   351  						}
   352  					}
   353  					np.phi.block = nil
   354  				}
   355  			}
   356  		}
   357  
   358  		rundefersToKill := b.rundefers
   359  		if usesDefer {
   360  			rundefersToKill = 0
   361  		}
   362  
   363  		j := len(head)
   364  		if j+b.gaps+rundefersToKill == 0 {
   365  			continue // fast path: no new phis or gaps
   366  		}
   367  
   368  		// We could do straight copies instead of element-wise copies
   369  		// when both b.gaps and rundefersToKill are zero. However,
   370  		// that seems to only be the case ~1% of the time, which
   371  		// doesn't seem worth the extra branch.
   372  
   373  		// Remove dead instructions, add phis and sigmas
   374  		ns := len(b.Instrs) + j - b.gaps - rundefersToKill
   375  		if ns <= cap(b.Instrs) {
   376  			// b.Instrs has enough capacity to store all instructions
   377  
   378  			// OPT(dh): check cap vs the actually required space; if
   379  			// there is a big enough difference, it may be worth
   380  			// allocating a new slice, to avoid pinning memory.
   381  			dst := b.Instrs[:cap(b.Instrs)]
   382  			i := len(dst) - 1
   383  			for n := len(b.Instrs) - 1; n >= 0; n-- {
   384  				instr := dst[n]
   385  				if instr == nil {
   386  					continue
   387  				}
   388  				if !usesDefer {
   389  					if _, ok := instr.(*RunDefers); ok {
   390  						continue
   391  					}
   392  				}
   393  				dst[i] = instr
   394  				i--
   395  			}
   396  			off := i + 1 - len(head)
   397  			// aid GC
   398  			clearInstrs(dst[:off])
   399  			dst = dst[off:]
   400  			copy(dst, head)
   401  			b.Instrs = dst
   402  		} else {
   403  			// not enough space, so allocate a new slice and copy
   404  			// over.
   405  			dst := make([]Instruction, ns)
   406  			copy(dst, head)
   407  
   408  			for _, instr := range b.Instrs {
   409  				if instr == nil {
   410  					continue
   411  				}
   412  				if !usesDefer {
   413  					if _, ok := instr.(*RunDefers); ok {
   414  						continue
   415  					}
   416  				}
   417  				dst[j] = instr
   418  				j++
   419  			}
   420  			b.Instrs = dst
   421  		}
   422  	}
   423  
   424  	// Remove any fn.Locals that were lifted.
   425  	j := 0
   426  	for _, l := range fn.Locals {
   427  		if l.index < 0 {
   428  			fn.Locals[j] = l
   429  			j++
   430  		}
   431  	}
   432  	// Nil out fn.Locals[j:] to aid GC.
   433  	for i := j; i < len(fn.Locals); i++ {
   434  		fn.Locals[i] = nil
   435  	}
   436  	fn.Locals = fn.Locals[:j]
   437  
   438  	return numAllocs > 0
   439  }
   440  
   441  func hasDirectReferrer(instr Instruction) bool {
   442  	for _, instr := range *instr.Referrers() {
   443  		switch instr.(type) {
   444  		case *Phi, *Sigma:
   445  			// ignore
   446  		default:
   447  			return true
   448  		}
   449  	}
   450  	return false
   451  }
   452  
   453  func markLiveNodes(blocks []*BasicBlock, newPhis BlockMap[[]newPhi], newSigmas BlockMap[[]newSigma]) {
   454  	// Phis and sigmas may become dead due to optimization passes. We may also insert more nodes than strictly
   455  	// necessary, e.g. sigma nodes for constants, which will never be used.
   456  
   457  	// Phi and sigma nodes are considered live if a non-phi, non-sigma
   458  	// node uses them. Once we find a node that is live, we mark all
   459  	// of its operands as used, too.
   460  	for _, npList := range newPhis {
   461  		for _, np := range npList {
   462  			phi := np.phi
   463  			if !phi.live && hasDirectReferrer(phi) {
   464  				markLivePhi(phi)
   465  			}
   466  		}
   467  	}
   468  	for _, npList := range newSigmas {
   469  		for _, np := range npList {
   470  			for _, sigma := range np.sigmas {
   471  				if sigma != nil && !sigma.live && hasDirectReferrer(sigma) {
   472  					markLiveSigma(sigma)
   473  				}
   474  			}
   475  		}
   476  	}
   477  	// Existing φ-nodes due to && and || operators
   478  	// are all considered live (see Go issue 19622).
   479  	for _, b := range blocks {
   480  		for _, phi := range b.phis() {
   481  			markLivePhi(phi.(*Phi))
   482  		}
   483  	}
   484  }
   485  
   486  func markLivePhi(phi *Phi) {
   487  	phi.live = true
   488  	for _, rand := range phi.Edges {
   489  		switch rand := rand.(type) {
   490  		case *Phi:
   491  			if !rand.live {
   492  				markLivePhi(rand)
   493  			}
   494  		case *Sigma:
   495  			if !rand.live {
   496  				markLiveSigma(rand)
   497  			}
   498  		}
   499  	}
   500  }
   501  
   502  func markLiveSigma(sigma *Sigma) {
   503  	sigma.live = true
   504  	switch rand := sigma.X.(type) {
   505  	case *Phi:
   506  		if !rand.live {
   507  			markLivePhi(rand)
   508  		}
   509  	case *Sigma:
   510  		if !rand.live {
   511  			markLiveSigma(rand)
   512  		}
   513  	}
   514  }
   515  
   516  // simplifyPhisAndSigmas removes duplicate phi and sigma nodes,
   517  // and replaces trivial phis with non-phi alternatives. Phi
   518  // nodes where all edges are identical, or consist of only the phi
   519  // itself and one other value, may be replaced with the value.
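         //
         // For example (sketch):
         //
         //	t1 = φ(t0, t1)   // edges are t0 and t1 itself: uses of t1 become t0
         //	t2 = φ(t0, t0)   // all edges identical: uses of t2 become t0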
   520  func simplifyPhisAndSigmas(newPhis BlockMap[[]newPhi], newSigmas BlockMap[[]newSigma]) {
   521  	// temporary numbering of values used in phis so that we can build map keys
   522  	var id ID
   523  	for _, npList := range newPhis {
   524  		for _, np := range npList {
   525  			for _, edge := range np.phi.Edges {
   526  				edge.setID(id)
   527  				id++
   528  			}
   529  		}
   530  	}
   531  	// find all phis that are trivial and can be replaced with a
   532  	// non-phi value. run until we reach a fixpoint, because replacing
   533  	// a phi may make other phis trivial.
   534  	for changed := true; changed; {
   535  		changed = false
   536  		for _, npList := range newPhis {
   537  			for _, np := range npList {
   538  				if np.phi.live {
   539  					// we're reusing 'live' to mean 'dead' in the context of simplifyPhisAndSigmas
   540  					continue
   541  				}
   542  				if r, ok := isUselessPhi(np.phi); ok {
   543  					// useless phi, replace its uses with the
   544  					// replacement value. the dead phi pass will clean
   545  					// up the phi afterwards.
   546  					replaceAll(np.phi, r)
   547  					np.phi.live = true
   548  					changed = true
   549  				}
   550  			}
   551  		}
   552  
   553  		// Replace duplicate sigma nodes with a single node. These nodes exist when multiple allocs get replaced with the
   554  		// same dominating store.
   555  		for _, sigmaList := range newSigmas {
   556  			primarySigmas := map[struct {
   557  				succ int
   558  				v    Value
   559  			}]*Sigma{}
   560  			for _, sigmas := range sigmaList {
   561  				for succ, sigma := range sigmas.sigmas {
   562  					if sigma == nil {
   563  						continue
   564  					}
   565  					if sigma.live {
   566  						// we're reusing 'live' to mean 'dead' in the context of simplifyPhisAndSigmas
   567  						continue
   568  					}
   569  					key := struct {
   570  						succ int
   571  						v    Value
   572  					}{succ, sigma.X}
   573  					if alt, ok := primarySigmas[key]; ok {
   574  						replaceAll(sigma, alt)
   575  						sigma.live = true
   576  						changed = true
   577  					} else {
   578  						primarySigmas[key] = sigma
   579  					}
   580  				}
   581  			}
   582  		}
   583  
   584  		// Replace duplicate phi nodes with a single node. As far as we know, these duplicate nodes only ever exist
   585  		// because of the previous sigma deduplication.
   586  		keyb := make([]byte, 0, 4*8)
   587  		for _, npList := range newPhis {
   588  			primaryPhis := map[string]*Phi{}
   589  			for _, np := range npList {
   590  				if np.phi.live {
   591  					continue
   592  				}
   593  				if n := len(np.phi.Edges) * 8; cap(keyb) >= n {
   594  					keyb = keyb[:n]
   595  				} else {
   596  					keyb = make([]byte, n, n*2)
   597  				}
   598  				for i, e := range np.phi.Edges {
   599  					binary.LittleEndian.PutUint64(keyb[i*8:i*8+8], uint64(e.ID()))
   600  				}
   601  				if alt, ok := primaryPhis[string(keyb)]; ok {
   602  					replaceAll(np.phi, alt)
   603  					np.phi.live = true
   604  					changed = true
   605  				} else {
   606  					primaryPhis[string(keyb)] = np.phi
   607  				}
   608  			}
   609  		}
   610  
   611  	}
   612  
   613  	for _, npList := range newPhis {
   614  		for _, np := range npList {
   615  			np.phi.live = false
   616  			for _, edge := range np.phi.Edges {
   617  				edge.setID(0)
   618  			}
   619  		}
   620  	}
   621  
   622  	for _, sigmaList := range newSigmas {
   623  		for _, sigmas := range sigmaList {
   624  			for _, sigma := range sigmas.sigmas {
   625  				if sigma != nil {
   626  					sigma.live = false
   627  				}
   628  			}
   629  		}
   630  	}
   631  }
   632  
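         // BlockSet is a mutable set of BasicBlocks, addressed by block
         // index. idx caches the most recently added or taken index, which
         // lets Take resume its scan near the previous hit instead of at
         // the start.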
   633  type BlockSet struct {
   634  	idx    int
   635  	values []bool
   636  	count  int
   637  }
   638  
   639  func NewBlockSet(size int) *BlockSet {
   640  	return &BlockSet{values: make([]bool, size)}
   641  }
   642  
   643  func (s *BlockSet) Set(s2 *BlockSet) {
   644  	copy(s.values, s2.values)
   645  	s.count = 0
   646  	for _, v := range s.values {
   647  		if v {
   648  			s.count++
   649  		}
   650  	}
   651  }
   652  
   653  func (s *BlockSet) Num() int {
   654  	return s.count
   655  }
   656  
   657  func (s *BlockSet) Has(b *BasicBlock) bool {
   658  	if b.Index >= len(s.values) {
   659  		return false
   660  	}
   661  	return s.values[b.Index]
   662  }
   663  
    664  // Add adds b to the set and reports whether the set changed.
   665  func (s *BlockSet) Add(b *BasicBlock) bool {
   666  	if s.values[b.Index] {
   667  		return false
   668  	}
   669  	s.count++
   670  	s.values[b.Index] = true
   671  	s.idx = b.Index
   672  
   673  	return true
   674  }
   675  
   676  func (s *BlockSet) Clear() {
   677  	for j := range s.values {
   678  		s.values[j] = false
   679  	}
   680  	s.count = 0
   681  }
   682  
    683  // Take removes an arbitrary element from the set and
    684  // returns its index, or -1 if the set is empty.
   685  func (s *BlockSet) Take() int {
   686  	// [i, end]
   687  	for i := s.idx; i < len(s.values); i++ {
   688  		if s.values[i] {
   689  			s.values[i] = false
   690  			s.idx = i
   691  			s.count--
   692  			return i
   693  		}
   694  	}
   695  
   696  	// [start, i)
   697  	for i := 0; i < s.idx; i++ {
   698  		if s.values[i] {
   699  			s.values[i] = false
   700  			s.idx = i
   701  			s.count--
   702  			return i
   703  		}
   704  	}
   705  
   706  	return -1
   707  }
   708  
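         // closure is a compact encoding of the transitive closure of the
         // CFG's successor relation. For a block b, the intervals in
         // reachables[span[b.Index]:span[b.Index+1]] cover the indices of
         // all blocks reachable from b.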
   709  type closure struct {
   710  	span       []uint32
   711  	reachables BlockMap[interval]
   712  }
   713  
   714  type interval uint32
   715  
   716  const (
   717  	flagMask   = 1 << 31
   718  	numBits    = 20
   719  	lengthBits = 32 - numBits - 1
   720  	lengthMask = (1<<lengthBits - 1) << numBits
   721  	numMask    = 1<<numBits - 1
   722  )
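         // For example, under this encoding the run of block indices [3, 5]
         // has length 2, which fits in lengthBits, so it packs into the
         // single word 2<<numBits | 3 with the flag bit clear. A run such as
         // [3, 3000000] does not fit and is stored as two words: 1<<31 | 3
         // followed by 3000000.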
   723  
   724  func (c closure) has(s, v *BasicBlock) bool {
   725  	idx := uint32(v.Index)
   726  	if idx == 1 || s.Dominates(v) {
   727  		return true
   728  	}
   729  	r := c.reachable(s.Index)
   730  	for i := 0; i < len(r); i++ {
   731  		inv := r[i]
   732  		var start, end uint32
   733  		if inv&flagMask == 0 {
   734  			// small interval
   735  			start = uint32(inv & numMask)
   736  			end = start + uint32(inv&lengthMask)>>numBits
   737  		} else {
   738  			// large interval
   739  			i++
   740  			start = uint32(inv & numMask)
   741  			end = uint32(r[i])
   742  		}
   743  		if idx >= start && idx <= end {
   744  			return true
   745  		}
   746  	}
   747  	return false
   748  }
   749  
   750  func (c closure) reachable(id int) []interval {
   751  	return c.reachables[c.span[id]:c.span[id+1]]
   752  }
   753  
   754  func (c closure) walk(current *BasicBlock, b *BasicBlock, visited []bool) {
   755  	// TODO(dh): the 'current' argument seems to be unused
   756  	// TODO(dh): there's no reason for this to be a method
   757  	visited[b.Index] = true
   758  	for _, succ := range b.Succs {
   759  		if visited[succ.Index] {
   760  			continue
   761  		}
   762  		visited[succ.Index] = true
   763  		c.walk(current, succ, visited)
   764  	}
   765  }
   766  
   767  func transitiveClosure(fn *Function) *closure {
   768  	reachable := make(BlockMap[bool], len(fn.Blocks))
   769  	c := &closure{}
   770  	c.span = make([]uint32, len(fn.Blocks)+1)
   771  
   772  	addInterval := func(start, end uint32) {
   773  		if l := end - start; l <= 1<<lengthBits-1 {
   774  			n := interval(l<<numBits | start)
   775  			c.reachables = append(c.reachables, n)
   776  		} else {
   777  			n1 := interval(1<<31 | start)
   778  			n2 := interval(end)
   779  			c.reachables = append(c.reachables, n1, n2)
   780  		}
   781  	}
   782  
   783  	for i, b := range fn.Blocks[1:] {
    784  		for j := range reachable {
    785  			reachable[j] = false
    786  		}
   787  
   788  		c.walk(b, b, reachable)
   789  		start := ^uint32(0)
   790  		for id, isReachable := range reachable {
   791  			if !isReachable {
   792  				if start != ^uint32(0) {
   793  					end := uint32(id) - 1
   794  					addInterval(start, end)
   795  					start = ^uint32(0)
   796  				}
   797  				continue
   798  			} else if start == ^uint32(0) {
   799  				start = uint32(id)
   800  			}
   801  		}
   802  		if start != ^uint32(0) {
   803  			addInterval(start, uint32(len(reachable))-1)
   804  		}
   805  
   806  		c.span[i+2] = uint32(len(c.reachables))
   807  	}
   808  
   809  	return c
   810  }
   811  
   812  // newPhi is a pair of a newly introduced φ-node and the lifted Alloc
   813  // it replaces.
   814  type newPhi struct {
   815  	phi   *Phi
   816  	alloc *Alloc
   817  }
   818  
   819  type newSigma struct {
   820  	alloc  *Alloc
   821  	sigmas []*Sigma
   822  }
   823  
   824  type liftInstructions struct {
   825  	insertInstructions map[Instruction][]Instruction
   826  	renameAllocs       []struct {
   827  		from       *Alloc
   828  		to         *Alloc
   829  		startingAt int
   830  	}
   831  }
   832  
   833  // liftable determines if alloc can be lifted, and records instructions to split partially liftable allocs.
   834  //
   835  // In the trivial case, all uses of the alloc can be lifted. This is the case when it is only used for storing into and
   836  // loading from. In that case, no instructions are recorded.
   837  //
   838  // In the more complex case, the alloc is used for storing into and loading from, but it is also used as a value, for
   839  // example because it gets passed to a function, e.g. fn(&x). In this case, uses of the alloc fall into one of two
   840  // categories: those that can be lifted and those that can't. A boundary forms between these two categories in the
   841  // function's control flow: Once an unliftable use is encountered, the alloc is no longer liftable for the remainder of
   842  // the basic block the use is in, nor in any blocks reachable from it.
   843  //
   844  // We record instructions that split the alloc into two allocs: one that is used in liftable uses, and one that is used
   845  // in unliftable uses. Whenever we encounter a boundary between liftable and unliftable uses or blocks, we emit a pair
   846  // of Load and Store that copy the value from the liftable alloc into the unliftable alloc. Taking these instructions
   847  // into account, the normal lifting machinery will completely lift the liftable alloc, store the correct lifted values
   848  // into the unliftable alloc, and will not at all lift the unliftable alloc.
   849  //
   850  // In Go syntax, the transformation looks somewhat like this:
   851  //
   852  //	func foo() {
   853  //		x := 32
   854  //		if cond {
   855  //			println(x)
   856  //			escape(&x)
   857  //			println(x)
   858  //		} else {
   859  //			println(x)
   860  //		}
   861  //		println(x)
   862  //	}
   863  //
   864  // transforms into
   865  //
   866  //	func fooSplitAlloc() {
   867  //		x := 32
   868  //		var x_ int
   869  //		if cond {
   870  //			println(x)
   871  //			x_ = x
   872  //			escape(&x_)
   873  //			println(x_)
   874  //		} else {
   875  //			println(x)
   876  //			x_ = x
   877  //		}
   878  //		println(x_)
   879  //	}
   880  func liftable(alloc *Alloc, instructions BlockMap[liftInstructions]) bool {
   881  	fn := alloc.block.parent
   882  
   883  	// Don't lift named return values in functions that defer
   884  	// calls that may recover from panic.
   885  	if fn.hasDefer {
   886  		for _, nr := range fn.namedResults {
   887  			if nr == alloc {
   888  				return false
   889  			}
   890  		}
   891  	}
   892  
   893  	type blockDesc struct {
    894  		// is the block (partially) unliftable, because it contains unliftable instructions or is reachable from an unliftable block
   895  		isUnliftable     bool
   896  		hasLiftableLoad  bool
   897  		hasLiftableOther bool
   898  		// we need to emit stores in predecessors because the unliftable use is in a phi
   899  		storeInPreds bool
   900  
   901  		lastLiftable    int
   902  		firstUnliftable int
   903  	}
   904  	blocks := make(BlockMap[blockDesc], len(fn.Blocks))
   905  	for _, b := range fn.Blocks {
   906  		blocks[b.Index].lastLiftable = -1
   907  		blocks[b.Index].firstUnliftable = len(b.Instrs) + 1
   908  	}
   909  
   910  	// Look at all uses of the alloc and deduce which blocks have liftable or unliftable instructions.
   911  	for _, instr := range alloc.referrers {
   912  		// Find the first unliftable use
   913  
   914  		desc := &blocks[instr.Block().Index]
   915  		hasUnliftable := false
   916  		inHead := false
   917  		switch instr := instr.(type) {
   918  		case *Store:
   919  			if instr.Val == alloc {
   920  				hasUnliftable = true
   921  			}
   922  		case *Load:
   923  		case *DebugRef:
   924  		case *Phi, *Sigma:
   925  			inHead = true
   926  			hasUnliftable = true
   927  		default:
   928  			hasUnliftable = true
   929  		}
   930  
   931  		if hasUnliftable {
   932  			desc.isUnliftable = true
   933  			if int(instr.ID()) < desc.firstUnliftable {
   934  				desc.firstUnliftable = int(instr.ID())
   935  			}
   936  			if inHead {
   937  				desc.storeInPreds = true
   938  				desc.firstUnliftable = 0
   939  			}
   940  		}
   941  	}
   942  
   943  	for _, instr := range alloc.referrers {
   944  		// Find the last liftable use, taking the previously calculated firstUnliftable into consideration
   945  
   946  		desc := &blocks[instr.Block().Index]
   947  		if int(instr.ID()) >= desc.firstUnliftable {
   948  			continue
   949  		}
   950  		hasLiftable := false
   951  		switch instr := instr.(type) {
   952  		case *Store:
   953  			if instr.Val != alloc {
   954  				desc.hasLiftableOther = true
   955  				hasLiftable = true
   956  			}
   957  		case *Load:
   958  			desc.hasLiftableLoad = true
   959  			hasLiftable = true
   960  		case *DebugRef:
   961  			desc.hasLiftableOther = true
   962  		}
   963  		if hasLiftable {
   964  			if int(instr.ID()) > desc.lastLiftable {
   965  				desc.lastLiftable = int(instr.ID())
   966  			}
   967  		}
   968  	}
   969  
   970  	for i := range blocks {
    971  		// Update firstUnliftable to be one after lastLiftable. We do this to include, in the renaming, the
    972  		// DebugRefs that precede the first unliftable use.
   973  		if blocks[i].lastLiftable == -1 && !blocks[i].storeInPreds {
   974  			// There are no liftable instructions (for this alloc) in this block. Set firstUnliftable to the
   975  			// first non-head instruction to avoid inserting the store before phi instructions, which would
   976  			// fail validation.
   977  			first := -1
   978  		instrLoop:
   979  			for i, instr := range fn.Blocks[i].Instrs {
   980  				switch instr.(type) {
   981  				case *Phi, *Sigma:
   982  				default:
   983  					first = i
   984  					break instrLoop
   985  				}
   986  			}
   987  			blocks[i].firstUnliftable = first
   988  		} else {
   989  			blocks[i].firstUnliftable = blocks[i].lastLiftable + 1
   990  		}
   991  	}
   992  
    993  	// If a block is reachable from a (partially) unliftable block, then the entirety of the block is unliftable. In that
   994  	// case, stores have to be inserted in the predecessors.
   995  	//
   996  	// TODO(dh): this isn't always necessary. If the block is reachable by itself, i.e. part of a loop, then if the
   997  	// Alloc instruction is itself part of that loop, then there is a subset of instructions in the loop that can be
   998  	// lifted. For example:
   999  	//
  1000  	// 	for {
  1001  	// 		x := 42
  1002  	// 		println(x)
  1003  	// 		escape(&x)
  1004  	// 	}
  1005  	//
  1006  	// The x that escapes in one iteration of the loop isn't the same x that we read from on the next iteration.
  1007  	seen := make(BlockMap[bool], len(fn.Blocks))
  1008  	var dfs func(b *BasicBlock)
  1009  	dfs = func(b *BasicBlock) {
  1010  		if seen[b.Index] {
  1011  			return
  1012  		}
  1013  		seen[b.Index] = true
  1014  		desc := &blocks[b.Index]
  1015  		desc.hasLiftableLoad = false
  1016  		desc.hasLiftableOther = false
  1017  		desc.isUnliftable = true
  1018  		desc.firstUnliftable = 0
  1019  		desc.storeInPreds = true
  1020  		for _, succ := range b.Succs {
  1021  			dfs(succ)
  1022  		}
  1023  	}
  1024  	for _, b := range fn.Blocks {
  1025  		if blocks[b.Index].isUnliftable {
  1026  			for _, succ := range b.Succs {
  1027  				dfs(succ)
  1028  			}
  1029  		}
  1030  	}
  1031  
  1032  	hasLiftableLoad := false
  1033  	hasLiftableOther := false
  1034  	hasUnliftable := false
  1035  	for _, b := range fn.Blocks {
  1036  		desc := blocks[b.Index]
  1037  		hasLiftableLoad = hasLiftableLoad || desc.hasLiftableLoad
  1038  		hasLiftableOther = hasLiftableOther || desc.hasLiftableOther
  1039  		if desc.isUnliftable {
  1040  			hasUnliftable = true
  1041  		}
  1042  	}
  1043  	if !hasLiftableLoad && !hasLiftableOther {
  1044  		// There are no liftable uses
  1045  		return false
  1046  	} else if !hasUnliftable {
  1047  		// The alloc is entirely liftable without splitting
  1048  		return true
  1049  	} else if !hasLiftableLoad {
  1050  		// The alloc is not entirely liftable, and the only liftable uses are stores. While some of those stores could
  1051  		// get lifted away, it would also lead to an infinite loop when lifting to a fixpoint, because the newly created
   1052  		// allocs would also get stored into repeatedly, and those stores would be their only liftable uses.
  1053  		return false
  1054  	}
  1055  
  1056  	// We need to insert stores for the new alloc. If a (partially) unliftable block has no unliftable
  1057  	// predecessors and the use isn't in a phi node, then the store can be inserted right before the unliftable use.
  1058  	// Otherwise, stores have to be inserted at the end of all liftable predecessors.
  1059  
  1060  	newAlloc := &Alloc{Heap: true}
  1061  	newAlloc.setBlock(alloc.block)
  1062  	newAlloc.setType(alloc.typ)
  1063  	newAlloc.setSource(alloc.source)
  1064  	newAlloc.index = -1
  1065  	newAlloc.comment = "split alloc"
  1066  
  1067  	{
  1068  		work := instructions[alloc.block.Index]
  1069  		work.insertInstructions[alloc] = append(work.insertInstructions[alloc], newAlloc)
  1070  	}
  1071  
  1072  	predHasStore := make(BlockMap[bool], len(fn.Blocks))
  1073  	for _, b := range fn.Blocks {
  1074  		desc := &blocks[b.Index]
  1075  		bWork := &instructions[b.Index]
  1076  
  1077  		if desc.isUnliftable {
  1078  			bWork.renameAllocs = append(bWork.renameAllocs, struct {
  1079  				from       *Alloc
  1080  				to         *Alloc
  1081  				startingAt int
  1082  			}{
  1083  				alloc, newAlloc, int(desc.firstUnliftable),
  1084  			})
  1085  		}
  1086  
  1087  		if !desc.isUnliftable {
  1088  			continue
  1089  		}
  1090  
  1091  		propagate := func(in *BasicBlock, before Instruction) {
  1092  			load := &Load{
  1093  				X: alloc,
  1094  			}
  1095  			store := &Store{
  1096  				Addr: newAlloc,
  1097  				Val:  load,
  1098  			}
  1099  			load.setType(deref(alloc.typ))
  1100  			load.setBlock(in)
  1101  			load.comment = "split alloc"
  1102  			store.setBlock(in)
  1103  			updateOperandReferrers(load)
  1104  			updateOperandReferrers(store)
  1105  			store.comment = "split alloc"
  1106  
  1107  			entry := &instructions[in.Index]
  1108  			entry.insertInstructions[before] = append(entry.insertInstructions[before], load, store)
  1109  		}
  1110  
  1111  		if desc.storeInPreds {
  1112  			// emit stores at the end of liftable preds
  1113  			for _, pred := range b.Preds {
  1114  				if blocks[pred.Index].isUnliftable {
  1115  					continue
  1116  				}
  1117  
  1118  				if !alloc.block.Dominates(pred) {
  1119  					// Consider this cfg:
  1120  					//
  1121  					//      1
  1122  					//     /|
  1123  					//    / |
  1124  					//   ↙  ↓
  1125  					//  2--→3
  1126  					//
  1127  					// with an Alloc in block 2. It doesn't make sense to insert a store in block 1 for the jump to
  1128  					// block 3, because 1 can never see the Alloc in the first place.
  1129  					//
  1130  					// Ignoring phi nodes, an Alloc always dominates all of its uses, and phi nodes don't matter here,
  1131  					// because for the incoming edges that do matter, we do emit the stores.
  1132  
  1133  					continue
  1134  				}
  1135  
  1136  				if predHasStore[pred.Index] {
  1137  					// Don't generate redundant propagations. Not only is it unnecessary, it can lead to infinite loops
  1138  					// when trying to lift to a fix point, because redundant stores are liftable.
  1139  					continue
  1140  				}
  1141  
  1142  				predHasStore[pred.Index] = true
  1143  
  1144  				before := pred.Instrs[len(pred.Instrs)-1]
  1145  				propagate(pred, before)
  1146  			}
  1147  		} else {
  1148  			// emit store before the first unliftable use
  1149  			before := b.Instrs[desc.firstUnliftable]
  1150  			propagate(b, before)
  1151  		}
  1152  	}
  1153  
  1154  	return true
  1155  }
  1156  
  1157  // liftAlloc lifts alloc into registers and populates newPhis and newSigmas with all the φ- and σ-nodes it may require.
  1158  func liftAlloc(closure *closure, df domFrontier, rdf postDomFrontier, alloc *Alloc, newPhis BlockMap[[]newPhi], newSigmas BlockMap[[]newSigma]) {
  1159  	fn := alloc.Parent()
  1160  
  1161  	defblocks := fn.blockset(0)
  1162  	useblocks := fn.blockset(1)
  1163  	Aphi := fn.blockset(2)
  1164  	Asigma := fn.blockset(3)
  1165  	W := fn.blockset(4)
  1166  
  1167  	// Compute defblocks, the set of blocks containing a
  1168  	// definition of the alloc cell.
  1169  	for _, instr := range *alloc.Referrers() {
  1170  		switch instr := instr.(type) {
  1171  		case *Store:
  1172  			defblocks.Add(instr.Block())
  1173  		case *Load:
  1174  			useblocks.Add(instr.Block())
  1175  			for _, ref := range *instr.Referrers() {
  1176  				useblocks.Add(ref.Block())
  1177  			}
  1178  		}
  1179  	}
  1180  	// The Alloc itself counts as a (zero) definition of the cell.
  1181  	defblocks.Add(alloc.Block())
  1182  
  1183  	if debugLifting {
  1184  		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
  1185  	}
  1186  
  1187  	// Φ-insertion.
  1188  	//
  1189  	// What follows is the body of the main loop of the insert-φ
   1190  	// function described by Cytron et al., but instead of using
  1191  	// counter tricks, we just reset the 'hasAlready' and 'work'
  1192  	// sets each iteration.  These are bitmaps so it's pretty cheap.
  1193  
   1194  	// Initialize the work set W to defblocks.
  1195  
  1196  	for change := true; change; {
  1197  		change = false
  1198  		{
  1199  			// Traverse iterated dominance frontier, inserting φ-nodes.
  1200  			W.Set(defblocks)
  1201  
  1202  			for i := W.Take(); i != -1; i = W.Take() {
  1203  				n := fn.Blocks[i]
  1204  				for _, y := range df[n.Index] {
  1205  					if Aphi.Add(y) {
  1206  						if len(*alloc.Referrers()) == 0 {
  1207  							continue
  1208  						}
  1209  						live := false
  1210  						if closure == nil {
  1211  							live = true
  1212  						} else {
  1213  							for _, ref := range *alloc.Referrers() {
  1214  								if _, ok := ref.(*Load); ok {
  1215  									if closure.has(y, ref.Block()) {
  1216  										live = true
  1217  										break
  1218  									}
  1219  								}
  1220  							}
  1221  						}
  1222  						if !live {
  1223  							continue
  1224  						}
  1225  
  1226  						// Create φ-node.
  1227  						// It will be prepended to v.Instrs later, if needed.
  1228  						phi := &Phi{
  1229  							Edges: make([]Value, len(y.Preds)),
  1230  						}
  1231  
  1232  						phi.source = alloc.source
  1233  						phi.setType(deref(alloc.Type()))
  1234  						phi.block = y
  1235  						if debugLifting {
  1236  							fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, y)
  1237  						}
  1238  						newPhis[y.Index] = append(newPhis[y.Index], newPhi{phi, alloc})
  1239  
  1240  						for _, p := range y.Preds {
  1241  							useblocks.Add(p)
  1242  						}
  1243  						change = true
  1244  						if defblocks.Add(y) {
  1245  							W.Add(y)
  1246  						}
  1247  					}
  1248  				}
  1249  			}
  1250  		}
  1251  
  1252  		{
  1253  			W.Set(useblocks)
  1254  			for i := W.Take(); i != -1; i = W.Take() {
  1255  				n := fn.Blocks[i]
  1256  				for _, y := range rdf[n.Index] {
  1257  					if Asigma.Add(y) {
  1258  						sigmas := make([]*Sigma, 0, len(y.Succs))
  1259  						anyLive := false
  1260  						for _, succ := range y.Succs {
  1261  							live := false
  1262  							for _, ref := range *alloc.Referrers() {
  1263  								if closure == nil || closure.has(succ, ref.Block()) {
  1264  									live = true
  1265  									anyLive = true
  1266  									break
  1267  								}
  1268  							}
  1269  							if live {
  1270  								sigma := &Sigma{
  1271  									From: y,
  1272  									X:    alloc,
  1273  								}
  1274  								sigma.source = alloc.source
  1275  								sigma.setType(deref(alloc.Type()))
  1276  								sigma.block = succ
  1277  								sigmas = append(sigmas, sigma)
  1278  							} else {
  1279  								sigmas = append(sigmas, nil)
  1280  							}
  1281  						}
  1282  
  1283  						if anyLive {
  1284  							newSigmas[y.Index] = append(newSigmas[y.Index], newSigma{alloc, sigmas})
  1285  							for _, s := range y.Succs {
  1286  								defblocks.Add(s)
  1287  							}
  1288  							change = true
  1289  							if useblocks.Add(y) {
  1290  								W.Add(y)
  1291  							}
  1292  						}
  1293  					}
  1294  				}
  1295  			}
  1296  		}
  1297  	}
  1298  }
  1299  
  1300  // replaceAll replaces all intraprocedural uses of x with y,
  1301  // updating x.Referrers and y.Referrers.
  1302  // Precondition: x.Referrers() != nil, i.e. x must be local to some function.
  1303  func replaceAll(x, y Value) {
  1304  	var rands []*Value
  1305  	pxrefs := x.Referrers()
  1306  	pyrefs := y.Referrers()
  1307  	for _, instr := range *pxrefs {
  1308  		switch instr := instr.(type) {
  1309  		case *CompositeValue:
  1310  			// Special case CompositeValue because it might have very large lists of operands
  1311  			//
  1312  			// OPT(dh): this loop is still expensive for large composite values
  1313  			for i, rand := range instr.Values {
  1314  				if rand == x {
  1315  					instr.Values[i] = y
  1316  				}
  1317  			}
  1318  		default:
  1319  			rands = instr.Operands(rands[:0]) // recycle storage
  1320  			for _, rand := range rands {
  1321  				if *rand != nil {
  1322  					if *rand == x {
  1323  						*rand = y
  1324  					}
  1325  				}
  1326  			}
  1327  		}
  1328  		if pyrefs != nil {
  1329  			*pyrefs = append(*pyrefs, instr) // dups ok
  1330  		}
  1331  	}
  1332  	*pxrefs = nil // x is now unreferenced
  1333  }
  1334  
  1335  func replace(instr Instruction, x, y Value) {
  1336  	args := instr.Operands(nil)
  1337  	matched := false
  1338  	for _, arg := range args {
  1339  		if *arg == x {
  1340  			*arg = y
  1341  			matched = true
  1342  		}
  1343  	}
  1344  	if matched {
  1345  		yrefs := y.Referrers()
  1346  		if yrefs != nil {
  1347  			*yrefs = append(*yrefs, instr)
  1348  		}
  1349  
  1350  		xrefs := x.Referrers()
  1351  		if xrefs != nil {
  1352  			*xrefs = removeInstr(*xrefs, instr)
  1353  		}
  1354  	}
  1355  }
  1356  
  1357  // renamed returns the value to which alloc is being renamed,
  1358  // constructing it lazily if it's the implicit zero initialization.
  1359  func renamed(fn *Function, renaming []Value, alloc *Alloc) Value {
  1360  	v := renaming[alloc.index]
  1361  	if v == nil {
  1362  		v = emitConst(fn, zeroConst(deref(alloc.Type()), alloc.source))
  1363  		renaming[alloc.index] = v
  1364  	}
  1365  	return v
  1366  }
  1367  
  1368  func copyValue(v Value, why Instruction, info CopyInfo) *Copy {
  1369  	c := &Copy{
  1370  		X:    v,
  1371  		Why:  why,
  1372  		Info: info,
  1373  	}
  1374  	if refs := v.Referrers(); refs != nil {
  1375  		*refs = append(*refs, c)
  1376  	}
  1377  	c.setType(v.Type())
  1378  	c.setSource(v.Source())
  1379  	return c
  1380  }
  1381  
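         // splitOnNewInformation walks the dominator tree rooted at u and
         // inserts Copy instructions whenever an instruction implies new
         // information about one of its operands. For example (sketch),
         // after
         //
         //	t1 = IndexAddr t0 i
         //
         // t0 is known to be non-nil and i to be non-negative, so dominated
         // uses are renamed to copies carrying CopyInfoNotNil and
         // CopyInfoNotNegative, giving later analyses a distinct value per
         // program point to attach facts to.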
  1382  func splitOnNewInformation(u *BasicBlock, renaming *StackMap) {
  1383  	renaming.Push()
  1384  	defer renaming.Pop()
  1385  
  1386  	rename := func(v Value, why Instruction, info CopyInfo, i int) {
  1387  		c := copyValue(v, why, info)
  1388  		c.setBlock(u)
  1389  		renaming.Set(v, c)
  1390  		u.Instrs = append(u.Instrs, nil)
  1391  		copy(u.Instrs[i+2:], u.Instrs[i+1:])
  1392  		u.Instrs[i+1] = c
  1393  	}
  1394  
  1395  	replacement := func(v Value) (Value, bool) {
  1396  		r, ok := renaming.Get(v)
  1397  		if !ok {
  1398  			return nil, false
  1399  		}
  1400  		for {
  1401  			rr, ok := renaming.Get(r)
  1402  			if !ok {
  1403  				// Store replacement in the map so that future calls to replacement(v) don't have to go through the
  1404  				// iterative process again.
  1405  				renaming.Set(v, r)
  1406  				return r, true
  1407  			}
  1408  			r = rr
  1409  		}
  1410  	}
  1411  
  1412  	var hasInfo func(v Value, info CopyInfo) bool
  1413  	hasInfo = func(v Value, info CopyInfo) bool {
  1414  		switch v := v.(type) {
  1415  		case *Copy:
  1416  			return (v.Info&info) == info || hasInfo(v.X, info)
  1417  		case *FieldAddr, *IndexAddr, *TypeAssert, *MakeChan, *MakeMap, *MakeSlice, *Alloc:
  1418  			return info == CopyInfoNotNil
  1419  		case Member, *Builtin:
  1420  			return info == CopyInfoNotNil
  1421  		case *Sigma:
  1422  			return hasInfo(v.X, info)
  1423  		default:
  1424  			return false
  1425  		}
  1426  	}
  1427  
  1428  	var args []*Value
  1429  	for i := 0; i < len(u.Instrs); i++ {
  1430  		instr := u.Instrs[i]
  1431  		if instr == nil {
  1432  			continue
  1433  		}
  1434  		args = instr.Operands(args[:0])
  1435  		for _, arg := range args {
  1436  			if *arg == nil {
  1437  				continue
  1438  			}
  1439  			if r, ok := replacement(*arg); ok {
   1440  				// replace rewrites the operand in place and updates both referrer lists.
   1441  				replace(instr, *arg, r)
  1442  			}
  1443  		}
  1444  
  1445  		// TODO write some bits on why we copy values instead of encoding the actual control flow and panics
  1446  
  1447  		switch instr := instr.(type) {
  1448  		case *IndexAddr:
  1449  			// Note that we rename instr.Index and instr.X even if they're already copies, because unique combinations
  1450  			// of X and Index may lead to unique information.
  1451  
  1452  			// OPT we should rename both variables at once and avoid one memmove
  1453  			rename(instr.Index, instr, CopyInfoNotNegative, i)
  1454  			rename(instr.X, instr, CopyInfoNotNil, i)
  1455  			i += 2 // skip over instructions we just inserted
  1456  		case *FieldAddr:
  1457  			if !hasInfo(instr.X, CopyInfoNotNil) {
  1458  				rename(instr.X, instr, CopyInfoNotNil, i)
  1459  				i++
  1460  			}
  1461  		case *TypeAssert:
  1462  			// If we've already type asserted instr.X without comma-ok before, then it can only contain a single type,
  1463  			// and successive type assertions, no matter the type, don't tell us anything new.
  1464  			if !hasInfo(instr.X, CopyInfoNotNil|CopyInfoSingleConcreteType) {
  1465  				rename(instr.X, instr, CopyInfoNotNil|CopyInfoSingleConcreteType, i)
  1466  				i++ // skip over instruction we just inserted
  1467  			}
  1468  		case *Load:
  1469  			if !hasInfo(instr.X, CopyInfoNotNil) {
  1470  				rename(instr.X, instr, CopyInfoNotNil, i)
  1471  				i++
  1472  			}
  1473  		case *Store:
  1474  			if !hasInfo(instr.Addr, CopyInfoNotNil) {
  1475  				rename(instr.Addr, instr, CopyInfoNotNil, i)
  1476  				i++
  1477  			}
  1478  		case *MapUpdate:
  1479  			if !hasInfo(instr.Map, CopyInfoNotNil) {
  1480  				rename(instr.Map, instr, CopyInfoNotNil, i)
  1481  				i++
  1482  			}
  1483  		case CallInstruction:
  1484  			off := 0
  1485  			if !instr.Common().IsInvoke() && !hasInfo(instr.Common().Value, CopyInfoNotNil) {
  1486  				rename(instr.Common().Value, instr, CopyInfoNotNil, i)
  1487  				off++
  1488  			}
  1489  			if f, ok := instr.Common().Value.(*Builtin); ok {
  1490  				switch f.name {
  1491  				case "close":
  1492  					arg := instr.Common().Args[0]
  1493  					if !hasInfo(arg, CopyInfoNotNil|CopyInfoClosed) {
  1494  						rename(arg, instr, CopyInfoNotNil|CopyInfoClosed, i)
  1495  						off++
  1496  					}
  1497  				}
  1498  			}
  1499  			i += off
  1500  		case *SliceToArrayPointer:
  1501  			// A slice to array pointer conversion tells us the minimum length of the slice
  1502  			rename(instr.X, instr, CopyInfoUnspecified, i)
  1503  			i++
  1504  		case *SliceToArray:
  1505  			// A slice to array conversion tells us the minimum length of the slice
  1506  			rename(instr.X, instr, CopyInfoUnspecified, i)
  1507  			i++
  1508  		case *Slice:
  1509  			// Slicing tells us about some of the bounds
  1510  			off := 0
  1511  			if instr.Low == nil && instr.High == nil && instr.Max == nil {
  1512  				// If all indices are unspecified, then we can only learn something about instr.X if it might've been
  1513  				// nil.
  1514  				if !hasInfo(instr.X, CopyInfoNotNil) {
  1515  					rename(instr.X, instr, CopyInfoUnspecified, i)
  1516  					off++
  1517  				}
  1518  			} else {
  1519  				rename(instr.X, instr, CopyInfoUnspecified, i)
  1520  				off++
  1521  			}
  1522  			// We copy the indices even if we already know they are not negative, because we can associate numeric
  1523  			// ranges with them.
  1524  			if instr.Low != nil {
  1525  				rename(instr.Low, instr, CopyInfoNotNegative, i)
  1526  				off++
  1527  			}
  1528  			if instr.High != nil {
  1529  				rename(instr.High, instr, CopyInfoNotNegative, i)
  1530  				off++
  1531  			}
  1532  			if instr.Max != nil {
  1533  				rename(instr.Max, instr, CopyInfoNotNegative, i)
  1534  				off++
  1535  			}
  1536  			i += off
  1537  		case *StringLookup:
  1538  			rename(instr.X, instr, CopyInfoUnspecified, i)
  1539  			rename(instr.Index, instr, CopyInfoNotNegative, i)
  1540  			i += 2
  1541  		case *Recv:
  1542  			if !hasInfo(instr.Chan, CopyInfoNotNil) {
  1543  				// Receiving from a nil channel never completes
  1544  				rename(instr.Chan, instr, CopyInfoNotNil, i)
  1545  				i++
  1546  			}
  1547  		case *Send:
  1548  			if !hasInfo(instr.Chan, CopyInfoNotNil) {
  1549  				// Sending to a nil channel never completes. Sending to a closed channel panics, but whether a channel
  1550  				// is closed isn't local to this function, so we didn't learn anything.
  1551  				rename(instr.Chan, instr, CopyInfoNotNil, i)
  1552  				i++
  1553  			}
  1554  		}
  1555  	}
  1556  
  1557  	for _, v := range u.dom.children {
  1558  		splitOnNewInformation(v, renaming)
  1559  	}
  1560  }
  1561  
  1562  // rename implements the Cytron et al-based SSI renaming algorithm, a
  1563  // preorder traversal of the dominator tree replacing all loads of
  1564  // Alloc cells with the value stored to that cell by the dominating
  1565  // store instruction.
  1566  //
  1567  // renaming is a map from *Alloc (keyed by index number) to its
  1568  // dominating stored value; newPhis[x] is the set of new φ-nodes to be
  1569  // prepended to block x.
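         //
         // For example (sketch), given
         //
         //	x = Alloc int
         //	Store x t0
         //	t1 = Load x
         //
         // rename records t0 as the dominating value of x, deletes the
         // Store and the Load, and rewrites all uses of t1 to t0.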
  1570  func rename(u *BasicBlock, renaming []Value, newPhis BlockMap[[]newPhi], newSigmas BlockMap[[]newSigma]) {
  1571  	// Each φ-node becomes the new name for its associated Alloc.
  1572  	for _, np := range newPhis[u.Index] {
  1573  		phi := np.phi
  1574  		alloc := np.alloc
  1575  		renaming[alloc.index] = phi
  1576  	}
  1577  
  1578  	// Rename loads and stores of allocs.
  1579  	for i, instr := range u.Instrs {
  1580  		switch instr := instr.(type) {
  1581  		case *Alloc:
  1582  			if instr.index >= 0 { // store of zero to Alloc cell
  1583  				// Replace dominated loads by the zero value.
  1584  				renaming[instr.index] = nil
  1585  				if debugLifting {
  1586  					fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
  1587  				}
  1588  				// Delete the Alloc.
  1589  				u.Instrs[i] = nil
  1590  				u.gaps++
  1591  			}
  1592  
  1593  		case *Store:
  1594  			if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
  1595  				// Replace dominated loads by the stored value.
  1596  				renaming[alloc.index] = instr.Val
  1597  				if debugLifting {
  1598  					fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
  1599  						instr, instr.Val.Name())
  1600  				}
  1601  				if refs := instr.Addr.Referrers(); refs != nil {
  1602  					*refs = removeInstr(*refs, instr)
  1603  				}
  1604  				if refs := instr.Val.Referrers(); refs != nil {
  1605  					*refs = removeInstr(*refs, instr)
  1606  				}
  1607  				// Delete the Store.
  1608  				u.Instrs[i] = nil
  1609  				u.gaps++
  1610  			}
  1611  
  1612  		case *Load:
  1613  			if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
  1614  				// In theory, we wouldn't be able to replace loads directly, because a loaded value could be used in
  1615  				// different branches, in which case it should be replaced with different sigma nodes. But we can't
  1616  				// simply defer replacement, either, because then later stores might incorrectly affect this load.
  1617  				//
  1618  				// To avoid doing renaming on _all_ values (instead of just loads and stores like we're doing), we make
  1619  				// sure during code generation that each load is only used in one block. For example, in constant switch
  1620  				// statements, where the tag is only evaluated once, we store it in a temporary and load it for each
  1621  				// comparison, so that we have individual loads to replace.
  1622  				//
  1623  				// Because we only rename stores and loads, the end result will not contain sigma nodes for all
  1624  				// constants. Some constants may be used directly, e.g. in comparisons such as 'x == 5'. We may still
  1625  				// end up inserting dead sigma nodes in branches, but these will never get used in renaming and will be
  1626  				// cleaned up when we remove dead phis and sigmas.
  1627  				newval := renamed(u.Parent(), renaming, alloc)
  1628  				if debugLifting {
  1629  					fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
  1630  						instr.Name(), instr, newval)
  1631  				}
  1632  				replaceAll(instr, newval)
  1633  				u.Instrs[i] = nil
  1634  				u.gaps++
  1635  			}
  1636  
  1637  		case *DebugRef:
  1638  			if x, ok := instr.X.(*Alloc); ok && x.index >= 0 {
  1639  				if instr.IsAddr {
  1640  					instr.X = renamed(u.Parent(), renaming, x)
  1641  					instr.IsAddr = false
  1642  
  1643  					// Add DebugRef to instr.X's referrers.
  1644  					if refs := instr.X.Referrers(); refs != nil {
  1645  						*refs = append(*refs, instr)
  1646  					}
  1647  				} else {
  1648  					// A source expression denotes the address
  1649  					// of an Alloc that was optimized away.
  1650  					instr.X = nil
  1651  
  1652  					// Delete the DebugRef.
  1653  					u.Instrs[i] = nil
  1654  					u.gaps++
  1655  				}
  1656  			}
  1657  		}
  1658  	}
  1659  
  1660  	// update all outgoing sigma nodes with the dominating store
  1661  	for _, sigmas := range newSigmas[u.Index] {
  1662  		for _, sigma := range sigmas.sigmas {
  1663  			if sigma == nil {
  1664  				continue
  1665  			}
  1666  			sigma.X = renamed(u.Parent(), renaming, sigmas.alloc)
  1667  		}
  1668  	}
  1669  
  1670  	// For each φ-node in a CFG successor, rename the edge.
  1671  	for succi, v := range u.Succs {
  1672  		phis := newPhis[v.Index]
  1673  		if len(phis) == 0 {
  1674  			continue
  1675  		}
  1676  		i := v.predIndex(u)
  1677  		for _, np := range phis {
  1678  			phi := np.phi
  1679  			alloc := np.alloc
  1680  			// if there's a sigma node, use it, else use the dominating value
  1681  			var newval Value
  1682  			for _, sigmas := range newSigmas[u.Index] {
  1683  				if sigmas.alloc == alloc && sigmas.sigmas[succi] != nil {
  1684  					newval = sigmas.sigmas[succi]
  1685  					break
  1686  				}
  1687  			}
  1688  			if newval == nil {
  1689  				newval = renamed(u.Parent(), renaming, alloc)
  1690  			}
  1691  			if debugLifting {
  1692  				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
  1693  					phi.Name(), u, v, i, alloc.Name(), newval.Name())
  1694  			}
  1695  			phi.Edges[i] = newval
  1696  			if prefs := newval.Referrers(); prefs != nil {
  1697  				*prefs = append(*prefs, phi)
  1698  			}
  1699  		}
  1700  	}
  1701  
  1702  	// Continue depth-first recursion over domtree, pushing a
  1703  	// fresh copy of the renaming map for each subtree.
  1704  	r := make([]Value, len(renaming))
  1705  	for _, v := range u.dom.children {
  1706  		copy(r, renaming)
  1707  
  1708  		// on entry to a block, the incoming sigma nodes become the new values for their alloc
  1709  		if idx := u.succIndex(v); idx != -1 {
  1710  			for _, sigma := range newSigmas[u.Index] {
  1711  				if sigma.sigmas[idx] != nil {
  1712  					r[sigma.alloc.index] = sigma.sigmas[idx]
  1713  				}
  1714  			}
  1715  		}
  1716  		rename(v, r, newPhis, newSigmas)
  1717  	}
  1718  
  1719  }
  1720  
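         // simplifyConstantCompositeValues replaces each CompositeValue all
         // of whose operands are constants with an equivalent AggregateConst,
         // then compacts each block to squeeze out the dead instructions.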
  1721  func simplifyConstantCompositeValues(fn *Function) bool {
  1722  	changed := false
  1723  
  1724  	for _, b := range fn.Blocks {
  1725  		n := 0
  1726  		for _, instr := range b.Instrs {
  1727  			replaced := false
  1728  
  1729  			if cv, ok := instr.(*CompositeValue); ok {
  1730  				ac := &AggregateConst{}
  1731  				ac.typ = cv.typ
  1732  				replaced = true
  1733  				for _, v := range cv.Values {
  1734  					if c, ok := v.(Constant); ok {
  1735  						ac.Values = append(ac.Values, c)
  1736  					} else {
  1737  						replaced = false
  1738  						break
  1739  					}
  1740  				}
  1741  				if replaced {
  1742  					replaceAll(cv, emitConst(fn, ac))
  1743  					killInstruction(cv)
  1744  				}
  1745  
  1746  			}
  1747  
  1748  			if replaced {
  1749  				changed = true
  1750  			} else {
  1751  				b.Instrs[n] = instr
  1752  				n++
  1753  			}
  1754  		}
  1755  
  1756  		clearInstrs(b.Instrs[n:])
  1757  		b.Instrs = b.Instrs[:n]
  1758  	}
  1759  
  1760  	return changed
  1761  }
  1762  
  1763  func updateOperandReferrers(instr Instruction) {
  1764  	for _, op := range instr.Operands(nil) {
  1765  		refs := (*op).Referrers()
  1766  		if refs != nil {
  1767  			*refs = append(*refs, instr)
  1768  		}
  1769  	}
  1770  }