github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/ssa/regalloc.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Register allocation.
     6  //
     7  // We use a version of a linear scan register allocator. We treat the
     8  // whole function as a single long basic block and run through
     9  // it using a greedy register allocator. Then all merge edges
    10  // (those targeting a block with len(Preds)>1) are processed to
    11  // shuffle data into the place that the target of the edge expects.
    12  //
    13  // The greedy allocator moves values into registers just before they
    14  // are used, spills registers only when necessary, and spills the
    15  // value whose next use is farthest in the future.
    16  //
    17  // The register allocator requires that a block is not scheduled until
    18  // at least one of its predecessors has been scheduled. The most recent
    19  // such predecessor provides the starting register state for a block.
    20  //
    21  // It also requires that there are no critical edges (critical =
    22  // comes from a block with >1 successor and goes to a block with >1
    23  // predecessor).  This makes it easy to add fixup code on merge edges -
    24  // the source of a merge edge has only one successor, so we can add
    25  // fixup code to the end of that block.
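        //
        // For example, if b1 has successors {b2, b3} and b3 has predecessors
        // {b1, b4}, the edge b1->b3 is critical. The critical pass splits it
        // by inserting an empty block b5 (b1->b5->b3); b5 has exactly one
        // predecessor and one successor, so fixup code for the merge edge
        // b5->b3 has an unambiguous home at the end of b5.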
    26  
    27  // Spilling
    28  //
    29  // During the normal course of allocation, we might throw a still-live
    30  // value out of all registers. When that value is subsequently used, we must
    31  // load it from a slot on the stack. We must also issue an instruction to
    32  // initialize that stack location with a copy of v.
    33  //
    34  // pre-regalloc:
    35  //   (1) v = Op ...
    36  //   (2) x = Op ...
    37  //   (3) ... = Op v ...
    38  //
    39  // post-regalloc:
    40  //   (1) v = Op ...    : AX // computes v, store result in AX
    41  //       s = StoreReg v     // spill v to a stack slot
    42  //   (2) x = Op ...    : AX // some other op uses AX
    43  //       c = LoadReg s : CX // restore v from stack slot
    44  //   (3) ... = Op c ...     // use the restored value
    45  //
    46  // Allocation occurs normally until we reach (3) and we realize we have
    47  // a use of v and it isn't in any register. At that point, we allocate
    48  // a spill (a StoreReg) for v. We can't determine the correct place for
    49  // the spill at this point, so we allocate the spill as blockless initially.
    50  // The restore is then generated to load v back into a register so it can
    51  // be used. Subsequent uses of v will use the restored value c instead.
    52  //
    53  // What remains is the question of where to schedule the spill.
    54  // During allocation, we keep track of the dominator of all restores of v.
    55  // The spill of v must dominate that block. The spill must also be issued at
    56  // a point where v is still in a register.
    57  //
    58  // To find the right place, start at b, the block which dominates all restores.
    59  //  - If b is v.Block, then issue the spill right after v.
    60  //    It is known to be in a register at that point, and dominates any restores.
    61  //  - Otherwise, if v is in a register at the start of b,
    62  //    put the spill of v at the start of b.
    63  //  - Otherwise, set b = immediate dominator of b, and repeat.
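        //
        // For example, suppose v is defined in block b1 and restores of v
        // appear in b4 and b5, whose common dominator is b3. If v is not in
        // a register at the start of b3, we try b3's immediate dominator,
        // and so on, until we either find a block where v is in a register
        // at entry or reach v.Block (b1), where the spill goes right after v.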
    64  //
    65  // Phi values are special, as always. We define two kinds of phis, those
    66  // where the merge happens in a register (a "register" phi) and those where
    67  // the merge happens in a stack location (a "stack" phi).
    68  //
    69  // A register phi must have the phi and all of its inputs allocated to the
    70  // same register. Register phis are spilled similarly to regular ops.
    71  //
    72  // A stack phi must have the phi and all of its inputs allocated to the same
    73  // stack location. Stack phis start out life already spilled - each phi
    74  // input must be a store (using StoreReg) at the end of the corresponding
    75  // predecessor block.
    76  //     b1: y = ... : AX        b2: z = ... : BX
    77  //         y2 = StoreReg y         z2 = StoreReg z
    78  //         goto b3                 goto b3
    79  //     b3: x = phi(y2, z2)
    80  // The stack allocator knows that StoreReg args of stack-allocated phis
    81  // must be allocated to the same stack slot as the phi that uses them.
    82  // x is now a spilled value and a restore must appear before its first use.
    83  
    84  // TODO
    85  
    86  // Use an affinity graph to mark two values which should use the
    87  // same register. This affinity graph will be used to prefer certain
    88  // registers for allocation. This affinity helps eliminate moves that
    89  // are required for phi implementations and helps generate allocations
    90  // for 2-register architectures.
    91  
    92  // Note: regalloc generates a not-quite-SSA output. If we have:
    93  //
    94  //             b1: x = ... : AX
    95  //                 x2 = StoreReg x
    96  //                 ... AX gets reused for something else ...
    97  //                 if ... goto b3 else b4
    98  //
    99  //   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
   100  //       ... use x3 ...                 ... use x4 ...
   101  //
   102  //             b2: ... use x3 ...
   103  //
   104  // If b3 is the primary predecessor of b2, then we use x3 in b2 and
   105  // add a x4:CX->BX copy at the end of b4.
   106  // But the definition of x3 doesn't dominate b2.  We should really
   107  // insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
   108  // SSA form. For now, we ignore this problem as remaining in strict
   109  // SSA form isn't needed after regalloc. We'll just leave the use
   110  // of x3 not dominated by the definition of x3, and the CX->BX copy
   111  // will have no use (so don't run deadcode after regalloc!).
   112  // TODO: maybe we should introduce these extra phis?
   113  
   114  package ssa
   115  
   116  import (
   117  	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
   118  	"github.com/bir3/gocompiler/src/cmd/compile/internal/ir"
   119  	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
   120  	"github.com/bir3/gocompiler/src/cmd/internal/src"
   121  	"github.com/bir3/gocompiler/src/cmd/internal/sys"
   122  	"fmt"
   123  	"github.com/bir3/gocompiler/src/internal/buildcfg"
   124  	"math/bits"
   125  	"unsafe"
   126  )
   127  
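        // Debug output levels. Code below compares the pass's debug setting
        // against these, e.g. printing only when s.f.pass.debug > regDebug.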
   128  const (
   129  	moveSpills = iota
   130  	logSpills
   131  	regDebug
   132  	stackDebug
   133  )
   134  
   135  // distance is a measure of how far into the future values are used.
   136  // distance is measured in units of instructions.
   137  const (
   138  	likelyDistance   = 1
   139  	normalDistance   = 10
   140  	unlikelyDistance = 100
   141  )
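        // These weights are added when a use is reached across a block edge
        // (see computeLive): uses beyond likely edges look near, uses beyond
        // unlikely edges look far, biasing the allocator toward keeping
        // values needed on hot paths in registers.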
   142  
   143  // regalloc performs register allocation on f. It sets f.RegAlloc
   144  // to the resulting allocation.
   145  func regalloc(f *Func) {
   146  	var s regAllocState
   147  	s.init(f)
   148  	s.regalloc(f)
   149  	s.close()
   150  }
   151  
   152  type register uint8
   153  
   154  const noRegister register = 255
   155  
   156  // For bulk initializing
   157  var noRegisters = [32]register{
   158  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   159  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   160  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   161  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   162  }
   163  
   164  // A regMask encodes a set of machine registers.
   165  // TODO: regMask -> regSet?
   166  type regMask uint64
   167  
   168  func (m regMask) String() string {
   169  	s := ""
   170  	for r := register(0); m != 0; r++ {
   171  		if m>>r&1 == 0 {
   172  			continue
   173  		}
   174  		m &^= regMask(1) << r
   175  		if s != "" {
   176  			s += " "
   177  		}
   178  		s += fmt.Sprintf("r%d", r)
   179  	}
   180  	return s
   181  }
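        // For example, regMask(0b1010).String() returns "r1 r3".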
   182  
   183  func (s *regAllocState) RegMaskString(m regMask) string {
   184  	str := ""
   185  	for r := register(0); m != 0; r++ {
   186  		if m>>r&1 == 0 {
   187  			continue
   188  		}
   189  		m &^= regMask(1) << r
   190  		if str != "" {
   191  			str += " "
   192  		}
   193  		str += s.registers[r].String()
   194  	}
   195  	return str
   196  }
   197  
   198  // countRegs returns the number of set bits in the register mask.
   199  func countRegs(r regMask) int {
   200  	return bits.OnesCount64(uint64(r))
   201  }
   202  
   203  // pickReg picks an arbitrary register from the register mask.
   204  func pickReg(r regMask) register {
   205  	if r == 0 {
   206  		panic("can't pick a register from an empty set")
   207  	}
   208  	// pick the lowest one
   209  	return register(bits.TrailingZeros64(uint64(r)))
   210  }
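        // For example, pickReg(regMask(0b0110)) returns register 1,
        // the lowest set bit in the mask.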
   211  
   212  type use struct {
   213  	dist int32    // distance from start of the block to a use of a value
   214  	pos  src.XPos // source position of the use
   215  	next *use     // linked list of uses of a value in nondecreasing dist order
   216  }
   217  
   218  // A valState records the register allocation state for a (pre-regalloc) value.
   219  type valState struct {
   220  	regs              regMask // the set of registers holding a Value (usually just one)
   221  	uses              *use    // list of uses in this block
   222  	spill             *Value  // spilled copy of the Value (if any)
   223  	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
   224  	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
   225  	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags()
   226  	rematerializeable bool    // cached value of v.rematerializeable()
   227  }
   228  
   229  type regState struct {
   230  	v *Value // Original (preregalloc) Value stored in this register.
   231  	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
   232  	// If a register is unused, v==c==nil
   233  }
   234  
   235  type regAllocState struct {
   236  	f *Func
   237  
   238  	sdom        SparseTree
   239  	registers   []Register
   240  	numRegs     register
   241  	SPReg       register
   242  	SBReg       register
   243  	GReg        register
   244  	allocatable regMask
   245  
   246  	// live values at the end of each block.  live[b.ID] is a list of value IDs
   247  	// which are live at the end of b, together with a count of how many instructions
   248  	// forward to the next use.
   249  	live [][]liveInfo
   250  	// desired register assignments at the end of each block.
   251  	// Note that this is a static map computed before allocation occurs. Dynamic
   252  	// register desires (from partially completed allocations) will trump
   253  	// this information.
   254  	desired []desiredState
   255  
   256  	// current state of each (preregalloc) Value
   257  	values []valState
   258  
   259  	// ID of SP, SB values
   260  	sp, sb ID
   261  
   262  	// For each Value, map from its value ID back to the
   263  	// preregalloc Value it was derived from.
   264  	orig []*Value
   265  
   266  	// current state of each register
   267  	regs []regState
   268  
   269  	// registers that contain values which can't be kicked out
   270  	nospill regMask
   271  
   272  	// mask of registers currently in use
   273  	used regMask
   274  
   275  	// mask of registers used in the current instruction
   276  	tmpused regMask
   277  
   278  	// current block we're working on
   279  	curBlock *Block
   280  
   281  	// cache of use records
   282  	freeUseRecords *use
   283  
   284  	// endRegs[blockid] is the register state at the end of each block.
   285  	// encoded as a set of endReg records.
   286  	endRegs [][]endReg
   287  
   288  	// startRegs[blockid] is the register state at the start of merge blocks.
   289  	// saved state does not include the state of phi ops in the block.
   290  	startRegs [][]startReg
   291  
   292  	// spillLive[blockid] is the set of live spills at the end of each block
   293  	spillLive [][]ID
   294  
   295  	// a set of copies we generated to move things around, and
   296  	// whether each one is used in shuffle. Unused copies will be deleted.
   297  	copies map[*Value]bool
   298  
   299  	loopnest *loopnest
   300  
   301  	// choose a good order in which to visit blocks for allocation purposes.
   302  	visitOrder []*Block
   303  
   304  	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
   305  	blockOrder []int32
   306  
   307  	// whether to insert instructions that clobber dead registers at call sites
   308  	doClobber bool
   309  }
   310  
   311  type endReg struct {
   312  	r register
   313  	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
   314  	c *Value // cached version of the value
   315  }
   316  
   317  type startReg struct {
   318  	r   register
   319  	v   *Value   // pre-regalloc value needed in this register
   320  	c   *Value   // cached version of the value
   321  	pos src.XPos // source position of use of this register
   322  }
   323  
   324  // freeReg frees up register r. Any current user of r is kicked out.
   325  func (s *regAllocState) freeReg(r register) {
   326  	v := s.regs[r].v
   327  	if v == nil {
   328  		s.f.Fatalf("tried to free an already free register %d\n", r)
   329  	}
   330  
   331  	// Mark r as unused.
   332  	if s.f.pass.debug > regDebug {
   333  		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
   334  	}
   335  	s.regs[r] = regState{}
   336  	s.values[v.ID].regs &^= regMask(1) << r
   337  	s.used &^= regMask(1) << r
   338  }
   339  
   340  // freeRegs frees up all registers listed in m.
   341  func (s *regAllocState) freeRegs(m regMask) {
   342  	for m&s.used != 0 {
   343  		s.freeReg(pickReg(m & s.used))
   344  	}
   345  }
   346  
   347  // clobberRegs inserts instructions that clobber registers listed in m.
   348  func (s *regAllocState) clobberRegs(m regMask) {
   349  	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers can contain pointers, so clobber only them
   350  	for m != 0 {
   351  		r := pickReg(m)
   352  		m &^= 1 << r
   353  		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
   354  		s.f.setHome(x, &s.registers[r])
   355  	}
   356  }
   357  
   358  // setOrig records that c's original value is the same as
   359  // v's original value.
   360  func (s *regAllocState) setOrig(c *Value, v *Value) {
   361  	if int(c.ID) >= cap(s.orig) {
   362  		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
   363  		copy(x, s.orig)
   364  		s.f.Cache.freeValueSlice(s.orig)
   365  		s.orig = x
   366  	}
   367  	for int(c.ID) >= len(s.orig) {
   368  		s.orig = append(s.orig, nil)
   369  	}
   370  	if s.orig[c.ID] != nil {
   371  		s.f.Fatalf("orig value set twice %s %s", c, v)
   372  	}
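        	// Store v's original value, not v itself, so a chain of copies
        	// (a copy of a copy) still maps back to the pre-regalloc value.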
   373  	s.orig[c.ID] = s.orig[v.ID]
   374  }
   375  
   376  // assignReg assigns register r to hold c, a copy of v.
   377  // r must be unused.
   378  func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
   379  	if s.f.pass.debug > regDebug {
   380  		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
   381  	}
   382  	if s.regs[r].v != nil {
   383  		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
   384  	}
   385  
   386  	// Update state.
   387  	s.regs[r] = regState{v, c}
   388  	s.values[v.ID].regs |= regMask(1) << r
   389  	s.used |= regMask(1) << r
   390  	s.f.setHome(c, &s.registers[r])
   391  }
   392  
   393  // allocReg chooses a register from the set of registers in mask.
   394  // If there is no unused register, a Value will be kicked out of
   395  // a register to make room.
   396  func (s *regAllocState) allocReg(mask regMask, v *Value) register {
   397  	if v.OnWasmStack {
   398  		return noRegister
   399  	}
   400  
   401  	mask &= s.allocatable
   402  	mask &^= s.nospill
   403  	if mask == 0 {
   404  		s.f.Fatalf("no register available for %s", v.LongString())
   405  	}
   406  
   407  	// Pick an unused register if one is available.
   408  	if mask&^s.used != 0 {
   409  		return pickReg(mask &^ s.used)
   410  	}
   411  
   412  	// Pick a value to spill. Spill the value with the
   413  	// farthest-in-the-future use.
   414  	// TODO: Prefer registers with already spilled Values?
   415  	// TODO: Modify preference using affinity graph.
   416  	// TODO: if a single value is in multiple registers, spill one of them
   417  	// before spilling a value in just a single register.
   418  
   419  	// Find a register to spill. We spill the register containing the value
   420  	// whose next use is as far in the future as possible.
   421  	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
   422  	var r register
   423  	maxuse := int32(-1)
   424  	for t := register(0); t < s.numRegs; t++ {
   425  		if mask>>t&1 == 0 {
   426  			continue
   427  		}
   428  		v := s.regs[t].v
   429  		if n := s.values[v.ID].uses.dist; n > maxuse {
   430  			// v's next use is farther in the future than any value
   431  			// we've seen so far. A new best spill candidate.
   432  			r = t
   433  			maxuse = n
   434  		}
   435  	}
   436  	if maxuse == -1 {
   437  		s.f.Fatalf("couldn't find register to spill")
   438  	}
   439  
   440  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   441  		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
   442  		// So if there is still a free register, the allocation should have picked that one in the first place instead of
   443  		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
   444  		s.freeReg(r)
   445  		return r
   446  	}
   447  
   448  	// Try to move it around before kicking out, if there is a free register.
   449  	// We generate a Copy and record it. It will be deleted if never used.
   450  	v2 := s.regs[r].v
   451  	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
   452  	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
   453  		r2 := pickReg(m)
   454  		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
   455  		s.copies[c] = false
   456  		if s.f.pass.debug > regDebug {
   457  			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
   458  		}
   459  		s.setOrig(c, v2)
   460  		s.assignReg(r2, v2, c)
   461  	}
   462  	s.freeReg(r)
   463  	return r
   464  }
   465  
   466  // makeSpill returns a Value which represents the spilled value of v.
   467  // b is the block in which the spill is used.
   468  func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
   469  	vi := &s.values[v.ID]
   470  	if vi.spill != nil {
   471  		// Final block not known - keep track of subtree where restores reside.
   472  		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
   473  		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
   474  		return vi.spill
   475  	}
   476  	// Make a spill for v. We don't know where we want
   477  	// to put it yet, so we leave it blockless for now.
   478  	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
   479  	// We also don't know what the spill's arg will be.
   480  	// Leave it argless for now.
   481  	s.setOrig(spill, v)
   482  	vi.spill = spill
   483  	vi.restoreMin = s.sdom[b.ID].entry
   484  	vi.restoreMax = s.sdom[b.ID].exit
   485  	return spill
   486  }
   487  
   488  // allocValToReg allocates v to a register selected from regMask and
   489  // returns the register copy of v. Any previous user is kicked out and spilled
   490  // (if necessary). Load code is added at the current pc. If nospill is set the
   491  // allocated register is marked nospill so the assignment cannot be
   492  // undone until the caller allows it by clearing nospill. Returns a
   493  // *Value which is either v or a copy of v allocated to the chosen register.
   494  func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
   495  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
   496  		c := v.copyIntoWithXPos(s.curBlock, pos)
   497  		c.OnWasmStack = true
   498  		s.setOrig(c, v)
   499  		return c
   500  	}
   501  	if v.OnWasmStack {
   502  		return v
   503  	}
   504  
   505  	vi := &s.values[v.ID]
   506  	pos = pos.WithNotStmt()
   507  	// Check if v is already in a requested register.
   508  	if mask&vi.regs != 0 {
   509  		r := pickReg(mask & vi.regs)
   510  		if s.regs[r].v != v || s.regs[r].c == nil {
   511  			panic("bad register state")
   512  		}
   513  		if nospill {
   514  			s.nospill |= regMask(1) << r
   515  		}
   516  		return s.regs[r].c
   517  	}
   518  
   519  	var r register
   520  	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
   521  	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
   522  	if !onWasmStack {
   523  		// Allocate a register.
   524  		r = s.allocReg(mask, v)
   525  	}
   526  
   527  	// Allocate v to the new register.
   528  	var c *Value
   529  	if vi.regs != 0 {
   530  		// Copy from a register that v is already in.
   531  		r2 := pickReg(vi.regs)
   532  		if s.regs[r2].v != v {
   533  			panic("bad register state")
   534  		}
   535  		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
   536  	} else if v.rematerializeable() {
   537  		// Rematerialize instead of loading from the spill location.
   538  		c = v.copyIntoWithXPos(s.curBlock, pos)
   539  	} else {
   540  		// Load v from its spill location.
   541  		spill := s.makeSpill(v, s.curBlock)
   542  		if s.f.pass.debug > logSpills {
   543  			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
   544  		}
   545  		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
   546  	}
   547  
   548  	s.setOrig(c, v)
   549  
   550  	if onWasmStack {
   551  		c.OnWasmStack = true
   552  		return c
   553  	}
   554  
   555  	s.assignReg(r, v, c)
   556  	if c.Op == OpLoadReg && s.isGReg(r) {
   557  		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
   558  	}
   559  	if nospill {
   560  		s.nospill |= regMask(1) << r
   561  	}
   562  	return c
   563  }
   564  
   565  // isLeaf reports whether f is a leaf function, i.e. makes no calls (tail calls don't count).
   566  func isLeaf(f *Func) bool {
   567  	for _, b := range f.Blocks {
   568  		for _, v := range b.Values {
   569  			if v.Op.IsCall() && !v.Op.IsTailCall() {
   570  				// tail call is not counted as it does not save the return PC or need a frame
   571  				return false
   572  			}
   573  		}
   574  	}
   575  	return true
   576  }
   577  
   578  func (s *regAllocState) init(f *Func) {
   579  	s.f = f
   580  	s.f.RegAlloc = s.f.Cache.locs[:0]
   581  	s.registers = f.Config.registers
   582  	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
   583  		s.f.Fatalf("bad number of registers: %d", nr)
   584  	} else {
   585  		s.numRegs = register(nr)
   586  	}
   587  	// Locate SP, SB, and g registers.
   588  	s.SPReg = noRegister
   589  	s.SBReg = noRegister
   590  	s.GReg = noRegister
   591  	for r := register(0); r < s.numRegs; r++ {
   592  		switch s.registers[r].String() {
   593  		case "SP":
   594  			s.SPReg = r
   595  		case "SB":
   596  			s.SBReg = r
   597  		case "g":
   598  			s.GReg = r
   599  		}
   600  	}
   601  	// Make sure we found all required registers.
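        	// (This is a value switch on noRegister: the first case whose
        	// register was never assigned triggers the matching error.)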
   602  	switch noRegister {
   603  	case s.SPReg:
   604  		s.f.Fatalf("no SP register found")
   605  	case s.SBReg:
   606  		s.f.Fatalf("no SB register found")
   607  	case s.GReg:
   608  		if f.Config.hasGReg {
   609  			s.f.Fatalf("no g register found")
   610  		}
   611  	}
   612  
   613  	// Figure out which registers we're allowed to use.
   614  	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
   615  	s.allocatable &^= 1 << s.SPReg
   616  	s.allocatable &^= 1 << s.SBReg
   617  	if s.f.Config.hasGReg {
   618  		s.allocatable &^= 1 << s.GReg
   619  	}
   620  	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
   621  		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
   622  	}
   623  	if s.f.Config.LinkReg != -1 {
   624  		if isLeaf(f) {
   625  			// Leaf functions don't save/restore the link register.
   626  			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
   627  		}
   628  	}
   629  	if s.f.Config.ctxt.Flag_dynlink {
   630  		switch s.f.Config.arch {
   631  		case "386":
   632  			// nothing to do.
   633  			// Note that for Flag_shared (position independent code)
   634  			// we do need to be careful, but that carefulness is hidden
   635  			// in the rewrite rules so we always have a free register
   636  			// available for global load/stores. See _gen/386.rules (search for Flag_shared).
   637  		case "amd64":
   638  			s.allocatable &^= 1 << 15 // R15
   639  		case "arm":
   640  			s.allocatable &^= 1 << 9 // R9
   641  		case "arm64":
   642  			// nothing to do
   643  		case "ppc64le": // R2 already reserved.
   644  			// nothing to do
   645  		case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved.
   646  			// nothing to do
   647  		case "s390x":
   648  			s.allocatable &^= 1 << 11 // R11
   649  		default:
   650  			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
   651  		}
   652  	}
   653  
   654  	// Linear scan register allocation can be influenced by the order in which blocks appear.
   655  	// Decouple the register allocation order from the generated block order.
   656  	// This also creates an opportunity for experiments to find a better order.
   657  	s.visitOrder = layoutRegallocOrder(f)
   658  
   659  	// Compute block order. This array allows us to distinguish forward edges
   660  	// from backward edges and compute how far they go.
   661  	s.blockOrder = make([]int32, f.NumBlocks())
   662  	for i, b := range s.visitOrder {
   663  		s.blockOrder[b.ID] = int32(i)
   664  	}
   665  
   666  	s.regs = make([]regState, s.numRegs)
   667  	nv := f.NumValues()
   668  	if cap(s.f.Cache.regallocValues) >= nv {
   669  		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
   670  	} else {
   671  		s.f.Cache.regallocValues = make([]valState, nv)
   672  	}
   673  	s.values = s.f.Cache.regallocValues
   674  	s.orig = s.f.Cache.allocValueSlice(nv)
   675  	s.copies = make(map[*Value]bool)
   676  	for _, b := range s.visitOrder {
   677  		for _, v := range b.Values {
   678  			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
   679  				s.values[v.ID].needReg = true
   680  				s.values[v.ID].rematerializeable = v.rematerializeable()
   681  				s.orig[v.ID] = v
   682  			}
   683  			// Note: needReg is false for values returning Tuple types.
   684  			// Instead, we mark the corresponding Selects as needReg.
   685  		}
   686  	}
   687  	s.computeLive()
   688  
   689  	s.endRegs = make([][]endReg, f.NumBlocks())
   690  	s.startRegs = make([][]startReg, f.NumBlocks())
   691  	s.spillLive = make([][]ID, f.NumBlocks())
   692  	s.sdom = f.Sdom()
   693  
   694  	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
   695  	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   696  		canLiveOnStack := f.newSparseSet(f.NumValues())
   697  		defer f.retSparseSet(canLiveOnStack)
   698  		for _, b := range f.Blocks {
   699  			// New block. Clear candidate set.
   700  			canLiveOnStack.clear()
   701  			for _, c := range b.ControlValues() {
   702  				if c.Uses == 1 && !opcodeTable[c.Op].generic {
   703  					canLiveOnStack.add(c.ID)
   704  				}
   705  			}
   706  			// Walking backwards.
   707  			for i := len(b.Values) - 1; i >= 0; i-- {
   708  				v := b.Values[i]
   709  				if canLiveOnStack.contains(v.ID) {
   710  					v.OnWasmStack = true
   711  				} else {
   712  					// Value cannot live on the stack. Values are not allowed to be reordered, so clear the candidate set.
   713  					canLiveOnStack.clear()
   714  				}
   715  				for _, arg := range v.Args {
   716  					// Value can live on the stack if:
   717  					// - it is only used once
   718  					// - it is used in the same basic block
   719  					// - it is not a "mem" value
   720  					// - it is a WebAssembly op
   721  					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
   722  						canLiveOnStack.add(arg.ID)
   723  					}
   724  				}
   725  			}
   726  		}
   727  	}
   728  
   729  	// The clobberdeadreg experiment inserts code to clobber dead registers
   730  	// at call sites.
   731  	// Ignore huge functions to avoid doing too much work.
   732  	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
   733  		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
   734  		s.doClobber = true
   735  	}
   736  }
   737  
   738  func (s *regAllocState) close() {
   739  	s.f.Cache.freeValueSlice(s.orig)
   740  }
   741  
   742  // addUse adds a use record for id at distance dist from the start of the block.
   743  // All calls to addUse must happen with nonincreasing dist.
   744  func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
   745  	r := s.freeUseRecords
   746  	if r != nil {
   747  		s.freeUseRecords = r.next
   748  	} else {
   749  		r = &use{}
   750  	}
   751  	r.dist = dist
   752  	r.pos = pos
   753  	r.next = s.values[id].uses
   754  	s.values[id].uses = r
   755  	if r.next != nil && dist > r.next.dist {
   756  		s.f.Fatalf("uses added in wrong order")
   757  	}
   758  }
   759  
   760  // advanceUses advances the uses of v's args from the state before v to the state after v.
   761  // Any values which have no more uses are deallocated from registers.
   762  func (s *regAllocState) advanceUses(v *Value) {
   763  	for _, a := range v.Args {
   764  		if !s.values[a.ID].needReg {
   765  			continue
   766  		}
   767  		ai := &s.values[a.ID]
   768  		r := ai.uses
   769  		ai.uses = r.next
   770  		if r.next == nil {
   771  			// Value is dead, free all registers that hold it.
   772  			s.freeRegs(ai.regs)
   773  		}
   774  		r.next = s.freeUseRecords
   775  		s.freeUseRecords = r
   776  	}
   777  }
   778  
   779  // liveAfterCurrentInstruction reports whether v is live after
   780  // the current instruction is completed.  v must be used by the
   781  // current instruction.
   782  func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
   783  	u := s.values[v.ID].uses
   784  	if u == nil {
   785  		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
   786  	}
   787  	d := u.dist
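        	// Skip past all uses at the same distance; they all belong to the
        	// current instruction, which may use v more than once.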
   788  	for u != nil && u.dist == d {
   789  		u = u.next
   790  	}
   791  	return u != nil && u.dist > d
   792  }
   793  
   794  // setState sets the state of the registers to that encoded in regs.
   795  func (s *regAllocState) setState(regs []endReg) {
   796  	s.freeRegs(s.used)
   797  	for _, x := range regs {
   798  		s.assignReg(x.r, x.v, x.c)
   799  	}
   800  }
   801  
   802  // compatRegs returns the set of registers which can store a type t.
   803  func (s *regAllocState) compatRegs(t *types.Type) regMask {
   804  	var m regMask
   805  	if t.IsTuple() || t.IsFlags() {
   806  		return 0
   807  	}
   808  	if t.IsFloat() || t == types.TypeInt128 {
   809  		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
   810  			m = s.f.Config.fp32RegMask
   811  		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
   812  			m = s.f.Config.fp64RegMask
   813  		} else {
   814  			m = s.f.Config.fpRegMask
   815  		}
   816  	} else {
   817  		m = s.f.Config.gpRegMask
   818  	}
   819  	return m & s.allocatable
   820  }
   821  
   822  // regspec returns the regInfo for operation op.
   823  func (s *regAllocState) regspec(v *Value) regInfo {
   824  	op := v.Op
   825  	if op == OpConvert {
   826  		// OpConvert is a generic op, so it doesn't have a
   827  		// register set in the static table. It can use any
   828  		// allocatable integer register.
   829  		m := s.allocatable & s.f.Config.gpRegMask
   830  		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
   831  	}
   832  	if op == OpArgIntReg {
   833  		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
   834  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   835  	}
   836  	if op == OpArgFloatReg {
   837  		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
   838  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   839  	}
   840  	if op.IsCall() {
   841  		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
   842  			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
   843  		}
   844  	}
   845  	if op == OpMakeResult && s.f.OwnAux.reg != nil {
   846  		return *s.f.OwnAux.ResultReg(s.f.Config)
   847  	}
   848  	return opcodeTable[op].reg
   849  }
   850  
   851  func (s *regAllocState) isGReg(r register) bool {
   852  	return s.f.Config.hasGReg && s.GReg == r
   853  }
   854  
   855  // Dummy value used to represent the value being held in a temporary register.
   856  var tmpVal Value
   857  
   858  func (s *regAllocState) regalloc(f *Func) {
   859  	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
   860  	defer f.retSparseSet(regValLiveSet)
   861  	var oldSched []*Value
   862  	var phis []*Value
   863  	var phiRegs []register
   864  	var args []*Value
   865  
   866  	// Data structure used for computing desired registers.
   867  	var desired desiredState
   868  
   869  	// Desired registers for inputs & outputs for each instruction in the block.
   870  	type dentry struct {
   871  		out [4]register    // desired output registers
   872  		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
   873  	}
   874  	var dinfo []dentry
   875  
   876  	if f.Entry != f.Blocks[0] {
   877  		f.Fatalf("entry block must be first")
   878  	}
   879  
   880  	for _, b := range s.visitOrder {
   881  		if s.f.pass.debug > regDebug {
   882  			fmt.Printf("Begin processing block %v\n", b)
   883  		}
   884  		s.curBlock = b
   885  
   886  		// Initialize regValLiveSet and uses fields for this block.
   887  		// Walk backwards through the block doing liveness analysis.
   888  		regValLiveSet.clear()
   889  		for _, e := range s.live[b.ID] {
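        			// e.dist is measured from the end of b, so offset by
        			// len(b.Values) to get a distance from the start of b.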
   890  			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
   891  			regValLiveSet.add(e.ID)
   892  		}
   893  		for _, v := range b.ControlValues() {
   894  			if s.values[v.ID].needReg {
   895  				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
   896  				regValLiveSet.add(v.ID)
   897  			}
   898  		}
   899  		for i := len(b.Values) - 1; i >= 0; i-- {
   900  			v := b.Values[i]
   901  			regValLiveSet.remove(v.ID)
   902  			if v.Op == OpPhi {
   903  				// Remove v from the live set, but don't add
   904  				// any inputs. This is the state the len(b.Preds)>1
   905  				// case below desires; it wants to process phis specially.
   906  				continue
   907  			}
   908  			if opcodeTable[v.Op].call {
   909  				// Function call clobbers all the registers but SP and SB.
   910  				regValLiveSet.clear()
   911  				if s.sp != 0 && s.values[s.sp].uses != nil {
   912  					regValLiveSet.add(s.sp)
   913  				}
   914  				if s.sb != 0 && s.values[s.sb].uses != nil {
   915  					regValLiveSet.add(s.sb)
   916  				}
   917  			}
   918  			for _, a := range v.Args {
   919  				if !s.values[a.ID].needReg {
   920  					continue
   921  				}
   922  				s.addUse(a.ID, int32(i), v.Pos)
   923  				regValLiveSet.add(a.ID)
   924  			}
   925  		}
   926  		if s.f.pass.debug > regDebug {
   927  			fmt.Printf("use distances for %s\n", b)
   928  			for i := range s.values {
   929  				vi := &s.values[i]
   930  				u := vi.uses
   931  				if u == nil {
   932  					continue
   933  				}
   934  				fmt.Printf("  v%d:", i)
   935  				for u != nil {
   936  					fmt.Printf(" %d", u.dist)
   937  					u = u.next
   938  				}
   939  				fmt.Println()
   940  			}
   941  		}
   942  
   943  		// Make a copy of the block schedule so we can generate a new one in place.
   944  		// We make a separate copy for phis and regular values.
   945  		nphi := 0
   946  		for _, v := range b.Values {
   947  			if v.Op != OpPhi {
   948  				break
   949  			}
   950  			nphi++
   951  		}
   952  		phis = append(phis[:0], b.Values[:nphi]...)
   953  		oldSched = append(oldSched[:0], b.Values[nphi:]...)
   954  		b.Values = b.Values[:0]
   955  
   956  		// Initialize start state of block.
   957  		if b == f.Entry {
   958  			// Regalloc state is empty to start.
   959  			if nphi > 0 {
   960  				f.Fatalf("phis in entry block")
   961  			}
   962  		} else if len(b.Preds) == 1 {
   963  			// Start regalloc state with the end state of the previous block.
   964  			s.setState(s.endRegs[b.Preds[0].b.ID])
   965  			if nphi > 0 {
   966  				f.Fatalf("phis in single-predecessor block")
   967  			}
   968  			// Drop any values which are no longer live.
   969  			// This may happen because at the end of p, a value may be
   970  			// live but only used by some other successor of p.
   971  			for r := register(0); r < s.numRegs; r++ {
   972  				v := s.regs[r].v
   973  				if v != nil && !regValLiveSet.contains(v.ID) {
   974  					s.freeReg(r)
   975  				}
   976  			}
   977  		} else {
   978  			// This is the complicated case. We have more than one predecessor,
   979  			// which means we may have Phi ops.
   980  
   981  			// Start with the final register state of the predecessor with the fewest live spills.
   982  			// This choice is based on two observations:
   983  			// 1. Fewer live spills indicate lower register pressure on that path,
   984  			//    so the values of this block are more likely to be allocated to registers.
   985  			// 2. It avoids predecessors that contain function calls, since such
   986  			//    predecessors usually generate many spills and lose the
   987  			//    previous allocation state.
   988  			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
   989  			// an impact on the code size and compiler speed. But it is not easy to find a simple
   990  			// and efficient method that combines multiple factors.
   991  			idx := -1
   992  			for i, p := range b.Preds {
   993  				// If the predecessor has not been visited yet, skip it because its end state
   994  				// (endRegs and spillLive) has not been computed yet.
   995  				pb := p.b
   996  				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
   997  					continue
   998  				}
   999  				if idx == -1 {
  1000  					idx = i
  1001  					continue
  1002  				}
  1003  				pSel := b.Preds[idx].b
  1004  				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
  1005  					idx = i
  1006  				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
  1007  					// Use a bit of likelihood information. After the critical pass, pb and pSel must
  1008  					// be plain blocks, so check edge pb->pb.Preds instead of edge pb->b.
  1009  					// TODO: improve the prediction of the likely predecessor. The following
  1010  					// method is only suitable for the simplest cases. For complex cases,
  1011  					// the prediction may be inaccurate, but this does not affect the
  1012  					// correctness of the program.
  1013  					// According to the layout algorithm, the predecessor with the
  1014  					// smaller blockOrder is the true branch, and testing shows that
  1015  					// choosing the predecessor with the smaller blockOrder is
  1016  					// better than making no choice at all.
  1017  					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
  1018  						idx = i
  1019  					}
  1020  				}
  1021  			}
  1022  			if idx < 0 {
  1023  				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
  1024  			}
  1025  			p := b.Preds[idx].b
  1026  			s.setState(s.endRegs[p.ID])
  1027  
  1028  			if s.f.pass.debug > regDebug {
  1029  				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
  1030  				for _, x := range s.endRegs[p.ID] {
  1031  					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
  1032  				}
  1033  			}
  1034  
  1035  			// Decide on registers for phi ops. Use the registers determined
  1036  			// by the primary predecessor if we can.
  1037  			// TODO: pick best of (already processed) predecessors?
  1038  			// Majority vote? Deepest nesting level?
  1039  			phiRegs = phiRegs[:0]
  1040  			var phiUsed regMask
  1041  
  1042  			for _, v := range phis {
  1043  				if !s.values[v.ID].needReg {
  1044  					phiRegs = append(phiRegs, noRegister)
  1045  					continue
  1046  				}
  1047  				a := v.Args[idx]
  1048  				// Some instructions target not-allocatable registers.
  1049  				// They're not suitable for further (phi-function) allocation.
  1050  				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
  1051  				if m != 0 {
  1052  					r := pickReg(m)
  1053  					phiUsed |= regMask(1) << r
  1054  					phiRegs = append(phiRegs, r)
  1055  				} else {
  1056  					phiRegs = append(phiRegs, noRegister)
  1057  				}
  1058  			}
  1059  
  1060  			// Second pass - deallocate all in-register phi inputs.
  1061  			for i, v := range phis {
  1062  				if !s.values[v.ID].needReg {
  1063  					continue
  1064  				}
  1065  				a := v.Args[idx]
  1066  				r := phiRegs[i]
  1067  				if r == noRegister {
  1068  					continue
  1069  				}
  1070  				if regValLiveSet.contains(a.ID) {
  1071  					// Input value is still live (it is used by something other than Phi).
  1072  					// Try to move it around before kicking out, if there is a free register.
  1073  					// We generate a Copy in the predecessor block and record it. It will be
  1074  					// deleted later if never used.
  1075  					//
  1076  					// Pick a free register. At this point some registers used in the predecessor
  1077  					// block may have been deallocated. Those are the ones used for Phis. Exclude
  1078  					// them (and they are not going to be helpful anyway).
  1079  					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
  1080  					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
  1081  						r2 := pickReg(m)
  1082  						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
  1083  						s.copies[c] = false
  1084  						if s.f.pass.debug > regDebug {
  1085  							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
  1086  						}
  1087  						s.setOrig(c, a)
  1088  						s.assignReg(r2, a, c)
  1089  						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
  1090  					}
  1091  				}
  1092  				s.freeReg(r)
  1093  			}
  1094  
  1095  			// Copy phi ops into new schedule.
  1096  			b.Values = append(b.Values, phis...)
  1097  
  1098  			// Third pass - pick registers for phis whose input
  1099  			// was not in a register in the primary predecessor.
  1100  			for i, v := range phis {
  1101  				if !s.values[v.ID].needReg {
  1102  					continue
  1103  				}
  1104  				if phiRegs[i] != noRegister {
  1105  					continue
  1106  				}
  1107  				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
  1108  				// If another input of v is already in a register and that register
  1109  				// is available, prefer it; this can save unnecessary copies.
  1110  				for i, pe := range b.Preds {
  1111  					if i == idx {
  1112  						continue
  1113  					}
  1114  					ri := noRegister
  1115  					for _, er := range s.endRegs[pe.b.ID] {
  1116  						if er.v == s.orig[v.Args[i].ID] {
  1117  							ri = er.r
  1118  							break
  1119  						}
  1120  					}
  1121  					if ri != noRegister && m>>ri&1 != 0 {
  1122  						m = regMask(1) << ri
  1123  						break
  1124  					}
  1125  				}
  1126  				if m != 0 {
  1127  					r := pickReg(m)
  1128  					phiRegs[i] = r
  1129  					phiUsed |= regMask(1) << r
  1130  				}
  1131  			}
  1132  
  1133  			// Set registers for phis. Add phi spill code.
  1134  			for i, v := range phis {
  1135  				if !s.values[v.ID].needReg {
  1136  					continue
  1137  				}
  1138  				r := phiRegs[i]
  1139  				if r == noRegister {
  1140  					// stack-based phi
  1141  					// Spills will be inserted in all the predecessors below.
  1142  					s.values[v.ID].spill = v // v starts life spilled
  1143  					continue
  1144  				}
  1145  				// register-based phi
  1146  				s.assignReg(r, v, v)
  1147  			}
  1148  
  1149  			// Deallocate any values which are no longer live. Phis are excluded.
  1150  			for r := register(0); r < s.numRegs; r++ {
  1151  				if phiUsed>>r&1 != 0 {
  1152  					continue
  1153  				}
  1154  				v := s.regs[r].v
  1155  				if v != nil && !regValLiveSet.contains(v.ID) {
  1156  					s.freeReg(r)
  1157  				}
  1158  			}
  1159  
  1160  			// Save the starting state for use by merge edges.
  1161  			// We append to a stack allocated variable that we'll
  1162  			// later copy into s.startRegs in one fell swoop, to save
  1163  			// on allocations.
  1164  			regList := make([]startReg, 0, 32)
  1165  			for r := register(0); r < s.numRegs; r++ {
  1166  				v := s.regs[r].v
  1167  				if v == nil {
  1168  					continue
  1169  				}
  1170  				if phiUsed>>r&1 != 0 {
  1171  					// Skip registers that phis used, we'll handle those
  1172  					// specially during merge edge processing.
  1173  					continue
  1174  				}
  1175  				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
  1176  			}
  1177  			s.startRegs[b.ID] = make([]startReg, len(regList))
  1178  			copy(s.startRegs[b.ID], regList)
  1179  
  1180  			if s.f.pass.debug > regDebug {
  1181  				fmt.Printf("after phis\n")
  1182  				for _, x := range s.startRegs[b.ID] {
  1183  					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
  1184  				}
  1185  			}
  1186  		}
  1187  
  1188  		// Allocate space to record the desired registers for each value.
  1189  		if l := len(oldSched); cap(dinfo) < l {
  1190  			dinfo = make([]dentry, l)
  1191  		} else {
  1192  			dinfo = dinfo[:l]
  1193  			for i := range dinfo {
  1194  				dinfo[i] = dentry{}
  1195  			}
  1196  		}
  1197  
  1198  		// Load static desired register info at the end of the block.
  1199  		desired.copy(&s.desired[b.ID])
  1200  
  1201  		// Check actual assigned registers at the start of the next block(s).
  1202  		// Dynamically assigned registers will trump the static
  1203  		// desired registers computed during liveness analysis.
  1204  		// Note that we do this phase after startRegs is set above, so that
  1205  		// we get the right behavior for a block which branches to itself.
  1206  		for _, e := range b.Succs {
  1207  			succ := e.b
  1208  			// TODO: prioritize likely successor?
  1209  			for _, x := range s.startRegs[succ.ID] {
  1210  				desired.add(x.v.ID, x.r)
  1211  			}
  1212  			// Process phi ops in succ.
  1213  			pidx := e.i
  1214  			for _, v := range succ.Values {
  1215  				if v.Op != OpPhi {
  1216  					break
  1217  				}
  1218  				if !s.values[v.ID].needReg {
  1219  					continue
  1220  				}
  1221  				rp, ok := s.f.getHome(v.ID).(*Register)
  1222  				if !ok {
  1223  					// If v is not assigned a register, pick a register assigned to one of v's inputs.
  1224  					// Hopefully v will get assigned that register later.
  1225  					// If the inputs have allocated register information, add it to desired,
  1226  					// which may reduce spill or copy operations when the register is available.
  1227  					for _, a := range v.Args {
  1228  						rp, ok = s.f.getHome(a.ID).(*Register)
  1229  						if ok {
  1230  							break
  1231  						}
  1232  					}
  1233  					if !ok {
  1234  						continue
  1235  					}
  1236  				}
  1237  				desired.add(v.Args[pidx].ID, register(rp.num))
  1238  			}
  1239  		}
  1240  		// Walk values backwards computing desired register info.
  1241  		// See computeLive for more comments.
  1242  		for i := len(oldSched) - 1; i >= 0; i-- {
  1243  			v := oldSched[i]
  1244  			prefs := desired.remove(v.ID)
  1245  			regspec := s.regspec(v)
  1246  			desired.clobber(regspec.clobbers)
  1247  			for _, j := range regspec.inputs {
  1248  				if countRegs(j.regs) != 1 {
  1249  					continue
  1250  				}
  1251  				desired.clobber(j.regs)
  1252  				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  1253  			}
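        			// Ops that must produce their result in an input's register
        			// pass the output's desired registers through to arg0 (and
        			// also to arg1 when the op is commutative).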
  1254  			if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  1255  				if opcodeTable[v.Op].commutative {
  1256  					desired.addList(v.Args[1].ID, prefs)
  1257  				}
  1258  				desired.addList(v.Args[0].ID, prefs)
  1259  			}
  1260  			// Save desired registers for this value.
  1261  			dinfo[i].out = prefs
  1262  			for j, a := range v.Args {
  1263  				if j >= len(dinfo[i].in) {
  1264  					break
  1265  				}
  1266  				dinfo[i].in[j] = desired.get(a.ID)
  1267  			}
  1268  		}
  1269  
  1270  		// Process all the non-phi values.
  1271  		for idx, v := range oldSched {
  1272  			tmpReg := noRegister
  1273  			if s.f.pass.debug > regDebug {
  1274  				fmt.Printf("  processing %s\n", v.LongString())
  1275  			}
  1276  			regspec := s.regspec(v)
  1277  			if v.Op == OpPhi {
  1278  				f.Fatalf("phi %s not at start of block", v)
  1279  			}
  1280  			if v.Op == OpSP {
  1281  				s.assignReg(s.SPReg, v, v)
  1282  				b.Values = append(b.Values, v)
  1283  				s.advanceUses(v)
  1284  				s.sp = v.ID
  1285  				continue
  1286  			}
  1287  			if v.Op == OpSB {
  1288  				s.assignReg(s.SBReg, v, v)
  1289  				b.Values = append(b.Values, v)
  1290  				s.advanceUses(v)
  1291  				s.sb = v.ID
  1292  				continue
  1293  			}
  1294  			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
  1295  				if s.values[v.ID].needReg {
  1296  					if v.Op == OpSelectN {
  1297  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
  1298  					} else {
  1299  						i := 0
  1300  						if v.Op == OpSelect1 {
  1301  							i = 1
  1302  						}
  1303  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
  1304  					}
  1305  				}
  1306  				b.Values = append(b.Values, v)
  1307  				s.advanceUses(v)
  1308  				continue
  1309  			}
  1310  			if v.Op == OpGetG && s.f.Config.hasGReg {
  1311  				// use hardware g register
  1312  				if s.regs[s.GReg].v != nil {
  1313  					s.freeReg(s.GReg) // kick out the old value
  1314  				}
  1315  				s.assignReg(s.GReg, v, v)
  1316  				b.Values = append(b.Values, v)
  1317  				s.advanceUses(v)
  1318  				continue
  1319  			}
  1320  			if v.Op == OpArg {
  1321  				// Args are "pre-spilled" values. We don't allocate
  1322  				// any register here. We just set the spill pointer to point
  1323  				// at the value itself; any later use will issue a restore.
  1324  				s.values[v.ID].spill = v
  1325  				b.Values = append(b.Values, v)
  1326  				s.advanceUses(v)
  1327  				continue
  1328  			}
  1329  			if v.Op == OpKeepAlive {
  1330  				// Make sure the argument to v is still live here.
  1331  				s.advanceUses(v)
  1332  				a := v.Args[0]
  1333  				vi := &s.values[a.ID]
  1334  				if vi.regs == 0 && !vi.rematerializeable {
  1335  					// Use the spill location.
  1336  					// This forces later liveness analysis to make the
  1337  					// value live at this point.
  1338  					v.SetArg(0, s.makeSpill(a, b))
  1339  				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
  1340  					// Rematerializeable value with a gc.Node. This is the address of
  1341  					// a stack object (e.g. an LEAQ). Keep the object live.
  1342  					// Change it to VarLive, which is what plive expects for locals.
  1343  					v.Op = OpVarLive
  1344  					v.SetArgs1(v.Args[1])
  1345  					v.Aux = a.Aux
  1346  				} else {
  1347  					// In-register and rematerializeable values are already live.
  1348  					// These are typically rematerializeable constants like nil,
  1349  					// or values of a variable that were modified since the last call.
  1350  					v.Op = OpCopy
  1351  					v.SetArgs1(v.Args[1])
  1352  				}
  1353  				b.Values = append(b.Values, v)
  1354  				continue
  1355  			}
  1356  			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
  1357  				// No register allocation required (or none specified yet)
  1358  				if s.doClobber && v.Op.IsCall() {
  1359  					s.clobberRegs(regspec.clobbers)
  1360  				}
  1361  				s.freeRegs(regspec.clobbers)
  1362  				b.Values = append(b.Values, v)
  1363  				s.advanceUses(v)
  1364  				continue
  1365  			}
  1366  
  1367  			if s.values[v.ID].rematerializeable {
  1368  				// Value is rematerializeable, don't issue it here.
  1369  				// It will get issued just before each use (see
  1370  				// allocValueToReg).
  1371  				for _, a := range v.Args {
  1372  					a.Uses--
  1373  				}
  1374  				s.advanceUses(v)
  1375  				continue
  1376  			}
  1377  
  1378  			if s.f.pass.debug > regDebug {
  1379  				fmt.Printf("value %s\n", v.LongString())
  1380  				fmt.Printf("  out:")
  1381  				for _, r := range dinfo[idx].out {
  1382  					if r != noRegister {
  1383  						fmt.Printf(" %s", &s.registers[r])
  1384  					}
  1385  				}
  1386  				fmt.Println()
  1387  				for i := 0; i < len(v.Args) && i < 3; i++ {
  1388  					fmt.Printf("  in%d:", i)
  1389  					for _, r := range dinfo[idx].in[i] {
  1390  						if r != noRegister {
  1391  							fmt.Printf(" %s", &s.registers[r])
  1392  						}
  1393  					}
  1394  					fmt.Println()
  1395  				}
  1396  			}
  1397  
  1398  			// Move arguments to registers.
  1399  			// First, if an arg must be in a specific register and it is already
  1400  			// in place, keep it.
  1401  			args = append(args[:0], make([]*Value, len(v.Args))...)
  1402  			for i, a := range v.Args {
  1403  				if !s.values[a.ID].needReg {
  1404  					args[i] = a
  1405  				}
  1406  			}
  1407  			for _, i := range regspec.inputs {
  1408  				mask := i.regs
  1409  				if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
  1410  					args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1411  				}
  1412  			}
  1413  			// Then, if an arg must be in a specific register and that
  1414  			// register is free, allocate that one. Otherwise when processing
  1415  			// another input we may kick a value into the free register, which
  1416  			// then will be kicked out again.
  1417  			// This is a common case for passing-in-register arguments for
  1418  			// function calls.
  1419  			for {
  1420  				freed := false
  1421  				for _, i := range regspec.inputs {
  1422  					if args[i.idx] != nil {
  1423  						continue // already allocated
  1424  					}
  1425  					mask := i.regs
  1426  					if countRegs(mask) == 1 && mask&^s.used != 0 {
  1427  						args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1428  						// If the input is in other registers that will be clobbered by v,
  1429  						// or the input is dead, free the registers. This may make room
  1430  						// for other inputs.
  1431  						oldregs := s.values[v.Args[i.idx].ID].regs
  1432  						if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
  1433  							s.freeRegs(oldregs &^ mask &^ s.nospill)
  1434  							freed = true
  1435  						}
  1436  					}
  1437  				}
  1438  				if !freed {
  1439  					break
  1440  				}
  1441  			}
  1442  			// Last, allocate remaining ones, in an ordering defined
  1443  			// by the register specification (most constrained first).
  1444  			for _, i := range regspec.inputs {
  1445  				if args[i.idx] != nil {
  1446  					continue // already allocated
  1447  				}
  1448  				mask := i.regs
  1449  				if mask&s.values[v.Args[i.idx].ID].regs == 0 {
  1450  					// Need a new register for the input.
  1451  					mask &= s.allocatable
  1452  					mask &^= s.nospill
  1453  					// Use the desired register if available.
  1454  					if i.idx < 3 {
  1455  						for _, r := range dinfo[idx].in[i.idx] {
  1456  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1457  								// Desired register is allowed and unused.
  1458  								mask = regMask(1) << r
  1459  								break
  1460  							}
  1461  						}
  1462  					}
  1463  					// Avoid registers we're saving for other values.
  1464  					if mask&^desired.avoid != 0 {
  1465  						mask &^= desired.avoid
  1466  					}
  1467  				}
  1468  				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1469  			}
  1470  
  1471  			// If the output clobbers the input register, make sure we have
  1472  			// at least two copies of the input value so we don't
  1473  			// have to reload it from the spill location.
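        			// For example (AMD64 names, illustrative): v = ADDQ x y
        			// overwrites the register holding x. If x is dead, cheap
        			// to recompute, or held in a second register, we can let
        			// the instruction clobber it (swapping operands first if
        			// that helps and the op is commutative); otherwise we
        			// copy x to a fresh register below before issuing v.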
  1474  			if opcodeTable[v.Op].resultInArg0 {
  1475  				var m regMask
  1476  				if !s.liveAfterCurrentInstruction(v.Args[0]) {
  1477  					// arg0 is dead.  We can clobber its register.
  1478  					goto ok
  1479  				}
  1480  				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
  1481  					args[0], args[1] = args[1], args[0]
  1482  					goto ok
  1483  				}
  1484  				if s.values[v.Args[0].ID].rematerializeable {
  1485  					// We can rematerialize the input, don't worry about clobbering it.
  1486  					goto ok
  1487  				}
  1488  				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
  1489  					args[0], args[1] = args[1], args[0]
  1490  					goto ok
  1491  				}
  1492  				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
  1493  					// we have at least 2 copies of arg0.  We can afford to clobber one.
  1494  					goto ok
  1495  				}
  1496  				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
  1497  					args[0], args[1] = args[1], args[0]
  1498  					goto ok
  1499  				}
  1500  
  1501  				// We can't overwrite arg0 (or arg1, if commutative).  So we
  1502  				// need to make a copy of an input so we have a register we can modify.
  1503  
  1504  				// Possible new registers to copy into.
  1505  				m = s.compatRegs(v.Args[0].Type) &^ s.used
  1506  				if m == 0 {
  1507  					// No free registers.  In this case we'll just clobber
  1508  					// an input and future uses of that input must use a restore.
  1509  					// TODO(khr): We should really do this like allocReg does it,
  1510  					// spilling the value with the most distant next use.
  1511  					goto ok
  1512  				}
  1513  
  1514  				// Try to move an input to the desired output, if allowed.
  1515  				for _, r := range dinfo[idx].out {
  1516  					if r != noRegister && (m&regspec.outputs[0].regs)>>r&1 != 0 {
  1517  						m = regMask(1) << r
  1518  						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
  1519  						// Note: we update args[0] so the instruction will
  1520  						// use the register copy we just made.
  1521  						goto ok
  1522  					}
  1523  				}
  1524  				// Try to copy input to its desired location & use its old
  1525  				// location as the result register.
  1526  				for _, r := range dinfo[idx].in[0] {
  1527  					if r != noRegister && m>>r&1 != 0 {
  1528  						m = regMask(1) << r
  1529  						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1530  						s.copies[c] = false
  1531  						// Note: no update to args[0] so the instruction will
  1532  						// use the original copy.
  1533  						goto ok
  1534  					}
  1535  				}
  1536  				if opcodeTable[v.Op].commutative {
  1537  					for _, r := range dinfo[idx].in[1] {
  1538  						if r != noRegister && m>>r&1 != 0 {
  1539  							m = regMask(1) << r
  1540  							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
  1541  							s.copies[c] = false
  1542  							args[0], args[1] = args[1], args[0]
  1543  							goto ok
  1544  						}
  1545  					}
  1546  				}
  1547  				// Avoid future fixed uses if we can.
  1548  				if m&^desired.avoid != 0 {
  1549  					m &^= desired.avoid
  1550  				}
  1551  				// Save input 0 to a new register so we can clobber it.
  1552  				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1553  				s.copies[c] = false
  1554  			}
  1555  
  1556  		ok:
  1557  			// Pick a temporary register if needed.
  1558  			// It should be distinct from all the input registers, so we
  1559  			// allocate it after all the input registers, but before
  1560  			// the input registers are freed via advanceUses below.
  1561  			// (Not all instructions need the temp to be distinct, but it is conservative.)
  1562  			if opcodeTable[v.Op].needIntTemp {
  1563  				m := s.allocatable & s.f.Config.gpRegMask
  1564  				if m&^desired.avoid&^s.nospill != 0 {
  1565  					m &^= desired.avoid
  1566  				}
  1567  				tmpReg = s.allocReg(m, &tmpVal)
  1568  				s.nospill |= regMask(1) << tmpReg
  1569  			}
  1570  
  1571  			// Now that all args are in regs, we're ready to issue the value itself.
  1572  			// Before we pick a register for the output value, allow input registers
  1573  			// to be deallocated. We do this here so that the output can use the
  1574  			// same register as a dying input.
  1575  			if !opcodeTable[v.Op].resultNotInArgs {
  1576  				s.tmpused = s.nospill
  1577  				s.nospill = 0
  1578  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1579  			}
  1580  
  1581  			// Dump any registers which will be clobbered
  1582  			if s.doClobber && v.Op.IsCall() {
  1583  				// clobber registers that are marked as clobber in regmask, but
  1584  				// don't clobber inputs.
  1585  				s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
  1586  			}
  1587  			s.freeRegs(regspec.clobbers)
  1588  			s.tmpused |= regspec.clobbers
  1589  
  1590  			// Pick registers for outputs.
  1591  			{
  1592  				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
  1593  				maxOutIdx := -1
  1594  				var used regMask
  1595  				if tmpReg != noRegister {
  1596  					// Ensure output registers are distinct from the temporary register.
  1597  				// (Not all instructions need this distinctness, but it is conservative.)
  1598  					used |= regMask(1) << tmpReg
  1599  				}
  1600  				for _, out := range regspec.outputs {
  1601  					mask := out.regs & s.allocatable &^ used
  1602  					if mask == 0 {
  1603  						continue
  1604  					}
  1605  					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
  1606  						if !opcodeTable[v.Op].commutative {
  1607  							// Output must use the same register as input 0.
  1608  							r := register(s.f.getHome(args[0].ID).(*Register).num)
  1609  							if mask>>r&1 == 0 {
  1610  								s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
  1611  							}
  1612  							mask = regMask(1) << r
  1613  						} else {
  1614  							// Output must use the same register as input 0 or 1.
  1615  							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
  1616  							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
  1617  							// Check r0 and r1 for desired output register.
  1618  							found := false
  1619  							for _, r := range dinfo[idx].out {
  1620  								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
  1621  									mask = regMask(1) << r
  1622  									found = true
  1623  									if r == r1 {
  1624  										args[0], args[1] = args[1], args[0]
  1625  									}
  1626  									break
  1627  								}
  1628  							}
  1629  							if !found {
  1630  								// Neither are desired, pick r0.
  1631  								mask = regMask(1) << r0
  1632  							}
  1633  						}
  1634  					}
  1635  					if out.idx == 0 { // desired registers only apply to the first element of a tuple result
  1636  						for _, r := range dinfo[idx].out {
  1637  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1638  								// Desired register is allowed and unused.
  1639  								mask = regMask(1) << r
  1640  								break
  1641  							}
  1642  						}
  1643  					}
  1644  					// Avoid registers we're saving for other values.
  1645  					if mask&^desired.avoid&^s.nospill != 0 {
  1646  						mask &^= desired.avoid
  1647  					}
  1648  					r := s.allocReg(mask, v)
  1649  					if out.idx > maxOutIdx {
  1650  						maxOutIdx = out.idx
  1651  					}
  1652  					outRegs[out.idx] = r
  1653  					used |= regMask(1) << r
  1654  					s.tmpused |= regMask(1) << r
  1655  				}
  1656  				// Record register choices
  1657  				if v.Type.IsTuple() {
  1658  					var outLocs LocPair
  1659  					if r := outRegs[0]; r != noRegister {
  1660  						outLocs[0] = &s.registers[r]
  1661  					}
  1662  					if r := outRegs[1]; r != noRegister {
  1663  						outLocs[1] = &s.registers[r]
  1664  					}
  1665  					s.f.setHome(v, outLocs)
  1666  					// Note that subsequent SelectX instructions will do the assignReg calls.
  1667  				} else if v.Type.IsResults() {
  1668  					// preallocate outLocs to the right size, which is maxOutIdx+1
  1669  					outLocs := make(LocResults, maxOutIdx+1)
  1670  					for i := 0; i <= maxOutIdx; i++ {
  1671  						if r := outRegs[i]; r != noRegister {
  1672  							outLocs[i] = &s.registers[r]
  1673  						}
  1674  					}
  1675  					s.f.setHome(v, outLocs)
  1676  				} else {
  1677  					if r := outRegs[0]; r != noRegister {
  1678  						s.assignReg(r, v, v)
  1679  					}
  1680  				}
  1681  				if tmpReg != noRegister {
  1682  					// Remember the temp register allocation, if any.
  1683  					if s.f.tempRegs == nil {
  1684  						s.f.tempRegs = map[ID]*Register{}
  1685  					}
  1686  					s.f.tempRegs[v.ID] = &s.registers[tmpReg]
  1687  				}
  1688  			}
  1689  
  1690  			// deallocate dead args, if we have not done so
  1691  			if opcodeTable[v.Op].resultNotInArgs {
  1692  				s.nospill = 0
  1693  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1694  			}
  1695  			s.tmpused = 0
  1696  
  1697  			// Issue the Value itself.
  1698  			for i, a := range args {
  1699  				v.SetArg(i, a) // use register version of arguments
  1700  			}
  1701  			b.Values = append(b.Values, v)
  1702  		}
  1703  
  1704  		// Copy the control values - we need this copy so we can
  1705  		// decrement the use counts of these values later.
  1706  		controls := append(make([]*Value, 0, 2), b.ControlValues()...)
  1707  
  1708  		// Load control values into registers.
  1709  		for i, v := range b.ControlValues() {
  1710  			if !s.values[v.ID].needReg {
  1711  				continue
  1712  			}
  1713  			if s.f.pass.debug > regDebug {
  1714  				fmt.Printf("  processing control %s\n", v.LongString())
  1715  			}
  1716  			// We assume that a control input can be passed in any
  1717  			// type-compatible register. If this turns out not to be true,
  1718  			// we'll need to introduce a regspec for a block's control value.
  1719  			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
  1720  		}
  1721  
  1722  		// Reduce the uses of the control values once registers have been loaded.
  1723  		// This loop is equivalent to the advanceUses method.
  1724  		for _, v := range controls {
  1725  			vi := &s.values[v.ID]
  1726  			if !vi.needReg {
  1727  				continue
  1728  			}
  1729  			// Remove this use from the uses list.
  1730  			u := vi.uses
  1731  			vi.uses = u.next
  1732  			if u.next == nil {
  1733  				s.freeRegs(vi.regs) // value is dead
  1734  			}
  1735  			u.next = s.freeUseRecords
  1736  			s.freeUseRecords = u
  1737  		}
  1738  
  1739  		// If we are approaching a loop header and we are its primary
  1740  		// predecessor, find live values that are used soon after the
  1741  		// loop entry and promote them to registers now.
  1742  		if len(b.Succs) == 1 {
  1743  			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
  1744  				s.freeReg(s.GReg) // Spill value in G register before any merge.
  1745  			}
  1746  			// For this to be worthwhile, the loop must have no calls in it.
  1747  			top := b.Succs[0].b
  1748  			loop := s.loopnest.b2l[top.ID]
  1749  			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
  1750  				goto badloop
  1751  			}
  1752  
  1753  			// TODO: sort by distance, pick the closest ones?
  1754  			for _, live := range s.live[b.ID] {
  1755  				if live.dist >= unlikelyDistance {
  1756  					// Don't preload anything live after the loop.
  1757  					continue
  1758  				}
  1759  				vid := live.ID
  1760  				vi := &s.values[vid]
  1761  				if vi.regs != 0 {
  1762  					continue
  1763  				}
  1764  				if vi.rematerializeable {
  1765  					continue
  1766  				}
  1767  				v := s.orig[vid]
  1768  				m := s.compatRegs(v.Type) &^ s.used
  1769  				// Use the desired register if available.
  1770  			outerloop:
  1771  				for _, e := range desired.entries {
  1772  					if e.ID != v.ID {
  1773  						continue
  1774  					}
  1775  					for _, r := range e.regs {
  1776  						if r != noRegister && m>>r&1 != 0 {
  1777  							m = regMask(1) << r
  1778  							break outerloop
  1779  						}
  1780  					}
  1781  				}
  1782  				if m&^desired.avoid != 0 {
  1783  					m &^= desired.avoid
  1784  				}
  1785  				if m != 0 {
  1786  					s.allocValToReg(v, m, false, b.Pos)
  1787  				}
  1788  			}
  1789  		}
  1790  	badloop:
  1791  		;
  1792  
  1793  		// Save end-of-block register state.
  1794  		// First count how many; this cuts allocations in half.
  1795  		k := 0
  1796  		for r := register(0); r < s.numRegs; r++ {
  1797  			v := s.regs[r].v
  1798  			if v == nil {
  1799  				continue
  1800  			}
  1801  			k++
  1802  		}
  1803  		regList := make([]endReg, 0, k)
  1804  		for r := register(0); r < s.numRegs; r++ {
  1805  			v := s.regs[r].v
  1806  			if v == nil {
  1807  				continue
  1808  			}
  1809  			regList = append(regList, endReg{r, v, s.regs[r].c})
  1810  		}
  1811  		s.endRegs[b.ID] = regList
  1812  
  1813  		if checkEnabled {
  1814  			regValLiveSet.clear()
  1815  			for _, x := range s.live[b.ID] {
  1816  				regValLiveSet.add(x.ID)
  1817  			}
  1818  			for r := register(0); r < s.numRegs; r++ {
  1819  				v := s.regs[r].v
  1820  				if v == nil {
  1821  					continue
  1822  				}
  1823  				if !regValLiveSet.contains(v.ID) {
  1824  					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
  1825  				}
  1826  			}
  1827  		}
  1828  
  1829  		// If a value is live at the end of the block and
  1830  		// isn't in a register, generate a use for the spill location.
  1831  		// We need to remember this information so that
  1832  		// the liveness analysis in stackalloc is correct.
  1833  		for _, e := range s.live[b.ID] {
  1834  			vi := &s.values[e.ID]
  1835  			if vi.regs != 0 {
  1836  				// in a register, we'll use that source for the merge.
  1837  				continue
  1838  			}
  1839  			if vi.rematerializeable {
  1840  				// we'll rematerialize during the merge.
  1841  				continue
  1842  			}
  1843  			if s.f.pass.debug > regDebug {
  1844  				fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
  1845  			}
  1846  			spill := s.makeSpill(s.orig[e.ID], b)
  1847  			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
  1848  		}
  1849  
  1850  		// Clear any final uses.
  1851  		// All that is left should be the pseudo-uses added for values which
  1852  		// are live at the end of b.
  1853  		for _, e := range s.live[b.ID] {
  1854  			u := s.values[e.ID].uses
  1855  			if u == nil {
  1856  				f.Fatalf("live at end, no uses v%d", e.ID)
  1857  			}
  1858  			if u.next != nil {
  1859  				f.Fatalf("live at end, too many uses v%d", e.ID)
  1860  			}
  1861  			s.values[e.ID].uses = nil
  1862  			u.next = s.freeUseRecords
  1863  			s.freeUseRecords = u
  1864  		}
  1865  	}
  1866  
  1867  	// Decide where the spills we generated will go.
  1868  	s.placeSpills()
  1869  
  1870  	// Anything that didn't get a register gets a stack location here.
  1871  	// (StoreReg, stack-based phis, inputs, ...)
  1872  	stacklive := stackalloc(s.f, s.spillLive)
  1873  
  1874  	// Fix up all merge edges.
  1875  	s.shuffle(stacklive)
  1876  
  1877  	// Erase any copies we never used.
  1878  	// Also, an unused copy might be the only use of another copy,
  1879  	// so continue erasing until we reach a fixed point.
  1880  	for {
  1881  		progress := false
  1882  		for c, used := range s.copies {
  1883  			if !used && c.Uses == 0 {
  1884  				if s.f.pass.debug > regDebug {
  1885  					fmt.Printf("delete copied value %s\n", c.LongString())
  1886  				}
  1887  				c.resetArgs()
  1888  				f.freeValue(c)
  1889  				delete(s.copies, c)
  1890  				progress = true
  1891  			}
  1892  		}
  1893  		if !progress {
  1894  			break
  1895  		}
  1896  	}
  1897  
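        	// Compact each block's value list, dropping the values that were
        	// marked OpInvalid when the unused copies were freed above.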
  1898  	for _, b := range s.visitOrder {
  1899  		i := 0
  1900  		for _, v := range b.Values {
  1901  			if v.Op == OpInvalid {
  1902  				continue
  1903  			}
  1904  			b.Values[i] = v
  1905  			i++
  1906  		}
  1907  		b.Values = b.Values[:i]
  1908  	}
  1909  }
  1910  
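        // placeSpills chooses a block for each blockless spill generated
        // during allocation and splices the spills into the block schedules.
        // Following the strategy described at the top of this file, it walks
        // down the dominator tree from the value's definition toward its
        // restores, and never moves a spill into a deeper loop.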
  1911  func (s *regAllocState) placeSpills() {
  1912  	mustBeFirst := func(op Op) bool {
  1913  		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
  1914  	}
  1915  
  1916  	// Start maps block IDs to the list of spills
  1917  	// that go at the start of the block (but after any phis).
  1918  	start := map[ID][]*Value{}
  1919  	// After maps value IDs to the list of spills
  1920  	// that go immediately after that value ID.
  1921  	after := map[ID][]*Value{}
  1922  
  1923  	for i := range s.values {
  1924  		vi := s.values[i]
  1925  		spill := vi.spill
  1926  		if spill == nil {
  1927  			continue
  1928  		}
  1929  		if spill.Block != nil {
  1930  			// Some spills are already fully set up,
  1931  			// like OpArgs and stack-based phis.
  1932  			continue
  1933  		}
  1934  		v := s.orig[i]
  1935  
  1936  		// Walk down the dominator tree looking for a good place to
  1937  		// put the spill of v.  At the start "best" is the best place
  1938  		// we have found so far.
  1939  		// TODO: find a way to make this O(1) without arbitrary cutoffs.
  1940  		if v == nil {
  1941  			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
  1942  		}
  1943  		best := v.Block
  1944  		bestArg := v
  1945  		var bestDepth int16
  1946  		if l := s.loopnest.b2l[best.ID]; l != nil {
  1947  			bestDepth = l.depth
  1948  		}
  1949  		b := best
  1950  		const maxSpillSearch = 100
  1951  		for i := 0; i < maxSpillSearch; i++ {
  1952  			// Find the child of b in the dominator tree which
  1953  			// dominates all restores.
  1954  			p := b
  1955  			b = nil
  1956  			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
  1957  				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
  1958  					// c also dominates all restores.  Walk down into c.
  1959  					b = c
  1960  					break
  1961  				}
  1962  			}
  1963  			if b == nil {
  1964  				// Ran out of blocks which dominate all restores.
  1965  				break
  1966  			}
  1967  
  1968  			var depth int16
  1969  			if l := s.loopnest.b2l[b.ID]; l != nil {
  1970  				depth = l.depth
  1971  			}
  1972  			if depth > bestDepth {
  1973  				// Don't push the spill into a deeper loop.
  1974  				continue
  1975  			}
  1976  
  1977  			// If v is in a register at the start of b, we can
  1978  			// place the spill here (after the phis).
  1979  			if len(b.Preds) == 1 {
  1980  				for _, e := range s.endRegs[b.Preds[0].b.ID] {
  1981  					if e.v == v {
  1982  						// Found a better spot for the spill.
  1983  						best = b
  1984  						bestArg = e.c
  1985  						bestDepth = depth
  1986  						break
  1987  					}
  1988  				}
  1989  			} else {
  1990  				for _, e := range s.startRegs[b.ID] {
  1991  					if e.v == v {
  1992  						// Found a better spot for the spill.
  1993  						best = b
  1994  						bestArg = e.c
  1995  						bestDepth = depth
  1996  						break
  1997  					}
  1998  				}
  1999  			}
  2000  		}
  2001  
  2002  		// Put the spill in the best block we found.
  2003  		spill.Block = best
  2004  		spill.AddArg(bestArg)
  2005  		if best == v.Block && !mustBeFirst(v.Op) {
  2006  			// Place immediately after v.
  2007  			after[v.ID] = append(after[v.ID], spill)
  2008  		} else {
  2009  			// Place at the start of best block.
  2010  			start[best.ID] = append(start[best.ID], spill)
  2011  		}
  2012  	}
  2013  
  2014  	// Insert spill instructions into the block schedules.
  2015  	var oldSched []*Value
  2016  	for _, b := range s.visitOrder {
  2017  		nfirst := 0
  2018  		for _, v := range b.Values {
  2019  			if !mustBeFirst(v.Op) {
  2020  				break
  2021  			}
  2022  			nfirst++
  2023  		}
  2024  		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
  2025  		b.Values = b.Values[:nfirst]
  2026  		b.Values = append(b.Values, start[b.ID]...)
  2027  		for _, v := range oldSched {
  2028  			b.Values = append(b.Values, v)
  2029  			b.Values = append(b.Values, after[v.ID]...)
  2030  		}
  2031  	}
  2032  }
  2033  
  2034  // shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
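        // For example, if b has predecessors p1 and p2 and contains
        // v = Phi(x, y), shuffle emits code at the end of p1 (resp. p2)
        // moving x (resp. y) into v's assigned location, together with
        // whatever copies, loads, and stores are needed to rebuild b's
        // expected start-of-block register contents.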
  2035  func (s *regAllocState) shuffle(stacklive [][]ID) {
  2036  	var e edgeState
  2037  	e.s = s
  2038  	e.cache = map[ID][]*Value{}
  2039  	e.contents = map[Location]contentRecord{}
  2040  	if s.f.pass.debug > regDebug {
  2041  		fmt.Printf("shuffle %s\n", s.f.Name)
  2042  		fmt.Println(s.f.String())
  2043  	}
  2044  
  2045  	for _, b := range s.visitOrder {
  2046  		if len(b.Preds) <= 1 {
  2047  			continue
  2048  		}
  2049  		e.b = b
  2050  		for i, edge := range b.Preds {
  2051  			p := edge.b
  2052  			e.p = p
  2053  			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
  2054  			e.process()
  2055  		}
  2056  	}
  2057  
  2058  	if s.f.pass.debug > regDebug {
  2059  		fmt.Printf("post shuffle %s\n", s.f.Name)
  2060  		fmt.Println(s.f.String())
  2061  	}
  2062  }
  2063  
  2064  type edgeState struct {
  2065  	s    *regAllocState
  2066  	p, b *Block // edge goes from p->b.
  2067  
  2068  	// for each pre-regalloc value, a list of equivalent cached values
  2069  	cache      map[ID][]*Value
  2070  	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration
  2071  
  2072  	// map from location to the value it contains
  2073  	contents map[Location]contentRecord
  2074  
  2075  	// desired destination locations
  2076  	destinations []dstRecord
  2077  	extra        []dstRecord
  2078  
  2079  	usedRegs              regMask // registers currently holding something
  2080  	uniqueRegs            regMask // registers holding the only copy of a value
  2081  	finalRegs             regMask // registers holding final target
  2082  	rematerializeableRegs regMask // registers that hold rematerializeable values
  2083  }
  2084  
  2085  type contentRecord struct {
  2086  	vid   ID       // pre-regalloc value
  2087  	c     *Value   // cached value
  2088  	final bool     // this is a satisfied destination
  2089  	pos   src.XPos // source position of use of the value
  2090  }
  2091  
  2092  type dstRecord struct {
  2093  	loc    Location // register or stack slot
  2094  	vid    ID       // pre-regalloc value it should contain
  2095  	splice **Value  // place to store reference to the generating instruction
  2096  	pos    src.XPos // source position of use of this location
  2097  }
  2098  
  2099  // setup initializes the edge state for shuffling.
  2100  func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
  2101  	if e.s.f.pass.debug > regDebug {
  2102  		fmt.Printf("edge %s->%s\n", e.p, e.b)
  2103  	}
  2104  
  2105  	// Clear state.
  2106  	for _, vid := range e.cachedVals {
  2107  		delete(e.cache, vid)
  2108  	}
  2109  	e.cachedVals = e.cachedVals[:0]
  2110  	for k := range e.contents {
  2111  		delete(e.contents, k)
  2112  	}
  2113  	e.usedRegs = 0
  2114  	e.uniqueRegs = 0
  2115  	e.finalRegs = 0
  2116  	e.rematerializeableRegs = 0
  2117  
  2118  	// Live registers can be sources.
  2119  	for _, x := range srcReg {
  2120  		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // we don't care about the position of the source
  2121  	}
  2122  	// So can all of the spill locations.
  2123  	for _, spillID := range stacklive {
  2124  		v := e.s.orig[spillID]
  2125  		spill := e.s.values[v.ID].spill
  2126  		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
  2127  			// Spills were placed so as to dominate only the uses
  2128  			// found during the first regalloc pass. The edge fixup code
  2129  			// can't use a spill location if the spill doesn't dominate
  2130  			// the edge.
  2131  			// We are guaranteed that if the spill doesn't dominate this edge,
  2132  			// then the value is available in a register (because we called
  2133  			// makeSpill for every value not in a register at the start
  2134  			// of an edge).
  2135  			continue
  2136  		}
  2137  		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // we don't care about the position of the source
  2138  	}
  2139  
  2140  	// Figure out all the destinations we need.
  2141  	dsts := e.destinations[:0]
  2142  	for _, x := range dstReg {
  2143  		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
  2144  	}
  2145  	// Phis need their args to end up in a specific location.
  2146  	for _, v := range e.b.Values {
  2147  		if v.Op != OpPhi {
  2148  			break
  2149  		}
  2150  		loc := e.s.f.getHome(v.ID)
  2151  		if loc == nil {
  2152  			continue
  2153  		}
  2154  		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
  2155  	}
  2156  	e.destinations = dsts
  2157  
  2158  	if e.s.f.pass.debug > regDebug {
  2159  		for _, vid := range e.cachedVals {
  2160  			a := e.cache[vid]
  2161  			for _, c := range a {
  2162  				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
  2163  			}
  2164  		}
  2165  		for _, d := range e.destinations {
  2166  			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
  2167  		}
  2168  	}
  2169  }
  2170  
  2171  // process generates code to move all the values to the right destination locations.
  2172  func (e *edgeState) process() {
  2173  	dsts := e.destinations
  2174  
  2175  	// Process the destinations until they are all satisfied.
  2176  	for len(dsts) > 0 {
  2177  		i := 0
  2178  		for _, d := range dsts {
  2179  			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
  2180  				// Failed - save for next iteration.
  2181  				dsts[i] = d
  2182  				i++
  2183  			}
  2184  		}
  2185  		if i < len(dsts) {
  2186  			// Made some progress. Go around again.
  2187  			dsts = dsts[:i]
  2188  
  2189  			// Append any extra destinations we generated.
  2190  			dsts = append(dsts, e.extra...)
  2191  			e.extra = e.extra[:0]
  2192  			continue
  2193  		}
  2194  
  2195  		// We made no progress. That means that any
  2196  		// remaining unsatisfied moves are in simple cycles.
  2197  		// For example, A -> B -> C -> D -> A.
  2198  		//   A ----> B
  2199  		//   ^       |
  2200  		//   |       |
  2201  		//   |       v
  2202  		//   D <---- C
  2203  
  2204  		// To break the cycle, we pick an unused register, say R,
  2205  		// and put a copy of B there.
  2206  		//   A ----> B
  2207  		//   ^       |
  2208  		//   |       |
  2209  		//   |       v
  2210  		//   D <---- C <---- R=copyofB
  2211  		// When we resume the outer loop, the A->B move can now proceed,
  2212  		// and eventually the whole cycle completes.
  2213  
  2214  		// Copy one cycle location to a temp register. This duplicates
  2215  		// one of the cycle entries, allowing the duplicated value to be
  2216  		// overwritten so the cycle can proceed.
  2217  		d := dsts[0]
  2218  		loc := d.loc
  2219  		vid := e.contents[loc].vid
  2220  		c := e.contents[loc].c
  2221  		r := e.findRegFor(c.Type)
  2222  		if e.s.f.pass.debug > regDebug {
  2223  			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
  2224  		}
  2225  		e.erase(r)
  2226  		pos := d.pos.WithNotStmt()
  2227  		if _, isReg := loc.(*Register); isReg {
  2228  			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2229  		} else {
  2230  			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2231  		}
  2232  		e.set(r, vid, c, false, pos)
  2233  		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
  2234  			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
  2235  		}
  2236  	}
  2237  }
  2238  
  2239  // processDest generates code to put value vid into location loc. Returns true
  2240  // if progress was made.
  2241  func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
  2242  	pos = pos.WithNotStmt()
  2243  	occupant := e.contents[loc]
  2244  	if occupant.vid == vid {
  2245  		// Value is already in the correct place.
  2246  		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
  2247  		if splice != nil {
  2248  			(*splice).Uses--
  2249  			*splice = occupant.c
  2250  			occupant.c.Uses++
  2251  		}
  2252  		// Note: if splice==nil then occupant.c will appear dead. This
  2253  		// code is no longer in SSA form, so be careful not to run dead
  2254  		// code elimination after this pass.
  2255  		if _, ok := e.s.copies[occupant.c]; ok {
  2256  			// The copy at occupant.c was used to avoid spill.
  2257  			e.s.copies[occupant.c] = true
  2258  		}
  2259  		return true
  2260  	}
  2261  
  2262  	// Check if we're allowed to clobber the destination location.
  2263  	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
  2264  		// We can't overwrite the last copy
  2265  		// of a value that needs to survive.
  2266  		return false
  2267  	}
  2268  
  2269  	// Copy from a source of v, register preferred.
  2270  	v := e.s.orig[vid]
  2271  	var c *Value
  2272  	var src Location
  2273  	if e.s.f.pass.debug > regDebug {
  2274  		fmt.Printf("moving v%d to %s\n", vid, loc)
  2275  		fmt.Printf("sources of v%d:", vid)
  2276  	}
  2277  	for _, w := range e.cache[vid] {
  2278  		h := e.s.f.getHome(w.ID)
  2279  		if e.s.f.pass.debug > regDebug {
  2280  			fmt.Printf(" %s:%s", h, w)
  2281  		}
  2282  		_, isreg := h.(*Register)
  2283  		if src == nil || isreg {
  2284  			c = w
  2285  			src = h
  2286  		}
  2287  	}
  2288  	if e.s.f.pass.debug > regDebug {
  2289  		if src != nil {
  2290  			fmt.Printf(" [use %s]\n", src)
  2291  		} else {
  2292  			fmt.Printf(" [no source]\n")
  2293  		}
  2294  	}
  2295  	_, dstReg := loc.(*Register)
  2296  
  2297  	// Pre-clobber destination. This avoids the
  2298  	// following situation:
  2299  	//   - v is currently held in R0 and stacktmp0.
  2300  	//   - We want to copy stacktmp1 to stacktmp0.
  2301  	//   - We choose R0 as the temporary register.
  2302  	// During the copy, both R0 and stacktmp0 are
  2303  	// clobbered, losing both copies of v. Oops!
  2304  	// Erasing the destination early means R0 will not
  2305  	// be chosen as the temp register, as it will then
  2306  	// be the last copy of v.
  2307  	e.erase(loc)
  2308  	var x *Value
  2309  	if c == nil || e.s.values[vid].rematerializeable {
  2310  		if !e.s.values[vid].rematerializeable {
  2311  			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
  2312  		}
  2313  		if dstReg {
  2314  			x = v.copyInto(e.p)
  2315  		} else {
  2316  			// Rematerialize into stack slot. Need a free
  2317  			// register to accomplish this.
  2318  			r := e.findRegFor(v.Type)
  2319  			e.erase(r)
  2320  			x = v.copyIntoWithXPos(e.p, pos)
  2321  			e.set(r, vid, x, false, pos)
  2322  			// Make sure we spill with the size of the slot, not the
  2323  			// size of x (which might be wider due to our dropping
  2324  			// of narrowing conversions).
  2325  			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
  2326  		}
  2327  	} else {
  2328  		// Emit move from src to dst.
  2329  		_, srcReg := src.(*Register)
  2330  		if srcReg {
  2331  			if dstReg {
  2332  				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2333  			} else {
  2334  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
  2335  			}
  2336  		} else {
  2337  			if dstReg {
  2338  				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2339  			} else {
  2340  				// mem->mem. Use temp register.
  2341  				r := e.findRegFor(c.Type)
  2342  				e.erase(r)
  2343  				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2344  				e.set(r, vid, t, false, pos)
  2345  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
  2346  			}
  2347  		}
  2348  	}
  2349  	e.set(loc, vid, x, true, pos)
  2350  	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
  2351  		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
  2352  	}
  2353  	if splice != nil {
  2354  		(*splice).Uses--
  2355  		*splice = x
  2356  		x.Uses++
  2357  	}
  2358  	return true
  2359  }
  2360  
  2361  // set changes the contents of location loc to hold the given value and its cached representative.
  2362  func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
  2363  	e.s.f.setHome(c, loc)
  2364  	e.contents[loc] = contentRecord{vid, c, final, pos}
  2365  	a := e.cache[vid]
  2366  	if len(a) == 0 {
  2367  		e.cachedVals = append(e.cachedVals, vid)
  2368  	}
  2369  	a = append(a, c)
  2370  	e.cache[vid] = a
  2371  	if r, ok := loc.(*Register); ok {
  2372  		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
  2373  			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
  2374  		}
  2375  		e.usedRegs |= regMask(1) << uint(r.num)
  2376  		if final {
  2377  			e.finalRegs |= regMask(1) << uint(r.num)
  2378  		}
  2379  		if len(a) == 1 {
  2380  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2381  		}
  2382  		if len(a) == 2 {
  2383  			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2384  				e.uniqueRegs &^= regMask(1) << uint(t.num)
  2385  			}
  2386  		}
  2387  		if e.s.values[vid].rematerializeable {
  2388  			e.rematerializeableRegs |= regMask(1) << uint(r.num)
  2389  		}
  2390  	}
  2391  	if e.s.f.pass.debug > regDebug {
  2392  		fmt.Printf("%s\n", c.LongString())
  2393  		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
  2394  	}
  2395  }
  2396  
  2397  // erase removes whatever value currently occupies loc.
  2398  func (e *edgeState) erase(loc Location) {
  2399  	cr := e.contents[loc]
  2400  	if cr.c == nil {
  2401  		return
  2402  	}
  2403  	vid := cr.vid
  2404  
  2405  	if cr.final {
  2406  		// Add a destination to move this value back into place.
  2407  		// Make sure it gets added to the tail of the destination queue
  2408  		// so we make progress on other moves first.
  2409  		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
  2410  	}
  2411  
  2412  	// Remove c from the list of cached values.
  2413  	a := e.cache[vid]
  2414  	for i, c := range a {
  2415  		if e.s.f.getHome(c.ID) == loc {
  2416  			if e.s.f.pass.debug > regDebug {
  2417  				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
  2418  			}
  2419  			a[i], a = a[len(a)-1], a[:len(a)-1]
  2420  			break
  2421  		}
  2422  	}
  2423  	e.cache[vid] = a
  2424  
  2425  	// Update register masks.
  2426  	if r, ok := loc.(*Register); ok {
  2427  		e.usedRegs &^= regMask(1) << uint(r.num)
  2428  		if cr.final {
  2429  			e.finalRegs &^= regMask(1) << uint(r.num)
  2430  		}
  2431  		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
  2432  	}
  2433  	if len(a) == 1 {
  2434  		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2435  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2436  		}
  2437  	}
  2438  }
  2439  
  2440  // findRegFor finds a register we can use to make a temp copy of type typ.
  2441  func (e *edgeState) findRegFor(typ *types.Type) Location {
  2442  	// Which registers are possibilities.
  2443  	types := &e.s.f.Config.Types
  2444  	m := e.s.compatRegs(typ)
  2445  
  2446  	// Pick a register. In priority order:
  2447  	// 1) an unused register
  2448  	// 2) a non-unique register not holding a final value
  2449  	// 3) a non-unique register
  2450  	// 4) a register holding a rematerializeable value
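        	// This order matters: a non-unique register has another copy of
        	// its value surviving elsewhere (register or stack slot), and a
        	// rematerializeable value can be recomputed on demand, so
        	// clobbering those does not force a spill. Only as a last resort
        	// do we spill a register's contents below.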
  2451  	x := m &^ e.usedRegs
  2452  	if x != 0 {
  2453  		return &e.s.registers[pickReg(x)]
  2454  	}
  2455  	x = m &^ e.uniqueRegs &^ e.finalRegs
  2456  	if x != 0 {
  2457  		return &e.s.registers[pickReg(x)]
  2458  	}
  2459  	x = m &^ e.uniqueRegs
  2460  	if x != 0 {
  2461  		return &e.s.registers[pickReg(x)]
  2462  	}
  2463  	x = m & e.rematerializeableRegs
  2464  	if x != 0 {
  2465  		return &e.s.registers[pickReg(x)]
  2466  	}
  2467  
  2468  	// No register is available.
  2469  	// Pick a register to spill.
  2470  	for _, vid := range e.cachedVals {
  2471  		a := e.cache[vid]
  2472  		for _, c := range a {
  2473  			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
  2474  				if !c.rematerializeable() {
  2475  					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
  2476  					// Allocate a temp location to spill a register to.
  2477  					// The type of the slot is immaterial - it will not be live across
  2478  					// any safepoint. Just use a type big enough to hold any register.
  2479  					t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64}
  2480  					// TODO: reuse these slots. They'll need to be erased first.
  2481  					e.set(t, vid, x, false, c.Pos)
  2482  					if e.s.f.pass.debug > regDebug {
  2483  						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
  2484  					}
  2485  				}
  2486  				// r will now be overwritten by the caller. At some point
  2487  				// later, the newly saved value will be moved back to its
  2488  				// final destination in processDest.
  2489  				return r
  2490  			}
  2491  		}
  2492  	}
  2493  
  2494  	fmt.Printf("m:%d unique:%d final:%d rematerializeable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
  2495  	for _, vid := range e.cachedVals {
  2496  		a := e.cache[vid]
  2497  		for _, c := range a {
  2498  			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
  2499  		}
  2500  	}
  2501  	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
  2502  	return nil
  2503  }
  2504  
  2505  // rematerializeable reports whether the register allocator should recompute
  2506  // a value instead of spilling/restoring it.
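        // Typical rematerializeable values are constant-materializing ops
        // and address computations whose only inputs are SP and SB;
        // recomputing such a value at each use is cheaper than a
        // store/load pair.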
  2507  func (v *Value) rematerializeable() bool {
  2508  	if !opcodeTable[v.Op].rematerializeable {
  2509  		return false
  2510  	}
  2511  	for _, a := range v.Args {
  2512  		// SP and SB (generated by OpSP and OpSB) are always available.
  2513  		if a.Op != OpSP && a.Op != OpSB {
  2514  			return false
  2515  		}
  2516  	}
  2517  	return true
  2518  }
  2519  
  2520  type liveInfo struct {
  2521  	ID   ID       // ID of value
  2522  	dist int32    // # of instructions before next use
  2523  	pos  src.XPos // source position of next use
  2524  }
  2525  
  2526  // computeLive computes a map from block ID to a list of value IDs live at the end
  2527  // of that block. Together with the value ID is a count of how many instructions
  2528  // to the next use of that value. The resulting map is stored in s.live.
  2529  // computeLive also computes the desired register information at the end of each block.
  2530  // This desired register information is stored in s.desired.
  2531  // TODO: this could be quadratic if lots of variables are live across lots of
  2532  // basic blocks. Figure out a way to make this function (or, more precisely, the user
  2533  // of this function) require only linear size & time.
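        // As a small worked example (hypothetical counts): if v is next used
        // five instructions into a likely successor of block b, then v
        // appears in s.live[b.ID] with dist = likelyDistance + 5; a use
        // reachable only across an unlikely branch would get
        // unlikelyDistance added instead.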
  2534  func (s *regAllocState) computeLive() {
  2535  	f := s.f
  2536  	s.live = make([][]liveInfo, f.NumBlocks())
  2537  	s.desired = make([]desiredState, f.NumBlocks())
  2538  	var phis []*Value
  2539  
  2540  	live := f.newSparseMapPos(f.NumValues())
  2541  	defer f.retSparseMapPos(live)
  2542  	t := f.newSparseMapPos(f.NumValues())
  2543  	defer f.retSparseMapPos(t)
  2544  
  2545  	// Keep track of which value we want in each register.
  2546  	var desired desiredState
  2547  
  2548  	// Instead of iterating over f.Blocks, iterate over their postordering.
  2549  	// Liveness information flows backward, so starting at the end
  2550  	// increases the probability that we will stabilize quickly.
  2551  	// TODO: Do a better job yet. Here's one possibility:
  2552  	// Calculate the dominator tree and locate all strongly connected components.
  2553  	// If a value is live in one block of an SCC, it is live in all.
  2554  	// Walk the dominator tree from end to beginning, just once, treating SCC
  2555  	// components as single blocks, duplicating the calculated liveness information
  2556  	// out to all of them.
  2557  	po := f.postorder()
  2558  	s.loopnest = f.loopnest()
  2559  	s.loopnest.calculateDepths()
  2560  	for {
  2561  		changed := false
  2562  
  2563  		for _, b := range po {
  2564  			// Start with known live values at the end of the block.
  2565  			// Add len(b.Values) to adjust from end-of-block distance
  2566  			// to beginning-of-block distance.
  2567  			live.clear()
  2568  			for _, e := range s.live[b.ID] {
  2569  				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
  2570  			}
  2571  
  2572  			// Mark control values as live
  2573  			for _, c := range b.ControlValues() {
  2574  				if s.values[c.ID].needReg {
  2575  					live.set(c.ID, int32(len(b.Values)), b.Pos)
  2576  				}
  2577  			}
  2578  
  2579  			// Propagate backwards to the start of the block.
  2580  			// Assumes Values have been scheduled.
  2581  			phis = phis[:0]
  2582  			for i := len(b.Values) - 1; i >= 0; i-- {
  2583  				v := b.Values[i]
  2584  				live.remove(v.ID)
  2585  				if v.Op == OpPhi {
  2586  					// save phi ops for later
  2587  					phis = append(phis, v)
  2588  					continue
  2589  				}
  2590  				if opcodeTable[v.Op].call {
  2591  					c := live.contents()
  2592  					for i := range c {
  2593  						c[i].val += unlikelyDistance
  2594  					}
  2595  				}
  2596  				for _, a := range v.Args {
  2597  					if s.values[a.ID].needReg {
  2598  						live.set(a.ID, int32(i), v.Pos)
  2599  					}
  2600  				}
  2601  			}
  2602  			// Propagate desired registers backwards.
  2603  			desired.copy(&s.desired[b.ID])
  2604  			for i := len(b.Values) - 1; i >= 0; i-- {
  2605  				v := b.Values[i]
  2606  				prefs := desired.remove(v.ID)
  2607  				if v.Op == OpPhi {
  2608  					// TODO: if v is a phi, save desired register for phi inputs.
  2609  					// For now, we just drop it and don't propagate
  2610  					// desired registers back through phi nodes.
  2611  					continue
  2612  				}
  2613  				regspec := s.regspec(v)
  2614  				// Cancel desired registers if they get clobbered.
  2615  				desired.clobber(regspec.clobbers)
  2616  				// Update desired registers if there are any fixed register inputs.
  2617  				for _, j := range regspec.inputs {
  2618  					if countRegs(j.regs) != 1 {
  2619  						continue
  2620  					}
  2621  					desired.clobber(j.regs)
  2622  					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  2623  				}
  2624  				// Set desired register of input 0 if this is a 2-operand instruction.
  2625  				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  2626  					// ADDQconst is added here because we want to treat it as resultInArg0 for
  2627  					// the purposes of desired registers, even though it is not an absolute requirement.
  2628  					// This is because we'd rather implement it as ADDQ instead of LEAQ.
  2629  					// Same for ADDLconst.
  2630  					// Select0 is added here to propagate the desired register to the tuple-generating instruction.
  2631  					if opcodeTable[v.Op].commutative {
  2632  						desired.addList(v.Args[1].ID, prefs)
  2633  					}
  2634  					desired.addList(v.Args[0].ID, prefs)
  2635  				}
  2636  			}
  2637  
  2638  			// For each predecessor of b, expand its list of live-at-end values.
  2639  			// invariant: live contains the values live at the start of b (excluding phi inputs)
  2640  			for i, e := range b.Preds {
  2641  				p := e.b
  2642  				// Compute additional distance for the edge.
  2643  				// Note: delta must be at least 1 to distinguish the control
  2644  				// value use from the first user in a successor block.
  2645  				delta := int32(normalDistance)
  2646  				if len(p.Succs) == 2 {
  2647  					if p.Succs[0].b == b && p.Likely == BranchLikely ||
  2648  						p.Succs[1].b == b && p.Likely == BranchUnlikely {
  2649  						delta = likelyDistance
  2650  					}
  2651  					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
  2652  						p.Succs[1].b == b && p.Likely == BranchLikely {
  2653  						delta = unlikelyDistance
  2654  					}
  2655  				}
  2656  
  2657  				// Update any desired registers at the end of p.
  2658  				s.desired[p.ID].merge(&desired)
  2659  
  2660  				// Start t off with the previously known live values at the end of p.
  2661  				t.clear()
  2662  				for _, e := range s.live[p.ID] {
  2663  					t.set(e.ID, e.dist, e.pos)
  2664  				}
  2665  				update := false
  2666  
  2667  				// Add new live values from scanning this block.
  2668  				for _, e := range live.contents() {
  2669  					d := e.val + delta
  2670  					if !t.contains(e.key) || d < t.get(e.key) {
  2671  						update = true
  2672  						t.set(e.key, d, e.pos)
  2673  					}
  2674  				}
  2675  				// Also add the correct arg from the saved phi values.
  2676  				// All phis are at distance delta (we consider them to
  2677  				// happen simultaneously at the start of the block).
  2678  				for _, v := range phis {
  2679  					id := v.Args[i].ID
  2680  					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
  2681  						update = true
  2682  						t.set(id, delta, v.Pos)
  2683  					}
  2684  				}
  2685  
  2686  				if !update {
  2687  					continue
  2688  				}
  2689  				// The live set has changed, update it.
  2690  				l := s.live[p.ID][:0]
  2691  				if cap(l) < t.size() {
  2692  					l = make([]liveInfo, 0, t.size())
  2693  				}
  2694  				for _, e := range t.contents() {
  2695  					l = append(l, liveInfo{e.key, e.val, e.pos})
  2696  				}
  2697  				s.live[p.ID] = l
  2698  				changed = true
  2699  			}
  2700  		}
  2701  
  2702  		if !changed {
  2703  			break
  2704  		}
  2705  	}
  2706  	if f.pass.debug > regDebug {
  2707  		fmt.Println("live values at end of each block")
  2708  		for _, b := range f.Blocks {
  2709  			fmt.Printf("  %s:", b)
  2710  			for _, x := range s.live[b.ID] {
  2711  				fmt.Printf(" v%d(%d)", x.ID, x.dist)
  2712  				for _, e := range s.desired[b.ID].entries {
  2713  					if e.ID != x.ID {
  2714  						continue
  2715  					}
  2716  					fmt.Printf("[")
  2717  					first := true
  2718  					for _, r := range e.regs {
  2719  						if r == noRegister {
  2720  							continue
  2721  						}
  2722  						if !first {
  2723  							fmt.Printf(",")
  2724  						}
  2725  						fmt.Print(&s.registers[r])
  2726  						first = false
  2727  					}
  2728  					fmt.Printf("]")
  2729  				}
  2730  			}
  2731  			if avoid := s.desired[b.ID].avoid; avoid != 0 {
  2732  				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
  2733  			}
  2734  			fmt.Println()
  2735  		}
  2736  	}
  2737  }
  2738  
  2739  // A desiredState represents desired register assignments.
  2740  type desiredState struct {
  2741  	// Desired assignments will be small, so we just use a list
  2742  	// of valueID+registers entries.
  2743  	entries []desiredStateEntry
  2744  	// Registers that other values want to be in.  This value will
  2745  	// contain at least the union of the regs fields of entries, but
  2746  	// may also contain bits left over from values that were once in
  2747  	// this data structure but have since been removed.
  2748  	avoid regMask
  2749  }
  2750  type desiredStateEntry struct {
  2751  	// (pre-regalloc) value
  2752  	ID ID
  2753  	// Registers it would like to be in, in priority order.
  2754  	// Unused slots are filled with noRegister.
  2755  	// For opcodes that return tuples, we track desired registers only
  2756  	// for the first element of the tuple.
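        	// For example, {AX, CX, noRegister, noRegister} (illustrative
        	// AMD64 names) means the value would most like to be in AX
        	// and, failing that, in CX.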
  2757  	regs [4]register
  2758  }
  2759  
  2760  func (d *desiredState) clear() {
  2761  	d.entries = d.entries[:0]
  2762  	d.avoid = 0
  2763  }
  2764  
  2765  // get returns a list of desired registers for value vid.
  2766  func (d *desiredState) get(vid ID) [4]register {
  2767  	for _, e := range d.entries {
  2768  		if e.ID == vid {
  2769  			return e.regs
  2770  		}
  2771  	}
  2772  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  2773  }
  2774  
  2775  // add records that we'd like value vid to be in register r.
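        // The new register always becomes the top priority: for example,
        // adding AX to an entry currently holding {CX, DX, noRegister,
        // noRegister} yields {AX, CX, DX, noRegister}.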
  2776  func (d *desiredState) add(vid ID, r register) {
  2777  	d.avoid |= regMask(1) << r
  2778  	for i := range d.entries {
  2779  		e := &d.entries[i]
  2780  		if e.ID != vid {
  2781  			continue
  2782  		}
  2783  		if e.regs[0] == r {
  2784  			// Already known and highest priority
  2785  			return
  2786  		}
  2787  		for j := 1; j < len(e.regs); j++ {
  2788  			if e.regs[j] == r {
  2789  				// Move from lower priority to top priority
  2790  				copy(e.regs[1:], e.regs[:j])
  2791  				e.regs[0] = r
  2792  				return
  2793  			}
  2794  		}
  2795  		copy(e.regs[1:], e.regs[:])
  2796  		e.regs[0] = r
  2797  		return
  2798  	}
  2799  	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
  2800  }
  2801  
  2802  func (d *desiredState) addList(vid ID, regs [4]register) {
  2803  	// regs is in priority order, so iterate in reverse order.
  2804  	for i := len(regs) - 1; i >= 0; i-- {
  2805  		r := regs[i]
  2806  		if r != noRegister {
  2807  			d.add(vid, r)
  2808  		}
  2809  	}
  2810  }
  2811  
  2812  // clobber erases any desired registers in the set m.
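        // For example, clobbering {CX} turns an entry {AX, CX, noRegister,
        // noRegister} into {AX, noRegister, noRegister, noRegister}; an
        // entry left with no desired registers at all is removed.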
  2813  func (d *desiredState) clobber(m regMask) {
  2814  	for i := 0; i < len(d.entries); {
  2815  		e := &d.entries[i]
  2816  		j := 0
  2817  		for _, r := range e.regs {
  2818  			if r != noRegister && m>>r&1 == 0 {
  2819  				e.regs[j] = r
  2820  				j++
  2821  			}
  2822  		}
  2823  		if j == 0 {
  2824  			// No more desired registers for this value.
  2825  			d.entries[i] = d.entries[len(d.entries)-1]
  2826  			d.entries = d.entries[:len(d.entries)-1]
  2827  			continue
  2828  		}
  2829  		for ; j < len(e.regs); j++ {
  2830  			e.regs[j] = noRegister
  2831  		}
  2832  		i++
  2833  	}
  2834  	d.avoid &^= m
  2835  }
  2836  
  2837  // copy copies a desired state from another desiredState x.
  2838  func (d *desiredState) copy(x *desiredState) {
  2839  	d.entries = append(d.entries[:0], x.entries...)
  2840  	d.avoid = x.avoid
  2841  }
  2842  
  2843  // remove removes the desired registers for vid and returns them.
  2844  func (d *desiredState) remove(vid ID) [4]register {
  2845  	for i := range d.entries {
  2846  		if d.entries[i].ID == vid {
  2847  			regs := d.entries[i].regs
  2848  			d.entries[i] = d.entries[len(d.entries)-1]
  2849  			d.entries = d.entries[:len(d.entries)-1]
  2850  			return regs
  2851  		}
  2852  	}
  2853  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  2854  }
  2855  
  2856  // merge merges another desired state x into d.
  2857  func (d *desiredState) merge(x *desiredState) {
  2858  	d.avoid |= x.avoid
  2859  	// There should only be a few desired registers, so
  2860  	// linear insert is ok.
  2861  	for _, e := range x.entries {
  2862  		d.addList(e.ID, e.regs)
  2863  	}
  2864  }
  2865  
  2866  func min32(x, y int32) int32 {
  2867  	if x < y {
  2868  		return x
  2869  	}
  2870  	return y
  2871  }
  2872  func max32(x, y int32) int32 {
  2873  	if x > y {
  2874  		return x
  2875  	}
  2876  	return y
  2877  }