github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/regalloc/regalloc.go (about)

     1  // Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in
     2  // api.go.
     3  //
     4  // References:
     5  //   - https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf
     6  //   - https://en.wikipedia.org/wiki/Chaitin%27s_algorithm
     7  //   - https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf
     8  //   - https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis.
     9  //   - https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
    10  package regalloc
    11  
    12  import (
    13  	"fmt"
    14  	"math"
    15  	"strings"
    16  
    17  	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
    18  )
    19  
    20  // NewAllocator returns a new Allocator.
    21  func NewAllocator(allocatableRegs *RegisterInfo) Allocator {
    22  	a := Allocator{
    23  		regInfo:            allocatableRegs,
    24  		phiDefInstListPool: wazevoapi.NewPool[phiDefInstList](resetPhiDefInstList),
    25  		blockStates:        wazevoapi.NewIDedPool[blockState](resetBlockState),
    26  	}
    27  	a.state.vrStates = wazevoapi.NewIDedPool[vrState](resetVrState)
    28  	a.state.reset()
    29  	for _, regs := range allocatableRegs.AllocatableRegisters {
    30  		for _, r := range regs {
    31  			a.allocatableSet = a.allocatableSet.add(r)
    32  		}
    33  	}
    34  	return a
    35  }
    36  
type (
	// RegisterInfo holds the statically-known ISA-specific register information.
	//
	// Fields without their own comment:
	//   - CalleeSavedRegisters is consulted after allocation to pick, among the registers that were allocated,
	//     the ones reported to Function.ClobberedRegisters.
	//   - CallerSavedRegisters is not read in this part of the file; presumably it is the set released around
	//     calls (TODO confirm against releaseCallerSavedRegs).
	//   - RealRegToVReg is indexed by RealReg and yields the VReg standing for that real register.
	RegisterInfo struct {
		// AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum.
		// The order matters: the first element is the most preferred one when allocating.
		AllocatableRegisters [NumRegType][]RealReg
		CalleeSavedRegisters RegSet
		CallerSavedRegisters RegSet
		RealRegToVReg        []VReg
		// RealRegName returns the name of the given RealReg for debugging.
		RealRegName func(r RealReg) string
		// RealRegType returns the RegType of the given RealReg.
		RealRegType func(r RealReg) RegType
	}

	// Allocator is a register allocator.
	//
	// Fields without their own comment:
	//   - allocatedCalleeSavedRegs is rebuilt by determineCalleeSavedRealRegs and handed to
	//     Function.ClobberedRegisters.
	//   - vs is the scratch buffer passed by pointer to the Defs, Uses and BlockParams calls.
	//   - vs2 is a reusable VRegID work list used by the liveness analysis and by allocBlock.
	//   - phiDefInstListPool is the pool of phiDefInstList nodes; items are reset by resetPhiDefInstList.
	Allocator struct {
		// regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator.
		regInfo *RegisterInfo
		// allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA.
		allocatableSet           RegSet
		allocatedCalleeSavedRegs []VReg
		vs                       []VReg
		vs2                      []VRegID
		phiDefInstListPool       wazevoapi.Pool[phiDefInstList]

		// The following buffers are reused in various places to avoid reallocation:
		// blks is the work stack of loopTreeDFS and reals backs the per-instruction kill set in allocBlock.
		blks             []Block
		reals            []RealReg
		currentOccupants regInUseSet

		// Following two fields are updated while iterating the blocks in the reverse postorder.
		state       state
		blockStates wazevoapi.IDedPool[blockState]
	}

	// programCounter represents an opaque index into the program which is used to represent a LiveInterval of a VReg.
	programCounter int32

	// state is the mutable allocation state; reset returns it to its initial condition.
	state struct {
		// argRealRegs collects the real-register VRegs found by livenessAnalysis as the source of an instruction
		// that defines a phi value; finalizeStartReg marks them in use when a block has no predecessor state.
		// regsInUse records which VReg currently occupies each RealReg, and vrStates holds the per-VReg state
		// looked up by VRegID.
		argRealRegs []VReg
		regsInUse   regInUseSet
		vrStates    wazevoapi.IDedPool[vrState]

		// currentBlockID is the ID of the block being processed. It is -1 right after reset.
		currentBlockID int32

		// allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function.
		allocatedRegSet RegSet
	}

	// blockState is the per-block data kept by the allocator.
	//
	// startFromPredIndex is the index of the predecessor whose end state this block starts from. It is -1 until
	// finalizeStartReg decides it; a block without a usable predecessor state (the entry block) gets 0.
	blockState struct {
		// liveIns is a list of VReg that are live at the beginning of the block.
		liveIns []VRegID
		// seen is true if the block is visited during the liveness analysis.
		seen bool
		// visited is true if the block is visited during the allocation phase.
		visited            bool
		startFromPredIndex int
		// startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges.
		startRegs regInUseSet
		// endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges.
		endRegs regInUseSet
	}

	// vrState is the per-VReg data kept by the allocator. resetVrState defines the initial value of every field.
	//
	// desiredLoc is the location (register or stack) the allocator would like this value to end up in; it starts
	// as desiredLocUnspecified.
	vrState struct {
		// v is the VReg this state belongs to.
		v VReg
		// r is the RealReg currently assigned to v, or RealRegInvalid if it is not in a register.
		r RealReg
		// defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil.
		defInstr Instr
		// defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value.
		defBlk Block
		// lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that
		// reloads this value. This is used to determine the spill location. Only valid if spilled=true.
		lca Block
		// lastUse is the program counter of the last use of this value. This changes while iterating the block, and
		// should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID.
		lastUse                 programCounter
		lastUseUpdatedAtBlockID int32
		// spilled is true if this value is spilled i.e. the value is reload from the stack somewhere in the program.
		//
		// Note that this field is used during liveness analysis for different purpose. This is used to determine the
		// value is live-in or not.
		spilled bool
		// isPhi is true if this is a phi value.
		isPhi      bool
		desiredLoc desiredLoc
		// phiDefInstList is a list of instructions that defines this phi value.
		// This is used to determine the spill location, and only valid if isPhi=true.
		*phiDefInstList
	}

	// phiDefInstList is a linked list of instructions that defines a phi value.
	phiDefInstList struct {
		// instr is the defining instruction held by this node, v is the VReg associated with it
		// (VRegInvalid after reset), and next links to the following node (nil at the end of the list).
		instr Instr
		v     VReg
		next  *phiDefInstList
	}

	// desiredLoc represents a desired location for a VReg.
	// The two lowest bits hold the desiredLocKind; for desiredLocKindReg the remaining bits hold the RealReg.
	desiredLoc uint16
	// desiredLocKind is a kind of desired location for a VReg.
	desiredLocKind uint16
)
   139  
const (
	// desiredLocKindUnspecified is a kind of desired location for a VReg that is not specified.
	desiredLocKindUnspecified desiredLocKind = iota
	// desiredLocKindStack is a kind of desired location for a VReg that is on the stack, only used for the phi values.
	desiredLocKindStack
	// desiredLocKindReg is a kind of desired location for a VReg that is in a register.
	desiredLocKindReg
	// desiredLocUnspecified and desiredLocStack are the ready-made desiredLoc values for the two kinds that
	// carry no register payload.
	desiredLocUnspecified = desiredLoc(desiredLocKindUnspecified)
	desiredLocStack       = desiredLoc(desiredLocKindStack)
)
   150  
   151  func newDesiredLocReg(r RealReg) desiredLoc {
   152  	return desiredLoc(desiredLocKindReg) | desiredLoc(r<<2)
   153  }
   154  
   155  func (d desiredLoc) realReg() RealReg {
   156  	return RealReg(d >> 2)
   157  }
   158  
   159  func (d desiredLoc) stack() bool {
   160  	return d&3 == desiredLoc(desiredLocKindStack)
   161  }
   162  
   163  func resetPhiDefInstList(l *phiDefInstList) {
   164  	l.instr = nil
   165  	l.next = nil
   166  	l.v = VRegInvalid
   167  }
   168  
   169  func (s *state) dump(info *RegisterInfo) { //nolint:unused
   170  	fmt.Println("\t\tstate:")
   171  	fmt.Println("\t\t\targRealRegs:", s.argRealRegs)
   172  	fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info))
   173  	fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info))
   174  	fmt.Println("\t\t\tused:", s.regsInUse.format(info))
   175  	var strs []string
   176  	for i := 0; i <= s.vrStates.MaxIDEncountered(); i++ {
   177  		vs := s.vrStates.Get(i)
   178  		if vs == nil {
   179  			continue
   180  		}
   181  		if vs.r != RealRegInvalid {
   182  			strs = append(strs, fmt.Sprintf("(v%d: %s)", vs.v.ID(), info.RealRegName(vs.r)))
   183  		}
   184  	}
   185  	fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", "))
   186  }
   187  
   188  func (s *state) reset() {
   189  	s.argRealRegs = s.argRealRegs[:0]
   190  	s.vrStates.Reset()
   191  	s.allocatedRegSet = RegSet(0)
   192  	s.regsInUse.reset()
   193  	s.currentBlockID = -1
   194  }
   195  
   196  func (s *state) setVRegState(v VReg, r RealReg) {
   197  	id := int(v.ID())
   198  	st := s.vrStates.GetOrAllocate(id)
   199  	st.r = r
   200  	st.v = v
   201  }
   202  
   203  func resetVrState(vs *vrState) {
   204  	vs.v = VRegInvalid
   205  	vs.r = RealRegInvalid
   206  	vs.defInstr = nil
   207  	vs.defBlk = nil
   208  	vs.spilled = false
   209  	vs.lastUse = -1
   210  	vs.lastUseUpdatedAtBlockID = -1
   211  	vs.lca = nil
   212  	vs.isPhi = false
   213  	vs.phiDefInstList = nil
   214  	vs.desiredLoc = desiredLocUnspecified
   215  }
   216  
   217  func (s *state) getVRegState(v VRegID) *vrState {
   218  	return s.vrStates.GetOrAllocate(int(v))
   219  }
   220  
   221  func (s *state) useRealReg(r RealReg, v VReg) {
   222  	if s.regsInUse.has(r) {
   223  		panic("BUG: useRealReg: the given real register is already used")
   224  	}
   225  	s.regsInUse.add(r, v)
   226  	s.setVRegState(v, r)
   227  	s.allocatedRegSet = s.allocatedRegSet.add(r)
   228  }
   229  
   230  func (s *state) releaseRealReg(r RealReg) {
   231  	current := s.regsInUse.get(r)
   232  	if current.Valid() {
   233  		s.regsInUse.remove(r)
   234  		s.setVRegState(current, RealRegInvalid)
   235  	}
   236  }
   237  
   238  // recordReload records that the given VReg is reloaded in the given block.
   239  // This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value.
   240  func (vs *vrState) recordReload(f Function, blk Block) {
   241  	vs.spilled = true
   242  	if vs.lca == nil {
   243  		if wazevoapi.RegAllocLoggingEnabled {
   244  			fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID())
   245  		}
   246  		vs.lca = blk
   247  	} else {
   248  		if wazevoapi.RegAllocLoggingEnabled {
   249  			fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID())
   250  		}
   251  		vs.lca = f.LowestCommonAncestor(vs.lca, blk)
   252  		if wazevoapi.RegAllocLoggingEnabled {
   253  			fmt.Printf("updated lca=%d\n", vs.lca.ID())
   254  		}
   255  	}
   256  }
   257  
// findOrSpillAllocatable picks a register out of allocatable, evicting the current occupant if none is free.
//
// Selection order:
//  1. preferred, when it is valid, not in forbiddenMask, not in use, and actually listed in allocatable.
//  2. The first register of allocatable that is not forbidden and not in use.
//  3. Otherwise a register whose occupant is evicted (see the loop below); the chosen register is
//     released via releaseRealReg before it is returned.
//
// Only the register bookkeeping is updated here; no instruction is inserted by this function.
// The Allocator a is used solely for log formatting. It panics if every candidate is forbidden or
// occupied by a real-register VReg.
func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) {
	r = RealRegInvalid
	// First, check whether the preferred register can be handed out as is.
	if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) {
		for _, candidateReal := range allocatable {
			// TODO: we should ensure the preferred register is in the allocatable set in the first place,
			//  but right now, just in case, we check it here.
			if candidateReal == preferred {
				return preferred
			}
		}
	}

	// lastUseAt/spillVReg describe the occupant of the current eviction candidate r.
	var lastUseAt programCounter
	var spillVReg VReg
	for _, candidateReal := range allocatable {
		if forbiddenMask.has(candidateReal) {
			continue
		}

		using := s.regsInUse.get(candidateReal)
		if using == VRegInvalid {
			// This is not used at this point.
			return candidateReal
		}

		// Real registers in use should not be spilled, so we skip them.
		// For example, if the register is used as an argument register, and it might be
		// spilled and not reloaded when it ends up being used as a temporary to pass
		// stack based argument.
		if using.IsRealReg() {
			continue
		}

		isPreferred := candidateReal == preferred

		// last == -1 means the value won't be used anymore.
		// The candidate replaces the current choice when there is no choice yet, when it is the preferred
		// register, when its occupant is dead, or when its occupant is used later than the current choice's
		// occupant (a current choice with lastUseAt == -1 is never displaced by that last rule).
		if last := s.getVRegState(using.ID()).lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) {
			lastUseAt = last
			r = candidateReal
			spillVReg = using
			if isPreferred {
				// The preferred register wins outright; stop searching.
				break
			}
		}
	}

	if r == RealRegInvalid {
		panic("not found any allocatable register")
	}

	if wazevoapi.RegAllocLoggingEnabled {
		fmt.Printf("\tspilling v%d when lastUseAt=%d and regsInUse=%s\n", spillVReg.ID(), lastUseAt, s.regsInUse.format(a.regInfo))
	}
	// Evict the occupant so that the caller can assign r.
	s.releaseRealReg(r)
	return r
}
   315  
   316  func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg {
   317  	for _, r := range allocatable {
   318  		if !s.regsInUse.has(r) && !forbiddenMask.has(r) {
   319  			return r
   320  		}
   321  	}
   322  	return RealRegInvalid
   323  }
   324  
   325  func (s *state) resetAt(bs *blockState) {
   326  	s.regsInUse.range_(func(_ RealReg, vr VReg) {
   327  		s.setVRegState(vr, RealRegInvalid)
   328  	})
   329  	s.regsInUse.reset()
   330  	bs.endRegs.range_(func(r RealReg, v VReg) {
   331  		id := int(v.ID())
   332  		st := s.vrStates.GetOrAllocate(id)
   333  		if st.lastUseUpdatedAtBlockID == s.currentBlockID && st.lastUse == programCounterLiveIn {
   334  			s.regsInUse.add(r, v)
   335  			s.setVRegState(v, r)
   336  		}
   337  	})
   338  }
   339  
   340  func resetBlockState(b *blockState) {
   341  	b.seen = false
   342  	b.visited = false
   343  	b.endRegs.reset()
   344  	b.startRegs.reset()
   345  	b.startFromPredIndex = -1
   346  	b.liveIns = b.liveIns[:0]
   347  }
   348  
   349  func (b *blockState) dump(a *RegisterInfo) {
   350  	fmt.Println("\t\tblockState:")
   351  	fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a))
   352  	fmt.Println("\t\t\tendRegs:", b.endRegs.format(a))
   353  	fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex)
   354  	fmt.Println("\t\t\tvisited:", b.visited)
   355  }
   356  
// DoAllocation performs register allocation on the given Function.
//
// The three phases run in a fixed order because each one consumes what the previous one produced.
func (a *Allocator) DoAllocation(f Function) {
	// Compute the per-block live-in sets that the allocation phase reads.
	a.livenessAnalysis(f)
	// Assign real registers block by block.
	a.alloc(f)
	// Report the callee-saved registers that ended up allocated during alloc.
	a.determineCalleeSavedRealRegs(f)
}
   363  
   364  func (a *Allocator) determineCalleeSavedRealRegs(f Function) {
   365  	a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0]
   366  	a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) {
   367  		if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) {
   368  			a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg])
   369  		}
   370  	})
   371  	f.ClobberedRegisters(a.allocatedCalleeSavedRegs)
   372  }
   373  
   374  func (a *Allocator) getOrAllocateBlockState(blockID int32) *blockState {
   375  	return a.blockStates.GetOrAllocate(int(blockID))
   376  }
   377  
   378  // phiBlk returns the block that defines the given phi value, nil otherwise.
   379  func (s *state) phiBlk(v VRegID) Block {
   380  	vs := s.getVRegState(v)
   381  	if vs.isPhi {
   382  		return vs.defBlk
   383  	}
   384  	return nil
   385  }
   386  
// Sentinel programCounter values stored in vrState.lastUse:
// programCounterLiveIn marks a value as live at the beginning of the current block, and
// programCounterLiveOut marks a value that is live into a successor, i.e. used past the end of the block.
const (
	programCounterLiveIn  = math.MinInt32
	programCounterLiveOut = math.MaxInt32
)
   391  
// livenessAnalysis computes the live-in set (blockState.liveIns) of every block and collects
// state.argRealRegs along the way.
// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2.
//
// During this phase vrState.spilled is borrowed as a scratch "is live" flag; it is cleared again
// before the function moves on to the next block.
func (a *Allocator) livenessAnalysis(f Function) {
	s := &a.state
	for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter.

		// We should gather phi value data.
		for _, p := range blk.BlockParams(&a.vs) {
			vs := s.getVRegState(p.ID())
			vs.isPhi = true
			vs.defBlk = blk
		}
	}

	for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() {
		blkID := blk.ID()
		info := a.getOrAllocateBlockState(blkID)

		// a.vs2 accumulates every VRegID touched while processing this block.
		a.vs2 = a.vs2[:0]
		const (
			flagDeleted = false
			flagLive    = true
		)
		// Seed with the live-ins of the already-processed successors, except the phi values that the
		// successor itself defines.
		ns := blk.Succs()
		for i := 0; i < ns; i++ {
			succ := blk.Succ(i)
			if succ == nil {
				continue
			}

			succID := succ.ID()
			succInfo := a.getOrAllocateBlockState(succID)
			if !succInfo.seen { // This means the back edge.
				continue
			}

			for _, v := range succInfo.liveIns {
				if s.phiBlk(v) != succ {
					st := s.getVRegState(v)
					// We use .spilled field to store the flag.
					st.spilled = flagLive
					a.vs2 = append(a.vs2, v)
				}
			}
		}

		// Walk the instructions backwards: a definition kills liveness, a use (re-)establishes it.
		for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() {

			// use and def are declared outside the loops on purpose: after the loops they hold the last
			// definition and the last use of the instruction, which the check below relies on.
			var use, def VReg
			for _, def = range instr.Defs(&a.vs) {
				if !def.IsRealReg() {
					id := def.ID()
					st := s.getVRegState(id)
					// We use .spilled field to store the flag.
					st.spilled = flagDeleted
					a.vs2 = append(a.vs2, id)
				}
			}
			for _, use = range instr.Uses(&a.vs) {
				if !use.IsRealReg() {
					id := use.ID()
					st := s.getVRegState(id)
					// We use .spilled field to store the flag.
					st.spilled = flagLive
					a.vs2 = append(a.vs2, id)
				}
			}

			if def.Valid() && s.phiBlk(def.ID()) != nil {
				if use.Valid() && use.IsRealReg() {
					// If the destination is a phi value, and the source is a real register, this is the beginning of the function.
					a.state.argRealRegs = append(a.state.argRealRegs, use)
				}
			}
		}

		// Everything still flagged live is live-in. Clearing the flag on the first hit both restores
		// .spilled and prevents duplicate entries when an ID appears in a.vs2 more than once.
		for _, v := range a.vs2 {
			st := s.getVRegState(v)
			// We use .spilled field to store the flag.
			if st.spilled == flagLive { //nolint:gosimple
				info.liveIns = append(info.liveIns, v)
				st.spilled = false
			}
		}

		info.seen = true
	}

	// Second phase: propagate live-ins through the loop nesting forest.
	nrs := f.LoopNestingForestRoots()
	for i := 0; i < nrs; i++ {
		root := f.LoopNestingForestRoot(i)
		a.loopTreeDFS(root)
	}
}
   486  
// loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way.
//
// Starting from entry, it walks the loop nesting forest with an explicit stack (a.blks). For each
// visited block, the live-ins that are not phi values of that block are appended to the live-ins of
// all of its forest children, and children that are loop headers are pushed for further processing.
// As in livenessAnalysis, vrState.spilled is borrowed as a scratch flag and is cleared before moving on.
func (a *Allocator) loopTreeDFS(entry Block) {
	a.blks = a.blks[:0]
	a.blks = append(a.blks, entry)

	s := &a.state
	for len(a.blks) > 0 {
		// Pop the next block to process.
		tail := len(a.blks) - 1
		loop := a.blks[tail]
		a.blks = a.blks[:tail]
		a.vs2 = a.vs2[:0]
		const (
			flagDone    = false
			flagPending = true
		)
		// Collect the values to propagate: live-ins of this block except its own phi values.
		info := a.getOrAllocateBlockState(loop.ID())
		for _, v := range info.liveIns {
			if s.phiBlk(v) != loop {
				a.vs2 = append(a.vs2, v)
				st := s.getVRegState(v)
				// We use .spilled field to store the flag.
				st.spilled = flagPending
			}
		}

		// siblingAddedView is the slice of values appended to the first child; the remaining children
		// receive exactly the same values.
		var siblingAddedView []VRegID
		cn := loop.LoopNestingForestChildren()
		for i := 0; i < cn; i++ {
			child := loop.LoopNestingForestChild(i)
			childID := child.ID()
			childInfo := a.getOrAllocateBlockState(childID)

			if i == 0 {
				begin := len(childInfo.liveIns)
				for _, v := range a.vs2 {
					st := s.getVRegState(v)
					// We use .spilled field to store the flag.
					// Clearing the flag here restores .spilled and skips repeated IDs within a.vs2.
					if st.spilled == flagPending { //nolint:gosimple
						st.spilled = flagDone
						// TODO: deduplicate, though I don't think it has much impact.
						childInfo.liveIns = append(childInfo.liveIns, v)
					}
				}
				siblingAddedView = childInfo.liveIns[begin:]
			} else {
				// TODO: deduplicate, though I don't think it has much impact.
				childInfo.liveIns = append(childInfo.liveIns, siblingAddedView...)
			}

			if child.LoopHeader() {
				a.blks = append(a.blks, child)
			}
		}

		if cn == 0 {
			// If there's no forest child, we haven't cleared the .spilled field at this point.
			for _, v := range a.vs2 {
				st := s.getVRegState(v)
				st.spilled = false
			}
		}
	}
}
   550  
   551  // alloc allocates registers for the given function by iterating the blocks in the reverse postorder.
   552  // The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
   553  // In short, this is a simply linear scan register allocation where each block inherits the register allocation state from
   554  // one of its predecessors. Each block inherits the selected state and starts allocation from there.
   555  // If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state").
   556  // The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because
   557  // at the point, we all know where the reloads happen, and therefore we can know the best place to spill the values. More precisely,
   558  // the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value.
   559  //
   560  // All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^.
   561  func (a *Allocator) alloc(f Function) {
   562  	// First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block).
   563  	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() {
   564  		if wazevoapi.RegAllocLoggingEnabled {
   565  			fmt.Printf("========== allocating blk%d ========\n", blk.ID())
   566  		}
   567  		if blk.Entry() {
   568  			a.finalizeStartReg(blk)
   569  		}
   570  		a.allocBlock(f, blk)
   571  	}
   572  	// After the allocation, we all know the start and end state of each block. So we can fix the merge states.
   573  	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() {
   574  		a.fixMergeState(f, blk)
   575  	}
   576  	// Finally, we insert the spill instructions as we know all the places where the reloads happen.
   577  	a.scheduleSpills(f)
   578  }
   579  
   580  func (a *Allocator) updateLiveInVRState(liveness *blockState) {
   581  	currentBlockID := a.state.currentBlockID
   582  	for _, v := range liveness.liveIns {
   583  		vs := a.state.getVRegState(v)
   584  		vs.lastUse = programCounterLiveIn
   585  		vs.lastUseUpdatedAtBlockID = currentBlockID
   586  	}
   587  }
   588  
   589  func (a *Allocator) finalizeStartReg(blk Block) {
   590  	bID := blk.ID()
   591  	liveness := a.getOrAllocateBlockState(bID)
   592  	s := &a.state
   593  	currentBlkState := a.getOrAllocateBlockState(bID)
   594  	if currentBlkState.startFromPredIndex > -1 {
   595  		return
   596  	}
   597  
   598  	s.currentBlockID = bID
   599  	a.updateLiveInVRState(liveness)
   600  
   601  	preds := blk.Preds()
   602  	var predState *blockState
   603  	switch preds {
   604  	case 0: // This is the entry block.
   605  	case 1:
   606  		predID := blk.Pred(0).ID()
   607  		predState = a.getOrAllocateBlockState(predID)
   608  		currentBlkState.startFromPredIndex = 0
   609  	default:
   610  		// TODO: there should be some better heuristic to choose the predecessor.
   611  		for i := 0; i < preds; i++ {
   612  			predID := blk.Pred(i).ID()
   613  			if _predState := a.getOrAllocateBlockState(predID); _predState.visited {
   614  				predState = _predState
   615  				currentBlkState.startFromPredIndex = i
   616  				break
   617  			}
   618  		}
   619  	}
   620  	if predState == nil {
   621  		if !blk.Entry() {
   622  			panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID()))
   623  		}
   624  		for _, u := range s.argRealRegs {
   625  			s.useRealReg(u.RealReg(), u)
   626  		}
   627  		currentBlkState.startFromPredIndex = 0
   628  	} else if predState != nil {
   629  		if wazevoapi.RegAllocLoggingEnabled {
   630  			fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n",
   631  				bID, blk.Pred(currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex)
   632  		}
   633  		s.resetAt(predState)
   634  	}
   635  
   636  	s.regsInUse.range_(func(allocated RealReg, v VReg) {
   637  		currentBlkState.startRegs.add(allocated, v)
   638  	})
   639  	if wazevoapi.RegAllocLoggingEnabled {
   640  		fmt.Printf("finalized start reg for blk%d: %s\n", blk.ID(), currentBlkState.startRegs.format(a.regInfo))
   641  	}
   642  }
   643  
   644  func (a *Allocator) allocBlock(f Function, blk Block) {
   645  	bID := blk.ID()
   646  	s := &a.state
   647  	currentBlkState := a.getOrAllocateBlockState(bID)
   648  	s.currentBlockID = bID
   649  
   650  	if currentBlkState.startFromPredIndex < 0 {
   651  		panic("BUG: startFromPredIndex should be set in finalizeStartReg prior to allocBlock")
   652  	}
   653  
   654  	// Clears the previous state.
   655  	s.regsInUse.range_(func(allocatedRealReg RealReg, vr VReg) {
   656  		s.setVRegState(vr, RealRegInvalid)
   657  	})
   658  	s.regsInUse.reset()
   659  	// Then set the start state.
   660  	currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) {
   661  		s.useRealReg(allocatedRealReg, vr)
   662  	})
   663  
   664  	desiredUpdated := a.vs2[:0]
   665  
   666  	// Update the last use of each VReg.
   667  	var pc programCounter
   668  	for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
   669  		var use, def VReg
   670  		for _, use = range instr.Uses(&a.vs) {
   671  			if !use.IsRealReg() {
   672  				s.getVRegState(use.ID()).lastUse = pc
   673  			}
   674  		}
   675  
   676  		if instr.IsCopy() {
   677  			def = instr.Defs(&a.vs)[0]
   678  			r := def.RealReg()
   679  			if r != RealRegInvalid {
   680  				useID := use.ID()
   681  				vs := s.getVRegState(useID)
   682  				if !vs.isPhi { // TODO: no idea why do we need this.
   683  					vs.desiredLoc = newDesiredLocReg(r)
   684  					desiredUpdated = append(desiredUpdated, useID)
   685  				}
   686  			}
   687  		}
   688  		pc++
   689  	}
   690  
   691  	// Mark all live-out values by checking live-in of the successors.
   692  	// While doing so, we also update the desired register values.
   693  	var succ Block
   694  	for i, ns := 0, blk.Succs(); i < ns; i++ {
   695  		succ = blk.Succ(i)
   696  		if succ == nil {
   697  			continue
   698  		}
   699  
   700  		succID := succ.ID()
   701  		succState := a.getOrAllocateBlockState(succID)
   702  		for _, v := range succState.liveIns {
   703  			if s.phiBlk(v) != succ {
   704  				st := s.getVRegState(v)
   705  				st.lastUse = programCounterLiveOut
   706  			}
   707  		}
   708  
   709  		if succState.startFromPredIndex > -1 {
   710  			if wazevoapi.RegAllocLoggingEnabled {
   711  				fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo))
   712  			}
   713  			succState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) {
   714  				vs := s.getVRegState(vr.ID())
   715  				vs.desiredLoc = newDesiredLocReg(allocatedRealReg)
   716  				desiredUpdated = append(desiredUpdated, vr.ID())
   717  			})
   718  			for _, p := range succ.BlockParams(&a.vs) {
   719  				vs := s.getVRegState(p.ID())
   720  				if vs.desiredLoc.realReg() == RealRegInvalid {
   721  					vs.desiredLoc = desiredLocStack
   722  					desiredUpdated = append(desiredUpdated, p.ID())
   723  				}
   724  			}
   725  		}
   726  	}
   727  
   728  	// Propagate the desired register values from the end of the block to the beginning.
   729  	for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() {
   730  		if instr.IsCopy() {
   731  			def := instr.Defs(&a.vs)[0]
   732  			defState := s.getVRegState(def.ID())
   733  			desired := defState.desiredLoc.realReg()
   734  			if desired == RealRegInvalid {
   735  				continue
   736  			}
   737  
   738  			use := instr.Uses(&a.vs)[0]
   739  			useID := use.ID()
   740  			useState := s.getVRegState(useID)
   741  			if s.phiBlk(useID) != succ && useState.desiredLoc == desiredLocUnspecified {
   742  				useState.desiredLoc = newDesiredLocReg(desired)
   743  				desiredUpdated = append(desiredUpdated, useID)
   744  			}
   745  		}
   746  	}
   747  
   748  	pc = 0
   749  	for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
   750  		if wazevoapi.RegAllocLoggingEnabled {
   751  			fmt.Println(instr)
   752  		}
   753  
   754  		var currentUsedSet RegSet
   755  		killSet := a.reals[:0]
   756  
   757  		// Gather the set of registers that will be used in the current instruction.
   758  		for _, use := range instr.Uses(&a.vs) {
   759  			if use.IsRealReg() {
   760  				r := use.RealReg()
   761  				currentUsedSet = currentUsedSet.add(r)
   762  				if a.allocatableSet.has(r) {
   763  					killSet = append(killSet, r)
   764  				}
   765  			} else {
   766  				vs := s.getVRegState(use.ID())
   767  				if r := vs.r; r != RealRegInvalid {
   768  					currentUsedSet = currentUsedSet.add(r)
   769  				}
   770  			}
   771  		}
   772  
   773  		for i, use := range instr.Uses(&a.vs) {
   774  			if !use.IsRealReg() {
   775  				vs := s.getVRegState(use.ID())
   776  				killed := vs.lastUse == pc
   777  				r := vs.r
   778  
   779  				if r == RealRegInvalid {
   780  					r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet,
   781  						// Prefer the desired register if it's available.
   782  						vs.desiredLoc.realReg())
   783  					vs.recordReload(f, blk)
   784  					f.ReloadRegisterBefore(use.SetRealReg(r), instr)
   785  					s.useRealReg(r, use)
   786  				}
   787  				if wazevoapi.RegAllocLoggingEnabled {
   788  					fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r))
   789  				}
   790  				instr.AssignUse(i, use.SetRealReg(r))
   791  				currentUsedSet = currentUsedSet.add(r)
   792  				if killed {
   793  					if wazevoapi.RegAllocLoggingEnabled {
   794  						fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r))
   795  					}
   796  					killSet = append(killSet, r)
   797  				}
   798  			}
   799  		}
   800  
   801  		isIndirect := instr.IsIndirectCall()
   802  		call := instr.IsCall() || isIndirect
   803  		if call {
   804  			addr := RealRegInvalid
   805  			if instr.IsIndirectCall() {
   806  				addr = a.vs[0].RealReg()
   807  			}
   808  			a.releaseCallerSavedRegs(addr)
   809  		}
   810  
   811  		for _, r := range killSet {
   812  			s.releaseRealReg(r)
   813  		}
   814  		a.reals = killSet
   815  
   816  		defs := instr.Defs(&a.vs)
   817  		switch {
   818  		case len(defs) > 1:
   819  			// Some instructions define multiple values on real registers.
   820  			// E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx.
   821  			//
   822  			// Note that currently I assume that such instructions define only the pre colored real registers, not the VRegs
   823  			// that require allocations. If we need to support such case, we need to add the logic to handle it here,
   824  			// though is there any such instruction?
   825  			for _, def := range defs {
   826  				if !def.IsRealReg() {
   827  					panic("BUG: multiple defs should be on real registers")
   828  				}
   829  				r := def.RealReg()
   830  				if s.regsInUse.has(r) {
   831  					s.releaseRealReg(r)
   832  				}
   833  				s.useRealReg(r, def)
   834  			}
   835  		case len(defs) == 1:
   836  			def := defs[0]
   837  			if def.IsRealReg() {
   838  				r := def.RealReg()
   839  				if a.allocatableSet.has(r) {
   840  					if s.regsInUse.has(r) {
   841  						s.releaseRealReg(r)
   842  					}
   843  					s.useRealReg(r, def)
   844  				}
   845  			} else {
   846  				vState := s.getVRegState(def.ID())
   847  				r := vState.r
   848  
   849  				if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid {
   850  					if r != desired {
   851  						if (vState.isPhi && vState.defBlk == succ) ||
   852  							// If this is not a phi and it's already assigned a real reg,
   853  							// this value has multiple definitions, hence we cannot assign the desired register.
   854  							(!s.regsInUse.has(desired) && r == RealRegInvalid) {
   855  							// If the phi value is passed via a real register, we force the value to be in the desired register.
   856  							if wazevoapi.RegAllocLoggingEnabled {
   857  								fmt.Printf("\t\tv%d is phi and desiredReg=%s\n", def.ID(), a.regInfo.RealRegName(desired))
   858  							}
   859  							if r != RealRegInvalid {
   860  								// If the value is already in a different real register, we release it to change the state.
   861  								// Otherwise, multiple registers might have the same values at the end, which results in
   862  								// messing up the merge state reconciliation.
   863  								s.releaseRealReg(r)
   864  							}
   865  							r = desired
   866  							s.releaseRealReg(r)
   867  							s.useRealReg(r, def)
   868  						}
   869  					}
   870  				}
   871  
   872  				// Allocate a new real register if `def` is not currently assigned one.
   873  				// It can happen when multiple instructions define the same VReg (e.g. const loads).
   874  				if r == RealRegInvalid {
   875  					if instr.IsCopy() {
   876  						copySrc := instr.Uses(&a.vs)[0].RealReg()
   877  						if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) {
   878  							r = copySrc
   879  						}
   880  					}
   881  					if r == RealRegInvalid {
   882  						typ := def.RegType()
   883  						r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid)
   884  					}
   885  					s.useRealReg(r, def)
   886  				}
   887  				dr := def.SetRealReg(r)
   888  				instr.AssignDef(dr)
   889  				if wazevoapi.RegAllocLoggingEnabled {
   890  					fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r))
   891  				}
   892  				if vState.isPhi {
   893  					if vState.desiredLoc.stack() { // Stack based phi value.
   894  						f.StoreRegisterAfter(dr, instr)
   895  						// Release the real register as it's not used anymore.
   896  						s.releaseRealReg(r)
   897  					} else {
   898  						// Only the register based phis are necessary to track the defining instructions
   899  						// since the stack-based phis are already having stores inserted ^.
   900  						n := a.phiDefInstListPool.Allocate()
   901  						n.instr = instr
   902  						n.next = vState.phiDefInstList
   903  						n.v = dr
   904  						vState.phiDefInstList = n
   905  					}
   906  				} else {
   907  					vState.defInstr = instr
   908  					vState.defBlk = blk
   909  				}
   910  			}
   911  		}
   912  		if wazevoapi.RegAllocLoggingEnabled {
   913  			fmt.Println(instr)
   914  		}
   915  		pc++
   916  	}
   917  
   918  	s.regsInUse.range_(func(allocated RealReg, v VReg) {
   919  		currentBlkState.endRegs.add(allocated, v)
   920  	})
   921  
   922  	currentBlkState.visited = true
   923  	if wazevoapi.RegAllocLoggingEnabled {
   924  		currentBlkState.dump(a.regInfo)
   925  	}
   926  
   927  	// Reset the desired end location.
   928  	for _, v := range desiredUpdated {
   929  		vs := s.getVRegState(v)
   930  		vs.desiredLoc = desiredLocUnspecified
   931  	}
   932  	a.vs2 = desiredUpdated[:0]
   933  
   934  	for i := 0; i < blk.Succs(); i++ {
   935  		succ := blk.Succ(i)
   936  		if succ == nil {
   937  			continue
   938  		}
   939  		// If the successor is not visited yet, finalize the start state.
   940  		a.finalizeStartReg(succ)
   941  	}
   942  }
   943  
   944  func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {
   945  	s := &a.state
   946  
   947  	for i := 0; i < 64; i++ {
   948  		allocated := RealReg(i)
   949  		if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
   950  			continue
   951  		}
   952  		if v := s.regsInUse.get(allocated); v.Valid() {
   953  			if v.IsRealReg() {
   954  				continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg.
   955  			}
   956  			if !a.regInfo.CallerSavedRegisters.has(allocated) {
   957  				// If this is not a caller-saved register, it is safe to keep it across the call.
   958  				continue
   959  			}
   960  			s.releaseRealReg(allocated)
   961  		}
   962  	}
   963  }
   964  
   965  func (a *Allocator) fixMergeState(f Function, blk Block) {
   966  	preds := blk.Preds()
   967  	if preds <= 1 {
   968  		return
   969  	}
   970  
   971  	s := &a.state
   972  
   973  	// Restores the state at the beginning of the block.
   974  	bID := blk.ID()
   975  	blkSt := a.getOrAllocateBlockState(bID)
   976  	desiredOccupants := &blkSt.startRegs
   977  	aliveOnRegVRegs := make(map[VReg]RealReg)
   978  	for i := 0; i < 64; i++ {
   979  		r := RealReg(i)
   980  		if v := blkSt.startRegs.get(r); v.Valid() {
   981  			aliveOnRegVRegs[v] = r
   982  		}
   983  	}
   984  
   985  	if wazevoapi.RegAllocLoggingEnabled {
   986  		fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo))
   987  	}
   988  
   989  	s.currentBlockID = bID
   990  	a.updateLiveInVRState(a.getOrAllocateBlockState(bID))
   991  
   992  	currentOccupants := &a.currentOccupants
   993  	for i := 0; i < preds; i++ {
   994  		currentOccupants.reset()
   995  		if i == blkSt.startFromPredIndex {
   996  			continue
   997  		}
   998  
   999  		currentOccupantsRev := make(map[VReg]RealReg)
  1000  		pred := blk.Pred(i)
  1001  		predSt := a.getOrAllocateBlockState(pred.ID())
  1002  		for ii := 0; ii < 64; ii++ {
  1003  			r := RealReg(ii)
  1004  			if v := predSt.endRegs.get(r); v.Valid() {
  1005  				if _, ok := aliveOnRegVRegs[v]; !ok {
  1006  					continue
  1007  				}
  1008  				currentOccupants.add(r, v)
  1009  				currentOccupantsRev[v] = r
  1010  			}
  1011  		}
  1012  
  1013  		s.resetAt(predSt)
  1014  
  1015  		// Finds the free registers if any.
  1016  		intTmp, floatTmp := VRegInvalid, VRegInvalid
  1017  		if intFree := s.findAllocatable(
  1018  			a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set,
  1019  		); intFree != RealRegInvalid {
  1020  			intTmp = FromRealReg(intFree, RegTypeInt)
  1021  		}
  1022  		if floatFree := s.findAllocatable(
  1023  			a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set,
  1024  		); floatFree != RealRegInvalid {
  1025  			floatTmp = FromRealReg(floatFree, RegTypeFloat)
  1026  		}
  1027  
  1028  		if wazevoapi.RegAllocLoggingEnabled {
  1029  			fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
  1030  		}
  1031  
  1032  		for ii := 0; ii < 64; ii++ {
  1033  			r := RealReg(ii)
  1034  			desiredVReg := desiredOccupants.get(r)
  1035  			if !desiredVReg.Valid() {
  1036  				continue
  1037  			}
  1038  
  1039  			currentVReg := currentOccupants.get(r)
  1040  			if desiredVReg.ID() == currentVReg.ID() {
  1041  				continue
  1042  			}
  1043  
  1044  			typ := desiredVReg.RegType()
  1045  			var tmpRealReg VReg
  1046  			if typ == RegTypeInt {
  1047  				tmpRealReg = intTmp
  1048  			} else {
  1049  				tmpRealReg = floatTmp
  1050  			}
  1051  			a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ)
  1052  		}
  1053  	}
  1054  }
  1055  
  1056  func (a *Allocator) reconcileEdge(f Function,
  1057  	r RealReg,
  1058  	pred Block,
  1059  	currentOccupants *regInUseSet,
  1060  	currentOccupantsRev map[VReg]RealReg,
  1061  	currentVReg, desiredVReg VReg,
  1062  	freeReg VReg,
  1063  	typ RegType,
  1064  ) {
  1065  	s := &a.state
  1066  	if currentVReg.Valid() {
  1067  		// Both are on reg.
  1068  		er, ok := currentOccupantsRev[desiredVReg]
  1069  		if !ok {
  1070  			if wazevoapi.RegAllocLoggingEnabled {
  1071  				fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",
  1072  					desiredVReg.ID(), a.regInfo.RealRegName(r),
  1073  				)
  1074  			}
  1075  			// This case is that the desired value is on the stack, but currentVReg is on the target register.
  1076  			// We need to move the current value to the stack, and reload the desired value.
  1077  			// TODO: we can do better here.
  1078  			f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())
  1079  			delete(currentOccupantsRev, currentVReg)
  1080  
  1081  			s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
  1082  			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
  1083  			currentOccupants.add(r, desiredVReg)
  1084  			currentOccupantsRev[desiredVReg] = r
  1085  			return
  1086  		}
  1087  
  1088  		if wazevoapi.RegAllocLoggingEnabled {
  1089  			fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
  1090  				desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
  1091  			)
  1092  		}
  1093  		f.SwapBefore(
  1094  			currentVReg.SetRealReg(r),
  1095  			desiredVReg.SetRealReg(er),
  1096  			freeReg,
  1097  			pred.LastInstrForInsertion(),
  1098  		)
  1099  		s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
  1100  		currentOccupantsRev[desiredVReg] = r
  1101  		currentOccupantsRev[currentVReg] = er
  1102  		currentOccupants.add(r, desiredVReg)
  1103  		currentOccupants.add(er, currentVReg)
  1104  		if wazevoapi.RegAllocLoggingEnabled {
  1105  			fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
  1106  		}
  1107  	} else {
  1108  		// Desired is on reg, but currently the target register is not used.
  1109  		if wazevoapi.RegAllocLoggingEnabled {
  1110  			fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",
  1111  				desiredVReg.ID(), a.regInfo.RealRegName(r),
  1112  			)
  1113  		}
  1114  		if currentReg, ok := currentOccupantsRev[desiredVReg]; ok {
  1115  			f.InsertMoveBefore(
  1116  				FromRealReg(r, typ),
  1117  				desiredVReg.SetRealReg(currentReg),
  1118  				pred.LastInstrForInsertion(),
  1119  			)
  1120  			currentOccupants.remove(currentReg)
  1121  		} else {
  1122  			s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
  1123  			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
  1124  		}
  1125  		currentOccupantsRev[desiredVReg] = r
  1126  		currentOccupants.add(r, desiredVReg)
  1127  	}
  1128  
  1129  	if wazevoapi.RegAllocLoggingEnabled {
  1130  		fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
  1131  	}
  1132  }
  1133  
  1134  func (a *Allocator) scheduleSpills(f Function) {
  1135  	states := a.state.vrStates
  1136  	for i := 0; i <= states.MaxIDEncountered(); i++ {
  1137  		vs := states.Get(i)
  1138  		if vs == nil {
  1139  			continue
  1140  		}
  1141  		if vs.spilled {
  1142  			a.scheduleSpill(f, vs)
  1143  		}
  1144  	}
  1145  }
  1146  
  1147  func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
  1148  	v := vs.v
  1149  	// If the value is the phi value, we need to insert a spill after each phi definition.
  1150  	if vs.isPhi {
  1151  		for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next {
  1152  			f.StoreRegisterAfter(defInstr.v, defInstr.instr)
  1153  		}
  1154  		return
  1155  	}
  1156  
  1157  	pos := vs.lca
  1158  	definingBlk := vs.defBlk
  1159  	r := RealRegInvalid
  1160  	if definingBlk == nil {
  1161  		panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
  1162  	}
  1163  	if pos == nil {
  1164  		panic(fmt.Sprintf("BUG: pos should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
  1165  	}
  1166  
  1167  	if wazevoapi.RegAllocLoggingEnabled {
  1168  		fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID())
  1169  	}
  1170  	for pos != definingBlk {
  1171  		st := a.getOrAllocateBlockState(pos.ID())
  1172  		for ii := 0; ii < 64; ii++ {
  1173  			rr := RealReg(ii)
  1174  			if st.startRegs.get(rr) == v {
  1175  				r = rr
  1176  				// Already in the register, so we can place the spill at the beginning of the block.
  1177  				break
  1178  			}
  1179  		}
  1180  
  1181  		if r != RealRegInvalid {
  1182  			break
  1183  		}
  1184  
  1185  		pos = f.Idom(pos)
  1186  	}
  1187  
  1188  	if pos == definingBlk {
  1189  		defInstr := vs.defInstr
  1190  		defInstr.Defs(&a.vs)
  1191  		if wazevoapi.RegAllocLoggingEnabled {
  1192  			fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr)
  1193  		}
  1194  		f.StoreRegisterAfter(a.vs[0], defInstr)
  1195  	} else {
  1196  		// Found an ancestor block that holds the value in the register at the beginning of the block.
  1197  		// We need to insert a spill before the last use.
  1198  		first := pos.FirstInstr()
  1199  		if wazevoapi.RegAllocLoggingEnabled {
  1200  			fmt.Printf("schedule spill v%d before %v\n", v.ID(), first)
  1201  		}
  1202  		f.StoreRegisterAfter(v.SetRealReg(r), first)
  1203  	}
  1204  }
  1205  
  1206  // Reset resets the allocator's internal state so that it can be reused.
  1207  func (a *Allocator) Reset() {
  1208  	a.state.reset()
  1209  	a.blockStates.Reset()
  1210  	a.phiDefInstListPool.Reset()
  1211  	a.vs = a.vs[:0]
  1212  }