github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/wazevo/backend/regalloc/regalloc.go (about)

     1  // Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in
     2  // api.go.
     3  package regalloc
     4  
     5  // References:
     6  // * https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf
     7  // * https://en.wikipedia.org/wiki/Chaitin%27s_algorithm
     8  // * https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf
     9  // * https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis.
    10  
    11  import (
    12  	"fmt"
    13  	"math"
    14  	"strings"
    15  
    16  	"github.com/bananabytelabs/wazero/internal/engine/wazevo/wazevoapi"
    17  )
    18  
    19  // NewAllocator returns a new Allocator.
    20  func NewAllocator(allocatableRegs *RegisterInfo) Allocator {
    21  	a := Allocator{
    22  		regInfo:               allocatableRegs,
    23  		blockLivenessDataPool: wazevoapi.NewPool[blockLivenessData](resetBlockLivenessData),
    24  		phiDefInstListPool:    wazevoapi.NewPool[phiDefInstList](resetPhiDefInstList),
    25  		blockStatePool:        wazevoapi.NewPool[blockState](resetBlockState),
    26  	}
    27  	a.state.reset()
    28  	for _, regs := range allocatableRegs.AllocatableRegisters {
    29  		for _, r := range regs {
    30  			a.allocatableSet = a.allocatableSet.add(r)
    31  		}
    32  	}
    33  	return a
    34  }
    35  
type (
	// RegisterInfo holds the statically-known ISA-specific register information.
	RegisterInfo struct {
		// AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum.
		// The order matters: the first element is the most preferred one when allocating.
		AllocatableRegisters [NumRegType][]RealReg
		// CalleeSavedRegisters flags registers per RealReg index.
		// NOTE(review): presumably consulted by isCalleeSaved, which is defined outside this section — confirm.
		CalleeSavedRegisters [RealRegsNumMax]bool
		// CallerSavedRegisters flags registers per RealReg index.
		// NOTE(review): presumably consulted by isCallerSaved, which is defined outside this section — confirm.
		CallerSavedRegisters [RealRegsNumMax]bool
		// RealRegToVReg maps a RealReg (used as the index) to the VReg that represents it.
		RealRegToVReg []VReg
		// RealRegName returns the name of the given RealReg for debugging.
		RealRegName func(r RealReg) string
		// RealRegType returns the RegType of the given RealReg.
		RealRegType func(r RealReg) RegType
	}

	// Allocator is a register allocator.
	Allocator struct {
		// regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator.
		regInfo *RegisterInfo
		// allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA.
		allocatableSet regSet
		// allocatedCalleeSavedRegs is rebuilt by determineCalleeSavedRealRegs and passed to Function.ClobberedRegisters.
		allocatedCalleeSavedRegs []VReg
		// blockLivenessDataPool is the pool of blockLivenessData; it is created with resetBlockLivenessData as its reset function.
		blockLivenessDataPool wazevoapi.Pool[blockLivenessData]
		// blockLivenessData maps a block ID to its liveness data.
		blockLivenessData [] /* blockID to */ *blockLivenessData
		// vs is a scratch slice handed to Instr.Uses, Instr.Defs and Block.BlockParams, and reused by loopTreeDFS.
		vs []VReg
		// maxBlockID is the largest block ID observed by livenessAnalysis.
		maxBlockID int
		// phiDefInstListPool allocates the phiDefInstList nodes recorded while allocating a block.
		phiDefInstListPool wazevoapi.Pool[phiDefInstList]

		// The following fields are reused in various places, e.g. coloring.

		// blks is the work stack of loopTreeDFS.
		blks []Block
		// reals is the backing storage of the per-instruction kill set in allocBlock.
		reals []RealReg
		// currentOccupants is the scratch set used by fixMergeState to track a predecessor's register contents.
		currentOccupants regInUseSet

		// The following fields are updated while iterating the blocks in the reverse postorder.

		// state is the allocation state that is carried from instruction to instruction.
		state state
		// blockStatePool allocates the blockState values handed out by getBlockState.
		blockStatePool wazevoapi.Pool[blockState]
		// blockIDToBlockState maps a block ID to its blockState; entries are created lazily by getBlockState.
		blockIDToBlockState []*blockState
	}

	// blockLivenessData is the per-block information used during the register allocation.
	blockLivenessData struct {
		// seen is set once livenessAnalysis has finished processing the block; an unseen successor indicates a back edge.
		seen bool
		// liveOuts is the set of VRegs that are live at the end of the block.
		liveOuts map[VReg]struct{}
		// liveIns is the set of VRegs that are live at the beginning of the block.
		liveIns map[VReg]struct{}
	}

	// programCounter represents an opaque index into the program which is used to represent a LiveInterval of a VReg.
	programCounter int32

	// state is the mutable allocation state shared by all the blocks of the function being allocated.
	state struct {
		// argRealRegs holds the real-register-backed VRegs collected by livenessAnalysis;
		// they are marked as in use at the beginning of the entry block.
		argRealRegs []VReg
		// regsInUse maps each RealReg that is currently occupied to the VReg occupying it.
		regsInUse regInUseSet
		// vrStates is indexed by VReg ID.
		vrStates []vrState
		// maxVRegIDEncountered is the largest VReg ID passed to getVRegState so far, or -1 after reset.
		maxVRegIDEncountered int

		// allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function.
		allocatedRegSet regSet
	}

	// blockState is the per-block allocation result that is used to reconcile the control-flow edges.
	blockState struct {
		// visited is set once allocBlock has finished with the block.
		visited bool
		// startFromPredIndex is the index of the predecessor whose end state this block started from; -1 after reset.
		startFromPredIndex int
		// startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges.
		startRegs regInUseSet
		// endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges.
		endRegs regInUseSet
	}

	// vrState is the per-VReg allocation state.
	vrState struct {
		// v is the VReg this state belongs to.
		v VReg
		// r is the RealReg currently holding the value, or RealRegInvalid if it is not in any register.
		r RealReg
		// defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil.
		defInstr Instr
		// defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value.
		defBlk Block
		// spilled is true if this value is spilled i.e. the value is reloaded from the stack somewhere in the program.
		spilled bool
		// lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that
		// reload this value. This is used to determine the spill location. Only valid if spilled=true.
		lca Block
		// lastUse is the program counter of the last use of this value. This changes while iterating the block, and
		// should not be used across the blocks as it becomes invalid.
		lastUse programCounter
		// isPhi is true if this is a phi value.
		isPhi bool
		// phiDefInstList is a list of instructions that define this phi value.
		// This is used to determine the spill location, and only valid if isPhi=true.
		*phiDefInstList
	}

	// phiDefInstList is a linked list of instructions that define a phi value.
	phiDefInstList struct {
		// instr is the defining instruction held by this node.
		instr Instr
		// next is the following node, or nil at the end of the list.
		next *phiDefInstList
	}
)
   131  
   132  func resetPhiDefInstList(l *phiDefInstList) {
   133  	l.instr = nil
   134  	l.next = nil
   135  }
   136  
   137  func (s *state) dump(info *RegisterInfo) { //nolint:unused
   138  	fmt.Println("\t\tstate:")
   139  	fmt.Println("\t\t\targRealRegs:", s.argRealRegs)
   140  	fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info))
   141  	fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info))
   142  	fmt.Println("\t\t\tused:", s.regsInUse.format(info))
   143  	fmt.Println("\t\t\tmaxVRegIDEncountered:", s.maxVRegIDEncountered)
   144  	var strs []string
   145  	for i, v := range s.vrStates {
   146  		if v.r != RealRegInvalid {
   147  			strs = append(strs, fmt.Sprintf("(v%d: %s)", i, info.RealRegName(v.r)))
   148  		}
   149  	}
   150  	fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", "))
   151  }
   152  
   153  func (s *state) reset() {
   154  	s.argRealRegs = s.argRealRegs[:0]
   155  	for i, l := 0, len(s.vrStates); i <= s.maxVRegIDEncountered && i < l; i++ {
   156  		s.vrStates[i].reset()
   157  	}
   158  	s.maxVRegIDEncountered = -1
   159  	s.allocatedRegSet = regSet(0)
   160  	s.regsInUse.reset()
   161  }
   162  
   163  func (a *Allocator) getBlockState(bID int) *blockState {
   164  	if bID >= len(a.blockIDToBlockState) {
   165  		a.blockIDToBlockState = append(a.blockIDToBlockState, make([]*blockState, bID+1)...)
   166  	}
   167  
   168  	st := a.blockIDToBlockState[bID]
   169  	if st == nil {
   170  		st = a.blockStatePool.Allocate()
   171  		a.blockIDToBlockState[bID] = st
   172  	}
   173  	return st
   174  }
   175  
   176  func (s *state) setVRegState(v VReg, r RealReg) {
   177  	id := int(v.ID())
   178  	if id >= len(s.vrStates) {
   179  		s.vrStates = append(s.vrStates, make([]vrState, id+1-len(s.vrStates))...)
   180  		s.vrStates = s.vrStates[:cap(s.vrStates)]
   181  	}
   182  
   183  	st := &s.vrStates[id]
   184  	st.r = r
   185  	st.v = v
   186  }
   187  
   188  func (vs *vrState) reset() {
   189  	vs.r = RealRegInvalid
   190  	vs.defInstr = nil
   191  	vs.defBlk = nil
   192  	vs.spilled = false
   193  	vs.lca = nil
   194  	vs.isPhi = false
   195  	vs.phiDefInstList = nil
   196  }
   197  
   198  func (s *state) getVRegState(v VReg) *vrState {
   199  	id := int(v.ID())
   200  	if id >= len(s.vrStates) {
   201  		s.setVRegState(v, RealRegInvalid)
   202  	}
   203  	if s.maxVRegIDEncountered < id {
   204  		s.maxVRegIDEncountered = id
   205  	}
   206  	return &s.vrStates[id]
   207  }
   208  
   209  func (s *state) useRealReg(r RealReg, v VReg) {
   210  	if s.regsInUse.has(r) {
   211  		panic("BUG: useRealReg: the given real register is already used")
   212  	}
   213  	s.regsInUse.add(r, v)
   214  	s.setVRegState(v, r)
   215  	s.allocatedRegSet = s.allocatedRegSet.add(r)
   216  }
   217  
   218  func (s *state) releaseRealReg(r RealReg) {
   219  	current := s.regsInUse.get(r)
   220  	if current.Valid() {
   221  		s.regsInUse.remove(r)
   222  		s.setVRegState(current, RealRegInvalid)
   223  	}
   224  }
   225  
   226  // recordReload records that the given VReg is reloaded in the given block.
   227  // This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value.
   228  func (vs *vrState) recordReload(f Function, blk Block) {
   229  	vs.spilled = true
   230  	if vs.lca == nil {
   231  		if wazevoapi.RegAllocLoggingEnabled {
   232  			fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID())
   233  		}
   234  		vs.lca = blk
   235  	} else {
   236  		if wazevoapi.RegAllocLoggingEnabled {
   237  			fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID())
   238  		}
   239  		vs.lca = f.LowestCommonAncestor(vs.lca, blk)
   240  		if wazevoapi.RegAllocLoggingEnabled {
   241  			fmt.Printf("updated lca=%d\n", vs.lca.ID())
   242  		}
   243  	}
   244  }
   245  
   246  func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forbiddenMask regSet) (r RealReg) {
   247  	r = RealRegInvalid
   248  	var lastUseAt programCounter = math.MinInt32
   249  	var spillVReg VReg
   250  	for _, candidateReal := range allocatable {
   251  		if forbiddenMask.has(candidateReal) {
   252  			continue
   253  		}
   254  
   255  		using := s.regsInUse.get(candidateReal)
   256  		if using == VRegInvalid {
   257  			// This is not used at this point.
   258  			return candidateReal
   259  		}
   260  
   261  		if last := s.getVRegState(using).lastUse; last > lastUseAt {
   262  			lastUseAt = last
   263  			r = candidateReal
   264  			spillVReg = using
   265  		}
   266  	}
   267  
   268  	if r == RealRegInvalid {
   269  		panic("not found any allocatable register")
   270  	}
   271  
   272  	if wazevoapi.RegAllocLoggingEnabled {
   273  		fmt.Printf("\tspilling v%d when: %s\n", spillVReg.ID(), forbiddenMask.format(a.regInfo))
   274  	}
   275  	s.releaseRealReg(r)
   276  	return r
   277  }
   278  
   279  func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask regSet) RealReg {
   280  	for _, r := range allocatable {
   281  		if !s.regsInUse.has(r) && !forbiddenMask.has(r) {
   282  			return r
   283  		}
   284  	}
   285  	return RealRegInvalid
   286  }
   287  
   288  func (s *state) resetAt(bs *blockState, liveIns map[VReg]struct{}) {
   289  	s.regsInUse.range_(func(_ RealReg, vr VReg) {
   290  		s.setVRegState(vr, RealRegInvalid)
   291  	})
   292  	s.regsInUse.reset()
   293  	bs.endRegs.range_(func(r RealReg, v VReg) {
   294  		if _, ok := liveIns[v]; ok {
   295  			s.regsInUse.add(r, v)
   296  			s.setVRegState(v, r)
   297  		}
   298  	})
   299  }
   300  
   301  func resetBlockState(b *blockState) {
   302  	b.visited = false
   303  	b.endRegs.reset()
   304  	b.startRegs.reset()
   305  	b.startFromPredIndex = -1
   306  }
   307  
   308  func (b *blockState) dump(a *RegisterInfo) {
   309  	fmt.Println("\t\tblockState:")
   310  	fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a))
   311  	fmt.Println("\t\t\tendRegs:", b.endRegs.format(a))
   312  	fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex)
   313  	fmt.Println("\t\t\tvisited:", b.visited)
   314  }
   315  
   316  // DoAllocation performs register allocation on the given Function.
   317  func (a *Allocator) DoAllocation(f Function) {
   318  	a.livenessAnalysis(f)
   319  	a.alloc(f)
   320  	a.determineCalleeSavedRealRegs(f)
   321  	f.Done()
   322  }
   323  
   324  func (a *Allocator) determineCalleeSavedRealRegs(f Function) {
   325  	a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0]
   326  	a.state.allocatedRegSet.range_(func(allocatedRealReg RealReg) {
   327  		if a.regInfo.isCalleeSaved(allocatedRealReg) {
   328  			a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg])
   329  		}
   330  	})
   331  	f.ClobberedRegisters(a.allocatedCalleeSavedRegs)
   332  }
   333  
   334  // phiBlk returns the block that defines the given phi value, nil otherwise.
   335  func (s *state) phiBlk(v VReg) Block {
   336  	vs := s.getVRegState(v)
   337  	if vs.isPhi {
   338  		return vs.defBlk
   339  	}
   340  	return nil
   341  }
   342  
// livenessAnalysis constructs Allocator.blockLivenessData.
// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2.
//
// It runs in three steps: the liveness data is allocated and the phi values are marked for every block,
// then the live-in/live-out sets are computed in one post-order pass which ignores the back edges, and finally
// the sets are propagated through the loop nesting forest by loopTreeDFS.
func (a *Allocator) livenessAnalysis(f Function) {
	// First, we need to allocate blockLivenessData.
	s := &a.state
	for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter.
		a.allocateBlockLivenessData(blk.ID())

		// We should gather phi value data.
		for _, p := range blk.BlockParams(&a.vs) {
			vs := s.getVRegState(p)
			vs.isPhi = true
			vs.defBlk = blk
		}
		if blk.ID() > a.maxBlockID {
			a.maxBlockID = blk.ID()
		}
	}

	// Run the Algorithm 9.2 in the book.
	for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() {
		blkID := blk.ID()
		info := a.livenessDataAt(blkID)

		ns := blk.Succs()
		for i := 0; i < ns; i++ {
			succ := blk.Succ(i)
			if succ == nil {
				continue
			}

			succID := succ.ID()
			succInfo := a.livenessDataAt(succID)
			if !succInfo.seen { // This means the back edge.
				continue
			}

			// Everything live into the successor, except for the successor's own phi values, is live through the end
			// of this block. It is also provisionally live-in here; the definitions found below remove it again.
			for v := range succInfo.liveIns {
				if s.phiBlk(v) != succ {
					info.liveOuts[v] = struct{}{}
					info.liveIns[v] = struct{}{}
				}
			}
		}

		for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() {

			// use and def are declared outside the loops on purpose: after the loops, they hold the last
			// use and the last def of the instruction (or the zero value if there is none).
			var use, def VReg
			for _, def = range instr.Defs(&a.vs) {
				if !def.IsRealReg() {
					delete(info.liveIns, def)
				}
			}
			for _, use = range instr.Uses(&a.vs) {
				if !use.IsRealReg() {
					info.liveIns[use] = struct{}{}
				}
			}

			// If the destination is a phi value, and ...
			if def.Valid() && s.phiBlk(def) != nil {
				if use.Valid() && use.IsRealReg() {
					// If the source is a real register, this is the beginning of the function.
					a.state.argRealRegs = append(a.state.argRealRegs, use)
				} else {
					// Otherwise, this is the definition of the phi value for the successor block.
					// So we need to make it outlive the block.
					info.liveOuts[def] = struct{}{}
				}
			}
		}
		info.seen = true
	}

	// Propagate the liveness through the loops, starting from each root of the loop nesting forest.
	nrs := f.LoopNestingForestRoots()
	for i := 0; i < nrs; i++ {
		root := f.LoopNestingForestRoot(i)
		a.loopTreeDFS(root)
	}
}
   423  
   424  // loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way.
   425  func (a *Allocator) loopTreeDFS(entry Block) {
   426  	a.blks = a.blks[:0]
   427  	a.blks = append(a.blks, entry)
   428  
   429  	s := &a.state
   430  	for len(a.blks) > 0 {
   431  		tail := len(a.blks) - 1
   432  		loop := a.blks[tail]
   433  		a.blks = a.blks[:tail]
   434  		a.vs = a.vs[:0]
   435  
   436  		info := a.livenessDataAt(loop.ID())
   437  		for v := range info.liveIns {
   438  			if s.phiBlk(v) != loop {
   439  				a.vs = append(a.vs, v)
   440  				info.liveOuts[v] = struct{}{}
   441  			}
   442  		}
   443  
   444  		cn := loop.LoopNestingForestChildren()
   445  		for i := 0; i < cn; i++ {
   446  			child := loop.LoopNestingForestChild(i)
   447  			childID := child.ID()
   448  			childInfo := a.livenessDataAt(childID)
   449  			for _, v := range a.vs {
   450  				childInfo.liveIns[v] = struct{}{}
   451  				childInfo.liveOuts[v] = struct{}{}
   452  			}
   453  			if child.LoopHeader() {
   454  				a.blks = append(a.blks, child)
   455  			}
   456  		}
   457  	}
   458  }
   459  
   460  // alloc allocates registers for the given function by iterating the blocks in the reverse postorder.
   461  // The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
   462  // In short, this is a simply linear scan register allocation where each block inherits the register allocation state from
   463  // one of its predecessors. Each block inherits the selected state and starts allocation from there.
   464  // If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state").
   465  // The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because
   466  // at the point, we all know where the reloads happen, and therefore we can know the best place to spill the values. More precisely,
   467  // the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value.
   468  //
   469  // All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^.
   470  func (a *Allocator) alloc(f Function) {
   471  	// First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block).
   472  	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() {
   473  		if wazevoapi.RegAllocLoggingEnabled {
   474  			fmt.Printf("========== allocating blk%d ========\n", blk.ID())
   475  		}
   476  		a.allocBlock(f, blk)
   477  	}
   478  	// After the allocation, we all know the start and end state of each block. So we can fix the merge states.
   479  	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() {
   480  		a.fixMergeState(f, blk)
   481  	}
   482  	// Finally, we insert the spill instructions as we know all the places where the reloads happen.
   483  	a.scheduleSpills(f)
   484  }
   485  
// allocBlock assigns real registers to the instructions of blk.
//
// The block starts from the end state of one of its predecessors (the only one, or the first visited one),
// restricted to the live-in values of blk. The entry block starts with the argument registers in use instead.
// The register state at the beginning and at the end of the block is saved into the block's blockState so that
// fixMergeState can reconcile the edges afterwards. It panics if a non-entry block has no visited predecessor.
func (a *Allocator) allocBlock(f Function, blk Block) {
	bID := blk.ID()
	liveness := a.livenessDataAt(bID)
	s := &a.state
	currentBlkState := a.getBlockState(bID)

	// Select the predecessor whose end state this block inherits.
	preds := blk.Preds()
	var predState *blockState
	switch preds {
	case 0: // This is the entry block.
	case 1:
		predID := blk.Pred(0).ID()
		predState = a.getBlockState(predID)
		currentBlkState.startFromPredIndex = 0
	default:
		// TODO: there should be some better heuristic to choose the predecessor.
		for i := 0; i < preds; i++ {
			predID := blk.Pred(i).ID()
			if _predState := a.getBlockState(predID); _predState.visited {
				predState = _predState
				currentBlkState.startFromPredIndex = i
				break
			}
		}
	}
	if predState == nil {
		if !blk.Entry() {
			panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID()))
		}
		// The entry block: the argument registers gathered by livenessAnalysis are occupied from the start.
		for _, u := range s.argRealRegs {
			s.useRealReg(u.RealReg(), u)
		}
	} else if predState != nil {
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n",
				bID, blk.Pred(currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex)
		}
		s.resetAt(predState, liveness.liveIns)
	}

	// Remember the state at the beginning of the block for fixMergeState.
	s.regsInUse.range_(func(allocated RealReg, v VReg) {
		currentBlkState.startRegs.add(allocated, v)
	})

	// Update the last use of each VReg.
	var pc programCounter
	for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
		for _, use := range instr.Uses(&a.vs) {
			if !use.IsRealReg() {
				s.getVRegState(use).lastUse = pc
			}
		}
		pc++
	}
	// Reset the last use of the liveOuts.
	for outlive := range liveness.liveOuts {
		s.getVRegState(outlive).lastUse = math.MaxInt32
	}

	pc = 0
	for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Println(instr)
		}

		// currentUsedSet holds the registers that must not be taken away while handling this instruction's uses.
		var currentUsedSet regSet
		// killSet holds the registers that are released after the uses are assigned; it reuses the storage of a.reals.
		killSet := a.reals[:0]

		// Gather the set of registers that will be used in the current instruction.
		for _, use := range instr.Uses(&a.vs) {
			if use.IsRealReg() {
				r := use.RealReg()
				currentUsedSet = currentUsedSet.add(r)
				if a.allocatableSet.has(r) {
					killSet = append(killSet, r)
				}
			} else {
				vs := s.getVRegState(use)
				if r := vs.r; r != RealRegInvalid {
					currentUsedSet = currentUsedSet.add(r)
				}
			}
		}

		// Assign a real register to each virtual use, reloading the value when it is not in any register.
		for i, use := range instr.Uses(&a.vs) {
			if !use.IsRealReg() {
				vs := s.getVRegState(use)
				killed := liveness.isKilledAt(vs, pc)
				r := vs.r

				if r == RealRegInvalid {
					r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet)
					vs.recordReload(f, blk)
					f.ReloadRegisterBefore(use.SetRealReg(r), instr)
					s.useRealReg(r, use)
				}
				if wazevoapi.RegAllocLoggingEnabled {
					fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r))
				}
				instr.AssignUse(i, use.SetRealReg(r))
				currentUsedSet = currentUsedSet.add(r)
				if killed {
					if wazevoapi.RegAllocLoggingEnabled {
						fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r))
					}
					killSet = append(killSet, r)
				}
			}
		}

		isIndirect := instr.IsIndirectCall()
		call := instr.IsCall() || isIndirect
		if call {
			addr := RealRegInvalid
			if instr.IsIndirectCall() {
				// NOTE(review): presumably a.vs still holds the uses returned by the last instr.Uses(&a.vs) call,
				// with the call target at index 0 — confirm against the Instr implementation.
				addr = a.vs[0].RealReg()
			}
			a.releaseCallerSavedRegs(addr)
		}

		for _, r := range killSet {
			s.releaseRealReg(r)
		}
		// Keep the (possibly grown) storage for the next instruction.
		a.reals = killSet

		defs := instr.Defs(&a.vs)
		switch {
		case len(defs) > 1:
			if !call {
				panic("only call can have multiple defs")
			}
			// Call's defining register are all caller-saved registers.
			// Therefore, we can assume that all of them are allocatable.
			for _, def := range defs {
				s.useRealReg(def.RealReg(), def)
			}
		case len(defs) == 1:
			def := defs[0]
			if def.IsRealReg() {
				r := def.RealReg()
				if a.allocatableSet.has(r) {
					// The definition overwrites whatever currently lives in r.
					if s.regsInUse.has(r) {
						s.releaseRealReg(r)
					}
					s.useRealReg(r, def)
				}
			} else {
				vState := s.getVRegState(def)
				r := vState.r
				// Allocate a new real register if `def` is not currently assigned one.
				// It can happen when multiple instructions define the same VReg (e.g. const loads).
				if r == RealRegInvalid {
					if instr.IsCopy() {
						// Prefer the register of the copy source when it is allocatable and free.
						copySrc := instr.Uses(&a.vs)[0].RealReg()
						if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) {
							r = copySrc
						}
					}
					if r == RealRegInvalid {
						typ := def.RegType()
						r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[typ], regSet(0))
					}
					s.useRealReg(r, def)
				}
				instr.AssignDef(def.SetRealReg(r))
				if wazevoapi.RegAllocLoggingEnabled {
					fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r))
				}
				if vState.isPhi {
					// Record every definition of the phi value by prepending it to the list.
					n := a.phiDefInstListPool.Allocate()
					n.instr = instr
					n.next = vState.phiDefInstList
					vState.phiDefInstList = n
				} else {
					vState.defInstr = instr
					vState.defBlk = blk
				}
			}
		}
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Println(instr)
		}
		pc++
	}

	// Remember the state at the end of the block for the successors and for fixMergeState.
	s.regsInUse.range_(func(allocated RealReg, v VReg) {
		currentBlkState.endRegs.add(allocated, v)
	})

	currentBlkState.visited = true
	if wazevoapi.RegAllocLoggingEnabled {
		currentBlkState.dump(a.regInfo)
	}
}
   680  
   681  func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {
   682  	s := &a.state
   683  
   684  	for i := 0; i < 64; i++ {
   685  		allocated := RealReg(i)
   686  		if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
   687  			continue
   688  		}
   689  		if v := s.regsInUse.get(allocated); v.Valid() {
   690  			if v.IsRealReg() {
   691  				continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg.
   692  			}
   693  			if !a.regInfo.isCallerSaved(allocated) {
   694  				// If this is not a caller-saved register, it is safe to keep it across the call.
   695  				continue
   696  			}
   697  			s.releaseRealReg(allocated)
   698  		}
   699  	}
   700  }
   701  
// fixMergeState reconciles the register state on the edges into blk, which is only necessary when blk has
// multiple predecessors. For every predecessor other than the one blk started its allocation from, each
// register whose occupant at the end of the predecessor differs from the one expected at the beginning of blk
// is handed to reconcileEdge.
func (a *Allocator) fixMergeState(f Function, blk Block) {
	preds := blk.Preds()
	if preds <= 1 {
		return
	}

	s := &a.state

	// Restores the state at the beginning of the block.
	bID := blk.ID()
	blkSt := a.getBlockState(bID)
	desiredOccupants := &blkSt.startRegs
	// aliveOnRegVRegs maps each VReg that is expected on a register at the beginning of blk to that register.
	aliveOnRegVRegs := make(map[VReg]RealReg)
	for i := 0; i < 64; i++ {
		r := RealReg(i)
		if v := blkSt.startRegs.get(r); v.Valid() {
			aliveOnRegVRegs[v] = r
		}
	}

	if wazevoapi.RegAllocLoggingEnabled {
		fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo))
	}

	currentOccupants := &a.currentOccupants
	for i := 0; i < preds; i++ {
		currentOccupants.reset()
		// The state of blk was derived from this predecessor, so there is nothing to fix on this edge.
		if i == blkSt.startFromPredIndex {
			continue
		}

		// currentOccupantsRev is the reverse mapping of currentOccupants: VReg to the register holding it.
		currentOccupantsRev := make(map[VReg]RealReg)
		pred := blk.Pred(i)
		predSt := a.getBlockState(pred.ID())
		// Collect the registers at the end of the predecessor, ignoring the values that blk does not expect on a register.
		for ii := 0; ii < 64; ii++ {
			r := RealReg(ii)
			if v := predSt.endRegs.get(r); v.Valid() {
				if _, ok := aliveOnRegVRegs[v]; !ok {
					continue
				}
				currentOccupants.add(r, v)
				currentOccupantsRev[v] = r
			}
		}

		s.resetAt(predSt, a.livenessDataAt(bID).liveIns)

		// Finds the free registers if any.
		intTmp, floatTmp := VRegInvalid, VRegInvalid
		if intFree := s.findAllocatable(
			a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set,
		); intFree != RealRegInvalid {
			intTmp = FromRealReg(intFree, RegTypeInt)
		}
		if floatFree := s.findAllocatable(
			a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set,
		); floatFree != RealRegInvalid {
			floatTmp = FromRealReg(floatFree, RegTypeFloat)
		}

		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
		}

		for ii := 0; ii < 64; ii++ {
			r := RealReg(ii)
			desiredVReg := desiredOccupants.get(r)
			if !desiredVReg.Valid() {
				continue
			}

			// The register already holds the expected value.
			currentVReg := currentOccupants.get(r)
			if desiredVReg.ID() == currentVReg.ID() {
				continue
			}

			// Pick the temporary register of the matching type; it stays VRegInvalid when none was free.
			typ := desiredVReg.RegType()
			var tmpRealReg VReg
			if typ == RegTypeInt {
				tmpRealReg = intTmp
			} else {
				tmpRealReg = floatTmp
			}
			a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ)
		}
	}
}
   789  
// reconcileEdge inserts the instructions at the end of pred that are necessary to make desiredVReg live on r.
//
//   - r is the register that must hold desiredVReg at the beginning of the successor.
//   - currentOccupants and currentOccupantsRev describe the registers at the end of pred (register to VReg, and
//     the reverse); both are updated to reflect the inserted instructions.
//   - currentVReg is the current occupant of r, which is invalid when r is not used.
//   - freeReg is passed to Function.SwapAtEndOfBlock as the temporary; fixMergeState passes VRegInvalid when it
//     found no free register.
//   - typ is the register type of desiredVReg.
func (a *Allocator) reconcileEdge(f Function,
	r RealReg,
	pred Block,
	currentOccupants *regInUseSet,
	currentOccupantsRev map[VReg]RealReg,
	currentVReg, desiredVReg VReg,
	freeReg VReg,
	typ RegType,
) {
	s := &a.state
	if currentVReg.Valid() {
		// Both are on reg.
		er, ok := currentOccupantsRev[desiredVReg]
		if !ok {
			if wazevoapi.RegAllocLoggingEnabled {
				fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",
					desiredVReg.ID(), a.regInfo.RealRegName(r),
				)
			}
			// This case is that the desired value is on the stack, but currentVReg is on the target register.
			// We need to move the current value to the stack, and reload the desired value.
			// TODO: we can do better here.
			f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstr())
			delete(currentOccupantsRev, currentVReg)

			s.getVRegState(desiredVReg).recordReload(f, pred)
			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstr())
			currentOccupants.add(r, desiredVReg)
			currentOccupantsRev[desiredVReg] = r
			return
		}

		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
				desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
			)
		}
		// Both values are on registers: exchange the contents of r and er.
		f.SwapAtEndOfBlock(
			currentVReg.SetRealReg(r),
			desiredVReg.SetRealReg(er),
			freeReg,
			pred,
		)
		// NOTE(review): freeReg may be VRegInvalid here, in which case the register added to allocatedRegSet
		// is whatever VRegInvalid.RealReg() returns — confirm that this is harmless.
		s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
		currentOccupantsRev[desiredVReg] = r
		currentOccupantsRev[currentVReg] = er
		currentOccupants.add(r, desiredVReg)
		currentOccupants.add(er, currentVReg)
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
		}
	} else {
		// Desired is on reg, but currently the target register is not used.
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",
				desiredVReg.ID(), a.regInfo.RealRegName(r),
			)
		}
		if currentReg, ok := currentOccupantsRev[desiredVReg]; ok {
			// The value lives on another register: move it to r.
			f.InsertMoveBefore(
				FromRealReg(r, typ),
				desiredVReg.SetRealReg(currentReg),
				pred.LastInstr(),
			)
			currentOccupants.remove(currentReg)
		} else {
			// The value is not on any register: reload it into r.
			s.getVRegState(desiredVReg).recordReload(f, pred)
			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstr())
		}
		currentOccupantsRev[desiredVReg] = r
		currentOccupants.add(r, desiredVReg)
	}

	if wazevoapi.RegAllocLoggingEnabled {
		fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
	}
}
   867  
   868  func (a *Allocator) scheduleSpills(f Function) {
   869  	vrStates := a.state.vrStates
   870  	for i := 0; i <= a.state.maxVRegIDEncountered; i++ {
   871  		vs := &vrStates[i]
   872  		if vs.spilled {
   873  			a.scheduleSpill(f, vs)
   874  		}
   875  	}
   876  }
   877  
   878  func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
   879  	v := vs.v
   880  	// If the value is the phi value, we need to insert a spill after each phi definition.
   881  	if vs.isPhi {
   882  		for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next {
   883  			def := defInstr.instr.Defs(&a.vs)[0]
   884  			f.StoreRegisterAfter(def, defInstr.instr)
   885  		}
   886  		return
   887  	}
   888  
   889  	pos := vs.lca
   890  	definingBlk := vs.defBlk
   891  	r := RealRegInvalid
   892  	if wazevoapi.RegAllocLoggingEnabled {
   893  		fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID())
   894  	}
   895  	for pos != definingBlk {
   896  		st := a.getBlockState(pos.ID())
   897  		for ii := 0; ii < 64; ii++ {
   898  			rr := RealReg(ii)
   899  			if st.startRegs.get(rr) == v {
   900  				r = rr
   901  				// Already in the register, so we can place the spill at the beginning of the block.
   902  				break
   903  			}
   904  		}
   905  
   906  		if r != RealRegInvalid {
   907  			break
   908  		}
   909  
   910  		pos = f.Idom(pos)
   911  	}
   912  
   913  	if pos == definingBlk {
   914  		defInstr := vs.defInstr
   915  		defInstr.Defs(&a.vs)
   916  		if wazevoapi.RegAllocLoggingEnabled {
   917  			fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr)
   918  		}
   919  		f.StoreRegisterAfter(a.vs[0], defInstr)
   920  	} else {
   921  		// Found an ancestor block that holds the value in the register at the beginning of the block.
   922  		// We need to insert a spill before the last use.
   923  		first := pos.FirstInstr()
   924  		if wazevoapi.RegAllocLoggingEnabled {
   925  			fmt.Printf("schedule spill v%d before %v\n", v.ID(), first)
   926  		}
   927  		f.StoreRegisterAfter(v.SetRealReg(r), first)
   928  	}
   929  }
   930  
   931  // Reset resets the allocator's internal state so that it can be reused.
   932  func (a *Allocator) Reset() {
   933  	a.state.reset()
   934  	for i := 0; i <= a.maxBlockID && i < len(a.blockIDToBlockState); i++ {
   935  		a.blockLivenessData[i] = nil
   936  		a.blockIDToBlockState[i] = nil
   937  	}
   938  	a.blockStatePool.Reset()
   939  	a.blockLivenessDataPool.Reset()
   940  	a.phiDefInstListPool.Reset()
   941  
   942  	a.vs = a.vs[:0]
   943  	a.maxBlockID = -1
   944  }
   945  
   946  func (a *Allocator) allocateBlockLivenessData(blockID int) *blockLivenessData {
   947  	if blockID >= len(a.blockLivenessData) {
   948  		a.blockLivenessData = append(a.blockLivenessData, make([]*blockLivenessData, (blockID+1)-len(a.blockLivenessData))...)
   949  	}
   950  	info := a.blockLivenessData[blockID]
   951  	if info == nil {
   952  		info = a.blockLivenessDataPool.Allocate()
   953  		a.blockLivenessData[blockID] = info
   954  	}
   955  	return info
   956  }
   957  
   958  func (a *Allocator) livenessDataAt(blockID int) (info *blockLivenessData) {
   959  	info = a.blockLivenessData[blockID]
   960  	return
   961  }
   962  
   963  func resetBlockLivenessData(i *blockLivenessData) {
   964  	i.seen = false
   965  	i.liveOuts = resetMap(i.liveOuts)
   966  	i.liveIns = resetMap(i.liveIns)
   967  }
   968  
   969  func resetMap[K comparable, V any](m map[K]V) map[K]V {
   970  	if m == nil {
   971  		m = make(map[K]V)
   972  	} else {
   973  		for v := range m {
   974  			delete(m, v)
   975  		}
   976  	}
   977  	return m
   978  }
   979  
   980  // Format is for debugging.
   981  func (i *blockLivenessData) Format(ri *RegisterInfo) string {
   982  	var buf strings.Builder
   983  	buf.WriteString("\t\tblockLivenessData:")
   984  	buf.WriteString("\n\t\t\tliveOuts: ")
   985  	for v := range i.liveOuts {
   986  		if v.IsRealReg() {
   987  			buf.WriteString(fmt.Sprintf("%s ", ri.RealRegName(v.RealReg())))
   988  		} else {
   989  			buf.WriteString(fmt.Sprintf("%v ", v))
   990  		}
   991  	}
   992  	buf.WriteString("\n\t\t\tliveIns: ")
   993  	for v := range i.liveIns {
   994  		if v.IsRealReg() {
   995  			buf.WriteString(fmt.Sprintf("%s ", ri.RealRegName(v.RealReg())))
   996  		} else {
   997  			buf.WriteString(fmt.Sprintf("%v ", v))
   998  		}
   999  	}
  1000  	buf.WriteString(fmt.Sprintf("\n\t\t\tseen: %v", i.seen))
  1001  	return buf.String()
  1002  }
  1003  
  1004  func (i *blockLivenessData) isKilledAt(vs *vrState, pos programCounter) bool {
  1005  	v := vs.v
  1006  	if vs.lastUse == pos {
  1007  		if _, ok := i.liveOuts[v]; !ok {
  1008  			return true
  1009  		}
  1010  	}
  1011  	return false
  1012  }
  1013  
// isCalleeSaved reports whether reg is flagged in the CalleeSavedRegisters table.
// reg is used directly as the index into that table.
func (r *RegisterInfo) isCalleeSaved(reg RealReg) bool {
	return r.CalleeSavedRegisters[reg]
}
  1017  
// isCallerSaved reports whether reg is flagged in the CallerSavedRegisters table.
// reg is used directly as the index into that table.
func (r *RegisterInfo) isCallerSaved(reg RealReg) bool {
	return r.CallerSavedRegisters[reg]
}