github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/wazevo/backend/regalloc/regalloc.go (about) 1 // Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in 2 // api.go. 3 package regalloc 4 5 // References: 6 // * https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf 7 // * https://en.wikipedia.org/wiki/Chaitin%27s_algorithm 8 // * https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf 9 // * https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis. 10 11 import ( 12 "fmt" 13 "math" 14 "strings" 15 16 "github.com/bananabytelabs/wazero/internal/engine/wazevo/wazevoapi" 17 ) 18 19 // NewAllocator returns a new Allocator. 20 func NewAllocator(allocatableRegs *RegisterInfo) Allocator { 21 a := Allocator{ 22 regInfo: allocatableRegs, 23 blockLivenessDataPool: wazevoapi.NewPool[blockLivenessData](resetBlockLivenessData), 24 phiDefInstListPool: wazevoapi.NewPool[phiDefInstList](resetPhiDefInstList), 25 blockStatePool: wazevoapi.NewPool[blockState](resetBlockState), 26 } 27 a.state.reset() 28 for _, regs := range allocatableRegs.AllocatableRegisters { 29 for _, r := range regs { 30 a.allocatableSet = a.allocatableSet.add(r) 31 } 32 } 33 return a 34 } 35 36 type ( 37 // RegisterInfo holds the statically-known ISA-specific register information. 38 RegisterInfo struct { 39 // AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum. 40 // The order matters: the first element is the most preferred one when allocating. 41 AllocatableRegisters [NumRegType][]RealReg 42 CalleeSavedRegisters [RealRegsNumMax]bool 43 CallerSavedRegisters [RealRegsNumMax]bool 44 RealRegToVReg []VReg 45 // RealRegName returns the name of the given RealReg for debugging. 46 RealRegName func(r RealReg) string 47 RealRegType func(r RealReg) RegType 48 } 49 50 // Allocator is a register allocator. 51 Allocator struct { 52 // regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator. 53 regInfo *RegisterInfo 54 // allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA. 55 allocatableSet regSet 56 allocatedCalleeSavedRegs []VReg 57 blockLivenessDataPool wazevoapi.Pool[blockLivenessData] 58 blockLivenessData [] /* blockID to */ *blockLivenessData 59 vs []VReg 60 maxBlockID int 61 phiDefInstListPool wazevoapi.Pool[phiDefInstList] 62 63 // Followings are re-used during various places e.g. coloring. 64 blks []Block 65 reals []RealReg 66 currentOccupants regInUseSet 67 68 // Following two fields are updated while iterating the blocks in the reverse postorder. 69 state state 70 blockStatePool wazevoapi.Pool[blockState] 71 blockIDToBlockState []*blockState 72 } 73 74 // blockLivenessData is a per-block information used during the register allocation. 75 blockLivenessData struct { 76 seen bool 77 liveOuts map[VReg]struct{} 78 liveIns map[VReg]struct{} 79 } 80 81 // programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg. 82 programCounter int32 83 84 state struct { 85 argRealRegs []VReg 86 regsInUse regInUseSet 87 vrStates []vrState 88 maxVRegIDEncountered int 89 90 // allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function. 91 allocatedRegSet regSet 92 } 93 94 blockState struct { 95 visited bool 96 startFromPredIndex int 97 // startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges. 98 startRegs regInUseSet 99 // endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges. 100 endRegs regInUseSet 101 } 102 103 vrState struct { 104 v VReg 105 r RealReg 106 // defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil. 107 defInstr Instr 108 // defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value. 109 defBlk Block 110 // spilled is true if this value is spilled i.e. the value is reload from the stack somewhere in the program. 111 spilled bool 112 // lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that 113 // reloads this value. This is used to determine the spill location. Only valid if spilled=true. 114 lca Block 115 // lastUse is the program counter of the last use of this value. This changes while iterating the block, and 116 // should not be used across the blocks as it becomes invalid. 117 lastUse programCounter 118 // isPhi is true if this is a phi value. 119 isPhi bool 120 // phiDefInstList is a list of instructions that defines this phi value. 121 // This is used to determine the spill location, and only valid if isPhi=true. 122 *phiDefInstList 123 } 124 125 // phiDefInstList is a linked list of instructions that defines a phi value. 126 phiDefInstList struct { 127 instr Instr 128 next *phiDefInstList 129 } 130 ) 131 132 func resetPhiDefInstList(l *phiDefInstList) { 133 l.instr = nil 134 l.next = nil 135 } 136 137 func (s *state) dump(info *RegisterInfo) { //nolint:unused 138 fmt.Println("\t\tstate:") 139 fmt.Println("\t\t\targRealRegs:", s.argRealRegs) 140 fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info)) 141 fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info)) 142 fmt.Println("\t\t\tused:", s.regsInUse.format(info)) 143 fmt.Println("\t\t\tmaxVRegIDEncountered:", s.maxVRegIDEncountered) 144 var strs []string 145 for i, v := range s.vrStates { 146 if v.r != RealRegInvalid { 147 strs = append(strs, fmt.Sprintf("(v%d: %s)", i, info.RealRegName(v.r))) 148 } 149 } 150 fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", ")) 151 } 152 153 func (s *state) reset() { 154 s.argRealRegs = s.argRealRegs[:0] 155 for i, l := 0, len(s.vrStates); i <= s.maxVRegIDEncountered && i < l; i++ { 156 s.vrStates[i].reset() 157 } 158 s.maxVRegIDEncountered = -1 159 s.allocatedRegSet = regSet(0) 160 s.regsInUse.reset() 161 } 162 163 func (a *Allocator) getBlockState(bID int) *blockState { 164 if bID >= len(a.blockIDToBlockState) { 165 a.blockIDToBlockState = append(a.blockIDToBlockState, make([]*blockState, bID+1)...) 166 } 167 168 st := a.blockIDToBlockState[bID] 169 if st == nil { 170 st = a.blockStatePool.Allocate() 171 a.blockIDToBlockState[bID] = st 172 } 173 return st 174 } 175 176 func (s *state) setVRegState(v VReg, r RealReg) { 177 id := int(v.ID()) 178 if id >= len(s.vrStates) { 179 s.vrStates = append(s.vrStates, make([]vrState, id+1-len(s.vrStates))...) 180 s.vrStates = s.vrStates[:cap(s.vrStates)] 181 } 182 183 st := &s.vrStates[id] 184 st.r = r 185 st.v = v 186 } 187 188 func (vs *vrState) reset() { 189 vs.r = RealRegInvalid 190 vs.defInstr = nil 191 vs.defBlk = nil 192 vs.spilled = false 193 vs.lca = nil 194 vs.isPhi = false 195 vs.phiDefInstList = nil 196 } 197 198 func (s *state) getVRegState(v VReg) *vrState { 199 id := int(v.ID()) 200 if id >= len(s.vrStates) { 201 s.setVRegState(v, RealRegInvalid) 202 } 203 if s.maxVRegIDEncountered < id { 204 s.maxVRegIDEncountered = id 205 } 206 return &s.vrStates[id] 207 } 208 209 func (s *state) useRealReg(r RealReg, v VReg) { 210 if s.regsInUse.has(r) { 211 panic("BUG: useRealReg: the given real register is already used") 212 } 213 s.regsInUse.add(r, v) 214 s.setVRegState(v, r) 215 s.allocatedRegSet = s.allocatedRegSet.add(r) 216 } 217 218 func (s *state) releaseRealReg(r RealReg) { 219 current := s.regsInUse.get(r) 220 if current.Valid() { 221 s.regsInUse.remove(r) 222 s.setVRegState(current, RealRegInvalid) 223 } 224 } 225 226 // recordReload records that the given VReg is reloaded in the given block. 227 // This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value. 228 func (vs *vrState) recordReload(f Function, blk Block) { 229 vs.spilled = true 230 if vs.lca == nil { 231 if wazevoapi.RegAllocLoggingEnabled { 232 fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID()) 233 } 234 vs.lca = blk 235 } else { 236 if wazevoapi.RegAllocLoggingEnabled { 237 fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID()) 238 } 239 vs.lca = f.LowestCommonAncestor(vs.lca, blk) 240 if wazevoapi.RegAllocLoggingEnabled { 241 fmt.Printf("updated lca=%d\n", vs.lca.ID()) 242 } 243 } 244 } 245 246 func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forbiddenMask regSet) (r RealReg) { 247 r = RealRegInvalid 248 var lastUseAt programCounter = math.MinInt32 249 var spillVReg VReg 250 for _, candidateReal := range allocatable { 251 if forbiddenMask.has(candidateReal) { 252 continue 253 } 254 255 using := s.regsInUse.get(candidateReal) 256 if using == VRegInvalid { 257 // This is not used at this point. 258 return candidateReal 259 } 260 261 if last := s.getVRegState(using).lastUse; last > lastUseAt { 262 lastUseAt = last 263 r = candidateReal 264 spillVReg = using 265 } 266 } 267 268 if r == RealRegInvalid { 269 panic("not found any allocatable register") 270 } 271 272 if wazevoapi.RegAllocLoggingEnabled { 273 fmt.Printf("\tspilling v%d when: %s\n", spillVReg.ID(), forbiddenMask.format(a.regInfo)) 274 } 275 s.releaseRealReg(r) 276 return r 277 } 278 279 func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask regSet) RealReg { 280 for _, r := range allocatable { 281 if !s.regsInUse.has(r) && !forbiddenMask.has(r) { 282 return r 283 } 284 } 285 return RealRegInvalid 286 } 287 288 func (s *state) resetAt(bs *blockState, liveIns map[VReg]struct{}) { 289 s.regsInUse.range_(func(_ RealReg, vr VReg) { 290 s.setVRegState(vr, RealRegInvalid) 291 }) 292 s.regsInUse.reset() 293 bs.endRegs.range_(func(r RealReg, v VReg) { 294 if _, ok := liveIns[v]; ok { 295 s.regsInUse.add(r, v) 296 s.setVRegState(v, r) 297 } 298 }) 299 } 300 301 func resetBlockState(b *blockState) { 302 b.visited = false 303 b.endRegs.reset() 304 b.startRegs.reset() 305 b.startFromPredIndex = -1 306 } 307 308 func (b *blockState) dump(a *RegisterInfo) { 309 fmt.Println("\t\tblockState:") 310 fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a)) 311 fmt.Println("\t\t\tendRegs:", b.endRegs.format(a)) 312 fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex) 313 fmt.Println("\t\t\tvisited:", b.visited) 314 } 315 316 // DoAllocation performs register allocation on the given Function. 317 func (a *Allocator) DoAllocation(f Function) { 318 a.livenessAnalysis(f) 319 a.alloc(f) 320 a.determineCalleeSavedRealRegs(f) 321 f.Done() 322 } 323 324 func (a *Allocator) determineCalleeSavedRealRegs(f Function) { 325 a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0] 326 a.state.allocatedRegSet.range_(func(allocatedRealReg RealReg) { 327 if a.regInfo.isCalleeSaved(allocatedRealReg) { 328 a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg]) 329 } 330 }) 331 f.ClobberedRegisters(a.allocatedCalleeSavedRegs) 332 } 333 334 // phiBlk returns the block that defines the given phi value, nil otherwise. 335 func (s *state) phiBlk(v VReg) Block { 336 vs := s.getVRegState(v) 337 if vs.isPhi { 338 return vs.defBlk 339 } 340 return nil 341 } 342 343 // liveAnalysis constructs Allocator.blockLivenessData. 344 // The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2. 345 func (a *Allocator) livenessAnalysis(f Function) { 346 // First, we need to allocate blockLivenessData. 347 s := &a.state 348 for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter. 349 a.allocateBlockLivenessData(blk.ID()) 350 351 // We should gather phi value data. 352 for _, p := range blk.BlockParams(&a.vs) { 353 vs := s.getVRegState(p) 354 vs.isPhi = true 355 vs.defBlk = blk 356 } 357 if blk.ID() > a.maxBlockID { 358 a.maxBlockID = blk.ID() 359 } 360 } 361 362 // Run the Algorithm 9.2 in the bool. 363 for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { 364 blkID := blk.ID() 365 info := a.livenessDataAt(blkID) 366 367 ns := blk.Succs() 368 for i := 0; i < ns; i++ { 369 succ := blk.Succ(i) 370 if succ == nil { 371 continue 372 } 373 374 succID := succ.ID() 375 succInfo := a.livenessDataAt(succID) 376 if !succInfo.seen { // This means the back edge. 377 continue 378 } 379 380 for v := range succInfo.liveIns { 381 if s.phiBlk(v) != succ { 382 info.liveOuts[v] = struct{}{} 383 info.liveIns[v] = struct{}{} 384 } 385 } 386 } 387 388 for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() { 389 390 var use, def VReg 391 for _, def = range instr.Defs(&a.vs) { 392 if !def.IsRealReg() { 393 delete(info.liveIns, def) 394 } 395 } 396 for _, use = range instr.Uses(&a.vs) { 397 if !use.IsRealReg() { 398 info.liveIns[use] = struct{}{} 399 } 400 } 401 402 // If the destination is a phi value, and ... 403 if def.Valid() && s.phiBlk(def) != nil { 404 if use.Valid() && use.IsRealReg() { 405 // If the source is a real register, this is the beginning of the function. 406 a.state.argRealRegs = append(a.state.argRealRegs, use) 407 } else { 408 // Otherwise, this is the definition of the phi value for the successor block. 409 // So we need to make it outlive the block. 410 info.liveOuts[def] = struct{}{} 411 } 412 } 413 } 414 info.seen = true 415 } 416 417 nrs := f.LoopNestingForestRoots() 418 for i := 0; i < nrs; i++ { 419 root := f.LoopNestingForestRoot(i) 420 a.loopTreeDFS(root) 421 } 422 } 423 424 // loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way. 425 func (a *Allocator) loopTreeDFS(entry Block) { 426 a.blks = a.blks[:0] 427 a.blks = append(a.blks, entry) 428 429 s := &a.state 430 for len(a.blks) > 0 { 431 tail := len(a.blks) - 1 432 loop := a.blks[tail] 433 a.blks = a.blks[:tail] 434 a.vs = a.vs[:0] 435 436 info := a.livenessDataAt(loop.ID()) 437 for v := range info.liveIns { 438 if s.phiBlk(v) != loop { 439 a.vs = append(a.vs, v) 440 info.liveOuts[v] = struct{}{} 441 } 442 } 443 444 cn := loop.LoopNestingForestChildren() 445 for i := 0; i < cn; i++ { 446 child := loop.LoopNestingForestChild(i) 447 childID := child.ID() 448 childInfo := a.livenessDataAt(childID) 449 for _, v := range a.vs { 450 childInfo.liveIns[v] = struct{}{} 451 childInfo.liveOuts[v] = struct{}{} 452 } 453 if child.LoopHeader() { 454 a.blks = append(a.blks, child) 455 } 456 } 457 } 458 } 459 460 // alloc allocates registers for the given function by iterating the blocks in the reverse postorder. 461 // The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go 462 // In short, this is a simply linear scan register allocation where each block inherits the register allocation state from 463 // one of its predecessors. Each block inherits the selected state and starts allocation from there. 464 // If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state"). 465 // The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because 466 // at the point, we all know where the reloads happen, and therefore we can know the best place to spill the values. More precisely, 467 // the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value. 468 // 469 // All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^. 470 func (a *Allocator) alloc(f Function) { 471 // First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block). 472 for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { 473 if wazevoapi.RegAllocLoggingEnabled { 474 fmt.Printf("========== allocating blk%d ========\n", blk.ID()) 475 } 476 a.allocBlock(f, blk) 477 } 478 // After the allocation, we all know the start and end state of each block. So we can fix the merge states. 479 for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { 480 a.fixMergeState(f, blk) 481 } 482 // Finally, we insert the spill instructions as we know all the places where the reloads happen. 483 a.scheduleSpills(f) 484 } 485 486 func (a *Allocator) allocBlock(f Function, blk Block) { 487 bID := blk.ID() 488 liveness := a.livenessDataAt(bID) 489 s := &a.state 490 currentBlkState := a.getBlockState(bID) 491 492 preds := blk.Preds() 493 var predState *blockState 494 switch preds { 495 case 0: // This is the entry block. 496 case 1: 497 predID := blk.Pred(0).ID() 498 predState = a.getBlockState(predID) 499 currentBlkState.startFromPredIndex = 0 500 default: 501 // TODO: there should be some better heuristic to choose the predecessor. 502 for i := 0; i < preds; i++ { 503 predID := blk.Pred(i).ID() 504 if _predState := a.getBlockState(predID); _predState.visited { 505 predState = _predState 506 currentBlkState.startFromPredIndex = i 507 break 508 } 509 } 510 } 511 if predState == nil { 512 if !blk.Entry() { 513 panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID())) 514 } 515 for _, u := range s.argRealRegs { 516 s.useRealReg(u.RealReg(), u) 517 } 518 } else if predState != nil { 519 if wazevoapi.RegAllocLoggingEnabled { 520 fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n", 521 bID, blk.Pred(currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex) 522 } 523 s.resetAt(predState, liveness.liveIns) 524 } 525 526 s.regsInUse.range_(func(allocated RealReg, v VReg) { 527 currentBlkState.startRegs.add(allocated, v) 528 }) 529 530 // Update the last use of each VReg. 531 var pc programCounter 532 for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { 533 for _, use := range instr.Uses(&a.vs) { 534 if !use.IsRealReg() { 535 s.getVRegState(use).lastUse = pc 536 } 537 } 538 pc++ 539 } 540 // Reset the last use of the liveOuts. 541 for outlive := range liveness.liveOuts { 542 s.getVRegState(outlive).lastUse = math.MaxInt32 543 } 544 545 pc = 0 546 for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { 547 if wazevoapi.RegAllocLoggingEnabled { 548 fmt.Println(instr) 549 } 550 551 var currentUsedSet regSet 552 killSet := a.reals[:0] 553 554 // Gather the set of registers that will be used in the current instruction. 555 for _, use := range instr.Uses(&a.vs) { 556 if use.IsRealReg() { 557 r := use.RealReg() 558 currentUsedSet = currentUsedSet.add(r) 559 if a.allocatableSet.has(r) { 560 killSet = append(killSet, r) 561 } 562 } else { 563 vs := s.getVRegState(use) 564 if r := vs.r; r != RealRegInvalid { 565 currentUsedSet = currentUsedSet.add(r) 566 } 567 } 568 } 569 570 for i, use := range instr.Uses(&a.vs) { 571 if !use.IsRealReg() { 572 vs := s.getVRegState(use) 573 killed := liveness.isKilledAt(vs, pc) 574 r := vs.r 575 576 if r == RealRegInvalid { 577 r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet) 578 vs.recordReload(f, blk) 579 f.ReloadRegisterBefore(use.SetRealReg(r), instr) 580 s.useRealReg(r, use) 581 } 582 if wazevoapi.RegAllocLoggingEnabled { 583 fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r)) 584 } 585 instr.AssignUse(i, use.SetRealReg(r)) 586 currentUsedSet = currentUsedSet.add(r) 587 if killed { 588 if wazevoapi.RegAllocLoggingEnabled { 589 fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r)) 590 } 591 killSet = append(killSet, r) 592 } 593 } 594 } 595 596 isIndirect := instr.IsIndirectCall() 597 call := instr.IsCall() || isIndirect 598 if call { 599 addr := RealRegInvalid 600 if instr.IsIndirectCall() { 601 addr = a.vs[0].RealReg() 602 } 603 a.releaseCallerSavedRegs(addr) 604 } 605 606 for _, r := range killSet { 607 s.releaseRealReg(r) 608 } 609 a.reals = killSet 610 611 defs := instr.Defs(&a.vs) 612 switch { 613 case len(defs) > 1: 614 if !call { 615 panic("only call can have multiple defs") 616 } 617 // Call's defining register are all caller-saved registers. 618 // Therefore, we can assume that all of them are allocatable. 619 for _, def := range defs { 620 s.useRealReg(def.RealReg(), def) 621 } 622 case len(defs) == 1: 623 def := defs[0] 624 if def.IsRealReg() { 625 r := def.RealReg() 626 if a.allocatableSet.has(r) { 627 if s.regsInUse.has(r) { 628 s.releaseRealReg(r) 629 } 630 s.useRealReg(r, def) 631 } 632 } else { 633 vState := s.getVRegState(def) 634 r := vState.r 635 // Allocate a new real register if `def` is not currently assigned one. 636 // It can happen when multiple instructions define the same VReg (e.g. const loads). 637 if r == RealRegInvalid { 638 if instr.IsCopy() { 639 copySrc := instr.Uses(&a.vs)[0].RealReg() 640 if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) { 641 r = copySrc 642 } 643 } 644 if r == RealRegInvalid { 645 typ := def.RegType() 646 r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[typ], regSet(0)) 647 } 648 s.useRealReg(r, def) 649 } 650 instr.AssignDef(def.SetRealReg(r)) 651 if wazevoapi.RegAllocLoggingEnabled { 652 fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r)) 653 } 654 if vState.isPhi { 655 n := a.phiDefInstListPool.Allocate() 656 n.instr = instr 657 n.next = vState.phiDefInstList 658 vState.phiDefInstList = n 659 } else { 660 vState.defInstr = instr 661 vState.defBlk = blk 662 } 663 } 664 } 665 if wazevoapi.RegAllocLoggingEnabled { 666 fmt.Println(instr) 667 } 668 pc++ 669 } 670 671 s.regsInUse.range_(func(allocated RealReg, v VReg) { 672 currentBlkState.endRegs.add(allocated, v) 673 }) 674 675 currentBlkState.visited = true 676 if wazevoapi.RegAllocLoggingEnabled { 677 currentBlkState.dump(a.regInfo) 678 } 679 } 680 681 func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) { 682 s := &a.state 683 684 for i := 0; i < 64; i++ { 685 allocated := RealReg(i) 686 if allocated == addrReg { // If this is the call indirect, we should not touch the addr register. 687 continue 688 } 689 if v := s.regsInUse.get(allocated); v.Valid() { 690 if v.IsRealReg() { 691 continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg. 692 } 693 if !a.regInfo.isCallerSaved(allocated) { 694 // If this is not a caller-saved register, it is safe to keep it across the call. 695 continue 696 } 697 s.releaseRealReg(allocated) 698 } 699 } 700 } 701 702 func (a *Allocator) fixMergeState(f Function, blk Block) { 703 preds := blk.Preds() 704 if preds <= 1 { 705 return 706 } 707 708 s := &a.state 709 710 // Restores the state at the beginning of the block. 711 bID := blk.ID() 712 blkSt := a.getBlockState(bID) 713 desiredOccupants := &blkSt.startRegs 714 aliveOnRegVRegs := make(map[VReg]RealReg) 715 for i := 0; i < 64; i++ { 716 r := RealReg(i) 717 if v := blkSt.startRegs.get(r); v.Valid() { 718 aliveOnRegVRegs[v] = r 719 } 720 } 721 722 if wazevoapi.RegAllocLoggingEnabled { 723 fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo)) 724 } 725 726 currentOccupants := &a.currentOccupants 727 for i := 0; i < preds; i++ { 728 currentOccupants.reset() 729 if i == blkSt.startFromPredIndex { 730 continue 731 } 732 733 currentOccupantsRev := make(map[VReg]RealReg) 734 pred := blk.Pred(i) 735 predSt := a.getBlockState(pred.ID()) 736 for ii := 0; ii < 64; ii++ { 737 r := RealReg(ii) 738 if v := predSt.endRegs.get(r); v.Valid() { 739 if _, ok := aliveOnRegVRegs[v]; !ok { 740 continue 741 } 742 currentOccupants.add(r, v) 743 currentOccupantsRev[v] = r 744 } 745 } 746 747 s.resetAt(predSt, a.livenessDataAt(bID).liveIns) 748 749 // Finds the free registers if any. 750 intTmp, floatTmp := VRegInvalid, VRegInvalid 751 if intFree := s.findAllocatable( 752 a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set, 753 ); intFree != RealRegInvalid { 754 intTmp = FromRealReg(intFree, RegTypeInt) 755 } 756 if floatFree := s.findAllocatable( 757 a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set, 758 ); floatFree != RealRegInvalid { 759 floatTmp = FromRealReg(floatFree, RegTypeFloat) 760 } 761 762 if wazevoapi.RegAllocLoggingEnabled { 763 fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) 764 } 765 766 for ii := 0; ii < 64; ii++ { 767 r := RealReg(ii) 768 desiredVReg := desiredOccupants.get(r) 769 if !desiredVReg.Valid() { 770 continue 771 } 772 773 currentVReg := currentOccupants.get(r) 774 if desiredVReg.ID() == currentVReg.ID() { 775 continue 776 } 777 778 typ := desiredVReg.RegType() 779 var tmpRealReg VReg 780 if typ == RegTypeInt { 781 tmpRealReg = intTmp 782 } else { 783 tmpRealReg = floatTmp 784 } 785 a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ) 786 } 787 } 788 } 789 790 func (a *Allocator) reconcileEdge(f Function, 791 r RealReg, 792 pred Block, 793 currentOccupants *regInUseSet, 794 currentOccupantsRev map[VReg]RealReg, 795 currentVReg, desiredVReg VReg, 796 freeReg VReg, 797 typ RegType, 798 ) { 799 s := &a.state 800 if currentVReg.Valid() { 801 // Both are on reg. 802 er, ok := currentOccupantsRev[desiredVReg] 803 if !ok { 804 if wazevoapi.RegAllocLoggingEnabled { 805 fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n", 806 desiredVReg.ID(), a.regInfo.RealRegName(r), 807 ) 808 } 809 // This case is that the desired value is on the stack, but currentVReg is on the target register. 810 // We need to move the current value to the stack, and reload the desired value. 811 // TODO: we can do better here. 812 f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstr()) 813 delete(currentOccupantsRev, currentVReg) 814 815 s.getVRegState(desiredVReg).recordReload(f, pred) 816 f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstr()) 817 currentOccupants.add(r, desiredVReg) 818 currentOccupantsRev[desiredVReg] = r 819 return 820 } 821 822 if wazevoapi.RegAllocLoggingEnabled { 823 fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", 824 desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), 825 ) 826 } 827 f.SwapAtEndOfBlock( 828 currentVReg.SetRealReg(r), 829 desiredVReg.SetRealReg(er), 830 freeReg, 831 pred, 832 ) 833 s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) 834 currentOccupantsRev[desiredVReg] = r 835 currentOccupantsRev[currentVReg] = er 836 currentOccupants.add(r, desiredVReg) 837 currentOccupants.add(er, currentVReg) 838 if wazevoapi.RegAllocLoggingEnabled { 839 fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) 840 } 841 } else { 842 // Desired is on reg, but currently the target register is not used. 843 if wazevoapi.RegAllocLoggingEnabled { 844 fmt.Printf("\t\tv%d is desired to be on %s, current not used\n", 845 desiredVReg.ID(), a.regInfo.RealRegName(r), 846 ) 847 } 848 if currentReg, ok := currentOccupantsRev[desiredVReg]; ok { 849 f.InsertMoveBefore( 850 FromRealReg(r, typ), 851 desiredVReg.SetRealReg(currentReg), 852 pred.LastInstr(), 853 ) 854 currentOccupants.remove(currentReg) 855 } else { 856 s.getVRegState(desiredVReg).recordReload(f, pred) 857 f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstr()) 858 } 859 currentOccupantsRev[desiredVReg] = r 860 currentOccupants.add(r, desiredVReg) 861 } 862 863 if wazevoapi.RegAllocLoggingEnabled { 864 fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) 865 } 866 } 867 868 func (a *Allocator) scheduleSpills(f Function) { 869 vrStates := a.state.vrStates 870 for i := 0; i <= a.state.maxVRegIDEncountered; i++ { 871 vs := &vrStates[i] 872 if vs.spilled { 873 a.scheduleSpill(f, vs) 874 } 875 } 876 } 877 878 func (a *Allocator) scheduleSpill(f Function, vs *vrState) { 879 v := vs.v 880 // If the value is the phi value, we need to insert a spill after each phi definition. 881 if vs.isPhi { 882 for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next { 883 def := defInstr.instr.Defs(&a.vs)[0] 884 f.StoreRegisterAfter(def, defInstr.instr) 885 } 886 return 887 } 888 889 pos := vs.lca 890 definingBlk := vs.defBlk 891 r := RealRegInvalid 892 if wazevoapi.RegAllocLoggingEnabled { 893 fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID()) 894 } 895 for pos != definingBlk { 896 st := a.getBlockState(pos.ID()) 897 for ii := 0; ii < 64; ii++ { 898 rr := RealReg(ii) 899 if st.startRegs.get(rr) == v { 900 r = rr 901 // Already in the register, so we can place the spill at the beginning of the block. 902 break 903 } 904 } 905 906 if r != RealRegInvalid { 907 break 908 } 909 910 pos = f.Idom(pos) 911 } 912 913 if pos == definingBlk { 914 defInstr := vs.defInstr 915 defInstr.Defs(&a.vs) 916 if wazevoapi.RegAllocLoggingEnabled { 917 fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr) 918 } 919 f.StoreRegisterAfter(a.vs[0], defInstr) 920 } else { 921 // Found an ancestor block that holds the value in the register at the beginning of the block. 922 // We need to insert a spill before the last use. 923 first := pos.FirstInstr() 924 if wazevoapi.RegAllocLoggingEnabled { 925 fmt.Printf("schedule spill v%d before %v\n", v.ID(), first) 926 } 927 f.StoreRegisterAfter(v.SetRealReg(r), first) 928 } 929 } 930 931 // Reset resets the allocator's internal state so that it can be reused. 932 func (a *Allocator) Reset() { 933 a.state.reset() 934 for i := 0; i <= a.maxBlockID && i < len(a.blockIDToBlockState); i++ { 935 a.blockLivenessData[i] = nil 936 a.blockIDToBlockState[i] = nil 937 } 938 a.blockStatePool.Reset() 939 a.blockLivenessDataPool.Reset() 940 a.phiDefInstListPool.Reset() 941 942 a.vs = a.vs[:0] 943 a.maxBlockID = -1 944 } 945 946 func (a *Allocator) allocateBlockLivenessData(blockID int) *blockLivenessData { 947 if blockID >= len(a.blockLivenessData) { 948 a.blockLivenessData = append(a.blockLivenessData, make([]*blockLivenessData, (blockID+1)-len(a.blockLivenessData))...) 949 } 950 info := a.blockLivenessData[blockID] 951 if info == nil { 952 info = a.blockLivenessDataPool.Allocate() 953 a.blockLivenessData[blockID] = info 954 } 955 return info 956 } 957 958 func (a *Allocator) livenessDataAt(blockID int) (info *blockLivenessData) { 959 info = a.blockLivenessData[blockID] 960 return 961 } 962 963 func resetBlockLivenessData(i *blockLivenessData) { 964 i.seen = false 965 i.liveOuts = resetMap(i.liveOuts) 966 i.liveIns = resetMap(i.liveIns) 967 } 968 969 func resetMap[K comparable, V any](m map[K]V) map[K]V { 970 if m == nil { 971 m = make(map[K]V) 972 } else { 973 for v := range m { 974 delete(m, v) 975 } 976 } 977 return m 978 } 979 980 // Format is for debugging. 981 func (i *blockLivenessData) Format(ri *RegisterInfo) string { 982 var buf strings.Builder 983 buf.WriteString("\t\tblockLivenessData:") 984 buf.WriteString("\n\t\t\tliveOuts: ") 985 for v := range i.liveOuts { 986 if v.IsRealReg() { 987 buf.WriteString(fmt.Sprintf("%s ", ri.RealRegName(v.RealReg()))) 988 } else { 989 buf.WriteString(fmt.Sprintf("%v ", v)) 990 } 991 } 992 buf.WriteString("\n\t\t\tliveIns: ") 993 for v := range i.liveIns { 994 if v.IsRealReg() { 995 buf.WriteString(fmt.Sprintf("%s ", ri.RealRegName(v.RealReg()))) 996 } else { 997 buf.WriteString(fmt.Sprintf("%v ", v)) 998 } 999 } 1000 buf.WriteString(fmt.Sprintf("\n\t\t\tseen: %v", i.seen)) 1001 return buf.String() 1002 } 1003 1004 func (i *blockLivenessData) isKilledAt(vs *vrState, pos programCounter) bool { 1005 v := vs.v 1006 if vs.lastUse == pos { 1007 if _, ok := i.liveOuts[v]; !ok { 1008 return true 1009 } 1010 } 1011 return false 1012 } 1013 1014 func (r *RegisterInfo) isCalleeSaved(reg RealReg) bool { 1015 return r.CalleeSavedRegisters[reg] 1016 } 1017 1018 func (r *RegisterInfo) isCallerSaved(reg RealReg) bool { 1019 return r.CallerSavedRegisters[reg] 1020 }