github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/regalloc/regalloc.go (about) 1 // Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in 2 // api.go. 3 // 4 // References: 5 // - https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf 6 // - https://en.wikipedia.org/wiki/Chaitin%27s_algorithm 7 // - https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf 8 // - https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis. 9 // - https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go 10 package regalloc 11 12 import ( 13 "fmt" 14 "math" 15 "strings" 16 17 "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" 18 ) 19 20 // NewAllocator returns a new Allocator. 21 func NewAllocator(allocatableRegs *RegisterInfo) Allocator { 22 a := Allocator{ 23 regInfo: allocatableRegs, 24 phiDefInstListPool: wazevoapi.NewPool[phiDefInstList](resetPhiDefInstList), 25 blockStates: wazevoapi.NewIDedPool[blockState](resetBlockState), 26 } 27 a.state.vrStates = wazevoapi.NewIDedPool[vrState](resetVrState) 28 a.state.reset() 29 for _, regs := range allocatableRegs.AllocatableRegisters { 30 for _, r := range regs { 31 a.allocatableSet = a.allocatableSet.add(r) 32 } 33 } 34 return a 35 } 36 37 type ( 38 // RegisterInfo holds the statically-known ISA-specific register information. 39 RegisterInfo struct { 40 // AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum. 41 // The order matters: the first element is the most preferred one when allocating. 42 AllocatableRegisters [NumRegType][]RealReg 43 CalleeSavedRegisters RegSet 44 CallerSavedRegisters RegSet 45 RealRegToVReg []VReg 46 // RealRegName returns the name of the given RealReg for debugging. 47 RealRegName func(r RealReg) string 48 RealRegType func(r RealReg) RegType 49 } 50 51 // Allocator is a register allocator. 52 Allocator struct { 53 // regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator. 54 regInfo *RegisterInfo 55 // allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA. 56 allocatableSet RegSet 57 allocatedCalleeSavedRegs []VReg 58 vs []VReg 59 vs2 []VRegID 60 phiDefInstListPool wazevoapi.Pool[phiDefInstList] 61 62 // Followings are re-used during various places. 63 blks []Block 64 reals []RealReg 65 currentOccupants regInUseSet 66 67 // Following two fields are updated while iterating the blocks in the reverse postorder. 68 state state 69 blockStates wazevoapi.IDedPool[blockState] 70 } 71 72 // programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg. 73 programCounter int32 74 75 state struct { 76 argRealRegs []VReg 77 regsInUse regInUseSet 78 vrStates wazevoapi.IDedPool[vrState] 79 80 currentBlockID int32 81 82 // allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function. 83 allocatedRegSet RegSet 84 } 85 86 blockState struct { 87 // liveIns is a list of VReg that are live at the beginning of the block. 88 liveIns []VRegID 89 // seen is true if the block is visited during the liveness analysis. 90 seen bool 91 // visited is true if the block is visited during the allocation phase. 92 visited bool 93 startFromPredIndex int 94 // startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges. 95 startRegs regInUseSet 96 // endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges. 97 endRegs regInUseSet 98 } 99 100 vrState struct { 101 v VReg 102 r RealReg 103 // defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil. 104 defInstr Instr 105 // defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value. 106 defBlk Block 107 // lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that 108 // reloads this value. This is used to determine the spill location. Only valid if spilled=true. 109 lca Block 110 // lastUse is the program counter of the last use of this value. This changes while iterating the block, and 111 // should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID. 112 lastUse programCounter 113 lastUseUpdatedAtBlockID int32 114 // spilled is true if this value is spilled i.e. the value is reload from the stack somewhere in the program. 115 // 116 // Note that this field is used during liveness analysis for different purpose. This is used to determine the 117 // value is live-in or not. 118 spilled bool 119 // isPhi is true if this is a phi value. 120 isPhi bool 121 desiredLoc desiredLoc 122 // phiDefInstList is a list of instructions that defines this phi value. 123 // This is used to determine the spill location, and only valid if isPhi=true. 124 *phiDefInstList 125 } 126 127 // phiDefInstList is a linked list of instructions that defines a phi value. 128 phiDefInstList struct { 129 instr Instr 130 v VReg 131 next *phiDefInstList 132 } 133 134 // desiredLoc represents a desired location for a VReg. 135 desiredLoc uint16 136 // desiredLocKind is a kind of desired location for a VReg. 137 desiredLocKind uint16 138 ) 139 140 const ( 141 // desiredLocKindUnspecified is a kind of desired location for a VReg that is not specified. 142 desiredLocKindUnspecified desiredLocKind = iota 143 // desiredLocKindStack is a kind of desired location for a VReg that is on the stack, only used for the phi values. 144 desiredLocKindStack 145 // desiredLocKindReg is a kind of desired location for a VReg that is in a register. 146 desiredLocKindReg 147 desiredLocUnspecified = desiredLoc(desiredLocKindUnspecified) 148 desiredLocStack = desiredLoc(desiredLocKindStack) 149 ) 150 151 func newDesiredLocReg(r RealReg) desiredLoc { 152 return desiredLoc(desiredLocKindReg) | desiredLoc(r<<2) 153 } 154 155 func (d desiredLoc) realReg() RealReg { 156 return RealReg(d >> 2) 157 } 158 159 func (d desiredLoc) stack() bool { 160 return d&3 == desiredLoc(desiredLocKindStack) 161 } 162 163 func resetPhiDefInstList(l *phiDefInstList) { 164 l.instr = nil 165 l.next = nil 166 l.v = VRegInvalid 167 } 168 169 func (s *state) dump(info *RegisterInfo) { //nolint:unused 170 fmt.Println("\t\tstate:") 171 fmt.Println("\t\t\targRealRegs:", s.argRealRegs) 172 fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info)) 173 fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info)) 174 fmt.Println("\t\t\tused:", s.regsInUse.format(info)) 175 var strs []string 176 for i := 0; i <= s.vrStates.MaxIDEncountered(); i++ { 177 vs := s.vrStates.Get(i) 178 if vs == nil { 179 continue 180 } 181 if vs.r != RealRegInvalid { 182 strs = append(strs, fmt.Sprintf("(v%d: %s)", vs.v.ID(), info.RealRegName(vs.r))) 183 } 184 } 185 fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", ")) 186 } 187 188 func (s *state) reset() { 189 s.argRealRegs = s.argRealRegs[:0] 190 s.vrStates.Reset() 191 s.allocatedRegSet = RegSet(0) 192 s.regsInUse.reset() 193 s.currentBlockID = -1 194 } 195 196 func (s *state) setVRegState(v VReg, r RealReg) { 197 id := int(v.ID()) 198 st := s.vrStates.GetOrAllocate(id) 199 st.r = r 200 st.v = v 201 } 202 203 func resetVrState(vs *vrState) { 204 vs.v = VRegInvalid 205 vs.r = RealRegInvalid 206 vs.defInstr = nil 207 vs.defBlk = nil 208 vs.spilled = false 209 vs.lastUse = -1 210 vs.lastUseUpdatedAtBlockID = -1 211 vs.lca = nil 212 vs.isPhi = false 213 vs.phiDefInstList = nil 214 vs.desiredLoc = desiredLocUnspecified 215 } 216 217 func (s *state) getVRegState(v VRegID) *vrState { 218 return s.vrStates.GetOrAllocate(int(v)) 219 } 220 221 func (s *state) useRealReg(r RealReg, v VReg) { 222 if s.regsInUse.has(r) { 223 panic("BUG: useRealReg: the given real register is already used") 224 } 225 s.regsInUse.add(r, v) 226 s.setVRegState(v, r) 227 s.allocatedRegSet = s.allocatedRegSet.add(r) 228 } 229 230 func (s *state) releaseRealReg(r RealReg) { 231 current := s.regsInUse.get(r) 232 if current.Valid() { 233 s.regsInUse.remove(r) 234 s.setVRegState(current, RealRegInvalid) 235 } 236 } 237 238 // recordReload records that the given VReg is reloaded in the given block. 239 // This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value. 240 func (vs *vrState) recordReload(f Function, blk Block) { 241 vs.spilled = true 242 if vs.lca == nil { 243 if wazevoapi.RegAllocLoggingEnabled { 244 fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID()) 245 } 246 vs.lca = blk 247 } else { 248 if wazevoapi.RegAllocLoggingEnabled { 249 fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID()) 250 } 251 vs.lca = f.LowestCommonAncestor(vs.lca, blk) 252 if wazevoapi.RegAllocLoggingEnabled { 253 fmt.Printf("updated lca=%d\n", vs.lca.ID()) 254 } 255 } 256 } 257 258 func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) { 259 r = RealRegInvalid 260 // First, check if the preferredMask has any allocatable register. 261 if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) { 262 for _, candidateReal := range allocatable { 263 // TODO: we should ensure the preferred register is in the allocatable set in the first place, 264 // but right now, just in case, we check it here. 265 if candidateReal == preferred { 266 return preferred 267 } 268 } 269 } 270 271 var lastUseAt programCounter 272 var spillVReg VReg 273 for _, candidateReal := range allocatable { 274 if forbiddenMask.has(candidateReal) { 275 continue 276 } 277 278 using := s.regsInUse.get(candidateReal) 279 if using == VRegInvalid { 280 // This is not used at this point. 281 return candidateReal 282 } 283 284 // Real registers in use should not be spilled, so we skip them. 285 // For example, if the register is used as an argument register, and it might be 286 // spilled and not reloaded when it ends up being used as a temporary to pass 287 // stack based argument. 288 if using.IsRealReg() { 289 continue 290 } 291 292 isPreferred := candidateReal == preferred 293 294 // last == -1 means the value won't be used anymore. 295 if last := s.getVRegState(using.ID()).lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) { 296 lastUseAt = last 297 r = candidateReal 298 spillVReg = using 299 if isPreferred { 300 break 301 } 302 } 303 } 304 305 if r == RealRegInvalid { 306 panic("not found any allocatable register") 307 } 308 309 if wazevoapi.RegAllocLoggingEnabled { 310 fmt.Printf("\tspilling v%d when lastUseAt=%d and regsInUse=%s\n", spillVReg.ID(), lastUseAt, s.regsInUse.format(a.regInfo)) 311 } 312 s.releaseRealReg(r) 313 return r 314 } 315 316 func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg { 317 for _, r := range allocatable { 318 if !s.regsInUse.has(r) && !forbiddenMask.has(r) { 319 return r 320 } 321 } 322 return RealRegInvalid 323 } 324 325 func (s *state) resetAt(bs *blockState) { 326 s.regsInUse.range_(func(_ RealReg, vr VReg) { 327 s.setVRegState(vr, RealRegInvalid) 328 }) 329 s.regsInUse.reset() 330 bs.endRegs.range_(func(r RealReg, v VReg) { 331 id := int(v.ID()) 332 st := s.vrStates.GetOrAllocate(id) 333 if st.lastUseUpdatedAtBlockID == s.currentBlockID && st.lastUse == programCounterLiveIn { 334 s.regsInUse.add(r, v) 335 s.setVRegState(v, r) 336 } 337 }) 338 } 339 340 func resetBlockState(b *blockState) { 341 b.seen = false 342 b.visited = false 343 b.endRegs.reset() 344 b.startRegs.reset() 345 b.startFromPredIndex = -1 346 b.liveIns = b.liveIns[:0] 347 } 348 349 func (b *blockState) dump(a *RegisterInfo) { 350 fmt.Println("\t\tblockState:") 351 fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a)) 352 fmt.Println("\t\t\tendRegs:", b.endRegs.format(a)) 353 fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex) 354 fmt.Println("\t\t\tvisited:", b.visited) 355 } 356 357 // DoAllocation performs register allocation on the given Function. 358 func (a *Allocator) DoAllocation(f Function) { 359 a.livenessAnalysis(f) 360 a.alloc(f) 361 a.determineCalleeSavedRealRegs(f) 362 } 363 364 func (a *Allocator) determineCalleeSavedRealRegs(f Function) { 365 a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0] 366 a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) { 367 if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) { 368 a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg]) 369 } 370 }) 371 f.ClobberedRegisters(a.allocatedCalleeSavedRegs) 372 } 373 374 func (a *Allocator) getOrAllocateBlockState(blockID int32) *blockState { 375 return a.blockStates.GetOrAllocate(int(blockID)) 376 } 377 378 // phiBlk returns the block that defines the given phi value, nil otherwise. 379 func (s *state) phiBlk(v VRegID) Block { 380 vs := s.getVRegState(v) 381 if vs.isPhi { 382 return vs.defBlk 383 } 384 return nil 385 } 386 387 const ( 388 programCounterLiveIn = math.MinInt32 389 programCounterLiveOut = math.MaxInt32 390 ) 391 392 // liveAnalysis constructs Allocator.blockLivenessData. 393 // The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2. 394 func (a *Allocator) livenessAnalysis(f Function) { 395 s := &a.state 396 for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter. 397 398 // We should gather phi value data. 399 for _, p := range blk.BlockParams(&a.vs) { 400 vs := s.getVRegState(p.ID()) 401 vs.isPhi = true 402 vs.defBlk = blk 403 } 404 } 405 406 for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { 407 blkID := blk.ID() 408 info := a.getOrAllocateBlockState(blkID) 409 410 a.vs2 = a.vs2[:0] 411 const ( 412 flagDeleted = false 413 flagLive = true 414 ) 415 ns := blk.Succs() 416 for i := 0; i < ns; i++ { 417 succ := blk.Succ(i) 418 if succ == nil { 419 continue 420 } 421 422 succID := succ.ID() 423 succInfo := a.getOrAllocateBlockState(succID) 424 if !succInfo.seen { // This means the back edge. 425 continue 426 } 427 428 for _, v := range succInfo.liveIns { 429 if s.phiBlk(v) != succ { 430 st := s.getVRegState(v) 431 // We use .spilled field to store the flag. 432 st.spilled = flagLive 433 a.vs2 = append(a.vs2, v) 434 } 435 } 436 } 437 438 for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() { 439 440 var use, def VReg 441 for _, def = range instr.Defs(&a.vs) { 442 if !def.IsRealReg() { 443 id := def.ID() 444 st := s.getVRegState(id) 445 // We use .spilled field to store the flag. 446 st.spilled = flagDeleted 447 a.vs2 = append(a.vs2, id) 448 } 449 } 450 for _, use = range instr.Uses(&a.vs) { 451 if !use.IsRealReg() { 452 id := use.ID() 453 st := s.getVRegState(id) 454 // We use .spilled field to store the flag. 455 st.spilled = flagLive 456 a.vs2 = append(a.vs2, id) 457 } 458 } 459 460 if def.Valid() && s.phiBlk(def.ID()) != nil { 461 if use.Valid() && use.IsRealReg() { 462 // If the destination is a phi value, and the source is a real register, this is the beginning of the function. 463 a.state.argRealRegs = append(a.state.argRealRegs, use) 464 } 465 } 466 } 467 468 for _, v := range a.vs2 { 469 st := s.getVRegState(v) 470 // We use .spilled field to store the flag. 471 if st.spilled == flagLive { //nolint:gosimple 472 info.liveIns = append(info.liveIns, v) 473 st.spilled = false 474 } 475 } 476 477 info.seen = true 478 } 479 480 nrs := f.LoopNestingForestRoots() 481 for i := 0; i < nrs; i++ { 482 root := f.LoopNestingForestRoot(i) 483 a.loopTreeDFS(root) 484 } 485 } 486 487 // loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way. 488 func (a *Allocator) loopTreeDFS(entry Block) { 489 a.blks = a.blks[:0] 490 a.blks = append(a.blks, entry) 491 492 s := &a.state 493 for len(a.blks) > 0 { 494 tail := len(a.blks) - 1 495 loop := a.blks[tail] 496 a.blks = a.blks[:tail] 497 a.vs2 = a.vs2[:0] 498 const ( 499 flagDone = false 500 flagPending = true 501 ) 502 info := a.getOrAllocateBlockState(loop.ID()) 503 for _, v := range info.liveIns { 504 if s.phiBlk(v) != loop { 505 a.vs2 = append(a.vs2, v) 506 st := s.getVRegState(v) 507 // We use .spilled field to store the flag. 508 st.spilled = flagPending 509 } 510 } 511 512 var siblingAddedView []VRegID 513 cn := loop.LoopNestingForestChildren() 514 for i := 0; i < cn; i++ { 515 child := loop.LoopNestingForestChild(i) 516 childID := child.ID() 517 childInfo := a.getOrAllocateBlockState(childID) 518 519 if i == 0 { 520 begin := len(childInfo.liveIns) 521 for _, v := range a.vs2 { 522 st := s.getVRegState(v) 523 // We use .spilled field to store the flag. 524 if st.spilled == flagPending { //nolint:gosimple 525 st.spilled = flagDone 526 // TODO: deduplicate, though I don't think it has much impact. 527 childInfo.liveIns = append(childInfo.liveIns, v) 528 } 529 } 530 siblingAddedView = childInfo.liveIns[begin:] 531 } else { 532 // TODO: deduplicate, though I don't think it has much impact. 533 childInfo.liveIns = append(childInfo.liveIns, siblingAddedView...) 534 } 535 536 if child.LoopHeader() { 537 a.blks = append(a.blks, child) 538 } 539 } 540 541 if cn == 0 { 542 // If there's no forest child, we haven't cleared the .spilled field at this point. 543 for _, v := range a.vs2 { 544 st := s.getVRegState(v) 545 st.spilled = false 546 } 547 } 548 } 549 } 550 551 // alloc allocates registers for the given function by iterating the blocks in the reverse postorder. 552 // The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go 553 // In short, this is a simply linear scan register allocation where each block inherits the register allocation state from 554 // one of its predecessors. Each block inherits the selected state and starts allocation from there. 555 // If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state"). 556 // The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because 557 // at the point, we all know where the reloads happen, and therefore we can know the best place to spill the values. More precisely, 558 // the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value. 559 // 560 // All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^. 561 func (a *Allocator) alloc(f Function) { 562 // First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block). 563 for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { 564 if wazevoapi.RegAllocLoggingEnabled { 565 fmt.Printf("========== allocating blk%d ========\n", blk.ID()) 566 } 567 if blk.Entry() { 568 a.finalizeStartReg(blk) 569 } 570 a.allocBlock(f, blk) 571 } 572 // After the allocation, we all know the start and end state of each block. So we can fix the merge states. 573 for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { 574 a.fixMergeState(f, blk) 575 } 576 // Finally, we insert the spill instructions as we know all the places where the reloads happen. 577 a.scheduleSpills(f) 578 } 579 580 func (a *Allocator) updateLiveInVRState(liveness *blockState) { 581 currentBlockID := a.state.currentBlockID 582 for _, v := range liveness.liveIns { 583 vs := a.state.getVRegState(v) 584 vs.lastUse = programCounterLiveIn 585 vs.lastUseUpdatedAtBlockID = currentBlockID 586 } 587 } 588 589 func (a *Allocator) finalizeStartReg(blk Block) { 590 bID := blk.ID() 591 liveness := a.getOrAllocateBlockState(bID) 592 s := &a.state 593 currentBlkState := a.getOrAllocateBlockState(bID) 594 if currentBlkState.startFromPredIndex > -1 { 595 return 596 } 597 598 s.currentBlockID = bID 599 a.updateLiveInVRState(liveness) 600 601 preds := blk.Preds() 602 var predState *blockState 603 switch preds { 604 case 0: // This is the entry block. 605 case 1: 606 predID := blk.Pred(0).ID() 607 predState = a.getOrAllocateBlockState(predID) 608 currentBlkState.startFromPredIndex = 0 609 default: 610 // TODO: there should be some better heuristic to choose the predecessor. 611 for i := 0; i < preds; i++ { 612 predID := blk.Pred(i).ID() 613 if _predState := a.getOrAllocateBlockState(predID); _predState.visited { 614 predState = _predState 615 currentBlkState.startFromPredIndex = i 616 break 617 } 618 } 619 } 620 if predState == nil { 621 if !blk.Entry() { 622 panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID())) 623 } 624 for _, u := range s.argRealRegs { 625 s.useRealReg(u.RealReg(), u) 626 } 627 currentBlkState.startFromPredIndex = 0 628 } else if predState != nil { 629 if wazevoapi.RegAllocLoggingEnabled { 630 fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n", 631 bID, blk.Pred(currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex) 632 } 633 s.resetAt(predState) 634 } 635 636 s.regsInUse.range_(func(allocated RealReg, v VReg) { 637 currentBlkState.startRegs.add(allocated, v) 638 }) 639 if wazevoapi.RegAllocLoggingEnabled { 640 fmt.Printf("finalized start reg for blk%d: %s\n", blk.ID(), currentBlkState.startRegs.format(a.regInfo)) 641 } 642 } 643 644 func (a *Allocator) allocBlock(f Function, blk Block) { 645 bID := blk.ID() 646 s := &a.state 647 currentBlkState := a.getOrAllocateBlockState(bID) 648 s.currentBlockID = bID 649 650 if currentBlkState.startFromPredIndex < 0 { 651 panic("BUG: startFromPredIndex should be set in finalizeStartReg prior to allocBlock") 652 } 653 654 // Clears the previous state. 655 s.regsInUse.range_(func(allocatedRealReg RealReg, vr VReg) { 656 s.setVRegState(vr, RealRegInvalid) 657 }) 658 s.regsInUse.reset() 659 // Then set the start state. 660 currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) { 661 s.useRealReg(allocatedRealReg, vr) 662 }) 663 664 desiredUpdated := a.vs2[:0] 665 666 // Update the last use of each VReg. 667 var pc programCounter 668 for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { 669 var use, def VReg 670 for _, use = range instr.Uses(&a.vs) { 671 if !use.IsRealReg() { 672 s.getVRegState(use.ID()).lastUse = pc 673 } 674 } 675 676 if instr.IsCopy() { 677 def = instr.Defs(&a.vs)[0] 678 r := def.RealReg() 679 if r != RealRegInvalid { 680 useID := use.ID() 681 vs := s.getVRegState(useID) 682 if !vs.isPhi { // TODO: no idea why do we need this. 683 vs.desiredLoc = newDesiredLocReg(r) 684 desiredUpdated = append(desiredUpdated, useID) 685 } 686 } 687 } 688 pc++ 689 } 690 691 // Mark all live-out values by checking live-in of the successors. 692 // While doing so, we also update the desired register values. 693 var succ Block 694 for i, ns := 0, blk.Succs(); i < ns; i++ { 695 succ = blk.Succ(i) 696 if succ == nil { 697 continue 698 } 699 700 succID := succ.ID() 701 succState := a.getOrAllocateBlockState(succID) 702 for _, v := range succState.liveIns { 703 if s.phiBlk(v) != succ { 704 st := s.getVRegState(v) 705 st.lastUse = programCounterLiveOut 706 } 707 } 708 709 if succState.startFromPredIndex > -1 { 710 if wazevoapi.RegAllocLoggingEnabled { 711 fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo)) 712 } 713 succState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) { 714 vs := s.getVRegState(vr.ID()) 715 vs.desiredLoc = newDesiredLocReg(allocatedRealReg) 716 desiredUpdated = append(desiredUpdated, vr.ID()) 717 }) 718 for _, p := range succ.BlockParams(&a.vs) { 719 vs := s.getVRegState(p.ID()) 720 if vs.desiredLoc.realReg() == RealRegInvalid { 721 vs.desiredLoc = desiredLocStack 722 desiredUpdated = append(desiredUpdated, p.ID()) 723 } 724 } 725 } 726 } 727 728 // Propagate the desired register values from the end of the block to the beginning. 729 for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() { 730 if instr.IsCopy() { 731 def := instr.Defs(&a.vs)[0] 732 defState := s.getVRegState(def.ID()) 733 desired := defState.desiredLoc.realReg() 734 if desired == RealRegInvalid { 735 continue 736 } 737 738 use := instr.Uses(&a.vs)[0] 739 useID := use.ID() 740 useState := s.getVRegState(useID) 741 if s.phiBlk(useID) != succ && useState.desiredLoc == desiredLocUnspecified { 742 useState.desiredLoc = newDesiredLocReg(desired) 743 desiredUpdated = append(desiredUpdated, useID) 744 } 745 } 746 } 747 748 pc = 0 749 for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { 750 if wazevoapi.RegAllocLoggingEnabled { 751 fmt.Println(instr) 752 } 753 754 var currentUsedSet RegSet 755 killSet := a.reals[:0] 756 757 // Gather the set of registers that will be used in the current instruction. 758 for _, use := range instr.Uses(&a.vs) { 759 if use.IsRealReg() { 760 r := use.RealReg() 761 currentUsedSet = currentUsedSet.add(r) 762 if a.allocatableSet.has(r) { 763 killSet = append(killSet, r) 764 } 765 } else { 766 vs := s.getVRegState(use.ID()) 767 if r := vs.r; r != RealRegInvalid { 768 currentUsedSet = currentUsedSet.add(r) 769 } 770 } 771 } 772 773 for i, use := range instr.Uses(&a.vs) { 774 if !use.IsRealReg() { 775 vs := s.getVRegState(use.ID()) 776 killed := vs.lastUse == pc 777 r := vs.r 778 779 if r == RealRegInvalid { 780 r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet, 781 // Prefer the desired register if it's available. 782 vs.desiredLoc.realReg()) 783 vs.recordReload(f, blk) 784 f.ReloadRegisterBefore(use.SetRealReg(r), instr) 785 s.useRealReg(r, use) 786 } 787 if wazevoapi.RegAllocLoggingEnabled { 788 fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r)) 789 } 790 instr.AssignUse(i, use.SetRealReg(r)) 791 currentUsedSet = currentUsedSet.add(r) 792 if killed { 793 if wazevoapi.RegAllocLoggingEnabled { 794 fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r)) 795 } 796 killSet = append(killSet, r) 797 } 798 } 799 } 800 801 isIndirect := instr.IsIndirectCall() 802 call := instr.IsCall() || isIndirect 803 if call { 804 addr := RealRegInvalid 805 if instr.IsIndirectCall() { 806 addr = a.vs[0].RealReg() 807 } 808 a.releaseCallerSavedRegs(addr) 809 } 810 811 for _, r := range killSet { 812 s.releaseRealReg(r) 813 } 814 a.reals = killSet 815 816 defs := instr.Defs(&a.vs) 817 switch { 818 case len(defs) > 1: 819 // Some instructions define multiple values on real registers. 820 // E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx. 821 // 822 // Note that currently I assume that such instructions define only the pre colored real registers, not the VRegs 823 // that require allocations. If we need to support such case, we need to add the logic to handle it here, 824 // though is there any such instruction? 825 for _, def := range defs { 826 if !def.IsRealReg() { 827 panic("BUG: multiple defs should be on real registers") 828 } 829 r := def.RealReg() 830 if s.regsInUse.has(r) { 831 s.releaseRealReg(r) 832 } 833 s.useRealReg(r, def) 834 } 835 case len(defs) == 1: 836 def := defs[0] 837 if def.IsRealReg() { 838 r := def.RealReg() 839 if a.allocatableSet.has(r) { 840 if s.regsInUse.has(r) { 841 s.releaseRealReg(r) 842 } 843 s.useRealReg(r, def) 844 } 845 } else { 846 vState := s.getVRegState(def.ID()) 847 r := vState.r 848 849 if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid { 850 if r != desired { 851 if (vState.isPhi && vState.defBlk == succ) || 852 // If this is not a phi and it's already assigned a real reg, 853 // this value has multiple definitions, hence we cannot assign the desired register. 854 (!s.regsInUse.has(desired) && r == RealRegInvalid) { 855 // If the phi value is passed via a real register, we force the value to be in the desired register. 856 if wazevoapi.RegAllocLoggingEnabled { 857 fmt.Printf("\t\tv%d is phi and desiredReg=%s\n", def.ID(), a.regInfo.RealRegName(desired)) 858 } 859 if r != RealRegInvalid { 860 // If the value is already in a different real register, we release it to change the state. 861 // Otherwise, multiple registers might have the same values at the end, which results in 862 // messing up the merge state reconciliation. 863 s.releaseRealReg(r) 864 } 865 r = desired 866 s.releaseRealReg(r) 867 s.useRealReg(r, def) 868 } 869 } 870 } 871 872 // Allocate a new real register if `def` is not currently assigned one. 873 // It can happen when multiple instructions define the same VReg (e.g. const loads). 874 if r == RealRegInvalid { 875 if instr.IsCopy() { 876 copySrc := instr.Uses(&a.vs)[0].RealReg() 877 if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) { 878 r = copySrc 879 } 880 } 881 if r == RealRegInvalid { 882 typ := def.RegType() 883 r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid) 884 } 885 s.useRealReg(r, def) 886 } 887 dr := def.SetRealReg(r) 888 instr.AssignDef(dr) 889 if wazevoapi.RegAllocLoggingEnabled { 890 fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r)) 891 } 892 if vState.isPhi { 893 if vState.desiredLoc.stack() { // Stack based phi value. 894 f.StoreRegisterAfter(dr, instr) 895 // Release the real register as it's not used anymore. 896 s.releaseRealReg(r) 897 } else { 898 // Only the register based phis are necessary to track the defining instructions 899 // since the stack-based phis are already having stores inserted ^. 900 n := a.phiDefInstListPool.Allocate() 901 n.instr = instr 902 n.next = vState.phiDefInstList 903 n.v = dr 904 vState.phiDefInstList = n 905 } 906 } else { 907 vState.defInstr = instr 908 vState.defBlk = blk 909 } 910 } 911 } 912 if wazevoapi.RegAllocLoggingEnabled { 913 fmt.Println(instr) 914 } 915 pc++ 916 } 917 918 s.regsInUse.range_(func(allocated RealReg, v VReg) { 919 currentBlkState.endRegs.add(allocated, v) 920 }) 921 922 currentBlkState.visited = true 923 if wazevoapi.RegAllocLoggingEnabled { 924 currentBlkState.dump(a.regInfo) 925 } 926 927 // Reset the desired end location. 928 for _, v := range desiredUpdated { 929 vs := s.getVRegState(v) 930 vs.desiredLoc = desiredLocUnspecified 931 } 932 a.vs2 = desiredUpdated[:0] 933 934 for i := 0; i < blk.Succs(); i++ { 935 succ := blk.Succ(i) 936 if succ == nil { 937 continue 938 } 939 // If the successor is not visited yet, finalize the start state. 940 a.finalizeStartReg(succ) 941 } 942 } 943 944 func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) { 945 s := &a.state 946 947 for i := 0; i < 64; i++ { 948 allocated := RealReg(i) 949 if allocated == addrReg { // If this is the call indirect, we should not touch the addr register. 950 continue 951 } 952 if v := s.regsInUse.get(allocated); v.Valid() { 953 if v.IsRealReg() { 954 continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg. 955 } 956 if !a.regInfo.CallerSavedRegisters.has(allocated) { 957 // If this is not a caller-saved register, it is safe to keep it across the call. 958 continue 959 } 960 s.releaseRealReg(allocated) 961 } 962 } 963 } 964 965 func (a *Allocator) fixMergeState(f Function, blk Block) { 966 preds := blk.Preds() 967 if preds <= 1 { 968 return 969 } 970 971 s := &a.state 972 973 // Restores the state at the beginning of the block. 974 bID := blk.ID() 975 blkSt := a.getOrAllocateBlockState(bID) 976 desiredOccupants := &blkSt.startRegs 977 aliveOnRegVRegs := make(map[VReg]RealReg) 978 for i := 0; i < 64; i++ { 979 r := RealReg(i) 980 if v := blkSt.startRegs.get(r); v.Valid() { 981 aliveOnRegVRegs[v] = r 982 } 983 } 984 985 if wazevoapi.RegAllocLoggingEnabled { 986 fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo)) 987 } 988 989 s.currentBlockID = bID 990 a.updateLiveInVRState(a.getOrAllocateBlockState(bID)) 991 992 currentOccupants := &a.currentOccupants 993 for i := 0; i < preds; i++ { 994 currentOccupants.reset() 995 if i == blkSt.startFromPredIndex { 996 continue 997 } 998 999 currentOccupantsRev := make(map[VReg]RealReg) 1000 pred := blk.Pred(i) 1001 predSt := a.getOrAllocateBlockState(pred.ID()) 1002 for ii := 0; ii < 64; ii++ { 1003 r := RealReg(ii) 1004 if v := predSt.endRegs.get(r); v.Valid() { 1005 if _, ok := aliveOnRegVRegs[v]; !ok { 1006 continue 1007 } 1008 currentOccupants.add(r, v) 1009 currentOccupantsRev[v] = r 1010 } 1011 } 1012 1013 s.resetAt(predSt) 1014 1015 // Finds the free registers if any. 1016 intTmp, floatTmp := VRegInvalid, VRegInvalid 1017 if intFree := s.findAllocatable( 1018 a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set, 1019 ); intFree != RealRegInvalid { 1020 intTmp = FromRealReg(intFree, RegTypeInt) 1021 } 1022 if floatFree := s.findAllocatable( 1023 a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set, 1024 ); floatFree != RealRegInvalid { 1025 floatTmp = FromRealReg(floatFree, RegTypeFloat) 1026 } 1027 1028 if wazevoapi.RegAllocLoggingEnabled { 1029 fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) 1030 } 1031 1032 for ii := 0; ii < 64; ii++ { 1033 r := RealReg(ii) 1034 desiredVReg := desiredOccupants.get(r) 1035 if !desiredVReg.Valid() { 1036 continue 1037 } 1038 1039 currentVReg := currentOccupants.get(r) 1040 if desiredVReg.ID() == currentVReg.ID() { 1041 continue 1042 } 1043 1044 typ := desiredVReg.RegType() 1045 var tmpRealReg VReg 1046 if typ == RegTypeInt { 1047 tmpRealReg = intTmp 1048 } else { 1049 tmpRealReg = floatTmp 1050 } 1051 a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ) 1052 } 1053 } 1054 } 1055 1056 func (a *Allocator) reconcileEdge(f Function, 1057 r RealReg, 1058 pred Block, 1059 currentOccupants *regInUseSet, 1060 currentOccupantsRev map[VReg]RealReg, 1061 currentVReg, desiredVReg VReg, 1062 freeReg VReg, 1063 typ RegType, 1064 ) { 1065 s := &a.state 1066 if currentVReg.Valid() { 1067 // Both are on reg. 1068 er, ok := currentOccupantsRev[desiredVReg] 1069 if !ok { 1070 if wazevoapi.RegAllocLoggingEnabled { 1071 fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n", 1072 desiredVReg.ID(), a.regInfo.RealRegName(r), 1073 ) 1074 } 1075 // This case is that the desired value is on the stack, but currentVReg is on the target register. 1076 // We need to move the current value to the stack, and reload the desired value. 1077 // TODO: we can do better here. 1078 f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion()) 1079 delete(currentOccupantsRev, currentVReg) 1080 1081 s.getVRegState(desiredVReg.ID()).recordReload(f, pred) 1082 f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) 1083 currentOccupants.add(r, desiredVReg) 1084 currentOccupantsRev[desiredVReg] = r 1085 return 1086 } 1087 1088 if wazevoapi.RegAllocLoggingEnabled { 1089 fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", 1090 desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), 1091 ) 1092 } 1093 f.SwapBefore( 1094 currentVReg.SetRealReg(r), 1095 desiredVReg.SetRealReg(er), 1096 freeReg, 1097 pred.LastInstrForInsertion(), 1098 ) 1099 s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) 1100 currentOccupantsRev[desiredVReg] = r 1101 currentOccupantsRev[currentVReg] = er 1102 currentOccupants.add(r, desiredVReg) 1103 currentOccupants.add(er, currentVReg) 1104 if wazevoapi.RegAllocLoggingEnabled { 1105 fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) 1106 } 1107 } else { 1108 // Desired is on reg, but currently the target register is not used. 1109 if wazevoapi.RegAllocLoggingEnabled { 1110 fmt.Printf("\t\tv%d is desired to be on %s, current not used\n", 1111 desiredVReg.ID(), a.regInfo.RealRegName(r), 1112 ) 1113 } 1114 if currentReg, ok := currentOccupantsRev[desiredVReg]; ok { 1115 f.InsertMoveBefore( 1116 FromRealReg(r, typ), 1117 desiredVReg.SetRealReg(currentReg), 1118 pred.LastInstrForInsertion(), 1119 ) 1120 currentOccupants.remove(currentReg) 1121 } else { 1122 s.getVRegState(desiredVReg.ID()).recordReload(f, pred) 1123 f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) 1124 } 1125 currentOccupantsRev[desiredVReg] = r 1126 currentOccupants.add(r, desiredVReg) 1127 } 1128 1129 if wazevoapi.RegAllocLoggingEnabled { 1130 fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) 1131 } 1132 } 1133 1134 func (a *Allocator) scheduleSpills(f Function) { 1135 states := a.state.vrStates 1136 for i := 0; i <= states.MaxIDEncountered(); i++ { 1137 vs := states.Get(i) 1138 if vs == nil { 1139 continue 1140 } 1141 if vs.spilled { 1142 a.scheduleSpill(f, vs) 1143 } 1144 } 1145 } 1146 1147 func (a *Allocator) scheduleSpill(f Function, vs *vrState) { 1148 v := vs.v 1149 // If the value is the phi value, we need to insert a spill after each phi definition. 1150 if vs.isPhi { 1151 for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next { 1152 f.StoreRegisterAfter(defInstr.v, defInstr.instr) 1153 } 1154 return 1155 } 1156 1157 pos := vs.lca 1158 definingBlk := vs.defBlk 1159 r := RealRegInvalid 1160 if definingBlk == nil { 1161 panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String())) 1162 } 1163 if pos == nil { 1164 panic(fmt.Sprintf("BUG: pos should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String())) 1165 } 1166 1167 if wazevoapi.RegAllocLoggingEnabled { 1168 fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID()) 1169 } 1170 for pos != definingBlk { 1171 st := a.getOrAllocateBlockState(pos.ID()) 1172 for ii := 0; ii < 64; ii++ { 1173 rr := RealReg(ii) 1174 if st.startRegs.get(rr) == v { 1175 r = rr 1176 // Already in the register, so we can place the spill at the beginning of the block. 1177 break 1178 } 1179 } 1180 1181 if r != RealRegInvalid { 1182 break 1183 } 1184 1185 pos = f.Idom(pos) 1186 } 1187 1188 if pos == definingBlk { 1189 defInstr := vs.defInstr 1190 defInstr.Defs(&a.vs) 1191 if wazevoapi.RegAllocLoggingEnabled { 1192 fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr) 1193 } 1194 f.StoreRegisterAfter(a.vs[0], defInstr) 1195 } else { 1196 // Found an ancestor block that holds the value in the register at the beginning of the block. 1197 // We need to insert a spill before the last use. 1198 first := pos.FirstInstr() 1199 if wazevoapi.RegAllocLoggingEnabled { 1200 fmt.Printf("schedule spill v%d before %v\n", v.ID(), first) 1201 } 1202 f.StoreRegisterAfter(v.SetRealReg(r), first) 1203 } 1204 } 1205 1206 // Reset resets the allocator's internal state so that it can be reused. 1207 func (a *Allocator) Reset() { 1208 a.state.reset() 1209 a.blockStates.Reset() 1210 a.phiDefInstListPool.Reset() 1211 a.vs = a.vs[:0] 1212 }