github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/ssa/regalloc.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Register allocation. 6 // 7 // We use a version of a linear scan register allocator. We treat the 8 // whole function as a single long basic block and run through 9 // it using a greedy register allocator. Then all merge edges 10 // (those targeting a block with len(Preds)>1) are processed to 11 // shuffle data into the place that the target of the edge expects. 12 // 13 // The greedy allocator moves values into registers just before they 14 // are used, spills registers only when necessary, and spills the 15 // value whose next use is farthest in the future. 16 // 17 // The register allocator requires that a block is not scheduled until 18 // at least one of its predecessors have been scheduled. The most recent 19 // such predecessor provides the starting register state for a block. 20 // 21 // It also requires that there are no critical edges (critical = 22 // comes from a block with >1 successor and goes to a block with >1 23 // predecessor). This makes it easy to add fixup code on merge edges - 24 // the source of a merge edge has only one successor, so we can add 25 // fixup code to the end of that block. 26 27 // Spilling 28 // 29 // During the normal course of the allocator, we might throw a still-live 30 // value out of all registers. When that value is subsequently used, we must 31 // load it from a slot on the stack. We must also issue an instruction to 32 // initialize that stack location with a copy of v. 33 // 34 // pre-regalloc: 35 // (1) v = Op ... 36 // (2) x = Op ... 37 // (3) ... = Op v ... 38 // 39 // post-regalloc: 40 // (1) v = Op ... : AX // computes v, store result in AX 41 // s = StoreReg v // spill v to a stack slot 42 // (2) x = Op ... : AX // some other op uses AX 43 // c = LoadReg s : CX // restore v from stack slot 44 // (3) ... = Op c ... // use the restored value 45 // 46 // Allocation occurs normally until we reach (3) and we realize we have 47 // a use of v and it isn't in any register. At that point, we allocate 48 // a spill (a StoreReg) for v. We can't determine the correct place for 49 // the spill at this point, so we allocate the spill as blockless initially. 50 // The restore is then generated to load v back into a register so it can 51 // be used. Subsequent uses of v will use the restored value c instead. 52 // 53 // What remains is the question of where to schedule the spill. 54 // During allocation, we keep track of the dominator of all restores of v. 55 // The spill of v must dominate that block. The spill must also be issued at 56 // a point where v is still in a register. 57 // 58 // To find the right place, start at b, the block which dominates all restores. 59 // - If b is v.Block, then issue the spill right after v. 60 // It is known to be in a register at that point, and dominates any restores. 61 // - Otherwise, if v is in a register at the start of b, 62 // put the spill of v at the start of b. 63 // - Otherwise, set b = immediate dominator of b, and repeat. 64 // 65 // Phi values are special, as always. We define two kinds of phis, those 66 // where the merge happens in a register (a "register" phi) and those where 67 // the merge happens in a stack location (a "stack" phi). 68 // 69 // A register phi must have the phi and all of its inputs allocated to the 70 // same register. Register phis are spilled similarly to regular ops. 71 // 72 // A stack phi must have the phi and all of its inputs allocated to the same 73 // stack location. Stack phis start out life already spilled - each phi 74 // input must be a store (using StoreReg) at the end of the corresponding 75 // predecessor block. 76 // b1: y = ... : AX b2: z = ... : BX 77 // y2 = StoreReg y z2 = StoreReg z 78 // goto b3 goto b3 79 // b3: x = phi(y2, z2) 80 // The stack allocator knows that StoreReg args of stack-allocated phis 81 // must be allocated to the same stack slot as the phi that uses them. 82 // x is now a spilled value and a restore must appear before its first use. 83 84 // TODO 85 86 // Use an affinity graph to mark two values which should use the 87 // same register. This affinity graph will be used to prefer certain 88 // registers for allocation. This affinity helps eliminate moves that 89 // are required for phi implementations and helps generate allocations 90 // for 2-register architectures. 91 92 // Note: regalloc generates a not-quite-SSA output. If we have: 93 // 94 // b1: x = ... : AX 95 // x2 = StoreReg x 96 // ... AX gets reused for something else ... 97 // if ... goto b3 else b4 98 // 99 // b3: x3 = LoadReg x2 : BX b4: x4 = LoadReg x2 : CX 100 // ... use x3 ... ... use x4 ... 101 // 102 // b2: ... use x3 ... 103 // 104 // If b3 is the primary predecessor of b2, then we use x3 in b2 and 105 // add a x4:CX->BX copy at the end of b4. 106 // But the definition of x3 doesn't dominate b2. We should really 107 // insert a dummy phi at the start of b2 (x5=phi(x3,x4):BX) to keep 108 // SSA form. For now, we ignore this problem as remaining in strict 109 // SSA form isn't needed after regalloc. We'll just leave the use 110 // of x3 not dominated by the definition of x3, and the CX->BX copy 111 // will have no use (so don't run deadcode after regalloc!). 112 // TODO: maybe we should introduce these extra phis? 113 114 package ssa 115 116 import ( 117 "github.com/gagliardetto/golang-go/cmd/compile/internal/types" 118 "github.com/gagliardetto/golang-go/cmd/internal/objabi" 119 "github.com/gagliardetto/golang-go/cmd/internal/src" 120 "github.com/gagliardetto/golang-go/cmd/internal/sys" 121 "fmt" 122 "math/bits" 123 "unsafe" 124 ) 125 126 const ( 127 moveSpills = iota 128 logSpills 129 regDebug 130 stackDebug 131 ) 132 133 // distance is a measure of how far into the future values are used. 134 // distance is measured in units of instructions. 135 const ( 136 likelyDistance = 1 137 normalDistance = 10 138 unlikelyDistance = 100 139 ) 140 141 // regalloc performs register allocation on f. It sets f.RegAlloc 142 // to the resulting allocation. 143 func regalloc(f *Func) { 144 var s regAllocState 145 s.init(f) 146 s.regalloc(f) 147 } 148 149 type register uint8 150 151 const noRegister register = 255 152 153 // A regMask encodes a set of machine registers. 154 // TODO: regMask -> regSet? 155 type regMask uint64 156 157 func (m regMask) String() string { 158 s := "" 159 for r := register(0); m != 0; r++ { 160 if m>>r&1 == 0 { 161 continue 162 } 163 m &^= regMask(1) << r 164 if s != "" { 165 s += " " 166 } 167 s += fmt.Sprintf("r%d", r) 168 } 169 return s 170 } 171 172 func (s *regAllocState) RegMaskString(m regMask) string { 173 str := "" 174 for r := register(0); m != 0; r++ { 175 if m>>r&1 == 0 { 176 continue 177 } 178 m &^= regMask(1) << r 179 if str != "" { 180 str += " " 181 } 182 str += s.registers[r].String() 183 } 184 return str 185 } 186 187 // countRegs returns the number of set bits in the register mask. 188 func countRegs(r regMask) int { 189 return bits.OnesCount64(uint64(r)) 190 } 191 192 // pickReg picks an arbitrary register from the register mask. 193 func pickReg(r regMask) register { 194 if r == 0 { 195 panic("can't pick a register from an empty set") 196 } 197 // pick the lowest one 198 return register(bits.TrailingZeros64(uint64(r))) 199 } 200 201 type use struct { 202 dist int32 // distance from start of the block to a use of a value 203 pos src.XPos // source position of the use 204 next *use // linked list of uses of a value in nondecreasing dist order 205 } 206 207 // A valState records the register allocation state for a (pre-regalloc) value. 208 type valState struct { 209 regs regMask // the set of registers holding a Value (usually just one) 210 uses *use // list of uses in this block 211 spill *Value // spilled copy of the Value (if any) 212 restoreMin int32 // minimum of all restores' blocks' sdom.entry 213 restoreMax int32 // maximum of all restores' blocks' sdom.exit 214 needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() 215 rematerializeable bool // cached value of v.rematerializeable() 216 } 217 218 type regState struct { 219 v *Value // Original (preregalloc) Value stored in this register. 220 c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it. 221 // If a register is unused, v==c==nil 222 } 223 224 type regAllocState struct { 225 f *Func 226 227 sdom SparseTree 228 registers []Register 229 numRegs register 230 SPReg register 231 SBReg register 232 GReg register 233 allocatable regMask 234 235 // for each block, its primary predecessor. 236 // A predecessor of b is primary if it is the closest 237 // predecessor that appears before b in the layout order. 238 // We record the index in the Preds list where the primary predecessor sits. 239 primary []int32 240 241 // live values at the end of each block. live[b.ID] is a list of value IDs 242 // which are live at the end of b, together with a count of how many instructions 243 // forward to the next use. 244 live [][]liveInfo 245 // desired register assignments at the end of each block. 246 // Note that this is a static map computed before allocation occurs. Dynamic 247 // register desires (from partially completed allocations) will trump 248 // this information. 249 desired []desiredState 250 251 // current state of each (preregalloc) Value 252 values []valState 253 254 // ID of SP, SB values 255 sp, sb ID 256 257 // For each Value, map from its value ID back to the 258 // preregalloc Value it was derived from. 259 orig []*Value 260 261 // current state of each register 262 regs []regState 263 264 // registers that contain values which can't be kicked out 265 nospill regMask 266 267 // mask of registers currently in use 268 used regMask 269 270 // mask of registers used in the current instruction 271 tmpused regMask 272 273 // current block we're working on 274 curBlock *Block 275 276 // cache of use records 277 freeUseRecords *use 278 279 // endRegs[blockid] is the register state at the end of each block. 280 // encoded as a set of endReg records. 281 endRegs [][]endReg 282 283 // startRegs[blockid] is the register state at the start of merge blocks. 284 // saved state does not include the state of phi ops in the block. 285 startRegs [][]startReg 286 287 // spillLive[blockid] is the set of live spills at the end of each block 288 spillLive [][]ID 289 290 // a set of copies we generated to move things around, and 291 // whether it is used in shuffle. Unused copies will be deleted. 292 copies map[*Value]bool 293 294 loopnest *loopnest 295 296 // choose a good order in which to visit blocks for allocation purposes. 297 visitOrder []*Block 298 } 299 300 type endReg struct { 301 r register 302 v *Value // pre-regalloc value held in this register (TODO: can we use ID here?) 303 c *Value // cached version of the value 304 } 305 306 type startReg struct { 307 r register 308 v *Value // pre-regalloc value needed in this register 309 c *Value // cached version of the value 310 pos src.XPos // source position of use of this register 311 } 312 313 // freeReg frees up register r. Any current user of r is kicked out. 314 func (s *regAllocState) freeReg(r register) { 315 v := s.regs[r].v 316 if v == nil { 317 s.f.Fatalf("tried to free an already free register %d\n", r) 318 } 319 320 // Mark r as unused. 321 if s.f.pass.debug > regDebug { 322 fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c) 323 } 324 s.regs[r] = regState{} 325 s.values[v.ID].regs &^= regMask(1) << r 326 s.used &^= regMask(1) << r 327 } 328 329 // freeRegs frees up all registers listed in m. 330 func (s *regAllocState) freeRegs(m regMask) { 331 for m&s.used != 0 { 332 s.freeReg(pickReg(m & s.used)) 333 } 334 } 335 336 // setOrig records that c's original value is the same as 337 // v's original value. 338 func (s *regAllocState) setOrig(c *Value, v *Value) { 339 for int(c.ID) >= len(s.orig) { 340 s.orig = append(s.orig, nil) 341 } 342 if s.orig[c.ID] != nil { 343 s.f.Fatalf("orig value set twice %s %s", c, v) 344 } 345 s.orig[c.ID] = s.orig[v.ID] 346 } 347 348 // assignReg assigns register r to hold c, a copy of v. 349 // r must be unused. 350 func (s *regAllocState) assignReg(r register, v *Value, c *Value) { 351 if s.f.pass.debug > regDebug { 352 fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c) 353 } 354 if s.regs[r].v != nil { 355 s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v) 356 } 357 358 // Update state. 359 s.regs[r] = regState{v, c} 360 s.values[v.ID].regs |= regMask(1) << r 361 s.used |= regMask(1) << r 362 s.f.setHome(c, &s.registers[r]) 363 } 364 365 // allocReg chooses a register from the set of registers in mask. 366 // If there is no unused register, a Value will be kicked out of 367 // a register to make room. 368 func (s *regAllocState) allocReg(mask regMask, v *Value) register { 369 if v.OnWasmStack { 370 return noRegister 371 } 372 373 mask &= s.allocatable 374 mask &^= s.nospill 375 if mask == 0 { 376 s.f.Fatalf("no register available for %s", v.LongString()) 377 } 378 379 // Pick an unused register if one is available. 380 if mask&^s.used != 0 { 381 return pickReg(mask &^ s.used) 382 } 383 384 // Pick a value to spill. Spill the value with the 385 // farthest-in-the-future use. 386 // TODO: Prefer registers with already spilled Values? 387 // TODO: Modify preference using affinity graph. 388 // TODO: if a single value is in multiple registers, spill one of them 389 // before spilling a value in just a single register. 390 391 // Find a register to spill. We spill the register containing the value 392 // whose next use is as far in the future as possible. 393 // https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm 394 var r register 395 maxuse := int32(-1) 396 for t := register(0); t < s.numRegs; t++ { 397 if mask>>t&1 == 0 { 398 continue 399 } 400 v := s.regs[t].v 401 if n := s.values[v.ID].uses.dist; n > maxuse { 402 // v's next use is farther in the future than any value 403 // we've seen so far. A new best spill candidate. 404 r = t 405 maxuse = n 406 } 407 } 408 if maxuse == -1 { 409 s.f.Fatalf("couldn't find register to spill") 410 } 411 412 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm { 413 // TODO(neelance): In theory this should never happen, because all wasm registers are equal. 414 // So if there is still a free register, the allocation should have picked that one in the first place instead of 415 // trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization. 416 s.freeReg(r) 417 return r 418 } 419 420 // Try to move it around before kicking out, if there is a free register. 421 // We generate a Copy and record it. It will be deleted if never used. 422 v2 := s.regs[r].v 423 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r) 424 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 { 425 r2 := pickReg(m) 426 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c) 427 s.copies[c] = false 428 if s.f.pass.debug > regDebug { 429 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2]) 430 } 431 s.setOrig(c, v2) 432 s.assignReg(r2, v2, c) 433 } 434 s.freeReg(r) 435 return r 436 } 437 438 // makeSpill returns a Value which represents the spilled value of v. 439 // b is the block in which the spill is used. 440 func (s *regAllocState) makeSpill(v *Value, b *Block) *Value { 441 vi := &s.values[v.ID] 442 if vi.spill != nil { 443 // Final block not known - keep track of subtree where restores reside. 444 vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry) 445 vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit) 446 return vi.spill 447 } 448 // Make a spill for v. We don't know where we want 449 // to put it yet, so we leave it blockless for now. 450 spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos) 451 // We also don't know what the spill's arg will be. 452 // Leave it argless for now. 453 s.setOrig(spill, v) 454 vi.spill = spill 455 vi.restoreMin = s.sdom[b.ID].entry 456 vi.restoreMax = s.sdom[b.ID].exit 457 return spill 458 } 459 460 // allocValToReg allocates v to a register selected from regMask and 461 // returns the register copy of v. Any previous user is kicked out and spilled 462 // (if necessary). Load code is added at the current pc. If nospill is set the 463 // allocated register is marked nospill so the assignment cannot be 464 // undone until the caller allows it by clearing nospill. Returns a 465 // *Value which is either v or a copy of v allocated to the chosen register. 466 func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value { 467 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() { 468 c := v.copyIntoWithXPos(s.curBlock, pos) 469 c.OnWasmStack = true 470 s.setOrig(c, v) 471 return c 472 } 473 if v.OnWasmStack { 474 return v 475 } 476 477 vi := &s.values[v.ID] 478 pos = pos.WithNotStmt() 479 // Check if v is already in a requested register. 480 if mask&vi.regs != 0 { 481 r := pickReg(mask & vi.regs) 482 if s.regs[r].v != v || s.regs[r].c == nil { 483 panic("bad register state") 484 } 485 if nospill { 486 s.nospill |= regMask(1) << r 487 } 488 return s.regs[r].c 489 } 490 491 var r register 492 // If nospill is set, the value is used immediately, so it can live on the WebAssembly stack. 493 onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm 494 if !onWasmStack { 495 // Allocate a register. 496 r = s.allocReg(mask, v) 497 } 498 499 // Allocate v to the new register. 500 var c *Value 501 if vi.regs != 0 { 502 // Copy from a register that v is already in. 503 r2 := pickReg(vi.regs) 504 if s.regs[r2].v != v { 505 panic("bad register state") 506 } 507 c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c) 508 } else if v.rematerializeable() { 509 // Rematerialize instead of loading from the spill location. 510 c = v.copyIntoWithXPos(s.curBlock, pos) 511 } else { 512 // Load v from its spill location. 513 spill := s.makeSpill(v, s.curBlock) 514 if s.f.pass.debug > logSpills { 515 s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill) 516 } 517 c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill) 518 } 519 520 s.setOrig(c, v) 521 522 if onWasmStack { 523 c.OnWasmStack = true 524 return c 525 } 526 527 s.assignReg(r, v, c) 528 if c.Op == OpLoadReg && s.isGReg(r) { 529 s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString()) 530 } 531 if nospill { 532 s.nospill |= regMask(1) << r 533 } 534 return c 535 } 536 537 // isLeaf reports whether f performs any calls. 538 func isLeaf(f *Func) bool { 539 for _, b := range f.Blocks { 540 for _, v := range b.Values { 541 if opcodeTable[v.Op].call { 542 return false 543 } 544 } 545 } 546 return true 547 } 548 549 func (s *regAllocState) init(f *Func) { 550 s.f = f 551 s.f.RegAlloc = s.f.Cache.locs[:0] 552 s.registers = f.Config.registers 553 if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) { 554 s.f.Fatalf("bad number of registers: %d", nr) 555 } else { 556 s.numRegs = register(nr) 557 } 558 // Locate SP, SB, and g registers. 559 s.SPReg = noRegister 560 s.SBReg = noRegister 561 s.GReg = noRegister 562 for r := register(0); r < s.numRegs; r++ { 563 switch s.registers[r].String() { 564 case "SP": 565 s.SPReg = r 566 case "SB": 567 s.SBReg = r 568 case "g": 569 s.GReg = r 570 } 571 } 572 // Make sure we found all required registers. 573 switch noRegister { 574 case s.SPReg: 575 s.f.Fatalf("no SP register found") 576 case s.SBReg: 577 s.f.Fatalf("no SB register found") 578 case s.GReg: 579 if f.Config.hasGReg { 580 s.f.Fatalf("no g register found") 581 } 582 } 583 584 // Figure out which registers we're allowed to use. 585 s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask 586 s.allocatable &^= 1 << s.SPReg 587 s.allocatable &^= 1 << s.SBReg 588 if s.f.Config.hasGReg { 589 s.allocatable &^= 1 << s.GReg 590 } 591 if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 { 592 s.allocatable &^= 1 << uint(s.f.Config.FPReg) 593 } 594 if s.f.Config.LinkReg != -1 { 595 if isLeaf(f) { 596 // Leaf functions don't save/restore the link register. 597 s.allocatable &^= 1 << uint(s.f.Config.LinkReg) 598 } 599 if s.f.Config.arch == "arm" && objabi.GOARM == 5 { 600 // On ARMv5 we insert softfloat calls at each FP instruction. 601 // This clobbers LR almost everywhere. Disable allocating LR 602 // on ARMv5. 603 s.allocatable &^= 1 << uint(s.f.Config.LinkReg) 604 } 605 } 606 if s.f.Config.ctxt.Flag_dynlink { 607 switch s.f.Config.arch { 608 case "amd64": 609 s.allocatable &^= 1 << 15 // R15 610 case "arm": 611 s.allocatable &^= 1 << 9 // R9 612 case "ppc64le": // R2 already reserved. 613 // nothing to do 614 case "arm64": 615 // nothing to do? 616 case "386": 617 // nothing to do. 618 // Note that for Flag_shared (position independent code) 619 // we do need to be careful, but that carefulness is hidden 620 // in the rewrite rules so we always have a free register 621 // available for global load/stores. See gen/386.rules (search for Flag_shared). 622 case "s390x": 623 s.allocatable &^= 1 << 11 // R11 624 default: 625 s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch) 626 } 627 } 628 if s.f.Config.use387 { 629 s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go) 630 } 631 632 // Linear scan register allocation can be influenced by the order in which blocks appear. 633 // Decouple the register allocation order from the generated block order. 634 // This also creates an opportunity for experiments to find a better order. 635 s.visitOrder = layoutRegallocOrder(f) 636 637 // Compute block order. This array allows us to distinguish forward edges 638 // from backward edges and compute how far they go. 639 blockOrder := make([]int32, f.NumBlocks()) 640 for i, b := range s.visitOrder { 641 blockOrder[b.ID] = int32(i) 642 } 643 644 s.regs = make([]regState, s.numRegs) 645 nv := f.NumValues() 646 if cap(s.f.Cache.regallocValues) >= nv { 647 s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv] 648 } else { 649 s.f.Cache.regallocValues = make([]valState, nv) 650 } 651 s.values = s.f.Cache.regallocValues 652 s.orig = make([]*Value, nv) 653 s.copies = make(map[*Value]bool) 654 for _, b := range s.visitOrder { 655 for _, v := range b.Values { 656 if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() { 657 s.values[v.ID].needReg = true 658 s.values[v.ID].rematerializeable = v.rematerializeable() 659 s.orig[v.ID] = v 660 } 661 // Note: needReg is false for values returning Tuple types. 662 // Instead, we mark the corresponding Selects as needReg. 663 } 664 } 665 s.computeLive() 666 667 // Compute primary predecessors. 668 s.primary = make([]int32, f.NumBlocks()) 669 for _, b := range s.visitOrder { 670 best := -1 671 for i, e := range b.Preds { 672 p := e.b 673 if blockOrder[p.ID] >= blockOrder[b.ID] { 674 continue // backward edge 675 } 676 if best == -1 || blockOrder[p.ID] > blockOrder[b.Preds[best].b.ID] { 677 best = i 678 } 679 } 680 s.primary[b.ID] = int32(best) 681 } 682 683 s.endRegs = make([][]endReg, f.NumBlocks()) 684 s.startRegs = make([][]startReg, f.NumBlocks()) 685 s.spillLive = make([][]ID, f.NumBlocks()) 686 s.sdom = f.Sdom() 687 688 // wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack. 689 if f.Config.ctxt.Arch.Arch == sys.ArchWasm { 690 canLiveOnStack := f.newSparseSet(f.NumValues()) 691 defer f.retSparseSet(canLiveOnStack) 692 for _, b := range f.Blocks { 693 // New block. Clear candidate set. 694 canLiveOnStack.clear() 695 for _, c := range b.ControlValues() { 696 if c.Uses == 1 && !opcodeTable[c.Op].generic { 697 canLiveOnStack.add(c.ID) 698 } 699 } 700 // Walking backwards. 701 for i := len(b.Values) - 1; i >= 0; i-- { 702 v := b.Values[i] 703 if canLiveOnStack.contains(v.ID) { 704 v.OnWasmStack = true 705 } else { 706 // Value can not live on stack. Values are not allowed to be reordered, so clear candidate set. 707 canLiveOnStack.clear() 708 } 709 for _, arg := range v.Args { 710 // Value can live on the stack if: 711 // - it is only used once 712 // - it is used in the same basic block 713 // - it is not a "mem" value 714 // - it is a WebAssembly op 715 if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic { 716 canLiveOnStack.add(arg.ID) 717 } 718 } 719 } 720 } 721 } 722 } 723 724 // Adds a use record for id at distance dist from the start of the block. 725 // All calls to addUse must happen with nonincreasing dist. 726 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) { 727 r := s.freeUseRecords 728 if r != nil { 729 s.freeUseRecords = r.next 730 } else { 731 r = &use{} 732 } 733 r.dist = dist 734 r.pos = pos 735 r.next = s.values[id].uses 736 s.values[id].uses = r 737 if r.next != nil && dist > r.next.dist { 738 s.f.Fatalf("uses added in wrong order") 739 } 740 } 741 742 // advanceUses advances the uses of v's args from the state before v to the state after v. 743 // Any values which have no more uses are deallocated from registers. 744 func (s *regAllocState) advanceUses(v *Value) { 745 for _, a := range v.Args { 746 if !s.values[a.ID].needReg { 747 continue 748 } 749 ai := &s.values[a.ID] 750 r := ai.uses 751 ai.uses = r.next 752 if r.next == nil { 753 // Value is dead, free all registers that hold it. 754 s.freeRegs(ai.regs) 755 } 756 r.next = s.freeUseRecords 757 s.freeUseRecords = r 758 } 759 } 760 761 // liveAfterCurrentInstruction reports whether v is live after 762 // the current instruction is completed. v must be used by the 763 // current instruction. 764 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool { 765 u := s.values[v.ID].uses 766 d := u.dist 767 for u != nil && u.dist == d { 768 u = u.next 769 } 770 return u != nil && u.dist > d 771 } 772 773 // Sets the state of the registers to that encoded in regs. 774 func (s *regAllocState) setState(regs []endReg) { 775 s.freeRegs(s.used) 776 for _, x := range regs { 777 s.assignReg(x.r, x.v, x.c) 778 } 779 } 780 781 // compatRegs returns the set of registers which can store a type t. 782 func (s *regAllocState) compatRegs(t *types.Type) regMask { 783 var m regMask 784 if t.IsTuple() || t.IsFlags() { 785 return 0 786 } 787 if t.IsFloat() || t == types.TypeInt128 { 788 if t.Etype == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 { 789 m = s.f.Config.fp32RegMask 790 } else if t.Etype == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 { 791 m = s.f.Config.fp64RegMask 792 } else { 793 m = s.f.Config.fpRegMask 794 } 795 } else { 796 m = s.f.Config.gpRegMask 797 } 798 return m & s.allocatable 799 } 800 801 // regspec returns the regInfo for operation op. 802 func (s *regAllocState) regspec(op Op) regInfo { 803 if op == OpConvert { 804 // OpConvert is a generic op, so it doesn't have a 805 // register set in the static table. It can use any 806 // allocatable integer register. 807 m := s.allocatable & s.f.Config.gpRegMask 808 return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}} 809 } 810 return opcodeTable[op].reg 811 } 812 813 func (s *regAllocState) isGReg(r register) bool { 814 return s.f.Config.hasGReg && s.GReg == r 815 } 816 817 func (s *regAllocState) regalloc(f *Func) { 818 regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register 819 defer f.retSparseSet(regValLiveSet) 820 var oldSched []*Value 821 var phis []*Value 822 var phiRegs []register 823 var args []*Value 824 825 // Data structure used for computing desired registers. 826 var desired desiredState 827 828 // Desired registers for inputs & outputs for each instruction in the block. 829 type dentry struct { 830 out [4]register // desired output registers 831 in [3][4]register // desired input registers (for inputs 0,1, and 2) 832 } 833 var dinfo []dentry 834 835 if f.Entry != f.Blocks[0] { 836 f.Fatalf("entry block must be first") 837 } 838 839 for _, b := range s.visitOrder { 840 if s.f.pass.debug > regDebug { 841 fmt.Printf("Begin processing block %v\n", b) 842 } 843 s.curBlock = b 844 845 // Initialize regValLiveSet and uses fields for this block. 846 // Walk backwards through the block doing liveness analysis. 847 regValLiveSet.clear() 848 for _, e := range s.live[b.ID] { 849 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block 850 regValLiveSet.add(e.ID) 851 } 852 for _, v := range b.ControlValues() { 853 if s.values[v.ID].needReg { 854 s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values 855 regValLiveSet.add(v.ID) 856 } 857 } 858 for i := len(b.Values) - 1; i >= 0; i-- { 859 v := b.Values[i] 860 regValLiveSet.remove(v.ID) 861 if v.Op == OpPhi { 862 // Remove v from the live set, but don't add 863 // any inputs. This is the state the len(b.Preds)>1 864 // case below desires; it wants to process phis specially. 865 continue 866 } 867 if opcodeTable[v.Op].call { 868 // Function call clobbers all the registers but SP and SB. 869 regValLiveSet.clear() 870 if s.sp != 0 && s.values[s.sp].uses != nil { 871 regValLiveSet.add(s.sp) 872 } 873 if s.sb != 0 && s.values[s.sb].uses != nil { 874 regValLiveSet.add(s.sb) 875 } 876 } 877 for _, a := range v.Args { 878 if !s.values[a.ID].needReg { 879 continue 880 } 881 s.addUse(a.ID, int32(i), v.Pos) 882 regValLiveSet.add(a.ID) 883 } 884 } 885 if s.f.pass.debug > regDebug { 886 fmt.Printf("use distances for %s\n", b) 887 for i := range s.values { 888 vi := &s.values[i] 889 u := vi.uses 890 if u == nil { 891 continue 892 } 893 fmt.Printf(" v%d:", i) 894 for u != nil { 895 fmt.Printf(" %d", u.dist) 896 u = u.next 897 } 898 fmt.Println() 899 } 900 } 901 902 // Make a copy of the block schedule so we can generate a new one in place. 903 // We make a separate copy for phis and regular values. 904 nphi := 0 905 for _, v := range b.Values { 906 if v.Op != OpPhi { 907 break 908 } 909 nphi++ 910 } 911 phis = append(phis[:0], b.Values[:nphi]...) 912 oldSched = append(oldSched[:0], b.Values[nphi:]...) 913 b.Values = b.Values[:0] 914 915 // Initialize start state of block. 916 if b == f.Entry { 917 // Regalloc state is empty to start. 918 if nphi > 0 { 919 f.Fatalf("phis in entry block") 920 } 921 } else if len(b.Preds) == 1 { 922 // Start regalloc state with the end state of the previous block. 923 s.setState(s.endRegs[b.Preds[0].b.ID]) 924 if nphi > 0 { 925 f.Fatalf("phis in single-predecessor block") 926 } 927 // Drop any values which are no longer live. 928 // This may happen because at the end of p, a value may be 929 // live but only used by some other successor of p. 930 for r := register(0); r < s.numRegs; r++ { 931 v := s.regs[r].v 932 if v != nil && !regValLiveSet.contains(v.ID) { 933 s.freeReg(r) 934 } 935 } 936 } else { 937 // This is the complicated case. We have more than one predecessor, 938 // which means we may have Phi ops. 939 940 // Start with the final register state of the primary predecessor 941 idx := s.primary[b.ID] 942 if idx < 0 { 943 f.Fatalf("block with no primary predecessor %s", b) 944 } 945 p := b.Preds[idx].b 946 s.setState(s.endRegs[p.ID]) 947 948 if s.f.pass.debug > regDebug { 949 fmt.Printf("starting merge block %s with end state of %s:\n", b, p) 950 for _, x := range s.endRegs[p.ID] { 951 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c) 952 } 953 } 954 955 // Decide on registers for phi ops. Use the registers determined 956 // by the primary predecessor if we can. 957 // TODO: pick best of (already processed) predecessors? 958 // Majority vote? Deepest nesting level? 959 phiRegs = phiRegs[:0] 960 var phiUsed regMask 961 962 for _, v := range phis { 963 if !s.values[v.ID].needReg { 964 phiRegs = append(phiRegs, noRegister) 965 continue 966 } 967 a := v.Args[idx] 968 // Some instructions target not-allocatable registers. 969 // They're not suitable for further (phi-function) allocation. 970 m := s.values[a.ID].regs &^ phiUsed & s.allocatable 971 if m != 0 { 972 r := pickReg(m) 973 phiUsed |= regMask(1) << r 974 phiRegs = append(phiRegs, r) 975 } else { 976 phiRegs = append(phiRegs, noRegister) 977 } 978 } 979 980 // Second pass - deallocate all in-register phi inputs. 981 for i, v := range phis { 982 if !s.values[v.ID].needReg { 983 continue 984 } 985 a := v.Args[idx] 986 r := phiRegs[i] 987 if r == noRegister { 988 continue 989 } 990 if regValLiveSet.contains(a.ID) { 991 // Input value is still live (it is used by something other than Phi). 992 // Try to move it around before kicking out, if there is a free register. 993 // We generate a Copy in the predecessor block and record it. It will be 994 // deleted later if never used. 995 // 996 // Pick a free register. At this point some registers used in the predecessor 997 // block may have been deallocated. Those are the ones used for Phis. Exclude 998 // them (and they are not going to be helpful anyway). 999 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed 1000 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 { 1001 r2 := pickReg(m) 1002 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c) 1003 s.copies[c] = false 1004 if s.f.pass.debug > regDebug { 1005 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2]) 1006 } 1007 s.setOrig(c, a) 1008 s.assignReg(r2, a, c) 1009 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c}) 1010 } 1011 } 1012 s.freeReg(r) 1013 } 1014 1015 // Copy phi ops into new schedule. 1016 b.Values = append(b.Values, phis...) 1017 1018 // Third pass - pick registers for phis whose inputs 1019 // were not in a register. 1020 for i, v := range phis { 1021 if !s.values[v.ID].needReg { 1022 continue 1023 } 1024 if phiRegs[i] != noRegister { 1025 continue 1026 } 1027 if s.f.Config.use387 && v.Type.IsFloat() { 1028 continue // 387 can't handle floats in registers between blocks 1029 } 1030 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used 1031 if m != 0 { 1032 r := pickReg(m) 1033 phiRegs[i] = r 1034 phiUsed |= regMask(1) << r 1035 } 1036 } 1037 1038 // Set registers for phis. Add phi spill code. 1039 for i, v := range phis { 1040 if !s.values[v.ID].needReg { 1041 continue 1042 } 1043 r := phiRegs[i] 1044 if r == noRegister { 1045 // stack-based phi 1046 // Spills will be inserted in all the predecessors below. 1047 s.values[v.ID].spill = v // v starts life spilled 1048 continue 1049 } 1050 // register-based phi 1051 s.assignReg(r, v, v) 1052 } 1053 1054 // Deallocate any values which are no longer live. Phis are excluded. 1055 for r := register(0); r < s.numRegs; r++ { 1056 if phiUsed>>r&1 != 0 { 1057 continue 1058 } 1059 v := s.regs[r].v 1060 if v != nil && !regValLiveSet.contains(v.ID) { 1061 s.freeReg(r) 1062 } 1063 } 1064 1065 // Save the starting state for use by merge edges. 1066 // We append to a stack allocated variable that we'll 1067 // later copy into s.startRegs in one fell swoop, to save 1068 // on allocations. 1069 regList := make([]startReg, 0, 32) 1070 for r := register(0); r < s.numRegs; r++ { 1071 v := s.regs[r].v 1072 if v == nil { 1073 continue 1074 } 1075 if phiUsed>>r&1 != 0 { 1076 // Skip registers that phis used, we'll handle those 1077 // specially during merge edge processing. 1078 continue 1079 } 1080 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos}) 1081 } 1082 s.startRegs[b.ID] = make([]startReg, len(regList)) 1083 copy(s.startRegs[b.ID], regList) 1084 1085 if s.f.pass.debug > regDebug { 1086 fmt.Printf("after phis\n") 1087 for _, x := range s.startRegs[b.ID] { 1088 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID) 1089 } 1090 } 1091 } 1092 1093 // Allocate space to record the desired registers for each value. 1094 if l := len(oldSched); cap(dinfo) < l { 1095 dinfo = make([]dentry, l) 1096 } else { 1097 dinfo = dinfo[:l] 1098 for i := range dinfo { 1099 dinfo[i] = dentry{} 1100 } 1101 } 1102 1103 // Load static desired register info at the end of the block. 1104 desired.copy(&s.desired[b.ID]) 1105 1106 // Check actual assigned registers at the start of the next block(s). 1107 // Dynamically assigned registers will trump the static 1108 // desired registers computed during liveness analysis. 1109 // Note that we do this phase after startRegs is set above, so that 1110 // we get the right behavior for a block which branches to itself. 1111 for _, e := range b.Succs { 1112 succ := e.b 1113 // TODO: prioritize likely successor? 1114 for _, x := range s.startRegs[succ.ID] { 1115 desired.add(x.v.ID, x.r) 1116 } 1117 // Process phi ops in succ. 1118 pidx := e.i 1119 for _, v := range succ.Values { 1120 if v.Op != OpPhi { 1121 break 1122 } 1123 if !s.values[v.ID].needReg { 1124 continue 1125 } 1126 rp, ok := s.f.getHome(v.ID).(*Register) 1127 if !ok { 1128 continue 1129 } 1130 desired.add(v.Args[pidx].ID, register(rp.num)) 1131 } 1132 } 1133 // Walk values backwards computing desired register info. 1134 // See computeLive for more comments. 1135 for i := len(oldSched) - 1; i >= 0; i-- { 1136 v := oldSched[i] 1137 prefs := desired.remove(v.ID) 1138 regspec := s.regspec(v.Op) 1139 desired.clobber(regspec.clobbers) 1140 for _, j := range regspec.inputs { 1141 if countRegs(j.regs) != 1 { 1142 continue 1143 } 1144 desired.clobber(j.regs) 1145 desired.add(v.Args[j.idx].ID, pickReg(j.regs)) 1146 } 1147 if opcodeTable[v.Op].resultInArg0 { 1148 if opcodeTable[v.Op].commutative { 1149 desired.addList(v.Args[1].ID, prefs) 1150 } 1151 desired.addList(v.Args[0].ID, prefs) 1152 } 1153 // Save desired registers for this value. 1154 dinfo[i].out = prefs 1155 for j, a := range v.Args { 1156 if j >= len(dinfo[i].in) { 1157 break 1158 } 1159 dinfo[i].in[j] = desired.get(a.ID) 1160 } 1161 } 1162 1163 // Process all the non-phi values. 1164 for idx, v := range oldSched { 1165 if s.f.pass.debug > regDebug { 1166 fmt.Printf(" processing %s\n", v.LongString()) 1167 } 1168 regspec := s.regspec(v.Op) 1169 if v.Op == OpPhi { 1170 f.Fatalf("phi %s not at start of block", v) 1171 } 1172 if v.Op == OpSP { 1173 s.assignReg(s.SPReg, v, v) 1174 b.Values = append(b.Values, v) 1175 s.advanceUses(v) 1176 s.sp = v.ID 1177 continue 1178 } 1179 if v.Op == OpSB { 1180 s.assignReg(s.SBReg, v, v) 1181 b.Values = append(b.Values, v) 1182 s.advanceUses(v) 1183 s.sb = v.ID 1184 continue 1185 } 1186 if v.Op == OpSelect0 || v.Op == OpSelect1 { 1187 if s.values[v.ID].needReg { 1188 var i = 0 1189 if v.Op == OpSelect1 { 1190 i = 1 1191 } 1192 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v) 1193 } 1194 b.Values = append(b.Values, v) 1195 s.advanceUses(v) 1196 goto issueSpill 1197 } 1198 if v.Op == OpGetG && s.f.Config.hasGReg { 1199 // use hardware g register 1200 if s.regs[s.GReg].v != nil { 1201 s.freeReg(s.GReg) // kick out the old value 1202 } 1203 s.assignReg(s.GReg, v, v) 1204 b.Values = append(b.Values, v) 1205 s.advanceUses(v) 1206 goto issueSpill 1207 } 1208 if v.Op == OpArg { 1209 // Args are "pre-spilled" values. We don't allocate 1210 // any register here. We just set up the spill pointer to 1211 // point at itself and any later user will restore it to use it. 1212 s.values[v.ID].spill = v 1213 b.Values = append(b.Values, v) 1214 s.advanceUses(v) 1215 continue 1216 } 1217 if v.Op == OpKeepAlive { 1218 // Make sure the argument to v is still live here. 1219 s.advanceUses(v) 1220 a := v.Args[0] 1221 vi := &s.values[a.ID] 1222 if vi.regs == 0 && !vi.rematerializeable { 1223 // Use the spill location. 1224 // This forces later liveness analysis to make the 1225 // value live at this point. 1226 v.SetArg(0, s.makeSpill(a, b)) 1227 } else if _, ok := a.Aux.(GCNode); ok && vi.rematerializeable { 1228 // Rematerializeable value with a gc.Node. This is the address of 1229 // a stack object (e.g. an LEAQ). Keep the object live. 1230 // Change it to VarLive, which is what plive expects for locals. 1231 v.Op = OpVarLive 1232 v.SetArgs1(v.Args[1]) 1233 v.Aux = a.Aux 1234 } else { 1235 // In-register and rematerializeable values are already live. 1236 // These are typically rematerializeable constants like nil, 1237 // or values of a variable that were modified since the last call. 1238 v.Op = OpCopy 1239 v.SetArgs1(v.Args[1]) 1240 } 1241 b.Values = append(b.Values, v) 1242 continue 1243 } 1244 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 { 1245 // No register allocation required (or none specified yet) 1246 s.freeRegs(regspec.clobbers) 1247 b.Values = append(b.Values, v) 1248 s.advanceUses(v) 1249 continue 1250 } 1251 1252 if s.values[v.ID].rematerializeable { 1253 // Value is rematerializeable, don't issue it here. 1254 // It will get issued just before each use (see 1255 // allocValueToReg). 1256 for _, a := range v.Args { 1257 a.Uses-- 1258 } 1259 s.advanceUses(v) 1260 continue 1261 } 1262 1263 if s.f.pass.debug > regDebug { 1264 fmt.Printf("value %s\n", v.LongString()) 1265 fmt.Printf(" out:") 1266 for _, r := range dinfo[idx].out { 1267 if r != noRegister { 1268 fmt.Printf(" %s", &s.registers[r]) 1269 } 1270 } 1271 fmt.Println() 1272 for i := 0; i < len(v.Args) && i < 3; i++ { 1273 fmt.Printf(" in%d:", i) 1274 for _, r := range dinfo[idx].in[i] { 1275 if r != noRegister { 1276 fmt.Printf(" %s", &s.registers[r]) 1277 } 1278 } 1279 fmt.Println() 1280 } 1281 } 1282 1283 // Move arguments to registers. Process in an ordering defined 1284 // by the register specification (most constrained first). 1285 args = append(args[:0], v.Args...) 1286 for _, i := range regspec.inputs { 1287 mask := i.regs 1288 if mask&s.values[args[i.idx].ID].regs == 0 { 1289 // Need a new register for the input. 1290 mask &= s.allocatable 1291 mask &^= s.nospill 1292 // Used desired register if available. 1293 if i.idx < 3 { 1294 for _, r := range dinfo[idx].in[i.idx] { 1295 if r != noRegister && (mask&^s.used)>>r&1 != 0 { 1296 // Desired register is allowed and unused. 1297 mask = regMask(1) << r 1298 break 1299 } 1300 } 1301 } 1302 // Avoid registers we're saving for other values. 1303 if mask&^desired.avoid != 0 { 1304 mask &^= desired.avoid 1305 } 1306 } 1307 args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos) 1308 } 1309 1310 // If the output clobbers the input register, make sure we have 1311 // at least two copies of the input register so we don't 1312 // have to reload the value from the spill location. 1313 if opcodeTable[v.Op].resultInArg0 { 1314 var m regMask 1315 if !s.liveAfterCurrentInstruction(v.Args[0]) { 1316 // arg0 is dead. We can clobber its register. 1317 goto ok 1318 } 1319 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) { 1320 args[0], args[1] = args[1], args[0] 1321 goto ok 1322 } 1323 if s.values[v.Args[0].ID].rematerializeable { 1324 // We can rematerialize the input, don't worry about clobbering it. 1325 goto ok 1326 } 1327 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable { 1328 args[0], args[1] = args[1], args[0] 1329 goto ok 1330 } 1331 if countRegs(s.values[v.Args[0].ID].regs) >= 2 { 1332 // we have at least 2 copies of arg0. We can afford to clobber one. 1333 goto ok 1334 } 1335 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 { 1336 args[0], args[1] = args[1], args[0] 1337 goto ok 1338 } 1339 1340 // We can't overwrite arg0 (or arg1, if commutative). So we 1341 // need to make a copy of an input so we have a register we can modify. 1342 1343 // Possible new registers to copy into. 1344 m = s.compatRegs(v.Args[0].Type) &^ s.used 1345 if m == 0 { 1346 // No free registers. In this case we'll just clobber 1347 // an input and future uses of that input must use a restore. 1348 // TODO(khr): We should really do this like allocReg does it, 1349 // spilling the value with the most distant next use. 1350 goto ok 1351 } 1352 1353 // Try to move an input to the desired output. 1354 for _, r := range dinfo[idx].out { 1355 if r != noRegister && m>>r&1 != 0 { 1356 m = regMask(1) << r 1357 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos) 1358 // Note: we update args[0] so the instruction will 1359 // use the register copy we just made. 1360 goto ok 1361 } 1362 } 1363 // Try to copy input to its desired location & use its old 1364 // location as the result register. 1365 for _, r := range dinfo[idx].in[0] { 1366 if r != noRegister && m>>r&1 != 0 { 1367 m = regMask(1) << r 1368 c := s.allocValToReg(v.Args[0], m, true, v.Pos) 1369 s.copies[c] = false 1370 // Note: no update to args[0] so the instruction will 1371 // use the original copy. 1372 goto ok 1373 } 1374 } 1375 if opcodeTable[v.Op].commutative { 1376 for _, r := range dinfo[idx].in[1] { 1377 if r != noRegister && m>>r&1 != 0 { 1378 m = regMask(1) << r 1379 c := s.allocValToReg(v.Args[1], m, true, v.Pos) 1380 s.copies[c] = false 1381 args[0], args[1] = args[1], args[0] 1382 goto ok 1383 } 1384 } 1385 } 1386 // Avoid future fixed uses if we can. 1387 if m&^desired.avoid != 0 { 1388 m &^= desired.avoid 1389 } 1390 // Save input 0 to a new register so we can clobber it. 1391 c := s.allocValToReg(v.Args[0], m, true, v.Pos) 1392 s.copies[c] = false 1393 } 1394 1395 ok: 1396 // Now that all args are in regs, we're ready to issue the value itself. 1397 // Before we pick a register for the output value, allow input registers 1398 // to be deallocated. We do this here so that the output can use the 1399 // same register as a dying input. 1400 if !opcodeTable[v.Op].resultNotInArgs { 1401 s.tmpused = s.nospill 1402 s.nospill = 0 1403 s.advanceUses(v) // frees any registers holding args that are no longer live 1404 } 1405 1406 // Dump any registers which will be clobbered 1407 s.freeRegs(regspec.clobbers) 1408 s.tmpused |= regspec.clobbers 1409 1410 // Pick registers for outputs. 1411 { 1412 outRegs := [2]register{noRegister, noRegister} 1413 var used regMask 1414 for _, out := range regspec.outputs { 1415 mask := out.regs & s.allocatable &^ used 1416 if mask == 0 { 1417 continue 1418 } 1419 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 { 1420 if !opcodeTable[v.Op].commutative { 1421 // Output must use the same register as input 0. 1422 r := register(s.f.getHome(args[0].ID).(*Register).num) 1423 mask = regMask(1) << r 1424 } else { 1425 // Output must use the same register as input 0 or 1. 1426 r0 := register(s.f.getHome(args[0].ID).(*Register).num) 1427 r1 := register(s.f.getHome(args[1].ID).(*Register).num) 1428 // Check r0 and r1 for desired output register. 1429 found := false 1430 for _, r := range dinfo[idx].out { 1431 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 { 1432 mask = regMask(1) << r 1433 found = true 1434 if r == r1 { 1435 args[0], args[1] = args[1], args[0] 1436 } 1437 break 1438 } 1439 } 1440 if !found { 1441 // Neither are desired, pick r0. 1442 mask = regMask(1) << r0 1443 } 1444 } 1445 } 1446 for _, r := range dinfo[idx].out { 1447 if r != noRegister && (mask&^s.used)>>r&1 != 0 { 1448 // Desired register is allowed and unused. 1449 mask = regMask(1) << r 1450 break 1451 } 1452 } 1453 // Avoid registers we're saving for other values. 1454 if mask&^desired.avoid&^s.nospill != 0 { 1455 mask &^= desired.avoid 1456 } 1457 r := s.allocReg(mask, v) 1458 outRegs[out.idx] = r 1459 used |= regMask(1) << r 1460 s.tmpused |= regMask(1) << r 1461 } 1462 // Record register choices 1463 if v.Type.IsTuple() { 1464 var outLocs LocPair 1465 if r := outRegs[0]; r != noRegister { 1466 outLocs[0] = &s.registers[r] 1467 } 1468 if r := outRegs[1]; r != noRegister { 1469 outLocs[1] = &s.registers[r] 1470 } 1471 s.f.setHome(v, outLocs) 1472 // Note that subsequent SelectX instructions will do the assignReg calls. 1473 } else { 1474 if r := outRegs[0]; r != noRegister { 1475 s.assignReg(r, v, v) 1476 } 1477 } 1478 } 1479 1480 // deallocate dead args, if we have not done so 1481 if opcodeTable[v.Op].resultNotInArgs { 1482 s.nospill = 0 1483 s.advanceUses(v) // frees any registers holding args that are no longer live 1484 } 1485 s.tmpused = 0 1486 1487 // Issue the Value itself. 1488 for i, a := range args { 1489 v.SetArg(i, a) // use register version of arguments 1490 } 1491 b.Values = append(b.Values, v) 1492 1493 issueSpill: 1494 } 1495 1496 // Copy the control values - we need this so we can reduce the 1497 // uses property of these values later. 1498 controls := append(make([]*Value, 0, 2), b.ControlValues()...) 1499 1500 // Load control values into registers. 1501 for i, v := range b.ControlValues() { 1502 if !s.values[v.ID].needReg { 1503 continue 1504 } 1505 if s.f.pass.debug > regDebug { 1506 fmt.Printf(" processing control %s\n", v.LongString()) 1507 } 1508 // We assume that a control input can be passed in any 1509 // type-compatible register. If this turns out not to be true, 1510 // we'll need to introduce a regspec for a block's control value. 1511 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos)) 1512 } 1513 1514 // Reduce the uses of the control values once registers have been loaded. 1515 // This loop is equivalent to the advanceUses method. 1516 for _, v := range controls { 1517 vi := &s.values[v.ID] 1518 if !vi.needReg { 1519 continue 1520 } 1521 // Remove this use from the uses list. 1522 u := vi.uses 1523 vi.uses = u.next 1524 if u.next == nil { 1525 s.freeRegs(vi.regs) // value is dead 1526 } 1527 u.next = s.freeUseRecords 1528 s.freeUseRecords = u 1529 } 1530 1531 // Spill any values that can't live across basic block boundaries. 1532 if s.f.Config.use387 { 1533 s.freeRegs(s.f.Config.fpRegMask) 1534 } 1535 1536 // If we are approaching a merge point and we are the primary 1537 // predecessor of it, find live values that we use soon after 1538 // the merge point and promote them to registers now. 1539 if len(b.Succs) == 1 { 1540 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil { 1541 s.freeReg(s.GReg) // Spill value in G register before any merge. 1542 } 1543 // For this to be worthwhile, the loop must have no calls in it. 1544 top := b.Succs[0].b 1545 loop := s.loopnest.b2l[top.ID] 1546 if loop == nil || loop.header != top || loop.containsUnavoidableCall { 1547 goto badloop 1548 } 1549 1550 // TODO: sort by distance, pick the closest ones? 1551 for _, live := range s.live[b.ID] { 1552 if live.dist >= unlikelyDistance { 1553 // Don't preload anything live after the loop. 1554 continue 1555 } 1556 vid := live.ID 1557 vi := &s.values[vid] 1558 if vi.regs != 0 { 1559 continue 1560 } 1561 if vi.rematerializeable { 1562 continue 1563 } 1564 v := s.orig[vid] 1565 if s.f.Config.use387 && v.Type.IsFloat() { 1566 continue // 387 can't handle floats in registers between blocks 1567 } 1568 m := s.compatRegs(v.Type) &^ s.used 1569 if m&^desired.avoid != 0 { 1570 m &^= desired.avoid 1571 } 1572 if m != 0 { 1573 s.allocValToReg(v, m, false, b.Pos) 1574 } 1575 } 1576 } 1577 badloop: 1578 ; 1579 1580 // Save end-of-block register state. 1581 // First count how many, this cuts allocations in half. 1582 k := 0 1583 for r := register(0); r < s.numRegs; r++ { 1584 v := s.regs[r].v 1585 if v == nil { 1586 continue 1587 } 1588 k++ 1589 } 1590 regList := make([]endReg, 0, k) 1591 for r := register(0); r < s.numRegs; r++ { 1592 v := s.regs[r].v 1593 if v == nil { 1594 continue 1595 } 1596 regList = append(regList, endReg{r, v, s.regs[r].c}) 1597 } 1598 s.endRegs[b.ID] = regList 1599 1600 if checkEnabled { 1601 regValLiveSet.clear() 1602 for _, x := range s.live[b.ID] { 1603 regValLiveSet.add(x.ID) 1604 } 1605 for r := register(0); r < s.numRegs; r++ { 1606 v := s.regs[r].v 1607 if v == nil { 1608 continue 1609 } 1610 if !regValLiveSet.contains(v.ID) { 1611 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b) 1612 } 1613 } 1614 } 1615 1616 // If a value is live at the end of the block and 1617 // isn't in a register, generate a use for the spill location. 1618 // We need to remember this information so that 1619 // the liveness analysis in stackalloc is correct. 1620 for _, e := range s.live[b.ID] { 1621 vi := &s.values[e.ID] 1622 if vi.regs != 0 { 1623 // in a register, we'll use that source for the merge. 1624 continue 1625 } 1626 if vi.rematerializeable { 1627 // we'll rematerialize during the merge. 1628 continue 1629 } 1630 //fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b) 1631 spill := s.makeSpill(s.orig[e.ID], b) 1632 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID) 1633 } 1634 1635 // Clear any final uses. 1636 // All that is left should be the pseudo-uses added for values which 1637 // are live at the end of b. 1638 for _, e := range s.live[b.ID] { 1639 u := s.values[e.ID].uses 1640 if u == nil { 1641 f.Fatalf("live at end, no uses v%d", e.ID) 1642 } 1643 if u.next != nil { 1644 f.Fatalf("live at end, too many uses v%d", e.ID) 1645 } 1646 s.values[e.ID].uses = nil 1647 u.next = s.freeUseRecords 1648 s.freeUseRecords = u 1649 } 1650 } 1651 1652 // Decide where the spills we generated will go. 1653 s.placeSpills() 1654 1655 // Anything that didn't get a register gets a stack location here. 1656 // (StoreReg, stack-based phis, inputs, ...) 1657 stacklive := stackalloc(s.f, s.spillLive) 1658 1659 // Fix up all merge edges. 1660 s.shuffle(stacklive) 1661 1662 // Erase any copies we never used. 1663 // Also, an unused copy might be the only use of another copy, 1664 // so continue erasing until we reach a fixed point. 1665 for { 1666 progress := false 1667 for c, used := range s.copies { 1668 if !used && c.Uses == 0 { 1669 if s.f.pass.debug > regDebug { 1670 fmt.Printf("delete copied value %s\n", c.LongString()) 1671 } 1672 c.RemoveArg(0) 1673 f.freeValue(c) 1674 delete(s.copies, c) 1675 progress = true 1676 } 1677 } 1678 if !progress { 1679 break 1680 } 1681 } 1682 1683 for _, b := range s.visitOrder { 1684 i := 0 1685 for _, v := range b.Values { 1686 if v.Op == OpInvalid { 1687 continue 1688 } 1689 b.Values[i] = v 1690 i++ 1691 } 1692 b.Values = b.Values[:i] 1693 } 1694 } 1695 1696 func (s *regAllocState) placeSpills() { 1697 f := s.f 1698 1699 // Precompute some useful info. 1700 phiRegs := make([]regMask, f.NumBlocks()) 1701 for _, b := range s.visitOrder { 1702 var m regMask 1703 for _, v := range b.Values { 1704 if v.Op != OpPhi { 1705 break 1706 } 1707 if r, ok := f.getHome(v.ID).(*Register); ok { 1708 m |= regMask(1) << uint(r.num) 1709 } 1710 } 1711 phiRegs[b.ID] = m 1712 } 1713 1714 // Start maps block IDs to the list of spills 1715 // that go at the start of the block (but after any phis). 1716 start := map[ID][]*Value{} 1717 // After maps value IDs to the list of spills 1718 // that go immediately after that value ID. 1719 after := map[ID][]*Value{} 1720 1721 for i := range s.values { 1722 vi := s.values[i] 1723 spill := vi.spill 1724 if spill == nil { 1725 continue 1726 } 1727 if spill.Block != nil { 1728 // Some spills are already fully set up, 1729 // like OpArgs and stack-based phis. 1730 continue 1731 } 1732 v := s.orig[i] 1733 1734 // Walk down the dominator tree looking for a good place to 1735 // put the spill of v. At the start "best" is the best place 1736 // we have found so far. 1737 // TODO: find a way to make this O(1) without arbitrary cutoffs. 1738 best := v.Block 1739 bestArg := v 1740 var bestDepth int16 1741 if l := s.loopnest.b2l[best.ID]; l != nil { 1742 bestDepth = l.depth 1743 } 1744 b := best 1745 const maxSpillSearch = 100 1746 for i := 0; i < maxSpillSearch; i++ { 1747 // Find the child of b in the dominator tree which 1748 // dominates all restores. 1749 p := b 1750 b = nil 1751 for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 { 1752 if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax { 1753 // c also dominates all restores. Walk down into c. 1754 b = c 1755 break 1756 } 1757 } 1758 if b == nil { 1759 // Ran out of blocks which dominate all restores. 1760 break 1761 } 1762 1763 var depth int16 1764 if l := s.loopnest.b2l[b.ID]; l != nil { 1765 depth = l.depth 1766 } 1767 if depth > bestDepth { 1768 // Don't push the spill into a deeper loop. 1769 continue 1770 } 1771 1772 // If v is in a register at the start of b, we can 1773 // place the spill here (after the phis). 1774 if len(b.Preds) == 1 { 1775 for _, e := range s.endRegs[b.Preds[0].b.ID] { 1776 if e.v == v { 1777 // Found a better spot for the spill. 1778 best = b 1779 bestArg = e.c 1780 bestDepth = depth 1781 break 1782 } 1783 } 1784 } else { 1785 for _, e := range s.startRegs[b.ID] { 1786 if e.v == v { 1787 // Found a better spot for the spill. 1788 best = b 1789 bestArg = e.c 1790 bestDepth = depth 1791 break 1792 } 1793 } 1794 } 1795 } 1796 1797 // Put the spill in the best block we found. 1798 spill.Block = best 1799 spill.AddArg(bestArg) 1800 if best == v.Block && v.Op != OpPhi { 1801 // Place immediately after v. 1802 after[v.ID] = append(after[v.ID], spill) 1803 } else { 1804 // Place at the start of best block. 1805 start[best.ID] = append(start[best.ID], spill) 1806 } 1807 } 1808 1809 // Insert spill instructions into the block schedules. 1810 var oldSched []*Value 1811 for _, b := range s.visitOrder { 1812 nphi := 0 1813 for _, v := range b.Values { 1814 if v.Op != OpPhi { 1815 break 1816 } 1817 nphi++ 1818 } 1819 oldSched = append(oldSched[:0], b.Values[nphi:]...) 1820 b.Values = b.Values[:nphi] 1821 b.Values = append(b.Values, start[b.ID]...) 1822 for _, v := range oldSched { 1823 b.Values = append(b.Values, v) 1824 b.Values = append(b.Values, after[v.ID]...) 1825 } 1826 } 1827 } 1828 1829 // shuffle fixes up all the merge edges (those going into blocks of indegree > 1). 1830 func (s *regAllocState) shuffle(stacklive [][]ID) { 1831 var e edgeState 1832 e.s = s 1833 e.cache = map[ID][]*Value{} 1834 e.contents = map[Location]contentRecord{} 1835 if s.f.pass.debug > regDebug { 1836 fmt.Printf("shuffle %s\n", s.f.Name) 1837 fmt.Println(s.f.String()) 1838 } 1839 1840 for _, b := range s.visitOrder { 1841 if len(b.Preds) <= 1 { 1842 continue 1843 } 1844 e.b = b 1845 for i, edge := range b.Preds { 1846 p := edge.b 1847 e.p = p 1848 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID]) 1849 e.process() 1850 } 1851 } 1852 1853 if s.f.pass.debug > regDebug { 1854 fmt.Printf("post shuffle %s\n", s.f.Name) 1855 fmt.Println(s.f.String()) 1856 } 1857 } 1858 1859 type edgeState struct { 1860 s *regAllocState 1861 p, b *Block // edge goes from p->b. 1862 1863 // for each pre-regalloc value, a list of equivalent cached values 1864 cache map[ID][]*Value 1865 cachedVals []ID // (superset of) keys of the above map, for deterministic iteration 1866 1867 // map from location to the value it contains 1868 contents map[Location]contentRecord 1869 1870 // desired destination locations 1871 destinations []dstRecord 1872 extra []dstRecord 1873 1874 usedRegs regMask // registers currently holding something 1875 uniqueRegs regMask // registers holding the only copy of a value 1876 finalRegs regMask // registers holding final target 1877 rematerializeableRegs regMask // registers that hold rematerializeable values 1878 } 1879 1880 type contentRecord struct { 1881 vid ID // pre-regalloc value 1882 c *Value // cached value 1883 final bool // this is a satisfied destination 1884 pos src.XPos // source position of use of the value 1885 } 1886 1887 type dstRecord struct { 1888 loc Location // register or stack slot 1889 vid ID // pre-regalloc value it should contain 1890 splice **Value // place to store reference to the generating instruction 1891 pos src.XPos // source position of use of this location 1892 } 1893 1894 // setup initializes the edge state for shuffling. 1895 func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) { 1896 if e.s.f.pass.debug > regDebug { 1897 fmt.Printf("edge %s->%s\n", e.p, e.b) 1898 } 1899 1900 // Clear state. 1901 for _, vid := range e.cachedVals { 1902 delete(e.cache, vid) 1903 } 1904 e.cachedVals = e.cachedVals[:0] 1905 for k := range e.contents { 1906 delete(e.contents, k) 1907 } 1908 e.usedRegs = 0 1909 e.uniqueRegs = 0 1910 e.finalRegs = 0 1911 e.rematerializeableRegs = 0 1912 1913 // Live registers can be sources. 1914 for _, x := range srcReg { 1915 e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care the position of the source 1916 } 1917 // So can all of the spill locations. 1918 for _, spillID := range stacklive { 1919 v := e.s.orig[spillID] 1920 spill := e.s.values[v.ID].spill 1921 if !e.s.sdom.IsAncestorEq(spill.Block, e.p) { 1922 // Spills were placed that only dominate the uses found 1923 // during the first regalloc pass. The edge fixup code 1924 // can't use a spill location if the spill doesn't dominate 1925 // the edge. 1926 // We are guaranteed that if the spill doesn't dominate this edge, 1927 // then the value is available in a register (because we called 1928 // makeSpill for every value not in a register at the start 1929 // of an edge). 1930 continue 1931 } 1932 e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care the position of the source 1933 } 1934 1935 // Figure out all the destinations we need. 1936 dsts := e.destinations[:0] 1937 for _, x := range dstReg { 1938 dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos}) 1939 } 1940 // Phis need their args to end up in a specific location. 1941 for _, v := range e.b.Values { 1942 if v.Op != OpPhi { 1943 break 1944 } 1945 loc := e.s.f.getHome(v.ID) 1946 if loc == nil { 1947 continue 1948 } 1949 dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos}) 1950 } 1951 e.destinations = dsts 1952 1953 if e.s.f.pass.debug > regDebug { 1954 for _, vid := range e.cachedVals { 1955 a := e.cache[vid] 1956 for _, c := range a { 1957 fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c) 1958 } 1959 } 1960 for _, d := range e.destinations { 1961 fmt.Printf("dst %s: v%d\n", d.loc, d.vid) 1962 } 1963 } 1964 } 1965 1966 // process generates code to move all the values to the right destination locations. 1967 func (e *edgeState) process() { 1968 dsts := e.destinations 1969 1970 // Process the destinations until they are all satisfied. 1971 for len(dsts) > 0 { 1972 i := 0 1973 for _, d := range dsts { 1974 if !e.processDest(d.loc, d.vid, d.splice, d.pos) { 1975 // Failed - save for next iteration. 1976 dsts[i] = d 1977 i++ 1978 } 1979 } 1980 if i < len(dsts) { 1981 // Made some progress. Go around again. 1982 dsts = dsts[:i] 1983 1984 // Append any extras destinations we generated. 1985 dsts = append(dsts, e.extra...) 1986 e.extra = e.extra[:0] 1987 continue 1988 } 1989 1990 // We made no progress. That means that any 1991 // remaining unsatisfied moves are in simple cycles. 1992 // For example, A -> B -> C -> D -> A. 1993 // A ----> B 1994 // ^ | 1995 // | | 1996 // | v 1997 // D <---- C 1998 1999 // To break the cycle, we pick an unused register, say R, 2000 // and put a copy of B there. 2001 // A ----> B 2002 // ^ | 2003 // | | 2004 // | v 2005 // D <---- C <---- R=copyofB 2006 // When we resume the outer loop, the A->B move can now proceed, 2007 // and eventually the whole cycle completes. 2008 2009 // Copy any cycle location to a temp register. This duplicates 2010 // one of the cycle entries, allowing the just duplicated value 2011 // to be overwritten and the cycle to proceed. 2012 d := dsts[0] 2013 loc := d.loc 2014 vid := e.contents[loc].vid 2015 c := e.contents[loc].c 2016 r := e.findRegFor(c.Type) 2017 if e.s.f.pass.debug > regDebug { 2018 fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c) 2019 } 2020 e.erase(r) 2021 pos := d.pos.WithNotStmt() 2022 if _, isReg := loc.(*Register); isReg { 2023 c = e.p.NewValue1(pos, OpCopy, c.Type, c) 2024 } else { 2025 c = e.p.NewValue1(pos, OpLoadReg, c.Type, c) 2026 } 2027 e.set(r, vid, c, false, pos) 2028 if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) { 2029 e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString()) 2030 } 2031 } 2032 } 2033 2034 // processDest generates code to put value vid into location loc. Returns true 2035 // if progress was made. 2036 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool { 2037 pos = pos.WithNotStmt() 2038 occupant := e.contents[loc] 2039 if occupant.vid == vid { 2040 // Value is already in the correct place. 2041 e.contents[loc] = contentRecord{vid, occupant.c, true, pos} 2042 if splice != nil { 2043 (*splice).Uses-- 2044 *splice = occupant.c 2045 occupant.c.Uses++ 2046 } 2047 // Note: if splice==nil then c will appear dead. This is 2048 // non-SSA formed code, so be careful after this pass not to run 2049 // deadcode elimination. 2050 if _, ok := e.s.copies[occupant.c]; ok { 2051 // The copy at occupant.c was used to avoid spill. 2052 e.s.copies[occupant.c] = true 2053 } 2054 return true 2055 } 2056 2057 // Check if we're allowed to clobber the destination location. 2058 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable { 2059 // We can't overwrite the last copy 2060 // of a value that needs to survive. 2061 return false 2062 } 2063 2064 // Copy from a source of v, register preferred. 2065 v := e.s.orig[vid] 2066 var c *Value 2067 var src Location 2068 if e.s.f.pass.debug > regDebug { 2069 fmt.Printf("moving v%d to %s\n", vid, loc) 2070 fmt.Printf("sources of v%d:", vid) 2071 } 2072 for _, w := range e.cache[vid] { 2073 h := e.s.f.getHome(w.ID) 2074 if e.s.f.pass.debug > regDebug { 2075 fmt.Printf(" %s:%s", h, w) 2076 } 2077 _, isreg := h.(*Register) 2078 if src == nil || isreg { 2079 c = w 2080 src = h 2081 } 2082 } 2083 if e.s.f.pass.debug > regDebug { 2084 if src != nil { 2085 fmt.Printf(" [use %s]\n", src) 2086 } else { 2087 fmt.Printf(" [no source]\n") 2088 } 2089 } 2090 _, dstReg := loc.(*Register) 2091 2092 // Pre-clobber destination. This avoids the 2093 // following situation: 2094 // - v is currently held in R0 and stacktmp0. 2095 // - We want to copy stacktmp1 to stacktmp0. 2096 // - We choose R0 as the temporary register. 2097 // During the copy, both R0 and stacktmp0 are 2098 // clobbered, losing both copies of v. Oops! 2099 // Erasing the destination early means R0 will not 2100 // be chosen as the temp register, as it will then 2101 // be the last copy of v. 2102 e.erase(loc) 2103 var x *Value 2104 if c == nil || e.s.values[vid].rematerializeable { 2105 if !e.s.values[vid].rematerializeable { 2106 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString()) 2107 } 2108 if dstReg { 2109 x = v.copyInto(e.p) 2110 } else { 2111 // Rematerialize into stack slot. Need a free 2112 // register to accomplish this. 2113 r := e.findRegFor(v.Type) 2114 e.erase(r) 2115 x = v.copyIntoWithXPos(e.p, pos) 2116 e.set(r, vid, x, false, pos) 2117 // Make sure we spill with the size of the slot, not the 2118 // size of x (which might be wider due to our dropping 2119 // of narrowing conversions). 2120 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x) 2121 } 2122 } else { 2123 // Emit move from src to dst. 2124 _, srcReg := src.(*Register) 2125 if srcReg { 2126 if dstReg { 2127 x = e.p.NewValue1(pos, OpCopy, c.Type, c) 2128 } else { 2129 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c) 2130 } 2131 } else { 2132 if dstReg { 2133 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c) 2134 } else { 2135 // mem->mem. Use temp register. 2136 r := e.findRegFor(c.Type) 2137 e.erase(r) 2138 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c) 2139 e.set(r, vid, t, false, pos) 2140 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t) 2141 } 2142 } 2143 } 2144 e.set(loc, vid, x, true, pos) 2145 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) { 2146 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString()) 2147 } 2148 if splice != nil { 2149 (*splice).Uses-- 2150 *splice = x 2151 x.Uses++ 2152 } 2153 return true 2154 } 2155 2156 // set changes the contents of location loc to hold the given value and its cached representative. 2157 func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) { 2158 e.s.f.setHome(c, loc) 2159 e.contents[loc] = contentRecord{vid, c, final, pos} 2160 a := e.cache[vid] 2161 if len(a) == 0 { 2162 e.cachedVals = append(e.cachedVals, vid) 2163 } 2164 a = append(a, c) 2165 e.cache[vid] = a 2166 if r, ok := loc.(*Register); ok { 2167 e.usedRegs |= regMask(1) << uint(r.num) 2168 if final { 2169 e.finalRegs |= regMask(1) << uint(r.num) 2170 } 2171 if len(a) == 1 { 2172 e.uniqueRegs |= regMask(1) << uint(r.num) 2173 } 2174 if len(a) == 2 { 2175 if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2176 e.uniqueRegs &^= regMask(1) << uint(t.num) 2177 } 2178 } 2179 if e.s.values[vid].rematerializeable { 2180 e.rematerializeableRegs |= regMask(1) << uint(r.num) 2181 } 2182 } 2183 if e.s.f.pass.debug > regDebug { 2184 fmt.Printf("%s\n", c.LongString()) 2185 fmt.Printf("v%d now available in %s:%s\n", vid, loc, c) 2186 } 2187 } 2188 2189 // erase removes any user of loc. 2190 func (e *edgeState) erase(loc Location) { 2191 cr := e.contents[loc] 2192 if cr.c == nil { 2193 return 2194 } 2195 vid := cr.vid 2196 2197 if cr.final { 2198 // Add a destination to move this value back into place. 2199 // Make sure it gets added to the tail of the destination queue 2200 // so we make progress on other moves first. 2201 e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos}) 2202 } 2203 2204 // Remove c from the list of cached values. 2205 a := e.cache[vid] 2206 for i, c := range a { 2207 if e.s.f.getHome(c.ID) == loc { 2208 if e.s.f.pass.debug > regDebug { 2209 fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c) 2210 } 2211 a[i], a = a[len(a)-1], a[:len(a)-1] 2212 break 2213 } 2214 } 2215 e.cache[vid] = a 2216 2217 // Update register masks. 2218 if r, ok := loc.(*Register); ok { 2219 e.usedRegs &^= regMask(1) << uint(r.num) 2220 if cr.final { 2221 e.finalRegs &^= regMask(1) << uint(r.num) 2222 } 2223 e.rematerializeableRegs &^= regMask(1) << uint(r.num) 2224 } 2225 if len(a) == 1 { 2226 if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2227 e.uniqueRegs |= regMask(1) << uint(r.num) 2228 } 2229 } 2230 } 2231 2232 // findRegFor finds a register we can use to make a temp copy of type typ. 2233 func (e *edgeState) findRegFor(typ *types.Type) Location { 2234 // Which registers are possibilities. 2235 types := &e.s.f.Config.Types 2236 m := e.s.compatRegs(typ) 2237 2238 // Pick a register. In priority order: 2239 // 1) an unused register 2240 // 2) a non-unique register not holding a final value 2241 // 3) a non-unique register 2242 // 4) a register holding a rematerializeable value 2243 x := m &^ e.usedRegs 2244 if x != 0 { 2245 return &e.s.registers[pickReg(x)] 2246 } 2247 x = m &^ e.uniqueRegs &^ e.finalRegs 2248 if x != 0 { 2249 return &e.s.registers[pickReg(x)] 2250 } 2251 x = m &^ e.uniqueRegs 2252 if x != 0 { 2253 return &e.s.registers[pickReg(x)] 2254 } 2255 x = m & e.rematerializeableRegs 2256 if x != 0 { 2257 return &e.s.registers[pickReg(x)] 2258 } 2259 2260 // No register is available. 2261 // Pick a register to spill. 2262 for _, vid := range e.cachedVals { 2263 a := e.cache[vid] 2264 for _, c := range a { 2265 if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 { 2266 if !c.rematerializeable() { 2267 x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c) 2268 // Allocate a temp location to spill a register to. 2269 // The type of the slot is immaterial - it will not be live across 2270 // any safepoint. Just use a type big enough to hold any register. 2271 t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64} 2272 // TODO: reuse these slots. They'll need to be erased first. 2273 e.set(t, vid, x, false, c.Pos) 2274 if e.s.f.pass.debug > regDebug { 2275 fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString()) 2276 } 2277 } 2278 // r will now be overwritten by the caller. At some point 2279 // later, the newly saved value will be moved back to its 2280 // final destination in processDest. 2281 return r 2282 } 2283 } 2284 } 2285 2286 fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs) 2287 for _, vid := range e.cachedVals { 2288 a := e.cache[vid] 2289 for _, c := range a { 2290 fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID)) 2291 } 2292 } 2293 e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b) 2294 return nil 2295 } 2296 2297 // rematerializeable reports whether the register allocator should recompute 2298 // a value instead of spilling/restoring it. 2299 func (v *Value) rematerializeable() bool { 2300 if !opcodeTable[v.Op].rematerializeable { 2301 return false 2302 } 2303 for _, a := range v.Args { 2304 // SP and SB (generated by OpSP and OpSB) are always available. 2305 if a.Op != OpSP && a.Op != OpSB { 2306 return false 2307 } 2308 } 2309 return true 2310 } 2311 2312 type liveInfo struct { 2313 ID ID // ID of value 2314 dist int32 // # of instructions before next use 2315 pos src.XPos // source position of next use 2316 } 2317 2318 // computeLive computes a map from block ID to a list of value IDs live at the end 2319 // of that block. Together with the value ID is a count of how many instructions 2320 // to the next use of that value. The resulting map is stored in s.live. 2321 // computeLive also computes the desired register information at the end of each block. 2322 // This desired register information is stored in s.desired. 2323 // TODO: this could be quadratic if lots of variables are live across lots of 2324 // basic blocks. Figure out a way to make this function (or, more precisely, the user 2325 // of this function) require only linear size & time. 2326 func (s *regAllocState) computeLive() { 2327 f := s.f 2328 s.live = make([][]liveInfo, f.NumBlocks()) 2329 s.desired = make([]desiredState, f.NumBlocks()) 2330 var phis []*Value 2331 2332 live := f.newSparseMap(f.NumValues()) 2333 defer f.retSparseMap(live) 2334 t := f.newSparseMap(f.NumValues()) 2335 defer f.retSparseMap(t) 2336 2337 // Keep track of which value we want in each register. 2338 var desired desiredState 2339 2340 // Instead of iterating over f.Blocks, iterate over their postordering. 2341 // Liveness information flows backward, so starting at the end 2342 // increases the probability that we will stabilize quickly. 2343 // TODO: Do a better job yet. Here's one possibility: 2344 // Calculate the dominator tree and locate all strongly connected components. 2345 // If a value is live in one block of an SCC, it is live in all. 2346 // Walk the dominator tree from end to beginning, just once, treating SCC 2347 // components as single blocks, duplicated calculated liveness information 2348 // out to all of them. 2349 po := f.postorder() 2350 s.loopnest = f.loopnest() 2351 s.loopnest.calculateDepths() 2352 for { 2353 changed := false 2354 2355 for _, b := range po { 2356 // Start with known live values at the end of the block. 2357 // Add len(b.Values) to adjust from end-of-block distance 2358 // to beginning-of-block distance. 2359 live.clear() 2360 for _, e := range s.live[b.ID] { 2361 live.set(e.ID, e.dist+int32(len(b.Values)), e.pos) 2362 } 2363 2364 // Mark control values as live 2365 for _, c := range b.ControlValues() { 2366 if s.values[c.ID].needReg { 2367 live.set(c.ID, int32(len(b.Values)), b.Pos) 2368 } 2369 } 2370 2371 // Propagate backwards to the start of the block 2372 // Assumes Values have been scheduled. 2373 phis = phis[:0] 2374 for i := len(b.Values) - 1; i >= 0; i-- { 2375 v := b.Values[i] 2376 live.remove(v.ID) 2377 if v.Op == OpPhi { 2378 // save phi ops for later 2379 phis = append(phis, v) 2380 continue 2381 } 2382 if opcodeTable[v.Op].call { 2383 c := live.contents() 2384 for i := range c { 2385 c[i].val += unlikelyDistance 2386 } 2387 } 2388 for _, a := range v.Args { 2389 if s.values[a.ID].needReg { 2390 live.set(a.ID, int32(i), v.Pos) 2391 } 2392 } 2393 } 2394 // Propagate desired registers backwards. 2395 desired.copy(&s.desired[b.ID]) 2396 for i := len(b.Values) - 1; i >= 0; i-- { 2397 v := b.Values[i] 2398 prefs := desired.remove(v.ID) 2399 if v.Op == OpPhi { 2400 // TODO: if v is a phi, save desired register for phi inputs. 2401 // For now, we just drop it and don't propagate 2402 // desired registers back though phi nodes. 2403 continue 2404 } 2405 regspec := s.regspec(v.Op) 2406 // Cancel desired registers if they get clobbered. 2407 desired.clobber(regspec.clobbers) 2408 // Update desired registers if there are any fixed register inputs. 2409 for _, j := range regspec.inputs { 2410 if countRegs(j.regs) != 1 { 2411 continue 2412 } 2413 desired.clobber(j.regs) 2414 desired.add(v.Args[j.idx].ID, pickReg(j.regs)) 2415 } 2416 // Set desired register of input 0 if this is a 2-operand instruction. 2417 if opcodeTable[v.Op].resultInArg0 { 2418 if opcodeTable[v.Op].commutative { 2419 desired.addList(v.Args[1].ID, prefs) 2420 } 2421 desired.addList(v.Args[0].ID, prefs) 2422 } 2423 } 2424 2425 // For each predecessor of b, expand its list of live-at-end values. 2426 // invariant: live contains the values live at the start of b (excluding phi inputs) 2427 for i, e := range b.Preds { 2428 p := e.b 2429 // Compute additional distance for the edge. 2430 // Note: delta must be at least 1 to distinguish the control 2431 // value use from the first user in a successor block. 2432 delta := int32(normalDistance) 2433 if len(p.Succs) == 2 { 2434 if p.Succs[0].b == b && p.Likely == BranchLikely || 2435 p.Succs[1].b == b && p.Likely == BranchUnlikely { 2436 delta = likelyDistance 2437 } 2438 if p.Succs[0].b == b && p.Likely == BranchUnlikely || 2439 p.Succs[1].b == b && p.Likely == BranchLikely { 2440 delta = unlikelyDistance 2441 } 2442 } 2443 2444 // Update any desired registers at the end of p. 2445 s.desired[p.ID].merge(&desired) 2446 2447 // Start t off with the previously known live values at the end of p. 2448 t.clear() 2449 for _, e := range s.live[p.ID] { 2450 t.set(e.ID, e.dist, e.pos) 2451 } 2452 update := false 2453 2454 // Add new live values from scanning this block. 2455 for _, e := range live.contents() { 2456 d := e.val + delta 2457 if !t.contains(e.key) || d < t.get(e.key) { 2458 update = true 2459 t.set(e.key, d, e.aux) 2460 } 2461 } 2462 // Also add the correct arg from the saved phi values. 2463 // All phis are at distance delta (we consider them 2464 // simultaneously happening at the start of the block). 2465 for _, v := range phis { 2466 id := v.Args[i].ID 2467 if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) { 2468 update = true 2469 t.set(id, delta, v.Pos) 2470 } 2471 } 2472 2473 if !update { 2474 continue 2475 } 2476 // The live set has changed, update it. 2477 l := s.live[p.ID][:0] 2478 if cap(l) < t.size() { 2479 l = make([]liveInfo, 0, t.size()) 2480 } 2481 for _, e := range t.contents() { 2482 l = append(l, liveInfo{e.key, e.val, e.aux}) 2483 } 2484 s.live[p.ID] = l 2485 changed = true 2486 } 2487 } 2488 2489 if !changed { 2490 break 2491 } 2492 } 2493 if f.pass.debug > regDebug { 2494 fmt.Println("live values at end of each block") 2495 for _, b := range f.Blocks { 2496 fmt.Printf(" %s:", b) 2497 for _, x := range s.live[b.ID] { 2498 fmt.Printf(" v%d", x.ID) 2499 for _, e := range s.desired[b.ID].entries { 2500 if e.ID != x.ID { 2501 continue 2502 } 2503 fmt.Printf("[") 2504 first := true 2505 for _, r := range e.regs { 2506 if r == noRegister { 2507 continue 2508 } 2509 if !first { 2510 fmt.Printf(",") 2511 } 2512 fmt.Print(&s.registers[r]) 2513 first = false 2514 } 2515 fmt.Printf("]") 2516 } 2517 } 2518 if avoid := s.desired[b.ID].avoid; avoid != 0 { 2519 fmt.Printf(" avoid=%v", s.RegMaskString(avoid)) 2520 } 2521 fmt.Println() 2522 } 2523 } 2524 } 2525 2526 // A desiredState represents desired register assignments. 2527 type desiredState struct { 2528 // Desired assignments will be small, so we just use a list 2529 // of valueID+registers entries. 2530 entries []desiredStateEntry 2531 // Registers that other values want to be in. This value will 2532 // contain at least the union of the regs fields of entries, but 2533 // may contain additional entries for values that were once in 2534 // this data structure but are no longer. 2535 avoid regMask 2536 } 2537 type desiredStateEntry struct { 2538 // (pre-regalloc) value 2539 ID ID 2540 // Registers it would like to be in, in priority order. 2541 // Unused slots are filled with noRegister. 2542 regs [4]register 2543 } 2544 2545 func (d *desiredState) clear() { 2546 d.entries = d.entries[:0] 2547 d.avoid = 0 2548 } 2549 2550 // get returns a list of desired registers for value vid. 2551 func (d *desiredState) get(vid ID) [4]register { 2552 for _, e := range d.entries { 2553 if e.ID == vid { 2554 return e.regs 2555 } 2556 } 2557 return [4]register{noRegister, noRegister, noRegister, noRegister} 2558 } 2559 2560 // add records that we'd like value vid to be in register r. 2561 func (d *desiredState) add(vid ID, r register) { 2562 d.avoid |= regMask(1) << r 2563 for i := range d.entries { 2564 e := &d.entries[i] 2565 if e.ID != vid { 2566 continue 2567 } 2568 if e.regs[0] == r { 2569 // Already known and highest priority 2570 return 2571 } 2572 for j := 1; j < len(e.regs); j++ { 2573 if e.regs[j] == r { 2574 // Move from lower priority to top priority 2575 copy(e.regs[1:], e.regs[:j]) 2576 e.regs[0] = r 2577 return 2578 } 2579 } 2580 copy(e.regs[1:], e.regs[:]) 2581 e.regs[0] = r 2582 return 2583 } 2584 d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}}) 2585 } 2586 2587 func (d *desiredState) addList(vid ID, regs [4]register) { 2588 // regs is in priority order, so iterate in reverse order. 2589 for i := len(regs) - 1; i >= 0; i-- { 2590 r := regs[i] 2591 if r != noRegister { 2592 d.add(vid, r) 2593 } 2594 } 2595 } 2596 2597 // clobber erases any desired registers in the set m. 2598 func (d *desiredState) clobber(m regMask) { 2599 for i := 0; i < len(d.entries); { 2600 e := &d.entries[i] 2601 j := 0 2602 for _, r := range e.regs { 2603 if r != noRegister && m>>r&1 == 0 { 2604 e.regs[j] = r 2605 j++ 2606 } 2607 } 2608 if j == 0 { 2609 // No more desired registers for this value. 2610 d.entries[i] = d.entries[len(d.entries)-1] 2611 d.entries = d.entries[:len(d.entries)-1] 2612 continue 2613 } 2614 for ; j < len(e.regs); j++ { 2615 e.regs[j] = noRegister 2616 } 2617 i++ 2618 } 2619 d.avoid &^= m 2620 } 2621 2622 // copy copies a desired state from another desiredState x. 2623 func (d *desiredState) copy(x *desiredState) { 2624 d.entries = append(d.entries[:0], x.entries...) 2625 d.avoid = x.avoid 2626 } 2627 2628 // remove removes the desired registers for vid and returns them. 2629 func (d *desiredState) remove(vid ID) [4]register { 2630 for i := range d.entries { 2631 if d.entries[i].ID == vid { 2632 regs := d.entries[i].regs 2633 d.entries[i] = d.entries[len(d.entries)-1] 2634 d.entries = d.entries[:len(d.entries)-1] 2635 return regs 2636 } 2637 } 2638 return [4]register{noRegister, noRegister, noRegister, noRegister} 2639 } 2640 2641 // merge merges another desired state x into d. 2642 func (d *desiredState) merge(x *desiredState) { 2643 d.avoid |= x.avoid 2644 // There should only be a few desired registers, so 2645 // linear insert is ok. 2646 for _, e := range x.entries { 2647 d.addList(e.ID, e.regs) 2648 } 2649 } 2650 2651 func min32(x, y int32) int32 { 2652 if x < y { 2653 return x 2654 } 2655 return y 2656 } 2657 func max32(x, y int32) int32 { 2658 if x > y { 2659 return x 2660 } 2661 return y 2662 }