github.com/bir3/gocompiler@v0.9.2202/src/cmd/compile/internal/ssa/regalloc.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Register allocation. 6 // 7 // We use a version of a linear scan register allocator. We treat the 8 // whole function as a single long basic block and run through 9 // it using a greedy register allocator. Then all merge edges 10 // (those targeting a block with len(Preds)>1) are processed to 11 // shuffle data into the place that the target of the edge expects. 12 // 13 // The greedy allocator moves values into registers just before they 14 // are used, spills registers only when necessary, and spills the 15 // value whose next use is farthest in the future. 16 // 17 // The register allocator requires that a block is not scheduled until 18 // at least one of its predecessors have been scheduled. The most recent 19 // such predecessor provides the starting register state for a block. 20 // 21 // It also requires that there are no critical edges (critical = 22 // comes from a block with >1 successor and goes to a block with >1 23 // predecessor). This makes it easy to add fixup code on merge edges - 24 // the source of a merge edge has only one successor, so we can add 25 // fixup code to the end of that block. 26 27 // Spilling 28 // 29 // During the normal course of the allocator, we might throw a still-live 30 // value out of all registers. When that value is subsequently used, we must 31 // load it from a slot on the stack. We must also issue an instruction to 32 // initialize that stack location with a copy of v. 33 // 34 // pre-regalloc: 35 // (1) v = Op ... 36 // (2) x = Op ... 37 // (3) ... = Op v ... 38 // 39 // post-regalloc: 40 // (1) v = Op ... : AX // computes v, store result in AX 41 // s = StoreReg v // spill v to a stack slot 42 // (2) x = Op ... : AX // some other op uses AX 43 // c = LoadReg s : CX // restore v from stack slot 44 // (3) ... = Op c ... // use the restored value 45 // 46 // Allocation occurs normally until we reach (3) and we realize we have 47 // a use of v and it isn't in any register. At that point, we allocate 48 // a spill (a StoreReg) for v. We can't determine the correct place for 49 // the spill at this point, so we allocate the spill as blockless initially. 50 // The restore is then generated to load v back into a register so it can 51 // be used. Subsequent uses of v will use the restored value c instead. 52 // 53 // What remains is the question of where to schedule the spill. 54 // During allocation, we keep track of the dominator of all restores of v. 55 // The spill of v must dominate that block. The spill must also be issued at 56 // a point where v is still in a register. 57 // 58 // To find the right place, start at b, the block which dominates all restores. 59 // - If b is v.Block, then issue the spill right after v. 60 // It is known to be in a register at that point, and dominates any restores. 61 // - Otherwise, if v is in a register at the start of b, 62 // put the spill of v at the start of b. 63 // - Otherwise, set b = immediate dominator of b, and repeat. 64 // 65 // Phi values are special, as always. We define two kinds of phis, those 66 // where the merge happens in a register (a "register" phi) and those where 67 // the merge happens in a stack location (a "stack" phi). 68 // 69 // A register phi must have the phi and all of its inputs allocated to the 70 // same register. 
Register phis are spilled similarly to regular ops. 71 // 72 // A stack phi must have the phi and all of its inputs allocated to the same 73 // stack location. Stack phis start out life already spilled - each phi 74 // input must be a store (using StoreReg) at the end of the corresponding 75 // predecessor block. 76 // b1: y = ... : AX b2: z = ... : BX 77 // y2 = StoreReg y z2 = StoreReg z 78 // goto b3 goto b3 79 // b3: x = phi(y2, z2) 80 // The stack allocator knows that StoreReg args of stack-allocated phis 81 // must be allocated to the same stack slot as the phi that uses them. 82 // x is now a spilled value and a restore must appear before its first use. 83 84 // TODO 85 86 // Use an affinity graph to mark two values which should use the 87 // same register. This affinity graph will be used to prefer certain 88 // registers for allocation. This affinity helps eliminate moves that 89 // are required for phi implementations and helps generate allocations 90 // for 2-register architectures. 91 92 // Note: regalloc generates a not-quite-SSA output. If we have: 93 // 94 // b1: x = ... : AX 95 // x2 = StoreReg x 96 // ... AX gets reused for something else ... 97 // if ... goto b3 else b4 98 // 99 // b3: x3 = LoadReg x2 : BX b4: x4 = LoadReg x2 : CX 100 // ... use x3 ... ... use x4 ... 101 // 102 // b2: ... use x3 ... 103 // 104 // If b3 is the primary predecessor of b2, then we use x3 in b2 and 105 // add a x4:CX->BX copy at the end of b4. 106 // But the definition of x3 doesn't dominate b2. We should really 107 // insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep 108 // SSA form. For now, we ignore this problem as remaining in strict 109 // SSA form isn't needed after regalloc. We'll just leave the use 110 // of x3 not dominated by the definition of x3, and the CX->BX copy 111 // will have no use (so don't run deadcode after regalloc!). 112 // TODO: maybe we should introduce these extra phis? 113 114 package ssa 115 116 import ( 117 "github.com/bir3/gocompiler/src/cmd/compile/internal/base" 118 "github.com/bir3/gocompiler/src/cmd/compile/internal/ir" 119 "github.com/bir3/gocompiler/src/cmd/compile/internal/types" 120 "github.com/bir3/gocompiler/src/cmd/internal/src" 121 "github.com/bir3/gocompiler/src/cmd/internal/sys" 122 "fmt" 123 "github.com/bir3/gocompiler/src/internal/buildcfg" 124 "math/bits" 125 "unsafe" 126 ) 127 128 const ( 129 moveSpills = iota 130 logSpills 131 regDebug 132 stackDebug 133 ) 134 135 // distance is a measure of how far into the future values are used. 136 // distance is measured in units of instructions. 137 const ( 138 likelyDistance = 1 139 normalDistance = 10 140 unlikelyDistance = 100 141 ) 142 143 // regalloc performs register allocation on f. It sets f.RegAlloc 144 // to the resulting allocation. 145 func regalloc(f *Func) { 146 var s regAllocState 147 s.init(f) 148 s.regalloc(f) 149 s.close() 150 } 151 152 type register uint8 153 154 const noRegister register = 255 155 156 // For bulk initializing 157 var noRegisters [32]register = [32]register{ 158 noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, 159 noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, 160 noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, 161 noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, 162 } 163 164 // A regMask encodes a set of machine registers. 165 // TODO: regMask -> regSet? 
166 type regMask uint64 167 168 func (m regMask) String() string { 169 s := "" 170 for r := register(0); m != 0; r++ { 171 if m>>r&1 == 0 { 172 continue 173 } 174 m &^= regMask(1) << r 175 if s != "" { 176 s += " " 177 } 178 s += fmt.Sprintf("r%d", r) 179 } 180 return s 181 } 182 183 func (s *regAllocState) RegMaskString(m regMask) string { 184 str := "" 185 for r := register(0); m != 0; r++ { 186 if m>>r&1 == 0 { 187 continue 188 } 189 m &^= regMask(1) << r 190 if str != "" { 191 str += " " 192 } 193 str += s.registers[r].String() 194 } 195 return str 196 } 197 198 // countRegs returns the number of set bits in the register mask. 199 func countRegs(r regMask) int { 200 return bits.OnesCount64(uint64(r)) 201 } 202 203 // pickReg picks an arbitrary register from the register mask. 204 func pickReg(r regMask) register { 205 if r == 0 { 206 panic("can't pick a register from an empty set") 207 } 208 // pick the lowest one 209 return register(bits.TrailingZeros64(uint64(r))) 210 } 211 212 type use struct { 213 dist int32 // distance from start of the block to a use of a value 214 pos src.XPos // source position of the use 215 next *use // linked list of uses of a value in nondecreasing dist order 216 } 217 218 // A valState records the register allocation state for a (pre-regalloc) value. 219 type valState struct { 220 regs regMask // the set of registers holding a Value (usually just one) 221 uses *use // list of uses in this block 222 spill *Value // spilled copy of the Value (if any) 223 restoreMin int32 // minimum of all restores' blocks' sdom.entry 224 restoreMax int32 // maximum of all restores' blocks' sdom.exit 225 needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() 226 rematerializeable bool // cached value of v.rematerializeable() 227 } 228 229 type regState struct { 230 v *Value // Original (preregalloc) Value stored in this register. 231 c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it. 232 // If a register is unused, v==c==nil 233 } 234 235 type regAllocState struct { 236 f *Func 237 238 sdom SparseTree 239 registers []Register 240 numRegs register 241 SPReg register 242 SBReg register 243 GReg register 244 allocatable regMask 245 246 // live values at the end of each block. live[b.ID] is a list of value IDs 247 // which are live at the end of b, together with a count of how many instructions 248 // forward to the next use. 249 live [][]liveInfo 250 // desired register assignments at the end of each block. 251 // Note that this is a static map computed before allocation occurs. Dynamic 252 // register desires (from partially completed allocations) will trump 253 // this information. 254 desired []desiredState 255 256 // current state of each (preregalloc) Value 257 values []valState 258 259 // ID of SP, SB values 260 sp, sb ID 261 262 // For each Value, map from its value ID back to the 263 // preregalloc Value it was derived from. 
264 orig []*Value 265 266 // current state of each register 267 regs []regState 268 269 // registers that contain values which can't be kicked out 270 nospill regMask 271 272 // mask of registers currently in use 273 used regMask 274 275 // mask of registers used since the start of the current block 276 usedSinceBlockStart regMask 277 278 // mask of registers used in the current instruction 279 tmpused regMask 280 281 // current block we're working on 282 curBlock *Block 283 284 // cache of use records 285 freeUseRecords *use 286 287 // endRegs[blockid] is the register state at the end of each block. 288 // encoded as a set of endReg records. 289 endRegs [][]endReg 290 291 // startRegs[blockid] is the register state at the start of merge blocks. 292 // saved state does not include the state of phi ops in the block. 293 startRegs [][]startReg 294 295 // startRegsMask is a mask of the registers in startRegs[curBlock.ID]. 296 // Registers dropped from startRegsMask are later synchronoized back to 297 // startRegs by dropping from there as well. 298 startRegsMask regMask 299 300 // spillLive[blockid] is the set of live spills at the end of each block 301 spillLive [][]ID 302 303 // a set of copies we generated to move things around, and 304 // whether it is used in shuffle. Unused copies will be deleted. 305 copies map[*Value]bool 306 307 loopnest *loopnest 308 309 // choose a good order in which to visit blocks for allocation purposes. 310 visitOrder []*Block 311 312 // blockOrder[b.ID] corresponds to the index of block b in visitOrder. 313 blockOrder []int32 314 315 // whether to insert instructions that clobber dead registers at call sites 316 doClobber bool 317 } 318 319 type endReg struct { 320 r register 321 v *Value // pre-regalloc value held in this register (TODO: can we use ID here?) 322 c *Value // cached version of the value 323 } 324 325 type startReg struct { 326 r register 327 v *Value // pre-regalloc value needed in this register 328 c *Value // cached version of the value 329 pos src.XPos // source position of use of this register 330 } 331 332 // freeReg frees up register r. Any current user of r is kicked out. 333 func (s *regAllocState) freeReg(r register) { 334 v := s.regs[r].v 335 if v == nil { 336 s.f.Fatalf("tried to free an already free register %d\n", r) 337 } 338 339 // Mark r as unused. 340 if s.f.pass.debug > regDebug { 341 fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c) 342 } 343 s.regs[r] = regState{} 344 s.values[v.ID].regs &^= regMask(1) << r 345 s.used &^= regMask(1) << r 346 } 347 348 // freeRegs frees up all registers listed in m. 349 func (s *regAllocState) freeRegs(m regMask) { 350 for m&s.used != 0 { 351 s.freeReg(pickReg(m & s.used)) 352 } 353 } 354 355 // clobberRegs inserts instructions that clobber registers listed in m. 356 func (s *regAllocState) clobberRegs(m regMask) { 357 m &= s.allocatable & s.f.Config.gpRegMask // only integer register can contain pointers, only clobber them 358 for m != 0 { 359 r := pickReg(m) 360 m &^= 1 << r 361 x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid) 362 s.f.setHome(x, &s.registers[r]) 363 } 364 } 365 366 // setOrig records that c's original value is the same as 367 // v's original value. 
368 func (s *regAllocState) setOrig(c *Value, v *Value) { 369 if int(c.ID) >= cap(s.orig) { 370 x := s.f.Cache.allocValueSlice(int(c.ID) + 1) 371 copy(x, s.orig) 372 s.f.Cache.freeValueSlice(s.orig) 373 s.orig = x 374 } 375 for int(c.ID) >= len(s.orig) { 376 s.orig = append(s.orig, nil) 377 } 378 if s.orig[c.ID] != nil { 379 s.f.Fatalf("orig value set twice %s %s", c, v) 380 } 381 s.orig[c.ID] = s.orig[v.ID] 382 } 383 384 // assignReg assigns register r to hold c, a copy of v. 385 // r must be unused. 386 func (s *regAllocState) assignReg(r register, v *Value, c *Value) { 387 if s.f.pass.debug > regDebug { 388 fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c) 389 } 390 if s.regs[r].v != nil { 391 s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v) 392 } 393 394 // Update state. 395 s.regs[r] = regState{v, c} 396 s.values[v.ID].regs |= regMask(1) << r 397 s.used |= regMask(1) << r 398 s.f.setHome(c, &s.registers[r]) 399 } 400 401 // allocReg chooses a register from the set of registers in mask. 402 // If there is no unused register, a Value will be kicked out of 403 // a register to make room. 404 func (s *regAllocState) allocReg(mask regMask, v *Value) register { 405 if v.OnWasmStack { 406 return noRegister 407 } 408 409 mask &= s.allocatable 410 mask &^= s.nospill 411 if mask == 0 { 412 s.f.Fatalf("no register available for %s", v.LongString()) 413 } 414 415 // Pick an unused register if one is available. 416 if mask&^s.used != 0 { 417 r := pickReg(mask &^ s.used) 418 s.usedSinceBlockStart |= regMask(1) << r 419 return r 420 } 421 422 // Pick a value to spill. Spill the value with the 423 // farthest-in-the-future use. 424 // TODO: Prefer registers with already spilled Values? 425 // TODO: Modify preference using affinity graph. 426 // TODO: if a single value is in multiple registers, spill one of them 427 // before spilling a value in just a single register. 428 429 // Find a register to spill. We spill the register containing the value 430 // whose next use is as far in the future as possible. 431 // https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm 432 var r register 433 maxuse := int32(-1) 434 for t := register(0); t < s.numRegs; t++ { 435 if mask>>t&1 == 0 { 436 continue 437 } 438 v := s.regs[t].v 439 if n := s.values[v.ID].uses.dist; n > maxuse { 440 // v's next use is farther in the future than any value 441 // we've seen so far. A new best spill candidate. 442 r = t 443 maxuse = n 444 } 445 } 446 if maxuse == -1 { 447 s.f.Fatalf("couldn't find register to spill") 448 } 449 450 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm { 451 // TODO(neelance): In theory this should never happen, because all wasm registers are equal. 452 // So if there is still a free register, the allocation should have picked that one in the first place instead of 453 // trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization. 454 s.freeReg(r) 455 return r 456 } 457 458 // Try to move it around before kicking out, if there is a free register. 459 // We generate a Copy and record it. It will be deleted if never used. 
460 v2 := s.regs[r].v 461 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r) 462 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 { 463 s.usedSinceBlockStart |= regMask(1) << r 464 r2 := pickReg(m) 465 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c) 466 s.copies[c] = false 467 if s.f.pass.debug > regDebug { 468 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2]) 469 } 470 s.setOrig(c, v2) 471 s.assignReg(r2, v2, c) 472 } 473 474 // If the evicted register isn't used between the start of the block 475 // and now then there is no reason to even request it on entry. We can 476 // drop from startRegs in that case. 477 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 { 478 if s.startRegsMask&(regMask(1)<<r) == 1 { 479 if s.f.pass.debug > regDebug { 480 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r]) 481 } 482 s.startRegsMask &^= regMask(1) << r 483 } 484 } 485 486 s.freeReg(r) 487 s.usedSinceBlockStart |= regMask(1) << r 488 return r 489 } 490 491 // makeSpill returns a Value which represents the spilled value of v. 492 // b is the block in which the spill is used. 493 func (s *regAllocState) makeSpill(v *Value, b *Block) *Value { 494 vi := &s.values[v.ID] 495 if vi.spill != nil { 496 // Final block not known - keep track of subtree where restores reside. 497 vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry) 498 vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit) 499 return vi.spill 500 } 501 // Make a spill for v. We don't know where we want 502 // to put it yet, so we leave it blockless for now. 503 spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos) 504 // We also don't know what the spill's arg will be. 505 // Leave it argless for now. 506 s.setOrig(spill, v) 507 vi.spill = spill 508 vi.restoreMin = s.sdom[b.ID].entry 509 vi.restoreMax = s.sdom[b.ID].exit 510 return spill 511 } 512 513 // allocValToReg allocates v to a register selected from regMask and 514 // returns the register copy of v. Any previous user is kicked out and spilled 515 // (if necessary). Load code is added at the current pc. If nospill is set the 516 // allocated register is marked nospill so the assignment cannot be 517 // undone until the caller allows it by clearing nospill. Returns a 518 // *Value which is either v or a copy of v allocated to the chosen register. 519 func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value { 520 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() { 521 c := v.copyIntoWithXPos(s.curBlock, pos) 522 c.OnWasmStack = true 523 s.setOrig(c, v) 524 return c 525 } 526 if v.OnWasmStack { 527 return v 528 } 529 530 vi := &s.values[v.ID] 531 pos = pos.WithNotStmt() 532 // Check if v is already in a requested register. 533 if mask&vi.regs != 0 { 534 r := pickReg(mask & vi.regs) 535 if s.regs[r].v != v || s.regs[r].c == nil { 536 panic("bad register state") 537 } 538 if nospill { 539 s.nospill |= regMask(1) << r 540 } 541 s.usedSinceBlockStart |= regMask(1) << r 542 return s.regs[r].c 543 } 544 545 var r register 546 // If nospill is set, the value is used immediately, so it can live on the WebAssembly stack. 547 onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm 548 if !onWasmStack { 549 // Allocate a register. 550 r = s.allocReg(mask, v) 551 } 552 553 // Allocate v to the new register. 554 var c *Value 555 if vi.regs != 0 { 556 // Copy from a register that v is already in. 
557 r2 := pickReg(vi.regs) 558 if s.regs[r2].v != v { 559 panic("bad register state") 560 } 561 s.usedSinceBlockStart |= regMask(1) << r2 562 c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c) 563 } else if v.rematerializeable() { 564 // Rematerialize instead of loading from the spill location. 565 c = v.copyIntoWithXPos(s.curBlock, pos) 566 } else { 567 // Load v from its spill location. 568 spill := s.makeSpill(v, s.curBlock) 569 if s.f.pass.debug > logSpills { 570 s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill) 571 } 572 c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill) 573 } 574 575 s.setOrig(c, v) 576 577 if onWasmStack { 578 c.OnWasmStack = true 579 return c 580 } 581 582 s.assignReg(r, v, c) 583 if c.Op == OpLoadReg && s.isGReg(r) { 584 s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString()) 585 } 586 if nospill { 587 s.nospill |= regMask(1) << r 588 } 589 return c 590 } 591 592 // isLeaf reports whether f performs any calls. 593 func isLeaf(f *Func) bool { 594 for _, b := range f.Blocks { 595 for _, v := range b.Values { 596 if v.Op.IsCall() && !v.Op.IsTailCall() { 597 // tail call is not counted as it does not save the return PC or need a frame 598 return false 599 } 600 } 601 } 602 return true 603 } 604 605 // needRegister reports whether v needs a register. 606 func (v *Value) needRegister() bool { 607 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() 608 } 609 610 func (s *regAllocState) init(f *Func) { 611 s.f = f 612 s.f.RegAlloc = s.f.Cache.locs[:0] 613 s.registers = f.Config.registers 614 if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) { 615 s.f.Fatalf("bad number of registers: %d", nr) 616 } else { 617 s.numRegs = register(nr) 618 } 619 // Locate SP, SB, and g registers. 620 s.SPReg = noRegister 621 s.SBReg = noRegister 622 s.GReg = noRegister 623 for r := register(0); r < s.numRegs; r++ { 624 switch s.registers[r].String() { 625 case "SP": 626 s.SPReg = r 627 case "SB": 628 s.SBReg = r 629 case "g": 630 s.GReg = r 631 } 632 } 633 // Make sure we found all required registers. 634 switch noRegister { 635 case s.SPReg: 636 s.f.Fatalf("no SP register found") 637 case s.SBReg: 638 s.f.Fatalf("no SB register found") 639 case s.GReg: 640 if f.Config.hasGReg { 641 s.f.Fatalf("no g register found") 642 } 643 } 644 645 // Figure out which registers we're allowed to use. 646 s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask 647 s.allocatable &^= 1 << s.SPReg 648 s.allocatable &^= 1 << s.SBReg 649 if s.f.Config.hasGReg { 650 s.allocatable &^= 1 << s.GReg 651 } 652 if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 { 653 s.allocatable &^= 1 << uint(s.f.Config.FPReg) 654 } 655 if s.f.Config.LinkReg != -1 { 656 if isLeaf(f) { 657 // Leaf functions don't save/restore the link register. 658 s.allocatable &^= 1 << uint(s.f.Config.LinkReg) 659 } 660 } 661 if s.f.Config.ctxt.Flag_dynlink { 662 switch s.f.Config.arch { 663 case "386": 664 // nothing to do. 665 // Note that for Flag_shared (position independent code) 666 // we do need to be careful, but that carefulness is hidden 667 // in the rewrite rules so we always have a free register 668 // available for global load/stores. See _gen/386.rules (search for Flag_shared). 669 case "amd64": 670 s.allocatable &^= 1 << 15 // R15 671 case "arm": 672 s.allocatable &^= 1 << 9 // R9 673 case "arm64": 674 // nothing to do 675 case "loong64": // R2 (aka TP) already reserved. 
676 // nothing to do 677 case "ppc64le": // R2 already reserved. 678 // nothing to do 679 case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved. 680 // nothing to do 681 case "s390x": 682 s.allocatable &^= 1 << 11 // R11 683 default: 684 s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch) 685 } 686 } 687 688 // Linear scan register allocation can be influenced by the order in which blocks appear. 689 // Decouple the register allocation order from the generated block order. 690 // This also creates an opportunity for experiments to find a better order. 691 s.visitOrder = layoutRegallocOrder(f) 692 693 // Compute block order. This array allows us to distinguish forward edges 694 // from backward edges and compute how far they go. 695 s.blockOrder = make([]int32, f.NumBlocks()) 696 for i, b := range s.visitOrder { 697 s.blockOrder[b.ID] = int32(i) 698 } 699 700 s.regs = make([]regState, s.numRegs) 701 nv := f.NumValues() 702 if cap(s.f.Cache.regallocValues) >= nv { 703 s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv] 704 } else { 705 s.f.Cache.regallocValues = make([]valState, nv) 706 } 707 s.values = s.f.Cache.regallocValues 708 s.orig = s.f.Cache.allocValueSlice(nv) 709 s.copies = make(map[*Value]bool) 710 for _, b := range s.visitOrder { 711 for _, v := range b.Values { 712 if v.needRegister() { 713 s.values[v.ID].needReg = true 714 s.values[v.ID].rematerializeable = v.rematerializeable() 715 s.orig[v.ID] = v 716 } 717 // Note: needReg is false for values returning Tuple types. 718 // Instead, we mark the corresponding Selects as needReg. 719 } 720 } 721 s.computeLive() 722 723 s.endRegs = make([][]endReg, f.NumBlocks()) 724 s.startRegs = make([][]startReg, f.NumBlocks()) 725 s.spillLive = make([][]ID, f.NumBlocks()) 726 s.sdom = f.Sdom() 727 728 // wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack. 729 if f.Config.ctxt.Arch.Arch == sys.ArchWasm { 730 canLiveOnStack := f.newSparseSet(f.NumValues()) 731 defer f.retSparseSet(canLiveOnStack) 732 for _, b := range f.Blocks { 733 // New block. Clear candidate set. 734 canLiveOnStack.clear() 735 for _, c := range b.ControlValues() { 736 if c.Uses == 1 && !opcodeTable[c.Op].generic { 737 canLiveOnStack.add(c.ID) 738 } 739 } 740 // Walking backwards. 741 for i := len(b.Values) - 1; i >= 0; i-- { 742 v := b.Values[i] 743 if canLiveOnStack.contains(v.ID) { 744 v.OnWasmStack = true 745 } else { 746 // Value can not live on stack. Values are not allowed to be reordered, so clear candidate set. 747 canLiveOnStack.clear() 748 } 749 for _, arg := range v.Args { 750 // Value can live on the stack if: 751 // - it is only used once 752 // - it is used in the same basic block 753 // - it is not a "mem" value 754 // - it is a WebAssembly op 755 if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic { 756 canLiveOnStack.add(arg.ID) 757 } 758 } 759 } 760 } 761 } 762 763 // The clobberdeadreg experiment inserts code to clobber dead registers 764 // at call sites. 765 // Ignore huge functions to avoid doing too much work. 766 if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 { 767 // TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH. 768 s.doClobber = true 769 } 770 } 771 772 func (s *regAllocState) close() { 773 s.f.Cache.freeValueSlice(s.orig) 774 } 775 776 // Adds a use record for id at distance dist from the start of the block. 777 // All calls to addUse must happen with nonincreasing dist. 
778 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) { 779 r := s.freeUseRecords 780 if r != nil { 781 s.freeUseRecords = r.next 782 } else { 783 r = &use{} 784 } 785 r.dist = dist 786 r.pos = pos 787 r.next = s.values[id].uses 788 s.values[id].uses = r 789 if r.next != nil && dist > r.next.dist { 790 s.f.Fatalf("uses added in wrong order") 791 } 792 } 793 794 // advanceUses advances the uses of v's args from the state before v to the state after v. 795 // Any values which have no more uses are deallocated from registers. 796 func (s *regAllocState) advanceUses(v *Value) { 797 for _, a := range v.Args { 798 if !s.values[a.ID].needReg { 799 continue 800 } 801 ai := &s.values[a.ID] 802 r := ai.uses 803 ai.uses = r.next 804 if r.next == nil { 805 // Value is dead, free all registers that hold it. 806 s.freeRegs(ai.regs) 807 } 808 r.next = s.freeUseRecords 809 s.freeUseRecords = r 810 } 811 } 812 813 // liveAfterCurrentInstruction reports whether v is live after 814 // the current instruction is completed. v must be used by the 815 // current instruction. 816 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool { 817 u := s.values[v.ID].uses 818 if u == nil { 819 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID])) 820 } 821 d := u.dist 822 for u != nil && u.dist == d { 823 u = u.next 824 } 825 return u != nil && u.dist > d 826 } 827 828 // Sets the state of the registers to that encoded in regs. 829 func (s *regAllocState) setState(regs []endReg) { 830 s.freeRegs(s.used) 831 for _, x := range regs { 832 s.assignReg(x.r, x.v, x.c) 833 } 834 } 835 836 // compatRegs returns the set of registers which can store a type t. 837 func (s *regAllocState) compatRegs(t *types.Type) regMask { 838 var m regMask 839 if t.IsTuple() || t.IsFlags() { 840 return 0 841 } 842 if t.IsFloat() || t == types.TypeInt128 { 843 if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 { 844 m = s.f.Config.fp32RegMask 845 } else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 { 846 m = s.f.Config.fp64RegMask 847 } else { 848 m = s.f.Config.fpRegMask 849 } 850 } else { 851 m = s.f.Config.gpRegMask 852 } 853 return m & s.allocatable 854 } 855 856 // regspec returns the regInfo for operation op. 857 func (s *regAllocState) regspec(v *Value) regInfo { 858 op := v.Op 859 if op == OpConvert { 860 // OpConvert is a generic op, so it doesn't have a 861 // register set in the static table. It can use any 862 // allocatable integer register. 863 m := s.allocatable & s.f.Config.gpRegMask 864 return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}} 865 } 866 if op == OpArgIntReg { 867 reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()] 868 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}} 869 } 870 if op == OpArgFloatReg { 871 reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()] 872 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}} 873 } 874 if op.IsCall() { 875 if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil { 876 return *ac.Reg(&opcodeTable[op].reg, s.f.Config) 877 } 878 } 879 if op == OpMakeResult && s.f.OwnAux.reg != nil { 880 return *s.f.OwnAux.ResultReg(s.f.Config) 881 } 882 return opcodeTable[op].reg 883 } 884 885 func (s *regAllocState) isGReg(r register) bool { 886 return s.f.Config.hasGReg && s.GReg == r 887 } 888 889 // Dummy value used to represent the value being held in a temporary register. 
890 var tmpVal Value 891 892 func (s *regAllocState) regalloc(f *Func) { 893 regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register 894 defer f.retSparseSet(regValLiveSet) 895 var oldSched []*Value 896 var phis []*Value 897 var phiRegs []register 898 var args []*Value 899 900 // Data structure used for computing desired registers. 901 var desired desiredState 902 903 // Desired registers for inputs & outputs for each instruction in the block. 904 type dentry struct { 905 out [4]register // desired output registers 906 in [3][4]register // desired input registers (for inputs 0,1, and 2) 907 } 908 var dinfo []dentry 909 910 if f.Entry != f.Blocks[0] { 911 f.Fatalf("entry block must be first") 912 } 913 914 for _, b := range s.visitOrder { 915 if s.f.pass.debug > regDebug { 916 fmt.Printf("Begin processing block %v\n", b) 917 } 918 s.curBlock = b 919 s.startRegsMask = 0 920 s.usedSinceBlockStart = 0 921 922 // Initialize regValLiveSet and uses fields for this block. 923 // Walk backwards through the block doing liveness analysis. 924 regValLiveSet.clear() 925 for _, e := range s.live[b.ID] { 926 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block 927 regValLiveSet.add(e.ID) 928 } 929 for _, v := range b.ControlValues() { 930 if s.values[v.ID].needReg { 931 s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values 932 regValLiveSet.add(v.ID) 933 } 934 } 935 for i := len(b.Values) - 1; i >= 0; i-- { 936 v := b.Values[i] 937 regValLiveSet.remove(v.ID) 938 if v.Op == OpPhi { 939 // Remove v from the live set, but don't add 940 // any inputs. This is the state the len(b.Preds)>1 941 // case below desires; it wants to process phis specially. 942 continue 943 } 944 if opcodeTable[v.Op].call { 945 // Function call clobbers all the registers but SP and SB. 946 regValLiveSet.clear() 947 if s.sp != 0 && s.values[s.sp].uses != nil { 948 regValLiveSet.add(s.sp) 949 } 950 if s.sb != 0 && s.values[s.sb].uses != nil { 951 regValLiveSet.add(s.sb) 952 } 953 } 954 for _, a := range v.Args { 955 if !s.values[a.ID].needReg { 956 continue 957 } 958 s.addUse(a.ID, int32(i), v.Pos) 959 regValLiveSet.add(a.ID) 960 } 961 } 962 if s.f.pass.debug > regDebug { 963 fmt.Printf("use distances for %s\n", b) 964 for i := range s.values { 965 vi := &s.values[i] 966 u := vi.uses 967 if u == nil { 968 continue 969 } 970 fmt.Printf(" v%d:", i) 971 for u != nil { 972 fmt.Printf(" %d", u.dist) 973 u = u.next 974 } 975 fmt.Println() 976 } 977 } 978 979 // Make a copy of the block schedule so we can generate a new one in place. 980 // We make a separate copy for phis and regular values. 981 nphi := 0 982 for _, v := range b.Values { 983 if v.Op != OpPhi { 984 break 985 } 986 nphi++ 987 } 988 phis = append(phis[:0], b.Values[:nphi]...) 989 oldSched = append(oldSched[:0], b.Values[nphi:]...) 990 b.Values = b.Values[:0] 991 992 // Initialize start state of block. 993 if b == f.Entry { 994 // Regalloc state is empty to start. 995 if nphi > 0 { 996 f.Fatalf("phis in entry block") 997 } 998 } else if len(b.Preds) == 1 { 999 // Start regalloc state with the end state of the previous block. 1000 s.setState(s.endRegs[b.Preds[0].b.ID]) 1001 if nphi > 0 { 1002 f.Fatalf("phis in single-predecessor block") 1003 } 1004 // Drop any values which are no longer live. 1005 // This may happen because at the end of p, a value may be 1006 // live but only used by some other successor of p. 
1007 for r := register(0); r < s.numRegs; r++ { 1008 v := s.regs[r].v 1009 if v != nil && !regValLiveSet.contains(v.ID) { 1010 s.freeReg(r) 1011 } 1012 } 1013 } else { 1014 // This is the complicated case. We have more than one predecessor, 1015 // which means we may have Phi ops. 1016 1017 // Start with the final register state of the predecessor with least spill values. 1018 // This is based on the following points: 1019 // 1, The less spill value indicates that the register pressure of this path is smaller, 1020 // so the values of this block are more likely to be allocated to registers. 1021 // 2, Avoid the predecessor that contains the function call, because the predecessor that 1022 // contains the function call usually generates a lot of spills and lose the previous 1023 // allocation state. 1024 // TODO: Improve this part. At least the size of endRegs of the predecessor also has 1025 // an impact on the code size and compiler speed. But it is not easy to find a simple 1026 // and efficient method that combines multiple factors. 1027 idx := -1 1028 for i, p := range b.Preds { 1029 // If the predecessor has not been visited yet, skip it because its end state 1030 // (redRegs and spillLive) has not been computed yet. 1031 pb := p.b 1032 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] { 1033 continue 1034 } 1035 if idx == -1 { 1036 idx = i 1037 continue 1038 } 1039 pSel := b.Preds[idx].b 1040 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) { 1041 idx = i 1042 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) { 1043 // Use a bit of likely information. After critical pass, pb and pSel must 1044 // be plain blocks, so check edge pb->pb.Preds instead of edge pb->b. 1045 // TODO: improve the prediction of the likely predecessor. The following 1046 // method is only suitable for the simplest cases. For complex cases, 1047 // the prediction may be inaccurate, but this does not affect the 1048 // correctness of the program. 1049 // According to the layout algorithm, the predecessor with the 1050 // smaller blockOrder is the true branch, and the test results show 1051 // that it is better to choose the predecessor with a smaller 1052 // blockOrder than no choice. 1053 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] { 1054 idx = i 1055 } 1056 } 1057 } 1058 if idx < 0 { 1059 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b) 1060 } 1061 p := b.Preds[idx].b 1062 s.setState(s.endRegs[p.ID]) 1063 1064 if s.f.pass.debug > regDebug { 1065 fmt.Printf("starting merge block %s with end state of %s:\n", b, p) 1066 for _, x := range s.endRegs[p.ID] { 1067 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c) 1068 } 1069 } 1070 1071 // Decide on registers for phi ops. Use the registers determined 1072 // by the primary predecessor if we can. 1073 // TODO: pick best of (already processed) predecessors? 1074 // Majority vote? Deepest nesting level? 1075 phiRegs = phiRegs[:0] 1076 var phiUsed regMask 1077 1078 for _, v := range phis { 1079 if !s.values[v.ID].needReg { 1080 phiRegs = append(phiRegs, noRegister) 1081 continue 1082 } 1083 a := v.Args[idx] 1084 // Some instructions target not-allocatable registers. 1085 // They're not suitable for further (phi-function) allocation. 
1086 m := s.values[a.ID].regs &^ phiUsed & s.allocatable 1087 if m != 0 { 1088 r := pickReg(m) 1089 phiUsed |= regMask(1) << r 1090 phiRegs = append(phiRegs, r) 1091 } else { 1092 phiRegs = append(phiRegs, noRegister) 1093 } 1094 } 1095 1096 // Second pass - deallocate all in-register phi inputs. 1097 for i, v := range phis { 1098 if !s.values[v.ID].needReg { 1099 continue 1100 } 1101 a := v.Args[idx] 1102 r := phiRegs[i] 1103 if r == noRegister { 1104 continue 1105 } 1106 if regValLiveSet.contains(a.ID) { 1107 // Input value is still live (it is used by something other than Phi). 1108 // Try to move it around before kicking out, if there is a free register. 1109 // We generate a Copy in the predecessor block and record it. It will be 1110 // deleted later if never used. 1111 // 1112 // Pick a free register. At this point some registers used in the predecessor 1113 // block may have been deallocated. Those are the ones used for Phis. Exclude 1114 // them (and they are not going to be helpful anyway). 1115 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed 1116 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 { 1117 r2 := pickReg(m) 1118 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c) 1119 s.copies[c] = false 1120 if s.f.pass.debug > regDebug { 1121 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2]) 1122 } 1123 s.setOrig(c, a) 1124 s.assignReg(r2, a, c) 1125 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c}) 1126 } 1127 } 1128 s.freeReg(r) 1129 } 1130 1131 // Copy phi ops into new schedule. 1132 b.Values = append(b.Values, phis...) 1133 1134 // Third pass - pick registers for phis whose input 1135 // was not in a register in the primary predecessor. 1136 for i, v := range phis { 1137 if !s.values[v.ID].needReg { 1138 continue 1139 } 1140 if phiRegs[i] != noRegister { 1141 continue 1142 } 1143 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used 1144 // If one of the other inputs of v is in a register, and the register is available, 1145 // select this register, which can save some unnecessary copies. 1146 for i, pe := range b.Preds { 1147 if i == idx { 1148 continue 1149 } 1150 ri := noRegister 1151 for _, er := range s.endRegs[pe.b.ID] { 1152 if er.v == s.orig[v.Args[i].ID] { 1153 ri = er.r 1154 break 1155 } 1156 } 1157 if ri != noRegister && m>>ri&1 != 0 { 1158 m = regMask(1) << ri 1159 break 1160 } 1161 } 1162 if m != 0 { 1163 r := pickReg(m) 1164 phiRegs[i] = r 1165 phiUsed |= regMask(1) << r 1166 } 1167 } 1168 1169 // Set registers for phis. Add phi spill code. 1170 for i, v := range phis { 1171 if !s.values[v.ID].needReg { 1172 continue 1173 } 1174 r := phiRegs[i] 1175 if r == noRegister { 1176 // stack-based phi 1177 // Spills will be inserted in all the predecessors below. 1178 s.values[v.ID].spill = v // v starts life spilled 1179 continue 1180 } 1181 // register-based phi 1182 s.assignReg(r, v, v) 1183 } 1184 1185 // Deallocate any values which are no longer live. Phis are excluded. 1186 for r := register(0); r < s.numRegs; r++ { 1187 if phiUsed>>r&1 != 0 { 1188 continue 1189 } 1190 v := s.regs[r].v 1191 if v != nil && !regValLiveSet.contains(v.ID) { 1192 s.freeReg(r) 1193 } 1194 } 1195 1196 // Save the starting state for use by merge edges. 1197 // We append to a stack allocated variable that we'll 1198 // later copy into s.startRegs in one fell swoop, to save 1199 // on allocations. 
1200 regList := make([]startReg, 0, 32) 1201 for r := register(0); r < s.numRegs; r++ { 1202 v := s.regs[r].v 1203 if v == nil { 1204 continue 1205 } 1206 if phiUsed>>r&1 != 0 { 1207 // Skip registers that phis used, we'll handle those 1208 // specially during merge edge processing. 1209 continue 1210 } 1211 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos}) 1212 s.startRegsMask |= regMask(1) << r 1213 } 1214 s.startRegs[b.ID] = make([]startReg, len(regList)) 1215 copy(s.startRegs[b.ID], regList) 1216 1217 if s.f.pass.debug > regDebug { 1218 fmt.Printf("after phis\n") 1219 for _, x := range s.startRegs[b.ID] { 1220 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID) 1221 } 1222 } 1223 } 1224 1225 // Allocate space to record the desired registers for each value. 1226 if l := len(oldSched); cap(dinfo) < l { 1227 dinfo = make([]dentry, l) 1228 } else { 1229 dinfo = dinfo[:l] 1230 for i := range dinfo { 1231 dinfo[i] = dentry{} 1232 } 1233 } 1234 1235 // Load static desired register info at the end of the block. 1236 desired.copy(&s.desired[b.ID]) 1237 1238 // Check actual assigned registers at the start of the next block(s). 1239 // Dynamically assigned registers will trump the static 1240 // desired registers computed during liveness analysis. 1241 // Note that we do this phase after startRegs is set above, so that 1242 // we get the right behavior for a block which branches to itself. 1243 for _, e := range b.Succs { 1244 succ := e.b 1245 // TODO: prioritize likely successor? 1246 for _, x := range s.startRegs[succ.ID] { 1247 desired.add(x.v.ID, x.r) 1248 } 1249 // Process phi ops in succ. 1250 pidx := e.i 1251 for _, v := range succ.Values { 1252 if v.Op != OpPhi { 1253 break 1254 } 1255 if !s.values[v.ID].needReg { 1256 continue 1257 } 1258 rp, ok := s.f.getHome(v.ID).(*Register) 1259 if !ok { 1260 // If v is not assigned a register, pick a register assigned to one of v's inputs. 1261 // Hopefully v will get assigned that register later. 1262 // If the inputs have allocated register information, add it to desired, 1263 // which may reduce spill or copy operations when the register is available. 1264 for _, a := range v.Args { 1265 rp, ok = s.f.getHome(a.ID).(*Register) 1266 if ok { 1267 break 1268 } 1269 } 1270 if !ok { 1271 continue 1272 } 1273 } 1274 desired.add(v.Args[pidx].ID, register(rp.num)) 1275 } 1276 } 1277 // Walk values backwards computing desired register info. 1278 // See computeLive for more comments. 1279 for i := len(oldSched) - 1; i >= 0; i-- { 1280 v := oldSched[i] 1281 prefs := desired.remove(v.ID) 1282 regspec := s.regspec(v) 1283 desired.clobber(regspec.clobbers) 1284 for _, j := range regspec.inputs { 1285 if countRegs(j.regs) != 1 { 1286 continue 1287 } 1288 desired.clobber(j.regs) 1289 desired.add(v.Args[j.idx].ID, pickReg(j.regs)) 1290 } 1291 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 { 1292 if opcodeTable[v.Op].commutative { 1293 desired.addList(v.Args[1].ID, prefs) 1294 } 1295 desired.addList(v.Args[0].ID, prefs) 1296 } 1297 // Save desired registers for this value. 1298 dinfo[i].out = prefs 1299 for j, a := range v.Args { 1300 if j >= len(dinfo[i].in) { 1301 break 1302 } 1303 dinfo[i].in[j] = desired.get(a.ID) 1304 } 1305 } 1306 1307 // Process all the non-phi values. 
1308 for idx, v := range oldSched { 1309 tmpReg := noRegister 1310 if s.f.pass.debug > regDebug { 1311 fmt.Printf(" processing %s\n", v.LongString()) 1312 } 1313 regspec := s.regspec(v) 1314 if v.Op == OpPhi { 1315 f.Fatalf("phi %s not at start of block", v) 1316 } 1317 if v.Op == OpSP { 1318 s.assignReg(s.SPReg, v, v) 1319 b.Values = append(b.Values, v) 1320 s.advanceUses(v) 1321 s.sp = v.ID 1322 continue 1323 } 1324 if v.Op == OpSB { 1325 s.assignReg(s.SBReg, v, v) 1326 b.Values = append(b.Values, v) 1327 s.advanceUses(v) 1328 s.sb = v.ID 1329 continue 1330 } 1331 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN { 1332 if s.values[v.ID].needReg { 1333 if v.Op == OpSelectN { 1334 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v) 1335 } else { 1336 var i = 0 1337 if v.Op == OpSelect1 { 1338 i = 1 1339 } 1340 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v) 1341 } 1342 } 1343 b.Values = append(b.Values, v) 1344 s.advanceUses(v) 1345 continue 1346 } 1347 if v.Op == OpGetG && s.f.Config.hasGReg { 1348 // use hardware g register 1349 if s.regs[s.GReg].v != nil { 1350 s.freeReg(s.GReg) // kick out the old value 1351 } 1352 s.assignReg(s.GReg, v, v) 1353 b.Values = append(b.Values, v) 1354 s.advanceUses(v) 1355 continue 1356 } 1357 if v.Op == OpArg { 1358 // Args are "pre-spilled" values. We don't allocate 1359 // any register here. We just set up the spill pointer to 1360 // point at itself and any later user will restore it to use it. 1361 s.values[v.ID].spill = v 1362 b.Values = append(b.Values, v) 1363 s.advanceUses(v) 1364 continue 1365 } 1366 if v.Op == OpKeepAlive { 1367 // Make sure the argument to v is still live here. 1368 s.advanceUses(v) 1369 a := v.Args[0] 1370 vi := &s.values[a.ID] 1371 if vi.regs == 0 && !vi.rematerializeable { 1372 // Use the spill location. 1373 // This forces later liveness analysis to make the 1374 // value live at this point. 1375 v.SetArg(0, s.makeSpill(a, b)) 1376 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable { 1377 // Rematerializeable value with a gc.Node. This is the address of 1378 // a stack object (e.g. an LEAQ). Keep the object live. 1379 // Change it to VarLive, which is what plive expects for locals. 1380 v.Op = OpVarLive 1381 v.SetArgs1(v.Args[1]) 1382 v.Aux = a.Aux 1383 } else { 1384 // In-register and rematerializeable values are already live. 1385 // These are typically rematerializeable constants like nil, 1386 // or values of a variable that were modified since the last call. 1387 v.Op = OpCopy 1388 v.SetArgs1(v.Args[1]) 1389 } 1390 b.Values = append(b.Values, v) 1391 continue 1392 } 1393 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 { 1394 // No register allocation required (or none specified yet) 1395 if s.doClobber && v.Op.IsCall() { 1396 s.clobberRegs(regspec.clobbers) 1397 } 1398 s.freeRegs(regspec.clobbers) 1399 b.Values = append(b.Values, v) 1400 s.advanceUses(v) 1401 continue 1402 } 1403 1404 if s.values[v.ID].rematerializeable { 1405 // Value is rematerializeable, don't issue it here. 1406 // It will get issued just before each use (see 1407 // allocValueToReg). 
1408 for _, a := range v.Args { 1409 a.Uses-- 1410 } 1411 s.advanceUses(v) 1412 continue 1413 } 1414 1415 if s.f.pass.debug > regDebug { 1416 fmt.Printf("value %s\n", v.LongString()) 1417 fmt.Printf(" out:") 1418 for _, r := range dinfo[idx].out { 1419 if r != noRegister { 1420 fmt.Printf(" %s", &s.registers[r]) 1421 } 1422 } 1423 fmt.Println() 1424 for i := 0; i < len(v.Args) && i < 3; i++ { 1425 fmt.Printf(" in%d:", i) 1426 for _, r := range dinfo[idx].in[i] { 1427 if r != noRegister { 1428 fmt.Printf(" %s", &s.registers[r]) 1429 } 1430 } 1431 fmt.Println() 1432 } 1433 } 1434 1435 // Move arguments to registers. 1436 // First, if an arg must be in a specific register and it is already 1437 // in place, keep it. 1438 args = append(args[:0], make([]*Value, len(v.Args))...) 1439 for i, a := range v.Args { 1440 if !s.values[a.ID].needReg { 1441 args[i] = a 1442 } 1443 } 1444 for _, i := range regspec.inputs { 1445 mask := i.regs 1446 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 { 1447 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos) 1448 } 1449 } 1450 // Then, if an arg must be in a specific register and that 1451 // register is free, allocate that one. Otherwise when processing 1452 // another input we may kick a value into the free register, which 1453 // then will be kicked out again. 1454 // This is a common case for passing-in-register arguments for 1455 // function calls. 1456 for { 1457 freed := false 1458 for _, i := range regspec.inputs { 1459 if args[i.idx] != nil { 1460 continue // already allocated 1461 } 1462 mask := i.regs 1463 if countRegs(mask) == 1 && mask&^s.used != 0 { 1464 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos) 1465 // If the input is in other registers that will be clobbered by v, 1466 // or the input is dead, free the registers. This may make room 1467 // for other inputs. 1468 oldregs := s.values[v.Args[i.idx].ID].regs 1469 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) { 1470 s.freeRegs(oldregs &^ mask &^ s.nospill) 1471 freed = true 1472 } 1473 } 1474 } 1475 if !freed { 1476 break 1477 } 1478 } 1479 // Last, allocate remaining ones, in an ordering defined 1480 // by the register specification (most constrained first). 1481 for _, i := range regspec.inputs { 1482 if args[i.idx] != nil { 1483 continue // already allocated 1484 } 1485 mask := i.regs 1486 if mask&s.values[v.Args[i.idx].ID].regs == 0 { 1487 // Need a new register for the input. 1488 mask &= s.allocatable 1489 mask &^= s.nospill 1490 // Used desired register if available. 1491 if i.idx < 3 { 1492 for _, r := range dinfo[idx].in[i.idx] { 1493 if r != noRegister && (mask&^s.used)>>r&1 != 0 { 1494 // Desired register is allowed and unused. 1495 mask = regMask(1) << r 1496 break 1497 } 1498 } 1499 } 1500 // Avoid registers we're saving for other values. 1501 if mask&^desired.avoid != 0 { 1502 mask &^= desired.avoid 1503 } 1504 } 1505 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos) 1506 } 1507 1508 // If the output clobbers the input register, make sure we have 1509 // at least two copies of the input register so we don't 1510 // have to reload the value from the spill location. 1511 if opcodeTable[v.Op].resultInArg0 { 1512 var m regMask 1513 if !s.liveAfterCurrentInstruction(v.Args[0]) { 1514 // arg0 is dead. We can clobber its register. 
1515 goto ok 1516 } 1517 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) { 1518 args[0], args[1] = args[1], args[0] 1519 goto ok 1520 } 1521 if s.values[v.Args[0].ID].rematerializeable { 1522 // We can rematerialize the input, don't worry about clobbering it. 1523 goto ok 1524 } 1525 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable { 1526 args[0], args[1] = args[1], args[0] 1527 goto ok 1528 } 1529 if countRegs(s.values[v.Args[0].ID].regs) >= 2 { 1530 // we have at least 2 copies of arg0. We can afford to clobber one. 1531 goto ok 1532 } 1533 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 { 1534 args[0], args[1] = args[1], args[0] 1535 goto ok 1536 } 1537 1538 // We can't overwrite arg0 (or arg1, if commutative). So we 1539 // need to make a copy of an input so we have a register we can modify. 1540 1541 // Possible new registers to copy into. 1542 m = s.compatRegs(v.Args[0].Type) &^ s.used 1543 if m == 0 { 1544 // No free registers. In this case we'll just clobber 1545 // an input and future uses of that input must use a restore. 1546 // TODO(khr): We should really do this like allocReg does it, 1547 // spilling the value with the most distant next use. 1548 goto ok 1549 } 1550 1551 // Try to move an input to the desired output, if allowed. 1552 for _, r := range dinfo[idx].out { 1553 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 { 1554 m = regMask(1) << r 1555 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos) 1556 // Note: we update args[0] so the instruction will 1557 // use the register copy we just made. 1558 goto ok 1559 } 1560 } 1561 // Try to copy input to its desired location & use its old 1562 // location as the result register. 1563 for _, r := range dinfo[idx].in[0] { 1564 if r != noRegister && m>>r&1 != 0 { 1565 m = regMask(1) << r 1566 c := s.allocValToReg(v.Args[0], m, true, v.Pos) 1567 s.copies[c] = false 1568 // Note: no update to args[0] so the instruction will 1569 // use the original copy. 1570 goto ok 1571 } 1572 } 1573 if opcodeTable[v.Op].commutative { 1574 for _, r := range dinfo[idx].in[1] { 1575 if r != noRegister && m>>r&1 != 0 { 1576 m = regMask(1) << r 1577 c := s.allocValToReg(v.Args[1], m, true, v.Pos) 1578 s.copies[c] = false 1579 args[0], args[1] = args[1], args[0] 1580 goto ok 1581 } 1582 } 1583 } 1584 1585 // Avoid future fixed uses if we can. 1586 if m&^desired.avoid != 0 { 1587 m &^= desired.avoid 1588 } 1589 // Save input 0 to a new register so we can clobber it. 1590 c := s.allocValToReg(v.Args[0], m, true, v.Pos) 1591 s.copies[c] = false 1592 1593 // Normally we use the register of the old copy of input 0 as the target. 1594 // However, if input 0 is already in its desired register then we use 1595 // the register of the new copy instead. 1596 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 { 1597 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok { 1598 r := register(rp.num) 1599 for _, r2 := range dinfo[idx].in[0] { 1600 if r == r2 { 1601 args[0] = c 1602 break 1603 } 1604 } 1605 } 1606 } 1607 } 1608 1609 ok: 1610 // Pick a temporary register if needed. 1611 // It should be distinct from all the input registers, so we 1612 // allocate it after all the input registers, but before 1613 // the input registers are freed via advanceUses below. 1614 // (Not all instructions need that distinct part, but it is conservative.) 
1615 if opcodeTable[v.Op].needIntTemp { 1616 m := s.allocatable & s.f.Config.gpRegMask 1617 if m&^desired.avoid&^s.nospill != 0 { 1618 m &^= desired.avoid 1619 } 1620 tmpReg = s.allocReg(m, &tmpVal) 1621 s.nospill |= regMask(1) << tmpReg 1622 } 1623 1624 // Now that all args are in regs, we're ready to issue the value itself. 1625 // Before we pick a register for the output value, allow input registers 1626 // to be deallocated. We do this here so that the output can use the 1627 // same register as a dying input. 1628 if !opcodeTable[v.Op].resultNotInArgs { 1629 s.tmpused = s.nospill 1630 s.nospill = 0 1631 s.advanceUses(v) // frees any registers holding args that are no longer live 1632 } 1633 1634 // Dump any registers which will be clobbered 1635 if s.doClobber && v.Op.IsCall() { 1636 // clobber registers that are marked as clobber in regmask, but 1637 // don't clobber inputs. 1638 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill) 1639 } 1640 s.freeRegs(regspec.clobbers) 1641 s.tmpused |= regspec.clobbers 1642 1643 // Pick registers for outputs. 1644 { 1645 outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below. 1646 maxOutIdx := -1 1647 var used regMask 1648 if tmpReg != noRegister { 1649 // Ensure output registers are distinct from the temporary register. 1650 // (Not all instructions need that distinct part, but it is conservative.) 1651 used |= regMask(1) << tmpReg 1652 } 1653 for _, out := range regspec.outputs { 1654 mask := out.regs & s.allocatable &^ used 1655 if mask == 0 { 1656 continue 1657 } 1658 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 { 1659 if !opcodeTable[v.Op].commutative { 1660 // Output must use the same register as input 0. 1661 r := register(s.f.getHome(args[0].ID).(*Register).num) 1662 if mask>>r&1 == 0 { 1663 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString()) 1664 } 1665 mask = regMask(1) << r 1666 } else { 1667 // Output must use the same register as input 0 or 1. 1668 r0 := register(s.f.getHome(args[0].ID).(*Register).num) 1669 r1 := register(s.f.getHome(args[1].ID).(*Register).num) 1670 // Check r0 and r1 for desired output register. 1671 found := false 1672 for _, r := range dinfo[idx].out { 1673 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 { 1674 mask = regMask(1) << r 1675 found = true 1676 if r == r1 { 1677 args[0], args[1] = args[1], args[0] 1678 } 1679 break 1680 } 1681 } 1682 if !found { 1683 // Neither are desired, pick r0. 1684 mask = regMask(1) << r0 1685 } 1686 } 1687 } 1688 if out.idx == 0 { // desired registers only apply to the first element of a tuple result 1689 for _, r := range dinfo[idx].out { 1690 if r != noRegister && (mask&^s.used)>>r&1 != 0 { 1691 // Desired register is allowed and unused. 1692 mask = regMask(1) << r 1693 break 1694 } 1695 } 1696 } 1697 // Avoid registers we're saving for other values. 
1698 if mask&^desired.avoid&^s.nospill&^s.used != 0 { 1699 mask &^= desired.avoid 1700 } 1701 r := s.allocReg(mask, v) 1702 if out.idx > maxOutIdx { 1703 maxOutIdx = out.idx 1704 } 1705 outRegs[out.idx] = r 1706 used |= regMask(1) << r 1707 s.tmpused |= regMask(1) << r 1708 } 1709 // Record register choices 1710 if v.Type.IsTuple() { 1711 var outLocs LocPair 1712 if r := outRegs[0]; r != noRegister { 1713 outLocs[0] = &s.registers[r] 1714 } 1715 if r := outRegs[1]; r != noRegister { 1716 outLocs[1] = &s.registers[r] 1717 } 1718 s.f.setHome(v, outLocs) 1719 // Note that subsequent SelectX instructions will do the assignReg calls. 1720 } else if v.Type.IsResults() { 1721 // preallocate outLocs to the right size, which is maxOutIdx+1 1722 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1) 1723 for i := 0; i <= maxOutIdx; i++ { 1724 if r := outRegs[i]; r != noRegister { 1725 outLocs[i] = &s.registers[r] 1726 } 1727 } 1728 s.f.setHome(v, outLocs) 1729 } else { 1730 if r := outRegs[0]; r != noRegister { 1731 s.assignReg(r, v, v) 1732 } 1733 } 1734 if tmpReg != noRegister { 1735 // Remember the temp register allocation, if any. 1736 if s.f.tempRegs == nil { 1737 s.f.tempRegs = map[ID]*Register{} 1738 } 1739 s.f.tempRegs[v.ID] = &s.registers[tmpReg] 1740 } 1741 } 1742 1743 // deallocate dead args, if we have not done so 1744 if opcodeTable[v.Op].resultNotInArgs { 1745 s.nospill = 0 1746 s.advanceUses(v) // frees any registers holding args that are no longer live 1747 } 1748 s.tmpused = 0 1749 1750 // Issue the Value itself. 1751 for i, a := range args { 1752 v.SetArg(i, a) // use register version of arguments 1753 } 1754 b.Values = append(b.Values, v) 1755 } 1756 1757 // Copy the control values - we need this so we can reduce the 1758 // uses property of these values later. 1759 controls := append(make([]*Value, 0, 2), b.ControlValues()...) 1760 1761 // Load control values into registers. 1762 for i, v := range b.ControlValues() { 1763 if !s.values[v.ID].needReg { 1764 continue 1765 } 1766 if s.f.pass.debug > regDebug { 1767 fmt.Printf(" processing control %s\n", v.LongString()) 1768 } 1769 // We assume that a control input can be passed in any 1770 // type-compatible register. If this turns out not to be true, 1771 // we'll need to introduce a regspec for a block's control value. 1772 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos)) 1773 } 1774 1775 // Reduce the uses of the control values once registers have been loaded. 1776 // This loop is equivalent to the advanceUses method. 1777 for _, v := range controls { 1778 vi := &s.values[v.ID] 1779 if !vi.needReg { 1780 continue 1781 } 1782 // Remove this use from the uses list. 1783 u := vi.uses 1784 vi.uses = u.next 1785 if u.next == nil { 1786 s.freeRegs(vi.regs) // value is dead 1787 } 1788 u.next = s.freeUseRecords 1789 s.freeUseRecords = u 1790 } 1791 1792 // If we are approaching a merge point and we are the primary 1793 // predecessor of it, find live values that we use soon after 1794 // the merge point and promote them to registers now. 1795 if len(b.Succs) == 1 { 1796 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil { 1797 s.freeReg(s.GReg) // Spill value in G register before any merge. 1798 } 1799 // For this to be worthwhile, the loop must have no calls in it. 1800 top := b.Succs[0].b 1801 loop := s.loopnest.b2l[top.ID] 1802 if loop == nil || loop.header != top || loop.containsUnavoidableCall { 1803 goto badloop 1804 } 1805 1806 // TODO: sort by distance, pick the closest ones? 
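// Preload values that are live into the loop and used soon after its header:
// skip anything already in a register or cheap to rematerialize, and load the
// rest into compatible free registers, preferring their desired registers.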
1807 for _, live := range s.live[b.ID] {
1808 if live.dist >= unlikelyDistance {
1809 // Don't preload anything live after the loop.
1810 continue
1811 }
1812 vid := live.ID
1813 vi := &s.values[vid]
1814 if vi.regs != 0 {
1815 continue
1816 }
1817 if vi.rematerializeable {
1818 continue
1819 }
1820 v := s.orig[vid]
1821 m := s.compatRegs(v.Type) &^ s.used
1822 // Use desired register if available.
1823 outerloop:
1824 for _, e := range desired.entries {
1825 if e.ID != v.ID {
1826 continue
1827 }
1828 for _, r := range e.regs {
1829 if r != noRegister && m>>r&1 != 0 {
1830 m = regMask(1) << r
1831 break outerloop
1832 }
1833 }
1834 }
1835 if m&^desired.avoid != 0 {
1836 m &^= desired.avoid
1837 }
1838 if m != 0 {
1839 s.allocValToReg(v, m, false, b.Pos)
1840 }
1841 }
1842 }
1843 badloop:
1844 ;
1845 
1846 // Save end-of-block register state.
1847 // First count how many; this cuts allocations in half.
1848 k := 0
1849 for r := register(0); r < s.numRegs; r++ {
1850 v := s.regs[r].v
1851 if v == nil {
1852 continue
1853 }
1854 k++
1855 }
1856 regList := make([]endReg, 0, k)
1857 for r := register(0); r < s.numRegs; r++ {
1858 v := s.regs[r].v
1859 if v == nil {
1860 continue
1861 }
1862 regList = append(regList, endReg{r, v, s.regs[r].c})
1863 }
1864 s.endRegs[b.ID] = regList
1865 
1866 if checkEnabled {
1867 regValLiveSet.clear()
1868 for _, x := range s.live[b.ID] {
1869 regValLiveSet.add(x.ID)
1870 }
1871 for r := register(0); r < s.numRegs; r++ {
1872 v := s.regs[r].v
1873 if v == nil {
1874 continue
1875 }
1876 if !regValLiveSet.contains(v.ID) {
1877 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
1878 }
1879 }
1880 }
1881 
1882 // If a value is live at the end of the block and
1883 // isn't in a register, generate a use for the spill location.
1884 // We need to remember this information so that
1885 // the liveness analysis in stackalloc is correct.
1886 for _, e := range s.live[b.ID] {
1887 vi := &s.values[e.ID]
1888 if vi.regs != 0 {
1889 // in a register, we'll use that source for the merge.
1890 continue
1891 }
1892 if vi.rematerializeable {
1893 // we'll rematerialize during the merge.
1894 continue
1895 }
1896 if s.f.pass.debug > regDebug {
1897 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
1898 }
1899 spill := s.makeSpill(s.orig[e.ID], b)
1900 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
1901 }
1902 
1903 // Clear any final uses.
1904 // All that is left should be the pseudo-uses added for values which
1905 // are live at the end of b.
1906 for _, e := range s.live[b.ID] {
1907 u := s.values[e.ID].uses
1908 if u == nil {
1909 f.Fatalf("live at end, no uses v%d", e.ID)
1910 }
1911 if u.next != nil {
1912 f.Fatalf("live at end, too many uses v%d", e.ID)
1913 }
1914 s.values[e.ID].uses = nil
1915 u.next = s.freeUseRecords
1916 s.freeUseRecords = u
1917 }
1918 
1919 // allocReg may have dropped registers from startRegsMask that
1920 // aren't actually needed in startRegs. Synchronize back to
1921 // startRegs.
1922 //
1923 // This must be done before placing spills, which will look at
1924 // startRegs to decide if a block is a valid block for a spill.
1925 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
1926 regs := make([]startReg, 0, c)
1927 for _, sr := range s.startRegs[b.ID] {
1928 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
1929 continue
1930 }
1931 regs = append(regs, sr)
1932 }
1933 s.startRegs[b.ID] = regs
1934 }
1935 }
1936 
1937 // Decide where the spills we generated will go.
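// (Each spill created blockless by makeSpill is assigned a block here:
// starting from the value's own block, placeSpills walks down the dominator
// tree toward the restores, without moving the spill into a deeper loop.)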
1938 s.placeSpills() 1939 1940 // Anything that didn't get a register gets a stack location here. 1941 // (StoreReg, stack-based phis, inputs, ...) 1942 stacklive := stackalloc(s.f, s.spillLive) 1943 1944 // Fix up all merge edges. 1945 s.shuffle(stacklive) 1946 1947 // Erase any copies we never used. 1948 // Also, an unused copy might be the only use of another copy, 1949 // so continue erasing until we reach a fixed point. 1950 for { 1951 progress := false 1952 for c, used := range s.copies { 1953 if !used && c.Uses == 0 { 1954 if s.f.pass.debug > regDebug { 1955 fmt.Printf("delete copied value %s\n", c.LongString()) 1956 } 1957 c.resetArgs() 1958 f.freeValue(c) 1959 delete(s.copies, c) 1960 progress = true 1961 } 1962 } 1963 if !progress { 1964 break 1965 } 1966 } 1967 1968 for _, b := range s.visitOrder { 1969 i := 0 1970 for _, v := range b.Values { 1971 if v.Op == OpInvalid { 1972 continue 1973 } 1974 b.Values[i] = v 1975 i++ 1976 } 1977 b.Values = b.Values[:i] 1978 } 1979 } 1980 1981 func (s *regAllocState) placeSpills() { 1982 mustBeFirst := func(op Op) bool { 1983 return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg 1984 } 1985 1986 // Start maps block IDs to the list of spills 1987 // that go at the start of the block (but after any phis). 1988 start := map[ID][]*Value{} 1989 // After maps value IDs to the list of spills 1990 // that go immediately after that value ID. 1991 after := map[ID][]*Value{} 1992 1993 for i := range s.values { 1994 vi := s.values[i] 1995 spill := vi.spill 1996 if spill == nil { 1997 continue 1998 } 1999 if spill.Block != nil { 2000 // Some spills are already fully set up, 2001 // like OpArgs and stack-based phis. 2002 continue 2003 } 2004 v := s.orig[i] 2005 2006 // Walk down the dominator tree looking for a good place to 2007 // put the spill of v. At the start "best" is the best place 2008 // we have found so far. 2009 // TODO: find a way to make this O(1) without arbitrary cutoffs. 2010 if v == nil { 2011 panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString())) 2012 } 2013 best := v.Block 2014 bestArg := v 2015 var bestDepth int16 2016 if l := s.loopnest.b2l[best.ID]; l != nil { 2017 bestDepth = l.depth 2018 } 2019 b := best 2020 const maxSpillSearch = 100 2021 for i := 0; i < maxSpillSearch; i++ { 2022 // Find the child of b in the dominator tree which 2023 // dominates all restores. 2024 p := b 2025 b = nil 2026 for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 { 2027 if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax { 2028 // c also dominates all restores. Walk down into c. 2029 b = c 2030 break 2031 } 2032 } 2033 if b == nil { 2034 // Ran out of blocks which dominate all restores. 2035 break 2036 } 2037 2038 var depth int16 2039 if l := s.loopnest.b2l[b.ID]; l != nil { 2040 depth = l.depth 2041 } 2042 if depth > bestDepth { 2043 // Don't push the spill into a deeper loop. 2044 continue 2045 } 2046 2047 // If v is in a register at the start of b, we can 2048 // place the spill here (after the phis). 2049 if len(b.Preds) == 1 { 2050 for _, e := range s.endRegs[b.Preds[0].b.ID] { 2051 if e.v == v { 2052 // Found a better spot for the spill. 2053 best = b 2054 bestArg = e.c 2055 bestDepth = depth 2056 break 2057 } 2058 } 2059 } else { 2060 for _, e := range s.startRegs[b.ID] { 2061 if e.v == v { 2062 // Found a better spot for the spill. 
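// (v is in a register at the entry to b, so a spill placed at the
// top of b can read it directly.)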
2063 best = b 2064 bestArg = e.c 2065 bestDepth = depth 2066 break 2067 } 2068 } 2069 } 2070 } 2071 2072 // Put the spill in the best block we found. 2073 spill.Block = best 2074 spill.AddArg(bestArg) 2075 if best == v.Block && !mustBeFirst(v.Op) { 2076 // Place immediately after v. 2077 after[v.ID] = append(after[v.ID], spill) 2078 } else { 2079 // Place at the start of best block. 2080 start[best.ID] = append(start[best.ID], spill) 2081 } 2082 } 2083 2084 // Insert spill instructions into the block schedules. 2085 var oldSched []*Value 2086 for _, b := range s.visitOrder { 2087 nfirst := 0 2088 for _, v := range b.Values { 2089 if !mustBeFirst(v.Op) { 2090 break 2091 } 2092 nfirst++ 2093 } 2094 oldSched = append(oldSched[:0], b.Values[nfirst:]...) 2095 b.Values = b.Values[:nfirst] 2096 b.Values = append(b.Values, start[b.ID]...) 2097 for _, v := range oldSched { 2098 b.Values = append(b.Values, v) 2099 b.Values = append(b.Values, after[v.ID]...) 2100 } 2101 } 2102 } 2103 2104 // shuffle fixes up all the merge edges (those going into blocks of indegree > 1). 2105 func (s *regAllocState) shuffle(stacklive [][]ID) { 2106 var e edgeState 2107 e.s = s 2108 e.cache = map[ID][]*Value{} 2109 e.contents = map[Location]contentRecord{} 2110 if s.f.pass.debug > regDebug { 2111 fmt.Printf("shuffle %s\n", s.f.Name) 2112 fmt.Println(s.f.String()) 2113 } 2114 2115 for _, b := range s.visitOrder { 2116 if len(b.Preds) <= 1 { 2117 continue 2118 } 2119 e.b = b 2120 for i, edge := range b.Preds { 2121 p := edge.b 2122 e.p = p 2123 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID]) 2124 e.process() 2125 } 2126 } 2127 2128 if s.f.pass.debug > regDebug { 2129 fmt.Printf("post shuffle %s\n", s.f.Name) 2130 fmt.Println(s.f.String()) 2131 } 2132 } 2133 2134 type edgeState struct { 2135 s *regAllocState 2136 p, b *Block // edge goes from p->b. 2137 2138 // for each pre-regalloc value, a list of equivalent cached values 2139 cache map[ID][]*Value 2140 cachedVals []ID // (superset of) keys of the above map, for deterministic iteration 2141 2142 // map from location to the value it contains 2143 contents map[Location]contentRecord 2144 2145 // desired destination locations 2146 destinations []dstRecord 2147 extra []dstRecord 2148 2149 usedRegs regMask // registers currently holding something 2150 uniqueRegs regMask // registers holding the only copy of a value 2151 finalRegs regMask // registers holding final target 2152 rematerializeableRegs regMask // registers that hold rematerializeable values 2153 } 2154 2155 type contentRecord struct { 2156 vid ID // pre-regalloc value 2157 c *Value // cached value 2158 final bool // this is a satisfied destination 2159 pos src.XPos // source position of use of the value 2160 } 2161 2162 type dstRecord struct { 2163 loc Location // register or stack slot 2164 vid ID // pre-regalloc value it should contain 2165 splice **Value // place to store reference to the generating instruction 2166 pos src.XPos // source position of use of this location 2167 } 2168 2169 // setup initializes the edge state for shuffling. 2170 func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) { 2171 if e.s.f.pass.debug > regDebug { 2172 fmt.Printf("edge %s->%s\n", e.p, e.b) 2173 } 2174 2175 // Clear state. 
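// (The edgeState is reused for every merge edge, so everything recorded for
// the previous edge must be dropped before this one is processed.)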
2176 for _, vid := range e.cachedVals {
2177 delete(e.cache, vid)
2178 }
2179 e.cachedVals = e.cachedVals[:0]
2180 for k := range e.contents {
2181 delete(e.contents, k)
2182 }
2183 e.usedRegs = 0
2184 e.uniqueRegs = 0
2185 e.finalRegs = 0
2186 e.rematerializeableRegs = 0
2187 
2188 // Live registers can be sources.
2189 for _, x := range srcReg {
2190 e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care about the position of the source
2191 }
2192 // So can all of the spill locations.
2193 for _, spillID := range stacklive {
2194 v := e.s.orig[spillID]
2195 spill := e.s.values[v.ID].spill
2196 if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
2197 // Spills were placed that only dominate the uses found
2198 // during the first regalloc pass. The edge fixup code
2199 // can't use a spill location if the spill doesn't dominate
2200 // the edge.
2201 // We are guaranteed that if the spill doesn't dominate this edge,
2202 // then the value is available in a register (because we called
2203 // makeSpill for every value not in a register at the start
2204 // of an edge).
2205 continue
2206 }
2207 e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care about the position of the source
2208 }
2209 
2210 // Figure out all the destinations we need.
2211 dsts := e.destinations[:0]
2212 for _, x := range dstReg {
2213 dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
2214 }
2215 // Phis need their args to end up in a specific location.
2216 for _, v := range e.b.Values {
2217 if v.Op != OpPhi {
2218 break
2219 }
2220 loc := e.s.f.getHome(v.ID)
2221 if loc == nil {
2222 continue
2223 }
2224 dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
2225 }
2226 e.destinations = dsts
2227 
2228 if e.s.f.pass.debug > regDebug {
2229 for _, vid := range e.cachedVals {
2230 a := e.cache[vid]
2231 for _, c := range a {
2232 fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
2233 }
2234 }
2235 for _, d := range e.destinations {
2236 fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
2237 }
2238 }
2239 }
2240 
2241 // process generates code to move all the values to the right destination locations.
2242 func (e *edgeState) process() {
2243 dsts := e.destinations
2244 
2245 // Process the destinations until they are all satisfied.
2246 for len(dsts) > 0 {
2247 i := 0
2248 for _, d := range dsts {
2249 if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
2250 // Failed - save for next iteration.
2251 dsts[i] = d
2252 i++
2253 }
2254 }
2255 if i < len(dsts) {
2256 // Made some progress. Go around again.
2257 dsts = dsts[:i]
2258 
2259 // Append any extra destinations we generated.
2260 dsts = append(dsts, e.extra...)
2261 e.extra = e.extra[:0]
2262 continue
2263 }
2264 
2265 // We made no progress. That means that any
2266 // remaining unsatisfied moves are in simple cycles.
2267 // For example, A -> B -> C -> D -> A.
2268 // A ----> B
2269 // ^      |
2270 // |      |
2271 // |      v
2272 // D <---- C
2273 
2274 // To break the cycle, we pick an unused register, say R,
2275 // and put a copy of B there.
2276 // A ----> B
2277 // ^      |
2278 // |      |
2279 // |      v
2280 // D <---- C <---- R=copyofB
2281 // When we resume the outer loop, the A->B move can now proceed,
2282 // and eventually the whole cycle completes.
2283 
2284 // Copy any cycle location to a temp register. This duplicates
2285 // one of the cycle entries, allowing the just-duplicated value
2286 // to be overwritten and the cycle to proceed.
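// For example, a two-register swap: AX should end up holding the value that
// is now in BX, and BX the value now in AX. Neither move can go first, since
// each destination holds the last copy of its current value. Copying AX's
// occupant to a scratch register first lets AX be overwritten; the remaining
// move is then satisfied from the scratch copy.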
2287 d := dsts[0] 2288 loc := d.loc 2289 vid := e.contents[loc].vid 2290 c := e.contents[loc].c 2291 r := e.findRegFor(c.Type) 2292 if e.s.f.pass.debug > regDebug { 2293 fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c) 2294 } 2295 e.erase(r) 2296 pos := d.pos.WithNotStmt() 2297 if _, isReg := loc.(*Register); isReg { 2298 c = e.p.NewValue1(pos, OpCopy, c.Type, c) 2299 } else { 2300 c = e.p.NewValue1(pos, OpLoadReg, c.Type, c) 2301 } 2302 e.set(r, vid, c, false, pos) 2303 if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) { 2304 e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString()) 2305 } 2306 } 2307 } 2308 2309 // processDest generates code to put value vid into location loc. Returns true 2310 // if progress was made. 2311 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool { 2312 pos = pos.WithNotStmt() 2313 occupant := e.contents[loc] 2314 if occupant.vid == vid { 2315 // Value is already in the correct place. 2316 e.contents[loc] = contentRecord{vid, occupant.c, true, pos} 2317 if splice != nil { 2318 (*splice).Uses-- 2319 *splice = occupant.c 2320 occupant.c.Uses++ 2321 } 2322 // Note: if splice==nil then c will appear dead. This is 2323 // non-SSA formed code, so be careful after this pass not to run 2324 // deadcode elimination. 2325 if _, ok := e.s.copies[occupant.c]; ok { 2326 // The copy at occupant.c was used to avoid spill. 2327 e.s.copies[occupant.c] = true 2328 } 2329 return true 2330 } 2331 2332 // Check if we're allowed to clobber the destination location. 2333 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable { 2334 // We can't overwrite the last copy 2335 // of a value that needs to survive. 2336 return false 2337 } 2338 2339 // Copy from a source of v, register preferred. 2340 v := e.s.orig[vid] 2341 var c *Value 2342 var src Location 2343 if e.s.f.pass.debug > regDebug { 2344 fmt.Printf("moving v%d to %s\n", vid, loc) 2345 fmt.Printf("sources of v%d:", vid) 2346 } 2347 for _, w := range e.cache[vid] { 2348 h := e.s.f.getHome(w.ID) 2349 if e.s.f.pass.debug > regDebug { 2350 fmt.Printf(" %s:%s", h, w) 2351 } 2352 _, isreg := h.(*Register) 2353 if src == nil || isreg { 2354 c = w 2355 src = h 2356 } 2357 } 2358 if e.s.f.pass.debug > regDebug { 2359 if src != nil { 2360 fmt.Printf(" [use %s]\n", src) 2361 } else { 2362 fmt.Printf(" [no source]\n") 2363 } 2364 } 2365 _, dstReg := loc.(*Register) 2366 2367 // Pre-clobber destination. This avoids the 2368 // following situation: 2369 // - v is currently held in R0 and stacktmp0. 2370 // - We want to copy stacktmp1 to stacktmp0. 2371 // - We choose R0 as the temporary register. 2372 // During the copy, both R0 and stacktmp0 are 2373 // clobbered, losing both copies of v. Oops! 2374 // Erasing the destination early means R0 will not 2375 // be chosen as the temp register, as it will then 2376 // be the last copy of v. 2377 e.erase(loc) 2378 var x *Value 2379 if c == nil || e.s.values[vid].rematerializeable { 2380 if !e.s.values[vid].rematerializeable { 2381 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString()) 2382 } 2383 if dstReg { 2384 x = v.copyInto(e.p) 2385 } else { 2386 // Rematerialize into stack slot. Need a free 2387 // register to accomplish this. 
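// (findRegFor may itself have to spill another value to free up that
// register; see findRegFor below.)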
2388 r := e.findRegFor(v.Type) 2389 e.erase(r) 2390 x = v.copyIntoWithXPos(e.p, pos) 2391 e.set(r, vid, x, false, pos) 2392 // Make sure we spill with the size of the slot, not the 2393 // size of x (which might be wider due to our dropping 2394 // of narrowing conversions). 2395 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x) 2396 } 2397 } else { 2398 // Emit move from src to dst. 2399 _, srcReg := src.(*Register) 2400 if srcReg { 2401 if dstReg { 2402 x = e.p.NewValue1(pos, OpCopy, c.Type, c) 2403 } else { 2404 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c) 2405 } 2406 } else { 2407 if dstReg { 2408 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c) 2409 } else { 2410 // mem->mem. Use temp register. 2411 r := e.findRegFor(c.Type) 2412 e.erase(r) 2413 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c) 2414 e.set(r, vid, t, false, pos) 2415 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t) 2416 } 2417 } 2418 } 2419 e.set(loc, vid, x, true, pos) 2420 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) { 2421 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString()) 2422 } 2423 if splice != nil { 2424 (*splice).Uses-- 2425 *splice = x 2426 x.Uses++ 2427 } 2428 return true 2429 } 2430 2431 // set changes the contents of location loc to hold the given value and its cached representative. 2432 func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) { 2433 e.s.f.setHome(c, loc) 2434 e.contents[loc] = contentRecord{vid, c, final, pos} 2435 a := e.cache[vid] 2436 if len(a) == 0 { 2437 e.cachedVals = append(e.cachedVals, vid) 2438 } 2439 a = append(a, c) 2440 e.cache[vid] = a 2441 if r, ok := loc.(*Register); ok { 2442 if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 { 2443 e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c) 2444 } 2445 e.usedRegs |= regMask(1) << uint(r.num) 2446 if final { 2447 e.finalRegs |= regMask(1) << uint(r.num) 2448 } 2449 if len(a) == 1 { 2450 e.uniqueRegs |= regMask(1) << uint(r.num) 2451 } 2452 if len(a) == 2 { 2453 if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2454 e.uniqueRegs &^= regMask(1) << uint(t.num) 2455 } 2456 } 2457 if e.s.values[vid].rematerializeable { 2458 e.rematerializeableRegs |= regMask(1) << uint(r.num) 2459 } 2460 } 2461 if e.s.f.pass.debug > regDebug { 2462 fmt.Printf("%s\n", c.LongString()) 2463 fmt.Printf("v%d now available in %s:%s\n", vid, loc, c) 2464 } 2465 } 2466 2467 // erase removes any user of loc. 2468 func (e *edgeState) erase(loc Location) { 2469 cr := e.contents[loc] 2470 if cr.c == nil { 2471 return 2472 } 2473 vid := cr.vid 2474 2475 if cr.final { 2476 // Add a destination to move this value back into place. 2477 // Make sure it gets added to the tail of the destination queue 2478 // so we make progress on other moves first. 2479 e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos}) 2480 } 2481 2482 // Remove c from the list of cached values. 2483 a := e.cache[vid] 2484 for i, c := range a { 2485 if e.s.f.getHome(c.ID) == loc { 2486 if e.s.f.pass.debug > regDebug { 2487 fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c) 2488 } 2489 a[i], a = a[len(a)-1], a[:len(a)-1] 2490 break 2491 } 2492 } 2493 e.cache[vid] = a 2494 2495 // Update register masks. 
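// (If loc is a register, clear it from the used, final and rematerializeable
// masks; and if the value is left with exactly one cached copy that lives in
// a register, that register becomes unique again.)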
2496 if r, ok := loc.(*Register); ok { 2497 e.usedRegs &^= regMask(1) << uint(r.num) 2498 if cr.final { 2499 e.finalRegs &^= regMask(1) << uint(r.num) 2500 } 2501 e.rematerializeableRegs &^= regMask(1) << uint(r.num) 2502 } 2503 if len(a) == 1 { 2504 if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2505 e.uniqueRegs |= regMask(1) << uint(r.num) 2506 } 2507 } 2508 } 2509 2510 // findRegFor finds a register we can use to make a temp copy of type typ. 2511 func (e *edgeState) findRegFor(typ *types.Type) Location { 2512 // Which registers are possibilities. 2513 types := &e.s.f.Config.Types 2514 m := e.s.compatRegs(typ) 2515 2516 // Pick a register. In priority order: 2517 // 1) an unused register 2518 // 2) a non-unique register not holding a final value 2519 // 3) a non-unique register 2520 // 4) a register holding a rematerializeable value 2521 x := m &^ e.usedRegs 2522 if x != 0 { 2523 return &e.s.registers[pickReg(x)] 2524 } 2525 x = m &^ e.uniqueRegs &^ e.finalRegs 2526 if x != 0 { 2527 return &e.s.registers[pickReg(x)] 2528 } 2529 x = m &^ e.uniqueRegs 2530 if x != 0 { 2531 return &e.s.registers[pickReg(x)] 2532 } 2533 x = m & e.rematerializeableRegs 2534 if x != 0 { 2535 return &e.s.registers[pickReg(x)] 2536 } 2537 2538 // No register is available. 2539 // Pick a register to spill. 2540 for _, vid := range e.cachedVals { 2541 a := e.cache[vid] 2542 for _, c := range a { 2543 if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 { 2544 if !c.rematerializeable() { 2545 x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c) 2546 // Allocate a temp location to spill a register to. 2547 // The type of the slot is immaterial - it will not be live across 2548 // any safepoint. Just use a type big enough to hold any register. 2549 t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64} 2550 // TODO: reuse these slots. They'll need to be erased first. 2551 e.set(t, vid, x, false, c.Pos) 2552 if e.s.f.pass.debug > regDebug { 2553 fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString()) 2554 } 2555 } 2556 // r will now be overwritten by the caller. At some point 2557 // later, the newly saved value will be moved back to its 2558 // final destination in processDest. 2559 return r 2560 } 2561 } 2562 } 2563 2564 fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs) 2565 for _, vid := range e.cachedVals { 2566 a := e.cache[vid] 2567 for _, c := range a { 2568 fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID)) 2569 } 2570 } 2571 e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b) 2572 return nil 2573 } 2574 2575 // rematerializeable reports whether the register allocator should recompute 2576 // a value instead of spilling/restoring it. 2577 func (v *Value) rematerializeable() bool { 2578 if !opcodeTable[v.Op].rematerializeable { 2579 return false 2580 } 2581 for _, a := range v.Args { 2582 // SP and SB (generated by OpSP and OpSB) are always available. 2583 if a.Op != OpSP && a.Op != OpSB { 2584 return false 2585 } 2586 } 2587 return true 2588 } 2589 2590 type liveInfo struct { 2591 ID ID // ID of value 2592 dist int32 // # of instructions before next use 2593 pos src.XPos // source position of next use 2594 } 2595 2596 // computeLive computes a map from block ID to a list of value IDs live at the end 2597 // of that block. Together with the value ID is a count of how many instructions 2598 // to the next use of that value. The resulting map is stored in s.live. 
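// Distances are weighted: uses reached across an unlikely branch or past a
// call are pushed further into the future (see likelyDistance, normalDistance
// and unlikelyDistance above), which makes such values the preferred spill
// candidates for the greedy allocator.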
2599 // computeLive also computes the desired register information at the end of each block.
2600 // This desired register information is stored in s.desired.
2601 // TODO: this could be quadratic if lots of variables are live across lots of
2602 // basic blocks. Figure out a way to make this function (or, more precisely, the user
2603 // of this function) require only linear size & time.
2604 func (s *regAllocState) computeLive() {
2605 f := s.f
2606 s.live = make([][]liveInfo, f.NumBlocks())
2607 s.desired = make([]desiredState, f.NumBlocks())
2608 var phis []*Value
2609 
2610 live := f.newSparseMapPos(f.NumValues())
2611 defer f.retSparseMapPos(live)
2612 t := f.newSparseMapPos(f.NumValues())
2613 defer f.retSparseMapPos(t)
2614 
2615 // Keep track of which value we want in each register.
2616 var desired desiredState
2617 
2618 // Instead of iterating over f.Blocks, iterate over their postordering.
2619 // Liveness information flows backward, so starting at the end
2620 // increases the probability that we will stabilize quickly.
2621 // TODO: Do a better job yet. Here's one possibility:
2622 // Calculate the dominator tree and locate all strongly connected components.
2623 // If a value is live in one block of an SCC, it is live in all.
2624 // Walk the dominator tree from end to beginning, just once, treating SCC
2625 // components as single blocks, duplicating the calculated liveness information
2626 // out to all of them.
2627 po := f.postorder()
2628 s.loopnest = f.loopnest()
2629 s.loopnest.calculateDepths()
2630 for {
2631 changed := false
2632 
2633 for _, b := range po {
2634 // Start with known live values at the end of the block.
2635 // Add len(b.Values) to adjust from end-of-block distance
2636 // to beginning-of-block distance.
2637 live.clear()
2638 for _, e := range s.live[b.ID] {
2639 live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
2640 }
2641 
2642 // Mark control values as live.
2643 for _, c := range b.ControlValues() {
2644 if s.values[c.ID].needReg {
2645 live.set(c.ID, int32(len(b.Values)), b.Pos)
2646 }
2647 }
2648 
2649 // Propagate backwards to the start of the block.
2650 // Assumes Values have been scheduled.
2651 phis = phis[:0]
2652 for i := len(b.Values) - 1; i >= 0; i-- {
2653 v := b.Values[i]
2654 live.remove(v.ID)
2655 if v.Op == OpPhi {
2656 // save phi ops for later
2657 phis = append(phis, v)
2658 continue
2659 }
2660 if opcodeTable[v.Op].call {
2661 c := live.contents()
2662 for i := range c {
2663 c[i].val += unlikelyDistance
2664 }
2665 }
2666 for _, a := range v.Args {
2667 if s.values[a.ID].needReg {
2668 live.set(a.ID, int32(i), v.Pos)
2669 }
2670 }
2671 }
2672 // Propagate desired registers backwards.
2673 desired.copy(&s.desired[b.ID])
2674 for i := len(b.Values) - 1; i >= 0; i-- {
2675 v := b.Values[i]
2676 prefs := desired.remove(v.ID)
2677 if v.Op == OpPhi {
2678 // TODO: if v is a phi, save desired register for phi inputs.
2679 // For now, we just drop it and don't propagate
2680 // desired registers back through phi nodes.
2681 continue
2682 }
2683 regspec := s.regspec(v)
2684 // Cancel desired registers if they get clobbered.
2685 desired.clobber(regspec.clobbers)
2686 // Update desired registers if there are any fixed register inputs.
2687 for _, j := range regspec.inputs {
2688 if countRegs(j.regs) != 1 {
2689 continue
2690 }
2691 desired.clobber(j.regs)
2692 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
2693 }
2694 // Set desired register of input 0 if this is a 2-operand instruction.
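// For example, if x = ADDQ y z will end up in AX, we would also like y (and,
// since ADDQ is commutative, z) to be allocated to AX, so that no extra move
// is needed to satisfy the two-operand constraint.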
2695 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 { 2696 // ADDQconst is added here because we want to treat it as resultInArg0 for 2697 // the purposes of desired registers, even though it is not an absolute requirement. 2698 // This is because we'd rather implement it as ADDQ instead of LEAQ. 2699 // Same for ADDLconst 2700 // Select0 is added here to propagate the desired register to the tuple-generating instruction. 2701 if opcodeTable[v.Op].commutative { 2702 desired.addList(v.Args[1].ID, prefs) 2703 } 2704 desired.addList(v.Args[0].ID, prefs) 2705 } 2706 } 2707 2708 // For each predecessor of b, expand its list of live-at-end values. 2709 // invariant: live contains the values live at the start of b (excluding phi inputs) 2710 for i, e := range b.Preds { 2711 p := e.b 2712 // Compute additional distance for the edge. 2713 // Note: delta must be at least 1 to distinguish the control 2714 // value use from the first user in a successor block. 2715 delta := int32(normalDistance) 2716 if len(p.Succs) == 2 { 2717 if p.Succs[0].b == b && p.Likely == BranchLikely || 2718 p.Succs[1].b == b && p.Likely == BranchUnlikely { 2719 delta = likelyDistance 2720 } 2721 if p.Succs[0].b == b && p.Likely == BranchUnlikely || 2722 p.Succs[1].b == b && p.Likely == BranchLikely { 2723 delta = unlikelyDistance 2724 } 2725 } 2726 2727 // Update any desired registers at the end of p. 2728 s.desired[p.ID].merge(&desired) 2729 2730 // Start t off with the previously known live values at the end of p. 2731 t.clear() 2732 for _, e := range s.live[p.ID] { 2733 t.set(e.ID, e.dist, e.pos) 2734 } 2735 update := false 2736 2737 // Add new live values from scanning this block. 2738 for _, e := range live.contents() { 2739 d := e.val + delta 2740 if !t.contains(e.key) || d < t.get(e.key) { 2741 update = true 2742 t.set(e.key, d, e.pos) 2743 } 2744 } 2745 // Also add the correct arg from the saved phi values. 2746 // All phis are at distance delta (we consider them 2747 // simultaneously happening at the start of the block). 2748 for _, v := range phis { 2749 id := v.Args[i].ID 2750 if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) { 2751 update = true 2752 t.set(id, delta, v.Pos) 2753 } 2754 } 2755 2756 if !update { 2757 continue 2758 } 2759 // The live set has changed, update it. 2760 l := s.live[p.ID][:0] 2761 if cap(l) < t.size() { 2762 l = make([]liveInfo, 0, t.size()) 2763 } 2764 for _, e := range t.contents() { 2765 l = append(l, liveInfo{e.key, e.val, e.pos}) 2766 } 2767 s.live[p.ID] = l 2768 changed = true 2769 } 2770 } 2771 2772 if !changed { 2773 break 2774 } 2775 } 2776 if f.pass.debug > regDebug { 2777 fmt.Println("live values at end of each block") 2778 for _, b := range f.Blocks { 2779 fmt.Printf(" %s:", b) 2780 for _, x := range s.live[b.ID] { 2781 fmt.Printf(" v%d(%d)", x.ID, x.dist) 2782 for _, e := range s.desired[b.ID].entries { 2783 if e.ID != x.ID { 2784 continue 2785 } 2786 fmt.Printf("[") 2787 first := true 2788 for _, r := range e.regs { 2789 if r == noRegister { 2790 continue 2791 } 2792 if !first { 2793 fmt.Printf(",") 2794 } 2795 fmt.Print(&s.registers[r]) 2796 first = false 2797 } 2798 fmt.Printf("]") 2799 } 2800 } 2801 if avoid := s.desired[b.ID].avoid; avoid != 0 { 2802 fmt.Printf(" avoid=%v", s.RegMaskString(avoid)) 2803 } 2804 fmt.Println() 2805 } 2806 } 2807 } 2808 2809 // A desiredState represents desired register assignments. 
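// It is built backwards by computeLive (one state per block, stored in
// s.desired) and consulted during allocation to bias register choices; its
// avoid mask lists registers to keep free for the values that want them.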
2810 type desiredState struct { 2811 // Desired assignments will be small, so we just use a list 2812 // of valueID+registers entries. 2813 entries []desiredStateEntry 2814 // Registers that other values want to be in. This value will 2815 // contain at least the union of the regs fields of entries, but 2816 // may contain additional entries for values that were once in 2817 // this data structure but are no longer. 2818 avoid regMask 2819 } 2820 type desiredStateEntry struct { 2821 // (pre-regalloc) value 2822 ID ID 2823 // Registers it would like to be in, in priority order. 2824 // Unused slots are filled with noRegister. 2825 // For opcodes that return tuples, we track desired registers only 2826 // for the first element of the tuple. 2827 regs [4]register 2828 } 2829 2830 func (d *desiredState) clear() { 2831 d.entries = d.entries[:0] 2832 d.avoid = 0 2833 } 2834 2835 // get returns a list of desired registers for value vid. 2836 func (d *desiredState) get(vid ID) [4]register { 2837 for _, e := range d.entries { 2838 if e.ID == vid { 2839 return e.regs 2840 } 2841 } 2842 return [4]register{noRegister, noRegister, noRegister, noRegister} 2843 } 2844 2845 // add records that we'd like value vid to be in register r. 2846 func (d *desiredState) add(vid ID, r register) { 2847 d.avoid |= regMask(1) << r 2848 for i := range d.entries { 2849 e := &d.entries[i] 2850 if e.ID != vid { 2851 continue 2852 } 2853 if e.regs[0] == r { 2854 // Already known and highest priority 2855 return 2856 } 2857 for j := 1; j < len(e.regs); j++ { 2858 if e.regs[j] == r { 2859 // Move from lower priority to top priority 2860 copy(e.regs[1:], e.regs[:j]) 2861 e.regs[0] = r 2862 return 2863 } 2864 } 2865 copy(e.regs[1:], e.regs[:]) 2866 e.regs[0] = r 2867 return 2868 } 2869 d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}}) 2870 } 2871 2872 func (d *desiredState) addList(vid ID, regs [4]register) { 2873 // regs is in priority order, so iterate in reverse order. 2874 for i := len(regs) - 1; i >= 0; i-- { 2875 r := regs[i] 2876 if r != noRegister { 2877 d.add(vid, r) 2878 } 2879 } 2880 } 2881 2882 // clobber erases any desired registers in the set m. 2883 func (d *desiredState) clobber(m regMask) { 2884 for i := 0; i < len(d.entries); { 2885 e := &d.entries[i] 2886 j := 0 2887 for _, r := range e.regs { 2888 if r != noRegister && m>>r&1 == 0 { 2889 e.regs[j] = r 2890 j++ 2891 } 2892 } 2893 if j == 0 { 2894 // No more desired registers for this value. 2895 d.entries[i] = d.entries[len(d.entries)-1] 2896 d.entries = d.entries[:len(d.entries)-1] 2897 continue 2898 } 2899 for ; j < len(e.regs); j++ { 2900 e.regs[j] = noRegister 2901 } 2902 i++ 2903 } 2904 d.avoid &^= m 2905 } 2906 2907 // copy copies a desired state from another desiredState x. 2908 func (d *desiredState) copy(x *desiredState) { 2909 d.entries = append(d.entries[:0], x.entries...) 2910 d.avoid = x.avoid 2911 } 2912 2913 // remove removes the desired registers for vid and returns them. 2914 func (d *desiredState) remove(vid ID) [4]register { 2915 for i := range d.entries { 2916 if d.entries[i].ID == vid { 2917 regs := d.entries[i].regs 2918 d.entries[i] = d.entries[len(d.entries)-1] 2919 d.entries = d.entries[:len(d.entries)-1] 2920 return regs 2921 } 2922 } 2923 return [4]register{noRegister, noRegister, noRegister, noRegister} 2924 } 2925 2926 // merge merges another desired state x into d. 
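// Preferences from x are re-inserted at the front, so for values present in
// both states the preferences coming from x take priority over d's existing ones.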
2927 func (d *desiredState) merge(x *desiredState) { 2928 d.avoid |= x.avoid 2929 // There should only be a few desired registers, so 2930 // linear insert is ok. 2931 for _, e := range x.entries { 2932 d.addList(e.ID, e.regs) 2933 } 2934 } 2935 2936 func min32(x, y int32) int32 { 2937 if x < y { 2938 return x 2939 } 2940 return y 2941 } 2942 func max32(x, y int32) int32 { 2943 if x > y { 2944 return x 2945 } 2946 return y 2947 }
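// Illustrative sketch (not part of the allocator): the priority behavior of
// desiredState.add can be seen with small register numbers standing in for
// machine registers:
//
//	var d desiredState
//	d.add(1, 3) // v1 prefers [3 _ _ _]
//	d.add(1, 5) // v1 prefers [5 3 _ _]   (5 becomes top priority)
//	d.add(1, 3) // v1 prefers [3 5 _ _]   (3 moves back to the front)
//
// where _ denotes noRegister. d.avoid accumulates 1<<3 | 1<<5; add only ever
// grows the avoid mask, which is shrunk by clobber and reset by clear.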