github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/ssa/regalloc.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Register allocation.
//
// We use a version of a linear scan register allocator. We treat the
// whole function as a single long basic block and run through
// it using a greedy register allocator. Then all merge edges
// (those targeting a block with len(Preds)>1) are processed to
// shuffle data into the place that the target of the edge expects.
//
// The greedy allocator moves values into registers just before they
// are used, spills registers only when necessary, and spills the
// value whose next use is farthest in the future.
//
// The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
// such predecessor provides the starting register state for a block.
//
// It also requires that there are no critical edges (critical =
// comes from a block with >1 successor and goes to a block with >1
// predecessor). This makes it easy to add fixup code on merge edges -
// the source of a merge edge has only one successor, so we can add
// fixup code to the end of that block.

// Spilling
//
// During the normal course of the allocator, we might throw a still-live
// value out of all registers. When that value is subsequently used, we must
// load it from a slot on the stack. We must also issue an instruction to
// initialize that stack location with a copy of v.
//
// pre-regalloc:
//	(1) v = Op ...
//	(2) x = Op ...
//	(3) ... = Op v ...
//
// post-regalloc:
//	(1) v = Op ...    : AX // computes v, store result in AX
//	    s = StoreReg v     // spill v to a stack slot
//	(2) x = Op ...    : AX // some other op uses AX
//	    c = LoadReg s : CX // restore v from stack slot
//	(3) ... = Op c ...     // use the restored value
//
// Allocation occurs normally until we reach (3) and we realize we have
// a use of v and it isn't in any register. At that point, we allocate
// a spill (a StoreReg) for v. We can't determine the correct place for
// the spill at this point, so we allocate the spill as blockless initially.
// The restore is then generated to load v back into a register so it can
// be used. Subsequent uses of v will use the restored value c instead.
//
// What remains is the question of where to schedule the spill.
// During allocation, we keep track of the dominator of all restores of v.
// The spill of v must dominate that block. The spill must also be issued at
// a point where v is still in a register.
//
// To find the right place, start at b, the block which dominates all restores.
//  - If b is v.Block, then issue the spill right after v.
//    It is known to be in a register at that point, and dominates any restores.
//  - Otherwise, if v is in a register at the start of b,
//    put the spill of v at the start of b.
//  - Otherwise, set b = immediate dominator of b, and repeat.
//
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
//
// A register phi must have the phi and all of its inputs allocated to the
// same register.
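// For example (an illustrative sketch added to this comment; the choice of
// AX is arbitrary):
//	b1: y = ... : AX    b2: z = ... : AX
//	    goto b3             goto b3
//	b3: x = phi(y, z) : AX
// When the phi and all of its inputs already share a register like this,
// the merge edges need no fixup moves for x.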
// Register phis are spilled similarly to regular ops.
//
// A stack phi must have the phi and all of its inputs allocated to the same
// stack location. Stack phis start out life already spilled - each phi
// input must be a store (using StoreReg) at the end of the corresponding
// predecessor block.
//	b1: y = ... : AX        b2: z = ... : BX
//	    y2 = StoreReg y         z2 = StoreReg z
//	    goto b3                 goto b3
//	b3: x = phi(y2, z2)
// The stack allocator knows that StoreReg args of stack-allocated phis
// must be allocated to the same stack slot as the phi that uses them.
// x is now a spilled value and a restore must appear before its first use.

// TODO

// Use an affinity graph to mark two values which should use the
// same register. This affinity graph will be used to prefer certain
// registers for allocation. This affinity helps eliminate moves that
// are required for phi implementations and helps generate allocations
// for 2-register architectures.

// Note: regalloc generates a not-quite-SSA output. If we have:
//
//	b1: x = ... : AX
//	    x2 = StoreReg x
//	    ... AX gets reused for something else ...
//	    if ... goto b3 else b4
//
//	b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
//	    ... use x3 ...                 ... use x4 ...
//
//	b2: ... use x3 ...
//
// If b3 is the primary predecessor of b2, then we use x3 in b2 and
// add a x4:CX->BX copy at the end of b4.
// But the definition of x3 doesn't dominate b2. We should really
// insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
// SSA form. For now, we ignore this problem as remaining in strict
// SSA form isn't needed after regalloc. We'll just leave the use
// of x3 not dominated by the definition of x3, and the CX->BX copy
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?

package ssa

import (
	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ir"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
	"github.com/bir3/gocompiler/src/cmd/internal/src"
	"github.com/bir3/gocompiler/src/cmd/internal/sys"
	"fmt"
	"github.com/bir3/gocompiler/src/internal/buildcfg"
	"math/bits"
	"unsafe"
)

const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)

// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)

// regalloc performs register allocation on f. It sets f.RegAlloc
// to the resulting allocation.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
	s.close()
}

type register uint8

const noRegister register = 255

// For bulk initializing
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}

// A regMask encodes a set of machine registers.
// TODO: regMask -> regSet?
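// For example (an illustrative note, not part of the original comment):
// a regMask with bits 0 and 3 set (binary 1001) denotes the register set
// {r0, r3}, which the String method below prints as "r0 r3".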
166 type regMask uint64 167 168 func (m regMask) String() string { 169 s := "" 170 for r := register(0); m != 0; r++ { 171 if m>>r&1 == 0 { 172 continue 173 } 174 m &^= regMask(1) << r 175 if s != "" { 176 s += " " 177 } 178 s += fmt.Sprintf("r%d", r) 179 } 180 return s 181 } 182 183 func (s *regAllocState) RegMaskString(m regMask) string { 184 str := "" 185 for r := register(0); m != 0; r++ { 186 if m>>r&1 == 0 { 187 continue 188 } 189 m &^= regMask(1) << r 190 if str != "" { 191 str += " " 192 } 193 str += s.registers[r].String() 194 } 195 return str 196 } 197 198 // countRegs returns the number of set bits in the register mask. 199 func countRegs(r regMask) int { 200 return bits.OnesCount64(uint64(r)) 201 } 202 203 // pickReg picks an arbitrary register from the register mask. 204 func pickReg(r regMask) register { 205 if r == 0 { 206 panic("can't pick a register from an empty set") 207 } 208 // pick the lowest one 209 return register(bits.TrailingZeros64(uint64(r))) 210 } 211 212 type use struct { 213 dist int32 // distance from start of the block to a use of a value 214 pos src.XPos // source position of the use 215 next *use // linked list of uses of a value in nondecreasing dist order 216 } 217 218 // A valState records the register allocation state for a (pre-regalloc) value. 219 type valState struct { 220 regs regMask // the set of registers holding a Value (usually just one) 221 uses *use // list of uses in this block 222 spill *Value // spilled copy of the Value (if any) 223 restoreMin int32 // minimum of all restores' blocks' sdom.entry 224 restoreMax int32 // maximum of all restores' blocks' sdom.exit 225 needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() 226 rematerializeable bool // cached value of v.rematerializeable() 227 } 228 229 type regState struct { 230 v *Value // Original (preregalloc) Value stored in this register. 231 c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it. 232 // If a register is unused, v==c==nil 233 } 234 235 type regAllocState struct { 236 f *Func 237 238 sdom SparseTree 239 registers []Register 240 numRegs register 241 SPReg register 242 SBReg register 243 GReg register 244 allocatable regMask 245 246 // live values at the end of each block. live[b.ID] is a list of value IDs 247 // which are live at the end of b, together with a count of how many instructions 248 // forward to the next use. 249 live [][]liveInfo 250 // desired register assignments at the end of each block. 251 // Note that this is a static map computed before allocation occurs. Dynamic 252 // register desires (from partially completed allocations) will trump 253 // this information. 254 desired []desiredState 255 256 // current state of each (preregalloc) Value 257 values []valState 258 259 // ID of SP, SB values 260 sp, sb ID 261 262 // For each Value, map from its value ID back to the 263 // preregalloc Value it was derived from. 264 orig []*Value 265 266 // current state of each register 267 regs []regState 268 269 // registers that contain values which can't be kicked out 270 nospill regMask 271 272 // mask of registers currently in use 273 used regMask 274 275 // mask of registers used in the current instruction 276 tmpused regMask 277 278 // current block we're working on 279 curBlock *Block 280 281 // cache of use records 282 freeUseRecords *use 283 284 // endRegs[blockid] is the register state at the end of each block. 285 // encoded as a set of endReg records. 
286 endRegs [][]endReg 287 288 // startRegs[blockid] is the register state at the start of merge blocks. 289 // saved state does not include the state of phi ops in the block. 290 startRegs [][]startReg 291 292 // spillLive[blockid] is the set of live spills at the end of each block 293 spillLive [][]ID 294 295 // a set of copies we generated to move things around, and 296 // whether it is used in shuffle. Unused copies will be deleted. 297 copies map[*Value]bool 298 299 loopnest *loopnest 300 301 // choose a good order in which to visit blocks for allocation purposes. 302 visitOrder []*Block 303 304 // blockOrder[b.ID] corresponds to the index of block b in visitOrder. 305 blockOrder []int32 306 307 // whether to insert instructions that clobber dead registers at call sites 308 doClobber bool 309 } 310 311 type endReg struct { 312 r register 313 v *Value // pre-regalloc value held in this register (TODO: can we use ID here?) 314 c *Value // cached version of the value 315 } 316 317 type startReg struct { 318 r register 319 v *Value // pre-regalloc value needed in this register 320 c *Value // cached version of the value 321 pos src.XPos // source position of use of this register 322 } 323 324 // freeReg frees up register r. Any current user of r is kicked out. 325 func (s *regAllocState) freeReg(r register) { 326 v := s.regs[r].v 327 if v == nil { 328 s.f.Fatalf("tried to free an already free register %d\n", r) 329 } 330 331 // Mark r as unused. 332 if s.f.pass.debug > regDebug { 333 fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c) 334 } 335 s.regs[r] = regState{} 336 s.values[v.ID].regs &^= regMask(1) << r 337 s.used &^= regMask(1) << r 338 } 339 340 // freeRegs frees up all registers listed in m. 341 func (s *regAllocState) freeRegs(m regMask) { 342 for m&s.used != 0 { 343 s.freeReg(pickReg(m & s.used)) 344 } 345 } 346 347 // clobberRegs inserts instructions that clobber registers listed in m. 348 func (s *regAllocState) clobberRegs(m regMask) { 349 m &= s.allocatable & s.f.Config.gpRegMask // only integer register can contain pointers, only clobber them 350 for m != 0 { 351 r := pickReg(m) 352 m &^= 1 << r 353 x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid) 354 s.f.setHome(x, &s.registers[r]) 355 } 356 } 357 358 // setOrig records that c's original value is the same as 359 // v's original value. 360 func (s *regAllocState) setOrig(c *Value, v *Value) { 361 if int(c.ID) >= cap(s.orig) { 362 x := s.f.Cache.allocValueSlice(int(c.ID) + 1) 363 copy(x, s.orig) 364 s.f.Cache.freeValueSlice(s.orig) 365 s.orig = x 366 } 367 for int(c.ID) >= len(s.orig) { 368 s.orig = append(s.orig, nil) 369 } 370 if s.orig[c.ID] != nil { 371 s.f.Fatalf("orig value set twice %s %s", c, v) 372 } 373 s.orig[c.ID] = s.orig[v.ID] 374 } 375 376 // assignReg assigns register r to hold c, a copy of v. 377 // r must be unused. 378 func (s *regAllocState) assignReg(r register, v *Value, c *Value) { 379 if s.f.pass.debug > regDebug { 380 fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c) 381 } 382 if s.regs[r].v != nil { 383 s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v) 384 } 385 386 // Update state. 387 s.regs[r] = regState{v, c} 388 s.values[v.ID].regs |= regMask(1) << r 389 s.used |= regMask(1) << r 390 s.f.setHome(c, &s.registers[r]) 391 } 392 393 // allocReg chooses a register from the set of registers in mask. 394 // If there is no unused register, a Value will be kicked out of 395 // a register to make room. 
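// For example (an illustrative note; the register names are arbitrary): if
// every register in mask is occupied, and AX holds a value whose next use is
// 2 instructions away while BX holds one whose next use is 50 instructions
// away, the value in BX is evicted, following the farthest-next-use
// heuristic described in the body below.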
396 func (s *regAllocState) allocReg(mask regMask, v *Value) register { 397 if v.OnWasmStack { 398 return noRegister 399 } 400 401 mask &= s.allocatable 402 mask &^= s.nospill 403 if mask == 0 { 404 s.f.Fatalf("no register available for %s", v.LongString()) 405 } 406 407 // Pick an unused register if one is available. 408 if mask&^s.used != 0 { 409 return pickReg(mask &^ s.used) 410 } 411 412 // Pick a value to spill. Spill the value with the 413 // farthest-in-the-future use. 414 // TODO: Prefer registers with already spilled Values? 415 // TODO: Modify preference using affinity graph. 416 // TODO: if a single value is in multiple registers, spill one of them 417 // before spilling a value in just a single register. 418 419 // Find a register to spill. We spill the register containing the value 420 // whose next use is as far in the future as possible. 421 // https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm 422 var r register 423 maxuse := int32(-1) 424 for t := register(0); t < s.numRegs; t++ { 425 if mask>>t&1 == 0 { 426 continue 427 } 428 v := s.regs[t].v 429 if n := s.values[v.ID].uses.dist; n > maxuse { 430 // v's next use is farther in the future than any value 431 // we've seen so far. A new best spill candidate. 432 r = t 433 maxuse = n 434 } 435 } 436 if maxuse == -1 { 437 s.f.Fatalf("couldn't find register to spill") 438 } 439 440 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm { 441 // TODO(neelance): In theory this should never happen, because all wasm registers are equal. 442 // So if there is still a free register, the allocation should have picked that one in the first place instead of 443 // trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization. 444 s.freeReg(r) 445 return r 446 } 447 448 // Try to move it around before kicking out, if there is a free register. 449 // We generate a Copy and record it. It will be deleted if never used. 450 v2 := s.regs[r].v 451 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r) 452 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 { 453 r2 := pickReg(m) 454 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c) 455 s.copies[c] = false 456 if s.f.pass.debug > regDebug { 457 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2]) 458 } 459 s.setOrig(c, v2) 460 s.assignReg(r2, v2, c) 461 } 462 s.freeReg(r) 463 return r 464 } 465 466 // makeSpill returns a Value which represents the spilled value of v. 467 // b is the block in which the spill is used. 468 func (s *regAllocState) makeSpill(v *Value, b *Block) *Value { 469 vi := &s.values[v.ID] 470 if vi.spill != nil { 471 // Final block not known - keep track of subtree where restores reside. 472 vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry) 473 vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit) 474 return vi.spill 475 } 476 // Make a spill for v. We don't know where we want 477 // to put it yet, so we leave it blockless for now. 478 spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos) 479 // We also don't know what the spill's arg will be. 480 // Leave it argless for now. 481 s.setOrig(spill, v) 482 vi.spill = spill 483 vi.restoreMin = s.sdom[b.ID].entry 484 vi.restoreMax = s.sdom[b.ID].exit 485 return spill 486 } 487 488 // allocValToReg allocates v to a register selected from regMask and 489 // returns the register copy of v. Any previous user is kicked out and spilled 490 // (if necessary). 
Load code is added at the current pc. If nospill is set the 491 // allocated register is marked nospill so the assignment cannot be 492 // undone until the caller allows it by clearing nospill. Returns a 493 // *Value which is either v or a copy of v allocated to the chosen register. 494 func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value { 495 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() { 496 c := v.copyIntoWithXPos(s.curBlock, pos) 497 c.OnWasmStack = true 498 s.setOrig(c, v) 499 return c 500 } 501 if v.OnWasmStack { 502 return v 503 } 504 505 vi := &s.values[v.ID] 506 pos = pos.WithNotStmt() 507 // Check if v is already in a requested register. 508 if mask&vi.regs != 0 { 509 r := pickReg(mask & vi.regs) 510 if s.regs[r].v != v || s.regs[r].c == nil { 511 panic("bad register state") 512 } 513 if nospill { 514 s.nospill |= regMask(1) << r 515 } 516 return s.regs[r].c 517 } 518 519 var r register 520 // If nospill is set, the value is used immediately, so it can live on the WebAssembly stack. 521 onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm 522 if !onWasmStack { 523 // Allocate a register. 524 r = s.allocReg(mask, v) 525 } 526 527 // Allocate v to the new register. 528 var c *Value 529 if vi.regs != 0 { 530 // Copy from a register that v is already in. 531 r2 := pickReg(vi.regs) 532 if s.regs[r2].v != v { 533 panic("bad register state") 534 } 535 c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c) 536 } else if v.rematerializeable() { 537 // Rematerialize instead of loading from the spill location. 538 c = v.copyIntoWithXPos(s.curBlock, pos) 539 } else { 540 // Load v from its spill location. 541 spill := s.makeSpill(v, s.curBlock) 542 if s.f.pass.debug > logSpills { 543 s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill) 544 } 545 c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill) 546 } 547 548 s.setOrig(c, v) 549 550 if onWasmStack { 551 c.OnWasmStack = true 552 return c 553 } 554 555 s.assignReg(r, v, c) 556 if c.Op == OpLoadReg && s.isGReg(r) { 557 s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString()) 558 } 559 if nospill { 560 s.nospill |= regMask(1) << r 561 } 562 return c 563 } 564 565 // isLeaf reports whether f performs any calls. 566 func isLeaf(f *Func) bool { 567 for _, b := range f.Blocks { 568 for _, v := range b.Values { 569 if v.Op.IsCall() && !v.Op.IsTailCall() { 570 // tail call is not counted as it does not save the return PC or need a frame 571 return false 572 } 573 } 574 } 575 return true 576 } 577 578 func (s *regAllocState) init(f *Func) { 579 s.f = f 580 s.f.RegAlloc = s.f.Cache.locs[:0] 581 s.registers = f.Config.registers 582 if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) { 583 s.f.Fatalf("bad number of registers: %d", nr) 584 } else { 585 s.numRegs = register(nr) 586 } 587 // Locate SP, SB, and g registers. 588 s.SPReg = noRegister 589 s.SBReg = noRegister 590 s.GReg = noRegister 591 for r := register(0); r < s.numRegs; r++ { 592 switch s.registers[r].String() { 593 case "SP": 594 s.SPReg = r 595 case "SB": 596 s.SBReg = r 597 case "g": 598 s.GReg = r 599 } 600 } 601 // Make sure we found all required registers. 
602 switch noRegister { 603 case s.SPReg: 604 s.f.Fatalf("no SP register found") 605 case s.SBReg: 606 s.f.Fatalf("no SB register found") 607 case s.GReg: 608 if f.Config.hasGReg { 609 s.f.Fatalf("no g register found") 610 } 611 } 612 613 // Figure out which registers we're allowed to use. 614 s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask 615 s.allocatable &^= 1 << s.SPReg 616 s.allocatable &^= 1 << s.SBReg 617 if s.f.Config.hasGReg { 618 s.allocatable &^= 1 << s.GReg 619 } 620 if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 { 621 s.allocatable &^= 1 << uint(s.f.Config.FPReg) 622 } 623 if s.f.Config.LinkReg != -1 { 624 if isLeaf(f) { 625 // Leaf functions don't save/restore the link register. 626 s.allocatable &^= 1 << uint(s.f.Config.LinkReg) 627 } 628 } 629 if s.f.Config.ctxt.Flag_dynlink { 630 switch s.f.Config.arch { 631 case "386": 632 // nothing to do. 633 // Note that for Flag_shared (position independent code) 634 // we do need to be careful, but that carefulness is hidden 635 // in the rewrite rules so we always have a free register 636 // available for global load/stores. See _gen/386.rules (search for Flag_shared). 637 case "amd64": 638 s.allocatable &^= 1 << 15 // R15 639 case "arm": 640 s.allocatable &^= 1 << 9 // R9 641 case "arm64": 642 // nothing to do 643 case "ppc64le": // R2 already reserved. 644 // nothing to do 645 case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved. 646 // nothing to do 647 case "s390x": 648 s.allocatable &^= 1 << 11 // R11 649 default: 650 s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch) 651 } 652 } 653 654 // Linear scan register allocation can be influenced by the order in which blocks appear. 655 // Decouple the register allocation order from the generated block order. 656 // This also creates an opportunity for experiments to find a better order. 657 s.visitOrder = layoutRegallocOrder(f) 658 659 // Compute block order. This array allows us to distinguish forward edges 660 // from backward edges and compute how far they go. 661 s.blockOrder = make([]int32, f.NumBlocks()) 662 for i, b := range s.visitOrder { 663 s.blockOrder[b.ID] = int32(i) 664 } 665 666 s.regs = make([]regState, s.numRegs) 667 nv := f.NumValues() 668 if cap(s.f.Cache.regallocValues) >= nv { 669 s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv] 670 } else { 671 s.f.Cache.regallocValues = make([]valState, nv) 672 } 673 s.values = s.f.Cache.regallocValues 674 s.orig = s.f.Cache.allocValueSlice(nv) 675 s.copies = make(map[*Value]bool) 676 for _, b := range s.visitOrder { 677 for _, v := range b.Values { 678 if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() { 679 s.values[v.ID].needReg = true 680 s.values[v.ID].rematerializeable = v.rematerializeable() 681 s.orig[v.ID] = v 682 } 683 // Note: needReg is false for values returning Tuple types. 684 // Instead, we mark the corresponding Selects as needReg. 685 } 686 } 687 s.computeLive() 688 689 s.endRegs = make([][]endReg, f.NumBlocks()) 690 s.startRegs = make([][]startReg, f.NumBlocks()) 691 s.spillLive = make([][]ID, f.NumBlocks()) 692 s.sdom = f.Sdom() 693 694 // wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack. 695 if f.Config.ctxt.Arch.Arch == sys.ArchWasm { 696 canLiveOnStack := f.newSparseSet(f.NumValues()) 697 defer f.retSparseSet(canLiveOnStack) 698 for _, b := range f.Blocks { 699 // New block. Clear candidate set. 
700 canLiveOnStack.clear() 701 for _, c := range b.ControlValues() { 702 if c.Uses == 1 && !opcodeTable[c.Op].generic { 703 canLiveOnStack.add(c.ID) 704 } 705 } 706 // Walking backwards. 707 for i := len(b.Values) - 1; i >= 0; i-- { 708 v := b.Values[i] 709 if canLiveOnStack.contains(v.ID) { 710 v.OnWasmStack = true 711 } else { 712 // Value can not live on stack. Values are not allowed to be reordered, so clear candidate set. 713 canLiveOnStack.clear() 714 } 715 for _, arg := range v.Args { 716 // Value can live on the stack if: 717 // - it is only used once 718 // - it is used in the same basic block 719 // - it is not a "mem" value 720 // - it is a WebAssembly op 721 if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic { 722 canLiveOnStack.add(arg.ID) 723 } 724 } 725 } 726 } 727 } 728 729 // The clobberdeadreg experiment inserts code to clobber dead registers 730 // at call sites. 731 // Ignore huge functions to avoid doing too much work. 732 if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 { 733 // TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH. 734 s.doClobber = true 735 } 736 } 737 738 func (s *regAllocState) close() { 739 s.f.Cache.freeValueSlice(s.orig) 740 } 741 742 // Adds a use record for id at distance dist from the start of the block. 743 // All calls to addUse must happen with nonincreasing dist. 744 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) { 745 r := s.freeUseRecords 746 if r != nil { 747 s.freeUseRecords = r.next 748 } else { 749 r = &use{} 750 } 751 r.dist = dist 752 r.pos = pos 753 r.next = s.values[id].uses 754 s.values[id].uses = r 755 if r.next != nil && dist > r.next.dist { 756 s.f.Fatalf("uses added in wrong order") 757 } 758 } 759 760 // advanceUses advances the uses of v's args from the state before v to the state after v. 761 // Any values which have no more uses are deallocated from registers. 762 func (s *regAllocState) advanceUses(v *Value) { 763 for _, a := range v.Args { 764 if !s.values[a.ID].needReg { 765 continue 766 } 767 ai := &s.values[a.ID] 768 r := ai.uses 769 ai.uses = r.next 770 if r.next == nil { 771 // Value is dead, free all registers that hold it. 772 s.freeRegs(ai.regs) 773 } 774 r.next = s.freeUseRecords 775 s.freeUseRecords = r 776 } 777 } 778 779 // liveAfterCurrentInstruction reports whether v is live after 780 // the current instruction is completed. v must be used by the 781 // current instruction. 782 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool { 783 u := s.values[v.ID].uses 784 if u == nil { 785 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID])) 786 } 787 d := u.dist 788 for u != nil && u.dist == d { 789 u = u.next 790 } 791 return u != nil && u.dist > d 792 } 793 794 // Sets the state of the registers to that encoded in regs. 795 func (s *regAllocState) setState(regs []endReg) { 796 s.freeRegs(s.used) 797 for _, x := range regs { 798 s.assignReg(x.r, x.v, x.c) 799 } 800 } 801 802 // compatRegs returns the set of registers which can store a type t. 
803 func (s *regAllocState) compatRegs(t *types.Type) regMask { 804 var m regMask 805 if t.IsTuple() || t.IsFlags() { 806 return 0 807 } 808 if t.IsFloat() || t == types.TypeInt128 { 809 if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 { 810 m = s.f.Config.fp32RegMask 811 } else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 { 812 m = s.f.Config.fp64RegMask 813 } else { 814 m = s.f.Config.fpRegMask 815 } 816 } else { 817 m = s.f.Config.gpRegMask 818 } 819 return m & s.allocatable 820 } 821 822 // regspec returns the regInfo for operation op. 823 func (s *regAllocState) regspec(v *Value) regInfo { 824 op := v.Op 825 if op == OpConvert { 826 // OpConvert is a generic op, so it doesn't have a 827 // register set in the static table. It can use any 828 // allocatable integer register. 829 m := s.allocatable & s.f.Config.gpRegMask 830 return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}} 831 } 832 if op == OpArgIntReg { 833 reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()] 834 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}} 835 } 836 if op == OpArgFloatReg { 837 reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()] 838 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}} 839 } 840 if op.IsCall() { 841 if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil { 842 return *ac.Reg(&opcodeTable[op].reg, s.f.Config) 843 } 844 } 845 if op == OpMakeResult && s.f.OwnAux.reg != nil { 846 return *s.f.OwnAux.ResultReg(s.f.Config) 847 } 848 return opcodeTable[op].reg 849 } 850 851 func (s *regAllocState) isGReg(r register) bool { 852 return s.f.Config.hasGReg && s.GReg == r 853 } 854 855 // Dummy value used to represent the value being held in a temporary register. 856 var tmpVal Value 857 858 func (s *regAllocState) regalloc(f *Func) { 859 regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register 860 defer f.retSparseSet(regValLiveSet) 861 var oldSched []*Value 862 var phis []*Value 863 var phiRegs []register 864 var args []*Value 865 866 // Data structure used for computing desired registers. 867 var desired desiredState 868 869 // Desired registers for inputs & outputs for each instruction in the block. 870 type dentry struct { 871 out [4]register // desired output registers 872 in [3][4]register // desired input registers (for inputs 0,1, and 2) 873 } 874 var dinfo []dentry 875 876 if f.Entry != f.Blocks[0] { 877 f.Fatalf("entry block must be first") 878 } 879 880 for _, b := range s.visitOrder { 881 if s.f.pass.debug > regDebug { 882 fmt.Printf("Begin processing block %v\n", b) 883 } 884 s.curBlock = b 885 886 // Initialize regValLiveSet and uses fields for this block. 887 // Walk backwards through the block doing liveness analysis. 888 regValLiveSet.clear() 889 for _, e := range s.live[b.ID] { 890 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block 891 regValLiveSet.add(e.ID) 892 } 893 for _, v := range b.ControlValues() { 894 if s.values[v.ID].needReg { 895 s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values 896 regValLiveSet.add(v.ID) 897 } 898 } 899 for i := len(b.Values) - 1; i >= 0; i-- { 900 v := b.Values[i] 901 regValLiveSet.remove(v.ID) 902 if v.Op == OpPhi { 903 // Remove v from the live set, but don't add 904 // any inputs. This is the state the len(b.Preds)>1 905 // case below desires; it wants to process phis specially. 
				continue
			}
			if opcodeTable[v.Op].call {
				// Function call clobbers all the registers but SP and SB.
				regValLiveSet.clear()
				if s.sp != 0 && s.values[s.sp].uses != nil {
					regValLiveSet.add(s.sp)
				}
				if s.sb != 0 && s.values[s.sb].uses != nil {
					regValLiveSet.add(s.sb)
				}
			}
			for _, a := range v.Args {
				if !s.values[a.ID].needReg {
					continue
				}
				s.addUse(a.ID, int32(i), v.Pos)
				regValLiveSet.add(a.ID)
			}
		}
		if s.f.pass.debug > regDebug {
			fmt.Printf("use distances for %s\n", b)
			for i := range s.values {
				vi := &s.values[i]
				u := vi.uses
				if u == nil {
					continue
				}
				fmt.Printf(" v%d:", i)
				for u != nil {
					fmt.Printf(" %d", u.dist)
					u = u.next
				}
				fmt.Println()
			}
		}

		// Make a copy of the block schedule so we can generate a new one in place.
		// We make a separate copy for phis and regular values.
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		phis = append(phis[:0], b.Values[:nphi]...)
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:0]

		// Initialize start state of block.
		if b == f.Entry {
			// Regalloc state is empty to start.
			if nphi > 0 {
				f.Fatalf("phis in entry block")
			}
		} else if len(b.Preds) == 1 {
			// Start regalloc state with the end state of the previous block.
			s.setState(s.endRegs[b.Preds[0].b.ID])
			if nphi > 0 {
				f.Fatalf("phis in single-predecessor block")
			}
			// Drop any values which are no longer live.
			// This may happen because at the end of p, a value may be
			// live but only used by some other successor of p.
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}
		} else {
			// This is the complicated case. We have more than one predecessor,
			// which means we may have Phi ops.

			// Start with the final register state of the predecessor with the fewest live spills.
			// This is based on the following points:
			// 1. Fewer live spills indicate that the register pressure on this path is smaller,
			//    so the values of this block are more likely to be allocated to registers.
			// 2. Avoid predecessors that contain a function call, because such a predecessor
			//    usually generates a lot of spills and loses the previous allocation state.
			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
			// an impact on the code size and compiler speed. But it is not easy to find a simple
			// and efficient method that combines multiple factors.
			idx := -1
			for i, p := range b.Preds {
				// If the predecessor has not been visited yet, skip it because its end state
				// (endRegs and spillLive) has not been computed yet.
				pb := p.b
				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
					continue
				}
				if idx == -1 {
					idx = i
					continue
				}
				pSel := b.Preds[idx].b
				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
					idx = i
				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
					// Use a bit of likely information. After the critical pass, pb and pSel must
					// be plain blocks, so check edge pb->pb.Preds instead of edge pb->b.
					// TODO: improve the prediction of the likely predecessor. The following
					// method is only suitable for the simplest cases.
For complex cases, 1011 // the prediction may be inaccurate, but this does not affect the 1012 // correctness of the program. 1013 // According to the layout algorithm, the predecessor with the 1014 // smaller blockOrder is the true branch, and the test results show 1015 // that it is better to choose the predecessor with a smaller 1016 // blockOrder than no choice. 1017 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] { 1018 idx = i 1019 } 1020 } 1021 } 1022 if idx < 0 { 1023 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b) 1024 } 1025 p := b.Preds[idx].b 1026 s.setState(s.endRegs[p.ID]) 1027 1028 if s.f.pass.debug > regDebug { 1029 fmt.Printf("starting merge block %s with end state of %s:\n", b, p) 1030 for _, x := range s.endRegs[p.ID] { 1031 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c) 1032 } 1033 } 1034 1035 // Decide on registers for phi ops. Use the registers determined 1036 // by the primary predecessor if we can. 1037 // TODO: pick best of (already processed) predecessors? 1038 // Majority vote? Deepest nesting level? 1039 phiRegs = phiRegs[:0] 1040 var phiUsed regMask 1041 1042 for _, v := range phis { 1043 if !s.values[v.ID].needReg { 1044 phiRegs = append(phiRegs, noRegister) 1045 continue 1046 } 1047 a := v.Args[idx] 1048 // Some instructions target not-allocatable registers. 1049 // They're not suitable for further (phi-function) allocation. 1050 m := s.values[a.ID].regs &^ phiUsed & s.allocatable 1051 if m != 0 { 1052 r := pickReg(m) 1053 phiUsed |= regMask(1) << r 1054 phiRegs = append(phiRegs, r) 1055 } else { 1056 phiRegs = append(phiRegs, noRegister) 1057 } 1058 } 1059 1060 // Second pass - deallocate all in-register phi inputs. 1061 for i, v := range phis { 1062 if !s.values[v.ID].needReg { 1063 continue 1064 } 1065 a := v.Args[idx] 1066 r := phiRegs[i] 1067 if r == noRegister { 1068 continue 1069 } 1070 if regValLiveSet.contains(a.ID) { 1071 // Input value is still live (it is used by something other than Phi). 1072 // Try to move it around before kicking out, if there is a free register. 1073 // We generate a Copy in the predecessor block and record it. It will be 1074 // deleted later if never used. 1075 // 1076 // Pick a free register. At this point some registers used in the predecessor 1077 // block may have been deallocated. Those are the ones used for Phis. Exclude 1078 // them (and they are not going to be helpful anyway). 1079 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed 1080 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 { 1081 r2 := pickReg(m) 1082 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c) 1083 s.copies[c] = false 1084 if s.f.pass.debug > regDebug { 1085 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2]) 1086 } 1087 s.setOrig(c, a) 1088 s.assignReg(r2, a, c) 1089 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c}) 1090 } 1091 } 1092 s.freeReg(r) 1093 } 1094 1095 // Copy phi ops into new schedule. 1096 b.Values = append(b.Values, phis...) 1097 1098 // Third pass - pick registers for phis whose input 1099 // was not in a register in the primary predecessor. 
1100 for i, v := range phis { 1101 if !s.values[v.ID].needReg { 1102 continue 1103 } 1104 if phiRegs[i] != noRegister { 1105 continue 1106 } 1107 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used 1108 // If one of the other inputs of v is in a register, and the register is available, 1109 // select this register, which can save some unnecessary copies. 1110 for i, pe := range b.Preds { 1111 if i == idx { 1112 continue 1113 } 1114 ri := noRegister 1115 for _, er := range s.endRegs[pe.b.ID] { 1116 if er.v == s.orig[v.Args[i].ID] { 1117 ri = er.r 1118 break 1119 } 1120 } 1121 if ri != noRegister && m>>ri&1 != 0 { 1122 m = regMask(1) << ri 1123 break 1124 } 1125 } 1126 if m != 0 { 1127 r := pickReg(m) 1128 phiRegs[i] = r 1129 phiUsed |= regMask(1) << r 1130 } 1131 } 1132 1133 // Set registers for phis. Add phi spill code. 1134 for i, v := range phis { 1135 if !s.values[v.ID].needReg { 1136 continue 1137 } 1138 r := phiRegs[i] 1139 if r == noRegister { 1140 // stack-based phi 1141 // Spills will be inserted in all the predecessors below. 1142 s.values[v.ID].spill = v // v starts life spilled 1143 continue 1144 } 1145 // register-based phi 1146 s.assignReg(r, v, v) 1147 } 1148 1149 // Deallocate any values which are no longer live. Phis are excluded. 1150 for r := register(0); r < s.numRegs; r++ { 1151 if phiUsed>>r&1 != 0 { 1152 continue 1153 } 1154 v := s.regs[r].v 1155 if v != nil && !regValLiveSet.contains(v.ID) { 1156 s.freeReg(r) 1157 } 1158 } 1159 1160 // Save the starting state for use by merge edges. 1161 // We append to a stack allocated variable that we'll 1162 // later copy into s.startRegs in one fell swoop, to save 1163 // on allocations. 1164 regList := make([]startReg, 0, 32) 1165 for r := register(0); r < s.numRegs; r++ { 1166 v := s.regs[r].v 1167 if v == nil { 1168 continue 1169 } 1170 if phiUsed>>r&1 != 0 { 1171 // Skip registers that phis used, we'll handle those 1172 // specially during merge edge processing. 1173 continue 1174 } 1175 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos}) 1176 } 1177 s.startRegs[b.ID] = make([]startReg, len(regList)) 1178 copy(s.startRegs[b.ID], regList) 1179 1180 if s.f.pass.debug > regDebug { 1181 fmt.Printf("after phis\n") 1182 for _, x := range s.startRegs[b.ID] { 1183 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID) 1184 } 1185 } 1186 } 1187 1188 // Allocate space to record the desired registers for each value. 1189 if l := len(oldSched); cap(dinfo) < l { 1190 dinfo = make([]dentry, l) 1191 } else { 1192 dinfo = dinfo[:l] 1193 for i := range dinfo { 1194 dinfo[i] = dentry{} 1195 } 1196 } 1197 1198 // Load static desired register info at the end of the block. 1199 desired.copy(&s.desired[b.ID]) 1200 1201 // Check actual assigned registers at the start of the next block(s). 1202 // Dynamically assigned registers will trump the static 1203 // desired registers computed during liveness analysis. 1204 // Note that we do this phase after startRegs is set above, so that 1205 // we get the right behavior for a block which branches to itself. 1206 for _, e := range b.Succs { 1207 succ := e.b 1208 // TODO: prioritize likely successor? 1209 for _, x := range s.startRegs[succ.ID] { 1210 desired.add(x.v.ID, x.r) 1211 } 1212 // Process phi ops in succ. 
1213 pidx := e.i 1214 for _, v := range succ.Values { 1215 if v.Op != OpPhi { 1216 break 1217 } 1218 if !s.values[v.ID].needReg { 1219 continue 1220 } 1221 rp, ok := s.f.getHome(v.ID).(*Register) 1222 if !ok { 1223 // If v is not assigned a register, pick a register assigned to one of v's inputs. 1224 // Hopefully v will get assigned that register later. 1225 // If the inputs have allocated register information, add it to desired, 1226 // which may reduce spill or copy operations when the register is available. 1227 for _, a := range v.Args { 1228 rp, ok = s.f.getHome(a.ID).(*Register) 1229 if ok { 1230 break 1231 } 1232 } 1233 if !ok { 1234 continue 1235 } 1236 } 1237 desired.add(v.Args[pidx].ID, register(rp.num)) 1238 } 1239 } 1240 // Walk values backwards computing desired register info. 1241 // See computeLive for more comments. 1242 for i := len(oldSched) - 1; i >= 0; i-- { 1243 v := oldSched[i] 1244 prefs := desired.remove(v.ID) 1245 regspec := s.regspec(v) 1246 desired.clobber(regspec.clobbers) 1247 for _, j := range regspec.inputs { 1248 if countRegs(j.regs) != 1 { 1249 continue 1250 } 1251 desired.clobber(j.regs) 1252 desired.add(v.Args[j.idx].ID, pickReg(j.regs)) 1253 } 1254 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 { 1255 if opcodeTable[v.Op].commutative { 1256 desired.addList(v.Args[1].ID, prefs) 1257 } 1258 desired.addList(v.Args[0].ID, prefs) 1259 } 1260 // Save desired registers for this value. 1261 dinfo[i].out = prefs 1262 for j, a := range v.Args { 1263 if j >= len(dinfo[i].in) { 1264 break 1265 } 1266 dinfo[i].in[j] = desired.get(a.ID) 1267 } 1268 } 1269 1270 // Process all the non-phi values. 1271 for idx, v := range oldSched { 1272 tmpReg := noRegister 1273 if s.f.pass.debug > regDebug { 1274 fmt.Printf(" processing %s\n", v.LongString()) 1275 } 1276 regspec := s.regspec(v) 1277 if v.Op == OpPhi { 1278 f.Fatalf("phi %s not at start of block", v) 1279 } 1280 if v.Op == OpSP { 1281 s.assignReg(s.SPReg, v, v) 1282 b.Values = append(b.Values, v) 1283 s.advanceUses(v) 1284 s.sp = v.ID 1285 continue 1286 } 1287 if v.Op == OpSB { 1288 s.assignReg(s.SBReg, v, v) 1289 b.Values = append(b.Values, v) 1290 s.advanceUses(v) 1291 s.sb = v.ID 1292 continue 1293 } 1294 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN { 1295 if s.values[v.ID].needReg { 1296 if v.Op == OpSelectN { 1297 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v) 1298 } else { 1299 var i = 0 1300 if v.Op == OpSelect1 { 1301 i = 1 1302 } 1303 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v) 1304 } 1305 } 1306 b.Values = append(b.Values, v) 1307 s.advanceUses(v) 1308 continue 1309 } 1310 if v.Op == OpGetG && s.f.Config.hasGReg { 1311 // use hardware g register 1312 if s.regs[s.GReg].v != nil { 1313 s.freeReg(s.GReg) // kick out the old value 1314 } 1315 s.assignReg(s.GReg, v, v) 1316 b.Values = append(b.Values, v) 1317 s.advanceUses(v) 1318 continue 1319 } 1320 if v.Op == OpArg { 1321 // Args are "pre-spilled" values. We don't allocate 1322 // any register here. We just set up the spill pointer to 1323 // point at itself and any later user will restore it to use it. 1324 s.values[v.ID].spill = v 1325 b.Values = append(b.Values, v) 1326 s.advanceUses(v) 1327 continue 1328 } 1329 if v.Op == OpKeepAlive { 1330 // Make sure the argument to v is still live here. 
1331 s.advanceUses(v) 1332 a := v.Args[0] 1333 vi := &s.values[a.ID] 1334 if vi.regs == 0 && !vi.rematerializeable { 1335 // Use the spill location. 1336 // This forces later liveness analysis to make the 1337 // value live at this point. 1338 v.SetArg(0, s.makeSpill(a, b)) 1339 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable { 1340 // Rematerializeable value with a gc.Node. This is the address of 1341 // a stack object (e.g. an LEAQ). Keep the object live. 1342 // Change it to VarLive, which is what plive expects for locals. 1343 v.Op = OpVarLive 1344 v.SetArgs1(v.Args[1]) 1345 v.Aux = a.Aux 1346 } else { 1347 // In-register and rematerializeable values are already live. 1348 // These are typically rematerializeable constants like nil, 1349 // or values of a variable that were modified since the last call. 1350 v.Op = OpCopy 1351 v.SetArgs1(v.Args[1]) 1352 } 1353 b.Values = append(b.Values, v) 1354 continue 1355 } 1356 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 { 1357 // No register allocation required (or none specified yet) 1358 if s.doClobber && v.Op.IsCall() { 1359 s.clobberRegs(regspec.clobbers) 1360 } 1361 s.freeRegs(regspec.clobbers) 1362 b.Values = append(b.Values, v) 1363 s.advanceUses(v) 1364 continue 1365 } 1366 1367 if s.values[v.ID].rematerializeable { 1368 // Value is rematerializeable, don't issue it here. 1369 // It will get issued just before each use (see 1370 // allocValueToReg). 1371 for _, a := range v.Args { 1372 a.Uses-- 1373 } 1374 s.advanceUses(v) 1375 continue 1376 } 1377 1378 if s.f.pass.debug > regDebug { 1379 fmt.Printf("value %s\n", v.LongString()) 1380 fmt.Printf(" out:") 1381 for _, r := range dinfo[idx].out { 1382 if r != noRegister { 1383 fmt.Printf(" %s", &s.registers[r]) 1384 } 1385 } 1386 fmt.Println() 1387 for i := 0; i < len(v.Args) && i < 3; i++ { 1388 fmt.Printf(" in%d:", i) 1389 for _, r := range dinfo[idx].in[i] { 1390 if r != noRegister { 1391 fmt.Printf(" %s", &s.registers[r]) 1392 } 1393 } 1394 fmt.Println() 1395 } 1396 } 1397 1398 // Move arguments to registers. 1399 // First, if an arg must be in a specific register and it is already 1400 // in place, keep it. 1401 args = append(args[:0], make([]*Value, len(v.Args))...) 1402 for i, a := range v.Args { 1403 if !s.values[a.ID].needReg { 1404 args[i] = a 1405 } 1406 } 1407 for _, i := range regspec.inputs { 1408 mask := i.regs 1409 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 { 1410 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos) 1411 } 1412 } 1413 // Then, if an arg must be in a specific register and that 1414 // register is free, allocate that one. Otherwise when processing 1415 // another input we may kick a value into the free register, which 1416 // then will be kicked out again. 1417 // This is a common case for passing-in-register arguments for 1418 // function calls. 1419 for { 1420 freed := false 1421 for _, i := range regspec.inputs { 1422 if args[i.idx] != nil { 1423 continue // already allocated 1424 } 1425 mask := i.regs 1426 if countRegs(mask) == 1 && mask&^s.used != 0 { 1427 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos) 1428 // If the input is in other registers that will be clobbered by v, 1429 // or the input is dead, free the registers. This may make room 1430 // for other inputs. 
						oldregs := s.values[v.Args[i.idx].ID].regs
						if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
							s.freeRegs(oldregs &^ mask &^ s.nospill)
							freed = true
						}
					}
				}
				if !freed {
					break
				}
			}
			// Last, allocate remaining ones, in an ordering defined
			// by the register specification (most constrained first).
			for _, i := range regspec.inputs {
				if args[i.idx] != nil {
					continue // already allocated
				}
				mask := i.regs
				if mask&s.values[v.Args[i.idx].ID].regs == 0 {
					// Need a new register for the input.
					mask &= s.allocatable
					mask &^= s.nospill
					// Use desired register if available.
					if i.idx < 3 {
						for _, r := range dinfo[idx].in[i.idx] {
							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
								// Desired register is allowed and unused.
								mask = regMask(1) << r
								break
							}
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
				}
				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
			}

			// If the output clobbers the input register, make sure we have
			// at least two copies of the input register so we don't
			// have to reload the value from the spill location.
			if opcodeTable[v.Op].resultInArg0 {
				var m regMask
				if !s.liveAfterCurrentInstruction(v.Args[0]) {
					// arg0 is dead. We can clobber its register.
					goto ok
				}
				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
					args[0], args[1] = args[1], args[0]
					goto ok
				}
				if s.values[v.Args[0].ID].rematerializeable {
					// We can rematerialize the input, don't worry about clobbering it.
					goto ok
				}
				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
					args[0], args[1] = args[1], args[0]
					goto ok
				}
				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
					// we have at least 2 copies of arg0. We can afford to clobber one.
					goto ok
				}
				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
					args[0], args[1] = args[1], args[0]
					goto ok
				}

				// We can't overwrite arg0 (or arg1, if commutative). So we
				// need to make a copy of an input so we have a register we can modify.

				// Possible new registers to copy into.
				m = s.compatRegs(v.Args[0].Type) &^ s.used
				if m == 0 {
					// No free registers. In this case we'll just clobber
					// an input and future uses of that input must use a restore.
					// TODO(khr): We should really do this like allocReg does it,
					// spilling the value with the most distant next use.
					goto ok
				}

				// Try to move an input to the desired output, if allowed.
				for _, r := range dinfo[idx].out {
					if r != noRegister && (m&regspec.outputs[0].regs)>>r&1 != 0 {
						m = regMask(1) << r
						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
						// Note: we update args[0] so the instruction will
						// use the register copy we just made.
						goto ok
					}
				}
				// Try to copy input to its desired location & use its old
				// location as the result register.
				for _, r := range dinfo[idx].in[0] {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
						s.copies[c] = false
						// Note: no update to args[0] so the instruction will
						// use the original copy.
1533 goto ok 1534 } 1535 } 1536 if opcodeTable[v.Op].commutative { 1537 for _, r := range dinfo[idx].in[1] { 1538 if r != noRegister && m>>r&1 != 0 { 1539 m = regMask(1) << r 1540 c := s.allocValToReg(v.Args[1], m, true, v.Pos) 1541 s.copies[c] = false 1542 args[0], args[1] = args[1], args[0] 1543 goto ok 1544 } 1545 } 1546 } 1547 // Avoid future fixed uses if we can. 1548 if m&^desired.avoid != 0 { 1549 m &^= desired.avoid 1550 } 1551 // Save input 0 to a new register so we can clobber it. 1552 c := s.allocValToReg(v.Args[0], m, true, v.Pos) 1553 s.copies[c] = false 1554 } 1555 1556 ok: 1557 // Pick a temporary register if needed. 1558 // It should be distinct from all the input registers, so we 1559 // allocate it after all the input registers, but before 1560 // the input registers are freed via advanceUses below. 1561 // (Not all instructions need that distinct part, but it is conservative.) 1562 if opcodeTable[v.Op].needIntTemp { 1563 m := s.allocatable & s.f.Config.gpRegMask 1564 if m&^desired.avoid&^s.nospill != 0 { 1565 m &^= desired.avoid 1566 } 1567 tmpReg = s.allocReg(m, &tmpVal) 1568 s.nospill |= regMask(1) << tmpReg 1569 } 1570 1571 // Now that all args are in regs, we're ready to issue the value itself. 1572 // Before we pick a register for the output value, allow input registers 1573 // to be deallocated. We do this here so that the output can use the 1574 // same register as a dying input. 1575 if !opcodeTable[v.Op].resultNotInArgs { 1576 s.tmpused = s.nospill 1577 s.nospill = 0 1578 s.advanceUses(v) // frees any registers holding args that are no longer live 1579 } 1580 1581 // Dump any registers which will be clobbered 1582 if s.doClobber && v.Op.IsCall() { 1583 // clobber registers that are marked as clobber in regmask, but 1584 // don't clobber inputs. 1585 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill) 1586 } 1587 s.freeRegs(regspec.clobbers) 1588 s.tmpused |= regspec.clobbers 1589 1590 // Pick registers for outputs. 1591 { 1592 outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below. 1593 maxOutIdx := -1 1594 var used regMask 1595 if tmpReg != noRegister { 1596 // Ensure output registers are distinct from the temporary register. 1597 // (Not all instructions need that distinct part, but it is conservative.) 1598 used |= regMask(1) << tmpReg 1599 } 1600 for _, out := range regspec.outputs { 1601 mask := out.regs & s.allocatable &^ used 1602 if mask == 0 { 1603 continue 1604 } 1605 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 { 1606 if !opcodeTable[v.Op].commutative { 1607 // Output must use the same register as input 0. 1608 r := register(s.f.getHome(args[0].ID).(*Register).num) 1609 if mask>>r&1 == 0 { 1610 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString()) 1611 } 1612 mask = regMask(1) << r 1613 } else { 1614 // Output must use the same register as input 0 or 1. 1615 r0 := register(s.f.getHome(args[0].ID).(*Register).num) 1616 r1 := register(s.f.getHome(args[1].ID).(*Register).num) 1617 // Check r0 and r1 for desired output register. 1618 found := false 1619 for _, r := range dinfo[idx].out { 1620 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 { 1621 mask = regMask(1) << r 1622 found = true 1623 if r == r1 { 1624 args[0], args[1] = args[1], args[0] 1625 } 1626 break 1627 } 1628 } 1629 if !found { 1630 // Neither are desired, pick r0. 
1631 mask = regMask(1) << r0 1632 } 1633 } 1634 } 1635 if out.idx == 0 { // desired registers only apply to the first element of a tuple result 1636 for _, r := range dinfo[idx].out { 1637 if r != noRegister && (mask&^s.used)>>r&1 != 0 { 1638 // Desired register is allowed and unused. 1639 mask = regMask(1) << r 1640 break 1641 } 1642 } 1643 } 1644 // Avoid registers we're saving for other values. 1645 if mask&^desired.avoid&^s.nospill != 0 { 1646 mask &^= desired.avoid 1647 } 1648 r := s.allocReg(mask, v) 1649 if out.idx > maxOutIdx { 1650 maxOutIdx = out.idx 1651 } 1652 outRegs[out.idx] = r 1653 used |= regMask(1) << r 1654 s.tmpused |= regMask(1) << r 1655 } 1656 // Record register choices 1657 if v.Type.IsTuple() { 1658 var outLocs LocPair 1659 if r := outRegs[0]; r != noRegister { 1660 outLocs[0] = &s.registers[r] 1661 } 1662 if r := outRegs[1]; r != noRegister { 1663 outLocs[1] = &s.registers[r] 1664 } 1665 s.f.setHome(v, outLocs) 1666 // Note that subsequent SelectX instructions will do the assignReg calls. 1667 } else if v.Type.IsResults() { 1668 // preallocate outLocs to the right size, which is maxOutIdx+1 1669 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1) 1670 for i := 0; i <= maxOutIdx; i++ { 1671 if r := outRegs[i]; r != noRegister { 1672 outLocs[i] = &s.registers[r] 1673 } 1674 } 1675 s.f.setHome(v, outLocs) 1676 } else { 1677 if r := outRegs[0]; r != noRegister { 1678 s.assignReg(r, v, v) 1679 } 1680 } 1681 if tmpReg != noRegister { 1682 // Remember the temp register allocation, if any. 1683 if s.f.tempRegs == nil { 1684 s.f.tempRegs = map[ID]*Register{} 1685 } 1686 s.f.tempRegs[v.ID] = &s.registers[tmpReg] 1687 } 1688 } 1689 1690 // deallocate dead args, if we have not done so 1691 if opcodeTable[v.Op].resultNotInArgs { 1692 s.nospill = 0 1693 s.advanceUses(v) // frees any registers holding args that are no longer live 1694 } 1695 s.tmpused = 0 1696 1697 // Issue the Value itself. 1698 for i, a := range args { 1699 v.SetArg(i, a) // use register version of arguments 1700 } 1701 b.Values = append(b.Values, v) 1702 } 1703 1704 // Copy the control values - we need this so we can reduce the 1705 // uses property of these values later. 1706 controls := append(make([]*Value, 0, 2), b.ControlValues()...) 1707 1708 // Load control values into registers. 1709 for i, v := range b.ControlValues() { 1710 if !s.values[v.ID].needReg { 1711 continue 1712 } 1713 if s.f.pass.debug > regDebug { 1714 fmt.Printf(" processing control %s\n", v.LongString()) 1715 } 1716 // We assume that a control input can be passed in any 1717 // type-compatible register. If this turns out not to be true, 1718 // we'll need to introduce a regspec for a block's control value. 1719 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos)) 1720 } 1721 1722 // Reduce the uses of the control values once registers have been loaded. 1723 // This loop is equivalent to the advanceUses method. 1724 for _, v := range controls { 1725 vi := &s.values[v.ID] 1726 if !vi.needReg { 1727 continue 1728 } 1729 // Remove this use from the uses list. 1730 u := vi.uses 1731 vi.uses = u.next 1732 if u.next == nil { 1733 s.freeRegs(vi.regs) // value is dead 1734 } 1735 u.next = s.freeUseRecords 1736 s.freeUseRecords = u 1737 } 1738 1739 // If we are approaching a merge point and we are the primary 1740 // predecessor of it, find live values that we use soon after 1741 // the merge point and promote them to registers now. 
1742 if len(b.Succs) == 1 { 1743 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil { 1744 s.freeReg(s.GReg) // Spill value in G register before any merge. 1745 } 1746 // For this to be worthwhile, the loop must have no calls in it. 1747 top := b.Succs[0].b 1748 loop := s.loopnest.b2l[top.ID] 1749 if loop == nil || loop.header != top || loop.containsUnavoidableCall { 1750 goto badloop 1751 } 1752 1753 // TODO: sort by distance, pick the closest ones? 1754 for _, live := range s.live[b.ID] { 1755 if live.dist >= unlikelyDistance { 1756 // Don't preload anything live after the loop. 1757 continue 1758 } 1759 vid := live.ID 1760 vi := &s.values[vid] 1761 if vi.regs != 0 { 1762 continue 1763 } 1764 if vi.rematerializeable { 1765 continue 1766 } 1767 v := s.orig[vid] 1768 m := s.compatRegs(v.Type) &^ s.used 1769 // Used desired register if available. 1770 outerloop: 1771 for _, e := range desired.entries { 1772 if e.ID != v.ID { 1773 continue 1774 } 1775 for _, r := range e.regs { 1776 if r != noRegister && m>>r&1 != 0 { 1777 m = regMask(1) << r 1778 break outerloop 1779 } 1780 } 1781 } 1782 if m&^desired.avoid != 0 { 1783 m &^= desired.avoid 1784 } 1785 if m != 0 { 1786 s.allocValToReg(v, m, false, b.Pos) 1787 } 1788 } 1789 } 1790 badloop: 1791 ; 1792 1793 // Save end-of-block register state. 1794 // First count how many, this cuts allocations in half. 1795 k := 0 1796 for r := register(0); r < s.numRegs; r++ { 1797 v := s.regs[r].v 1798 if v == nil { 1799 continue 1800 } 1801 k++ 1802 } 1803 regList := make([]endReg, 0, k) 1804 for r := register(0); r < s.numRegs; r++ { 1805 v := s.regs[r].v 1806 if v == nil { 1807 continue 1808 } 1809 regList = append(regList, endReg{r, v, s.regs[r].c}) 1810 } 1811 s.endRegs[b.ID] = regList 1812 1813 if checkEnabled { 1814 regValLiveSet.clear() 1815 for _, x := range s.live[b.ID] { 1816 regValLiveSet.add(x.ID) 1817 } 1818 for r := register(0); r < s.numRegs; r++ { 1819 v := s.regs[r].v 1820 if v == nil { 1821 continue 1822 } 1823 if !regValLiveSet.contains(v.ID) { 1824 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b) 1825 } 1826 } 1827 } 1828 1829 // If a value is live at the end of the block and 1830 // isn't in a register, generate a use for the spill location. 1831 // We need to remember this information so that 1832 // the liveness analysis in stackalloc is correct. 1833 for _, e := range s.live[b.ID] { 1834 vi := &s.values[e.ID] 1835 if vi.regs != 0 { 1836 // in a register, we'll use that source for the merge. 1837 continue 1838 } 1839 if vi.rematerializeable { 1840 // we'll rematerialize during the merge. 1841 continue 1842 } 1843 if s.f.pass.debug > regDebug { 1844 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b) 1845 } 1846 spill := s.makeSpill(s.orig[e.ID], b) 1847 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID) 1848 } 1849 1850 // Clear any final uses. 1851 // All that is left should be the pseudo-uses added for values which 1852 // are live at the end of b. 1853 for _, e := range s.live[b.ID] { 1854 u := s.values[e.ID].uses 1855 if u == nil { 1856 f.Fatalf("live at end, no uses v%d", e.ID) 1857 } 1858 if u.next != nil { 1859 f.Fatalf("live at end, too many uses v%d", e.ID) 1860 } 1861 s.values[e.ID].uses = nil 1862 u.next = s.freeUseRecords 1863 s.freeUseRecords = u 1864 } 1865 } 1866 1867 // Decide where the spills we generated will go. 1868 s.placeSpills() 1869 1870 // Anything that didn't get a register gets a stack location here. 1871 // (StoreReg, stack-based phis, inputs, ...) 
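	// For illustration: if a value x is live at the end of some block b but
	// exists only in its stack slot (no register copy, not rematerializeable),
	// the loop above recorded x's StoreReg in s.spillLive[b.ID]. Passing
	// spillLive into stackalloc below keeps that slot live across b's
	// out-edges, so the edge fixup code in shuffle can still load x from it.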
1872 stacklive := stackalloc(s.f, s.spillLive) 1873 1874 // Fix up all merge edges. 1875 s.shuffle(stacklive) 1876 1877 // Erase any copies we never used. 1878 // Also, an unused copy might be the only use of another copy, 1879 // so continue erasing until we reach a fixed point. 1880 for { 1881 progress := false 1882 for c, used := range s.copies { 1883 if !used && c.Uses == 0 { 1884 if s.f.pass.debug > regDebug { 1885 fmt.Printf("delete copied value %s\n", c.LongString()) 1886 } 1887 c.resetArgs() 1888 f.freeValue(c) 1889 delete(s.copies, c) 1890 progress = true 1891 } 1892 } 1893 if !progress { 1894 break 1895 } 1896 } 1897 1898 for _, b := range s.visitOrder { 1899 i := 0 1900 for _, v := range b.Values { 1901 if v.Op == OpInvalid { 1902 continue 1903 } 1904 b.Values[i] = v 1905 i++ 1906 } 1907 b.Values = b.Values[:i] 1908 } 1909 } 1910 1911 func (s *regAllocState) placeSpills() { 1912 mustBeFirst := func(op Op) bool { 1913 return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg 1914 } 1915 1916 // Start maps block IDs to the list of spills 1917 // that go at the start of the block (but after any phis). 1918 start := map[ID][]*Value{} 1919 // After maps value IDs to the list of spills 1920 // that go immediately after that value ID. 1921 after := map[ID][]*Value{} 1922 1923 for i := range s.values { 1924 vi := s.values[i] 1925 spill := vi.spill 1926 if spill == nil { 1927 continue 1928 } 1929 if spill.Block != nil { 1930 // Some spills are already fully set up, 1931 // like OpArgs and stack-based phis. 1932 continue 1933 } 1934 v := s.orig[i] 1935 1936 // Walk down the dominator tree looking for a good place to 1937 // put the spill of v. At the start "best" is the best place 1938 // we have found so far. 1939 // TODO: find a way to make this O(1) without arbitrary cutoffs. 1940 if v == nil { 1941 panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString())) 1942 } 1943 best := v.Block 1944 bestArg := v 1945 var bestDepth int16 1946 if l := s.loopnest.b2l[best.ID]; l != nil { 1947 bestDepth = l.depth 1948 } 1949 b := best 1950 const maxSpillSearch = 100 1951 for i := 0; i < maxSpillSearch; i++ { 1952 // Find the child of b in the dominator tree which 1953 // dominates all restores. 1954 p := b 1955 b = nil 1956 for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 { 1957 if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax { 1958 // c also dominates all restores. Walk down into c. 1959 b = c 1960 break 1961 } 1962 } 1963 if b == nil { 1964 // Ran out of blocks which dominate all restores. 1965 break 1966 } 1967 1968 var depth int16 1969 if l := s.loopnest.b2l[b.ID]; l != nil { 1970 depth = l.depth 1971 } 1972 if depth > bestDepth { 1973 // Don't push the spill into a deeper loop. 1974 continue 1975 } 1976 1977 // If v is in a register at the start of b, we can 1978 // place the spill here (after the phis). 1979 if len(b.Preds) == 1 { 1980 for _, e := range s.endRegs[b.Preds[0].b.ID] { 1981 if e.v == v { 1982 // Found a better spot for the spill. 1983 best = b 1984 bestArg = e.c 1985 bestDepth = depth 1986 break 1987 } 1988 } 1989 } else { 1990 for _, e := range s.startRegs[b.ID] { 1991 if e.v == v { 1992 // Found a better spot for the spill. 1993 best = b 1994 bestArg = e.c 1995 bestDepth = depth 1996 break 1997 } 1998 } 1999 } 2000 } 2001 2002 // Put the spill in the best block we found. 
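		// Worked example (hypothetical blocks): v is defined in b1 inside a
		// loop and its only restores are in b7 and b9 after the loop. Starting
		// from best = b1, the walk descends to the dominator-tree child that
		// still dominates both restores, never accepting a block in a deeper
		// loop than the current best. If it reaches a block where v is already
		// in a register on entry (startRegs, or the predecessor's endRegs when
		// there is a single predecessor), the spill moves there, so the store
		// is no longer executed on every loop iteration.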
2003 spill.Block = best 2004 spill.AddArg(bestArg) 2005 if best == v.Block && !mustBeFirst(v.Op) { 2006 // Place immediately after v. 2007 after[v.ID] = append(after[v.ID], spill) 2008 } else { 2009 // Place at the start of best block. 2010 start[best.ID] = append(start[best.ID], spill) 2011 } 2012 } 2013 2014 // Insert spill instructions into the block schedules. 2015 var oldSched []*Value 2016 for _, b := range s.visitOrder { 2017 nfirst := 0 2018 for _, v := range b.Values { 2019 if !mustBeFirst(v.Op) { 2020 break 2021 } 2022 nfirst++ 2023 } 2024 oldSched = append(oldSched[:0], b.Values[nfirst:]...) 2025 b.Values = b.Values[:nfirst] 2026 b.Values = append(b.Values, start[b.ID]...) 2027 for _, v := range oldSched { 2028 b.Values = append(b.Values, v) 2029 b.Values = append(b.Values, after[v.ID]...) 2030 } 2031 } 2032 } 2033 2034 // shuffle fixes up all the merge edges (those going into blocks of indegree > 1). 2035 func (s *regAllocState) shuffle(stacklive [][]ID) { 2036 var e edgeState 2037 e.s = s 2038 e.cache = map[ID][]*Value{} 2039 e.contents = map[Location]contentRecord{} 2040 if s.f.pass.debug > regDebug { 2041 fmt.Printf("shuffle %s\n", s.f.Name) 2042 fmt.Println(s.f.String()) 2043 } 2044 2045 for _, b := range s.visitOrder { 2046 if len(b.Preds) <= 1 { 2047 continue 2048 } 2049 e.b = b 2050 for i, edge := range b.Preds { 2051 p := edge.b 2052 e.p = p 2053 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID]) 2054 e.process() 2055 } 2056 } 2057 2058 if s.f.pass.debug > regDebug { 2059 fmt.Printf("post shuffle %s\n", s.f.Name) 2060 fmt.Println(s.f.String()) 2061 } 2062 } 2063 2064 type edgeState struct { 2065 s *regAllocState 2066 p, b *Block // edge goes from p->b. 2067 2068 // for each pre-regalloc value, a list of equivalent cached values 2069 cache map[ID][]*Value 2070 cachedVals []ID // (superset of) keys of the above map, for deterministic iteration 2071 2072 // map from location to the value it contains 2073 contents map[Location]contentRecord 2074 2075 // desired destination locations 2076 destinations []dstRecord 2077 extra []dstRecord 2078 2079 usedRegs regMask // registers currently holding something 2080 uniqueRegs regMask // registers holding the only copy of a value 2081 finalRegs regMask // registers holding final target 2082 rematerializeableRegs regMask // registers that hold rematerializeable values 2083 } 2084 2085 type contentRecord struct { 2086 vid ID // pre-regalloc value 2087 c *Value // cached value 2088 final bool // this is a satisfied destination 2089 pos src.XPos // source position of use of the value 2090 } 2091 2092 type dstRecord struct { 2093 loc Location // register or stack slot 2094 vid ID // pre-regalloc value it should contain 2095 splice **Value // place to store reference to the generating instruction 2096 pos src.XPos // source position of use of this location 2097 } 2098 2099 // setup initializes the edge state for shuffling. 2100 func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) { 2101 if e.s.f.pass.debug > regDebug { 2102 fmt.Printf("edge %s->%s\n", e.p, e.b) 2103 } 2104 2105 // Clear state. 2106 for _, vid := range e.cachedVals { 2107 delete(e.cache, vid) 2108 } 2109 e.cachedVals = e.cachedVals[:0] 2110 for k := range e.contents { 2111 delete(e.contents, k) 2112 } 2113 e.usedRegs = 0 2114 e.uniqueRegs = 0 2115 e.finalRegs = 0 2116 e.rematerializeableRegs = 0 2117 2118 // Live registers can be sources. 
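	// For illustration, on an edge p->b: the sources set up below are whatever
	// endRegs[p] says is in registers at the end of p, plus any spill slots
	// whose StoreReg dominates p. The destinations are what startRegs[b]
	// expects in registers at the start of b, plus the locations of b's phis,
	// each paired with the phi argument for this particular predecessor index.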
2119 	for _, x := range srcReg {
2120 		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // the source position doesn't matter here
2121 	}
2122 	// So can all of the spill locations.
2123 	for _, spillID := range stacklive {
2124 		v := e.s.orig[spillID]
2125 		spill := e.s.values[v.ID].spill
2126 		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
2127 			// Spills were placed so that they dominate only the uses found
2128 			// during the first regalloc pass. The edge fixup code
2129 			// can't use a spill location if the spill doesn't dominate
2130 			// the edge.
2131 			// We are guaranteed that if the spill doesn't dominate this edge,
2132 			// then the value is available in a register (because we called
2133 			// makeSpill for every value not in a register at the start
2134 			// of an edge).
2135 			continue
2136 		}
2137 		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // the source position doesn't matter here
2138 	}
2139 
2140 	// Figure out all the destinations we need.
2141 	dsts := e.destinations[:0]
2142 	for _, x := range dstReg {
2143 		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
2144 	}
2145 	// Phis need their args to end up in a specific location.
2146 	for _, v := range e.b.Values {
2147 		if v.Op != OpPhi {
2148 			break
2149 		}
2150 		loc := e.s.f.getHome(v.ID)
2151 		if loc == nil {
2152 			continue
2153 		}
2154 		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
2155 	}
2156 	e.destinations = dsts
2157 
2158 	if e.s.f.pass.debug > regDebug {
2159 		for _, vid := range e.cachedVals {
2160 			a := e.cache[vid]
2161 			for _, c := range a {
2162 				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
2163 			}
2164 		}
2165 		for _, d := range e.destinations {
2166 			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
2167 		}
2168 	}
2169 }
2170 
2171 // process generates code to move all the values to the right destination locations.
2172 func (e *edgeState) process() {
2173 	dsts := e.destinations
2174 
2175 	// Process the destinations until they are all satisfied.
2176 	for len(dsts) > 0 {
2177 		i := 0
2178 		for _, d := range dsts {
2179 			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
2180 				// Failed - save for next iteration.
2181 				dsts[i] = d
2182 				i++
2183 			}
2184 		}
2185 		if i < len(dsts) {
2186 			// Made some progress. Go around again.
2187 			dsts = dsts[:i]
2188 
2189 			// Append any extra destinations we generated.
2190 			dsts = append(dsts, e.extra...)
2191 			e.extra = e.extra[:0]
2192 			continue
2193 		}
2194 
2195 		// We made no progress. That means that any
2196 		// remaining unsatisfied moves are in simple cycles.
2197 		// For example, A -> B -> C -> D -> A.
2198 		//   A ----> B
2199 		//   ^       |
2200 		//   |       |
2201 		//   |       v
2202 		//   D <---- C
2203 
2204 		// To break the cycle, we pick an unused register, say R,
2205 		// and put a copy of B there.
2206 		//   A ----> B
2207 		//   ^       |
2208 		//   |       |
2209 		//   |       v
2210 		//   D <---- C <---- R=copyofB
2211 		// When we resume the outer loop, the A->B move can now proceed,
2212 		// and eventually the whole cycle completes.
2213 
2214 		// Copy any cycle location to a temp register. This duplicates
2215 		// one of the cycle entries, allowing the just duplicated value
2216 		// to be overwritten and the cycle to proceed.
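		// Concrete example (hypothetical values/registers): the remaining
		// moves are v1: AX->BX, v2: BX->CX, v3: CX->AX. None can go first
		// without clobbering the only copy of another value. We pick a free
		// scratch register, say DX, copy BX's current contents (v2) into DX,
		// and record DX as another source for v2. Now AX->BX can proceed,
		// then DX->CX, then CX->AX, resolving the cycle with one extra copy.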
2217 d := dsts[0] 2218 loc := d.loc 2219 vid := e.contents[loc].vid 2220 c := e.contents[loc].c 2221 r := e.findRegFor(c.Type) 2222 if e.s.f.pass.debug > regDebug { 2223 fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c) 2224 } 2225 e.erase(r) 2226 pos := d.pos.WithNotStmt() 2227 if _, isReg := loc.(*Register); isReg { 2228 c = e.p.NewValue1(pos, OpCopy, c.Type, c) 2229 } else { 2230 c = e.p.NewValue1(pos, OpLoadReg, c.Type, c) 2231 } 2232 e.set(r, vid, c, false, pos) 2233 if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) { 2234 e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString()) 2235 } 2236 } 2237 } 2238 2239 // processDest generates code to put value vid into location loc. Returns true 2240 // if progress was made. 2241 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool { 2242 pos = pos.WithNotStmt() 2243 occupant := e.contents[loc] 2244 if occupant.vid == vid { 2245 // Value is already in the correct place. 2246 e.contents[loc] = contentRecord{vid, occupant.c, true, pos} 2247 if splice != nil { 2248 (*splice).Uses-- 2249 *splice = occupant.c 2250 occupant.c.Uses++ 2251 } 2252 // Note: if splice==nil then c will appear dead. This is 2253 // non-SSA formed code, so be careful after this pass not to run 2254 // deadcode elimination. 2255 if _, ok := e.s.copies[occupant.c]; ok { 2256 // The copy at occupant.c was used to avoid spill. 2257 e.s.copies[occupant.c] = true 2258 } 2259 return true 2260 } 2261 2262 // Check if we're allowed to clobber the destination location. 2263 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable { 2264 // We can't overwrite the last copy 2265 // of a value that needs to survive. 2266 return false 2267 } 2268 2269 // Copy from a source of v, register preferred. 2270 v := e.s.orig[vid] 2271 var c *Value 2272 var src Location 2273 if e.s.f.pass.debug > regDebug { 2274 fmt.Printf("moving v%d to %s\n", vid, loc) 2275 fmt.Printf("sources of v%d:", vid) 2276 } 2277 for _, w := range e.cache[vid] { 2278 h := e.s.f.getHome(w.ID) 2279 if e.s.f.pass.debug > regDebug { 2280 fmt.Printf(" %s:%s", h, w) 2281 } 2282 _, isreg := h.(*Register) 2283 if src == nil || isreg { 2284 c = w 2285 src = h 2286 } 2287 } 2288 if e.s.f.pass.debug > regDebug { 2289 if src != nil { 2290 fmt.Printf(" [use %s]\n", src) 2291 } else { 2292 fmt.Printf(" [no source]\n") 2293 } 2294 } 2295 _, dstReg := loc.(*Register) 2296 2297 // Pre-clobber destination. This avoids the 2298 // following situation: 2299 // - v is currently held in R0 and stacktmp0. 2300 // - We want to copy stacktmp1 to stacktmp0. 2301 // - We choose R0 as the temporary register. 2302 // During the copy, both R0 and stacktmp0 are 2303 // clobbered, losing both copies of v. Oops! 2304 // Erasing the destination early means R0 will not 2305 // be chosen as the temp register, as it will then 2306 // be the last copy of v. 2307 e.erase(loc) 2308 var x *Value 2309 if c == nil || e.s.values[vid].rematerializeable { 2310 if !e.s.values[vid].rematerializeable { 2311 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString()) 2312 } 2313 if dstReg { 2314 x = v.copyInto(e.p) 2315 } else { 2316 // Rematerialize into stack slot. Need a free 2317 // register to accomplish this. 
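			// For illustration: a rematerializeable value such as a small
			// constant (e.g. an AMD64 MOVQconst) has no stored copy to load
			// from, so to satisfy a stack-slot destination we regenerate it
			// into a scratch register first and then StoreReg it into the
			// slot, using the slot's type so the store has the right width.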
2318 r := e.findRegFor(v.Type) 2319 e.erase(r) 2320 x = v.copyIntoWithXPos(e.p, pos) 2321 e.set(r, vid, x, false, pos) 2322 // Make sure we spill with the size of the slot, not the 2323 // size of x (which might be wider due to our dropping 2324 // of narrowing conversions). 2325 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x) 2326 } 2327 } else { 2328 // Emit move from src to dst. 2329 _, srcReg := src.(*Register) 2330 if srcReg { 2331 if dstReg { 2332 x = e.p.NewValue1(pos, OpCopy, c.Type, c) 2333 } else { 2334 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c) 2335 } 2336 } else { 2337 if dstReg { 2338 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c) 2339 } else { 2340 // mem->mem. Use temp register. 2341 r := e.findRegFor(c.Type) 2342 e.erase(r) 2343 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c) 2344 e.set(r, vid, t, false, pos) 2345 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t) 2346 } 2347 } 2348 } 2349 e.set(loc, vid, x, true, pos) 2350 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) { 2351 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString()) 2352 } 2353 if splice != nil { 2354 (*splice).Uses-- 2355 *splice = x 2356 x.Uses++ 2357 } 2358 return true 2359 } 2360 2361 // set changes the contents of location loc to hold the given value and its cached representative. 2362 func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) { 2363 e.s.f.setHome(c, loc) 2364 e.contents[loc] = contentRecord{vid, c, final, pos} 2365 a := e.cache[vid] 2366 if len(a) == 0 { 2367 e.cachedVals = append(e.cachedVals, vid) 2368 } 2369 a = append(a, c) 2370 e.cache[vid] = a 2371 if r, ok := loc.(*Register); ok { 2372 if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 { 2373 e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c) 2374 } 2375 e.usedRegs |= regMask(1) << uint(r.num) 2376 if final { 2377 e.finalRegs |= regMask(1) << uint(r.num) 2378 } 2379 if len(a) == 1 { 2380 e.uniqueRegs |= regMask(1) << uint(r.num) 2381 } 2382 if len(a) == 2 { 2383 if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2384 e.uniqueRegs &^= regMask(1) << uint(t.num) 2385 } 2386 } 2387 if e.s.values[vid].rematerializeable { 2388 e.rematerializeableRegs |= regMask(1) << uint(r.num) 2389 } 2390 } 2391 if e.s.f.pass.debug > regDebug { 2392 fmt.Printf("%s\n", c.LongString()) 2393 fmt.Printf("v%d now available in %s:%s\n", vid, loc, c) 2394 } 2395 } 2396 2397 // erase removes any user of loc. 2398 func (e *edgeState) erase(loc Location) { 2399 cr := e.contents[loc] 2400 if cr.c == nil { 2401 return 2402 } 2403 vid := cr.vid 2404 2405 if cr.final { 2406 // Add a destination to move this value back into place. 2407 // Make sure it gets added to the tail of the destination queue 2408 // so we make progress on other moves first. 2409 e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos}) 2410 } 2411 2412 // Remove c from the list of cached values. 2413 a := e.cache[vid] 2414 for i, c := range a { 2415 if e.s.f.getHome(c.ID) == loc { 2416 if e.s.f.pass.debug > regDebug { 2417 fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c) 2418 } 2419 a[i], a = a[len(a)-1], a[:len(a)-1] 2420 break 2421 } 2422 } 2423 e.cache[vid] = a 2424 2425 // Update register masks. 
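	// For illustration: suppose v was cached both in AX and in a stack slot,
	// and the stack copy is the one just removed. The cache list now has a
	// single entry living in AX, so the code below re-marks AX as unique;
	// findRegFor will then prefer not to hand out AX as a scratch register,
	// since clobbering it would destroy the only remaining copy of v.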
2426 if r, ok := loc.(*Register); ok { 2427 e.usedRegs &^= regMask(1) << uint(r.num) 2428 if cr.final { 2429 e.finalRegs &^= regMask(1) << uint(r.num) 2430 } 2431 e.rematerializeableRegs &^= regMask(1) << uint(r.num) 2432 } 2433 if len(a) == 1 { 2434 if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2435 e.uniqueRegs |= regMask(1) << uint(r.num) 2436 } 2437 } 2438 } 2439 2440 // findRegFor finds a register we can use to make a temp copy of type typ. 2441 func (e *edgeState) findRegFor(typ *types.Type) Location { 2442 // Which registers are possibilities. 2443 types := &e.s.f.Config.Types 2444 m := e.s.compatRegs(typ) 2445 2446 // Pick a register. In priority order: 2447 // 1) an unused register 2448 // 2) a non-unique register not holding a final value 2449 // 3) a non-unique register 2450 // 4) a register holding a rematerializeable value 2451 x := m &^ e.usedRegs 2452 if x != 0 { 2453 return &e.s.registers[pickReg(x)] 2454 } 2455 x = m &^ e.uniqueRegs &^ e.finalRegs 2456 if x != 0 { 2457 return &e.s.registers[pickReg(x)] 2458 } 2459 x = m &^ e.uniqueRegs 2460 if x != 0 { 2461 return &e.s.registers[pickReg(x)] 2462 } 2463 x = m & e.rematerializeableRegs 2464 if x != 0 { 2465 return &e.s.registers[pickReg(x)] 2466 } 2467 2468 // No register is available. 2469 // Pick a register to spill. 2470 for _, vid := range e.cachedVals { 2471 a := e.cache[vid] 2472 for _, c := range a { 2473 if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 { 2474 if !c.rematerializeable() { 2475 x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c) 2476 // Allocate a temp location to spill a register to. 2477 // The type of the slot is immaterial - it will not be live across 2478 // any safepoint. Just use a type big enough to hold any register. 2479 t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64} 2480 // TODO: reuse these slots. They'll need to be erased first. 2481 e.set(t, vid, x, false, c.Pos) 2482 if e.s.f.pass.debug > regDebug { 2483 fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString()) 2484 } 2485 } 2486 // r will now be overwritten by the caller. At some point 2487 // later, the newly saved value will be moved back to its 2488 // final destination in processDest. 2489 return r 2490 } 2491 } 2492 } 2493 2494 fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs) 2495 for _, vid := range e.cachedVals { 2496 a := e.cache[vid] 2497 for _, c := range a { 2498 fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID)) 2499 } 2500 } 2501 e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b) 2502 return nil 2503 } 2504 2505 // rematerializeable reports whether the register allocator should recompute 2506 // a value instead of spilling/restoring it. 2507 func (v *Value) rematerializeable() bool { 2508 if !opcodeTable[v.Op].rematerializeable { 2509 return false 2510 } 2511 for _, a := range v.Args { 2512 // SP and SB (generated by OpSP and OpSB) are always available. 2513 if a.Op != OpSP && a.Op != OpSB { 2514 return false 2515 } 2516 } 2517 return true 2518 } 2519 2520 type liveInfo struct { 2521 ID ID // ID of value 2522 dist int32 // # of instructions before next use 2523 pos src.XPos // source position of next use 2524 } 2525 2526 // computeLive computes a map from block ID to a list of value IDs live at the end 2527 // of that block. Together with the value ID is a count of how many instructions 2528 // to the next use of that value. The resulting map is stored in s.live. 
2529 // computeLive also computes the desired register information at the end of each block. 2530 // This desired register information is stored in s.desired. 2531 // TODO: this could be quadratic if lots of variables are live across lots of 2532 // basic blocks. Figure out a way to make this function (or, more precisely, the user 2533 // of this function) require only linear size & time. 2534 func (s *regAllocState) computeLive() { 2535 f := s.f 2536 s.live = make([][]liveInfo, f.NumBlocks()) 2537 s.desired = make([]desiredState, f.NumBlocks()) 2538 var phis []*Value 2539 2540 live := f.newSparseMapPos(f.NumValues()) 2541 defer f.retSparseMapPos(live) 2542 t := f.newSparseMapPos(f.NumValues()) 2543 defer f.retSparseMapPos(t) 2544 2545 // Keep track of which value we want in each register. 2546 var desired desiredState 2547 2548 // Instead of iterating over f.Blocks, iterate over their postordering. 2549 // Liveness information flows backward, so starting at the end 2550 // increases the probability that we will stabilize quickly. 2551 // TODO: Do a better job yet. Here's one possibility: 2552 // Calculate the dominator tree and locate all strongly connected components. 2553 // If a value is live in one block of an SCC, it is live in all. 2554 // Walk the dominator tree from end to beginning, just once, treating SCC 2555 // components as single blocks, duplicated calculated liveness information 2556 // out to all of them. 2557 po := f.postorder() 2558 s.loopnest = f.loopnest() 2559 s.loopnest.calculateDepths() 2560 for { 2561 changed := false 2562 2563 for _, b := range po { 2564 // Start with known live values at the end of the block. 2565 // Add len(b.Values) to adjust from end-of-block distance 2566 // to beginning-of-block distance. 2567 live.clear() 2568 for _, e := range s.live[b.ID] { 2569 live.set(e.ID, e.dist+int32(len(b.Values)), e.pos) 2570 } 2571 2572 // Mark control values as live 2573 for _, c := range b.ControlValues() { 2574 if s.values[c.ID].needReg { 2575 live.set(c.ID, int32(len(b.Values)), b.Pos) 2576 } 2577 } 2578 2579 // Propagate backwards to the start of the block 2580 // Assumes Values have been scheduled. 2581 phis = phis[:0] 2582 for i := len(b.Values) - 1; i >= 0; i-- { 2583 v := b.Values[i] 2584 live.remove(v.ID) 2585 if v.Op == OpPhi { 2586 // save phi ops for later 2587 phis = append(phis, v) 2588 continue 2589 } 2590 if opcodeTable[v.Op].call { 2591 c := live.contents() 2592 for i := range c { 2593 c[i].val += unlikelyDistance 2594 } 2595 } 2596 for _, a := range v.Args { 2597 if s.values[a.ID].needReg { 2598 live.set(a.ID, int32(i), v.Pos) 2599 } 2600 } 2601 } 2602 // Propagate desired registers backwards. 2603 desired.copy(&s.desired[b.ID]) 2604 for i := len(b.Values) - 1; i >= 0; i-- { 2605 v := b.Values[i] 2606 prefs := desired.remove(v.ID) 2607 if v.Op == OpPhi { 2608 // TODO: if v is a phi, save desired register for phi inputs. 2609 // For now, we just drop it and don't propagate 2610 // desired registers back though phi nodes. 2611 continue 2612 } 2613 regspec := s.regspec(v) 2614 // Cancel desired registers if they get clobbered. 2615 desired.clobber(regspec.clobbers) 2616 // Update desired registers if there are any fixed register inputs. 2617 for _, j := range regspec.inputs { 2618 if countRegs(j.regs) != 1 { 2619 continue 2620 } 2621 desired.clobber(j.regs) 2622 desired.add(v.Args[j.idx].ID, pickReg(j.regs)) 2623 } 2624 // Set desired register of input 0 if this is a 2-operand instruction. 
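				// For illustration: if x = ADDQ y z and a later fixed-register
				// use wants x in AX, then AX is also recorded as desired for y
				// (and for z, since ADDQ is commutative); keeping an input in
				// AX lets the two-operand ADDQ produce its result there
				// without an extra copy.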
2625 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 { 2626 // ADDQconst is added here because we want to treat it as resultInArg0 for 2627 // the purposes of desired registers, even though it is not an absolute requirement. 2628 // This is because we'd rather implement it as ADDQ instead of LEAQ. 2629 // Same for ADDLconst 2630 // Select0 is added here to propagate the desired register to the tuple-generating instruction. 2631 if opcodeTable[v.Op].commutative { 2632 desired.addList(v.Args[1].ID, prefs) 2633 } 2634 desired.addList(v.Args[0].ID, prefs) 2635 } 2636 } 2637 2638 // For each predecessor of b, expand its list of live-at-end values. 2639 // invariant: live contains the values live at the start of b (excluding phi inputs) 2640 for i, e := range b.Preds { 2641 p := e.b 2642 // Compute additional distance for the edge. 2643 // Note: delta must be at least 1 to distinguish the control 2644 // value use from the first user in a successor block. 2645 delta := int32(normalDistance) 2646 if len(p.Succs) == 2 { 2647 if p.Succs[0].b == b && p.Likely == BranchLikely || 2648 p.Succs[1].b == b && p.Likely == BranchUnlikely { 2649 delta = likelyDistance 2650 } 2651 if p.Succs[0].b == b && p.Likely == BranchUnlikely || 2652 p.Succs[1].b == b && p.Likely == BranchLikely { 2653 delta = unlikelyDistance 2654 } 2655 } 2656 2657 // Update any desired registers at the end of p. 2658 s.desired[p.ID].merge(&desired) 2659 2660 // Start t off with the previously known live values at the end of p. 2661 t.clear() 2662 for _, e := range s.live[p.ID] { 2663 t.set(e.ID, e.dist, e.pos) 2664 } 2665 update := false 2666 2667 // Add new live values from scanning this block. 2668 for _, e := range live.contents() { 2669 d := e.val + delta 2670 if !t.contains(e.key) || d < t.get(e.key) { 2671 update = true 2672 t.set(e.key, d, e.pos) 2673 } 2674 } 2675 // Also add the correct arg from the saved phi values. 2676 // All phis are at distance delta (we consider them 2677 // simultaneously happening at the start of the block). 2678 for _, v := range phis { 2679 id := v.Args[i].ID 2680 if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) { 2681 update = true 2682 t.set(id, delta, v.Pos) 2683 } 2684 } 2685 2686 if !update { 2687 continue 2688 } 2689 // The live set has changed, update it. 2690 l := s.live[p.ID][:0] 2691 if cap(l) < t.size() { 2692 l = make([]liveInfo, 0, t.size()) 2693 } 2694 for _, e := range t.contents() { 2695 l = append(l, liveInfo{e.key, e.val, e.pos}) 2696 } 2697 s.live[p.ID] = l 2698 changed = true 2699 } 2700 } 2701 2702 if !changed { 2703 break 2704 } 2705 } 2706 if f.pass.debug > regDebug { 2707 fmt.Println("live values at end of each block") 2708 for _, b := range f.Blocks { 2709 fmt.Printf(" %s:", b) 2710 for _, x := range s.live[b.ID] { 2711 fmt.Printf(" v%d(%d)", x.ID, x.dist) 2712 for _, e := range s.desired[b.ID].entries { 2713 if e.ID != x.ID { 2714 continue 2715 } 2716 fmt.Printf("[") 2717 first := true 2718 for _, r := range e.regs { 2719 if r == noRegister { 2720 continue 2721 } 2722 if !first { 2723 fmt.Printf(",") 2724 } 2725 fmt.Print(&s.registers[r]) 2726 first = false 2727 } 2728 fmt.Printf("]") 2729 } 2730 } 2731 if avoid := s.desired[b.ID].avoid; avoid != 0 { 2732 fmt.Printf(" avoid=%v", s.RegMaskString(avoid)) 2733 } 2734 fmt.Println() 2735 } 2736 } 2737 } 2738 2739 // A desiredState represents desired register assignments. 
2740 type desiredState struct {
2741 	// Desired assignments will be small, so we just use a list
2742 	// of valueID+registers entries.
2743 	entries []desiredStateEntry
2744 	// Registers that other values want to be in. This value will
2745 	// contain at least the union of the regs fields of entries, but
2746 	// may contain additional entries for values that were once in
2747 	// this data structure but are no longer.
2748 	avoid regMask
2749 }
2750 type desiredStateEntry struct {
2751 	// (pre-regalloc) value
2752 	ID ID
2753 	// Registers it would like to be in, in priority order.
2754 	// Unused slots are filled with noRegister.
2755 	// For opcodes that return tuples, we track desired registers only
2756 	// for the first element of the tuple.
2757 	regs [4]register
2758 }
2759 
2760 func (d *desiredState) clear() {
2761 	d.entries = d.entries[:0]
2762 	d.avoid = 0
2763 }
2764 
2765 // get returns a list of desired registers for value vid.
2766 func (d *desiredState) get(vid ID) [4]register {
2767 	for _, e := range d.entries {
2768 		if e.ID == vid {
2769 			return e.regs
2770 		}
2771 	}
2772 	return [4]register{noRegister, noRegister, noRegister, noRegister}
2773 }
2774 
2775 // add records that we'd like value vid to be in register r.
2776 func (d *desiredState) add(vid ID, r register) {
2777 	d.avoid |= regMask(1) << r
2778 	for i := range d.entries {
2779 		e := &d.entries[i]
2780 		if e.ID != vid {
2781 			continue
2782 		}
2783 		if e.regs[0] == r {
2784 			// Already known and highest priority
2785 			return
2786 		}
2787 		for j := 1; j < len(e.regs); j++ {
2788 			if e.regs[j] == r {
2789 				// Move from lower priority to top priority
2790 				copy(e.regs[1:], e.regs[:j])
2791 				e.regs[0] = r
2792 				return
2793 			}
2794 		}
2795 		copy(e.regs[1:], e.regs[:])
2796 		e.regs[0] = r
2797 		return
2798 	}
2799 	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
2800 }
2801 
2802 func (d *desiredState) addList(vid ID, regs [4]register) {
2803 	// regs is in priority order, so iterate in reverse order.
2804 	for i := len(regs) - 1; i >= 0; i-- {
2805 		r := regs[i]
2806 		if r != noRegister {
2807 			d.add(vid, r)
2808 		}
2809 	}
2810 }
2811 
2812 // clobber erases any desired registers in the set m.
2813 func (d *desiredState) clobber(m regMask) {
2814 	for i := 0; i < len(d.entries); {
2815 		e := &d.entries[i]
2816 		j := 0
2817 		for _, r := range e.regs {
2818 			if r != noRegister && m>>r&1 == 0 {
2819 				e.regs[j] = r
2820 				j++
2821 			}
2822 		}
2823 		if j == 0 {
2824 			// No more desired registers for this value.
2825 			d.entries[i] = d.entries[len(d.entries)-1]
2826 			d.entries = d.entries[:len(d.entries)-1]
2827 			continue
2828 		}
2829 		for ; j < len(e.regs); j++ {
2830 			e.regs[j] = noRegister
2831 		}
2832 		i++
2833 	}
2834 	d.avoid &^= m
2835 }
2836 
2837 // copy copies a desired state from another desiredState x.
2838 func (d *desiredState) copy(x *desiredState) {
2839 	d.entries = append(d.entries[:0], x.entries...)
2840 	d.avoid = x.avoid
2841 }
2842 
2843 // remove removes the desired registers for vid and returns them.
2844 func (d *desiredState) remove(vid ID) [4]register {
2845 	for i := range d.entries {
2846 		if d.entries[i].ID == vid {
2847 			regs := d.entries[i].regs
2848 			d.entries[i] = d.entries[len(d.entries)-1]
2849 			d.entries = d.entries[:len(d.entries)-1]
2850 			return regs
2851 		}
2852 	}
2853 	return [4]register{noRegister, noRegister, noRegister, noRegister}
2854 }
2855 
2856 // merge merges another desired state x into d.
2857 func (d *desiredState) merge(x *desiredState) {
2858 	d.avoid |= x.avoid
2859 	// There should only be a few desired registers, so
2860 	// linear insert is ok.
2861 	for _, e := range x.entries {
2862 		d.addList(e.ID, e.regs)
2863 	}
2864 }
2865 
2866 func min32(x, y int32) int32 {
2867 	if x < y {
2868 		return x
2869 	}
2870 	return y
2871 }
2872 func max32(x, y int32) int32 {
2873 	if x > y {
2874 		return x
2875 	}
2876 	return y
2877 }
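// For illustration of the desiredState bookkeeping above (hypothetical value
// v): after add(v, AX) followed by add(v, CX), v's entry reads
// [CX, AX, noRegister, noRegister], i.e. the most recent request gets the
// highest priority, and avoid contains both AX and CX. A later
// clobber(mask containing CX) drops CX from every entry and from avoid, so CX
// may be handed out freely again.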