// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Register allocation.
//
// We use a version of a linear scan register allocator. We treat the
// whole function as a single long basic block and run through
// it using a greedy register allocator. Then all merge edges
// (those targeting a block with len(Preds)>1) are processed to
// shuffle data into the place that the target of the edge expects.
//
// The greedy allocator moves values into registers just before they
// are used, spills registers only when necessary, and spills the
// value whose next use is farthest in the future.
//
// The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
// such predecessor provides the starting register state for a block.
//
// It also requires that there are no critical edges (critical =
// comes from a block with >1 successor and goes to a block with >1
// predecessor). This makes it easy to add fixup code on merge edges -
// the source of a merge edge has only one successor, so we can add
// fixup code to the end of that block.

// Spilling
//
// During the normal course of the allocator, we might throw a still-live
// value out of all registers. When that value is subsequently used, we must
// load it from a slot on the stack. We must also issue an instruction to
// initialize that stack location with a copy of v.
//
// pre-regalloc:
//   (1) v = Op ...
//   (2) x = Op ...
//   (3) ... = Op v ...
//
// post-regalloc:
//   (1) v = Op ...    : AX // computes v, store result in AX
//       s = StoreReg v     // spill v to a stack slot
//   (2) x = Op ...    : AX // some other op uses AX
//       c = LoadReg s : CX // restore v from stack slot
//   (3) ... = Op c ...     // use the restored value
//
// Allocation occurs normally until we reach (3) and we realize we have
// a use of v and it isn't in any register. At that point, we allocate
// a spill (a StoreReg) for v. We can't determine the correct place for
// the spill at this point, so we allocate the spill as blockless initially.
// The restore is then generated to load v back into a register so it can
// be used. Subsequent uses of v will use the restored value c instead.
//
// What remains is the question of where to schedule the spill.
// During allocation, we keep track of the dominator of all restores of v.
// The spill of v must dominate that block. The spill must also be issued at
// a point where v is still in a register.
//
// To find the right place, start at b, the block which dominates all restores.
//  - If b is v.Block, then issue the spill right after v.
//    It is known to be in a register at that point, and dominates any restores.
//  - Otherwise, if v is in a register at the start of b,
//    put the spill of v at the start of b.
//  - Otherwise, set b = immediate dominator of b, and repeat.
//
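// As a concrete sketch of that walk (hypothetical CFG, not taken from any
// real compilation):
//
//   b1: v = Op ...           // v.Block
//   b2: ...                  // immediate dominator of b4 and b5
//   b4: c1 = LoadReg s       b5: c2 = LoadReg s
//
// Here b2 dominates all restores. b2 != v.Block, so we check whether v is
// still in a register at the start of b2; if so, s = StoreReg v goes at the
// start of b2. If not, we retry at b1 = idom(b2), which is v.Block, and the
// spill is issued immediately after v itself.
//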
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
//
// A register phi must have the phi and all of its inputs allocated to the
// same register. Register phis are spilled similarly to regular ops.
//
// A stack phi must have the phi and all of its inputs allocated to the same
// stack location. Stack phis start out life already spilled - each phi
// input must be a store (using StoreReg) at the end of the corresponding
// predecessor block.
//   b1: y = ... : AX        b2: z = ... : BX
//       y2 = StoreReg y         z2 = StoreReg z
//       goto b3                 goto b3
//   b3: x = phi(y2, z2)
// The stack allocator knows that StoreReg args of stack-allocated phis
// must be allocated to the same stack slot as the phi that uses them.
// x is now a spilled value and a restore must appear before its first use.

// TODO

// Use an affinity graph to mark two values which should use the
// same register. This affinity graph will be used to prefer certain
// registers for allocation. This affinity helps eliminate moves that
// are required for phi implementations and helps generate allocations
// for 2-register architectures.

// Note: regalloc generates a not-quite-SSA output. If we have:
//
//   b1: x = ... : AX
//       x2 = StoreReg x
//       ... AX gets reused for something else ...
//       if ... goto b3 else b4
//
//   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
//       ... use x3 ...                 ... use x4 ...
//
//   b2: ... use x3 ...
//
// If b3 is the primary predecessor of b2, then we use x3 in b2 and
// add an x4:CX->BX copy at the end of b4.
// But the definition of x3 doesn't dominate b2. We should really
// insert a dummy phi at the start of b2 (x5=phi(x3,x4):BX) to keep
// SSA form. For now, we ignore this problem as remaining in strict
// SSA form isn't needed after regalloc. We'll just leave the use
// of x3 not dominated by the definition of x3, and the CX->BX copy
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?

package ssa

import (
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
	"unsafe"
)

const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)

// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)

// regalloc performs register allocation on f. It sets f.RegAlloc
// to the resulting allocation.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
}

type register uint8

const noRegister register = 255

type regMask uint64

func (m regMask) String() string {
	s := ""
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if s != "" {
			s += " "
		}
		s += fmt.Sprintf("r%d", r)
	}
	return s
}

// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	n := 0
	for r != 0 {
		n += int(r & 1)
		r >>= 1
	}
	return n
}
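
// A regMask is a bit set indexed by register number. A minimal sketch of how
// the helpers above and below manipulate one (the register numbers here are
// made up for illustration):
//
//	var m regMask = 1<<0 | 1<<3 | 1<<5 // the set {r0, r3, r5}
//	countRegs(m)                       // 3 registers in the set
//	pickReg(m)                         // r0, the lowest set bit
//	m &^= regMask(1) << pickReg(m)     // remove r0, leaving {r3, r5}
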
// pickReg picks an arbitrary register from the register mask.
func pickReg(r regMask) register {
	// pick the lowest one
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	for i := register(0); ; i++ {
		if r&1 != 0 {
			return i
		}
		r >>= 1
	}
}

type use struct {
	dist int32    // distance from start of the block to a use of a value
	pos  src.XPos // source position of the use
	next *use     // linked list of uses of a value in nondecreasing dist order
}

// A valState records the register allocation state for a (pre-regalloc) value.
type valState struct {
	regs              regMask // the set of registers holding a Value (usually just one)
	uses              *use    // list of uses in this block
	spill             *Value  // spilled copy of the Value (if any)
	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags()
	rematerializeable bool    // cached value of v.rematerializeable()
}

type regState struct {
	v *Value // Original (preregalloc) Value stored in this register.
	c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it.
	// If a register is unused, v==c==nil
}

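// The use list of a valState is kept sorted by distance, so uses.dist is
// always the next (closest) use. Illustrative shape only, with invented
// distances:
//
//	v7: uses -> {dist: 3} -> {dist: 9} -> {dist: 42} -> nil
//
// advanceUses pops the head record as each instruction consumes its
// arguments; when the list empties, v7 is dead and its registers are freed.
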
type regAllocState struct {
	f *Func

	sdom        SparseTree
	registers   []Register
	numRegs     register
	SPReg       register
	SBReg       register
	GReg        register
	allocatable regMask

	// for each block, its primary predecessor.
	// A predecessor of b is primary if it is the closest
	// predecessor that appears before b in the layout order.
	// We record the index in the Preds list where the primary predecessor sits.
	primary []int32

	// live values at the end of each block. live[b.ID] is a list of value IDs
	// which are live at the end of b, together with a count of how many instructions
	// forward to the next use.
	live [][]liveInfo
	// desired register assignments at the end of each block.
	// Note that this is a static map computed before allocation occurs. Dynamic
	// register desires (from partially completed allocations) will trump
	// this information.
	desired []desiredState

	// current state of each (preregalloc) Value
	values []valState

	// ID of SP, SB values
	sp, sb ID

	// For each Value, map from its value ID back to the
	// preregalloc Value it was derived from.
	orig []*Value

	// current state of each register
	regs []regState

	// registers that contain values which can't be kicked out
	nospill regMask

	// mask of registers currently in use
	used regMask

	// mask of registers used in the current instruction
	tmpused regMask

	// current block we're working on
	curBlock *Block

	// cache of use records
	freeUseRecords *use

	// endRegs[blockid] is the register state at the end of each block.
	// encoded as a set of endReg records.
	endRegs [][]endReg

	// startRegs[blockid] is the register state at the start of merge blocks.
	// saved state does not include the state of phi ops in the block.
	startRegs [][]startReg

	// spillLive[blockid] is the set of live spills at the end of each block
	spillLive [][]ID

	// a set of copies we generated to move things around, and
	// whether it is used in shuffle. Unused copies will be deleted.
	copies map[*Value]bool

	loopnest *loopnest
}

type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
	c *Value // cached version of the value
}

type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached version of the value
	pos src.XPos // source position of use of this register
}

// freeReg frees up register r. Any current user of r is kicked out.
func (s *regAllocState) freeReg(r register) {
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", s.registers[r].Name(), v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}

// freeRegs frees up all registers listed in m.
func (s *regAllocState) freeRegs(m regMask) {
	for m&s.used != 0 {
		s.freeReg(pickReg(m & s.used))
	}
}

// setOrig records that c's original value is the same as
// v's original value.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	s.orig[c.ID] = s.orig[v.ID]
}

// assignReg assigns register r to hold c, a copy of v.
// r must be unused.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", s.registers[r].Name(), v, c)
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}

	// Update state.
	s.regs[r] = regState{v, c}
	s.values[v.ID].regs |= regMask(1) << r
	s.used |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])
}
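
// allocReg below evicts the value whose next use is farthest away, the
// register analogue of Belady's optimal page replacement. A hypothetical
// snapshot (registers and distances invented for illustration):
//
//	AX holds v3, next use at dist 2
//	BX holds v5, next use at dist 40  <- evicted: farthest next use
//	CX holds v8, next use at dist 7
//
// Spilling v5 delays its (distant) reload the longest, minimizing the
// number of reloads executed on this path.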

// allocReg chooses a register from the set of registers in mask.
// If there is no unused register, a Value will be kicked out of
// a register to make room.
func (s *regAllocState) allocReg(mask regMask, v *Value) register {
	mask &= s.allocatable
	mask &^= s.nospill
	if mask == 0 {
		s.f.Fatalf("no register available for %s", v)
	}

	// Pick an unused register if one is available.
	if mask&^s.used != 0 {
		return pickReg(mask &^ s.used)
	}

	// Pick a value to spill. Spill the value with the
	// farthest-in-the-future use.
	// TODO: Prefer registers with already spilled Values?
	// TODO: Modify preference using affinity graph.
	// TODO: if a single value is in multiple registers, spill one of them
	// before spilling a value in just a single register.

	// Find a register to spill. We spill the register containing the value
	// whose next use is as far in the future as possible.
	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
	var r register
	maxuse := int32(-1)
	for t := register(0); t < s.numRegs; t++ {
		if mask>>t&1 == 0 {
			continue
		}
		v := s.regs[t].v
		if n := s.values[v.ID].uses.dist; n > maxuse {
			// v's next use is farther in the future than any value
			// we've seen so far. A new best spill candidate.
			r = t
			maxuse = n
		}
	}
	if maxuse == -1 {
		s.f.Fatalf("couldn't find register to spill")
	}

	// Try to move it around before kicking out, if there is a free register.
	// We generate a Copy and record it. It will be deleted if never used.
	v2 := s.regs[r].v
	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
		r2 := pickReg(m)
		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
		s.copies[c] = false
		if s.f.pass.debug > regDebug {
			fmt.Printf("copy %s to %s : %s\n", v2, c, s.registers[r2].Name())
		}
		s.setOrig(c, v2)
		s.assignReg(r2, v2, c)
	}
	s.freeReg(r)
	return r
}

// makeSpill returns a Value which represents the spilled value of v.
// b is the block in which the spill is used.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Final block not known - keep track of subtree where restores reside.
		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a spill for v. We don't know where we want
	// to put it yet, so we leave it blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// We also don't know what the spill's arg will be.
	// Leave it argless for now.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}

// allocValToReg allocates v to a register selected from regMask and
// returns the register copy of v. Any previous user is kicked out and spilled
// (if necessary). Load code is added at the current pc. If nospill is set the
// allocated register is marked nospill so the assignment cannot be
// undone until the caller allows it by clearing nospill. Returns a
// *Value which is either v or a copy of v allocated to the chosen register.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	vi := &s.values[v.ID]

	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		return s.regs[r].c
	}

	// Allocate a register.
	r := s.allocReg(mask, v)

	// Allocate v to the new register.
	var c *Value
	if vi.regs != 0 {
		// Copy from a register that v is already in.
		r2 := pickReg(vi.regs)
		if s.regs[r2].v != v {
			panic("bad register state")
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyInto(s.curBlock)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}
	s.setOrig(c, v)
	s.assignReg(r, v, c)
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
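
// allocValToReg picks the cheapest way to materialize v in an acceptable
// register. Decision order, sketched with invented registers:
//
//	v in AX, mask allows AX -> reuse AX (no code emitted)
//	v in AX, mask wants BX  -> BX = Copy AX
//	v rematerializeable     -> re-emit v's op (e.g. a constant) into BX
//	otherwise               -> BX = LoadReg (spill of v)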

// isLeaf reports whether f contains no function calls.
func isLeaf(f *Func) bool {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if opcodeTable[v.Op].call {
				return false
			}
		}
	}
	return true
}

func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, and g registers.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].Name() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		}
	}
	// Make sure we found all required registers.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.ctxt.Flag_shared {
		switch s.f.Config.arch {
		case "ppc64le": // R2 already reserved.
			s.allocatable &^= 1 << 12 // R12
		}
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
		if s.f.Config.arch == "arm" && objabi.GOARM == 5 {
			// On ARMv5 we insert softfloat calls at each FP instruction.
			// This clobbers LR almost everywhere. Disable allocating LR
			// on ARMv5.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		switch s.f.Config.arch {
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "ppc64le": // R2 already reserved.
			s.allocatable &^= 1 << 12 // R12
		case "arm64":
			// nothing to do?
		case "386":
			// nothing to do.
			// Note that for Flag_shared (position independent code)
			// we do need to be careful, but that carefulness is hidden
			// in the rewrite rules so we always have a free register
			// available for global load/stores. See gen/386.rules (search for Flag_shared).
		case "s390x":
			// nothing to do, R10 & R11 already reserved
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}
	if s.f.Config.nacl {
		switch s.f.Config.arch {
		case "arm":
			s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm
		case "amd64p32":
			s.allocatable &^= 1 << 5  // BP - reserved for nacl
			s.allocatable &^= 1 << 15 // R15 - reserved for nacl
		}
	}
	if s.f.Config.use387 {
		s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go)
	}

	s.regs = make([]regState, s.numRegs)
	s.values = make([]valState, f.NumValues())
	s.orig = make([]*Value, f.NumValues())
	s.copies = make(map[*Value]bool)
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needReg is false for values returning Tuple types.
			// Instead, we mark the corresponding Selects as needReg.
		}
	}
	s.computeLive()

	// Compute block order. This array allows us to distinguish forward edges
	// from backward edges and compute how far they go.
	blockOrder := make([]int32, f.NumBlocks())
	for i, b := range f.Blocks {
		blockOrder[b.ID] = int32(i)
	}

	// Compute primary predecessors.
	s.primary = make([]int32, f.NumBlocks())
	for _, b := range f.Blocks {
		best := -1
		for i, e := range b.Preds {
			p := e.b
			if blockOrder[p.ID] >= blockOrder[b.ID] {
				continue // backward edge
			}
			if best == -1 || blockOrder[p.ID] > blockOrder[b.Preds[best].b.ID] {
				best = i
			}
		}
		s.primary[b.ID] = int32(best)
	}

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.sdom()
}
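
// A primary predecessor, concretely (block names invented): with layout
// order b1, b2, b3, b4 and b4.Preds = [b2, b3, b4 (self loop)], the self
// edge is backward and ignored, and b3 wins over b2 because it is the
// latest forward predecessor:
//
//	s.primary[b4.ID] == 1 // index of b3 in b4.Preds
//
// b4 then starts allocation from b3's end-of-block register state.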

// Adds a use record for id at distance dist from the start of the block.
// All calls to addUse must happen with nonincreasing dist.
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
	r := s.freeUseRecords
	if r != nil {
		s.freeUseRecords = r.next
	} else {
		r = &use{}
	}
	r.dist = dist
	r.pos = pos
	r.next = s.values[id].uses
	s.values[id].uses = r
	if r.next != nil && dist > r.next.dist {
		s.f.Fatalf("uses added in wrong order")
	}
}

// advanceUses advances the uses of v's args from the state before v to the state after v.
// Any values which have no more uses are deallocated from registers.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil {
			// Value is dead, free all registers that hold it.
			s.freeRegs(ai.regs)
		}
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
}
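
// The two halves of the use-record lifecycle, sketched (distances invented):
// walking a block backwards, addUse prepends records so the list ends up
// sorted by increasing distance; walking forwards, advanceUses pops one
// record per argument consumed:
//
//	addUse(v7, 9, ...); addUse(v7, 3, ...) // list: 3 -> 9
//	advanceUses(userOfV7)                  // list: 9
//	advanceUses(lastUserOfV7)              // list empty: v7's regs freed
//
// Popped records are recycled via freeUseRecords rather than reallocated.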

// liveAfterCurrentInstruction reports whether v is live after
// the current instruction is completed. v must be used by the
// current instruction.
func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
	u := s.values[v.ID].uses
	d := u.dist
	for u != nil && u.dist == d {
		u = u.next
	}
	return u != nil && u.dist > d
}

// Sets the state of the registers to that encoded in regs.
func (s *regAllocState) setState(regs []endReg) {
	s.freeRegs(s.used)
	for _, x := range regs {
		s.assignReg(x.r, x.v, x.c)
	}
}

// compatRegs returns the set of registers which can store a type t.
func (s *regAllocState) compatRegs(t Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsFloat() || t == TypeInt128 {
		m = s.f.Config.fpRegMask
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}

func (s *regAllocState) regalloc(f *Func) {
	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
	defer f.retSparseSet(regValLiveSet)
	var oldSched []*Value
	var phis []*Value
	var phiRegs []register
	var args []*Value

	// Data structure used for computing desired registers.
	var desired desiredState

	// Desired registers for inputs & outputs for each instruction in the block.
	type dentry struct {
		out [4]register    // desired output registers
		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
	}
	var dinfo []dentry

	if f.Entry != f.Blocks[0] {
		f.Fatalf("entry block must be first")
	}

	for _, b := range f.Blocks {
		s.curBlock = b

		// Initialize regValLiveSet and uses fields for this block.
		// Walk backwards through the block doing liveness analysis.
		regValLiveSet.clear()
		for _, e := range s.live[b.ID] {
			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
			regValLiveSet.add(e.ID)
		}
		if v := b.Control; v != nil && s.values[v.ID].needReg {
			s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value
			regValLiveSet.add(v.ID)
		}
		for i := len(b.Values) - 1; i >= 0; i-- {
			v := b.Values[i]
			regValLiveSet.remove(v.ID)
			if v.Op == OpPhi {
				// Remove v from the live set, but don't add
				// any inputs. This is the state the len(b.Preds)>1
				// case below desires; it wants to process phis specially.
				continue
			}
			if opcodeTable[v.Op].call {
				// Function call clobbers all the registers but SP and SB.
				regValLiveSet.clear()
				if s.sp != 0 && s.values[s.sp].uses != nil {
					regValLiveSet.add(s.sp)
				}
				if s.sb != 0 && s.values[s.sb].uses != nil {
					regValLiveSet.add(s.sb)
				}
			}
			for _, a := range v.Args {
				if !s.values[a.ID].needReg {
					continue
				}
				s.addUse(a.ID, int32(i), v.Pos)
				regValLiveSet.add(a.ID)
			}
		}
		if s.f.pass.debug > regDebug {
			fmt.Printf("uses for %s:%s\n", s.f.Name, b)
			for i := range s.values {
				vi := &s.values[i]
				u := vi.uses
				if u == nil {
					continue
				}
				fmt.Printf("  v%d:", i)
				for u != nil {
					fmt.Printf(" %d", u.dist)
					u = u.next
				}
				fmt.Println()
			}
		}

		// Make a copy of the block schedule so we can generate a new one in place.
		// We make a separate copy for phis and regular values.
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		phis = append(phis[:0], b.Values[:nphi]...)
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:0]

		// Initialize start state of block.
		if b == f.Entry {
			// Regalloc state is empty to start.
			if nphi > 0 {
				f.Fatalf("phis in entry block")
			}
		} else if len(b.Preds) == 1 {
			// Start regalloc state with the end state of the previous block.
			s.setState(s.endRegs[b.Preds[0].b.ID])
			if nphi > 0 {
				f.Fatalf("phis in single-predecessor block")
			}
			// Drop any values which are no longer live.
			// This may happen because at the end of p, a value may be
			// live but only used by some other successor of p.
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}
		} else {
			// This is the complicated case. We have more than one predecessor,
			// which means we may have Phi ops.

			// Copy phi ops into new schedule.
			b.Values = append(b.Values, phis...)

			// Start with the final register state of the primary predecessor
			idx := s.primary[b.ID]
			if idx < 0 {
				f.Fatalf("block with no primary predecessor %s", b)
			}
			p := b.Preds[idx].b
			s.setState(s.endRegs[p.ID])

			if s.f.pass.debug > regDebug {
				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
				for _, x := range s.endRegs[p.ID] {
					fmt.Printf("  %s: orig:%s cache:%s\n", s.registers[x.r].Name(), x.v, x.c)
				}
			}

			// Decide on registers for phi ops. Use the registers determined
			// by the primary predecessor if we can.
			// TODO: pick best of (already processed) predecessors?
			// Majority vote? Deepest nesting level?
			phiRegs = phiRegs[:0]
			var phiUsed regMask
			for _, v := range phis {
				if !s.values[v.ID].needReg {
					phiRegs = append(phiRegs, noRegister)
					continue
				}
				a := v.Args[idx]
				// Some instructions target not-allocatable registers.
				// They're not suitable for further (phi-function) allocation.
				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
				if m != 0 {
					r := pickReg(m)
					phiUsed |= regMask(1) << r
					phiRegs = append(phiRegs, r)
				} else {
					phiRegs = append(phiRegs, noRegister)
				}
			}

			// Second pass - deallocate any phi inputs which are now dead.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				a := v.Args[idx]
				if !regValLiveSet.contains(a.ID) {
					// Input is dead beyond the phi, deallocate
					// anywhere else it might live.
					s.freeRegs(s.values[a.ID].regs)
				} else {
					// Input is still live.
					// Try to move it around before kicking out, if there is a free register.
					// We generate a Copy in the predecessor block and record it. It will be
					// deleted if never used.
					r := phiRegs[i]
					if r == noRegister {
						continue
					}
					// Pick a free register. At this point some registers used in the predecessor
					// block may have been deallocated. Those are the ones used for Phis. Exclude
					// them (and they are not going to be helpful anyway).
					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
						r2 := pickReg(m)
						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
						s.copies[c] = false
						if s.f.pass.debug > regDebug {
							fmt.Printf("copy %s to %s : %s\n", a, c, s.registers[r2].Name())
						}
						s.setOrig(c, a)
						s.assignReg(r2, a, c)
						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
					}
					s.freeReg(r)
				}
			}

			// Third pass - pick registers for phis whose inputs
			// were not in a register.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				if phiRegs[i] != noRegister {
					continue
				}
				if s.f.Config.use387 && v.Type.IsFloat() {
					continue // 387 can't handle floats in registers between blocks
				}
				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
				if m != 0 {
					r := pickReg(m)
					phiRegs[i] = r
					phiUsed |= regMask(1) << r
				}
			}

			// Set registers for phis. Add phi spill code.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				r := phiRegs[i]
				if r == noRegister {
					// stack-based phi
					// Spills will be inserted in all the predecessors below.
					s.values[v.ID].spill = v // v starts life spilled
					continue
				}
				// register-based phi
				s.assignReg(r, v, v)
			}

			// Deallocate any values which are no longer live. Phis are excluded.
			for r := register(0); r < s.numRegs; r++ {
				if phiUsed>>r&1 != 0 {
					continue
				}
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}

			// Save the starting state for use by merge edges.
			var regList []startReg
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if phiUsed>>r&1 != 0 {
					// Skip registers that phis used, we'll handle those
					// specially during merge edge processing.
					continue
				}
				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
			}
			s.startRegs[b.ID] = regList

			if s.f.pass.debug > regDebug {
				fmt.Printf("after phis\n")
				for _, x := range s.startRegs[b.ID] {
					fmt.Printf("  %s: v%d\n", s.registers[x.r].Name(), x.v.ID)
				}
			}
		}

		// Allocate space to record the desired registers for each value.
		dinfo = dinfo[:0]
		for i := 0; i < len(oldSched); i++ {
			dinfo = append(dinfo, dentry{})
		}

		// Load static desired register info at the end of the block.
		desired.copy(&s.desired[b.ID])

		// Check actual assigned registers at the start of the next block(s).
		// Dynamically assigned registers will trump the static
		// desired registers computed during liveness analysis.
		// Note that we do this phase after startRegs is set above, so that
		// we get the right behavior for a block which branches to itself.
		for _, e := range b.Succs {
			succ := e.b
			// TODO: prioritize likely successor?
			for _, x := range s.startRegs[succ.ID] {
				desired.add(x.v.ID, x.r)
			}
			// Process phi ops in succ.
			pidx := e.i
			for _, v := range succ.Values {
				if v.Op != OpPhi {
					break
				}
				if !s.values[v.ID].needReg {
					continue
				}
				rp, ok := s.f.getHome(v.ID).(*Register)
				if !ok {
					continue
				}
				desired.add(v.Args[pidx].ID, register(rp.num))
			}
		}
		// Walk values backwards computing desired register info.
		// See computeLive for more comments.
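		// Sketch of what this backward walk computes (ops and registers
		// invented for illustration): if a later instruction demands its
		// input in a fixed register, that desire propagates upward so the
		// producer tries to allocate there and the move disappears:
		//
		//	v9 = Foo v5 // suppose Foo requires its input in CX
		//	            // => desired register of v5 becomes CX
		//	v5 = Bar ...// Bar's output now prefers CX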
		for i := len(oldSched) - 1; i >= 0; i-- {
			v := oldSched[i]
			prefs := desired.remove(v.ID)
			desired.clobber(opcodeTable[v.Op].reg.clobbers)
			for _, j := range opcodeTable[v.Op].reg.inputs {
				if countRegs(j.regs) != 1 {
					continue
				}
				desired.clobber(j.regs)
				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
			}
			if opcodeTable[v.Op].resultInArg0 {
				if opcodeTable[v.Op].commutative {
					desired.addList(v.Args[1].ID, prefs)
				}
				desired.addList(v.Args[0].ID, prefs)
			}
			// Save desired registers for this value.
			dinfo[i].out = prefs
			for j, a := range v.Args {
				if j >= len(dinfo[i].in) {
					break
				}
				dinfo[i].in[j] = desired.get(a.ID)
			}
		}

		// Process all the non-phi values.
		for idx, v := range oldSched {
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing %s\n", v.LongString())
			}
			regspec := opcodeTable[v.Op].reg
			if v.Op == OpPhi {
				f.Fatalf("phi %s not at start of block", v)
			}
			if v.Op == OpSP {
				s.assignReg(s.SPReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sp = v.ID
				continue
			}
			if v.Op == OpSB {
				s.assignReg(s.SBReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sb = v.ID
				continue
			}
			if v.Op == OpSelect0 || v.Op == OpSelect1 {
				if s.values[v.ID].needReg {
					var i = 0
					if v.Op == OpSelect1 {
						i = 1
					}
					s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
				}
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				goto issueSpill
			}
			if v.Op == OpGetG && s.f.Config.hasGReg {
				// use hardware g register
				if s.regs[s.GReg].v != nil {
					s.freeReg(s.GReg) // kick out the old value
				}
				s.assignReg(s.GReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				goto issueSpill
			}
			if v.Op == OpArg {
				// Args are "pre-spilled" values. We don't allocate
				// any register here. We just set up the spill pointer to
				// point at itself and any later user will restore it to use it.
				s.values[v.ID].spill = v
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpKeepAlive {
				// Make sure the argument to v is still live here.
				s.advanceUses(v)
				vi := &s.values[v.Args[0].ID]
				if vi.spill != nil {
					// Use the spill location.
					v.SetArg(0, vi.spill)
				} else {
					// No need to keep unspilled values live.
					// These are typically rematerializeable constants like nil,
					// or values of a variable that were modified since the last call.
					v.Op = OpCopy
					v.SetArgs1(v.Args[1])
				}
				b.Values = append(b.Values, v)
				continue
			}
			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
				// No register allocation required (or none specified yet)
				s.freeRegs(regspec.clobbers)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}

			if s.values[v.ID].rematerializeable {
				// Value is rematerializeable, don't issue it here.
				// It will get issued just before each use (see
				// allocValToReg).
				for _, a := range v.Args {
					a.Uses--
				}
				s.advanceUses(v)
				continue
			}

			if s.f.pass.debug > regDebug {
				fmt.Printf("value %s\n", v.LongString())
				fmt.Printf("  out:")
				for _, r := range dinfo[idx].out {
					if r != noRegister {
						fmt.Printf(" %s", s.registers[r].Name())
					}
				}
				fmt.Println()
				for i := 0; i < len(v.Args) && i < 3; i++ {
					fmt.Printf("  in%d:", i)
					for _, r := range dinfo[idx].in[i] {
						if r != noRegister {
							fmt.Printf(" %s", s.registers[r].Name())
						}
					}
					fmt.Println()
				}
			}

			// Move arguments to registers. Process in an ordering defined
			// by the register specification (most constrained first).
			args = append(args[:0], v.Args...)
			for _, i := range regspec.inputs {
				mask := i.regs
				if mask&s.values[args[i.idx].ID].regs == 0 {
					// Need a new register for the input.
					mask &= s.allocatable
					mask &^= s.nospill
					// Use desired register if available.
					if i.idx < 3 {
						for _, r := range dinfo[idx].in[i.idx] {
							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
								// Desired register is allowed and unused.
								mask = regMask(1) << r
								break
							}
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
				}
				args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos)
			}

			// If the output clobbers the input register, make sure we have
			// at least two copies of the input register so we don't
			// have to reload the value from the spill location.
			if opcodeTable[v.Op].resultInArg0 {
				var m regMask
				if !s.liveAfterCurrentInstruction(v.Args[0]) {
					// arg0 is dead. We can clobber its register.
					goto ok
				}
				if s.values[v.Args[0].ID].rematerializeable {
					// We can rematerialize the input, don't worry about clobbering it.
					goto ok
				}
				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
					// we have at least 2 copies of arg0. We can afford to clobber one.
					goto ok
				}
				if opcodeTable[v.Op].commutative {
					if !s.liveAfterCurrentInstruction(v.Args[1]) {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
					if s.values[v.Args[1].ID].rematerializeable {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
					if countRegs(s.values[v.Args[1].ID].regs) >= 2 {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
				}

				// We can't overwrite arg0 (or arg1, if commutative). So we
				// need to make a copy of an input so we have a register we can modify.

				// Possible new registers to copy into.
				m = s.compatRegs(v.Args[0].Type) &^ s.used
				if m == 0 {
					// No free registers. In this case we'll just clobber
					// an input and future uses of that input must use a restore.
					// TODO(khr): We should really do this like allocReg does it,
					// spilling the value with the most distant next use.
					goto ok
				}

				// Try to move an input to the desired output.
				for _, r := range dinfo[idx].out {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
						// Note: we update args[0] so the instruction will
						// use the register copy we just made.
						goto ok
					}
				}
				// Try to copy input to its desired location & use its old
				// location as the result register.
				for _, r := range dinfo[idx].in[0] {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
						s.copies[c] = false
						// Note: no update to args[0] so the instruction will
						// use the original copy.
						goto ok
					}
				}
				if opcodeTable[v.Op].commutative {
					for _, r := range dinfo[idx].in[1] {
						if r != noRegister && m>>r&1 != 0 {
							m = regMask(1) << r
							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
							s.copies[c] = false
							args[0], args[1] = args[1], args[0]
							goto ok
						}
					}
				}
				// Avoid future fixed uses if we can.
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				// Save input 0 to a new register so we can clobber it.
				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
				s.copies[c] = false
			}

		ok:
			// Now that all args are in regs, we're ready to issue the value itself.
			// Before we pick a register for the output value, allow input registers
			// to be deallocated. We do this here so that the output can use the
			// same register as a dying input.
			if !opcodeTable[v.Op].resultNotInArgs {
				s.tmpused = s.nospill
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}

			// Dump any registers which will be clobbered
			s.freeRegs(regspec.clobbers)
			s.tmpused |= regspec.clobbers

			// Pick registers for outputs.
			{
				outRegs := [2]register{noRegister, noRegister}
				var used regMask
				for _, out := range regspec.outputs {
					mask := out.regs & s.allocatable &^ used
					if mask == 0 {
						continue
					}
					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
						if !opcodeTable[v.Op].commutative {
							// Output must use the same register as input 0.
							r := register(s.f.getHome(args[0].ID).(*Register).num)
							mask = regMask(1) << r
						} else {
							// Output must use the same register as input 0 or 1.
							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
							// Check r0 and r1 for desired output register.
							found := false
							for _, r := range dinfo[idx].out {
								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
									mask = regMask(1) << r
									found = true
									if r == r1 {
										args[0], args[1] = args[1], args[0]
									}
									break
								}
							}
							if !found {
								// Neither are desired, pick r0.
								mask = regMask(1) << r0
							}
						}
					}
					for _, r := range dinfo[idx].out {
						if r != noRegister && (mask&^s.used)>>r&1 != 0 {
							// Desired register is allowed and unused.
							mask = regMask(1) << r
							break
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
					r := s.allocReg(mask, v)
					outRegs[out.idx] = r
					used |= regMask(1) << r
					s.tmpused |= regMask(1) << r
				}
				// Record register choices
				if v.Type.IsTuple() {
					var outLocs LocPair
					if r := outRegs[0]; r != noRegister {
						outLocs[0] = &s.registers[r]
					}
					if r := outRegs[1]; r != noRegister {
						outLocs[1] = &s.registers[r]
					}
					s.f.setHome(v, outLocs)
					// Note that subsequent SelectX instructions will do the assignReg calls.
				} else {
					if r := outRegs[0]; r != noRegister {
						s.assignReg(r, v, v)
					}
				}
			}

			// deallocate dead args, if we have not done so
			if opcodeTable[v.Op].resultNotInArgs {
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}
			s.tmpused = 0

			// Issue the Value itself.
			for i, a := range args {
				v.SetArg(i, a) // use register version of arguments
			}
			b.Values = append(b.Values, v)

		issueSpill:
		}

		// Load control value into reg.
		if v := b.Control; v != nil && s.values[v.ID].needReg {
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing control %s\n", v.LongString())
			}
			// We assume that a control input can be passed in any
			// type-compatible register. If this turns out not to be true,
			// we'll need to introduce a regspec for a block's control value.
			b.Control = s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos)
			if b.Control != v {
				v.Uses--
				b.Control.Uses++
			}
			// Remove this use from the uses list.
			vi := &s.values[v.ID]
			u := vi.uses
			vi.uses = u.next
			if u.next == nil {
				s.freeRegs(vi.regs) // value is dead
			}
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}

		// Spill any values that can't live across basic block boundaries.
		if s.f.Config.use387 {
			s.freeRegs(s.f.Config.fpRegMask)
		}

		// If we are approaching a merge point and we are the primary
		// predecessor of it, find live values that we use soon after
		// the merge point and promote them to registers now.
		if len(b.Succs) == 1 {
			// For this to be worthwhile, the loop must have no calls in it.
			top := b.Succs[0].b
			loop := s.loopnest.b2l[top.ID]
			if loop == nil || loop.header != top || loop.containsCall {
				goto badloop
			}

			// TODO: sort by distance, pick the closest ones?
			for _, live := range s.live[b.ID] {
				if live.dist >= unlikelyDistance {
					// Don't preload anything live after the loop.
					continue
				}
				vid := live.ID
				vi := &s.values[vid]
				if vi.regs != 0 {
					continue
				}
				if vi.rematerializeable {
					continue
				}
				v := s.orig[vid]
				if s.f.Config.use387 && v.Type.IsFloat() {
					continue // 387 can't handle floats in registers between blocks
				}
				m := s.compatRegs(v.Type) &^ s.used
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				if m != 0 {
					s.allocValToReg(v, m, false, b.Pos)
				}
			}
		}
	badloop:
		;

		// Save end-of-block register state.
		// First count how many, this cuts allocations in half.
		k := 0
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			k++
		}
		regList := make([]endReg, 0, k)
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			regList = append(regList, endReg{r, v, s.regs[r].c})
		}
		s.endRegs[b.ID] = regList

		if checkEnabled {
			regValLiveSet.clear()
			for _, x := range s.live[b.ID] {
				regValLiveSet.add(x.ID)
			}
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if !regValLiveSet.contains(v.ID) {
					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
				}
			}
		}

		// If a value is live at the end of the block and
		// isn't in a register, generate a use for the spill location.
		// We need to remember this information so that
		// the liveness analysis in stackalloc is correct.
		for _, e := range s.live[b.ID] {
			vi := &s.values[e.ID]
			if vi.regs != 0 {
				// in a register, we'll use that source for the merge.
				continue
			}
			if vi.rematerializeable {
				// we'll rematerialize during the merge.
				continue
			}
			//fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
			spill := s.makeSpill(s.orig[e.ID], b)
			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
		}

		// Clear any final uses.
		// All that is left should be the pseudo-uses added for values which
		// are live at the end of b.
		for _, e := range s.live[b.ID] {
			u := s.values[e.ID].uses
			if u == nil {
				f.Fatalf("live at end, no uses v%d", e.ID)
			}
			if u.next != nil {
				f.Fatalf("live at end, too many uses v%d", e.ID)
			}
			s.values[e.ID].uses = nil
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}
	}

	// Decide where the spills we generated will go.
	s.placeSpills()

	// Anything that didn't get a register gets a stack location here.
	// (StoreReg, stack-based phis, inputs, ...)
	stacklive := stackalloc(s.f, s.spillLive)

	// Fix up all merge edges.
	s.shuffle(stacklive)

	// Erase any copies we never used.
	// Also, an unused copy might be the only use of another copy,
	// so continue erasing until we reach a fixed point.
	for {
		progress := false
		for c, used := range s.copies {
			if !used && c.Uses == 0 {
				if s.f.pass.debug > regDebug {
					fmt.Printf("delete copied value %s\n", c.LongString())
				}
				c.Args[0].Uses--
				f.freeValue(c)
				delete(s.copies, c)
				progress = true
			}
		}
		if !progress {
			break
		}
	}

	for _, b := range f.Blocks {
		i := 0
		for _, v := range b.Values {
			if v.Op == OpInvalid {
				continue
			}
			b.Values[i] = v
			i++
		}
		b.Values = b.Values[:i]
	}
}
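
// placeSpills below relies on the Euler-tour numbering in s.sdom: a block c
// dominates every restore of v exactly when c's interval contains the
// interval spanned by all restores, i.e. (numbers invented for illustration)
//
//	sdom[c].entry <= vi.restoreMin && vi.restoreMax <= sdom[c].exit
//	// e.g. entry=4, exit=20 contains restoreMin=7, restoreMax=15
//
// so the walk can descend the dominator tree without visiting the restores
// themselves.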

func (s *regAllocState) placeSpills() {
	f := s.f

	// Precompute some useful info.
	phiRegs := make([]regMask, f.NumBlocks())
	for _, b := range f.Blocks {
		var m regMask
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			if r, ok := f.getHome(v.ID).(*Register); ok {
				m |= regMask(1) << uint(r.num)
			}
		}
		phiRegs[b.ID] = m
	}

	// Start maps block IDs to the list of spills
	// that go at the start of the block (but after any phis).
	start := map[ID][]*Value{}
	// After maps value IDs to the list of spills
	// that go immediately after that value ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up,
			// like OpArgs and stack-based phis.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v. At the start "best" is the best place
		// we have found so far.
		// TODO: find a way to make this O(1) without arbitrary cutoffs.
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after the phis).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && v.Op != OpPhi {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range f.Blocks {
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:nphi]
		for _, v := range start[b.ID] {
			b.Values = append(b.Values, v)
		}
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			for _, w := range after[v.ID] {
				b.Values = append(b.Values, w)
			}
		}
	}
}
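
// A merge edge fixup, concretely (registers and values invented): suppose
// block b has predecessors p1 and p2, and b's startRegs say v7 must be in
// AX. If p1 ends with v7 in AX, the p1->b edge needs nothing; if p2 ends
// with v7 in BX, shuffle appends to p2:
//
//	x = Copy v7 : AX // AX = BX move at the end of p2
//
// Stack phis are handled the same way, with StoreReg/LoadReg standing in
// for register-to-register copies.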

// shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
func (s *regAllocState) shuffle(stacklive [][]ID) {
	var e edgeState
	e.s = s
	e.cache = map[ID][]*Value{}
	e.contents = map[Location]contentRecord{}
	if s.f.pass.debug > regDebug {
		fmt.Printf("shuffle %s\n", s.f.Name)
		fmt.Println(s.f.String())
	}

	for _, b := range s.f.Blocks {
		if len(b.Preds) <= 1 {
			continue
		}
		e.b = b
		for i, edge := range b.Preds {
			p := edge.b
			e.p = p
			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
			e.process()
		}
	}
}

type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p->b.

	// for each pre-regalloc value, a list of equivalent cached values
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration

	// map from location to the value it contains
	contents map[Location]contentRecord

	// desired destination locations
	destinations []dstRecord
	extra        []dstRecord

	usedRegs   regMask // registers currently holding something
	uniqueRegs regMask // registers holding the only copy of a value
	finalRegs  regMask // registers holding final target
}

type contentRecord struct {
	vid   ID       // pre-regalloc value
	c     *Value   // cached value
	final bool     // this is a satisfied destination
	pos   src.XPos // source position of use of the value
}

type dstRecord struct {
	loc    Location // register or stack slot
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // place to store reference to the generating instruction
	pos    src.XPos // source position of use of this location
}

// setup initializes the edge state for shuffling.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state.
	for _, vid := range e.cachedVals {
		delete(e.cache, vid)
	}
	e.cachedVals = e.cachedVals[:0]
	for k := range e.contents {
		delete(e.contents, k)
	}
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0

	// Live registers can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care about the position of the source
	}
	// So can all of the spill locations.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.isAncestorEq(spill.Block, e.p) {
			// Spills were placed that only dominate the uses found
			// during the first regalloc pass. The edge fixup code
			// can't use a spill location if the spill doesn't dominate
			// the edge.
			// We are guaranteed that if the spill doesn't dominate this edge,
			// then the value is available in a register (because we called
			// makeSpill for every value not in a register at the start
			// of an edge).
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care about the position of the source
	}

	// Figure out all the destinations we need.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis need their args to end up in a specific location.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID).Name(), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc.Name(), d.vid)
		}
	}
}

// process generates code to move all the values to the right destination locations.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations we generated.
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress. That means that any
		// remaining unsatisfied moves are in simple cycles.
		// For example, A -> B -> C -> D -> A.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C

		// To break the cycle, we pick an unused register, say R,
		// and put a copy of B there.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C <---- R=copyofB
		// When we resume the outer loop, the A->B move can now proceed,
		// and eventually the whole cycle completes.

		// Copy any cycle location to a temp register. This duplicates
		// one of the cycle entries, allowing the just duplicated value
		// to be overwritten and the cycle to proceed.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc.Name(), c)
		}
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(d.pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(d.pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, d.pos)
	}
}

// processDest generates code to put value vid into location loc. Returns true
// if progress was made.
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the correct place.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// Note: if splice==nil then c will appear dead. This is
		// non-SSA formed code, so be careful after this pass not to run
		// deadcode elimination.
		if _, ok := e.s.copies[occupant.c]; ok {
			// The copy at occupant.c was used to avoid spill.
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location.
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
		// We can't overwrite the last copy
		// of a value that needs to survive.
		return false
	}

	// Copy from a source of v, register preferred.
	v := e.s.orig[vid]
	var c *Value
	var src Location // Note: shadows the imported cmd/internal/src package for the rest of this function.
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc.Name())
		fmt.Printf("sources of v%d:", vid)
	}
	for _, w := range e.cache[vid] {
		h := e.s.f.getHome(w.ID)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf(" %s:%s", h.Name(), w)
		}
		_, isreg := h.(*Register)
		if src == nil || isreg {
			c = w
			src = h
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src.Name())
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)
	var x *Value
	if c == nil {
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyInto(e.p)
		} else {
			// Rematerialize into a stack slot. Need a free
			// register to accomplish this.
			e.erase(loc) // see pre-clobber comment below
			r := e.findRegFor(v.Type)
			x = v.copyInto(e.p)
			e.set(r, vid, x, false, pos)
			// Make sure we spill with the size of the slot, not the
			// size of x (which might be wider due to our dropping
			// of narrowing conversions).
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit move from src to dst.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem. Use a temp register.

				// Pre-clobber the destination. This avoids the
				// following situation:
				//   - v is currently held in R0 and stacktmp0.
				//   - We want to copy stacktmp1 to stacktmp0.
				//   - We choose R0 as the temporary register.
				// During the copy, both R0 and stacktmp0 are
				// clobbered, losing both copies of v. Oops!
				// Erasing the destination early means R0 will not
				// be chosen as the temp register, as it will then
				// be the last copy of v.
				e.erase(loc)

				r := e.findRegFor(c.Type)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}

// set changes the contents of location loc to hold the given value and its cached representative.
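// If loc is a register, the used/unique/final register masks are updated;
// a value's register stops counting as unique once a second cached copy
// of that value appears.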
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.erase(loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc.Name(), c)
	}
}

// erase removes the record of whatever value loc currently holds,
// queueing a fixup move if that value was a satisfied destination.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Make sure it gets added to the tail of the destination queue
		// so we make progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove c from the list of cached values.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc.Name(), c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
	}
	if len(a) == 1 {
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}

// findRegFor finds a register we can use to make a temp copy of type typ.
func (e *edgeState) findRegFor(typ Type) Location {
	// Determine which registers are candidates for the given type.
	var m regMask
	types := &e.s.f.Config.Types
	if typ.IsFloat() {
		m = e.s.compatRegs(types.Float64)
	} else {
		m = e.s.compatRegs(types.Int64)
	}

	// Pick a register. In priority order:
	//  1) an unused register
	//  2) a non-unique register not holding a final value
	//  3) a non-unique register
	//  4) TODO: a register holding a rematerializeable value
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is available.
	// Pick a register to spill.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a temp location to spill a register to.
					// The type of the slot is immaterial - it will not be live across
					// any safepoint. Just use a type big enough to hold any register.
					t := LocalSlot{e.s.f.fe.Auto(c.Pos, types.Int64), types.Int64, 0}
					// TODO: reuse these slots.
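					// Record the spilled copy in the temp slot so the
					// edge state still knows where to find v after r is
					// reused by the caller.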
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r.Name(), t.Name(), x.LongString())
					}
				}
				// r will now be overwritten by the caller. At some point
				// later, the newly saved value will be moved back to its
				// final destination in processDest.
				return r
			}
		}
	}

	fmt.Printf("m:%d unique:%d final:%d\n", m, e.uniqueRegs, e.finalRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID).Name())
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}

// rematerializeable reports whether the register allocator should recompute
// a value instead of spilling/restoring it.
func (v *Value) rematerializeable() bool {
	if !opcodeTable[v.Op].rematerializeable {
		return false
	}
	for _, a := range v.Args {
		// SP and SB (generated by OpSP and OpSB) are always available.
		if a.Op != OpSP && a.Op != OpSB {
			return false
		}
	}
	return true
}

type liveInfo struct {
	ID   ID       // ID of value
	dist int32    // # of instructions before next use
	pos  src.XPos // source position of next use
}

// dblock contains information about desired & avoid registers at the end of a block.
type dblock struct {
	prefers []desiredStateEntry
	avoid   regMask
}

// computeLive computes a map from block ID to a list of value IDs live at the end
// of that block. Together with the value ID is a count of how many instructions
// to the next use of that value. The resulting map is stored in s.live.
// computeLive also computes the desired register information at the end of each block.
// This desired register information is stored in s.desired.
// TODO: this could be quadratic if lots of variables are live across lots of
// basic blocks. Figure out a way to make this function (or, more precisely, the user
// of this function) require only linear size & time.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := newSparseMap(f.NumValues())
	t := newSparseMap(f.NumValues())

	// Keep track of which value we want in each register.
	var desired desiredState

	// Instead of iterating over f.Blocks, iterate over their postordering.
	// Liveness information flows backward, so starting at the end
	// increases the probability that we will stabilize quickly.
	// TODO: Do a better job yet. Here's one possibility:
	// Calculate the dominator tree and locate all strongly connected components.
	// If a value is live in one block of an SCC, it is live in all.
	// Walk the dominator tree from end to beginning, just once, treating SCC
	// components as single blocks, duplicating calculated liveness information
	// out to all of them.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.calculateDepths()
	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			// Add len(b.Values) to adjust from end-of-block distance
			// to beginning-of-block distance.
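			// For example, a value next used 2 instructions after the
			// end of a 5-instruction block starts this scan at distance
			// 2+5=7 from the beginning of the block.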
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Mark the control value as live.
			if b.Control != nil && s.values[b.Control.ID].needReg {
				live.set(b.Control.ID, int32(len(b.Values)), b.Pos)
			}

			// Propagate backwards to the start of the block.
			// Assumes Values have been scheduled.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// save phi ops for later
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}
			// Propagate desired registers backwards.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// TODO: if v is a phi, save desired register for phi inputs.
					// For now, we just drop it and don't propagate
					// desired registers back through phi nodes.
					continue
				}
				// Cancel desired registers if they get clobbered.
				desired.clobber(opcodeTable[v.Op].reg.clobbers)
				// Update desired registers if there are any fixed register inputs.
				for _, j := range opcodeTable[v.Op].reg.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Set the desired register of input 0 if this is a 2-operand instruction.
				if opcodeTable[v.Op].resultInArg0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its list of live-at-end values.
			// invariant: live contains the values live at the start of b (excluding phi inputs)
			for i, e := range b.Preds {
				p := e.b
				// Compute additional distance for the edge.
				// Note: delta must be at least 1 to distinguish the control
				// value use from the first user in a successor block.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Update any desired registers at the end of p.
				s.desired[p.ID].merge(&desired)

				// Start t off with the previously known live values at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values from scanning this block.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.aux)
					}
				}
				// Also add the correct arg from the saved phi values.
				// All phis are at distance delta (we consider them
				// simultaneously happening at the start of the block).
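				// Only the argument flowing in from p (v.Args[i]) is
				// live on this edge; a phi's other arguments are not
				// live at the end of p.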
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// The live set has changed, update it.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.aux})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf(" %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d", x.ID)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(s.registers[r].Name())
						first = false
					}
					fmt.Printf("]")
				}
			}
			fmt.Printf(" avoid=%x", int64(s.desired[b.ID].avoid))
			fmt.Println()
		}
	}
}

// A desiredState represents desired register assignments.
type desiredState struct {
	// Desired assignments will be small, so we just use a list
	// of valueID+registers entries.
	entries []desiredStateEntry
	// Registers that other values want to be in. This mask contains
	// at least the union of the regs fields of entries, but may also
	// contain registers for values that were once in this data
	// structure but are no longer.
	avoid regMask
}
type desiredStateEntry struct {
	// (pre-regalloc) value
	ID ID
	// Registers it would like to be in, in priority order.
	// Unused slots are filled with noRegister.
	regs [4]register
}

func (d *desiredState) clear() {
	d.entries = d.entries[:0]
	d.avoid = 0
}

// get returns a list of desired registers for value vid.
func (d *desiredState) get(vid ID) [4]register {
	for _, e := range d.entries {
		if e.ID == vid {
			return e.regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

// add records that we'd like value vid to be in register r.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and highest priority.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Move from lower priority to top priority.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}

func (d *desiredState) addList(vid ID, regs [4]register) {
	// regs is in priority order, so iterate in reverse order.
	for i := len(regs) - 1; i >= 0; i-- {
		r := regs[i]
		if r != noRegister {
			d.add(vid, r)
		}
	}
}

// clobber erases any desired registers in the set m.
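// Surviving registers are compacted toward the front so priority order
// is preserved; entries left with no desired registers are deleted.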
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No more desired registers for this value.
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}

// copy copies the desired state x into d.
func (d *desiredState) copy(x *desiredState) {
	d.entries = append(d.entries[:0], x.entries...)
	d.avoid = x.avoid
}

// remove removes the desired registers for vid and returns them.
func (d *desiredState) remove(vid ID) [4]register {
	for i := range d.entries {
		if d.entries[i].ID == vid {
			regs := d.entries[i].regs
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			return regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

// merge merges another desired state x into d.
func (d *desiredState) merge(x *desiredState) {
	d.avoid |= x.avoid
	// There should only be a few desired registers, so
	// linear insert is ok.
	for _, e := range x.entries {
		d.addList(e.ID, e.regs)
	}
}

func min32(x, y int32) int32 {
	if x < y {
		return x
	}
	return y
}

func max32(x, y int32) int32 {
	if x > y {
		return x
	}
	return y
}
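// The sketch below is illustrative only and is not referenced by the
// compiler; the function name is ours, added to show how desiredState.add
// maintains its small priority list and how clobber compacts it. It
// assumes nothing beyond the declarations in this file.
func desiredStateExample() [4]register {
	var d desiredState
	d.add(1, 3)       // v1 prefers r3:             [r3, -, -, -]
	d.add(1, 5)       // r5 becomes top priority:   [r5, r3, -, -]
	d.add(1, 3)       // r3 promoted back to front: [r3, r5, -, -]
	d.clobber(1 << 5) // r5 erased, list compacted: [r3, -, -, -]
	return d.get(1)   // -> [3, noRegister, noRegister, noRegister]
}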