github.com/dannin/go@v0.0.0-20161031215817-d35dfd405eaa/src/cmd/compile/internal/ssa/regalloc.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Register allocation.
//
// We use a version of a linear scan register allocator. We treat the
// whole function as a single long basic block and run through
// it using a greedy register allocator. Then all merge edges
// (those targeting a block with len(Preds)>1) are processed to
// shuffle data into the place that the target of the edge expects.
//
// The greedy allocator moves values into registers just before they
// are used, spills registers only when necessary, and spills the
// value whose next use is farthest in the future.
//
// The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
// such predecessor provides the starting register state for a block.
//
// It also requires that there are no critical edges (critical =
// comes from a block with >1 successor and goes to a block with >1
// predecessor). This makes it easy to add fixup code on merge edges -
// the source of a merge edge has only one successor, so we can add
// fixup code to the end of that block.

// Spilling
//
// For every value, we generate a spill immediately after the value itself.
//	x = Op y z : AX
//	x2 = StoreReg x
// While AX still holds x, any uses of x will use that value. When AX is needed
// for another value, we simply reuse AX. Spill code has already been generated
// so there is no code generated at "spill" time. When x is referenced
// subsequently, we issue a load to restore x to a register using x2 as
// its argument:
//	x3 = Restore x2 : CX
// x3 can then be used wherever x is referenced again.
// If the spill (x2) is never used, it will be removed at the end of regalloc.
//
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
//
// A register phi must have the phi and all of its inputs allocated to the
// same register. Register phis are spilled similarly to regular ops:
//	b1: y = ... : AX        b2: z = ... : AX
//	    goto b3                 goto b3
//	b3: x = phi(y, z) : AX
//	    x2 = StoreReg x
//
// A stack phi must have the phi and all of its inputs allocated to the same
// stack location. Stack phis start out life already spilled - each phi
// input must be a store (using StoreReg) at the end of the corresponding
// predecessor block.
//	b1: y = ... : AX        b2: z = ... : BX
//	    y2 = StoreReg y         z2 = StoreReg z
//	    goto b3                 goto b3
//	b3: x = phi(y2, z2)
// The stack allocator knows that StoreReg args of stack-allocated phis
// must be allocated to the same stack slot as the phi that uses them.
// x is now a spilled value and a restore must appear before its first use.

// TODO

// Use an affinity graph to mark two values which should use the
// same register. This affinity graph will be used to prefer certain
// registers for allocation. This affinity helps eliminate moves that
// are required for phi implementations and helps generate allocations
// for 2-register architectures.
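//
// A minimal sketch of what such an affinity edge could look like (hypothetical,
// not part of this file): each edge names two values that would like to share
// a register, weighted by the benefit of honoring it.
//
//	type affinityEdge struct {
//		x, y   ID    // pre-regalloc values that want the same register
//		weight int32 // e.g. estimated number of moves eliminated
//	}
//
// The allocator could consult edges incident to a value when several
// allowed registers are otherwise equally good.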

// Note: regalloc generates a not-quite-SSA output. If we have:
//
//	b1: x = ... : AX
//	    x2 = StoreReg x
//	    ... AX gets reused for something else ...
//	    if ... goto b3 else b4
//
//	b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
//	    ... use x3 ...                 ... use x4 ...
//
//	b2: ... use x3 ...

// If b3 is the primary predecessor of b2, then we use x3 in b2 and
// add an x4:CX->BX copy at the end of b4.
// But the definition of x3 doesn't dominate b2. We should really
// insert a dummy phi at the start of b2 (x5=phi(x3,x4):BX) to keep
// SSA form. For now, we ignore this problem as remaining in strict
// SSA form isn't needed after regalloc. We'll just leave the use
// of x3 not dominated by the definition of x3, and the CX->BX copy
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?

// Additional not-quite-SSA output occurs when spills are sunk out
// of loops to the targets of exit edges from the loop. Before sinking,
// there is one spill site (one StoreReg) targeting stack slot X; after
// sinking there may be multiple spill sites targeting stack slot X,
// with no phi functions at any join points reachable by the multiple
// spill sites. In addition, uses of the spill from copies of the original
// will not name the copy in their reference; instead they will name
// the original, though both will have the same spill location. The
// first sunk spill will be the original, but moved, to an exit block,
// thus ensuring that there is a definition somewhere corresponding to
// the original spill's uses.

package ssa

import (
	"cmd/internal/obj"
	"fmt"
	"unsafe"
)

const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)

// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)

// regalloc performs register allocation on f. It sets f.RegAlloc
// to the resulting allocation.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
}

type register uint8

const noRegister register = 255

type regMask uint64

func (m regMask) String() string {
	s := ""
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if s != "" {
			s += " "
		}
		s += fmt.Sprintf("r%d", r)
	}
	return s
}

// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	n := 0
	for r != 0 {
		n += int(r & 1)
		r >>= 1
	}
	return n
}

// pickReg picks an arbitrary register from the register mask.
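// For example (illustrative): for a mask with bits 1 and 3 set, pickReg
// returns the lowest set bit, and it panics on an empty mask:
//
//	pickReg(regMask(0xa)) == register(1)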
169 func pickReg(r regMask) register { 170 // pick the lowest one 171 if r == 0 { 172 panic("can't pick a register from an empty set") 173 } 174 for i := register(0); ; i++ { 175 if r&1 != 0 { 176 return i 177 } 178 r >>= 1 179 } 180 } 181 182 type use struct { 183 dist int32 // distance from start of the block to a use of a value 184 line int32 // line number of the use 185 next *use // linked list of uses of a value in nondecreasing dist order 186 } 187 188 type valState struct { 189 regs regMask // the set of registers holding a Value (usually just one) 190 uses *use // list of uses in this block 191 spill *Value // spilled copy of the Value 192 spillUsed bool 193 spillUsedShuffle bool // true if used in shuffling, after ordinary uses 194 needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() 195 rematerializeable bool // cached value of v.rematerializeable() 196 } 197 198 type regState struct { 199 v *Value // Original (preregalloc) Value stored in this register. 200 c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it. 201 // If a register is unused, v==c==nil 202 } 203 204 type regAllocState struct { 205 f *Func 206 207 registers []Register 208 numRegs register 209 SPReg register 210 SBReg register 211 GReg register 212 allocatable regMask 213 214 // for each block, its primary predecessor. 215 // A predecessor of b is primary if it is the closest 216 // predecessor that appears before b in the layout order. 217 // We record the index in the Preds list where the primary predecessor sits. 218 primary []int32 219 220 // live values at the end of each block. live[b.ID] is a list of value IDs 221 // which are live at the end of b, together with a count of how many instructions 222 // forward to the next use. 223 live [][]liveInfo 224 // desired register assignments at the end of each block. 225 // Note that this is a static map computed before allocation occurs. Dynamic 226 // register desires (from partially completed allocations) will trump 227 // this information. 228 desired []desiredState 229 230 // current state of each (preregalloc) Value 231 values []valState 232 233 // For each Value, map from its value ID back to the 234 // preregalloc Value it was derived from. 235 orig []*Value 236 237 // current state of each register 238 regs []regState 239 240 // registers that contain values which can't be kicked out 241 nospill regMask 242 243 // mask of registers currently in use 244 used regMask 245 246 // mask of registers used in the current instruction 247 tmpused regMask 248 249 // current block we're working on 250 curBlock *Block 251 252 // cache of use records 253 freeUseRecords *use 254 255 // endRegs[blockid] is the register state at the end of each block. 256 // encoded as a set of endReg records. 257 endRegs [][]endReg 258 259 // startRegs[blockid] is the register state at the start of merge blocks. 260 // saved state does not include the state of phi ops in the block. 261 startRegs [][]startReg 262 263 // spillLive[blockid] is the set of live spills at the end of each block 264 spillLive [][]ID 265 266 // a set of copies we generated to move things around, and 267 // whether it is used in shuffle. Unused copies will be deleted. 268 copies map[*Value]bool 269 270 loopnest *loopnest 271 } 272 273 type spillToSink struct { 274 spill *Value // Spill instruction to move (a StoreReg) 275 dests int32 // Bitmask indicating exit blocks from loop in which spill/val is defined. 
1<<i set means val is live into loop.exitBlocks[i] 276 } 277 278 func (sts *spillToSink) spilledValue() *Value { 279 return sts.spill.Args[0] 280 } 281 282 type endReg struct { 283 r register 284 v *Value // pre-regalloc value held in this register (TODO: can we use ID here?) 285 c *Value // cached version of the value 286 } 287 288 type startReg struct { 289 r register 290 vid ID // pre-regalloc value needed in this register 291 line int32 // line number of use of this register 292 } 293 294 // freeReg frees up register r. Any current user of r is kicked out. 295 func (s *regAllocState) freeReg(r register) { 296 v := s.regs[r].v 297 if v == nil { 298 s.f.Fatalf("tried to free an already free register %d\n", r) 299 } 300 301 // Mark r as unused. 302 if s.f.pass.debug > regDebug { 303 fmt.Printf("freeReg %s (dump %s/%s)\n", s.registers[r].Name(), v, s.regs[r].c) 304 } 305 s.regs[r] = regState{} 306 s.values[v.ID].regs &^= regMask(1) << r 307 s.used &^= regMask(1) << r 308 } 309 310 // freeRegs frees up all registers listed in m. 311 func (s *regAllocState) freeRegs(m regMask) { 312 for m&s.used != 0 { 313 s.freeReg(pickReg(m & s.used)) 314 } 315 } 316 317 // setOrig records that c's original value is the same as 318 // v's original value. 319 func (s *regAllocState) setOrig(c *Value, v *Value) { 320 for int(c.ID) >= len(s.orig) { 321 s.orig = append(s.orig, nil) 322 } 323 if s.orig[c.ID] != nil { 324 s.f.Fatalf("orig value set twice %s %s", c, v) 325 } 326 s.orig[c.ID] = s.orig[v.ID] 327 } 328 329 // assignReg assigns register r to hold c, a copy of v. 330 // r must be unused. 331 func (s *regAllocState) assignReg(r register, v *Value, c *Value) { 332 if s.f.pass.debug > regDebug { 333 fmt.Printf("assignReg %s %s/%s\n", s.registers[r].Name(), v, c) 334 } 335 if s.regs[r].v != nil { 336 s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v) 337 } 338 339 // Update state. 340 s.regs[r] = regState{v, c} 341 s.values[v.ID].regs |= regMask(1) << r 342 s.used |= regMask(1) << r 343 s.f.setHome(c, &s.registers[r]) 344 } 345 346 // allocReg chooses a register from the set of registers in mask. 347 // If there is no unused register, a Value will be kicked out of 348 // a register to make room. 349 func (s *regAllocState) allocReg(mask regMask, v *Value) register { 350 mask &= s.allocatable 351 mask &^= s.nospill 352 if mask == 0 { 353 s.f.Fatalf("no register available for %s", v) 354 } 355 356 // Pick an unused register if one is available. 357 if mask&^s.used != 0 { 358 return pickReg(mask &^ s.used) 359 } 360 361 // Pick a value to spill. Spill the value with the 362 // farthest-in-the-future use. 363 // TODO: Prefer registers with already spilled Values? 364 // TODO: Modify preference using affinity graph. 365 // TODO: if a single value is in multiple registers, spill one of them 366 // before spilling a value in just a single register. 367 368 // Find a register to spill. We spill the register containing the value 369 // whose next use is as far in the future as possible. 370 // https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm 371 var r register 372 maxuse := int32(-1) 373 for t := register(0); t < s.numRegs; t++ { 374 if mask>>t&1 == 0 { 375 continue 376 } 377 v := s.regs[t].v 378 if n := s.values[v.ID].uses.dist; n > maxuse { 379 // v's next use is farther in the future than any value 380 // we've seen so far. A new best spill candidate. 
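// For example, if the value in AX has its next use 2 instructions ahead
// while the value in BX is not needed for another 40 instructions, BX is
// chosen: evicting it postpones the reload as long as possible (the
// farthest-next-use heuristic linked above).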
381 r = t 382 maxuse = n 383 } 384 } 385 if maxuse == -1 { 386 s.f.Fatalf("couldn't find register to spill") 387 } 388 389 // Try to move it around before kicking out, if there is a free register. 390 // We generate a Copy and record it. It will be deleted if never used. 391 v2 := s.regs[r].v 392 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r) 393 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 { 394 r2 := pickReg(m) 395 c := s.curBlock.NewValue1(v2.Line, OpCopy, v2.Type, s.regs[r].c) 396 s.copies[c] = false 397 if s.f.pass.debug > regDebug { 398 fmt.Printf("copy %s to %s : %s\n", v2, c, s.registers[r2].Name()) 399 } 400 s.setOrig(c, v2) 401 s.assignReg(r2, v2, c) 402 } 403 s.freeReg(r) 404 return r 405 } 406 407 // allocValToReg allocates v to a register selected from regMask and 408 // returns the register copy of v. Any previous user is kicked out and spilled 409 // (if necessary). Load code is added at the current pc. If nospill is set the 410 // allocated register is marked nospill so the assignment cannot be 411 // undone until the caller allows it by clearing nospill. Returns a 412 // *Value which is either v or a copy of v allocated to the chosen register. 413 func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line int32) *Value { 414 vi := &s.values[v.ID] 415 416 // Check if v is already in a requested register. 417 if mask&vi.regs != 0 { 418 r := pickReg(mask & vi.regs) 419 if s.regs[r].v != v || s.regs[r].c == nil { 420 panic("bad register state") 421 } 422 if nospill { 423 s.nospill |= regMask(1) << r 424 } 425 return s.regs[r].c 426 } 427 428 // Allocate a register. 429 r := s.allocReg(mask, v) 430 431 // Allocate v to the new register. 432 var c *Value 433 if vi.regs != 0 { 434 // Copy from a register that v is already in. 435 r2 := pickReg(vi.regs) 436 if s.regs[r2].v != v { 437 panic("bad register state") 438 } 439 c = s.curBlock.NewValue1(line, OpCopy, v.Type, s.regs[r2].c) 440 } else if v.rematerializeable() { 441 // Rematerialize instead of loading from the spill location. 442 c = v.copyInto(s.curBlock) 443 } else { 444 switch { 445 // Load v from its spill location. 446 case vi.spill != nil: 447 if s.f.pass.debug > logSpills { 448 s.f.Config.Warnl(vi.spill.Line, "load spill for %v from %v", v, vi.spill) 449 } 450 c = s.curBlock.NewValue1(line, OpLoadReg, v.Type, vi.spill) 451 vi.spillUsed = true 452 default: 453 s.f.Fatalf("attempt to load unspilled value %v", v.LongString()) 454 } 455 } 456 s.setOrig(c, v) 457 s.assignReg(r, v, c) 458 if nospill { 459 s.nospill |= regMask(1) << r 460 } 461 return c 462 } 463 464 // isLeaf reports whether f performs any calls. 465 func isLeaf(f *Func) bool { 466 for _, b := range f.Blocks { 467 for _, v := range b.Values { 468 if opcodeTable[v.Op].call { 469 return false 470 } 471 } 472 } 473 return true 474 } 475 476 func (s *regAllocState) init(f *Func) { 477 s.f = f 478 s.registers = f.Config.registers 479 if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) { 480 s.f.Fatalf("bad number of registers: %d", nr) 481 } else { 482 s.numRegs = register(nr) 483 } 484 // Locate SP, SB, and g registers. 485 s.SPReg = noRegister 486 s.SBReg = noRegister 487 s.GReg = noRegister 488 for r := register(0); r < s.numRegs; r++ { 489 switch s.registers[r].Name() { 490 case "SP": 491 s.SPReg = r 492 case "SB": 493 s.SBReg = r 494 case "g": 495 s.GReg = r 496 } 497 } 498 // Make sure we found all required registers. 
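// The switch below compares noRegister against each required register, so a
// case matches exactly when that register was never located above; the first
// still-missing register triggers a Fatalf (for g, only when the
// configuration actually has a g register).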
499 switch noRegister { 500 case s.SPReg: 501 s.f.Fatalf("no SP register found") 502 case s.SBReg: 503 s.f.Fatalf("no SB register found") 504 case s.GReg: 505 if f.Config.hasGReg { 506 s.f.Fatalf("no g register found") 507 } 508 } 509 510 // Figure out which registers we're allowed to use. 511 s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask 512 s.allocatable &^= 1 << s.SPReg 513 s.allocatable &^= 1 << s.SBReg 514 if s.f.Config.hasGReg { 515 s.allocatable &^= 1 << s.GReg 516 } 517 if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 { 518 s.allocatable &^= 1 << uint(s.f.Config.FPReg) 519 } 520 if s.f.Config.ctxt.Flag_shared { 521 switch s.f.Config.arch { 522 case "ppc64le": // R2 already reserved. 523 s.allocatable &^= 1 << 12 // R12 524 } 525 } 526 if s.f.Config.LinkReg != -1 { 527 if isLeaf(f) { 528 // Leaf functions don't save/restore the link register. 529 s.allocatable &^= 1 << uint(s.f.Config.LinkReg) 530 } 531 if s.f.Config.arch == "arm" && obj.GOARM == 5 { 532 // On ARMv5 we insert softfloat calls at each FP instruction. 533 // This clobbers LR almost everywhere. Disable allocating LR 534 // on ARMv5. 535 s.allocatable &^= 1 << uint(s.f.Config.LinkReg) 536 } 537 } 538 if s.f.Config.ctxt.Flag_dynlink { 539 switch s.f.Config.arch { 540 case "amd64": 541 s.allocatable &^= 1 << 15 // R15 542 case "arm": 543 s.allocatable &^= 1 << 9 // R9 544 case "ppc64le": // R2 already reserved. 545 s.allocatable &^= 1 << 12 // R12 546 case "arm64": 547 // nothing to do? 548 case "386": 549 // nothing to do. 550 // Note that for Flag_shared (position independent code) 551 // we do need to be careful, but that carefulness is hidden 552 // in the rewrite rules so we always have a free register 553 // available for global load/stores. See gen/386.rules (search for Flag_shared). 554 case "s390x": 555 // nothing to do, R10 & R11 already reserved 556 default: 557 s.f.Config.fe.Fatalf(0, "arch %s not implemented", s.f.Config.arch) 558 } 559 } 560 if s.f.Config.nacl { 561 switch s.f.Config.arch { 562 case "arm": 563 s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm 564 case "amd64p32": 565 s.allocatable &^= 1 << 5 // BP - reserved for nacl 566 s.allocatable &^= 1 << 15 // R15 - reserved for nacl 567 } 568 } 569 if s.f.Config.use387 { 570 s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go) 571 } 572 573 s.regs = make([]regState, s.numRegs) 574 s.values = make([]valState, f.NumValues()) 575 s.orig = make([]*Value, f.NumValues()) 576 s.copies = make(map[*Value]bool) 577 for _, b := range f.Blocks { 578 for _, v := range b.Values { 579 if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() { 580 s.values[v.ID].needReg = true 581 s.values[v.ID].rematerializeable = v.rematerializeable() 582 s.orig[v.ID] = v 583 } 584 // Note: needReg is false for values returning Tuple types. 585 // Instead, we mark the corresponding Selects as needReg. 586 } 587 } 588 s.computeLive() 589 590 // Compute block order. This array allows us to distinguish forward edges 591 // from backward edges and compute how far they go. 592 blockOrder := make([]int32, f.NumBlocks()) 593 for i, b := range f.Blocks { 594 blockOrder[b.ID] = int32(i) 595 } 596 597 // Compute primary predecessors. 
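// For each block b, the primary predecessor is the predecessor closest
// before b in layout order; predecessors at or after b (backward edges) are
// skipped. best is an index into b.Preds, or -1 if every incoming edge is a
// backward edge.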
598 s.primary = make([]int32, f.NumBlocks()) 599 for _, b := range f.Blocks { 600 best := -1 601 for i, e := range b.Preds { 602 p := e.b 603 if blockOrder[p.ID] >= blockOrder[b.ID] { 604 continue // backward edge 605 } 606 if best == -1 || blockOrder[p.ID] > blockOrder[b.Preds[best].b.ID] { 607 best = i 608 } 609 } 610 s.primary[b.ID] = int32(best) 611 } 612 613 s.endRegs = make([][]endReg, f.NumBlocks()) 614 s.startRegs = make([][]startReg, f.NumBlocks()) 615 s.spillLive = make([][]ID, f.NumBlocks()) 616 } 617 618 // Adds a use record for id at distance dist from the start of the block. 619 // All calls to addUse must happen with nonincreasing dist. 620 func (s *regAllocState) addUse(id ID, dist int32, line int32) { 621 r := s.freeUseRecords 622 if r != nil { 623 s.freeUseRecords = r.next 624 } else { 625 r = &use{} 626 } 627 r.dist = dist 628 r.line = line 629 r.next = s.values[id].uses 630 s.values[id].uses = r 631 if r.next != nil && dist > r.next.dist { 632 s.f.Fatalf("uses added in wrong order") 633 } 634 } 635 636 // advanceUses advances the uses of v's args from the state before v to the state after v. 637 // Any values which have no more uses are deallocated from registers. 638 func (s *regAllocState) advanceUses(v *Value) { 639 for _, a := range v.Args { 640 if !s.values[a.ID].needReg { 641 continue 642 } 643 ai := &s.values[a.ID] 644 r := ai.uses 645 ai.uses = r.next 646 if r.next == nil { 647 // Value is dead, free all registers that hold it. 648 s.freeRegs(ai.regs) 649 } 650 r.next = s.freeUseRecords 651 s.freeUseRecords = r 652 } 653 } 654 655 // liveAfterCurrentInstruction reports whether v is live after 656 // the current instruction is completed. v must be used by the 657 // current instruction. 658 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool { 659 u := s.values[v.ID].uses 660 d := u.dist 661 for u != nil && u.dist == d { 662 u = u.next 663 } 664 return u != nil && u.dist > d 665 } 666 667 // Sets the state of the registers to that encoded in regs. 668 func (s *regAllocState) setState(regs []endReg) { 669 s.freeRegs(s.used) 670 for _, x := range regs { 671 s.assignReg(x.r, x.v, x.c) 672 } 673 } 674 675 // compatRegs returns the set of registers which can store a type t. 676 func (s *regAllocState) compatRegs(t Type) regMask { 677 var m regMask 678 if t.IsTuple() || t.IsFlags() { 679 return 0 680 } 681 if t.IsFloat() || t == TypeInt128 { 682 m = s.f.Config.fpRegMask 683 } else { 684 m = s.f.Config.gpRegMask 685 } 686 return m & s.allocatable 687 } 688 689 // loopForBlock returns the loop containing block b, 690 // provided that the loop is "interesting" for purposes 691 // of improving register allocation (= is inner, and does 692 // not contain a call) 693 func (s *regAllocState) loopForBlock(b *Block) *loop { 694 loop := s.loopnest.b2l[b.ID] 695 696 // Minor for-the-time-being optimization: nothing happens 697 // unless a loop is both inner and call-free, therefore 698 // don't bother with other loops. 
699 if loop != nil && (loop.containsCall || !loop.isInner) { 700 loop = nil 701 } 702 return loop 703 } 704 705 func (s *regAllocState) regalloc(f *Func) { 706 liveSet := f.newSparseSet(f.NumValues()) 707 defer f.retSparseSet(liveSet) 708 var oldSched []*Value 709 var phis []*Value 710 var phiRegs []register 711 var args []*Value 712 713 // statistics 714 var nSpills int // # of spills remaining 715 var nSpillsInner int // # of spills remaining in inner loops 716 var nSpillsSunk int // # of sunk spills remaining 717 var nSpillsChanged int // # of sunk spills lost because of register use change 718 var nSpillsSunkUnused int // # of spills not sunk because they were removed completely 719 var nSpillsNotSunkLateUse int // # of spills not sunk because of very late use (in shuffle) 720 721 // Data structure used for computing desired registers. 722 var desired desiredState 723 724 // Desired registers for inputs & outputs for each instruction in the block. 725 type dentry struct { 726 out [4]register // desired output registers 727 in [3][4]register // desired input registers (for inputs 0,1, and 2) 728 } 729 var dinfo []dentry 730 731 if f.Entry != f.Blocks[0] { 732 f.Fatalf("entry block must be first") 733 } 734 735 // Get loop nest so that spills in inner loops can be 736 // tracked. When the last block of a loop is processed, 737 // attempt to move spills out of the loop. 738 s.loopnest.findExits() 739 740 // Spills are moved from one block's slice of values to another's. 741 // This confuses register allocation if it occurs before it is 742 // complete, so candidates are recorded, then rechecked and 743 // moved after all allocation (register and stack) is complete. 744 // Because movement is only within a stack slot's lifetime, it 745 // is safe to do this. 746 var toSink []spillToSink 747 // Will be used to figure out live inputs to exit blocks of inner loops. 748 entryCandidates := newSparseMap(f.NumValues()) 749 750 for _, b := range f.Blocks { 751 s.curBlock = b 752 loop := s.loopForBlock(b) 753 754 // Initialize liveSet and uses fields for this block. 755 // Walk backwards through the block doing liveness analysis. 756 liveSet.clear() 757 for _, e := range s.live[b.ID] { 758 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.line) // pseudo-uses from beyond end of block 759 liveSet.add(e.ID) 760 } 761 if v := b.Control; v != nil && s.values[v.ID].needReg { 762 s.addUse(v.ID, int32(len(b.Values)), b.Line) // psuedo-use by control value 763 liveSet.add(v.ID) 764 } 765 for i := len(b.Values) - 1; i >= 0; i-- { 766 v := b.Values[i] 767 liveSet.remove(v.ID) 768 if v.Op == OpPhi { 769 // Remove v from the live set, but don't add 770 // any inputs. This is the state the len(b.Preds)>1 771 // case below desires; it wants to process phis specially. 772 continue 773 } 774 for _, a := range v.Args { 775 if !s.values[a.ID].needReg { 776 continue 777 } 778 s.addUse(a.ID, int32(i), v.Line) 779 liveSet.add(a.ID) 780 } 781 } 782 if s.f.pass.debug > regDebug { 783 fmt.Printf("uses for %s:%s\n", s.f.Name, b) 784 for i := range s.values { 785 vi := &s.values[i] 786 u := vi.uses 787 if u == nil { 788 continue 789 } 790 fmt.Printf(" v%d:", i) 791 for u != nil { 792 fmt.Printf(" %d", u.dist) 793 u = u.next 794 } 795 fmt.Println() 796 } 797 } 798 799 // Make a copy of the block schedule so we can generate a new one in place. 800 // We make a separate copy for phis and regular values. 
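// nphi counts the leading OpPhi values; phis must sit at the start of a
// block (a phi found later triggers a Fatalf below), so phis and oldSched
// together hold all of b.Values and the slice can be rebuilt in place.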
801 nphi := 0 802 for _, v := range b.Values { 803 if v.Op != OpPhi { 804 break 805 } 806 nphi++ 807 } 808 phis = append(phis[:0], b.Values[:nphi]...) 809 oldSched = append(oldSched[:0], b.Values[nphi:]...) 810 b.Values = b.Values[:0] 811 812 // Initialize start state of block. 813 if b == f.Entry { 814 // Regalloc state is empty to start. 815 if nphi > 0 { 816 f.Fatalf("phis in entry block") 817 } 818 } else if len(b.Preds) == 1 { 819 // Start regalloc state with the end state of the previous block. 820 s.setState(s.endRegs[b.Preds[0].b.ID]) 821 if nphi > 0 { 822 f.Fatalf("phis in single-predecessor block") 823 } 824 // Drop any values which are no longer live. 825 // This may happen because at the end of p, a value may be 826 // live but only used by some other successor of p. 827 for r := register(0); r < s.numRegs; r++ { 828 v := s.regs[r].v 829 if v != nil && !liveSet.contains(v.ID) { 830 s.freeReg(r) 831 } 832 } 833 } else { 834 // This is the complicated case. We have more than one predecessor, 835 // which means we may have Phi ops. 836 837 // Copy phi ops into new schedule. 838 b.Values = append(b.Values, phis...) 839 840 // Start with the final register state of the primary predecessor 841 idx := s.primary[b.ID] 842 if idx < 0 { 843 f.Fatalf("block with no primary predecessor %s", b) 844 } 845 p := b.Preds[idx].b 846 s.setState(s.endRegs[p.ID]) 847 848 if s.f.pass.debug > regDebug { 849 fmt.Printf("starting merge block %s with end state of %s:\n", b, p) 850 for _, x := range s.endRegs[p.ID] { 851 fmt.Printf(" %s: orig:%s cache:%s\n", s.registers[x.r].Name(), x.v, x.c) 852 } 853 } 854 855 // Decide on registers for phi ops. Use the registers determined 856 // by the primary predecessor if we can. 857 // TODO: pick best of (already processed) predecessors? 858 // Majority vote? Deepest nesting level? 859 phiRegs = phiRegs[:0] 860 var phiUsed regMask 861 for _, v := range phis { 862 if !s.values[v.ID].needReg { 863 phiRegs = append(phiRegs, noRegister) 864 continue 865 } 866 a := v.Args[idx] 867 // Some instructions target not-allocatable registers. 868 // They're not suitable for further (phi-function) allocation. 869 m := s.values[a.ID].regs &^ phiUsed & s.allocatable 870 if m != 0 { 871 r := pickReg(m) 872 s.freeReg(r) 873 phiUsed |= regMask(1) << r 874 phiRegs = append(phiRegs, r) 875 } else { 876 phiRegs = append(phiRegs, noRegister) 877 } 878 } 879 880 // Second pass - deallocate any phi inputs which are now dead. 881 for _, v := range phis { 882 if !s.values[v.ID].needReg { 883 continue 884 } 885 a := v.Args[idx] 886 if !liveSet.contains(a.ID) { 887 // Input is dead beyond the phi, deallocate 888 // anywhere else it might live. 889 s.freeRegs(s.values[a.ID].regs) 890 } 891 } 892 893 // Third pass - pick registers for phis whose inputs 894 // were not in a register. 895 for i, v := range phis { 896 if !s.values[v.ID].needReg { 897 continue 898 } 899 if phiRegs[i] != noRegister { 900 continue 901 } 902 if s.f.Config.use387 && v.Type.IsFloat() { 903 continue // 387 can't handle floats in registers between blocks 904 } 905 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used 906 if m != 0 { 907 r := pickReg(m) 908 phiRegs[i] = r 909 phiUsed |= regMask(1) << r 910 } 911 } 912 913 // Set registers for phis. Add phi spill code. 914 for i, v := range phis { 915 if !s.values[v.ID].needReg { 916 continue 917 } 918 r := phiRegs[i] 919 if r == noRegister { 920 // stack-based phi 921 // Spills will be inserted in all the predecessors below. 
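// A stack-based phi is its own spill record: with spill set to v, a later
// restore in allocValToReg issues a LoadReg whose argument is the phi
// itself, i.e. the phi's stack slot.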
922 s.values[v.ID].spill = v // v starts life spilled 923 s.values[v.ID].spillUsed = true // use is guaranteed 924 continue 925 } 926 // register-based phi 927 s.assignReg(r, v, v) 928 // Spill the phi in case we need to restore it later. 929 spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v) 930 s.setOrig(spill, v) 931 s.values[v.ID].spill = spill 932 s.values[v.ID].spillUsed = false 933 if loop != nil { 934 loop.spills = append(loop.spills, v) 935 nSpillsInner++ 936 } 937 nSpills++ 938 } 939 940 // Save the starting state for use by merge edges. 941 var regList []startReg 942 for r := register(0); r < s.numRegs; r++ { 943 v := s.regs[r].v 944 if v == nil { 945 continue 946 } 947 if phiUsed>>r&1 != 0 { 948 // Skip registers that phis used, we'll handle those 949 // specially during merge edge processing. 950 continue 951 } 952 regList = append(regList, startReg{r, v.ID, s.values[v.ID].uses.line}) 953 } 954 s.startRegs[b.ID] = regList 955 956 if s.f.pass.debug > regDebug { 957 fmt.Printf("after phis\n") 958 for _, x := range s.startRegs[b.ID] { 959 fmt.Printf(" %s: v%d\n", s.registers[x.r].Name(), x.vid) 960 } 961 } 962 } 963 964 // Allocate space to record the desired registers for each value. 965 dinfo = dinfo[:0] 966 for i := 0; i < len(oldSched); i++ { 967 dinfo = append(dinfo, dentry{}) 968 } 969 970 // Load static desired register info at the end of the block. 971 desired.copy(&s.desired[b.ID]) 972 973 // Check actual assigned registers at the start of the next block(s). 974 // Dynamically assigned registers will trump the static 975 // desired registers computed during liveness analysis. 976 // Note that we do this phase after startRegs is set above, so that 977 // we get the right behavior for a block which branches to itself. 978 for _, e := range b.Succs { 979 succ := e.b 980 // TODO: prioritize likely successor? 981 for _, x := range s.startRegs[succ.ID] { 982 desired.add(x.vid, x.r) 983 } 984 // Process phi ops in succ. 985 pidx := e.i 986 for _, v := range succ.Values { 987 if v.Op != OpPhi { 988 break 989 } 990 if !s.values[v.ID].needReg { 991 continue 992 } 993 rp, ok := s.f.getHome(v.ID).(*Register) 994 if !ok { 995 continue 996 } 997 desired.add(v.Args[pidx].ID, register(rp.num)) 998 } 999 } 1000 // Walk values backwards computing desired register info. 1001 // See computeLive for more comments. 1002 for i := len(oldSched) - 1; i >= 0; i-- { 1003 v := oldSched[i] 1004 prefs := desired.remove(v.ID) 1005 desired.clobber(opcodeTable[v.Op].reg.clobbers) 1006 for _, j := range opcodeTable[v.Op].reg.inputs { 1007 if countRegs(j.regs) != 1 { 1008 continue 1009 } 1010 desired.clobber(j.regs) 1011 desired.add(v.Args[j.idx].ID, pickReg(j.regs)) 1012 } 1013 if opcodeTable[v.Op].resultInArg0 { 1014 if opcodeTable[v.Op].commutative { 1015 desired.addList(v.Args[1].ID, prefs) 1016 } 1017 desired.addList(v.Args[0].ID, prefs) 1018 } 1019 // Save desired registers for this value. 1020 dinfo[i].out = prefs 1021 for j, a := range v.Args { 1022 if j >= len(dinfo[i].in) { 1023 break 1024 } 1025 dinfo[i].in[j] = desired.get(a.ID) 1026 } 1027 } 1028 1029 // Process all the non-phi values. 
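// For each remaining value this loop: handles special ops (SP, SB,
// Select0/1, GetG, Arg, KeepAlive) directly; otherwise moves the args into
// acceptable registers (preferring the desired registers recorded in dinfo),
// protects arg0 for resultInArg0 ops, frees clobbered registers, picks
// output registers, re-appends the value to the new schedule, and finally
// emits an unconditional spill that is deleted later if it is never used.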
1030 for idx, v := range oldSched { 1031 if s.f.pass.debug > regDebug { 1032 fmt.Printf(" processing %s\n", v.LongString()) 1033 } 1034 regspec := opcodeTable[v.Op].reg 1035 if v.Op == OpPhi { 1036 f.Fatalf("phi %s not at start of block", v) 1037 } 1038 if v.Op == OpSP { 1039 s.assignReg(s.SPReg, v, v) 1040 b.Values = append(b.Values, v) 1041 s.advanceUses(v) 1042 continue 1043 } 1044 if v.Op == OpSB { 1045 s.assignReg(s.SBReg, v, v) 1046 b.Values = append(b.Values, v) 1047 s.advanceUses(v) 1048 continue 1049 } 1050 if v.Op == OpSelect0 || v.Op == OpSelect1 { 1051 if s.values[v.ID].needReg { 1052 var i = 0 1053 if v.Op == OpSelect1 { 1054 i = 1 1055 } 1056 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v) 1057 } 1058 b.Values = append(b.Values, v) 1059 s.advanceUses(v) 1060 goto issueSpill 1061 } 1062 if v.Op == OpGetG && s.f.Config.hasGReg { 1063 // use hardware g register 1064 if s.regs[s.GReg].v != nil { 1065 s.freeReg(s.GReg) // kick out the old value 1066 } 1067 s.assignReg(s.GReg, v, v) 1068 b.Values = append(b.Values, v) 1069 s.advanceUses(v) 1070 goto issueSpill 1071 } 1072 if v.Op == OpArg { 1073 // Args are "pre-spilled" values. We don't allocate 1074 // any register here. We just set up the spill pointer to 1075 // point at itself and any later user will restore it to use it. 1076 s.values[v.ID].spill = v 1077 s.values[v.ID].spillUsed = true // use is guaranteed 1078 b.Values = append(b.Values, v) 1079 s.advanceUses(v) 1080 continue 1081 } 1082 if v.Op == OpKeepAlive { 1083 // Make sure the argument to v is still live here. 1084 s.advanceUses(v) 1085 vi := &s.values[v.Args[0].ID] 1086 if vi.spillUsed { 1087 // Use the spill location. 1088 v.SetArg(0, vi.spill) 1089 } else { 1090 // No need to keep unspilled values live. 1091 // These are typically rematerializeable constants like nil, 1092 // or values of a variable that were modified since the last call. 1093 v.Op = OpCopy 1094 v.SetArgs1(v.Args[1]) 1095 } 1096 b.Values = append(b.Values, v) 1097 continue 1098 } 1099 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 { 1100 // No register allocation required (or none specified yet) 1101 s.freeRegs(regspec.clobbers) 1102 b.Values = append(b.Values, v) 1103 s.advanceUses(v) 1104 continue 1105 } 1106 1107 if s.values[v.ID].rematerializeable { 1108 // Value is rematerializeable, don't issue it here. 1109 // It will get issued just before each use (see 1110 // allocValueToReg). 1111 for _, a := range v.Args { 1112 a.Uses-- 1113 } 1114 s.advanceUses(v) 1115 continue 1116 } 1117 1118 if s.f.pass.debug > regDebug { 1119 fmt.Printf("value %s\n", v.LongString()) 1120 fmt.Printf(" out:") 1121 for _, r := range dinfo[idx].out { 1122 if r != noRegister { 1123 fmt.Printf(" %s", s.registers[r].Name()) 1124 } 1125 } 1126 fmt.Println() 1127 for i := 0; i < len(v.Args) && i < 3; i++ { 1128 fmt.Printf(" in%d:", i) 1129 for _, r := range dinfo[idx].in[i] { 1130 if r != noRegister { 1131 fmt.Printf(" %s", s.registers[r].Name()) 1132 } 1133 } 1134 fmt.Println() 1135 } 1136 } 1137 1138 // Move arguments to registers. Process in an ordering defined 1139 // by the register specification (most constrained first). 1140 args = append(args[:0], v.Args...) 1141 for _, i := range regspec.inputs { 1142 mask := i.regs 1143 if mask&s.values[args[i.idx].ID].regs == 0 { 1144 // Need a new register for the input. 1145 mask &= s.allocatable 1146 mask &^= s.nospill 1147 // Used desired register if available. 
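// Only the first three inputs have recorded desires (dinfo[idx].in has
// three slots); for those, take the first desired register that the mask
// allows and that is currently unused.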
1148 if i.idx < 3 { 1149 for _, r := range dinfo[idx].in[i.idx] { 1150 if r != noRegister && (mask&^s.used)>>r&1 != 0 { 1151 // Desired register is allowed and unused. 1152 mask = regMask(1) << r 1153 break 1154 } 1155 } 1156 } 1157 // Avoid registers we're saving for other values. 1158 if mask&^desired.avoid != 0 { 1159 mask &^= desired.avoid 1160 } 1161 } 1162 args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Line) 1163 } 1164 1165 // If the output clobbers the input register, make sure we have 1166 // at least two copies of the input register so we don't 1167 // have to reload the value from the spill location. 1168 if opcodeTable[v.Op].resultInArg0 { 1169 var m regMask 1170 if !s.liveAfterCurrentInstruction(v.Args[0]) { 1171 // arg0 is dead. We can clobber its register. 1172 goto ok 1173 } 1174 if s.values[v.Args[0].ID].rematerializeable { 1175 // We can rematerialize the input, don't worry about clobbering it. 1176 goto ok 1177 } 1178 if countRegs(s.values[v.Args[0].ID].regs) >= 2 { 1179 // we have at least 2 copies of arg0. We can afford to clobber one. 1180 goto ok 1181 } 1182 if opcodeTable[v.Op].commutative { 1183 if !s.liveAfterCurrentInstruction(v.Args[1]) { 1184 args[0], args[1] = args[1], args[0] 1185 goto ok 1186 } 1187 if s.values[v.Args[1].ID].rematerializeable { 1188 args[0], args[1] = args[1], args[0] 1189 goto ok 1190 } 1191 if countRegs(s.values[v.Args[1].ID].regs) >= 2 { 1192 args[0], args[1] = args[1], args[0] 1193 goto ok 1194 } 1195 } 1196 1197 // We can't overwrite arg0 (or arg1, if commutative). So we 1198 // need to make a copy of an input so we have a register we can modify. 1199 1200 // Possible new registers to copy into. 1201 m = s.compatRegs(v.Args[0].Type) &^ s.used 1202 if m == 0 { 1203 // No free registers. In this case we'll just clobber 1204 // an input and future uses of that input must use a restore. 1205 // TODO(khr): We should really do this like allocReg does it, 1206 // spilling the value with the most distant next use. 1207 goto ok 1208 } 1209 1210 // Try to move an input to the desired output. 1211 for _, r := range dinfo[idx].out { 1212 if r != noRegister && m>>r&1 != 0 { 1213 m = regMask(1) << r 1214 args[0] = s.allocValToReg(v.Args[0], m, true, v.Line) 1215 // Note: we update args[0] so the instruction will 1216 // use the register copy we just made. 1217 goto ok 1218 } 1219 } 1220 // Try to copy input to its desired location & use its old 1221 // location as the result register. 1222 for _, r := range dinfo[idx].in[0] { 1223 if r != noRegister && m>>r&1 != 0 { 1224 m = regMask(1) << r 1225 c := s.allocValToReg(v.Args[0], m, true, v.Line) 1226 s.copies[c] = false 1227 // Note: no update to args[0] so the instruction will 1228 // use the original copy. 1229 goto ok 1230 } 1231 } 1232 if opcodeTable[v.Op].commutative { 1233 for _, r := range dinfo[idx].in[1] { 1234 if r != noRegister && m>>r&1 != 0 { 1235 m = regMask(1) << r 1236 c := s.allocValToReg(v.Args[1], m, true, v.Line) 1237 s.copies[c] = false 1238 args[0], args[1] = args[1], args[0] 1239 goto ok 1240 } 1241 } 1242 } 1243 // Avoid future fixed uses if we can. 1244 if m&^desired.avoid != 0 { 1245 m &^= desired.avoid 1246 } 1247 // Save input 0 to a new register so we can clobber it. 1248 c := s.allocValToReg(v.Args[0], m, true, v.Line) 1249 s.copies[c] = false 1250 } 1251 1252 ok: 1253 // Now that all args are in regs, we're ready to issue the value itself. 1254 // Before we pick a register for the output value, allow input registers 1255 // to be deallocated. 
We do this here so that the output can use the 1256 // same register as a dying input. 1257 if !opcodeTable[v.Op].resultNotInArgs { 1258 s.tmpused = s.nospill 1259 s.nospill = 0 1260 s.advanceUses(v) // frees any registers holding args that are no longer live 1261 } 1262 1263 // Dump any registers which will be clobbered 1264 s.freeRegs(regspec.clobbers) 1265 s.tmpused |= regspec.clobbers 1266 1267 // Pick registers for outputs. 1268 { 1269 outRegs := [2]register{noRegister, noRegister} 1270 var used regMask 1271 for _, out := range regspec.outputs { 1272 mask := out.regs & s.allocatable &^ used 1273 if mask == 0 { 1274 continue 1275 } 1276 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 { 1277 if !opcodeTable[v.Op].commutative { 1278 // Output must use the same register as input 0. 1279 r := register(s.f.getHome(args[0].ID).(*Register).num) 1280 mask = regMask(1) << r 1281 } else { 1282 // Output must use the same register as input 0 or 1. 1283 r0 := register(s.f.getHome(args[0].ID).(*Register).num) 1284 r1 := register(s.f.getHome(args[1].ID).(*Register).num) 1285 // Check r0 and r1 for desired output register. 1286 found := false 1287 for _, r := range dinfo[idx].out { 1288 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 { 1289 mask = regMask(1) << r 1290 found = true 1291 if r == r1 { 1292 args[0], args[1] = args[1], args[0] 1293 } 1294 break 1295 } 1296 } 1297 if !found { 1298 // Neither are desired, pick r0. 1299 mask = regMask(1) << r0 1300 } 1301 } 1302 } 1303 for _, r := range dinfo[idx].out { 1304 if r != noRegister && (mask&^s.used)>>r&1 != 0 { 1305 // Desired register is allowed and unused. 1306 mask = regMask(1) << r 1307 break 1308 } 1309 } 1310 // Avoid registers we're saving for other values. 1311 if mask&^desired.avoid != 0 { 1312 mask &^= desired.avoid 1313 } 1314 r := s.allocReg(mask, v) 1315 outRegs[out.idx] = r 1316 used |= regMask(1) << r 1317 s.tmpused |= regMask(1) << r 1318 } 1319 // Record register choices 1320 if v.Type.IsTuple() { 1321 var outLocs LocPair 1322 if r := outRegs[0]; r != noRegister { 1323 outLocs[0] = &s.registers[r] 1324 } 1325 if r := outRegs[1]; r != noRegister { 1326 outLocs[1] = &s.registers[r] 1327 } 1328 s.f.setHome(v, outLocs) 1329 // Note that subsequent SelectX instructions will do the assignReg calls. 1330 } else { 1331 if r := outRegs[0]; r != noRegister { 1332 s.assignReg(r, v, v) 1333 } 1334 } 1335 } 1336 1337 // deallocate dead args, if we have not done so 1338 if opcodeTable[v.Op].resultNotInArgs { 1339 s.nospill = 0 1340 s.advanceUses(v) // frees any registers holding args that are no longer live 1341 } 1342 s.tmpused = 0 1343 1344 // Issue the Value itself. 1345 for i, a := range args { 1346 v.SetArg(i, a) // use register version of arguments 1347 } 1348 b.Values = append(b.Values, v) 1349 1350 // Issue a spill for this value. We issue spills unconditionally, 1351 // then at the end of regalloc delete the ones we never use. 1352 // TODO: schedule the spill at a point that dominates all restores. 1353 // The restore may be off in an unlikely branch somewhere and it 1354 // would be better to have the spill in that unlikely branch as well. 1355 // v := ... 1356 // if unlikely { 1357 // f() 1358 // } 1359 // It would be good to have both spill and restore inside the IF. 
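// issueSpill is reached both by falling through from the generic path above
// and via the gotos for Select0/Select1 and OpGetG, which skip the
// register-assignment machinery but still want a spill.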
1360 issueSpill: 1361 if s.values[v.ID].needReg { 1362 spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v) 1363 s.setOrig(spill, v) 1364 s.values[v.ID].spill = spill 1365 s.values[v.ID].spillUsed = false 1366 if loop != nil { 1367 loop.spills = append(loop.spills, v) 1368 nSpillsInner++ 1369 } 1370 nSpills++ 1371 } 1372 } 1373 1374 // Load control value into reg. 1375 if v := b.Control; v != nil && s.values[v.ID].needReg { 1376 if s.f.pass.debug > regDebug { 1377 fmt.Printf(" processing control %s\n", v.LongString()) 1378 } 1379 // We assume that a control input can be passed in any 1380 // type-compatible register. If this turns out not to be true, 1381 // we'll need to introduce a regspec for a block's control value. 1382 b.Control = s.allocValToReg(v, s.compatRegs(v.Type), false, b.Line) 1383 if b.Control != v { 1384 v.Uses-- 1385 b.Control.Uses++ 1386 } 1387 // Remove this use from the uses list. 1388 vi := &s.values[v.ID] 1389 u := vi.uses 1390 vi.uses = u.next 1391 if u.next == nil { 1392 s.freeRegs(vi.regs) // value is dead 1393 } 1394 u.next = s.freeUseRecords 1395 s.freeUseRecords = u 1396 } 1397 1398 // Spill any values that can't live across basic block boundaries. 1399 if s.f.Config.use387 { 1400 s.freeRegs(s.f.Config.fpRegMask) 1401 } 1402 1403 // If we are approaching a merge point and we are the primary 1404 // predecessor of it, find live values that we use soon after 1405 // the merge point and promote them to registers now. 1406 if len(b.Succs) == 1 { 1407 // For this to be worthwhile, the loop must have no calls in it. 1408 top := b.Succs[0].b 1409 loop := s.loopnest.b2l[top.ID] 1410 if loop == nil || loop.header != top || loop.containsCall { 1411 goto badloop 1412 } 1413 1414 // TODO: sort by distance, pick the closest ones? 1415 for _, live := range s.live[b.ID] { 1416 if live.dist >= unlikelyDistance { 1417 // Don't preload anything live after the loop. 1418 continue 1419 } 1420 vid := live.ID 1421 vi := &s.values[vid] 1422 if vi.regs != 0 { 1423 continue 1424 } 1425 if vi.rematerializeable { 1426 continue 1427 } 1428 v := s.orig[vid] 1429 if s.f.Config.use387 && v.Type.IsFloat() { 1430 continue // 387 can't handle floats in registers between blocks 1431 } 1432 m := s.compatRegs(v.Type) &^ s.used 1433 if m&^desired.avoid != 0 { 1434 m &^= desired.avoid 1435 } 1436 if m != 0 { 1437 s.allocValToReg(v, m, false, b.Line) 1438 } 1439 } 1440 } 1441 badloop: 1442 ; 1443 1444 // Save end-of-block register state. 1445 // First count how many, this cuts allocations in half. 1446 k := 0 1447 for r := register(0); r < s.numRegs; r++ { 1448 v := s.regs[r].v 1449 if v == nil { 1450 continue 1451 } 1452 k++ 1453 } 1454 regList := make([]endReg, 0, k) 1455 for r := register(0); r < s.numRegs; r++ { 1456 v := s.regs[r].v 1457 if v == nil { 1458 continue 1459 } 1460 regList = append(regList, endReg{r, v, s.regs[r].c}) 1461 } 1462 s.endRegs[b.ID] = regList 1463 1464 if checkEnabled { 1465 liveSet.clear() 1466 for _, x := range s.live[b.ID] { 1467 liveSet.add(x.ID) 1468 } 1469 for r := register(0); r < s.numRegs; r++ { 1470 v := s.regs[r].v 1471 if v == nil { 1472 continue 1473 } 1474 if !liveSet.contains(v.ID) { 1475 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b) 1476 } 1477 } 1478 } 1479 1480 // If a value is live at the end of the block and 1481 // isn't in a register, remember that its spill location 1482 // is live. We need to remember this information so that 1483 // the liveness analysis in stackalloc is correct. 
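// The spillLive sets collected here are passed to stackalloc near the end of
// regalloc; the stack liveness stackalloc returns is then handed to shuffle,
// where live spill slots become available sources on each merge edge.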
1484 for _, e := range s.live[b.ID] { 1485 if s.values[e.ID].regs != 0 { 1486 // in a register, we'll use that source for the merge. 1487 continue 1488 } 1489 spill := s.values[e.ID].spill 1490 if spill == nil { 1491 // rematerializeable values will have spill==nil. 1492 continue 1493 } 1494 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID) 1495 s.values[e.ID].spillUsed = true 1496 } 1497 1498 // Keep track of values that are spilled in the loop, but whose spill 1499 // is not used in the loop. It may be possible to move ("sink") the 1500 // spill out of the loop into one or more exit blocks. 1501 if loop != nil { 1502 loop.scratch++ // increment count of blocks in this loop that have been processed 1503 if loop.scratch == loop.nBlocks { // just processed last block of loop, if it is an inner loop. 1504 // This check is redundant with code at the top of the loop. 1505 // This is definitive; the one at the top of the loop is an optimization. 1506 if loop.isInner && // Common case, easier, most likely to be profitable 1507 !loop.containsCall && // Calls force spills, also lead to puzzling spill info. 1508 len(loop.exits) <= 32 { // Almost no inner loops have more than 32 exits, 1509 // and this allows use of a bitvector and a sparseMap. 1510 1511 // TODO: exit calculation is messed up for non-inner loops 1512 // because of multilevel exits that are not part of the "exit" 1513 // count. 1514 1515 // Compute the set of spill-movement candidates live at entry to exit blocks. 1516 // isLoopSpillCandidate filters for 1517 // (1) defined in appropriate loop 1518 // (2) needs a register 1519 // (3) spill not already used (in the loop) 1520 // Condition (3) === "in a register at all loop exits" 1521 1522 entryCandidates.clear() 1523 1524 for whichExit, ss := range loop.exits { 1525 // Start with live at end. 1526 for _, li := range s.live[ss.ID] { 1527 if s.isLoopSpillCandidate(loop, s.orig[li.ID]) { 1528 // s.live contains original IDs, use s.orig above to map back to *Value 1529 entryCandidates.setBit(li.ID, uint(whichExit)) 1530 } 1531 } 1532 // Control can also be live. 1533 if ss.Control != nil && s.orig[ss.Control.ID] != nil && s.isLoopSpillCandidate(loop, s.orig[ss.Control.ID]) { 1534 entryCandidates.setBit(s.orig[ss.Control.ID].ID, uint(whichExit)) 1535 } 1536 // Walk backwards, filling in locally live values, removing those defined. 1537 for i := len(ss.Values) - 1; i >= 0; i-- { 1538 v := ss.Values[i] 1539 vorig := s.orig[v.ID] 1540 if vorig != nil { 1541 entryCandidates.remove(vorig.ID) // Cannot be an issue, only keeps the sets smaller. 1542 } 1543 for _, a := range v.Args { 1544 aorig := s.orig[a.ID] 1545 if aorig != nil && s.isLoopSpillCandidate(loop, aorig) { 1546 entryCandidates.setBit(aorig.ID, uint(whichExit)) 1547 } 1548 } 1549 } 1550 } 1551 1552 for _, e := range loop.spills { 1553 whichblocks := entryCandidates.get(e.ID) 1554 oldSpill := s.values[e.ID].spill 1555 if whichblocks != 0 && whichblocks != -1 { // -1 = not in map. 1556 toSink = append(toSink, spillToSink{spill: oldSpill, dests: whichblocks}) 1557 } 1558 } 1559 1560 } // loop is inner etc 1561 loop.scratch = 0 // Don't leave a mess, just in case. 1562 loop.spills = nil 1563 } // if scratch == nBlocks 1564 } // if loop is not nil 1565 1566 // Clear any final uses. 1567 // All that is left should be the pseudo-uses added for values which 1568 // are live at the end of b. 
1569 for _, e := range s.live[b.ID] { 1570 u := s.values[e.ID].uses 1571 if u == nil { 1572 f.Fatalf("live at end, no uses v%d", e.ID) 1573 } 1574 if u.next != nil { 1575 f.Fatalf("live at end, too many uses v%d", e.ID) 1576 } 1577 s.values[e.ID].uses = nil 1578 u.next = s.freeUseRecords 1579 s.freeUseRecords = u 1580 } 1581 } 1582 1583 // Erase any spills we never used 1584 for i := range s.values { 1585 vi := s.values[i] 1586 if vi.spillUsed { 1587 if s.f.pass.debug > logSpills && vi.spill.Op != OpArg { 1588 s.f.Config.Warnl(vi.spill.Line, "spilled value at %v remains", vi.spill) 1589 } 1590 continue 1591 } 1592 spill := vi.spill 1593 if spill == nil { 1594 // Constants, SP, SB, ... 1595 continue 1596 } 1597 loop := s.loopForBlock(spill.Block) 1598 if loop != nil { 1599 nSpillsInner-- 1600 } 1601 1602 spill.Args[0].Uses-- 1603 f.freeValue(spill) 1604 nSpills-- 1605 } 1606 1607 for _, b := range f.Blocks { 1608 i := 0 1609 for _, v := range b.Values { 1610 if v.Op == OpInvalid { 1611 continue 1612 } 1613 b.Values[i] = v 1614 i++ 1615 } 1616 b.Values = b.Values[:i] 1617 // TODO: zero b.Values[i:], recycle Values 1618 // Not important now because this is the last phase that manipulates Values 1619 } 1620 1621 // Must clear these out before any potential recycling, though that's 1622 // not currently implemented. 1623 for i, ts := range toSink { 1624 vsp := ts.spill 1625 if vsp.Op == OpInvalid { // This spill was completely eliminated 1626 toSink[i].spill = nil 1627 } 1628 } 1629 1630 // Anything that didn't get a register gets a stack location here. 1631 // (StoreReg, stack-based phis, inputs, ...) 1632 stacklive := stackalloc(s.f, s.spillLive) 1633 1634 // Fix up all merge edges. 1635 s.shuffle(stacklive) 1636 1637 // Insert moved spills (that have not been marked invalid above) 1638 // at start of appropriate block and remove the originals from their 1639 // location within loops. Notice that this can break SSA form; 1640 // if a spill is sunk to multiple exits, there will be no phi for that 1641 // spill at a join point downstream of those two exits, though the 1642 // two spills will target the same stack slot. Notice also that this 1643 // takes place after stack allocation, so the stack allocator does 1644 // not need to process these malformed flow graphs. 1645 sinking: 1646 for _, ts := range toSink { 1647 vsp := ts.spill 1648 if vsp == nil { // This spill was completely eliminated 1649 nSpillsSunkUnused++ 1650 continue sinking 1651 } 1652 e := ts.spilledValue() 1653 if s.values[e.ID].spillUsedShuffle { 1654 nSpillsNotSunkLateUse++ 1655 continue sinking 1656 } 1657 1658 // move spills to a better (outside of loop) block. 1659 // This would be costly if it occurred very often, but it doesn't. 1660 b := vsp.Block 1661 loop := s.loopnest.b2l[b.ID] 1662 dests := ts.dests 1663 1664 // Pre-check to be sure that spilled value is still in expected register on all exits where live. 1665 check_val_still_in_reg: 1666 for i := uint(0); i < 32 && dests != 0; i++ { 1667 1668 if dests&(1<<i) == 0 { 1669 continue 1670 } 1671 dests ^= 1 << i 1672 d := loop.exits[i] 1673 if len(d.Preds) > 1 { 1674 panic("Should be impossible given critical edges removed") 1675 } 1676 p := d.Preds[0].b // block in loop exiting to d. 1677 1678 endregs := s.endRegs[p.ID] 1679 for _, regrec := range endregs { 1680 if regrec.v == e && regrec.r != noRegister && regrec.c == e { // TODO: regrec.c != e implies different spill possible. 
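// e is still held, uncopied (regrec.c == e), in a register at this exit;
// go on to check the next exit block.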
1681 continue check_val_still_in_reg 1682 } 1683 } 1684 // If here, the register assignment was lost down at least one exit and it can't be sunk 1685 if s.f.pass.debug > moveSpills { 1686 s.f.Config.Warnl(e.Line, "lost register assignment for spill %v in %v at exit %v to %v", 1687 vsp, b, p, d) 1688 } 1689 nSpillsChanged++ 1690 continue sinking 1691 } 1692 1693 nSpillsSunk++ 1694 nSpillsInner-- 1695 // don't update nSpills, since spill is only moved, and if it is duplicated, the spills-on-a-path is not increased. 1696 1697 dests = ts.dests 1698 1699 // remove vsp from b.Values 1700 i := 0 1701 for _, w := range b.Values { 1702 if vsp == w { 1703 continue 1704 } 1705 b.Values[i] = w 1706 i++ 1707 } 1708 b.Values = b.Values[:i] 1709 1710 first := true 1711 for i := uint(0); i < 32 && dests != 0; i++ { 1712 1713 if dests&(1<<i) == 0 { 1714 continue 1715 } 1716 1717 dests ^= 1 << i 1718 1719 d := loop.exits[i] 1720 vspnew := vsp // reuse original for first sunk spill, saves tracking down and renaming uses 1721 if !first { // any sunk spills after first must make a copy 1722 vspnew = d.NewValue1(e.Line, OpStoreReg, e.Type, e) 1723 f.setHome(vspnew, f.getHome(vsp.ID)) // copy stack home 1724 if s.f.pass.debug > moveSpills { 1725 s.f.Config.Warnl(e.Line, "copied spill %v in %v for %v to %v in %v", 1726 vsp, b, e, vspnew, d) 1727 } 1728 } else { 1729 first = false 1730 vspnew.Block = d 1731 d.Values = append(d.Values, vspnew) 1732 if s.f.pass.debug > moveSpills { 1733 s.f.Config.Warnl(e.Line, "moved spill %v in %v for %v to %v in %v", 1734 vsp, b, e, vspnew, d) 1735 } 1736 } 1737 1738 // shuffle vspnew to the beginning of its block 1739 copy(d.Values[1:], d.Values[0:len(d.Values)-1]) 1740 d.Values[0] = vspnew 1741 1742 } 1743 } 1744 1745 // Erase any copies we never used. 1746 // Also, an unused copy might be the only use of another copy, 1747 // so continue erasing until we reach a fixed point. 1748 for { 1749 progress := false 1750 for c, used := range s.copies { 1751 if !used && c.Uses == 0 { 1752 if s.f.pass.debug > regDebug { 1753 fmt.Printf("delete copied value %s\n", c.LongString()) 1754 } 1755 c.Args[0].Uses-- 1756 f.freeValue(c) 1757 delete(s.copies, c) 1758 progress = true 1759 } 1760 } 1761 if !progress { 1762 break 1763 } 1764 } 1765 1766 for _, b := range f.Blocks { 1767 i := 0 1768 for _, v := range b.Values { 1769 if v.Op == OpInvalid { 1770 continue 1771 } 1772 b.Values[i] = v 1773 i++ 1774 } 1775 b.Values = b.Values[:i] 1776 } 1777 1778 if f.pass.stats > 0 { 1779 f.LogStat("spills_info", 1780 nSpills, "spills", nSpillsInner, "inner_spills_remaining", nSpillsSunk, "inner_spills_sunk", nSpillsSunkUnused, "inner_spills_unused", nSpillsNotSunkLateUse, "inner_spills_shuffled", nSpillsChanged, "inner_spills_changed") 1781 } 1782 } 1783 1784 // isLoopSpillCandidate indicates whether the spill for v satisfies preliminary 1785 // spill-sinking conditions just after the last block of loop has been processed. 1786 // In particular: 1787 // v needs a register. 1788 // v's spill is not (YET) used. 1789 // v's definition is within loop. 1790 // The spill may be used in the future, either by an outright use 1791 // in the code, or by shuffling code inserted after stack allocation. 1792 // Outright uses cause sinking; shuffling (within the loop) inhibits it. 
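// For example, the spill-sinking code above filters candidates at each loop
// exit with exactly this predicate:
//
//	if s.isLoopSpillCandidate(loop, s.orig[li.ID]) {
//		entryCandidates.setBit(li.ID, uint(whichExit))
//	}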
func (s *regAllocState) isLoopSpillCandidate(loop *loop, v *Value) bool {
	return s.values[v.ID].needReg && !s.values[v.ID].spillUsed && s.loopnest.b2l[v.Block.ID] == loop
}

// lateSpillUse notes a late (after stack allocation) use of the spill of the value with ID vid.
// This will inhibit spill sinking.
func (s *regAllocState) lateSpillUse(vid ID) {
	// TODO: investigate why this is necessary.
	// It appears that an outside-the-loop use of
	// an otherwise sinkable spill makes the spill
	// a candidate for shuffling, when it would not
	// otherwise have been the case (spillUsed was not
	// true when isLoopSpillCandidate was called, yet
	// it was shuffled). Such shuffling cuts the amount
	// of spill sinking by more than half (in make.bash).
	s.values[vid].spillUsedShuffle = true
}

// shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
func (s *regAllocState) shuffle(stacklive [][]ID) {
	var e edgeState
	e.s = s
	e.cache = map[ID][]*Value{}
	e.contents = map[Location]contentRecord{}
	if s.f.pass.debug > regDebug {
		fmt.Printf("shuffle %s\n", s.f.Name)
		fmt.Println(s.f.String())
	}

	for _, b := range s.f.Blocks {
		if len(b.Preds) <= 1 {
			continue
		}
		e.b = b
		for i, edge := range b.Preds {
			p := edge.b
			e.p = p
			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
			e.process()
		}
	}
}

type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p->b.

	// for each pre-regalloc value, a list of equivalent cached values
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration

	// map from location to the value it contains
	contents map[Location]contentRecord

	// desired destination locations
	destinations []dstRecord
	extra        []dstRecord

	usedRegs   regMask // registers currently holding something
	uniqueRegs regMask // registers holding the only copy of a value
	finalRegs  regMask // registers holding a final target
}

type contentRecord struct {
	vid   ID     // pre-regalloc value
	c     *Value // cached value
	final bool   // this is a satisfied destination
	line  int32  // line number of use of the value
}

type dstRecord struct {
	loc    Location // register or stack slot
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // place to store a reference to the generating instruction
	line   int32    // line number of use of this location
}

// setup initializes the edge state for shuffling.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state.
	for _, vid := range e.cachedVals {
		delete(e.cache, vid)
	}
	e.cachedVals = e.cachedVals[:0]
	for k := range e.contents {
		delete(e.contents, k)
	}
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0

	// Live registers can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, 0) // don't care about the line number of the source
	}
	// So can all of the spill locations.
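	// For each live spill below, record its stack slot as currently holding
	// the original value, with the spill (StoreReg) as the cached copy.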
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, 0) // don't care about the line number of the source
	}

	// Figure out all the destinations we need.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.vid, nil, x.line})
	}
	// Phis need their args to end up in a specific location.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Line})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID).Name(), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc.Name(), d.vid)
		}
	}
}

// process generates code to move all the values to the right destination locations.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.line) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations we generated.
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress. That means that any
		// remaining unsatisfied moves are in simple cycles.
		// For example, A -> B -> C -> D -> A.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C

		// To break the cycle, we pick an unused register, say R,
		// and put a copy of B there.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C <---- R=copyofB
		// When we resume the outer loop, the A->B move can now proceed,
		// and eventually the whole cycle completes.

		// Copy any cycle location to a temp register. This duplicates
		// one of the cycle entries, allowing the just-duplicated value
		// to be overwritten and the cycle to proceed.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc.Name(), c)
		}
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(d.line, OpCopy, c.Type, c)
		} else {
			e.s.lateSpillUse(vid)
			c = e.p.NewValue1(d.line, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, d.line)
	}
}

// processDest generates code to put value vid into location loc. Returns true
// if progress was made.
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, line int32) bool {
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the correct place.
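		// Re-record the occupant as a satisfied (final) destination with the
		// use's line number; if it is later evicted by erase, a fixup move
		// will be queued to restore it.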
		e.contents[loc] = contentRecord{vid, occupant.c, true, line}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
			if occupant.c.Op == OpStoreReg {
				e.s.lateSpillUse(vid)
			}
		}
		// Note: if splice == nil then occupant.c will appear dead. This code
		// is not in SSA form, so be careful not to run deadcode elimination
		// after this pass.
		if _, ok := e.s.copies[occupant.c]; ok {
			// The copy at occupant.c was used to avoid a spill.
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location.
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
		// We can't overwrite the last copy
		// of a value that needs to survive.
		return false
	}

	// Copy from a source of v, register preferred.
	v := e.s.orig[vid]
	var c *Value
	var src Location
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc.Name())
		fmt.Printf("sources of v%d:", vid)
	}
	for _, w := range e.cache[vid] {
		h := e.s.f.getHome(w.ID)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf(" %s:%s", h.Name(), w)
		}
		_, isreg := h.(*Register)
		if src == nil || isreg {
			c = w
			src = h
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src.Name())
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)
	var x *Value
	if c == nil {
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyInto(e.p)
		} else {
			// Rematerialize into a stack slot. Need a free
			// register to accomplish this.
			e.erase(loc) // see pre-clobber comment below
			r := e.findRegFor(v.Type)
			x = v.copyInto(e.p)
			e.set(r, vid, x, false, line)
			// Make sure we spill with the size of the slot, not the
			// size of x (which might be wider due to our dropping
			// of narrowing conversions).
			x = e.p.NewValue1(line, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit a move from src to dst.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(line, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(line, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				e.s.lateSpillUse(vid)
				x = e.p.NewValue1(line, OpLoadReg, c.Type, c)
			} else {
				// mem->mem. Use a temp register.

				// Pre-clobber the destination. This avoids the
				// following situation:
				//   - v is currently held in R0 and stacktmp0.
				//   - We want to copy stacktmp1 to stacktmp0.
				//   - We choose R0 as the temporary register.
				// During the copy, both R0 and stacktmp0 are
				// clobbered, losing both copies of v. Oops!
				// Erasing the destination early means R0 will not
				// be chosen as the temp register, as it will then
				// be the last copy of v.
				e.erase(loc)

				r := e.findRegFor(c.Type)
				e.s.lateSpillUse(vid)
				t := e.p.NewValue1(line, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, line)
				x = e.p.NewValue1(line, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, line)
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}

// set changes the contents of location loc to hold the given value and its cached representative.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, line int32) {
	e.s.f.setHome(c, loc)
	e.erase(loc)
	e.contents[loc] = contentRecord{vid, c, final, line}
	a := e.cache[vid]
	if len(a) == 0 {
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc.Name(), c)
	}
}

// erase removes any value currently occupying loc.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Make sure it gets added to the tail of the destination queue
		// so we make progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.line})
	}

	// Remove c from the list of cached values.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc.Name(), c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
	}
	if len(a) == 1 {
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}

// findRegFor finds a register we can use to make a temp copy of type typ.
func (e *edgeState) findRegFor(typ Type) Location {
	// Which registers are possibilities?
	var m regMask
	if typ.IsFloat() {
		m = e.s.compatRegs(e.s.f.Config.fe.TypeFloat64())
	} else {
		m = e.s.compatRegs(e.s.f.Config.fe.TypeInt64())
	}

	// Pick a register. In priority order:
	// 1) an unused register
	// 2) a non-unique register not holding a final value
	// 3) a non-unique register
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is available. Allocate a temp location to spill a register to.
	// The type of the slot is immaterial - it will not be live across
	// any safepoint.
	// Just use a type big enough to hold any register.
	typ = e.s.f.Config.fe.TypeInt64()
	t := LocalSlot{e.s.f.Config.fe.Auto(typ), typ, 0}
	// TODO: reuse these slots.

	// Pick a register to spill.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				x := e.p.NewValue1(c.Line, OpStoreReg, c.Type, c)
				e.set(t, vid, x, false, c.Line)
				if e.s.f.pass.debug > regDebug {
					fmt.Printf(" SPILL %s->%s %s\n", r.Name(), t.Name(), x.LongString())
				}
				// r will now be overwritten by the caller. At some point
				// later, the newly saved value will be moved back to its
				// final destination in processDest.
				return r
			}
		}
	}

	fmt.Printf("m:%d unique:%d final:%d\n", m, e.uniqueRegs, e.finalRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID).Name())
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}

// rematerializeable reports whether the register allocator should recompute
// a value instead of spilling/restoring it.
func (v *Value) rematerializeable() bool {
	if !opcodeTable[v.Op].rematerializeable {
		return false
	}
	for _, a := range v.Args {
		// SP and SB (generated by OpSP and OpSB) are always available.
		if a.Op != OpSP && a.Op != OpSB {
			return false
		}
	}
	return true
}

type liveInfo struct {
	ID   ID    // ID of value
	dist int32 // # of instructions before next use
	line int32 // line number of next use
}

// dblock contains information about desired & avoid registers at the end of a block.
type dblock struct {
	prefers []desiredStateEntry
	avoid   regMask
}

// computeLive computes a map from block ID to a list of value IDs live at the end
// of that block. Along with each value ID is a count of how many instructions
// remain until that value's next use. The resulting map is stored in s.live.
// computeLive also computes the desired register information at the end of each block.
// This desired register information is stored in s.desired.
// TODO: this could be quadratic if lots of variables are live across lots of
// basic blocks. Figure out a way to make this function (or, more precisely, the user
// of this function) require only linear size & time.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := newSparseMap(f.NumValues())
	t := newSparseMap(f.NumValues())

	// Keep track of which value we want in each register.
	var desired desiredState

	// Instead of iterating over f.Blocks, iterate over their postordering.
	// Liveness information flows backward, so starting at the end
	// increases the probability that we will stabilize quickly.
	// TODO: Do a better job yet. Here's one possibility:
	// Calculate the dominator tree and locate all strongly connected components.
	// If a value is live in one block of an SCC, it is live in all.
	// Walk the dominator tree from end to beginning, just once, treating SCC
	// components as single blocks, duplicating calculated liveness information
	// out to all of them.
	po := f.postorder()
	s.loopnest = f.loopnest()
	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			// Add len(b.Values) to adjust from end-of-block distance
			// to beginning-of-block distance.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.line)
			}

			// Mark the control value as live.
			if b.Control != nil && s.values[b.Control.ID].needReg {
				live.set(b.Control.ID, int32(len(b.Values)), b.Line)
			}

			// Propagate backwards to the start of the block.
			// Assumes Values have been scheduled.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phi ops for later.
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Line)
					}
				}
			}
			// Propagate desired registers backwards.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// TODO: if v is a phi, save the desired register for the phi inputs.
					// For now, we just drop it and don't propagate
					// desired registers back through phi nodes.
					continue
				}
				// Cancel desired registers if they get clobbered.
				desired.clobber(opcodeTable[v.Op].reg.clobbers)
				// Update desired registers if there are any fixed register inputs.
				for _, j := range opcodeTable[v.Op].reg.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Set the desired register of input 0 if this is a 2-operand instruction.
				if opcodeTable[v.Op].resultInArg0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its list of live-at-end values.
			// Invariant: live contains the values live at the start of b (excluding phi inputs).
			for i, e := range b.Preds {
				p := e.b
				// Compute the additional distance for the edge.
				// Note: delta must be at least 1 to distinguish the control
				// value use from the first user in a successor block.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Update any desired registers at the end of p.
				s.desired[p.ID].merge(&desired)

				// Start t off with the previously known live values at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.line)
				}
				update := false

				// Add new live values from scanning this block.
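				// A value live at the start of b is live at the end of p at a
				// distance of (its start-of-b distance) + delta.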
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.aux)
					}
				}
				// Also add the correct arg from the saved phi values.
				// All phis are at distance delta (we consider them as
				// happening simultaneously at the start of the block).
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Line)
					}
				}

				if !update {
					continue
				}
				// The live set has changed, update it.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.aux})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf(" %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d", x.ID)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(s.registers[r].Name())
						first = false
					}
					fmt.Printf("]")
				}
			}
			fmt.Printf(" avoid=%x", int64(s.desired[b.ID].avoid))
			fmt.Println()
		}
	}
}

// A desiredState represents desired register assignments.
type desiredState struct {
	// Desired assignments will be small, so we just use a list
	// of valueID+registers entries.
	entries []desiredStateEntry
	// Registers that other values want to be in. This value will
	// contain at least the union of the regs fields of entries, but
	// may contain additional registers for values that were once in
	// this data structure but are no longer.
	avoid regMask
}

type desiredStateEntry struct {
	// (pre-regalloc) value
	ID ID
	// Registers it would like to be in, in priority order.
	// Unused slots are filled with noRegister.
	regs [4]register
}

func (d *desiredState) clear() {
	d.entries = d.entries[:0]
	d.avoid = 0
}

// get returns a list of desired registers for value vid.
func (d *desiredState) get(vid ID) [4]register {
	for _, e := range d.entries {
		if e.ID == vid {
			return e.regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

// add records that we'd like value vid to be in register r.
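// If vid already has desired registers, r is moved (or inserted) at the
// highest-priority slot; at most four registers are remembered per value.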
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and highest priority.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Move from lower priority to top priority.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}

// addList records that we'd like value vid to be in each of the registers
// in regs, in priority order.
func (d *desiredState) addList(vid ID, regs [4]register) {
	// regs is in priority order, so iterate in reverse order.
	for i := len(regs) - 1; i >= 0; i-- {
		r := regs[i]
		if r != noRegister {
			d.add(vid, r)
		}
	}
}

// clobber erases any desired registers in the set m.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No more desired registers for this value.
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}

// copy copies the desired state from x into d.
func (d *desiredState) copy(x *desiredState) {
	d.entries = append(d.entries[:0], x.entries...)
	d.avoid = x.avoid
}

// remove removes the desired registers for vid and returns them.
func (d *desiredState) remove(vid ID) [4]register {
	for i := range d.entries {
		if d.entries[i].ID == vid {
			regs := d.entries[i].regs
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			return regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

// merge merges another desired state x into d.
func (d *desiredState) merge(x *desiredState) {
	d.avoid |= x.avoid
	// There should only be a few desired registers, so
	// linear insert is ok.
	for _, e := range x.entries {
		d.addList(e.ID, e.regs)
	}
}
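
// Illustrative sketch (not part of the allocator) of how desiredState's
// priority ordering behaves. Register numbers 0 and 1 are assumed to exist
// on the target, and value ID 7 is arbitrary:
//
//	var d desiredState
//	d.add(7, 0)       // v7 prefers r0:            regs = [r0, -, -, -]
//	d.add(7, 1)       // r1 becomes top priority:  regs = [r1, r0, -, -]
//	d.clobber(1 << 0) // r0 is clobbered:          regs = [r1, -, -, -]
//	_ = d.get(7)      // [r1, noRegister, noRegister, noRegister]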