github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/cmd/compile/internal/ssa/regalloc.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Register allocation.
//
// We use a version of a linear scan register allocator. We treat the
// whole function as a single long basic block and run through
// it using a greedy register allocator. Then all merge edges
// (those targeting a block with len(Preds)>1) are processed to
// shuffle data into the place that the target of the edge expects.
//
// The greedy allocator moves values into registers just before they
// are used, spills registers only when necessary, and spills the
// value whose next use is farthest in the future.
//
// The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
// such predecessor provides the starting register state for a block.
//
// It also requires that there are no critical edges (critical =
// comes from a block with >1 successor and goes to a block with >1
// predecessor). This makes it easy to add fixup code on merge edges -
// the source of a merge edge has only one successor, so we can add
// fixup code to the end of that block.

// Spilling
//
// During the normal course of the allocator, we might throw a still-live
// value out of all registers. When that value is subsequently used, we must
// load it from a slot on the stack. We must also issue an instruction to
// initialize that stack location with a copy of v.
//
// pre-regalloc:
//   (1) v = Op ...
//   (2) x = Op ...
//   (3) ... = Op v ...
//
// post-regalloc:
//   (1) v = Op ...    : AX // computes v, store result in AX
//       s = StoreReg v     // spill v to a stack slot
//   (2) x = Op ...    : AX // some other op uses AX
//       c = LoadReg s : CX // restore v from stack slot
//   (3) ... = Op c ...     // use the restored value
//
// Allocation occurs normally until we reach (3) and we realize we have
// a use of v and it isn't in any register. At that point, we allocate
// a spill (a StoreReg) for v. We can't determine the correct place for
// the spill at this point, so we allocate the spill as blockless initially.
// The restore is then generated to load v back into a register so it can
// be used. Subsequent uses of v will use the restored value c instead.
//
// What remains is the question of where to schedule the spill.
// During allocation, we keep track of the dominator of all restores of v.
// The spill of v must dominate that block. The spill must also be issued at
// a point where v is still in a register.
//
// To find the right place, start at b, the block which dominates all restores.
//  - If b is v.Block, then issue the spill right after v.
//    It is known to be in a register at that point, and dominates any restores.
//  - Otherwise, if v is in a register at the start of b,
//    put the spill of v at the start of b.
//  - Otherwise, set b = immediate dominator of b, and repeat.
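//
// A rough sketch of that search, in Go-like pseudocode (illustrative only;
// the actual implementation, placeSpills below, instead walks down the
// dominator tree from v.Block, bounds the number of blocks it examines,
// and refuses to push a spill into a deeper loop):
//
//   b := block dominating all restores of v
//   for {
//       if b == v.Block            { place spill right after v; break }
//       if v in a register at start of b { place spill at start of b; break }
//       b = idom(b)
//   }
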
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
//
// A register phi must have the phi and all of its inputs allocated to the
// same register. Register phis are spilled similarly to regular ops.
//
// A stack phi must have the phi and all of its inputs allocated to the same
// stack location. Stack phis start out life already spilled - each phi
// input must be a store (using StoreReg) at the end of the corresponding
// predecessor block.
//   b1: y = ... : AX        b2: z = ... : BX
//       y2 = StoreReg y         z2 = StoreReg z
//       goto b3                 goto b3
//   b3: x = phi(y2, z2)
// The stack allocator knows that StoreReg args of stack-allocated phis
// must be allocated to the same stack slot as the phi that uses them.
// x is now a spilled value and a restore must appear before its first use.

// TODO

// Use an affinity graph to mark two values which should use the
// same register. This affinity graph will be used to prefer certain
// registers for allocation. This affinity helps eliminate moves that
// are required for phi implementations and helps generate allocations
// for 2-register architectures.

// Note: regalloc generates a not-quite-SSA output. If we have:
//
//   b1: x = ... : AX
//       x2 = StoreReg x
//       ... AX gets reused for something else ...
//       if ... goto b3 else b4
//
//   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
//       ... use x3 ...                 ... use x4 ...
//
//   b2: ... use x3 ...
//
// If b3 is the primary predecessor of b2, then we use x3 in b2 and
// add a x4:CX->BX copy at the end of b4.
// But the definition of x3 doesn't dominate b2. We should really
// insert a dummy phi at the start of b2 (x5=phi(x3,x4):BX) to keep
// SSA form. For now, we ignore this problem as remaining in strict
// SSA form isn't needed after regalloc. We'll just leave the use
// of x3 not dominated by the definition of x3, and the CX->BX copy
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?

package ssa

import (
	"cmd/compile/internal/types"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
	"unsafe"
)

const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)

// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)

// regalloc performs register allocation on f. It sets f.RegAlloc
// to the resulting allocation.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
}

type register uint8

const noRegister register = 255

type regMask uint64

func (m regMask) String() string {
	s := ""
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if s != "" {
			s += " "
		}
		s += fmt.Sprintf("r%d", r)
	}
	return s
}

// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	n := 0
	for r != 0 {
		n += int(r & 1)
		r >>= 1
	}
	return n
}

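// For example, a mask with bits 0 and 3 set (binary 1001) describes the set
// {r0, r3}: countRegs returns 2 for it, pickReg below returns r0 (the lowest
// set bit), and its String method prints "r0 r3".
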
// pickReg picks an arbitrary register from the register mask.
func pickReg(r regMask) register {
	// pick the lowest one
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	for i := register(0); ; i++ {
		if r&1 != 0 {
			return i
		}
		r >>= 1
	}
}

type use struct {
	dist int32    // distance from start of the block to a use of a value
	pos  src.XPos // source position of the use
	next *use     // linked list of uses of a value in nondecreasing dist order
}

// A valState records the register allocation state for a (pre-regalloc) value.
type valState struct {
	regs              regMask // the set of registers holding a Value (usually just one)
	uses              *use    // list of uses in this block
	spill             *Value  // spilled copy of the Value (if any)
	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags()
	rematerializeable bool    // cached value of v.rematerializeable()
}

type regState struct {
	v *Value // Original (preregalloc) Value stored in this register.
	c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it.
	// If a register is unused, v==c==nil
}

type regAllocState struct {
	f *Func

	sdom        SparseTree
	registers   []Register
	numRegs     register
	SPReg       register
	SBReg       register
	GReg        register
	allocatable regMask

	// for each block, its primary predecessor.
	// A predecessor of b is primary if it is the closest
	// predecessor that appears before b in the layout order.
	// We record the index in the Preds list where the primary predecessor sits.
	primary []int32

	// live values at the end of each block. live[b.ID] is a list of value IDs
	// which are live at the end of b, together with a count of how many instructions
	// forward to the next use.
	live [][]liveInfo
	// desired register assignments at the end of each block.
	// Note that this is a static map computed before allocation occurs. Dynamic
	// register desires (from partially completed allocations) will trump
	// this information.
	desired []desiredState

	// current state of each (preregalloc) Value
	values []valState

	// ID of SP, SB values
	sp, sb ID

	// For each Value, map from its value ID back to the
	// preregalloc Value it was derived from.
	orig []*Value

	// current state of each register
	regs []regState

	// registers that contain values which can't be kicked out
	nospill regMask

	// mask of registers currently in use
	used regMask

	// mask of registers used in the current instruction
	tmpused regMask

	// current block we're working on
	curBlock *Block

	// cache of use records
	freeUseRecords *use

	// endRegs[blockid] is the register state at the end of each block.
	// encoded as a set of endReg records.
	endRegs [][]endReg

	// startRegs[blockid] is the register state at the start of merge blocks.
	// saved state does not include the state of phi ops in the block.
	startRegs [][]startReg

	// spillLive[blockid] is the set of live spills at the end of each block
	spillLive [][]ID

	// a set of copies we generated to move things around, and
	// whether it is used in shuffle. Unused copies will be deleted.
	copies map[*Value]bool

	loopnest *loopnest

	// choose a good order in which to visit blocks for allocation purposes.
	visitOrder []*Block
}

type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
	c *Value // cached version of the value
}

type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached version of the value
	pos src.XPos // source position of use of this register
}

// freeReg frees up register r. Any current user of r is kicked out.
func (s *regAllocState) freeReg(r register) {
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}

// freeRegs frees up all registers listed in m.
func (s *regAllocState) freeRegs(m regMask) {
	for m&s.used != 0 {
		s.freeReg(pickReg(m & s.used))
	}
}

// setOrig records that c's original value is the same as
// v's original value.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	s.orig[c.ID] = s.orig[v.ID]
}

// assignReg assigns register r to hold c, a copy of v.
// r must be unused.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}

	// Update state.
	s.regs[r] = regState{v, c}
	s.values[v.ID].regs |= regMask(1) << r
	s.used |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])
}

// allocReg chooses a register from the set of registers in mask.
// If there is no unused register, a Value will be kicked out of
// a register to make room.
func (s *regAllocState) allocReg(mask regMask, v *Value) register {
	mask &= s.allocatable
	mask &^= s.nospill
	if mask == 0 {
		s.f.Fatalf("no register available for %s", v)
	}

	// Pick an unused register if one is available.
	if mask&^s.used != 0 {
		return pickReg(mask &^ s.used)
	}

	// Pick a value to spill. Spill the value with the
	// farthest-in-the-future use.
	// TODO: Prefer registers with already spilled Values?
	// TODO: Modify preference using affinity graph.
	// TODO: if a single value is in multiple registers, spill one of them
	// before spilling a value in just a single register.

	// Find a register to spill. We spill the register containing the value
	// whose next use is as far in the future as possible.
	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
	var r register
	maxuse := int32(-1)
	for t := register(0); t < s.numRegs; t++ {
		if mask>>t&1 == 0 {
			continue
		}
		v := s.regs[t].v
		if n := s.values[v.ID].uses.dist; n > maxuse {
			// v's next use is farther in the future than any value
			// we've seen so far. A new best spill candidate.
			r = t
			maxuse = n
		}
	}
	if maxuse == -1 {
		s.f.Fatalf("couldn't find register to spill")
	}

	// Try to move it around before kicking out, if there is a free register.
	// We generate a Copy and record it. It will be deleted if never used.
	v2 := s.regs[r].v
	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
		r2 := pickReg(m)
		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
		s.copies[c] = false
		if s.f.pass.debug > regDebug {
			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
		}
		s.setOrig(c, v2)
		s.assignReg(r2, v2, c)
	}
	s.freeReg(r)
	return r
}

// makeSpill returns a Value which represents the spilled value of v.
// b is the block in which the spill is used.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Final block not known - keep track of subtree where restores reside.
		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a spill for v. We don't know where we want
	// to put it yet, so we leave it blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// We also don't know what the spill's arg will be.
	// Leave it argless for now.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}

// allocValToReg allocates v to a register selected from regMask and
// returns the register copy of v. Any previous user is kicked out and spilled
// (if necessary). Load code is added at the current pc. If nospill is set the
// allocated register is marked nospill so the assignment cannot be
// undone until the caller allows it by clearing nospill. Returns a
// *Value which is either v or a copy of v allocated to the chosen register.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	vi := &s.values[v.ID]

	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		return s.regs[r].c
	}

	// Allocate a register.
	r := s.allocReg(mask, v)

	// Allocate v to the new register.
	var c *Value
	if vi.regs != 0 {
		// Copy from a register that v is already in.
		r2 := pickReg(vi.regs)
		if s.regs[r2].v != v {
			panic("bad register state")
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}
	s.setOrig(c, v)
	s.assignReg(r, v, c)
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}

// isLeaf reports whether f performs any calls.
func isLeaf(f *Func) bool {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if opcodeTable[v.Op].call {
				return false
			}
		}
	}
	return true
}

func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, and g registers.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		}
	}
	// Make sure we found all required registers.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
		if s.f.Config.arch == "arm" && objabi.GOARM == 5 {
			// On ARMv5 we insert softfloat calls at each FP instruction.
			// This clobbers LR almost everywhere. Disable allocating LR
			// on ARMv5.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		switch s.f.Config.arch {
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "ppc64le": // R2 already reserved.
			// nothing to do
		case "arm64":
			// nothing to do?
		case "386":
			// nothing to do.
			// Note that for Flag_shared (position independent code)
			// we do need to be careful, but that carefulness is hidden
			// in the rewrite rules so we always have a free register
			// available for global load/stores. See gen/386.rules (search for Flag_shared).
		case "s390x":
			// nothing to do, R10 & R11 already reserved
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}
	if s.f.Config.nacl {
		switch s.f.Config.arch {
		case "arm":
			s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm
		case "amd64p32":
			s.allocatable &^= 1 << 5  // BP - reserved for nacl
			s.allocatable &^= 1 << 15 // R15 - reserved for nacl
		}
	}
	if s.f.Config.use387 {
		s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go)
	}

	// Linear scan register allocation can be influenced by the order in which blocks appear.
	// Decouple the register allocation order from the generated block order.
	// This also creates an opportunity for experiments to find a better order.
	s.visitOrder = layoutRegallocOrder(f)

	// Compute block order. This array allows us to distinguish forward edges
	// from backward edges and compute how far they go.
	blockOrder := make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		blockOrder[b.ID] = int32(i)
	}

	s.regs = make([]regState, s.numRegs)
	s.values = make([]valState, f.NumValues())
	s.orig = make([]*Value, f.NumValues())
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needReg is false for values returning Tuple types.
			// Instead, we mark the corresponding Selects as needReg.
		}
	}
	s.computeLive()

	// Compute primary predecessors.
	s.primary = make([]int32, f.NumBlocks())
	for _, b := range s.visitOrder {
		best := -1
		for i, e := range b.Preds {
			p := e.b
			if blockOrder[p.ID] >= blockOrder[b.ID] {
				continue // backward edge
			}
			if best == -1 || blockOrder[p.ID] > blockOrder[b.Preds[best].b.ID] {
				best = i
			}
		}
		s.primary[b.ID] = int32(best)
	}

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.sdom()
}

// Adds a use record for id at distance dist from the start of the block.
// All calls to addUse must happen with nonincreasing dist.
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
	r := s.freeUseRecords
	if r != nil {
		s.freeUseRecords = r.next
	} else {
		r = &use{}
	}
	r.dist = dist
	r.pos = pos
	r.next = s.values[id].uses
	s.values[id].uses = r
	if r.next != nil && dist > r.next.dist {
		s.f.Fatalf("uses added in wrong order")
	}
}

// advanceUses advances the uses of v's args from the state before v to the state after v.
// Any values which have no more uses are deallocated from registers.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil {
			// Value is dead, free all registers that hold it.
			s.freeRegs(ai.regs)
		}
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
}

// liveAfterCurrentInstruction reports whether v is live after
// the current instruction is completed. v must be used by the
// current instruction.
func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
	u := s.values[v.ID].uses
	d := u.dist
	for u != nil && u.dist == d {
		u = u.next
	}
	return u != nil && u.dist > d
}

// Sets the state of the registers to that encoded in regs.
func (s *regAllocState) setState(regs []endReg) {
	s.freeRegs(s.used)
	for _, x := range regs {
		s.assignReg(x.r, x.v, x.c)
	}
}

// compatRegs returns the set of registers which can store a type t.
func (s *regAllocState) compatRegs(t *types.Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsFloat() || t == types.TypeInt128 {
		m = s.f.Config.fpRegMask
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}

func (s *regAllocState) regalloc(f *Func) {
	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
	defer f.retSparseSet(regValLiveSet)
	var oldSched []*Value
	var phis []*Value
	var phiRegs []register
	var args []*Value

	// Data structure used for computing desired registers.
	var desired desiredState

	// Desired registers for inputs & outputs for each instruction in the block.
	type dentry struct {
		out [4]register    // desired output registers
		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
	}
	var dinfo []dentry

	if f.Entry != f.Blocks[0] {
		f.Fatalf("entry block must be first")
	}

	for _, b := range s.visitOrder {
		if s.f.pass.debug > regDebug {
			fmt.Printf("Begin processing block %v\n", b)
		}
		s.curBlock = b

		// Initialize regValLiveSet and uses fields for this block.
		// Walk backwards through the block doing liveness analysis.
		regValLiveSet.clear()
		for _, e := range s.live[b.ID] {
			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
			regValLiveSet.add(e.ID)
		}
		if v := b.Control; v != nil && s.values[v.ID].needReg {
			s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value
			regValLiveSet.add(v.ID)
		}
		for i := len(b.Values) - 1; i >= 0; i-- {
			v := b.Values[i]
			regValLiveSet.remove(v.ID)
			if v.Op == OpPhi {
				// Remove v from the live set, but don't add
				// any inputs. This is the state the len(b.Preds)>1
				// case below desires; it wants to process phis specially.
				continue
			}
			if opcodeTable[v.Op].call {
				// Function call clobbers all the registers but SP and SB.
				regValLiveSet.clear()
				if s.sp != 0 && s.values[s.sp].uses != nil {
					regValLiveSet.add(s.sp)
				}
				if s.sb != 0 && s.values[s.sb].uses != nil {
					regValLiveSet.add(s.sb)
				}
			}
			for _, a := range v.Args {
				if !s.values[a.ID].needReg {
					continue
				}
				s.addUse(a.ID, int32(i), v.Pos)
				regValLiveSet.add(a.ID)
			}
		}
		if s.f.pass.debug > regDebug {
			fmt.Printf("uses for %s:%s\n", s.f.Name, b)
			for i := range s.values {
				vi := &s.values[i]
				u := vi.uses
				if u == nil {
					continue
				}
				fmt.Printf(" v%d:", i)
				for u != nil {
					fmt.Printf(" %d", u.dist)
					u = u.next
				}
				fmt.Println()
			}
		}

		// Make a copy of the block schedule so we can generate a new one in place.
		// We make a separate copy for phis and regular values.
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		phis = append(phis[:0], b.Values[:nphi]...)
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:0]

		// Initialize start state of block.
		if b == f.Entry {
			// Regalloc state is empty to start.
			if nphi > 0 {
				f.Fatalf("phis in entry block")
			}
		} else if len(b.Preds) == 1 {
			// Start regalloc state with the end state of the previous block.
			s.setState(s.endRegs[b.Preds[0].b.ID])
			if nphi > 0 {
				f.Fatalf("phis in single-predecessor block")
			}
			// Drop any values which are no longer live.
			// This may happen because at the end of p, a value may be
			// live but only used by some other successor of p.
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}
		} else {
			// This is the complicated case. We have more than one predecessor,
			// which means we may have Phi ops.

			// Start with the final register state of the primary predecessor
			idx := s.primary[b.ID]
			if idx < 0 {
				f.Fatalf("block with no primary predecessor %s", b)
			}
			p := b.Preds[idx].b
			s.setState(s.endRegs[p.ID])

			if s.f.pass.debug > regDebug {
				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
				for _, x := range s.endRegs[p.ID] {
					fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
				}
			}

			// Decide on registers for phi ops. Use the registers determined
			// by the primary predecessor if we can.
			// TODO: pick best of (already processed) predecessors?
			// Majority vote? Deepest nesting level?
			phiRegs = phiRegs[:0]
			var phiUsed regMask
			for _, v := range phis {
				if !s.values[v.ID].needReg {
					phiRegs = append(phiRegs, noRegister)
					continue
				}
				a := v.Args[idx]
				// Some instructions target not-allocatable registers.
				// They're not suitable for further (phi-function) allocation.
				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
				if m != 0 {
					r := pickReg(m)
					phiUsed |= regMask(1) << r
					phiRegs = append(phiRegs, r)
				} else {
					phiRegs = append(phiRegs, noRegister)
				}
			}

			// Second pass - deallocate any phi inputs which are now dead.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				a := v.Args[idx]
				if !regValLiveSet.contains(a.ID) {
					// Input is dead beyond the phi, deallocate
					// anywhere else it might live.
					s.freeRegs(s.values[a.ID].regs)
				} else {
					// Input is still live.
					// Try to move it around before kicking out, if there is a free register.
					// We generate a Copy in the predecessor block and record it. It will be
					// deleted if never used.
					r := phiRegs[i]
					if r == noRegister {
						continue
					}
					// Pick a free register. At this point some registers used in the predecessor
					// block may have been deallocated. Those are the ones used for Phis. Exclude
					// them (and they are not going to be helpful anyway).
					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
						r2 := pickReg(m)
						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
						s.copies[c] = false
						if s.f.pass.debug > regDebug {
							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
						}
						s.setOrig(c, a)
						s.assignReg(r2, a, c)
						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
					}
					s.freeReg(r)
				}
			}

			// Copy phi ops into new schedule.
			b.Values = append(b.Values, phis...)

			// Third pass - pick registers for phis whose inputs
			// were not in a register.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				if phiRegs[i] != noRegister {
					continue
				}
				if s.f.Config.use387 && v.Type.IsFloat() {
					continue // 387 can't handle floats in registers between blocks
				}
				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
				if m != 0 {
					r := pickReg(m)
					phiRegs[i] = r
					phiUsed |= regMask(1) << r
				}
			}

			// Set registers for phis. Add phi spill code.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				r := phiRegs[i]
				if r == noRegister {
					// stack-based phi
					// Spills will be inserted in all the predecessors below.
					s.values[v.ID].spill = v // v starts life spilled
					continue
				}
				// register-based phi
				s.assignReg(r, v, v)
			}

			// Deallocate any values which are no longer live. Phis are excluded.
			for r := register(0); r < s.numRegs; r++ {
				if phiUsed>>r&1 != 0 {
					continue
				}
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}

			// Save the starting state for use by merge edges.
			var regList []startReg
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if phiUsed>>r&1 != 0 {
					// Skip registers that phis used, we'll handle those
					// specially during merge edge processing.
					continue
				}
				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
			}
			s.startRegs[b.ID] = regList

			if s.f.pass.debug > regDebug {
				fmt.Printf("after phis\n")
				for _, x := range s.startRegs[b.ID] {
					fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
				}
			}
		}

		// Allocate space to record the desired registers for each value.
		dinfo = dinfo[:0]
		for i := 0; i < len(oldSched); i++ {
			dinfo = append(dinfo, dentry{})
		}

		// Load static desired register info at the end of the block.
		desired.copy(&s.desired[b.ID])

		// Check actual assigned registers at the start of the next block(s).
		// Dynamically assigned registers will trump the static
		// desired registers computed during liveness analysis.
		// Note that we do this phase after startRegs is set above, so that
		// we get the right behavior for a block which branches to itself.
		for _, e := range b.Succs {
			succ := e.b
			// TODO: prioritize likely successor?
			for _, x := range s.startRegs[succ.ID] {
				desired.add(x.v.ID, x.r)
			}
			// Process phi ops in succ.
			pidx := e.i
			for _, v := range succ.Values {
				if v.Op != OpPhi {
					break
				}
				if !s.values[v.ID].needReg {
					continue
				}
				rp, ok := s.f.getHome(v.ID).(*Register)
				if !ok {
					continue
				}
				desired.add(v.Args[pidx].ID, register(rp.num))
			}
		}
		// Walk values backwards computing desired register info.
		// See computeLive for more comments.
		for i := len(oldSched) - 1; i >= 0; i-- {
			v := oldSched[i]
			prefs := desired.remove(v.ID)
			desired.clobber(opcodeTable[v.Op].reg.clobbers)
			for _, j := range opcodeTable[v.Op].reg.inputs {
				if countRegs(j.regs) != 1 {
					continue
				}
				desired.clobber(j.regs)
				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
			}
			if opcodeTable[v.Op].resultInArg0 {
				if opcodeTable[v.Op].commutative {
					desired.addList(v.Args[1].ID, prefs)
				}
				desired.addList(v.Args[0].ID, prefs)
			}
			// Save desired registers for this value.
			dinfo[i].out = prefs
			for j, a := range v.Args {
				if j >= len(dinfo[i].in) {
					break
				}
				dinfo[i].in[j] = desired.get(a.ID)
			}
		}

		// Process all the non-phi values.
		for idx, v := range oldSched {
			if s.f.pass.debug > regDebug {
				fmt.Printf(" processing %s\n", v.LongString())
			}
			regspec := opcodeTable[v.Op].reg
			if v.Op == OpPhi {
				f.Fatalf("phi %s not at start of block", v)
			}
			if v.Op == OpSP {
				s.assignReg(s.SPReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sp = v.ID
				continue
			}
			if v.Op == OpSB {
				s.assignReg(s.SBReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sb = v.ID
				continue
			}
			if v.Op == OpSelect0 || v.Op == OpSelect1 {
				if s.values[v.ID].needReg {
					var i = 0
					if v.Op == OpSelect1 {
						i = 1
					}
					s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
				}
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				goto issueSpill
			}
			if v.Op == OpGetG && s.f.Config.hasGReg {
				// use hardware g register
				if s.regs[s.GReg].v != nil {
					s.freeReg(s.GReg) // kick out the old value
				}
				s.assignReg(s.GReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				goto issueSpill
			}
			if v.Op == OpArg {
				// Args are "pre-spilled" values. We don't allocate
				// any register here. We just set up the spill pointer to
				// point at itself and any later user will restore it to use it.
				s.values[v.ID].spill = v
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpKeepAlive {
				// Make sure the argument to v is still live here.
				s.advanceUses(v)
				a := v.Args[0]
				vi := &s.values[a.ID]
				if vi.regs == 0 && !vi.rematerializeable {
					// Use the spill location.
					// This forces later liveness analysis to make the
					// value live at this point.
					v.SetArg(0, s.makeSpill(a, b))
				} else {
					// In-register and rematerializeable values are already live.
					// These are typically rematerializeable constants like nil,
					// or values of a variable that were modified since the last call.
					v.Op = OpCopy
					v.SetArgs1(v.Args[1])
				}
				b.Values = append(b.Values, v)
				continue
			}
			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
				// No register allocation required (or none specified yet)
				s.freeRegs(regspec.clobbers)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}

			if s.values[v.ID].rematerializeable {
				// Value is rematerializeable, don't issue it here.
				// It will get issued just before each use (see
				// allocValToReg).
				for _, a := range v.Args {
					a.Uses--
				}
				s.advanceUses(v)
				continue
			}

			if s.f.pass.debug > regDebug {
				fmt.Printf("value %s\n", v.LongString())
				fmt.Printf(" out:")
				for _, r := range dinfo[idx].out {
					if r != noRegister {
						fmt.Printf(" %s", &s.registers[r])
					}
				}
				fmt.Println()
				for i := 0; i < len(v.Args) && i < 3; i++ {
					fmt.Printf(" in%d:", i)
					for _, r := range dinfo[idx].in[i] {
						if r != noRegister {
							fmt.Printf(" %s", &s.registers[r])
						}
					}
					fmt.Println()
				}
			}

			// Move arguments to registers. Process in an ordering defined
			// by the register specification (most constrained first).
			args = append(args[:0], v.Args...)
			for _, i := range regspec.inputs {
				mask := i.regs
				if mask&s.values[args[i.idx].ID].regs == 0 {
					// Need a new register for the input.
					mask &= s.allocatable
					mask &^= s.nospill
					// Use the desired register if available.
					if i.idx < 3 {
						for _, r := range dinfo[idx].in[i.idx] {
							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
								// Desired register is allowed and unused.
								mask = regMask(1) << r
								break
							}
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
				}
				args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos)
			}

			// If the output clobbers the input register, make sure we have
			// at least two copies of the input register so we don't
			// have to reload the value from the spill location.
			if opcodeTable[v.Op].resultInArg0 {
				var m regMask
				if !s.liveAfterCurrentInstruction(v.Args[0]) {
					// arg0 is dead. We can clobber its register.
					goto ok
				}
				if s.values[v.Args[0].ID].rematerializeable {
					// We can rematerialize the input, don't worry about clobbering it.
					goto ok
				}
				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
					// we have at least 2 copies of arg0. We can afford to clobber one.
					goto ok
				}
				if opcodeTable[v.Op].commutative {
					if !s.liveAfterCurrentInstruction(v.Args[1]) {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
					if s.values[v.Args[1].ID].rematerializeable {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
					if countRegs(s.values[v.Args[1].ID].regs) >= 2 {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
				}

				// We can't overwrite arg0 (or arg1, if commutative). So we
				// need to make a copy of an input so we have a register we can modify.

				// Possible new registers to copy into.
				m = s.compatRegs(v.Args[0].Type) &^ s.used
				if m == 0 {
					// No free registers. In this case we'll just clobber
					// an input and future uses of that input must use a restore.
					// TODO(khr): We should really do this like allocReg does it,
					// spilling the value with the most distant next use.
					goto ok
				}

				// Try to move an input to the desired output.
				for _, r := range dinfo[idx].out {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
						// Note: we update args[0] so the instruction will
						// use the register copy we just made.
						goto ok
					}
				}
				// Try to copy input to its desired location & use its old
				// location as the result register.
				for _, r := range dinfo[idx].in[0] {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
						s.copies[c] = false
						// Note: no update to args[0] so the instruction will
						// use the original copy.
						goto ok
					}
				}
				if opcodeTable[v.Op].commutative {
					for _, r := range dinfo[idx].in[1] {
						if r != noRegister && m>>r&1 != 0 {
							m = regMask(1) << r
							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
							s.copies[c] = false
							args[0], args[1] = args[1], args[0]
							goto ok
						}
					}
				}
				// Avoid future fixed uses if we can.
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				// Save input 0 to a new register so we can clobber it.
				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
				s.copies[c] = false
			}

		ok:
			// Now that all args are in regs, we're ready to issue the value itself.
			// Before we pick a register for the output value, allow input registers
			// to be deallocated. We do this here so that the output can use the
			// same register as a dying input.
			if !opcodeTable[v.Op].resultNotInArgs {
				s.tmpused = s.nospill
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}

			// Dump any registers which will be clobbered
			s.freeRegs(regspec.clobbers)
			s.tmpused |= regspec.clobbers

			// Pick registers for outputs.
			{
				outRegs := [2]register{noRegister, noRegister}
				var used regMask
				for _, out := range regspec.outputs {
					mask := out.regs & s.allocatable &^ used
					if mask == 0 {
						continue
					}
					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
						if !opcodeTable[v.Op].commutative {
							// Output must use the same register as input 0.
							r := register(s.f.getHome(args[0].ID).(*Register).num)
							mask = regMask(1) << r
						} else {
							// Output must use the same register as input 0 or 1.
							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
							// Check r0 and r1 for desired output register.
							found := false
							for _, r := range dinfo[idx].out {
								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
									mask = regMask(1) << r
									found = true
									if r == r1 {
										args[0], args[1] = args[1], args[0]
									}
									break
								}
							}
							if !found {
								// Neither is desired, pick r0.
								mask = regMask(1) << r0
							}
						}
					}
					for _, r := range dinfo[idx].out {
						if r != noRegister && (mask&^s.used)>>r&1 != 0 {
							// Desired register is allowed and unused.
							mask = regMask(1) << r
							break
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
					r := s.allocReg(mask, v)
					outRegs[out.idx] = r
					used |= regMask(1) << r
					s.tmpused |= regMask(1) << r
				}
				// Record register choices
				if v.Type.IsTuple() {
					var outLocs LocPair
					if r := outRegs[0]; r != noRegister {
						outLocs[0] = &s.registers[r]
					}
					if r := outRegs[1]; r != noRegister {
						outLocs[1] = &s.registers[r]
					}
					s.f.setHome(v, outLocs)
					// Note that subsequent SelectX instructions will do the assignReg calls.
				} else {
					if r := outRegs[0]; r != noRegister {
						s.assignReg(r, v, v)
					}
				}
			}

			// deallocate dead args, if we have not done so
			if opcodeTable[v.Op].resultNotInArgs {
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}
			s.tmpused = 0

			// Issue the Value itself.
			for i, a := range args {
				v.SetArg(i, a) // use register version of arguments
			}
			b.Values = append(b.Values, v)

		issueSpill:
		}

		// Load control value into reg.
		if v := b.Control; v != nil && s.values[v.ID].needReg {
			if s.f.pass.debug > regDebug {
				fmt.Printf(" processing control %s\n", v.LongString())
			}
			// We assume that a control input can be passed in any
			// type-compatible register. If this turns out not to be true,
			// we'll need to introduce a regspec for a block's control value.
			b.Control = s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos)
			if b.Control != v {
				v.Uses--
				b.Control.Uses++
			}
			// Remove this use from the uses list.
			vi := &s.values[v.ID]
			u := vi.uses
			vi.uses = u.next
			if u.next == nil {
				s.freeRegs(vi.regs) // value is dead
			}
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}

		// Spill any values that can't live across basic block boundaries.
		if s.f.Config.use387 {
			s.freeRegs(s.f.Config.fpRegMask)
		}

		// If we are approaching a merge point and we are the primary
		// predecessor of it, find live values that we use soon after
		// the merge point and promote them to registers now.
		if len(b.Succs) == 1 {
			// For this to be worthwhile, the loop must have no calls in it.
			top := b.Succs[0].b
			loop := s.loopnest.b2l[top.ID]
			if loop == nil || loop.header != top || loop.containsCall {
				goto badloop
			}

			// TODO: sort by distance, pick the closest ones?
			for _, live := range s.live[b.ID] {
				if live.dist >= unlikelyDistance {
					// Don't preload anything live after the loop.
					continue
				}
				vid := live.ID
				vi := &s.values[vid]
				if vi.regs != 0 {
					continue
				}
				if vi.rematerializeable {
					continue
				}
				v := s.orig[vid]
				if s.f.Config.use387 && v.Type.IsFloat() {
					continue // 387 can't handle floats in registers between blocks
				}
				m := s.compatRegs(v.Type) &^ s.used
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				if m != 0 {
					s.allocValToReg(v, m, false, b.Pos)
				}
			}
		}
	badloop:
		;

		// Save end-of-block register state.
		// First count how many, this cuts allocations in half.
		k := 0
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			k++
		}
		regList := make([]endReg, 0, k)
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			regList = append(regList, endReg{r, v, s.regs[r].c})
		}
		s.endRegs[b.ID] = regList

		if checkEnabled {
			regValLiveSet.clear()
			for _, x := range s.live[b.ID] {
				regValLiveSet.add(x.ID)
			}
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if !regValLiveSet.contains(v.ID) {
					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
				}
			}
		}

		// If a value is live at the end of the block and
		// isn't in a register, generate a use for the spill location.
		// We need to remember this information so that
		// the liveness analysis in stackalloc is correct.
		for _, e := range s.live[b.ID] {
			vi := &s.values[e.ID]
			if vi.regs != 0 {
				// in a register, we'll use that source for the merge.
				continue
			}
			if vi.rematerializeable {
				// we'll rematerialize during the merge.
				continue
			}
			//fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
			spill := s.makeSpill(s.orig[e.ID], b)
			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
		}

		// Clear any final uses.
		// All that is left should be the pseudo-uses added for values which
		// are live at the end of b.
		for _, e := range s.live[b.ID] {
			u := s.values[e.ID].uses
			if u == nil {
				f.Fatalf("live at end, no uses v%d", e.ID)
			}
			if u.next != nil {
				f.Fatalf("live at end, too many uses v%d", e.ID)
			}
			s.values[e.ID].uses = nil
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}
	}

	// Decide where the spills we generated will go.
	s.placeSpills()

	// Anything that didn't get a register gets a stack location here.
	// (StoreReg, stack-based phis, inputs, ...)
	stacklive := stackalloc(s.f, s.spillLive)

	// Fix up all merge edges.
	s.shuffle(stacklive)

	// Erase any copies we never used.
	// Also, an unused copy might be the only use of another copy,
	// so continue erasing until we reach a fixed point.
	for {
		progress := false
		for c, used := range s.copies {
			if !used && c.Uses == 0 {
				if s.f.pass.debug > regDebug {
					fmt.Printf("delete copied value %s\n", c.LongString())
				}
				c.RemoveArg(0)
				f.freeValue(c)
				delete(s.copies, c)
				progress = true
			}
		}
		if !progress {
			break
		}
	}

	for _, b := range s.visitOrder {
		i := 0
		for _, v := range b.Values {
			if v.Op == OpInvalid {
				continue
			}
			b.Values[i] = v
			i++
		}
		b.Values = b.Values[:i]
	}
}

func (s *regAllocState) placeSpills() {
	f := s.f

	// Precompute some useful info.
	phiRegs := make([]regMask, f.NumBlocks())
	for _, b := range s.visitOrder {
		var m regMask
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			if r, ok := f.getHome(v.ID).(*Register); ok {
				m |= regMask(1) << uint(r.num)
			}
		}
		phiRegs[b.ID] = m
	}

	// Start maps block IDs to the list of spills
	// that go at the start of the block (but after any phis).
	start := map[ID][]*Value{}
	// After maps value IDs to the list of spills
	// that go immediately after that value ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up,
			// like OpArgs and stack-based phis.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v. At the start "best" is the best place
		// we have found so far.
		// TODO: find a way to make this O(1) without arbitrary cutoffs.
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after the phis).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && v.Op != OpPhi {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:nphi]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}

// shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
func (s *regAllocState) shuffle(stacklive [][]ID) {
	var e edgeState
	e.s = s
	e.cache = map[ID][]*Value{}
	e.contents = map[Location]contentRecord{}
	if s.f.pass.debug > regDebug {
		fmt.Printf("shuffle %s\n", s.f.Name)
		fmt.Println(s.f.String())
	}

	for _, b := range s.visitOrder {
		if len(b.Preds) <= 1 {
			continue
		}
		e.b = b
		for i, edge := range b.Preds {
			p := edge.b
			e.p = p
			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
			e.process()
		}
	}
}

type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p->b.

	// for each pre-regalloc value, a list of equivalent cached values
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration

	// map from location to the value it contains
	contents map[Location]contentRecord

	// desired destination locations
	destinations []dstRecord
	extra        []dstRecord

	usedRegs   regMask // registers currently holding something
	uniqueRegs regMask // registers holding the only copy of a value
	finalRegs  regMask // registers holding final target
}

type contentRecord struct {
	vid   ID       // pre-regalloc value
	c     *Value   // cached value
	final bool     // this is a satisfied destination
	pos   src.XPos // source position of use of the value
}

type dstRecord struct {
	loc    Location // register or stack slot
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // place to store reference to the generating instruction
	pos    src.XPos // source position of use of this location
}

// setup initializes the edge state for shuffling.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state.
	for _, vid := range e.cachedVals {
		delete(e.cache, vid)
	}
	e.cachedVals = e.cachedVals[:0]
	for k := range e.contents {
		delete(e.contents, k)
	}
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0

	// Live registers can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care about the position of the source
	}
	// So can all of the spill locations.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.isAncestorEq(spill.Block, e.p) {
			// Spills were placed that only dominate the uses found
			// during the first regalloc pass. The edge fixup code
			// can't use a spill location if the spill doesn't dominate
			// the edge.
			// We are guaranteed that if the spill doesn't dominate this edge,
			// then the value is available in a register (because we called
			// makeSpill for every value not in a register at the start
			// of an edge).
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care about the position of the source
	}

	// Figure out all the destinations we need.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis need their args to end up in a specific location.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}

// process generates code to move all the values to the right destination locations.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations we generated.
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress. That means that any
		// remaining unsatisfied moves are in simple cycles.
		// For example, A -> B -> C -> D -> A.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C

		// To break the cycle, we pick an unused register, say R,
		// and put a copy of B there.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C <---- R=copyofB
		// When we resume the outer loop, the A->B move can now proceed,
		// and eventually the whole cycle completes.

		// Copy any cycle location to a temp register. This duplicates
		// one of the cycle entries, allowing the just duplicated value
		// to be overwritten and the cycle to proceed.
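		// For example: suppose AX holds v1, BX holds v2, and the two remaining
		// destinations are AX<-v2 and BX<-v1. Neither move can proceed, since
		// each would clobber the only copy of the other value. We copy AX's
		// current contents (v1) into a free register, say CX; AX may then be
		// overwritten, so AX<-v2 proceeds, and BX<-v1 is satisfied from CX.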
1877 d := dsts[0] 1878 loc := d.loc 1879 vid := e.contents[loc].vid 1880 c := e.contents[loc].c 1881 r := e.findRegFor(c.Type) 1882 if e.s.f.pass.debug > regDebug { 1883 fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c) 1884 } 1885 e.erase(r) 1886 if _, isReg := loc.(*Register); isReg { 1887 c = e.p.NewValue1(d.pos, OpCopy, c.Type, c) 1888 } else { 1889 c = e.p.NewValue1(d.pos, OpLoadReg, c.Type, c) 1890 } 1891 e.set(r, vid, c, false, d.pos) 1892 } 1893 } 1894 1895 // processDest generates code to put value vid into location loc. Returns true 1896 // if progress was made. 1897 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool { 1898 occupant := e.contents[loc] 1899 if occupant.vid == vid { 1900 // Value is already in the correct place. 1901 e.contents[loc] = contentRecord{vid, occupant.c, true, pos} 1902 if splice != nil { 1903 (*splice).Uses-- 1904 *splice = occupant.c 1905 occupant.c.Uses++ 1906 } 1907 // Note: if splice==nil then c will appear dead. This is 1908 // non-SSA formed code, so be careful after this pass not to run 1909 // deadcode elimination. 1910 if _, ok := e.s.copies[occupant.c]; ok { 1911 // The copy at occupant.c was used to avoid spill. 1912 e.s.copies[occupant.c] = true 1913 } 1914 return true 1915 } 1916 1917 // Check if we're allowed to clobber the destination location. 1918 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable { 1919 // We can't overwrite the last copy 1920 // of a value that needs to survive. 1921 return false 1922 } 1923 1924 // Copy from a source of v, register preferred. 1925 v := e.s.orig[vid] 1926 var c *Value 1927 var src Location 1928 if e.s.f.pass.debug > regDebug { 1929 fmt.Printf("moving v%d to %s\n", vid, loc) 1930 fmt.Printf("sources of v%d:", vid) 1931 } 1932 for _, w := range e.cache[vid] { 1933 h := e.s.f.getHome(w.ID) 1934 if e.s.f.pass.debug > regDebug { 1935 fmt.Printf(" %s:%s", h, w) 1936 } 1937 _, isreg := h.(*Register) 1938 if src == nil || isreg { 1939 c = w 1940 src = h 1941 } 1942 } 1943 if e.s.f.pass.debug > regDebug { 1944 if src != nil { 1945 fmt.Printf(" [use %s]\n", src) 1946 } else { 1947 fmt.Printf(" [no source]\n") 1948 } 1949 } 1950 _, dstReg := loc.(*Register) 1951 1952 // Pre-clobber destination. This avoids the 1953 // following situation: 1954 // - v is currently held in R0 and stacktmp0. 1955 // - We want to copy stacktmp1 to stacktmp0. 1956 // - We choose R0 as the temporary register. 1957 // During the copy, both R0 and stacktmp0 are 1958 // clobbered, losing both copies of v. Oops! 1959 // Erasing the destination early means R0 will not 1960 // be chosen as the temp register, as it will then 1961 // be the last copy of v. 1962 e.erase(loc) 1963 var x *Value 1964 if c == nil { 1965 if !e.s.values[vid].rematerializeable { 1966 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString()) 1967 } 1968 if dstReg { 1969 x = v.copyIntoNoXPos(e.p) 1970 } else { 1971 // Rematerialize into stack slot. Need a free 1972 // register to accomplish this. 1973 r := e.findRegFor(v.Type) 1974 e.erase(r) 1975 x = v.copyIntoWithXPos(e.p, pos) 1976 e.set(r, vid, x, false, pos) 1977 // Make sure we spill with the size of the slot, not the 1978 // size of x (which might be wider due to our dropping 1979 // of narrowing conversions). 1980 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x) 1981 } 1982 } else { 1983 // Emit move from src to dst. 
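	// There are four cases, depending on whether the chosen source and the
	// destination are registers or stack slots:
	//	register -> register   OpCopy
	//	register -> stack      OpStoreReg
	//	stack    -> register   OpLoadReg
	//	stack    -> stack      OpLoadReg into a temp register, then OpStoreReg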
1984 _, srcReg := src.(*Register) 1985 if srcReg { 1986 if dstReg { 1987 x = e.p.NewValue1(pos, OpCopy, c.Type, c) 1988 } else { 1989 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c) 1990 } 1991 } else { 1992 if dstReg { 1993 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c) 1994 } else { 1995 // mem->mem. Use temp register. 1996 r := e.findRegFor(c.Type) 1997 e.erase(r) 1998 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c) 1999 e.set(r, vid, t, false, pos) 2000 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t) 2001 } 2002 } 2003 } 2004 e.set(loc, vid, x, true, pos) 2005 if splice != nil { 2006 (*splice).Uses-- 2007 *splice = x 2008 x.Uses++ 2009 } 2010 return true 2011 } 2012 2013 // set changes the contents of location loc to hold the given value and its cached representative. 2014 func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) { 2015 e.s.f.setHome(c, loc) 2016 e.contents[loc] = contentRecord{vid, c, final, pos} 2017 a := e.cache[vid] 2018 if len(a) == 0 { 2019 e.cachedVals = append(e.cachedVals, vid) 2020 } 2021 a = append(a, c) 2022 e.cache[vid] = a 2023 if r, ok := loc.(*Register); ok { 2024 e.usedRegs |= regMask(1) << uint(r.num) 2025 if final { 2026 e.finalRegs |= regMask(1) << uint(r.num) 2027 } 2028 if len(a) == 1 { 2029 e.uniqueRegs |= regMask(1) << uint(r.num) 2030 } 2031 if len(a) == 2 { 2032 if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2033 e.uniqueRegs &^= regMask(1) << uint(t.num) 2034 } 2035 } 2036 } 2037 if e.s.f.pass.debug > regDebug { 2038 fmt.Printf("%s\n", c.LongString()) 2039 fmt.Printf("v%d now available in %s:%s\n", vid, loc, c) 2040 } 2041 } 2042 2043 // erase removes any user of loc. 2044 func (e *edgeState) erase(loc Location) { 2045 cr := e.contents[loc] 2046 if cr.c == nil { 2047 return 2048 } 2049 vid := cr.vid 2050 2051 if cr.final { 2052 // Add a destination to move this value back into place. 2053 // Make sure it gets added to the tail of the destination queue 2054 // so we make progress on other moves first. 2055 e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos}) 2056 } 2057 2058 // Remove c from the list of cached values. 2059 a := e.cache[vid] 2060 for i, c := range a { 2061 if e.s.f.getHome(c.ID) == loc { 2062 if e.s.f.pass.debug > regDebug { 2063 fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c) 2064 } 2065 a[i], a = a[len(a)-1], a[:len(a)-1] 2066 break 2067 } 2068 } 2069 e.cache[vid] = a 2070 2071 // Update register masks. 2072 if r, ok := loc.(*Register); ok { 2073 e.usedRegs &^= regMask(1) << uint(r.num) 2074 if cr.final { 2075 e.finalRegs &^= regMask(1) << uint(r.num) 2076 } 2077 } 2078 if len(a) == 1 { 2079 if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2080 e.uniqueRegs |= regMask(1) << uint(r.num) 2081 } 2082 } 2083 } 2084 2085 // findRegFor finds a register we can use to make a temp copy of type typ. 2086 func (e *edgeState) findRegFor(typ *types.Type) Location { 2087 // Which registers are possibilities. 2088 var m regMask 2089 types := &e.s.f.Config.Types 2090 if typ.IsFloat() { 2091 m = e.s.compatRegs(types.Float64) 2092 } else { 2093 m = e.s.compatRegs(types.Int64) 2094 } 2095 2096 // Pick a register. 
In priority order: 2097 // 1) an unused register 2098 // 2) a non-unique register not holding a final value 2099 // 3) a non-unique register 2100 // 4) TODO: a register holding a rematerializeable value 2101 x := m &^ e.usedRegs 2102 if x != 0 { 2103 return &e.s.registers[pickReg(x)] 2104 } 2105 x = m &^ e.uniqueRegs &^ e.finalRegs 2106 if x != 0 { 2107 return &e.s.registers[pickReg(x)] 2108 } 2109 x = m &^ e.uniqueRegs 2110 if x != 0 { 2111 return &e.s.registers[pickReg(x)] 2112 } 2113 2114 // No register is available. 2115 // Pick a register to spill. 2116 for _, vid := range e.cachedVals { 2117 a := e.cache[vid] 2118 for _, c := range a { 2119 if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 { 2120 if !c.rematerializeable() { 2121 x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c) 2122 // Allocate a temp location to spill a register to. 2123 // The type of the slot is immaterial - it will not be live across 2124 // any safepoint. Just use a type big enough to hold any register. 2125 t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64} 2126 // TODO: reuse these slots. They'll need to be erased first. 2127 e.set(t, vid, x, false, c.Pos) 2128 if e.s.f.pass.debug > regDebug { 2129 fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString()) 2130 } 2131 } 2132 // r will now be overwritten by the caller. At some point 2133 // later, the newly saved value will be moved back to its 2134 // final destination in processDest. 2135 return r 2136 } 2137 } 2138 } 2139 2140 fmt.Printf("m:%d unique:%d final:%d\n", m, e.uniqueRegs, e.finalRegs) 2141 for _, vid := range e.cachedVals { 2142 a := e.cache[vid] 2143 for _, c := range a { 2144 fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID)) 2145 } 2146 } 2147 e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b) 2148 return nil 2149 } 2150 2151 // rematerializeable reports whether the register allocator should recompute 2152 // a value instead of spilling/restoring it. 2153 func (v *Value) rematerializeable() bool { 2154 if !opcodeTable[v.Op].rematerializeable { 2155 return false 2156 } 2157 for _, a := range v.Args { 2158 // SP and SB (generated by OpSP and OpSB) are always available. 2159 if a.Op != OpSP && a.Op != OpSB { 2160 return false 2161 } 2162 } 2163 return true 2164 } 2165 2166 type liveInfo struct { 2167 ID ID // ID of value 2168 dist int32 // # of instructions before next use 2169 pos src.XPos // source position of next use 2170 } 2171 2172 // computeLive computes a map from block ID to a list of value IDs live at the end 2173 // of that block. Together with the value ID is a count of how many instructions 2174 // to the next use of that value. The resulting map is stored in s.live. 2175 // computeLive also computes the desired register information at the end of each block. 2176 // This desired register information is stored in s.desired. 2177 // TODO: this could be quadratic if lots of variables are live across lots of 2178 // basic blocks. Figure out a way to make this function (or, more precisely, the user 2179 // of this function) require only linear size & time. 2180 func (s *regAllocState) computeLive() { 2181 f := s.f 2182 s.live = make([][]liveInfo, f.NumBlocks()) 2183 s.desired = make([]desiredState, f.NumBlocks()) 2184 var phis []*Value 2185 2186 live := newSparseMap(f.NumValues()) 2187 t := newSparseMap(f.NumValues()) 2188 2189 // Keep track of which value we want in each register. 
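	// desired is a scratch desiredState reused for every block in the loop
	// below: it is seeded from the state recorded at the end of the block,
	// updated while walking the block's values backwards, and then merged
	// into each predecessor's entry in s.desired.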
2190 	var desired desiredState
2191 
2192 	// Instead of iterating over f.Blocks, iterate over their postordering.
2193 	// Liveness information flows backward, so starting at the end
2194 	// increases the probability that we will stabilize quickly.
2195 	// TODO: Do a better job yet. Here's one possibility:
2196 	// Calculate the dominator tree and locate all strongly connected components.
2197 	// If a value is live in one block of an SCC, it is live in all.
2198 	// Walk the dominator tree from end to beginning, just once, treating SCC
2199 	// components as single blocks, duplicating the calculated liveness information
2200 	// out to all of them.
2201 	po := f.postorder()
2202 	s.loopnest = f.loopnest()
2203 	s.loopnest.calculateDepths()
2204 	for {
2205 		changed := false
2206 
2207 		for _, b := range po {
2208 			// Start with known live values at the end of the block.
2209 			// Add len(b.Values) to adjust from end-of-block distance
2210 			// to beginning-of-block distance.
2211 			live.clear()
2212 			for _, e := range s.live[b.ID] {
2213 				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
2214 			}
2215 
2216 			// Mark control value as live
2217 			if b.Control != nil && s.values[b.Control.ID].needReg {
2218 				live.set(b.Control.ID, int32(len(b.Values)), b.Pos)
2219 			}
2220 
2221 			// Propagate backwards to the start of the block
2222 			// Assumes Values have been scheduled.
2223 			phis = phis[:0]
2224 			for i := len(b.Values) - 1; i >= 0; i-- {
2225 				v := b.Values[i]
2226 				live.remove(v.ID)
2227 				if v.Op == OpPhi {
2228 					// save phi ops for later
2229 					phis = append(phis, v)
2230 					continue
2231 				}
2232 				if opcodeTable[v.Op].call {
2233 					c := live.contents()
2234 					for i := range c {
2235 						c[i].val += unlikelyDistance
2236 					}
2237 				}
2238 				for _, a := range v.Args {
2239 					if s.values[a.ID].needReg {
2240 						live.set(a.ID, int32(i), v.Pos)
2241 					}
2242 				}
2243 			}
2244 			// Propagate desired registers backwards.
2245 			desired.copy(&s.desired[b.ID])
2246 			for i := len(b.Values) - 1; i >= 0; i-- {
2247 				v := b.Values[i]
2248 				prefs := desired.remove(v.ID)
2249 				if v.Op == OpPhi {
2250 					// TODO: if v is a phi, save desired register for phi inputs.
2251 					// For now, we just drop it and don't propagate
2252 					// desired registers back through phi nodes.
2253 					continue
2254 				}
2255 				// Cancel desired registers if they get clobbered.
2256 				desired.clobber(opcodeTable[v.Op].reg.clobbers)
2257 				// Update desired registers if there are any fixed register inputs.
2258 				for _, j := range opcodeTable[v.Op].reg.inputs {
2259 					if countRegs(j.regs) != 1 {
2260 						continue
2261 					}
2262 					desired.clobber(j.regs)
2263 					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
2264 				}
2265 				// Set desired register of input 0 if this is a 2-operand instruction.
2266 				if opcodeTable[v.Op].resultInArg0 {
2267 					if opcodeTable[v.Op].commutative {
2268 						desired.addList(v.Args[1].ID, prefs)
2269 					}
2270 					desired.addList(v.Args[0].ID, prefs)
2271 				}
2272 			}
2273 
2274 			// For each predecessor of b, expand its list of live-at-end values.
2275 			// invariant: live contains the values live at the start of b (excluding phi inputs)
2276 			for i, e := range b.Preds {
2277 				p := e.b
2278 				// Compute additional distance for the edge.
2279 				// Note: delta must be at least 1 to distinguish the control
2280 				// value use from the first user in a successor block.
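				// The added distance is scaled by branch likelihood: a next
				// use that is only reachable through an unlikely branch is
				// treated as much farther away, so the allocator prefers to
				// evict such values first when it runs out of registers.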
2281 delta := int32(normalDistance) 2282 if len(p.Succs) == 2 { 2283 if p.Succs[0].b == b && p.Likely == BranchLikely || 2284 p.Succs[1].b == b && p.Likely == BranchUnlikely { 2285 delta = likelyDistance 2286 } 2287 if p.Succs[0].b == b && p.Likely == BranchUnlikely || 2288 p.Succs[1].b == b && p.Likely == BranchLikely { 2289 delta = unlikelyDistance 2290 } 2291 } 2292 2293 // Update any desired registers at the end of p. 2294 s.desired[p.ID].merge(&desired) 2295 2296 // Start t off with the previously known live values at the end of p. 2297 t.clear() 2298 for _, e := range s.live[p.ID] { 2299 t.set(e.ID, e.dist, e.pos) 2300 } 2301 update := false 2302 2303 // Add new live values from scanning this block. 2304 for _, e := range live.contents() { 2305 d := e.val + delta 2306 if !t.contains(e.key) || d < t.get(e.key) { 2307 update = true 2308 t.set(e.key, d, e.aux) 2309 } 2310 } 2311 // Also add the correct arg from the saved phi values. 2312 // All phis are at distance delta (we consider them 2313 // simultaneously happening at the start of the block). 2314 for _, v := range phis { 2315 id := v.Args[i].ID 2316 if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) { 2317 update = true 2318 t.set(id, delta, v.Pos) 2319 } 2320 } 2321 2322 if !update { 2323 continue 2324 } 2325 // The live set has changed, update it. 2326 l := s.live[p.ID][:0] 2327 if cap(l) < t.size() { 2328 l = make([]liveInfo, 0, t.size()) 2329 } 2330 for _, e := range t.contents() { 2331 l = append(l, liveInfo{e.key, e.val, e.aux}) 2332 } 2333 s.live[p.ID] = l 2334 changed = true 2335 } 2336 } 2337 2338 if !changed { 2339 break 2340 } 2341 } 2342 if f.pass.debug > regDebug { 2343 fmt.Println("live values at end of each block") 2344 for _, b := range f.Blocks { 2345 fmt.Printf(" %s:", b) 2346 for _, x := range s.live[b.ID] { 2347 fmt.Printf(" v%d", x.ID) 2348 for _, e := range s.desired[b.ID].entries { 2349 if e.ID != x.ID { 2350 continue 2351 } 2352 fmt.Printf("[") 2353 first := true 2354 for _, r := range e.regs { 2355 if r == noRegister { 2356 continue 2357 } 2358 if !first { 2359 fmt.Printf(",") 2360 } 2361 fmt.Print(&s.registers[r]) 2362 first = false 2363 } 2364 fmt.Printf("]") 2365 } 2366 } 2367 fmt.Printf(" avoid=%x", int64(s.desired[b.ID].avoid)) 2368 fmt.Println() 2369 } 2370 } 2371 } 2372 2373 // A desiredState represents desired register assignments. 2374 type desiredState struct { 2375 // Desired assignments will be small, so we just use a list 2376 // of valueID+registers entries. 2377 entries []desiredStateEntry 2378 // Registers that other values want to be in. This value will 2379 // contain at least the union of the regs fields of entries, but 2380 // may contain additional entries for values that were once in 2381 // this data structure but are no longer. 2382 avoid regMask 2383 } 2384 type desiredStateEntry struct { 2385 // (pre-regalloc) value 2386 ID ID 2387 // Registers it would like to be in, in priority order. 2388 // Unused slots are filled with noRegister. 2389 regs [4]register 2390 } 2391 2392 func (d *desiredState) clear() { 2393 d.entries = d.entries[:0] 2394 d.avoid = 0 2395 } 2396 2397 // get returns a list of desired registers for value vid. 2398 func (d *desiredState) get(vid ID) [4]register { 2399 for _, e := range d.entries { 2400 if e.ID == vid { 2401 return e.regs 2402 } 2403 } 2404 return [4]register{noRegister, noRegister, noRegister, noRegister} 2405 } 2406 2407 // add records that we'd like value vid to be in register r. 
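// The new register becomes the highest-priority preference; existing
// preferences shift down, and only the four most recently added registers are
// remembered. For example, add(vid, r1) followed by add(vid, r2) leaves
// get(vid) == [4]register{r2, r1, noRegister, noRegister}.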
2408 func (d *desiredState) add(vid ID, r register) { 2409 d.avoid |= regMask(1) << r 2410 for i := range d.entries { 2411 e := &d.entries[i] 2412 if e.ID != vid { 2413 continue 2414 } 2415 if e.regs[0] == r { 2416 // Already known and highest priority 2417 return 2418 } 2419 for j := 1; j < len(e.regs); j++ { 2420 if e.regs[j] == r { 2421 // Move from lower priority to top priority 2422 copy(e.regs[1:], e.regs[:j]) 2423 e.regs[0] = r 2424 return 2425 } 2426 } 2427 copy(e.regs[1:], e.regs[:]) 2428 e.regs[0] = r 2429 return 2430 } 2431 d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}}) 2432 } 2433 2434 func (d *desiredState) addList(vid ID, regs [4]register) { 2435 // regs is in priority order, so iterate in reverse order. 2436 for i := len(regs) - 1; i >= 0; i-- { 2437 r := regs[i] 2438 if r != noRegister { 2439 d.add(vid, r) 2440 } 2441 } 2442 } 2443 2444 // clobber erases any desired registers in the set m. 2445 func (d *desiredState) clobber(m regMask) { 2446 for i := 0; i < len(d.entries); { 2447 e := &d.entries[i] 2448 j := 0 2449 for _, r := range e.regs { 2450 if r != noRegister && m>>r&1 == 0 { 2451 e.regs[j] = r 2452 j++ 2453 } 2454 } 2455 if j == 0 { 2456 // No more desired registers for this value. 2457 d.entries[i] = d.entries[len(d.entries)-1] 2458 d.entries = d.entries[:len(d.entries)-1] 2459 continue 2460 } 2461 for ; j < len(e.regs); j++ { 2462 e.regs[j] = noRegister 2463 } 2464 i++ 2465 } 2466 d.avoid &^= m 2467 } 2468 2469 // copy copies a desired state from another desiredState x. 2470 func (d *desiredState) copy(x *desiredState) { 2471 d.entries = append(d.entries[:0], x.entries...) 2472 d.avoid = x.avoid 2473 } 2474 2475 // remove removes the desired registers for vid and returns them. 2476 func (d *desiredState) remove(vid ID) [4]register { 2477 for i := range d.entries { 2478 if d.entries[i].ID == vid { 2479 regs := d.entries[i].regs 2480 d.entries[i] = d.entries[len(d.entries)-1] 2481 d.entries = d.entries[:len(d.entries)-1] 2482 return regs 2483 } 2484 } 2485 return [4]register{noRegister, noRegister, noRegister, noRegister} 2486 } 2487 2488 // merge merges another desired state x into d. 2489 func (d *desiredState) merge(x *desiredState) { 2490 d.avoid |= x.avoid 2491 // There should only be a few desired registers, so 2492 // linear insert is ok. 2493 for _, e := range x.entries { 2494 d.addList(e.ID, e.regs) 2495 } 2496 } 2497 2498 func min32(x, y int32) int32 { 2499 if x < y { 2500 return x 2501 } 2502 return y 2503 } 2504 func max32(x, y int32) int32 { 2505 if x > y { 2506 return x 2507 } 2508 return y 2509 }
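// Illustrative sketch (not part of the compiler): the edge-fixup loop in
// process above is an instance of the classic parallel-move problem.
// Ignoring stack slots, rematerialization, and types, the same idea can be
// written as a small standalone routine. Here locations are plain ints,
// moves maps each destination to its source, scratch is a location assumed
// not to appear in moves, and emit receives the ordered moves; all of these
// names are hypothetical.
//
//	func sequentialize(moves map[int]int, scratch int, emit func(dst, src int)) {
//		pending := make(map[int]int) // dst -> src, moves still to perform
//		uses := make(map[int]int)    // how many pending moves still read each location
//		for d, s := range moves {
//			if d != s {
//				pending[d] = s
//				uses[s]++
//			}
//		}
//		for len(pending) > 0 {
//			progress := false
//			for d, s := range pending {
//				if uses[d] == 0 { // nothing still needs the old contents of d
//					emit(d, s)
//					uses[s]--
//					delete(pending, d)
//					progress = true
//				}
//			}
//			if progress {
//				continue
//			}
//			// Only cycles remain. Save one destination's current contents
//			// in the scratch location and redirect its readers there,
//			// mirroring the findRegFor copy in process.
//			for d := range pending {
//				emit(scratch, d)
//				for d2, s2 := range pending {
//					if s2 == d {
//						pending[d2] = scratch
//					}
//				}
//				uses[scratch] = uses[d]
//				uses[d] = 0
//				break
//			}
//		}
//	}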