github.com/sanprasirt/go@v0.0.0-20170607001320-a027466e4b6d/src/cmd/compile/internal/ssa/regalloc.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Register allocation. 6 // 7 // We use a version of a linear scan register allocator. We treat the 8 // whole function as a single long basic block and run through 9 // it using a greedy register allocator. Then all merge edges 10 // (those targeting a block with len(Preds)>1) are processed to 11 // shuffle data into the place that the target of the edge expects. 12 // 13 // The greedy allocator moves values into registers just before they 14 // are used, spills registers only when necessary, and spills the 15 // value whose next use is farthest in the future. 16 // 17 // The register allocator requires that a block is not scheduled until 18 // at least one of its predecessors have been scheduled. The most recent 19 // such predecessor provides the starting register state for a block. 20 // 21 // It also requires that there are no critical edges (critical = 22 // comes from a block with >1 successor and goes to a block with >1 23 // predecessor). This makes it easy to add fixup code on merge edges - 24 // the source of a merge edge has only one successor, so we can add 25 // fixup code to the end of that block. 26 27 // Spilling 28 // 29 // During the normal course of the allocator, we might throw a still-live 30 // value out of all registers. When that value is subsequently used, we must 31 // load it from a slot on the stack. We must also issue an instruction to 32 // initialize that stack location with a copy of v. 33 // 34 // pre-regalloc: 35 // (1) v = Op ... 36 // (2) x = Op ... 37 // (3) ... = Op v ... 38 // 39 // post-regalloc: 40 // (1) v = Op ... : AX // computes v, store result in AX 41 // s = StoreReg v // spill v to a stack slot 42 // (2) x = Op ... : AX // some other op uses AX 43 // c = LoadReg s : CX // restore v from stack slot 44 // (3) ... = Op c ... // use the restored value 45 // 46 // Allocation occurs normally until we reach (3) and we realize we have 47 // a use of v and it isn't in any register. At that point, we allocate 48 // a spill (a StoreReg) for v. We can't determine the correct place for 49 // the spill at this point, so we allocate the spill as blockless initially. 50 // The restore is then generated to load v back into a register so it can 51 // be used. Subsequent uses of v will use the restored value c instead. 52 // 53 // What remains is the question of where to schedule the spill. 54 // During allocation, we keep track of the dominator of all restores of v. 55 // The spill of v must dominate that block. The spill must also be issued at 56 // a point where v is still in a register. 57 // 58 // To find the right place, start at b, the block which dominates all restores. 59 // - If b is v.Block, then issue the spill right after v. 60 // It is known to be in a register at that point, and dominates any restores. 61 // - Otherwise, if v is in a register at the start of b, 62 // put the spill of v at the start of b. 63 // - Otherwise, set b = immediate dominator of b, and repeat. 64 // 65 // Phi values are special, as always. We define two kinds of phis, those 66 // where the merge happens in a register (a "register" phi) and those where 67 // the merge happens in a stack location (a "stack" phi). 68 // 69 // A register phi must have the phi and all of its inputs allocated to the 70 // same register. Register phis are spilled similarly to regular ops. 71 // 72 // A stack phi must have the phi and all of its inputs allocated to the same 73 // stack location. Stack phis start out life already spilled - each phi 74 // input must be a store (using StoreReg) at the end of the corresponding 75 // predecessor block. 76 // b1: y = ... : AX b2: z = ... : BX 77 // y2 = StoreReg y z2 = StoreReg z 78 // goto b3 goto b3 79 // b3: x = phi(y2, z2) 80 // The stack allocator knows that StoreReg args of stack-allocated phis 81 // must be allocated to the same stack slot as the phi that uses them. 82 // x is now a spilled value and a restore must appear before its first use. 83 84 // TODO 85 86 // Use an affinity graph to mark two values which should use the 87 // same register. This affinity graph will be used to prefer certain 88 // registers for allocation. This affinity helps eliminate moves that 89 // are required for phi implementations and helps generate allocations 90 // for 2-register architectures. 91 92 // Note: regalloc generates a not-quite-SSA output. If we have: 93 // 94 // b1: x = ... : AX 95 // x2 = StoreReg x 96 // ... AX gets reused for something else ... 97 // if ... goto b3 else b4 98 // 99 // b3: x3 = LoadReg x2 : BX b4: x4 = LoadReg x2 : CX 100 // ... use x3 ... ... use x4 ... 101 // 102 // b2: ... use x3 ... 103 // 104 // If b3 is the primary predecessor of b2, then we use x3 in b2 and 105 // add a x4:CX->BX copy at the end of b4. 106 // But the definition of x3 doesn't dominate b2. We should really 107 // insert a dummy phi at the start of b2 (x5=phi(x3,x4):BX) to keep 108 // SSA form. For now, we ignore this problem as remaining in strict 109 // SSA form isn't needed after regalloc. We'll just leave the use 110 // of x3 not dominated by the definition of x3, and the CX->BX copy 111 // will have no use (so don't run deadcode after regalloc!). 112 // TODO: maybe we should introduce these extra phis? 113 114 package ssa 115 116 import ( 117 "cmd/compile/internal/types" 118 "cmd/internal/objabi" 119 "cmd/internal/src" 120 "fmt" 121 "unsafe" 122 ) 123 124 const ( 125 moveSpills = iota 126 logSpills 127 regDebug 128 stackDebug 129 ) 130 131 // distance is a measure of how far into the future values are used. 132 // distance is measured in units of instructions. 133 const ( 134 likelyDistance = 1 135 normalDistance = 10 136 unlikelyDistance = 100 137 ) 138 139 // regalloc performs register allocation on f. It sets f.RegAlloc 140 // to the resulting allocation. 141 func regalloc(f *Func) { 142 var s regAllocState 143 s.init(f) 144 s.regalloc(f) 145 } 146 147 type register uint8 148 149 const noRegister register = 255 150 151 type regMask uint64 152 153 func (m regMask) String() string { 154 s := "" 155 for r := register(0); m != 0; r++ { 156 if m>>r&1 == 0 { 157 continue 158 } 159 m &^= regMask(1) << r 160 if s != "" { 161 s += " " 162 } 163 s += fmt.Sprintf("r%d", r) 164 } 165 return s 166 } 167 168 // countRegs returns the number of set bits in the register mask. 169 func countRegs(r regMask) int { 170 n := 0 171 for r != 0 { 172 n += int(r & 1) 173 r >>= 1 174 } 175 return n 176 } 177 178 // pickReg picks an arbitrary register from the register mask. 179 func pickReg(r regMask) register { 180 // pick the lowest one 181 if r == 0 { 182 panic("can't pick a register from an empty set") 183 } 184 for i := register(0); ; i++ { 185 if r&1 != 0 { 186 return i 187 } 188 r >>= 1 189 } 190 } 191 192 type use struct { 193 dist int32 // distance from start of the block to a use of a value 194 pos src.XPos // source position of the use 195 next *use // linked list of uses of a value in nondecreasing dist order 196 } 197 198 // A valState records the register allocation state for a (pre-regalloc) value. 199 type valState struct { 200 regs regMask // the set of registers holding a Value (usually just one) 201 uses *use // list of uses in this block 202 spill *Value // spilled copy of the Value (if any) 203 restoreMin int32 // minimum of all restores' blocks' sdom.entry 204 restoreMax int32 // maximum of all restores' blocks' sdom.exit 205 needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() 206 rematerializeable bool // cached value of v.rematerializeable() 207 } 208 209 type regState struct { 210 v *Value // Original (preregalloc) Value stored in this register. 211 c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it. 212 // If a register is unused, v==c==nil 213 } 214 215 type regAllocState struct { 216 f *Func 217 218 sdom SparseTree 219 registers []Register 220 numRegs register 221 SPReg register 222 SBReg register 223 GReg register 224 allocatable regMask 225 226 // for each block, its primary predecessor. 227 // A predecessor of b is primary if it is the closest 228 // predecessor that appears before b in the layout order. 229 // We record the index in the Preds list where the primary predecessor sits. 230 primary []int32 231 232 // live values at the end of each block. live[b.ID] is a list of value IDs 233 // which are live at the end of b, together with a count of how many instructions 234 // forward to the next use. 235 live [][]liveInfo 236 // desired register assignments at the end of each block. 237 // Note that this is a static map computed before allocation occurs. Dynamic 238 // register desires (from partially completed allocations) will trump 239 // this information. 240 desired []desiredState 241 242 // current state of each (preregalloc) Value 243 values []valState 244 245 // ID of SP, SB values 246 sp, sb ID 247 248 // For each Value, map from its value ID back to the 249 // preregalloc Value it was derived from. 250 orig []*Value 251 252 // current state of each register 253 regs []regState 254 255 // registers that contain values which can't be kicked out 256 nospill regMask 257 258 // mask of registers currently in use 259 used regMask 260 261 // mask of registers used in the current instruction 262 tmpused regMask 263 264 // current block we're working on 265 curBlock *Block 266 267 // cache of use records 268 freeUseRecords *use 269 270 // endRegs[blockid] is the register state at the end of each block. 271 // encoded as a set of endReg records. 272 endRegs [][]endReg 273 274 // startRegs[blockid] is the register state at the start of merge blocks. 275 // saved state does not include the state of phi ops in the block. 276 startRegs [][]startReg 277 278 // spillLive[blockid] is the set of live spills at the end of each block 279 spillLive [][]ID 280 281 // a set of copies we generated to move things around, and 282 // whether it is used in shuffle. Unused copies will be deleted. 283 copies map[*Value]bool 284 285 loopnest *loopnest 286 } 287 288 type endReg struct { 289 r register 290 v *Value // pre-regalloc value held in this register (TODO: can we use ID here?) 291 c *Value // cached version of the value 292 } 293 294 type startReg struct { 295 r register 296 v *Value // pre-regalloc value needed in this register 297 c *Value // cached version of the value 298 pos src.XPos // source position of use of this register 299 } 300 301 // freeReg frees up register r. Any current user of r is kicked out. 302 func (s *regAllocState) freeReg(r register) { 303 v := s.regs[r].v 304 if v == nil { 305 s.f.Fatalf("tried to free an already free register %d\n", r) 306 } 307 308 // Mark r as unused. 309 if s.f.pass.debug > regDebug { 310 fmt.Printf("freeReg %s (dump %s/%s)\n", s.registers[r].Name(), v, s.regs[r].c) 311 } 312 s.regs[r] = regState{} 313 s.values[v.ID].regs &^= regMask(1) << r 314 s.used &^= regMask(1) << r 315 } 316 317 // freeRegs frees up all registers listed in m. 318 func (s *regAllocState) freeRegs(m regMask) { 319 for m&s.used != 0 { 320 s.freeReg(pickReg(m & s.used)) 321 } 322 } 323 324 // setOrig records that c's original value is the same as 325 // v's original value. 326 func (s *regAllocState) setOrig(c *Value, v *Value) { 327 for int(c.ID) >= len(s.orig) { 328 s.orig = append(s.orig, nil) 329 } 330 if s.orig[c.ID] != nil { 331 s.f.Fatalf("orig value set twice %s %s", c, v) 332 } 333 s.orig[c.ID] = s.orig[v.ID] 334 } 335 336 // assignReg assigns register r to hold c, a copy of v. 337 // r must be unused. 338 func (s *regAllocState) assignReg(r register, v *Value, c *Value) { 339 if s.f.pass.debug > regDebug { 340 fmt.Printf("assignReg %s %s/%s\n", s.registers[r].Name(), v, c) 341 } 342 if s.regs[r].v != nil { 343 s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v) 344 } 345 346 // Update state. 347 s.regs[r] = regState{v, c} 348 s.values[v.ID].regs |= regMask(1) << r 349 s.used |= regMask(1) << r 350 s.f.setHome(c, &s.registers[r]) 351 } 352 353 // allocReg chooses a register from the set of registers in mask. 354 // If there is no unused register, a Value will be kicked out of 355 // a register to make room. 356 func (s *regAllocState) allocReg(mask regMask, v *Value) register { 357 mask &= s.allocatable 358 mask &^= s.nospill 359 if mask == 0 { 360 s.f.Fatalf("no register available for %s", v) 361 } 362 363 // Pick an unused register if one is available. 364 if mask&^s.used != 0 { 365 return pickReg(mask &^ s.used) 366 } 367 368 // Pick a value to spill. Spill the value with the 369 // farthest-in-the-future use. 370 // TODO: Prefer registers with already spilled Values? 371 // TODO: Modify preference using affinity graph. 372 // TODO: if a single value is in multiple registers, spill one of them 373 // before spilling a value in just a single register. 374 375 // Find a register to spill. We spill the register containing the value 376 // whose next use is as far in the future as possible. 377 // https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm 378 var r register 379 maxuse := int32(-1) 380 for t := register(0); t < s.numRegs; t++ { 381 if mask>>t&1 == 0 { 382 continue 383 } 384 v := s.regs[t].v 385 if n := s.values[v.ID].uses.dist; n > maxuse { 386 // v's next use is farther in the future than any value 387 // we've seen so far. A new best spill candidate. 388 r = t 389 maxuse = n 390 } 391 } 392 if maxuse == -1 { 393 s.f.Fatalf("couldn't find register to spill") 394 } 395 396 // Try to move it around before kicking out, if there is a free register. 397 // We generate a Copy and record it. It will be deleted if never used. 398 v2 := s.regs[r].v 399 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r) 400 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 { 401 r2 := pickReg(m) 402 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c) 403 s.copies[c] = false 404 if s.f.pass.debug > regDebug { 405 fmt.Printf("copy %s to %s : %s\n", v2, c, s.registers[r2].Name()) 406 } 407 s.setOrig(c, v2) 408 s.assignReg(r2, v2, c) 409 } 410 s.freeReg(r) 411 return r 412 } 413 414 // makeSpill returns a Value which represents the spilled value of v. 415 // b is the block in which the spill is used. 416 func (s *regAllocState) makeSpill(v *Value, b *Block) *Value { 417 vi := &s.values[v.ID] 418 if vi.spill != nil { 419 // Final block not known - keep track of subtree where restores reside. 420 vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry) 421 vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit) 422 return vi.spill 423 } 424 // Make a spill for v. We don't know where we want 425 // to put it yet, so we leave it blockless for now. 426 spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos) 427 // We also don't know what the spill's arg will be. 428 // Leave it argless for now. 429 s.setOrig(spill, v) 430 vi.spill = spill 431 vi.restoreMin = s.sdom[b.ID].entry 432 vi.restoreMax = s.sdom[b.ID].exit 433 return spill 434 } 435 436 // allocValToReg allocates v to a register selected from regMask and 437 // returns the register copy of v. Any previous user is kicked out and spilled 438 // (if necessary). Load code is added at the current pc. If nospill is set the 439 // allocated register is marked nospill so the assignment cannot be 440 // undone until the caller allows it by clearing nospill. Returns a 441 // *Value which is either v or a copy of v allocated to the chosen register. 442 func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value { 443 vi := &s.values[v.ID] 444 445 // Check if v is already in a requested register. 446 if mask&vi.regs != 0 { 447 r := pickReg(mask & vi.regs) 448 if s.regs[r].v != v || s.regs[r].c == nil { 449 panic("bad register state") 450 } 451 if nospill { 452 s.nospill |= regMask(1) << r 453 } 454 return s.regs[r].c 455 } 456 457 // Allocate a register. 458 r := s.allocReg(mask, v) 459 460 // Allocate v to the new register. 461 var c *Value 462 if vi.regs != 0 { 463 // Copy from a register that v is already in. 464 r2 := pickReg(vi.regs) 465 if s.regs[r2].v != v { 466 panic("bad register state") 467 } 468 c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c) 469 } else if v.rematerializeable() { 470 // Rematerialize instead of loading from the spill location. 471 c = v.copyIntoNoXPos(s.curBlock) 472 } else { 473 // Load v from its spill location. 474 spill := s.makeSpill(v, s.curBlock) 475 if s.f.pass.debug > logSpills { 476 s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill) 477 } 478 c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill) 479 } 480 s.setOrig(c, v) 481 s.assignReg(r, v, c) 482 if nospill { 483 s.nospill |= regMask(1) << r 484 } 485 return c 486 } 487 488 // isLeaf reports whether f performs any calls. 489 func isLeaf(f *Func) bool { 490 for _, b := range f.Blocks { 491 for _, v := range b.Values { 492 if opcodeTable[v.Op].call { 493 return false 494 } 495 } 496 } 497 return true 498 } 499 500 func (s *regAllocState) init(f *Func) { 501 s.f = f 502 s.f.RegAlloc = s.f.Cache.locs[:0] 503 s.registers = f.Config.registers 504 if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) { 505 s.f.Fatalf("bad number of registers: %d", nr) 506 } else { 507 s.numRegs = register(nr) 508 } 509 // Locate SP, SB, and g registers. 510 s.SPReg = noRegister 511 s.SBReg = noRegister 512 s.GReg = noRegister 513 for r := register(0); r < s.numRegs; r++ { 514 switch s.registers[r].Name() { 515 case "SP": 516 s.SPReg = r 517 case "SB": 518 s.SBReg = r 519 case "g": 520 s.GReg = r 521 } 522 } 523 // Make sure we found all required registers. 524 switch noRegister { 525 case s.SPReg: 526 s.f.Fatalf("no SP register found") 527 case s.SBReg: 528 s.f.Fatalf("no SB register found") 529 case s.GReg: 530 if f.Config.hasGReg { 531 s.f.Fatalf("no g register found") 532 } 533 } 534 535 // Figure out which registers we're allowed to use. 536 s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask 537 s.allocatable &^= 1 << s.SPReg 538 s.allocatable &^= 1 << s.SBReg 539 if s.f.Config.hasGReg { 540 s.allocatable &^= 1 << s.GReg 541 } 542 if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 { 543 s.allocatable &^= 1 << uint(s.f.Config.FPReg) 544 } 545 if s.f.Config.ctxt.Flag_shared { 546 switch s.f.Config.arch { 547 case "ppc64le": // R2 already reserved. 548 s.allocatable &^= 1 << 12 // R12 549 } 550 } 551 if s.f.Config.LinkReg != -1 { 552 if isLeaf(f) { 553 // Leaf functions don't save/restore the link register. 554 s.allocatable &^= 1 << uint(s.f.Config.LinkReg) 555 } 556 if s.f.Config.arch == "arm" && objabi.GOARM == 5 { 557 // On ARMv5 we insert softfloat calls at each FP instruction. 558 // This clobbers LR almost everywhere. Disable allocating LR 559 // on ARMv5. 560 s.allocatable &^= 1 << uint(s.f.Config.LinkReg) 561 } 562 } 563 if s.f.Config.ctxt.Flag_dynlink { 564 switch s.f.Config.arch { 565 case "amd64": 566 s.allocatable &^= 1 << 15 // R15 567 case "arm": 568 s.allocatable &^= 1 << 9 // R9 569 case "ppc64le": // R2 already reserved. 570 s.allocatable &^= 1 << 12 // R12 571 case "arm64": 572 // nothing to do? 573 case "386": 574 // nothing to do. 575 // Note that for Flag_shared (position independent code) 576 // we do need to be careful, but that carefulness is hidden 577 // in the rewrite rules so we always have a free register 578 // available for global load/stores. See gen/386.rules (search for Flag_shared). 579 case "s390x": 580 // nothing to do, R10 & R11 already reserved 581 default: 582 s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch) 583 } 584 } 585 if s.f.Config.nacl { 586 switch s.f.Config.arch { 587 case "arm": 588 s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm 589 case "amd64p32": 590 s.allocatable &^= 1 << 5 // BP - reserved for nacl 591 s.allocatable &^= 1 << 15 // R15 - reserved for nacl 592 } 593 } 594 if s.f.Config.use387 { 595 s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go) 596 } 597 598 s.regs = make([]regState, s.numRegs) 599 s.values = make([]valState, f.NumValues()) 600 s.orig = make([]*Value, f.NumValues()) 601 s.copies = make(map[*Value]bool) 602 for _, b := range f.Blocks { 603 for _, v := range b.Values { 604 if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() { 605 s.values[v.ID].needReg = true 606 s.values[v.ID].rematerializeable = v.rematerializeable() 607 s.orig[v.ID] = v 608 } 609 // Note: needReg is false for values returning Tuple types. 610 // Instead, we mark the corresponding Selects as needReg. 611 } 612 } 613 s.computeLive() 614 615 // Compute block order. This array allows us to distinguish forward edges 616 // from backward edges and compute how far they go. 617 blockOrder := make([]int32, f.NumBlocks()) 618 for i, b := range f.Blocks { 619 blockOrder[b.ID] = int32(i) 620 } 621 622 // Compute primary predecessors. 623 s.primary = make([]int32, f.NumBlocks()) 624 for _, b := range f.Blocks { 625 best := -1 626 for i, e := range b.Preds { 627 p := e.b 628 if blockOrder[p.ID] >= blockOrder[b.ID] { 629 continue // backward edge 630 } 631 if best == -1 || blockOrder[p.ID] > blockOrder[b.Preds[best].b.ID] { 632 best = i 633 } 634 } 635 s.primary[b.ID] = int32(best) 636 } 637 638 s.endRegs = make([][]endReg, f.NumBlocks()) 639 s.startRegs = make([][]startReg, f.NumBlocks()) 640 s.spillLive = make([][]ID, f.NumBlocks()) 641 s.sdom = f.sdom() 642 } 643 644 // Adds a use record for id at distance dist from the start of the block. 645 // All calls to addUse must happen with nonincreasing dist. 646 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) { 647 r := s.freeUseRecords 648 if r != nil { 649 s.freeUseRecords = r.next 650 } else { 651 r = &use{} 652 } 653 r.dist = dist 654 r.pos = pos 655 r.next = s.values[id].uses 656 s.values[id].uses = r 657 if r.next != nil && dist > r.next.dist { 658 s.f.Fatalf("uses added in wrong order") 659 } 660 } 661 662 // advanceUses advances the uses of v's args from the state before v to the state after v. 663 // Any values which have no more uses are deallocated from registers. 664 func (s *regAllocState) advanceUses(v *Value) { 665 for _, a := range v.Args { 666 if !s.values[a.ID].needReg { 667 continue 668 } 669 ai := &s.values[a.ID] 670 r := ai.uses 671 ai.uses = r.next 672 if r.next == nil { 673 // Value is dead, free all registers that hold it. 674 s.freeRegs(ai.regs) 675 } 676 r.next = s.freeUseRecords 677 s.freeUseRecords = r 678 } 679 } 680 681 // liveAfterCurrentInstruction reports whether v is live after 682 // the current instruction is completed. v must be used by the 683 // current instruction. 684 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool { 685 u := s.values[v.ID].uses 686 d := u.dist 687 for u != nil && u.dist == d { 688 u = u.next 689 } 690 return u != nil && u.dist > d 691 } 692 693 // Sets the state of the registers to that encoded in regs. 694 func (s *regAllocState) setState(regs []endReg) { 695 s.freeRegs(s.used) 696 for _, x := range regs { 697 s.assignReg(x.r, x.v, x.c) 698 } 699 } 700 701 // compatRegs returns the set of registers which can store a type t. 702 func (s *regAllocState) compatRegs(t *types.Type) regMask { 703 var m regMask 704 if t.IsTuple() || t.IsFlags() { 705 return 0 706 } 707 if t.IsFloat() || t == types.TypeInt128 { 708 m = s.f.Config.fpRegMask 709 } else { 710 m = s.f.Config.gpRegMask 711 } 712 return m & s.allocatable 713 } 714 715 func (s *regAllocState) regalloc(f *Func) { 716 regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register 717 defer f.retSparseSet(regValLiveSet) 718 var oldSched []*Value 719 var phis []*Value 720 var phiRegs []register 721 var args []*Value 722 723 // Data structure used for computing desired registers. 724 var desired desiredState 725 726 // Desired registers for inputs & outputs for each instruction in the block. 727 type dentry struct { 728 out [4]register // desired output registers 729 in [3][4]register // desired input registers (for inputs 0,1, and 2) 730 } 731 var dinfo []dentry 732 733 if f.Entry != f.Blocks[0] { 734 f.Fatalf("entry block must be first") 735 } 736 737 for _, b := range f.Blocks { 738 s.curBlock = b 739 740 // Initialize regValLiveSet and uses fields for this block. 741 // Walk backwards through the block doing liveness analysis. 742 regValLiveSet.clear() 743 for _, e := range s.live[b.ID] { 744 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block 745 regValLiveSet.add(e.ID) 746 } 747 if v := b.Control; v != nil && s.values[v.ID].needReg { 748 s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value 749 regValLiveSet.add(v.ID) 750 } 751 for i := len(b.Values) - 1; i >= 0; i-- { 752 v := b.Values[i] 753 regValLiveSet.remove(v.ID) 754 if v.Op == OpPhi { 755 // Remove v from the live set, but don't add 756 // any inputs. This is the state the len(b.Preds)>1 757 // case below desires; it wants to process phis specially. 758 continue 759 } 760 if opcodeTable[v.Op].call { 761 // Function call clobbers all the registers but SP and SB. 762 regValLiveSet.clear() 763 if s.sp != 0 && s.values[s.sp].uses != nil { 764 regValLiveSet.add(s.sp) 765 } 766 if s.sb != 0 && s.values[s.sb].uses != nil { 767 regValLiveSet.add(s.sb) 768 } 769 } 770 for _, a := range v.Args { 771 if !s.values[a.ID].needReg { 772 continue 773 } 774 s.addUse(a.ID, int32(i), v.Pos) 775 regValLiveSet.add(a.ID) 776 } 777 } 778 if s.f.pass.debug > regDebug { 779 fmt.Printf("uses for %s:%s\n", s.f.Name, b) 780 for i := range s.values { 781 vi := &s.values[i] 782 u := vi.uses 783 if u == nil { 784 continue 785 } 786 fmt.Printf(" v%d:", i) 787 for u != nil { 788 fmt.Printf(" %d", u.dist) 789 u = u.next 790 } 791 fmt.Println() 792 } 793 } 794 795 // Make a copy of the block schedule so we can generate a new one in place. 796 // We make a separate copy for phis and regular values. 797 nphi := 0 798 for _, v := range b.Values { 799 if v.Op != OpPhi { 800 break 801 } 802 nphi++ 803 } 804 phis = append(phis[:0], b.Values[:nphi]...) 805 oldSched = append(oldSched[:0], b.Values[nphi:]...) 806 b.Values = b.Values[:0] 807 808 // Initialize start state of block. 809 if b == f.Entry { 810 // Regalloc state is empty to start. 811 if nphi > 0 { 812 f.Fatalf("phis in entry block") 813 } 814 } else if len(b.Preds) == 1 { 815 // Start regalloc state with the end state of the previous block. 816 s.setState(s.endRegs[b.Preds[0].b.ID]) 817 if nphi > 0 { 818 f.Fatalf("phis in single-predecessor block") 819 } 820 // Drop any values which are no longer live. 821 // This may happen because at the end of p, a value may be 822 // live but only used by some other successor of p. 823 for r := register(0); r < s.numRegs; r++ { 824 v := s.regs[r].v 825 if v != nil && !regValLiveSet.contains(v.ID) { 826 s.freeReg(r) 827 } 828 } 829 } else { 830 // This is the complicated case. We have more than one predecessor, 831 // which means we may have Phi ops. 832 833 // Copy phi ops into new schedule. 834 b.Values = append(b.Values, phis...) 835 836 // Start with the final register state of the primary predecessor 837 idx := s.primary[b.ID] 838 if idx < 0 { 839 f.Fatalf("block with no primary predecessor %s", b) 840 } 841 p := b.Preds[idx].b 842 s.setState(s.endRegs[p.ID]) 843 844 if s.f.pass.debug > regDebug { 845 fmt.Printf("starting merge block %s with end state of %s:\n", b, p) 846 for _, x := range s.endRegs[p.ID] { 847 fmt.Printf(" %s: orig:%s cache:%s\n", s.registers[x.r].Name(), x.v, x.c) 848 } 849 } 850 851 // Decide on registers for phi ops. Use the registers determined 852 // by the primary predecessor if we can. 853 // TODO: pick best of (already processed) predecessors? 854 // Majority vote? Deepest nesting level? 855 phiRegs = phiRegs[:0] 856 var phiUsed regMask 857 for _, v := range phis { 858 if !s.values[v.ID].needReg { 859 phiRegs = append(phiRegs, noRegister) 860 continue 861 } 862 a := v.Args[idx] 863 // Some instructions target not-allocatable registers. 864 // They're not suitable for further (phi-function) allocation. 865 m := s.values[a.ID].regs &^ phiUsed & s.allocatable 866 if m != 0 { 867 r := pickReg(m) 868 phiUsed |= regMask(1) << r 869 phiRegs = append(phiRegs, r) 870 } else { 871 phiRegs = append(phiRegs, noRegister) 872 } 873 } 874 875 // Second pass - deallocate any phi inputs which are now dead. 876 for i, v := range phis { 877 if !s.values[v.ID].needReg { 878 continue 879 } 880 a := v.Args[idx] 881 if !regValLiveSet.contains(a.ID) { 882 // Input is dead beyond the phi, deallocate 883 // anywhere else it might live. 884 s.freeRegs(s.values[a.ID].regs) 885 } else { 886 // Input is still live. 887 // Try to move it around before kicking out, if there is a free register. 888 // We generate a Copy in the predecessor block and record it. It will be 889 // deleted if never used. 890 r := phiRegs[i] 891 if r == noRegister { 892 continue 893 } 894 // Pick a free register. At this point some registers used in the predecessor 895 // block may have been deallocated. Those are the ones used for Phis. Exclude 896 // them (and they are not going to be helpful anyway). 897 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed 898 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 { 899 r2 := pickReg(m) 900 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c) 901 s.copies[c] = false 902 if s.f.pass.debug > regDebug { 903 fmt.Printf("copy %s to %s : %s\n", a, c, s.registers[r2].Name()) 904 } 905 s.setOrig(c, a) 906 s.assignReg(r2, a, c) 907 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c}) 908 } 909 s.freeReg(r) 910 } 911 } 912 913 // Third pass - pick registers for phis whose inputs 914 // were not in a register. 915 for i, v := range phis { 916 if !s.values[v.ID].needReg { 917 continue 918 } 919 if phiRegs[i] != noRegister { 920 continue 921 } 922 if s.f.Config.use387 && v.Type.IsFloat() { 923 continue // 387 can't handle floats in registers between blocks 924 } 925 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used 926 if m != 0 { 927 r := pickReg(m) 928 phiRegs[i] = r 929 phiUsed |= regMask(1) << r 930 } 931 } 932 933 // Set registers for phis. Add phi spill code. 934 for i, v := range phis { 935 if !s.values[v.ID].needReg { 936 continue 937 } 938 r := phiRegs[i] 939 if r == noRegister { 940 // stack-based phi 941 // Spills will be inserted in all the predecessors below. 942 s.values[v.ID].spill = v // v starts life spilled 943 continue 944 } 945 // register-based phi 946 s.assignReg(r, v, v) 947 } 948 949 // Deallocate any values which are no longer live. Phis are excluded. 950 for r := register(0); r < s.numRegs; r++ { 951 if phiUsed>>r&1 != 0 { 952 continue 953 } 954 v := s.regs[r].v 955 if v != nil && !regValLiveSet.contains(v.ID) { 956 s.freeReg(r) 957 } 958 } 959 960 // Save the starting state for use by merge edges. 961 var regList []startReg 962 for r := register(0); r < s.numRegs; r++ { 963 v := s.regs[r].v 964 if v == nil { 965 continue 966 } 967 if phiUsed>>r&1 != 0 { 968 // Skip registers that phis used, we'll handle those 969 // specially during merge edge processing. 970 continue 971 } 972 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos}) 973 } 974 s.startRegs[b.ID] = regList 975 976 if s.f.pass.debug > regDebug { 977 fmt.Printf("after phis\n") 978 for _, x := range s.startRegs[b.ID] { 979 fmt.Printf(" %s: v%d\n", s.registers[x.r].Name(), x.v.ID) 980 } 981 } 982 } 983 984 // Allocate space to record the desired registers for each value. 985 dinfo = dinfo[:0] 986 for i := 0; i < len(oldSched); i++ { 987 dinfo = append(dinfo, dentry{}) 988 } 989 990 // Load static desired register info at the end of the block. 991 desired.copy(&s.desired[b.ID]) 992 993 // Check actual assigned registers at the start of the next block(s). 994 // Dynamically assigned registers will trump the static 995 // desired registers computed during liveness analysis. 996 // Note that we do this phase after startRegs is set above, so that 997 // we get the right behavior for a block which branches to itself. 998 for _, e := range b.Succs { 999 succ := e.b 1000 // TODO: prioritize likely successor? 1001 for _, x := range s.startRegs[succ.ID] { 1002 desired.add(x.v.ID, x.r) 1003 } 1004 // Process phi ops in succ. 1005 pidx := e.i 1006 for _, v := range succ.Values { 1007 if v.Op != OpPhi { 1008 break 1009 } 1010 if !s.values[v.ID].needReg { 1011 continue 1012 } 1013 rp, ok := s.f.getHome(v.ID).(*Register) 1014 if !ok { 1015 continue 1016 } 1017 desired.add(v.Args[pidx].ID, register(rp.num)) 1018 } 1019 } 1020 // Walk values backwards computing desired register info. 1021 // See computeLive for more comments. 1022 for i := len(oldSched) - 1; i >= 0; i-- { 1023 v := oldSched[i] 1024 prefs := desired.remove(v.ID) 1025 desired.clobber(opcodeTable[v.Op].reg.clobbers) 1026 for _, j := range opcodeTable[v.Op].reg.inputs { 1027 if countRegs(j.regs) != 1 { 1028 continue 1029 } 1030 desired.clobber(j.regs) 1031 desired.add(v.Args[j.idx].ID, pickReg(j.regs)) 1032 } 1033 if opcodeTable[v.Op].resultInArg0 { 1034 if opcodeTable[v.Op].commutative { 1035 desired.addList(v.Args[1].ID, prefs) 1036 } 1037 desired.addList(v.Args[0].ID, prefs) 1038 } 1039 // Save desired registers for this value. 1040 dinfo[i].out = prefs 1041 for j, a := range v.Args { 1042 if j >= len(dinfo[i].in) { 1043 break 1044 } 1045 dinfo[i].in[j] = desired.get(a.ID) 1046 } 1047 } 1048 1049 // Process all the non-phi values. 1050 for idx, v := range oldSched { 1051 if s.f.pass.debug > regDebug { 1052 fmt.Printf(" processing %s\n", v.LongString()) 1053 } 1054 regspec := opcodeTable[v.Op].reg 1055 if v.Op == OpPhi { 1056 f.Fatalf("phi %s not at start of block", v) 1057 } 1058 if v.Op == OpSP { 1059 s.assignReg(s.SPReg, v, v) 1060 b.Values = append(b.Values, v) 1061 s.advanceUses(v) 1062 s.sp = v.ID 1063 continue 1064 } 1065 if v.Op == OpSB { 1066 s.assignReg(s.SBReg, v, v) 1067 b.Values = append(b.Values, v) 1068 s.advanceUses(v) 1069 s.sb = v.ID 1070 continue 1071 } 1072 if v.Op == OpSelect0 || v.Op == OpSelect1 { 1073 if s.values[v.ID].needReg { 1074 var i = 0 1075 if v.Op == OpSelect1 { 1076 i = 1 1077 } 1078 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v) 1079 } 1080 b.Values = append(b.Values, v) 1081 s.advanceUses(v) 1082 goto issueSpill 1083 } 1084 if v.Op == OpGetG && s.f.Config.hasGReg { 1085 // use hardware g register 1086 if s.regs[s.GReg].v != nil { 1087 s.freeReg(s.GReg) // kick out the old value 1088 } 1089 s.assignReg(s.GReg, v, v) 1090 b.Values = append(b.Values, v) 1091 s.advanceUses(v) 1092 goto issueSpill 1093 } 1094 if v.Op == OpArg { 1095 // Args are "pre-spilled" values. We don't allocate 1096 // any register here. We just set up the spill pointer to 1097 // point at itself and any later user will restore it to use it. 1098 s.values[v.ID].spill = v 1099 b.Values = append(b.Values, v) 1100 s.advanceUses(v) 1101 continue 1102 } 1103 if v.Op == OpKeepAlive { 1104 // Make sure the argument to v is still live here. 1105 s.advanceUses(v) 1106 vi := &s.values[v.Args[0].ID] 1107 if vi.spill != nil { 1108 // Use the spill location. 1109 v.SetArg(0, vi.spill) 1110 } else { 1111 // No need to keep unspilled values live. 1112 // These are typically rematerializeable constants like nil, 1113 // or values of a variable that were modified since the last call. 1114 v.Op = OpCopy 1115 v.SetArgs1(v.Args[1]) 1116 } 1117 b.Values = append(b.Values, v) 1118 continue 1119 } 1120 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 { 1121 // No register allocation required (or none specified yet) 1122 s.freeRegs(regspec.clobbers) 1123 b.Values = append(b.Values, v) 1124 s.advanceUses(v) 1125 continue 1126 } 1127 1128 if s.values[v.ID].rematerializeable { 1129 // Value is rematerializeable, don't issue it here. 1130 // It will get issued just before each use (see 1131 // allocValueToReg). 1132 for _, a := range v.Args { 1133 a.Uses-- 1134 } 1135 s.advanceUses(v) 1136 continue 1137 } 1138 1139 if s.f.pass.debug > regDebug { 1140 fmt.Printf("value %s\n", v.LongString()) 1141 fmt.Printf(" out:") 1142 for _, r := range dinfo[idx].out { 1143 if r != noRegister { 1144 fmt.Printf(" %s", s.registers[r].Name()) 1145 } 1146 } 1147 fmt.Println() 1148 for i := 0; i < len(v.Args) && i < 3; i++ { 1149 fmt.Printf(" in%d:", i) 1150 for _, r := range dinfo[idx].in[i] { 1151 if r != noRegister { 1152 fmt.Printf(" %s", s.registers[r].Name()) 1153 } 1154 } 1155 fmt.Println() 1156 } 1157 } 1158 1159 // Move arguments to registers. Process in an ordering defined 1160 // by the register specification (most constrained first). 1161 args = append(args[:0], v.Args...) 1162 for _, i := range regspec.inputs { 1163 mask := i.regs 1164 if mask&s.values[args[i.idx].ID].regs == 0 { 1165 // Need a new register for the input. 1166 mask &= s.allocatable 1167 mask &^= s.nospill 1168 // Used desired register if available. 1169 if i.idx < 3 { 1170 for _, r := range dinfo[idx].in[i.idx] { 1171 if r != noRegister && (mask&^s.used)>>r&1 != 0 { 1172 // Desired register is allowed and unused. 1173 mask = regMask(1) << r 1174 break 1175 } 1176 } 1177 } 1178 // Avoid registers we're saving for other values. 1179 if mask&^desired.avoid != 0 { 1180 mask &^= desired.avoid 1181 } 1182 } 1183 args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos) 1184 } 1185 1186 // If the output clobbers the input register, make sure we have 1187 // at least two copies of the input register so we don't 1188 // have to reload the value from the spill location. 1189 if opcodeTable[v.Op].resultInArg0 { 1190 var m regMask 1191 if !s.liveAfterCurrentInstruction(v.Args[0]) { 1192 // arg0 is dead. We can clobber its register. 1193 goto ok 1194 } 1195 if s.values[v.Args[0].ID].rematerializeable { 1196 // We can rematerialize the input, don't worry about clobbering it. 1197 goto ok 1198 } 1199 if countRegs(s.values[v.Args[0].ID].regs) >= 2 { 1200 // we have at least 2 copies of arg0. We can afford to clobber one. 1201 goto ok 1202 } 1203 if opcodeTable[v.Op].commutative { 1204 if !s.liveAfterCurrentInstruction(v.Args[1]) { 1205 args[0], args[1] = args[1], args[0] 1206 goto ok 1207 } 1208 if s.values[v.Args[1].ID].rematerializeable { 1209 args[0], args[1] = args[1], args[0] 1210 goto ok 1211 } 1212 if countRegs(s.values[v.Args[1].ID].regs) >= 2 { 1213 args[0], args[1] = args[1], args[0] 1214 goto ok 1215 } 1216 } 1217 1218 // We can't overwrite arg0 (or arg1, if commutative). So we 1219 // need to make a copy of an input so we have a register we can modify. 1220 1221 // Possible new registers to copy into. 1222 m = s.compatRegs(v.Args[0].Type) &^ s.used 1223 if m == 0 { 1224 // No free registers. In this case we'll just clobber 1225 // an input and future uses of that input must use a restore. 1226 // TODO(khr): We should really do this like allocReg does it, 1227 // spilling the value with the most distant next use. 1228 goto ok 1229 } 1230 1231 // Try to move an input to the desired output. 1232 for _, r := range dinfo[idx].out { 1233 if r != noRegister && m>>r&1 != 0 { 1234 m = regMask(1) << r 1235 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos) 1236 // Note: we update args[0] so the instruction will 1237 // use the register copy we just made. 1238 goto ok 1239 } 1240 } 1241 // Try to copy input to its desired location & use its old 1242 // location as the result register. 1243 for _, r := range dinfo[idx].in[0] { 1244 if r != noRegister && m>>r&1 != 0 { 1245 m = regMask(1) << r 1246 c := s.allocValToReg(v.Args[0], m, true, v.Pos) 1247 s.copies[c] = false 1248 // Note: no update to args[0] so the instruction will 1249 // use the original copy. 1250 goto ok 1251 } 1252 } 1253 if opcodeTable[v.Op].commutative { 1254 for _, r := range dinfo[idx].in[1] { 1255 if r != noRegister && m>>r&1 != 0 { 1256 m = regMask(1) << r 1257 c := s.allocValToReg(v.Args[1], m, true, v.Pos) 1258 s.copies[c] = false 1259 args[0], args[1] = args[1], args[0] 1260 goto ok 1261 } 1262 } 1263 } 1264 // Avoid future fixed uses if we can. 1265 if m&^desired.avoid != 0 { 1266 m &^= desired.avoid 1267 } 1268 // Save input 0 to a new register so we can clobber it. 1269 c := s.allocValToReg(v.Args[0], m, true, v.Pos) 1270 s.copies[c] = false 1271 } 1272 1273 ok: 1274 // Now that all args are in regs, we're ready to issue the value itself. 1275 // Before we pick a register for the output value, allow input registers 1276 // to be deallocated. We do this here so that the output can use the 1277 // same register as a dying input. 1278 if !opcodeTable[v.Op].resultNotInArgs { 1279 s.tmpused = s.nospill 1280 s.nospill = 0 1281 s.advanceUses(v) // frees any registers holding args that are no longer live 1282 } 1283 1284 // Dump any registers which will be clobbered 1285 s.freeRegs(regspec.clobbers) 1286 s.tmpused |= regspec.clobbers 1287 1288 // Pick registers for outputs. 1289 { 1290 outRegs := [2]register{noRegister, noRegister} 1291 var used regMask 1292 for _, out := range regspec.outputs { 1293 mask := out.regs & s.allocatable &^ used 1294 if mask == 0 { 1295 continue 1296 } 1297 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 { 1298 if !opcodeTable[v.Op].commutative { 1299 // Output must use the same register as input 0. 1300 r := register(s.f.getHome(args[0].ID).(*Register).num) 1301 mask = regMask(1) << r 1302 } else { 1303 // Output must use the same register as input 0 or 1. 1304 r0 := register(s.f.getHome(args[0].ID).(*Register).num) 1305 r1 := register(s.f.getHome(args[1].ID).(*Register).num) 1306 // Check r0 and r1 for desired output register. 1307 found := false 1308 for _, r := range dinfo[idx].out { 1309 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 { 1310 mask = regMask(1) << r 1311 found = true 1312 if r == r1 { 1313 args[0], args[1] = args[1], args[0] 1314 } 1315 break 1316 } 1317 } 1318 if !found { 1319 // Neither are desired, pick r0. 1320 mask = regMask(1) << r0 1321 } 1322 } 1323 } 1324 for _, r := range dinfo[idx].out { 1325 if r != noRegister && (mask&^s.used)>>r&1 != 0 { 1326 // Desired register is allowed and unused. 1327 mask = regMask(1) << r 1328 break 1329 } 1330 } 1331 // Avoid registers we're saving for other values. 1332 if mask&^desired.avoid != 0 { 1333 mask &^= desired.avoid 1334 } 1335 r := s.allocReg(mask, v) 1336 outRegs[out.idx] = r 1337 used |= regMask(1) << r 1338 s.tmpused |= regMask(1) << r 1339 } 1340 // Record register choices 1341 if v.Type.IsTuple() { 1342 var outLocs LocPair 1343 if r := outRegs[0]; r != noRegister { 1344 outLocs[0] = &s.registers[r] 1345 } 1346 if r := outRegs[1]; r != noRegister { 1347 outLocs[1] = &s.registers[r] 1348 } 1349 s.f.setHome(v, outLocs) 1350 // Note that subsequent SelectX instructions will do the assignReg calls. 1351 } else { 1352 if r := outRegs[0]; r != noRegister { 1353 s.assignReg(r, v, v) 1354 } 1355 } 1356 } 1357 1358 // deallocate dead args, if we have not done so 1359 if opcodeTable[v.Op].resultNotInArgs { 1360 s.nospill = 0 1361 s.advanceUses(v) // frees any registers holding args that are no longer live 1362 } 1363 s.tmpused = 0 1364 1365 // Issue the Value itself. 1366 for i, a := range args { 1367 v.SetArg(i, a) // use register version of arguments 1368 } 1369 b.Values = append(b.Values, v) 1370 1371 issueSpill: 1372 } 1373 1374 // Load control value into reg. 1375 if v := b.Control; v != nil && s.values[v.ID].needReg { 1376 if s.f.pass.debug > regDebug { 1377 fmt.Printf(" processing control %s\n", v.LongString()) 1378 } 1379 // We assume that a control input can be passed in any 1380 // type-compatible register. If this turns out not to be true, 1381 // we'll need to introduce a regspec for a block's control value. 1382 b.Control = s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos) 1383 if b.Control != v { 1384 v.Uses-- 1385 b.Control.Uses++ 1386 } 1387 // Remove this use from the uses list. 1388 vi := &s.values[v.ID] 1389 u := vi.uses 1390 vi.uses = u.next 1391 if u.next == nil { 1392 s.freeRegs(vi.regs) // value is dead 1393 } 1394 u.next = s.freeUseRecords 1395 s.freeUseRecords = u 1396 } 1397 1398 // Spill any values that can't live across basic block boundaries. 1399 if s.f.Config.use387 { 1400 s.freeRegs(s.f.Config.fpRegMask) 1401 } 1402 1403 // If we are approaching a merge point and we are the primary 1404 // predecessor of it, find live values that we use soon after 1405 // the merge point and promote them to registers now. 1406 if len(b.Succs) == 1 { 1407 // For this to be worthwhile, the loop must have no calls in it. 1408 top := b.Succs[0].b 1409 loop := s.loopnest.b2l[top.ID] 1410 if loop == nil || loop.header != top || loop.containsCall { 1411 goto badloop 1412 } 1413 1414 // TODO: sort by distance, pick the closest ones? 1415 for _, live := range s.live[b.ID] { 1416 if live.dist >= unlikelyDistance { 1417 // Don't preload anything live after the loop. 1418 continue 1419 } 1420 vid := live.ID 1421 vi := &s.values[vid] 1422 if vi.regs != 0 { 1423 continue 1424 } 1425 if vi.rematerializeable { 1426 continue 1427 } 1428 v := s.orig[vid] 1429 if s.f.Config.use387 && v.Type.IsFloat() { 1430 continue // 387 can't handle floats in registers between blocks 1431 } 1432 m := s.compatRegs(v.Type) &^ s.used 1433 if m&^desired.avoid != 0 { 1434 m &^= desired.avoid 1435 } 1436 if m != 0 { 1437 s.allocValToReg(v, m, false, b.Pos) 1438 } 1439 } 1440 } 1441 badloop: 1442 ; 1443 1444 // Save end-of-block register state. 1445 // First count how many, this cuts allocations in half. 1446 k := 0 1447 for r := register(0); r < s.numRegs; r++ { 1448 v := s.regs[r].v 1449 if v == nil { 1450 continue 1451 } 1452 k++ 1453 } 1454 regList := make([]endReg, 0, k) 1455 for r := register(0); r < s.numRegs; r++ { 1456 v := s.regs[r].v 1457 if v == nil { 1458 continue 1459 } 1460 regList = append(regList, endReg{r, v, s.regs[r].c}) 1461 } 1462 s.endRegs[b.ID] = regList 1463 1464 if checkEnabled { 1465 regValLiveSet.clear() 1466 for _, x := range s.live[b.ID] { 1467 regValLiveSet.add(x.ID) 1468 } 1469 for r := register(0); r < s.numRegs; r++ { 1470 v := s.regs[r].v 1471 if v == nil { 1472 continue 1473 } 1474 if !regValLiveSet.contains(v.ID) { 1475 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b) 1476 } 1477 } 1478 } 1479 1480 // If a value is live at the end of the block and 1481 // isn't in a register, generate a use for the spill location. 1482 // We need to remember this information so that 1483 // the liveness analysis in stackalloc is correct. 1484 for _, e := range s.live[b.ID] { 1485 vi := &s.values[e.ID] 1486 if vi.regs != 0 { 1487 // in a register, we'll use that source for the merge. 1488 continue 1489 } 1490 if vi.rematerializeable { 1491 // we'll rematerialize during the merge. 1492 continue 1493 } 1494 //fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b) 1495 spill := s.makeSpill(s.orig[e.ID], b) 1496 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID) 1497 } 1498 1499 // Clear any final uses. 1500 // All that is left should be the pseudo-uses added for values which 1501 // are live at the end of b. 1502 for _, e := range s.live[b.ID] { 1503 u := s.values[e.ID].uses 1504 if u == nil { 1505 f.Fatalf("live at end, no uses v%d", e.ID) 1506 } 1507 if u.next != nil { 1508 f.Fatalf("live at end, too many uses v%d", e.ID) 1509 } 1510 s.values[e.ID].uses = nil 1511 u.next = s.freeUseRecords 1512 s.freeUseRecords = u 1513 } 1514 } 1515 1516 // Decide where the spills we generated will go. 1517 s.placeSpills() 1518 1519 // Anything that didn't get a register gets a stack location here. 1520 // (StoreReg, stack-based phis, inputs, ...) 1521 stacklive := stackalloc(s.f, s.spillLive) 1522 1523 // Fix up all merge edges. 1524 s.shuffle(stacklive) 1525 1526 // Erase any copies we never used. 1527 // Also, an unused copy might be the only use of another copy, 1528 // so continue erasing until we reach a fixed point. 1529 for { 1530 progress := false 1531 for c, used := range s.copies { 1532 if !used && c.Uses == 0 { 1533 if s.f.pass.debug > regDebug { 1534 fmt.Printf("delete copied value %s\n", c.LongString()) 1535 } 1536 c.Args[0].Uses-- 1537 f.freeValue(c) 1538 delete(s.copies, c) 1539 progress = true 1540 } 1541 } 1542 if !progress { 1543 break 1544 } 1545 } 1546 1547 for _, b := range f.Blocks { 1548 i := 0 1549 for _, v := range b.Values { 1550 if v.Op == OpInvalid { 1551 continue 1552 } 1553 b.Values[i] = v 1554 i++ 1555 } 1556 b.Values = b.Values[:i] 1557 } 1558 } 1559 1560 func (s *regAllocState) placeSpills() { 1561 f := s.f 1562 1563 // Precompute some useful info. 1564 phiRegs := make([]regMask, f.NumBlocks()) 1565 for _, b := range f.Blocks { 1566 var m regMask 1567 for _, v := range b.Values { 1568 if v.Op != OpPhi { 1569 break 1570 } 1571 if r, ok := f.getHome(v.ID).(*Register); ok { 1572 m |= regMask(1) << uint(r.num) 1573 } 1574 } 1575 phiRegs[b.ID] = m 1576 } 1577 1578 // Start maps block IDs to the list of spills 1579 // that go at the start of the block (but after any phis). 1580 start := map[ID][]*Value{} 1581 // After maps value IDs to the list of spills 1582 // that go immediately after that value ID. 1583 after := map[ID][]*Value{} 1584 1585 for i := range s.values { 1586 vi := s.values[i] 1587 spill := vi.spill 1588 if spill == nil { 1589 continue 1590 } 1591 if spill.Block != nil { 1592 // Some spills are already fully set up, 1593 // like OpArgs and stack-based phis. 1594 continue 1595 } 1596 v := s.orig[i] 1597 1598 // Walk down the dominator tree looking for a good place to 1599 // put the spill of v. At the start "best" is the best place 1600 // we have found so far. 1601 // TODO: find a way to make this O(1) without arbitrary cutoffs. 1602 best := v.Block 1603 bestArg := v 1604 var bestDepth int16 1605 if l := s.loopnest.b2l[best.ID]; l != nil { 1606 bestDepth = l.depth 1607 } 1608 b := best 1609 const maxSpillSearch = 100 1610 for i := 0; i < maxSpillSearch; i++ { 1611 // Find the child of b in the dominator tree which 1612 // dominates all restores. 1613 p := b 1614 b = nil 1615 for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 { 1616 if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax { 1617 // c also dominates all restores. Walk down into c. 1618 b = c 1619 break 1620 } 1621 } 1622 if b == nil { 1623 // Ran out of blocks which dominate all restores. 1624 break 1625 } 1626 1627 var depth int16 1628 if l := s.loopnest.b2l[b.ID]; l != nil { 1629 depth = l.depth 1630 } 1631 if depth > bestDepth { 1632 // Don't push the spill into a deeper loop. 1633 continue 1634 } 1635 1636 // If v is in a register at the start of b, we can 1637 // place the spill here (after the phis). 1638 if len(b.Preds) == 1 { 1639 for _, e := range s.endRegs[b.Preds[0].b.ID] { 1640 if e.v == v { 1641 // Found a better spot for the spill. 1642 best = b 1643 bestArg = e.c 1644 bestDepth = depth 1645 break 1646 } 1647 } 1648 } else { 1649 for _, e := range s.startRegs[b.ID] { 1650 if e.v == v { 1651 // Found a better spot for the spill. 1652 best = b 1653 bestArg = e.c 1654 bestDepth = depth 1655 break 1656 } 1657 } 1658 } 1659 } 1660 1661 // Put the spill in the best block we found. 1662 spill.Block = best 1663 spill.AddArg(bestArg) 1664 if best == v.Block && v.Op != OpPhi { 1665 // Place immediately after v. 1666 after[v.ID] = append(after[v.ID], spill) 1667 } else { 1668 // Place at the start of best block. 1669 start[best.ID] = append(start[best.ID], spill) 1670 } 1671 } 1672 1673 // Insert spill instructions into the block schedules. 1674 var oldSched []*Value 1675 for _, b := range f.Blocks { 1676 nphi := 0 1677 for _, v := range b.Values { 1678 if v.Op != OpPhi { 1679 break 1680 } 1681 nphi++ 1682 } 1683 oldSched = append(oldSched[:0], b.Values[nphi:]...) 1684 b.Values = b.Values[:nphi] 1685 for _, v := range start[b.ID] { 1686 b.Values = append(b.Values, v) 1687 } 1688 for _, v := range oldSched { 1689 b.Values = append(b.Values, v) 1690 for _, w := range after[v.ID] { 1691 b.Values = append(b.Values, w) 1692 } 1693 } 1694 } 1695 } 1696 1697 // shuffle fixes up all the merge edges (those going into blocks of indegree > 1). 1698 func (s *regAllocState) shuffle(stacklive [][]ID) { 1699 var e edgeState 1700 e.s = s 1701 e.cache = map[ID][]*Value{} 1702 e.contents = map[Location]contentRecord{} 1703 if s.f.pass.debug > regDebug { 1704 fmt.Printf("shuffle %s\n", s.f.Name) 1705 fmt.Println(s.f.String()) 1706 } 1707 1708 for _, b := range s.f.Blocks { 1709 if len(b.Preds) <= 1 { 1710 continue 1711 } 1712 e.b = b 1713 for i, edge := range b.Preds { 1714 p := edge.b 1715 e.p = p 1716 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID]) 1717 e.process() 1718 } 1719 } 1720 } 1721 1722 type edgeState struct { 1723 s *regAllocState 1724 p, b *Block // edge goes from p->b. 1725 1726 // for each pre-regalloc value, a list of equivalent cached values 1727 cache map[ID][]*Value 1728 cachedVals []ID // (superset of) keys of the above map, for deterministic iteration 1729 1730 // map from location to the value it contains 1731 contents map[Location]contentRecord 1732 1733 // desired destination locations 1734 destinations []dstRecord 1735 extra []dstRecord 1736 1737 usedRegs regMask // registers currently holding something 1738 uniqueRegs regMask // registers holding the only copy of a value 1739 finalRegs regMask // registers holding final target 1740 } 1741 1742 type contentRecord struct { 1743 vid ID // pre-regalloc value 1744 c *Value // cached value 1745 final bool // this is a satisfied destination 1746 pos src.XPos // source position of use of the value 1747 } 1748 1749 type dstRecord struct { 1750 loc Location // register or stack slot 1751 vid ID // pre-regalloc value it should contain 1752 splice **Value // place to store reference to the generating instruction 1753 pos src.XPos // source position of use of this location 1754 } 1755 1756 // setup initializes the edge state for shuffling. 1757 func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) { 1758 if e.s.f.pass.debug > regDebug { 1759 fmt.Printf("edge %s->%s\n", e.p, e.b) 1760 } 1761 1762 // Clear state. 1763 for _, vid := range e.cachedVals { 1764 delete(e.cache, vid) 1765 } 1766 e.cachedVals = e.cachedVals[:0] 1767 for k := range e.contents { 1768 delete(e.contents, k) 1769 } 1770 e.usedRegs = 0 1771 e.uniqueRegs = 0 1772 e.finalRegs = 0 1773 1774 // Live registers can be sources. 1775 for _, x := range srcReg { 1776 e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care the position of the source 1777 } 1778 // So can all of the spill locations. 1779 for _, spillID := range stacklive { 1780 v := e.s.orig[spillID] 1781 spill := e.s.values[v.ID].spill 1782 if !e.s.sdom.isAncestorEq(spill.Block, e.p) { 1783 // Spills were placed that only dominate the uses found 1784 // during the first regalloc pass. The edge fixup code 1785 // can't use a spill location if the spill doesn't dominate 1786 // the edge. 1787 // We are guaranteed that if the spill doesn't dominate this edge, 1788 // then the value is available in a register (because we called 1789 // makeSpill for every value not in a register at the start 1790 // of an edge). 1791 continue 1792 } 1793 e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care the position of the source 1794 } 1795 1796 // Figure out all the destinations we need. 1797 dsts := e.destinations[:0] 1798 for _, x := range dstReg { 1799 dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos}) 1800 } 1801 // Phis need their args to end up in a specific location. 1802 for _, v := range e.b.Values { 1803 if v.Op != OpPhi { 1804 break 1805 } 1806 loc := e.s.f.getHome(v.ID) 1807 if loc == nil { 1808 continue 1809 } 1810 dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos}) 1811 } 1812 e.destinations = dsts 1813 1814 if e.s.f.pass.debug > regDebug { 1815 for _, vid := range e.cachedVals { 1816 a := e.cache[vid] 1817 for _, c := range a { 1818 fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID).Name(), vid, c) 1819 } 1820 } 1821 for _, d := range e.destinations { 1822 fmt.Printf("dst %s: v%d\n", d.loc.Name(), d.vid) 1823 } 1824 } 1825 } 1826 1827 // process generates code to move all the values to the right destination locations. 1828 func (e *edgeState) process() { 1829 dsts := e.destinations 1830 1831 // Process the destinations until they are all satisfied. 1832 for len(dsts) > 0 { 1833 i := 0 1834 for _, d := range dsts { 1835 if !e.processDest(d.loc, d.vid, d.splice, d.pos) { 1836 // Failed - save for next iteration. 1837 dsts[i] = d 1838 i++ 1839 } 1840 } 1841 if i < len(dsts) { 1842 // Made some progress. Go around again. 1843 dsts = dsts[:i] 1844 1845 // Append any extras destinations we generated. 1846 dsts = append(dsts, e.extra...) 1847 e.extra = e.extra[:0] 1848 continue 1849 } 1850 1851 // We made no progress. That means that any 1852 // remaining unsatisfied moves are in simple cycles. 1853 // For example, A -> B -> C -> D -> A. 1854 // A ----> B 1855 // ^ | 1856 // | | 1857 // | v 1858 // D <---- C 1859 1860 // To break the cycle, we pick an unused register, say R, 1861 // and put a copy of B there. 1862 // A ----> B 1863 // ^ | 1864 // | | 1865 // | v 1866 // D <---- C <---- R=copyofB 1867 // When we resume the outer loop, the A->B move can now proceed, 1868 // and eventually the whole cycle completes. 1869 1870 // Copy any cycle location to a temp register. This duplicates 1871 // one of the cycle entries, allowing the just duplicated value 1872 // to be overwritten and the cycle to proceed. 1873 d := dsts[0] 1874 loc := d.loc 1875 vid := e.contents[loc].vid 1876 c := e.contents[loc].c 1877 r := e.findRegFor(c.Type) 1878 if e.s.f.pass.debug > regDebug { 1879 fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc.Name(), c) 1880 } 1881 if _, isReg := loc.(*Register); isReg { 1882 c = e.p.NewValue1(d.pos, OpCopy, c.Type, c) 1883 } else { 1884 c = e.p.NewValue1(d.pos, OpLoadReg, c.Type, c) 1885 } 1886 e.set(r, vid, c, false, d.pos) 1887 } 1888 } 1889 1890 // processDest generates code to put value vid into location loc. Returns true 1891 // if progress was made. 1892 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool { 1893 occupant := e.contents[loc] 1894 if occupant.vid == vid { 1895 // Value is already in the correct place. 1896 e.contents[loc] = contentRecord{vid, occupant.c, true, pos} 1897 if splice != nil { 1898 (*splice).Uses-- 1899 *splice = occupant.c 1900 occupant.c.Uses++ 1901 } 1902 // Note: if splice==nil then c will appear dead. This is 1903 // non-SSA formed code, so be careful after this pass not to run 1904 // deadcode elimination. 1905 if _, ok := e.s.copies[occupant.c]; ok { 1906 // The copy at occupant.c was used to avoid spill. 1907 e.s.copies[occupant.c] = true 1908 } 1909 return true 1910 } 1911 1912 // Check if we're allowed to clobber the destination location. 1913 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable { 1914 // We can't overwrite the last copy 1915 // of a value that needs to survive. 1916 return false 1917 } 1918 1919 // Copy from a source of v, register preferred. 1920 v := e.s.orig[vid] 1921 var c *Value 1922 var src Location 1923 if e.s.f.pass.debug > regDebug { 1924 fmt.Printf("moving v%d to %s\n", vid, loc.Name()) 1925 fmt.Printf("sources of v%d:", vid) 1926 } 1927 for _, w := range e.cache[vid] { 1928 h := e.s.f.getHome(w.ID) 1929 if e.s.f.pass.debug > regDebug { 1930 fmt.Printf(" %s:%s", h.Name(), w) 1931 } 1932 _, isreg := h.(*Register) 1933 if src == nil || isreg { 1934 c = w 1935 src = h 1936 } 1937 } 1938 if e.s.f.pass.debug > regDebug { 1939 if src != nil { 1940 fmt.Printf(" [use %s]\n", src.Name()) 1941 } else { 1942 fmt.Printf(" [no source]\n") 1943 } 1944 } 1945 _, dstReg := loc.(*Register) 1946 var x *Value 1947 if c == nil { 1948 if !e.s.values[vid].rematerializeable { 1949 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString()) 1950 } 1951 if dstReg { 1952 x = v.copyIntoNoXPos(e.p) 1953 } else { 1954 // Rematerialize into stack slot. Need a free 1955 // register to accomplish this. 1956 e.erase(loc) // see pre-clobber comment below 1957 r := e.findRegFor(v.Type) 1958 x = v.copyIntoNoXPos(e.p) 1959 e.set(r, vid, x, false, pos) 1960 // Make sure we spill with the size of the slot, not the 1961 // size of x (which might be wider due to our dropping 1962 // of narrowing conversions). 1963 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x) 1964 } 1965 } else { 1966 // Emit move from src to dst. 1967 _, srcReg := src.(*Register) 1968 if srcReg { 1969 if dstReg { 1970 x = e.p.NewValue1(pos, OpCopy, c.Type, c) 1971 } else { 1972 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c) 1973 } 1974 } else { 1975 if dstReg { 1976 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c) 1977 } else { 1978 // mem->mem. Use temp register. 1979 1980 // Pre-clobber destination. This avoids the 1981 // following situation: 1982 // - v is currently held in R0 and stacktmp0. 1983 // - We want to copy stacktmp1 to stacktmp0. 1984 // - We choose R0 as the temporary register. 1985 // During the copy, both R0 and stacktmp0 are 1986 // clobbered, losing both copies of v. Oops! 1987 // Erasing the destination early means R0 will not 1988 // be chosen as the temp register, as it will then 1989 // be the last copy of v. 1990 e.erase(loc) 1991 1992 r := e.findRegFor(c.Type) 1993 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c) 1994 e.set(r, vid, t, false, pos) 1995 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t) 1996 } 1997 } 1998 } 1999 e.set(loc, vid, x, true, pos) 2000 if splice != nil { 2001 (*splice).Uses-- 2002 *splice = x 2003 x.Uses++ 2004 } 2005 return true 2006 } 2007 2008 // set changes the contents of location loc to hold the given value and its cached representative. 2009 func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) { 2010 e.s.f.setHome(c, loc) 2011 e.erase(loc) 2012 e.contents[loc] = contentRecord{vid, c, final, pos} 2013 a := e.cache[vid] 2014 if len(a) == 0 { 2015 e.cachedVals = append(e.cachedVals, vid) 2016 } 2017 a = append(a, c) 2018 e.cache[vid] = a 2019 if r, ok := loc.(*Register); ok { 2020 e.usedRegs |= regMask(1) << uint(r.num) 2021 if final { 2022 e.finalRegs |= regMask(1) << uint(r.num) 2023 } 2024 if len(a) == 1 { 2025 e.uniqueRegs |= regMask(1) << uint(r.num) 2026 } 2027 if len(a) == 2 { 2028 if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2029 e.uniqueRegs &^= regMask(1) << uint(t.num) 2030 } 2031 } 2032 } 2033 if e.s.f.pass.debug > regDebug { 2034 fmt.Printf("%s\n", c.LongString()) 2035 fmt.Printf("v%d now available in %s:%s\n", vid, loc.Name(), c) 2036 } 2037 } 2038 2039 // erase removes any user of loc. 2040 func (e *edgeState) erase(loc Location) { 2041 cr := e.contents[loc] 2042 if cr.c == nil { 2043 return 2044 } 2045 vid := cr.vid 2046 2047 if cr.final { 2048 // Add a destination to move this value back into place. 2049 // Make sure it gets added to the tail of the destination queue 2050 // so we make progress on other moves first. 2051 e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos}) 2052 } 2053 2054 // Remove c from the list of cached values. 2055 a := e.cache[vid] 2056 for i, c := range a { 2057 if e.s.f.getHome(c.ID) == loc { 2058 if e.s.f.pass.debug > regDebug { 2059 fmt.Printf("v%d no longer available in %s:%s\n", vid, loc.Name(), c) 2060 } 2061 a[i], a = a[len(a)-1], a[:len(a)-1] 2062 break 2063 } 2064 } 2065 e.cache[vid] = a 2066 2067 // Update register masks. 2068 if r, ok := loc.(*Register); ok { 2069 e.usedRegs &^= regMask(1) << uint(r.num) 2070 if cr.final { 2071 e.finalRegs &^= regMask(1) << uint(r.num) 2072 } 2073 } 2074 if len(a) == 1 { 2075 if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok { 2076 e.uniqueRegs |= regMask(1) << uint(r.num) 2077 } 2078 } 2079 } 2080 2081 // findRegFor finds a register we can use to make a temp copy of type typ. 2082 func (e *edgeState) findRegFor(typ *types.Type) Location { 2083 // Which registers are possibilities. 2084 var m regMask 2085 types := &e.s.f.Config.Types 2086 if typ.IsFloat() { 2087 m = e.s.compatRegs(types.Float64) 2088 } else { 2089 m = e.s.compatRegs(types.Int64) 2090 } 2091 2092 // Pick a register. In priority order: 2093 // 1) an unused register 2094 // 2) a non-unique register not holding a final value 2095 // 3) a non-unique register 2096 // 4) TODO: a register holding a rematerializeable value 2097 x := m &^ e.usedRegs 2098 if x != 0 { 2099 return &e.s.registers[pickReg(x)] 2100 } 2101 x = m &^ e.uniqueRegs &^ e.finalRegs 2102 if x != 0 { 2103 return &e.s.registers[pickReg(x)] 2104 } 2105 x = m &^ e.uniqueRegs 2106 if x != 0 { 2107 return &e.s.registers[pickReg(x)] 2108 } 2109 2110 // No register is available. 2111 // Pick a register to spill. 2112 for _, vid := range e.cachedVals { 2113 a := e.cache[vid] 2114 for _, c := range a { 2115 if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 { 2116 if !c.rematerializeable() { 2117 x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c) 2118 // Allocate a temp location to spill a register to. 2119 // The type of the slot is immaterial - it will not be live across 2120 // any safepoint. Just use a type big enough to hold any register. 2121 t := LocalSlot{e.s.f.fe.Auto(c.Pos, types.Int64), types.Int64, 0} 2122 // TODO: reuse these slots. 2123 e.set(t, vid, x, false, c.Pos) 2124 if e.s.f.pass.debug > regDebug { 2125 fmt.Printf(" SPILL %s->%s %s\n", r.Name(), t.Name(), x.LongString()) 2126 } 2127 } 2128 // r will now be overwritten by the caller. At some point 2129 // later, the newly saved value will be moved back to its 2130 // final destination in processDest. 2131 return r 2132 } 2133 } 2134 } 2135 2136 fmt.Printf("m:%d unique:%d final:%d\n", m, e.uniqueRegs, e.finalRegs) 2137 for _, vid := range e.cachedVals { 2138 a := e.cache[vid] 2139 for _, c := range a { 2140 fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID).Name()) 2141 } 2142 } 2143 e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b) 2144 return nil 2145 } 2146 2147 // rematerializeable reports whether the register allocator should recompute 2148 // a value instead of spilling/restoring it. 2149 func (v *Value) rematerializeable() bool { 2150 if !opcodeTable[v.Op].rematerializeable { 2151 return false 2152 } 2153 for _, a := range v.Args { 2154 // SP and SB (generated by OpSP and OpSB) are always available. 2155 if a.Op != OpSP && a.Op != OpSB { 2156 return false 2157 } 2158 } 2159 return true 2160 } 2161 2162 type liveInfo struct { 2163 ID ID // ID of value 2164 dist int32 // # of instructions before next use 2165 pos src.XPos // source position of next use 2166 } 2167 2168 // dblock contains information about desired & avoid registers at the end of a block. 2169 type dblock struct { 2170 prefers []desiredStateEntry 2171 avoid regMask 2172 } 2173 2174 // computeLive computes a map from block ID to a list of value IDs live at the end 2175 // of that block. Together with the value ID is a count of how many instructions 2176 // to the next use of that value. The resulting map is stored in s.live. 2177 // computeLive also computes the desired register information at the end of each block. 2178 // This desired register information is stored in s.desired. 2179 // TODO: this could be quadratic if lots of variables are live across lots of 2180 // basic blocks. Figure out a way to make this function (or, more precisely, the user 2181 // of this function) require only linear size & time. 2182 func (s *regAllocState) computeLive() { 2183 f := s.f 2184 s.live = make([][]liveInfo, f.NumBlocks()) 2185 s.desired = make([]desiredState, f.NumBlocks()) 2186 var phis []*Value 2187 2188 live := newSparseMap(f.NumValues()) 2189 t := newSparseMap(f.NumValues()) 2190 2191 // Keep track of which value we want in each register. 2192 var desired desiredState 2193 2194 // Instead of iterating over f.Blocks, iterate over their postordering. 2195 // Liveness information flows backward, so starting at the end 2196 // increases the probability that we will stabilize quickly. 2197 // TODO: Do a better job yet. Here's one possibility: 2198 // Calculate the dominator tree and locate all strongly connected components. 2199 // If a value is live in one block of an SCC, it is live in all. 2200 // Walk the dominator tree from end to beginning, just once, treating SCC 2201 // components as single blocks, duplicated calculated liveness information 2202 // out to all of them. 2203 po := f.postorder() 2204 s.loopnest = f.loopnest() 2205 s.loopnest.calculateDepths() 2206 for { 2207 changed := false 2208 2209 for _, b := range po { 2210 // Start with known live values at the end of the block. 2211 // Add len(b.Values) to adjust from end-of-block distance 2212 // to beginning-of-block distance. 2213 live.clear() 2214 for _, e := range s.live[b.ID] { 2215 live.set(e.ID, e.dist+int32(len(b.Values)), e.pos) 2216 } 2217 2218 // Mark control value as live 2219 if b.Control != nil && s.values[b.Control.ID].needReg { 2220 live.set(b.Control.ID, int32(len(b.Values)), b.Pos) 2221 } 2222 2223 // Propagate backwards to the start of the block 2224 // Assumes Values have been scheduled. 2225 phis = phis[:0] 2226 for i := len(b.Values) - 1; i >= 0; i-- { 2227 v := b.Values[i] 2228 live.remove(v.ID) 2229 if v.Op == OpPhi { 2230 // save phi ops for later 2231 phis = append(phis, v) 2232 continue 2233 } 2234 if opcodeTable[v.Op].call { 2235 c := live.contents() 2236 for i := range c { 2237 c[i].val += unlikelyDistance 2238 } 2239 } 2240 for _, a := range v.Args { 2241 if s.values[a.ID].needReg { 2242 live.set(a.ID, int32(i), v.Pos) 2243 } 2244 } 2245 } 2246 // Propagate desired registers backwards. 2247 desired.copy(&s.desired[b.ID]) 2248 for i := len(b.Values) - 1; i >= 0; i-- { 2249 v := b.Values[i] 2250 prefs := desired.remove(v.ID) 2251 if v.Op == OpPhi { 2252 // TODO: if v is a phi, save desired register for phi inputs. 2253 // For now, we just drop it and don't propagate 2254 // desired registers back though phi nodes. 2255 continue 2256 } 2257 // Cancel desired registers if they get clobbered. 2258 desired.clobber(opcodeTable[v.Op].reg.clobbers) 2259 // Update desired registers if there are any fixed register inputs. 2260 for _, j := range opcodeTable[v.Op].reg.inputs { 2261 if countRegs(j.regs) != 1 { 2262 continue 2263 } 2264 desired.clobber(j.regs) 2265 desired.add(v.Args[j.idx].ID, pickReg(j.regs)) 2266 } 2267 // Set desired register of input 0 if this is a 2-operand instruction. 2268 if opcodeTable[v.Op].resultInArg0 { 2269 if opcodeTable[v.Op].commutative { 2270 desired.addList(v.Args[1].ID, prefs) 2271 } 2272 desired.addList(v.Args[0].ID, prefs) 2273 } 2274 } 2275 2276 // For each predecessor of b, expand its list of live-at-end values. 2277 // invariant: live contains the values live at the start of b (excluding phi inputs) 2278 for i, e := range b.Preds { 2279 p := e.b 2280 // Compute additional distance for the edge. 2281 // Note: delta must be at least 1 to distinguish the control 2282 // value use from the first user in a successor block. 2283 delta := int32(normalDistance) 2284 if len(p.Succs) == 2 { 2285 if p.Succs[0].b == b && p.Likely == BranchLikely || 2286 p.Succs[1].b == b && p.Likely == BranchUnlikely { 2287 delta = likelyDistance 2288 } 2289 if p.Succs[0].b == b && p.Likely == BranchUnlikely || 2290 p.Succs[1].b == b && p.Likely == BranchLikely { 2291 delta = unlikelyDistance 2292 } 2293 } 2294 2295 // Update any desired registers at the end of p. 2296 s.desired[p.ID].merge(&desired) 2297 2298 // Start t off with the previously known live values at the end of p. 2299 t.clear() 2300 for _, e := range s.live[p.ID] { 2301 t.set(e.ID, e.dist, e.pos) 2302 } 2303 update := false 2304 2305 // Add new live values from scanning this block. 2306 for _, e := range live.contents() { 2307 d := e.val + delta 2308 if !t.contains(e.key) || d < t.get(e.key) { 2309 update = true 2310 t.set(e.key, d, e.aux) 2311 } 2312 } 2313 // Also add the correct arg from the saved phi values. 2314 // All phis are at distance delta (we consider them 2315 // simultaneously happening at the start of the block). 2316 for _, v := range phis { 2317 id := v.Args[i].ID 2318 if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) { 2319 update = true 2320 t.set(id, delta, v.Pos) 2321 } 2322 } 2323 2324 if !update { 2325 continue 2326 } 2327 // The live set has changed, update it. 2328 l := s.live[p.ID][:0] 2329 if cap(l) < t.size() { 2330 l = make([]liveInfo, 0, t.size()) 2331 } 2332 for _, e := range t.contents() { 2333 l = append(l, liveInfo{e.key, e.val, e.aux}) 2334 } 2335 s.live[p.ID] = l 2336 changed = true 2337 } 2338 } 2339 2340 if !changed { 2341 break 2342 } 2343 } 2344 if f.pass.debug > regDebug { 2345 fmt.Println("live values at end of each block") 2346 for _, b := range f.Blocks { 2347 fmt.Printf(" %s:", b) 2348 for _, x := range s.live[b.ID] { 2349 fmt.Printf(" v%d", x.ID) 2350 for _, e := range s.desired[b.ID].entries { 2351 if e.ID != x.ID { 2352 continue 2353 } 2354 fmt.Printf("[") 2355 first := true 2356 for _, r := range e.regs { 2357 if r == noRegister { 2358 continue 2359 } 2360 if !first { 2361 fmt.Printf(",") 2362 } 2363 fmt.Print(s.registers[r].Name()) 2364 first = false 2365 } 2366 fmt.Printf("]") 2367 } 2368 } 2369 fmt.Printf(" avoid=%x", int64(s.desired[b.ID].avoid)) 2370 fmt.Println() 2371 } 2372 } 2373 } 2374 2375 // A desiredState represents desired register assignments. 2376 type desiredState struct { 2377 // Desired assignments will be small, so we just use a list 2378 // of valueID+registers entries. 2379 entries []desiredStateEntry 2380 // Registers that other values want to be in. This value will 2381 // contain at least the union of the regs fields of entries, but 2382 // may contain additional entries for values that were once in 2383 // this data structure but are no longer. 2384 avoid regMask 2385 } 2386 type desiredStateEntry struct { 2387 // (pre-regalloc) value 2388 ID ID 2389 // Registers it would like to be in, in priority order. 2390 // Unused slots are filled with noRegister. 2391 regs [4]register 2392 } 2393 2394 func (d *desiredState) clear() { 2395 d.entries = d.entries[:0] 2396 d.avoid = 0 2397 } 2398 2399 // get returns a list of desired registers for value vid. 2400 func (d *desiredState) get(vid ID) [4]register { 2401 for _, e := range d.entries { 2402 if e.ID == vid { 2403 return e.regs 2404 } 2405 } 2406 return [4]register{noRegister, noRegister, noRegister, noRegister} 2407 } 2408 2409 // add records that we'd like value vid to be in register r. 2410 func (d *desiredState) add(vid ID, r register) { 2411 d.avoid |= regMask(1) << r 2412 for i := range d.entries { 2413 e := &d.entries[i] 2414 if e.ID != vid { 2415 continue 2416 } 2417 if e.regs[0] == r { 2418 // Already known and highest priority 2419 return 2420 } 2421 for j := 1; j < len(e.regs); j++ { 2422 if e.regs[j] == r { 2423 // Move from lower priority to top priority 2424 copy(e.regs[1:], e.regs[:j]) 2425 e.regs[0] = r 2426 return 2427 } 2428 } 2429 copy(e.regs[1:], e.regs[:]) 2430 e.regs[0] = r 2431 return 2432 } 2433 d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}}) 2434 } 2435 2436 func (d *desiredState) addList(vid ID, regs [4]register) { 2437 // regs is in priority order, so iterate in reverse order. 2438 for i := len(regs) - 1; i >= 0; i-- { 2439 r := regs[i] 2440 if r != noRegister { 2441 d.add(vid, r) 2442 } 2443 } 2444 } 2445 2446 // clobber erases any desired registers in the set m. 2447 func (d *desiredState) clobber(m regMask) { 2448 for i := 0; i < len(d.entries); { 2449 e := &d.entries[i] 2450 j := 0 2451 for _, r := range e.regs { 2452 if r != noRegister && m>>r&1 == 0 { 2453 e.regs[j] = r 2454 j++ 2455 } 2456 } 2457 if j == 0 { 2458 // No more desired registers for this value. 2459 d.entries[i] = d.entries[len(d.entries)-1] 2460 d.entries = d.entries[:len(d.entries)-1] 2461 continue 2462 } 2463 for ; j < len(e.regs); j++ { 2464 e.regs[j] = noRegister 2465 } 2466 i++ 2467 } 2468 d.avoid &^= m 2469 } 2470 2471 // copy copies a desired state from another desiredState x. 2472 func (d *desiredState) copy(x *desiredState) { 2473 d.entries = append(d.entries[:0], x.entries...) 2474 d.avoid = x.avoid 2475 } 2476 2477 // remove removes the desired registers for vid and returns them. 2478 func (d *desiredState) remove(vid ID) [4]register { 2479 for i := range d.entries { 2480 if d.entries[i].ID == vid { 2481 regs := d.entries[i].regs 2482 d.entries[i] = d.entries[len(d.entries)-1] 2483 d.entries = d.entries[:len(d.entries)-1] 2484 return regs 2485 } 2486 } 2487 return [4]register{noRegister, noRegister, noRegister, noRegister} 2488 } 2489 2490 // merge merges another desired state x into d. 2491 func (d *desiredState) merge(x *desiredState) { 2492 d.avoid |= x.avoid 2493 // There should only be a few desired registers, so 2494 // linear insert is ok. 2495 for _, e := range x.entries { 2496 d.addList(e.ID, e.regs) 2497 } 2498 } 2499 2500 func min32(x, y int32) int32 { 2501 if x < y { 2502 return x 2503 } 2504 return y 2505 } 2506 func max32(x, y int32) int32 { 2507 if x > y { 2508 return x 2509 } 2510 return y 2511 }