// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Register allocation.
//
// We use a version of a linear scan register allocator. We treat the
// whole function as a single long basic block and run through
// it using a greedy register allocator. Then all merge edges
// (those targeting a block with len(Preds)>1) are processed to
// shuffle data into the place that the target of the edge expects.
//
// The greedy allocator moves values into registers just before they
// are used, spills registers only when necessary, and spills the
// value whose next use is farthest in the future.
//
// The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
// such predecessor provides the starting register state for a block.
//
// It also requires that there are no critical edges (critical =
// comes from a block with >1 successor and goes to a block with >1
// predecessor). This makes it easy to add fixup code on merge edges -
// the source of a merge edge has only one successor, so we can add
// fixup code to the end of that block.

// Spilling
//
// During the normal course of the allocator, we might throw a still-live
// value out of all registers. When that value is subsequently used, we must
// load it from a slot on the stack. We must also issue an instruction to
// initialize that stack location with a copy of v.
//
// pre-regalloc:
//   (1) v = Op ...
//   (2) x = Op ...
//   (3) ... = Op v ...
//
// post-regalloc:
//   (1) v = Op ...   : AX // computes v, store result in AX
//       s = StoreReg v    // spill v to a stack slot
//   (2) x = Op ...   : AX // some other op uses AX
//       c = LoadReg s : CX // restore v from stack slot
//   (3) ... = Op c ...    // use the restored value
//
// Allocation occurs normally until we reach (3) and we realize we have
// a use of v and it isn't in any register. At that point, we allocate
// a spill (a StoreReg) for v. We can't determine the correct place for
// the spill at this point, so we allocate the spill as blockless initially.
// The restore is then generated to load v back into a register so it can
// be used. Subsequent uses of v will use the restored value c instead.
//
// What remains is the question of where to schedule the spill.
// During allocation, we keep track of the dominator of all restores of v.
// The spill of v must dominate that block. The spill must also be issued at
// a point where v is still in a register.
//
// To find the right place, start at b, the block which dominates all restores.
//  - If b is v.Block, then issue the spill right after v.
//    It is known to be in a register at that point, and dominates any restores.
//  - Otherwise, if v is in a register at the start of b,
//    put the spill of v at the start of b.
//  - Otherwise, set b = immediate dominator of b, and repeat.
//
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
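// (A phi that gets assigned a register when its block is processed below
// becomes a register phi; one left without a register starts life spilled
// and is handled as a stack phi.)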
//
// A register phi must have the phi and all of its inputs allocated to the
// same register. Register phis are spilled similarly to regular ops.
//
// A stack phi must have the phi and all of its inputs allocated to the same
// stack location. Stack phis start out life already spilled - each phi
// input must be a store (using StoreReg) at the end of the corresponding
// predecessor block.
//   b1: y = ... : AX        b2: z = ... : BX
//       y2 = StoreReg y         z2 = StoreReg z
//       goto b3                 goto b3
//   b3: x = phi(y2, z2)
// The stack allocator knows that StoreReg args of stack-allocated phis
// must be allocated to the same stack slot as the phi that uses them.
// x is now a spilled value and a restore must appear before its first use.

// TODO

// Use an affinity graph to mark two values which should use the
// same register. This affinity graph will be used to prefer certain
// registers for allocation. This affinity helps eliminate moves that
// are required for phi implementations and helps generate allocations
// for 2-register architectures.

// Note: regalloc generates a not-quite-SSA output. If we have:
//
//             b1: x = ... : AX
//                 x2 = StoreReg x
//                 ... AX gets reused for something else ...
//                 if ... goto b3 else b4
//
//   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
//       ... use x3 ...                 ... use x4 ...
//
//             b2: ... use x3 ...
//
// If b3 is the primary predecessor of b2, then we use x3 in b2 and
// add a x4:CX->BX copy at the end of b4.
// But the definition of x3 doesn't dominate b2. We should really
// insert a dummy phi at the start of b2 (x5=phi(x3,x4):BX) to keep
// SSA form. For now, we ignore this problem as remaining in strict
// SSA form isn't needed after regalloc. We'll just leave the use
// of x3 not dominated by the definition of x3, and the CX->BX copy
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?

package ssa

import (
	"cmd/compile/internal/types"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
	"unsafe"
)

const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)

// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)

// regalloc performs register allocation on f. It sets f.RegAlloc
// to the resulting allocation.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
}

type register uint8

const noRegister register = 255

type regMask uint64

func (m regMask) String() string {
	s := ""
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if s != "" {
			s += " "
		}
		s += fmt.Sprintf("r%d", r)
	}
	return s
}

// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	n := 0
	for r != 0 {
		n += int(r & 1)
		r >>= 1
	}
	return n
}

// pickReg picks an arbitrary register from the register mask.
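// It chooses the lowest-numbered register in the mask; the mask must be nonempty.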
func pickReg(r regMask) register {
	// pick the lowest one
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	for i := register(0); ; i++ {
		if r&1 != 0 {
			return i
		}
		r >>= 1
	}
}

type use struct {
	dist int32    // distance from start of the block to a use of a value
	pos  src.XPos // source position of the use
	next *use     // linked list of uses of a value in nondecreasing dist order
}

// A valState records the register allocation state for a (pre-regalloc) value.
type valState struct {
	regs              regMask // the set of registers holding a Value (usually just one)
	uses              *use    // list of uses in this block
	spill             *Value  // spilled copy of the Value (if any)
	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags()
	rematerializeable bool    // cached value of v.rematerializeable()
}

type regState struct {
	v *Value // Original (preregalloc) Value stored in this register.
	c *Value // A Value equal to v which is currently in a register. Might be v or a copy of it.
	// If a register is unused, v==c==nil
}

type regAllocState struct {
	f *Func

	sdom        SparseTree
	registers   []Register
	numRegs     register
	SPReg       register
	SBReg       register
	GReg        register
	allocatable regMask

	// for each block, its primary predecessor.
	// A predecessor of b is primary if it is the closest
	// predecessor that appears before b in the layout order.
	// We record the index in the Preds list where the primary predecessor sits.
	primary []int32

	// live values at the end of each block. live[b.ID] is a list of value IDs
	// which are live at the end of b, together with a count of how many instructions
	// forward to the next use.
	live [][]liveInfo
	// desired register assignments at the end of each block.
	// Note that this is a static map computed before allocation occurs. Dynamic
	// register desires (from partially completed allocations) will trump
	// this information.
	desired []desiredState

	// current state of each (preregalloc) Value
	values []valState

	// names associated with each Value
	valueNames [][]LocalSlot

	// ID of SP, SB values
	sp, sb ID

	// For each Value, map from its value ID back to the
	// preregalloc Value it was derived from.
	orig []*Value

	// current state of each register
	regs []regState

	// registers that contain values which can't be kicked out
	nospill regMask

	// mask of registers currently in use
	used regMask

	// mask of registers used in the current instruction
	tmpused regMask

	// current block we're working on
	curBlock *Block

	// cache of use records
	freeUseRecords *use

	// endRegs[blockid] is the register state at the end of each block.
	// encoded as a set of endReg records.
	endRegs [][]endReg

	// startRegs[blockid] is the register state at the start of merge blocks.
	// saved state does not include the state of phi ops in the block.
	startRegs [][]startReg

	// spillLive[blockid] is the set of live spills at the end of each block
	spillLive [][]ID

	// a set of copies we generated to move things around, and
	// whether it is used in shuffle. Unused copies will be deleted.
	copies map[*Value]bool

	loopnest *loopnest
}

type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
	c *Value // cached version of the value
}

type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached version of the value
	pos src.XPos // source position of use of this register
}

// freeReg frees up register r. Any current user of r is kicked out.
func (s *regAllocState) freeReg(r register) {
	s.freeOrResetReg(r, false)
}

// freeOrResetReg frees up register r. Any current user of r is kicked out.
// resetting indicates that the operation is only for bookkeeping,
// e.g. when clearing out state upon entry to a new block.
func (s *regAllocState) freeOrResetReg(r register, resetting bool) {
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	if !resetting && s.f.Config.ctxt.Flag_locationlists && len(s.valueNames[v.ID]) != 0 {
		kill := s.curBlock.NewValue0(src.NoXPos, OpRegKill, types.TypeVoid)
		for int(kill.ID) >= len(s.orig) {
			s.orig = append(s.orig, nil)
		}
		for _, name := range s.valueNames[v.ID] {
			s.f.NamedValues[name] = append(s.f.NamedValues[name], kill)
		}
		s.f.setHome(kill, &s.registers[r])
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}

// freeRegs frees up all registers listed in m.
func (s *regAllocState) freeRegs(m regMask) {
	for m&s.used != 0 {
		s.freeReg(pickReg(m & s.used))
	}
}

// setOrig records that c's original value is the same as
// v's original value.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	s.orig[c.ID] = s.orig[v.ID]
}

// assignReg assigns register r to hold c, a copy of v.
// r must be unused.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}

	// Update state.
	s.regs[r] = regState{v, c}
	s.values[v.ID].regs |= regMask(1) << r
	s.used |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])
}

// allocReg chooses a register from the set of registers in mask.
// If there is no unused register, a Value will be kicked out of
// a register to make room.
func (s *regAllocState) allocReg(mask regMask, v *Value) register {
	mask &= s.allocatable
	mask &^= s.nospill
	if mask == 0 {
		s.f.Fatalf("no register available for %s", v)
	}

	// Pick an unused register if one is available.
	if mask&^s.used != 0 {
		return pickReg(mask &^ s.used)
	}

	// Pick a value to spill. Spill the value with the
	// farthest-in-the-future use.
	// TODO: Prefer registers with already spilled Values?
	// TODO: Modify preference using affinity graph.
	// TODO: if a single value is in multiple registers, spill one of them
	// before spilling a value in just a single register.

	// Find a register to spill. We spill the register containing the value
	// whose next use is as far in the future as possible.
	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
	var r register
	maxuse := int32(-1)
	for t := register(0); t < s.numRegs; t++ {
		if mask>>t&1 == 0 {
			continue
		}
		v := s.regs[t].v
		if n := s.values[v.ID].uses.dist; n > maxuse {
			// v's next use is farther in the future than any value
			// we've seen so far. A new best spill candidate.
			r = t
			maxuse = n
		}
	}
	if maxuse == -1 {
		s.f.Fatalf("couldn't find register to spill")
	}

	// Try to move it around before kicking out, if there is a free register.
	// We generate a Copy and record it. It will be deleted if never used.
	v2 := s.regs[r].v
	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
		r2 := pickReg(m)
		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
		s.copies[c] = false
		if s.f.pass.debug > regDebug {
			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
		}
		s.setOrig(c, v2)
		s.assignReg(r2, v2, c)
	}
	s.freeReg(r)
	return r
}

// makeSpill returns a Value which represents the spilled value of v.
// b is the block in which the spill is used.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Final block not known - keep track of subtree where restores reside.
		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a spill for v. We don't know where we want
	// to put it yet, so we leave it blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// We also don't know what the spill's arg will be.
	// Leave it argless for now.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}

// allocValToReg allocates v to a register selected from regMask and
// returns the register copy of v. Any previous user is kicked out and spilled
// (if necessary). Load code is added at the current pc. If nospill is set the
// allocated register is marked nospill so the assignment cannot be
// undone until the caller allows it by clearing nospill. Returns a
// *Value which is either v or a copy of v allocated to the chosen register.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	vi := &s.values[v.ID]

	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		return s.regs[r].c
	}

	// Allocate a register.
	r := s.allocReg(mask, v)

	// Allocate v to the new register.
	var c *Value
	if vi.regs != 0 {
		// Copy from a register that v is already in.
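		// (Any register recorded in vi.regs holds an up-to-date copy of v,
		// so the choice among them is arbitrary.)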
		r2 := pickReg(vi.regs)
		if s.regs[r2].v != v {
			panic("bad register state")
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoNoXPos(s.curBlock)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}
	s.setOrig(c, v)
	s.assignReg(r, v, c)
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}

// isLeaf reports whether f contains no calls (i.e., is a leaf function).
func isLeaf(f *Func) bool {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if opcodeTable[v.Op].call {
				return false
			}
		}
	}
	return true
}

func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, and g registers.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		}
	}
	// Make sure we found all required registers.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.ctxt.Flag_shared {
		switch s.f.Config.arch {
		case "ppc64le": // R2 already reserved.
			s.allocatable &^= 1 << 12 // R12
		}
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
		if s.f.Config.arch == "arm" && objabi.GOARM == 5 {
			// On ARMv5 we insert softfloat calls at each FP instruction.
			// This clobbers LR almost everywhere. Disable allocating LR
			// on ARMv5.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		switch s.f.Config.arch {
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "ppc64le": // R2 already reserved.
			s.allocatable &^= 1 << 12 // R12
		case "arm64":
			// nothing to do?
		case "386":
			// nothing to do.
			// Note that for Flag_shared (position independent code)
			// we do need to be careful, but that carefulness is hidden
			// in the rewrite rules so we always have a free register
			// available for global load/stores. See gen/386.rules (search for Flag_shared).
		case "s390x":
			// nothing to do, R10 & R11 already reserved
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}
	if s.f.Config.nacl {
		switch s.f.Config.arch {
		case "arm":
			s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm
		case "amd64p32":
			s.allocatable &^= 1 << 5  // BP - reserved for nacl
			s.allocatable &^= 1 << 15 // R15 - reserved for nacl
		}
	}
	if s.f.Config.use387 {
		s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go)
	}

	s.regs = make([]regState, s.numRegs)
	s.values = make([]valState, f.NumValues())
	s.orig = make([]*Value, f.NumValues())
	s.copies = make(map[*Value]bool)
	if s.f.Config.ctxt.Flag_locationlists {
		s.valueNames = make([][]LocalSlot, f.NumValues())
		for slot, values := range f.NamedValues {
			if isSynthetic(&slot) {
				continue
			}
			for _, value := range values {
				s.valueNames[value.ID] = append(s.valueNames[value.ID], slot)
			}
		}
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needReg is false for values returning Tuple types.
			// Instead, we mark the corresponding Selects as needReg.
		}
	}
	s.computeLive()

	// Compute block order. This array allows us to distinguish forward edges
	// from backward edges and compute how far they go.
	blockOrder := make([]int32, f.NumBlocks())
	for i, b := range f.Blocks {
		blockOrder[b.ID] = int32(i)
	}

	// Compute primary predecessors.
	s.primary = make([]int32, f.NumBlocks())
	for _, b := range f.Blocks {
		best := -1
		for i, e := range b.Preds {
			p := e.b
			if blockOrder[p.ID] >= blockOrder[b.ID] {
				continue // backward edge
			}
			if best == -1 || blockOrder[p.ID] > blockOrder[b.Preds[best].b.ID] {
				best = i
			}
		}
		s.primary[b.ID] = int32(best)
	}

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.sdom()
}

// Adds a use record for id at distance dist from the start of the block.
// All calls to addUse must happen with nonincreasing dist.
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
	r := s.freeUseRecords
	if r != nil {
		s.freeUseRecords = r.next
	} else {
		r = &use{}
	}
	r.dist = dist
	r.pos = pos
	r.next = s.values[id].uses
	s.values[id].uses = r
	if r.next != nil && dist > r.next.dist {
		s.f.Fatalf("uses added in wrong order")
	}
}

// advanceUses advances the uses of v's args from the state before v to the state after v.
// Any values which have no more uses are deallocated from registers.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil {
			// Value is dead, free all registers that hold it.
			s.freeRegs(ai.regs)
		}
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
}

// liveAfterCurrentInstruction reports whether v is live after
// the current instruction is completed. v must be used by the
// current instruction.
func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
	u := s.values[v.ID].uses
	d := u.dist
	for u != nil && u.dist == d {
		u = u.next
	}
	return u != nil && u.dist > d
}

// Sets the state of the registers to that encoded in regs.
func (s *regAllocState) setState(regs []endReg) {
	for s.used != 0 {
		s.freeOrResetReg(pickReg(s.used), true)
	}
	for _, x := range regs {
		s.assignReg(x.r, x.v, x.c)
	}
}

// compatRegs returns the set of registers which can store a type t.
func (s *regAllocState) compatRegs(t *types.Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsFloat() || t == types.TypeInt128 {
		m = s.f.Config.fpRegMask
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}

func (s *regAllocState) regalloc(f *Func) {
	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
	defer f.retSparseSet(regValLiveSet)
	var oldSched []*Value
	var phis []*Value
	var phiRegs []register
	var args []*Value

	// Data structure used for computing desired registers.
	var desired desiredState

	// Desired registers for inputs & outputs for each instruction in the block.
	type dentry struct {
		out [4]register    // desired output registers
		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
	}
	var dinfo []dentry

	if f.Entry != f.Blocks[0] {
		f.Fatalf("entry block must be first")
	}

	for _, b := range f.Blocks {
		if s.f.pass.debug > regDebug {
			fmt.Printf("Begin processing block %v\n", b)
		}
		s.curBlock = b

		// Initialize regValLiveSet and uses fields for this block.
		// Walk backwards through the block doing liveness analysis.
		regValLiveSet.clear()
		for _, e := range s.live[b.ID] {
			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
			regValLiveSet.add(e.ID)
		}
		if v := b.Control; v != nil && s.values[v.ID].needReg {
			s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value
			regValLiveSet.add(v.ID)
		}
		for i := len(b.Values) - 1; i >= 0; i-- {
			v := b.Values[i]
			regValLiveSet.remove(v.ID)
			if v.Op == OpPhi {
				// Remove v from the live set, but don't add
				// any inputs. This is the state the len(b.Preds)>1
				// case below desires; it wants to process phis specially.
				continue
			}
			if opcodeTable[v.Op].call {
				// Function call clobbers all the registers but SP and SB.
				regValLiveSet.clear()
				if s.sp != 0 && s.values[s.sp].uses != nil {
					regValLiveSet.add(s.sp)
				}
				if s.sb != 0 && s.values[s.sb].uses != nil {
					regValLiveSet.add(s.sb)
				}
			}
			for _, a := range v.Args {
				if !s.values[a.ID].needReg {
					continue
				}
				s.addUse(a.ID, int32(i), v.Pos)
				regValLiveSet.add(a.ID)
			}
		}
		if s.f.pass.debug > regDebug {
			fmt.Printf("uses for %s:%s\n", s.f.Name, b)
			for i := range s.values {
				vi := &s.values[i]
				u := vi.uses
				if u == nil {
					continue
				}
				fmt.Printf("  v%d:", i)
				for u != nil {
					fmt.Printf(" %d", u.dist)
					u = u.next
				}
				fmt.Println()
			}
		}

		// Make a copy of the block schedule so we can generate a new one in place.
		// We make a separate copy for phis and regular values.
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		phis = append(phis[:0], b.Values[:nphi]...)
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:0]

		// Initialize start state of block.
		if b == f.Entry {
			// Regalloc state is empty to start.
			if nphi > 0 {
				f.Fatalf("phis in entry block")
			}
		} else if len(b.Preds) == 1 {
			// Start regalloc state with the end state of the previous block.
			s.setState(s.endRegs[b.Preds[0].b.ID])
			if nphi > 0 {
				f.Fatalf("phis in single-predecessor block")
			}
			// Drop any values which are no longer live.
			// This may happen because at the end of p, a value may be
			// live but only used by some other successor of p.
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}
		} else {
			// This is the complicated case. We have more than one predecessor,
			// which means we may have Phi ops.

			// Start with the final register state of the primary predecessor
			idx := s.primary[b.ID]
			if idx < 0 {
				f.Fatalf("block with no primary predecessor %s", b)
			}
			p := b.Preds[idx].b
			s.setState(s.endRegs[p.ID])

			if s.f.pass.debug > regDebug {
				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
				for _, x := range s.endRegs[p.ID] {
					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
				}
			}

			// Decide on registers for phi ops. Use the registers determined
			// by the primary predecessor if we can.
			// TODO: pick best of (already processed) predecessors?
			// Majority vote? Deepest nesting level?
			phiRegs = phiRegs[:0]
			var phiUsed regMask
			for _, v := range phis {
				if !s.values[v.ID].needReg {
					phiRegs = append(phiRegs, noRegister)
					continue
				}
				a := v.Args[idx]
				// Some instructions target not-allocatable registers.
				// They're not suitable for further (phi-function) allocation.
				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
				if m != 0 {
					r := pickReg(m)
					phiUsed |= regMask(1) << r
					phiRegs = append(phiRegs, r)
				} else {
					phiRegs = append(phiRegs, noRegister)
				}
			}

			// Second pass - deallocate any phi inputs which are now dead.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				a := v.Args[idx]
				if !regValLiveSet.contains(a.ID) {
					// Input is dead beyond the phi, deallocate
					// anywhere else it might live.
					s.freeRegs(s.values[a.ID].regs)
				} else {
					// Input is still live.
					// Try to move it around before kicking out, if there is a free register.
					// We generate a Copy in the predecessor block and record it. It will be
					// deleted if never used.
					r := phiRegs[i]
					if r == noRegister {
						continue
					}
					// Pick a free register. At this point some registers used in the predecessor
					// block may have been deallocated. Those are the ones used for Phis. Exclude
					// them (and they are not going to be helpful anyway).
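					// If no suitable free register is found (or the input is rematerializeable,
					// or it already lives in more than one register), the input simply loses
					// this register; the merge-edge fixup can reload it from its spill if needed.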
					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
						r2 := pickReg(m)
						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
						s.copies[c] = false
						if s.f.pass.debug > regDebug {
							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
						}
						s.setOrig(c, a)
						s.assignReg(r2, a, c)
						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
					}
					s.freeReg(r)
				}
			}

			// Copy phi ops into new schedule.
			b.Values = append(b.Values, phis...)

			// Third pass - pick registers for phis whose inputs
			// were not in a register.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				if phiRegs[i] != noRegister {
					continue
				}
				if s.f.Config.use387 && v.Type.IsFloat() {
					continue // 387 can't handle floats in registers between blocks
				}
				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
				if m != 0 {
					r := pickReg(m)
					phiRegs[i] = r
					phiUsed |= regMask(1) << r
				}
			}

			// Set registers for phis. Add phi spill code.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				r := phiRegs[i]
				if r == noRegister {
					// stack-based phi
					// Spills will be inserted in all the predecessors below.
					s.values[v.ID].spill = v // v starts life spilled
					continue
				}
				// register-based phi
				s.assignReg(r, v, v)
			}

			// Deallocate any values which are no longer live. Phis are excluded.
			for r := register(0); r < s.numRegs; r++ {
				if phiUsed>>r&1 != 0 {
					continue
				}
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}

			// Save the starting state for use by merge edges.
			var regList []startReg
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if phiUsed>>r&1 != 0 {
					// Skip registers that phis used, we'll handle those
					// specially during merge edge processing.
					continue
				}
				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
			}
			s.startRegs[b.ID] = regList

			if s.f.pass.debug > regDebug {
				fmt.Printf("after phis\n")
				for _, x := range s.startRegs[b.ID] {
					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
				}
			}
		}

		// Allocate space to record the desired registers for each value.
		dinfo = dinfo[:0]
		for i := 0; i < len(oldSched); i++ {
			dinfo = append(dinfo, dentry{})
		}

		// Load static desired register info at the end of the block.
		desired.copy(&s.desired[b.ID])

		// Check actual assigned registers at the start of the next block(s).
		// Dynamically assigned registers will trump the static
		// desired registers computed during liveness analysis.
		// Note that we do this phase after startRegs is set above, so that
		// we get the right behavior for a block which branches to itself.
		for _, e := range b.Succs {
			succ := e.b
			// TODO: prioritize likely successor?
			for _, x := range s.startRegs[succ.ID] {
				desired.add(x.v.ID, x.r)
			}
			// Process phi ops in succ.
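			// A phi in succ consumes its pidx'th argument along this edge, so that
			// argument would like to end up in the register already chosen for the phi.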
			pidx := e.i
			for _, v := range succ.Values {
				if v.Op != OpPhi {
					continue
				}
				if !s.values[v.ID].needReg {
					continue
				}
				rp, ok := s.f.getHome(v.ID).(*Register)
				if !ok {
					continue
				}
				desired.add(v.Args[pidx].ID, register(rp.num))
			}
		}
		// Walk values backwards computing desired register info.
		// See computeLive for more comments.
		for i := len(oldSched) - 1; i >= 0; i-- {
			v := oldSched[i]
			prefs := desired.remove(v.ID)
			desired.clobber(opcodeTable[v.Op].reg.clobbers)
			for _, j := range opcodeTable[v.Op].reg.inputs {
				if countRegs(j.regs) != 1 {
					continue
				}
				desired.clobber(j.regs)
				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
			}
			if opcodeTable[v.Op].resultInArg0 {
				if opcodeTable[v.Op].commutative {
					desired.addList(v.Args[1].ID, prefs)
				}
				desired.addList(v.Args[0].ID, prefs)
			}
			// Save desired registers for this value.
			dinfo[i].out = prefs
			for j, a := range v.Args {
				if j >= len(dinfo[i].in) {
					break
				}
				dinfo[i].in[j] = desired.get(a.ID)
			}
		}

		// Process all the non-phi values.
		for idx, v := range oldSched {
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing %s\n", v.LongString())
			}
			regspec := opcodeTable[v.Op].reg
			if v.Op == OpPhi {
				f.Fatalf("phi %s not at start of block", v)
			}
			if v.Op == OpSP {
				s.assignReg(s.SPReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sp = v.ID
				continue
			}
			if v.Op == OpSB {
				s.assignReg(s.SBReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sb = v.ID
				continue
			}
			if v.Op == OpSelect0 || v.Op == OpSelect1 {
				if s.values[v.ID].needReg {
					var i = 0
					if v.Op == OpSelect1 {
						i = 1
					}
					s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
				}
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				goto issueSpill
			}
			if v.Op == OpGetG && s.f.Config.hasGReg {
				// use hardware g register
				if s.regs[s.GReg].v != nil {
					s.freeReg(s.GReg) // kick out the old value
				}
				s.assignReg(s.GReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				goto issueSpill
			}
			if v.Op == OpArg {
				// Args are "pre-spilled" values. We don't allocate
				// any register here. We just set up the spill pointer to
				// point at itself and any later user will restore it to use it.
				s.values[v.ID].spill = v
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpKeepAlive {
				// Make sure the argument to v is still live here.
				s.advanceUses(v)
				vi := &s.values[v.Args[0].ID]
				if vi.spill != nil {
					// Use the spill location.
					v.SetArg(0, vi.spill)
				} else {
					// No need to keep unspilled values live.
					// These are typically rematerializeable constants like nil,
					// or values of a variable that were modified since the last call.
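					// Degrade the KeepAlive to a plain copy of its memory argument;
					// there is nothing on the stack that needs keeping alive.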
					v.Op = OpCopy
					v.SetArgs1(v.Args[1])
				}
				b.Values = append(b.Values, v)
				continue
			}
			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
				// No register allocation required (or none specified yet)
				s.freeRegs(regspec.clobbers)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}

			if s.values[v.ID].rematerializeable {
				// Value is rematerializeable, don't issue it here.
				// It will get issued just before each use (see
				// allocValueToReg).
				for _, a := range v.Args {
					a.Uses--
				}
				s.advanceUses(v)
				continue
			}

			if s.f.pass.debug > regDebug {
				fmt.Printf("value %s\n", v.LongString())
				fmt.Printf("  out:")
				for _, r := range dinfo[idx].out {
					if r != noRegister {
						fmt.Printf(" %s", &s.registers[r])
					}
				}
				fmt.Println()
				for i := 0; i < len(v.Args) && i < 3; i++ {
					fmt.Printf("  in%d:", i)
					for _, r := range dinfo[idx].in[i] {
						if r != noRegister {
							fmt.Printf(" %s", &s.registers[r])
						}
					}
					fmt.Println()
				}
			}

			// Move arguments to registers. Process in an ordering defined
			// by the register specification (most constrained first).
			args = append(args[:0], v.Args...)
			for _, i := range regspec.inputs {
				mask := i.regs
				if mask&s.values[args[i.idx].ID].regs == 0 {
					// Need a new register for the input.
					mask &= s.allocatable
					mask &^= s.nospill
					// Use desired register if available.
					if i.idx < 3 {
						for _, r := range dinfo[idx].in[i.idx] {
							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
								// Desired register is allowed and unused.
								mask = regMask(1) << r
								break
							}
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
				}
				args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos)
			}

			// If the output clobbers the input register, make sure we have
			// at least two copies of the input register so we don't
			// have to reload the value from the spill location.
			if opcodeTable[v.Op].resultInArg0 {
				var m regMask
				if !s.liveAfterCurrentInstruction(v.Args[0]) {
					// arg0 is dead. We can clobber its register.
					goto ok
				}
				if s.values[v.Args[0].ID].rematerializeable {
					// We can rematerialize the input, don't worry about clobbering it.
					goto ok
				}
				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
					// we have at least 2 copies of arg0. We can afford to clobber one.
					goto ok
				}
				if opcodeTable[v.Op].commutative {
					if !s.liveAfterCurrentInstruction(v.Args[1]) {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
					if s.values[v.Args[1].ID].rematerializeable {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
					if countRegs(s.values[v.Args[1].ID].regs) >= 2 {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
				}

				// We can't overwrite arg0 (or arg1, if commutative). So we
				// need to make a copy of an input so we have a register we can modify.

				// Possible new registers to copy into.
				m = s.compatRegs(v.Args[0].Type) &^ s.used
				if m == 0 {
					// No free registers. In this case we'll just clobber
					// an input and future uses of that input must use a restore.
					// TODO(khr): We should really do this like allocReg does it,
					// spilling the value with the most distant next use.
					goto ok
				}

				// Try to move an input to the desired output.
				for _, r := range dinfo[idx].out {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
						// Note: we update args[0] so the instruction will
						// use the register copy we just made.
						goto ok
					}
				}
				// Try to copy input to its desired location & use its old
				// location as the result register.
				for _, r := range dinfo[idx].in[0] {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
						s.copies[c] = false
						// Note: no update to args[0] so the instruction will
						// use the original copy.
						goto ok
					}
				}
				if opcodeTable[v.Op].commutative {
					for _, r := range dinfo[idx].in[1] {
						if r != noRegister && m>>r&1 != 0 {
							m = regMask(1) << r
							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
							s.copies[c] = false
							args[0], args[1] = args[1], args[0]
							goto ok
						}
					}
				}
				// Avoid future fixed uses if we can.
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				// Save input 0 to a new register so we can clobber it.
				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
				s.copies[c] = false
			}

		ok:
			// Now that all args are in regs, we're ready to issue the value itself.
			// Before we pick a register for the output value, allow input registers
			// to be deallocated. We do this here so that the output can use the
			// same register as a dying input.
			if !opcodeTable[v.Op].resultNotInArgs {
				s.tmpused = s.nospill
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}

			// Dump any registers which will be clobbered
			s.freeRegs(regspec.clobbers)
			s.tmpused |= regspec.clobbers

			// Pick registers for outputs.
			{
				outRegs := [2]register{noRegister, noRegister}
				var used regMask
				for _, out := range regspec.outputs {
					mask := out.regs & s.allocatable &^ used
					if mask == 0 {
						continue
					}
					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
						if !opcodeTable[v.Op].commutative {
							// Output must use the same register as input 0.
							r := register(s.f.getHome(args[0].ID).(*Register).num)
							mask = regMask(1) << r
						} else {
							// Output must use the same register as input 0 or 1.
							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
							// Check r0 and r1 for desired output register.
							found := false
							for _, r := range dinfo[idx].out {
								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
									mask = regMask(1) << r
									found = true
									if r == r1 {
										args[0], args[1] = args[1], args[0]
									}
									break
								}
							}
							if !found {
								// Neither are desired, pick r0.
								mask = regMask(1) << r0
							}
						}
					}
					for _, r := range dinfo[idx].out {
						if r != noRegister && (mask&^s.used)>>r&1 != 0 {
							// Desired register is allowed and unused.
							mask = regMask(1) << r
							break
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
					r := s.allocReg(mask, v)
					outRegs[out.idx] = r
					used |= regMask(1) << r
					s.tmpused |= regMask(1) << r
				}
				// Record register choices
				if v.Type.IsTuple() {
					var outLocs LocPair
					if r := outRegs[0]; r != noRegister {
						outLocs[0] = &s.registers[r]
					}
					if r := outRegs[1]; r != noRegister {
						outLocs[1] = &s.registers[r]
					}
					s.f.setHome(v, outLocs)
					// Note that subsequent SelectX instructions will do the assignReg calls.
				} else {
					if r := outRegs[0]; r != noRegister {
						s.assignReg(r, v, v)
					}
				}
			}

			// deallocate dead args, if we have not done so
			if opcodeTable[v.Op].resultNotInArgs {
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}
			s.tmpused = 0

			// Issue the Value itself.
			for i, a := range args {
				v.SetArg(i, a) // use register version of arguments
			}
			b.Values = append(b.Values, v)

		issueSpill:
		}

		// Load control value into reg.
		if v := b.Control; v != nil && s.values[v.ID].needReg {
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing control %s\n", v.LongString())
			}
			// We assume that a control input can be passed in any
			// type-compatible register. If this turns out not to be true,
			// we'll need to introduce a regspec for a block's control value.
			b.Control = s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos)
			if b.Control != v {
				v.Uses--
				b.Control.Uses++
			}
			// Remove this use from the uses list.
			vi := &s.values[v.ID]
			u := vi.uses
			vi.uses = u.next
			if u.next == nil {
				s.freeRegs(vi.regs) // value is dead
			}
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}

		// Spill any values that can't live across basic block boundaries.
		if s.f.Config.use387 {
			s.freeRegs(s.f.Config.fpRegMask)
		}

		// If we are approaching a merge point and we are the primary
		// predecessor of it, find live values that we use soon after
		// the merge point and promote them to registers now.
		if len(b.Succs) == 1 {
			// For this to be worthwhile, the loop must have no calls in it.
			top := b.Succs[0].b
			loop := s.loopnest.b2l[top.ID]
			if loop == nil || loop.header != top || loop.containsCall {
				goto badloop
			}

			// TODO: sort by distance, pick the closest ones?
			for _, live := range s.live[b.ID] {
				if live.dist >= unlikelyDistance {
					// Don't preload anything live after the loop.
					continue
				}
				vid := live.ID
				vi := &s.values[vid]
				if vi.regs != 0 {
					continue
				}
				if vi.rematerializeable {
					continue
				}
				v := s.orig[vid]
				if s.f.Config.use387 && v.Type.IsFloat() {
					continue // 387 can't handle floats in registers between blocks
				}
				m := s.compatRegs(v.Type) &^ s.used
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				if m != 0 {
					s.allocValToReg(v, m, false, b.Pos)
				}
			}
		}
	badloop:
		;

		// Save end-of-block register state.
		// First count how many, this cuts allocations in half.
		k := 0
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			k++
		}
		regList := make([]endReg, 0, k)
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			regList = append(regList, endReg{r, v, s.regs[r].c})
		}
		s.endRegs[b.ID] = regList

		if checkEnabled {
			regValLiveSet.clear()
			for _, x := range s.live[b.ID] {
				regValLiveSet.add(x.ID)
			}
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if !regValLiveSet.contains(v.ID) {
					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
				}
			}
		}

		// If a value is live at the end of the block and
		// isn't in a register, generate a use for the spill location.
		// We need to remember this information so that
		// the liveness analysis in stackalloc is correct.
		for _, e := range s.live[b.ID] {
			vi := &s.values[e.ID]
			if vi.regs != 0 {
				// in a register, we'll use that source for the merge.
				continue
			}
			if vi.rematerializeable {
				// we'll rematerialize during the merge.
				continue
			}
			//fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
			spill := s.makeSpill(s.orig[e.ID], b)
			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
		}

		// Clear any final uses.
		// All that is left should be the pseudo-uses added for values which
		// are live at the end of b.
		for _, e := range s.live[b.ID] {
			u := s.values[e.ID].uses
			if u == nil {
				f.Fatalf("live at end, no uses v%d", e.ID)
			}
			if u.next != nil {
				f.Fatalf("live at end, too many uses v%d", e.ID)
			}
			s.values[e.ID].uses = nil
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}
	}

	// Decide where the spills we generated will go.
	s.placeSpills()

	// Anything that didn't get a register gets a stack location here.
	// (StoreReg, stack-based phis, inputs, ...)
	stacklive := stackalloc(s.f, s.spillLive)

	// Fix up all merge edges.
	s.shuffle(stacklive)

	// Erase any copies we never used.
	// Also, an unused copy might be the only use of another copy,
	// so continue erasing until we reach a fixed point.
	for {
		progress := false
		for c, used := range s.copies {
			if !used && c.Uses == 0 {
				if s.f.pass.debug > regDebug {
					fmt.Printf("delete copied value %s\n", c.LongString())
				}
				c.RemoveArg(0)
				f.freeValue(c)
				delete(s.copies, c)
				progress = true
			}
		}
		if !progress {
			break
		}
	}

	for _, b := range f.Blocks {
		i := 0
		for _, v := range b.Values {
			if v.Op == OpInvalid {
				continue
			}
			b.Values[i] = v
			i++
		}
		b.Values = b.Values[:i]
	}
}

func (s *regAllocState) placeSpills() {
	f := s.f

	// Precompute some useful info.
	phiRegs := make([]regMask, f.NumBlocks())
	for _, b := range f.Blocks {
		var m regMask
		for _, v := range b.Values {
			if v.Op == OpRegKill {
				continue
			}
			if v.Op != OpPhi {
				break
			}
			if r, ok := f.getHome(v.ID).(*Register); ok {
				m |= regMask(1) << uint(r.num)
			}
		}
		phiRegs[b.ID] = m
	}

	// Start maps block IDs to the list of spills
	// that go at the start of the block (but after any phis).
	start := map[ID][]*Value{}
	// After maps value IDs to the list of spills
	// that go immediately after that value ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up,
			// like OpArgs and stack-based phis.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v. At the start "best" is the best place
		// we have found so far.
		// TODO: find a way to make this O(1) without arbitrary cutoffs.
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after the phis).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && v.Op != OpPhi {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range f.Blocks {
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpRegKill && v.Op != OpPhi {
				break
			}
			nphi++
		}
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:nphi]
		for _, v := range start[b.ID] {
			b.Values = append(b.Values, v)
		}
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			for _, w := range after[v.ID] {
				b.Values = append(b.Values, w)
			}
		}
	}
}

// shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
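// For each such edge it emits the moves needed to get every live value from
// wherever the predecessor left it (a register or a spill slot) to wherever
// the successor block expects it.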
func (s *regAllocState) shuffle(stacklive [][]ID) {
	var e edgeState
	e.s = s
	e.cache = map[ID][]*Value{}
	e.contents = map[Location]contentRecord{}
	if s.f.pass.debug > regDebug {
		fmt.Printf("shuffle %s\n", s.f.Name)
		fmt.Println(s.f.String())
	}

	for _, b := range s.f.Blocks {
		if len(b.Preds) <= 1 {
			continue
		}
		e.b = b
		for i, edge := range b.Preds {
			p := edge.b
			e.p = p
			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
			e.process()
		}
	}
}

type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p->b.

	// for each pre-regalloc value, a list of equivalent cached values
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration

	// map from location to the value it contains
	contents map[Location]contentRecord

	// desired destination locations
	destinations []dstRecord
	extra        []dstRecord

	usedRegs   regMask // registers currently holding something
	uniqueRegs regMask // registers holding the only copy of a value
	finalRegs  regMask // registers holding final target
}

type contentRecord struct {
	vid   ID       // pre-regalloc value
	c     *Value   // cached value
	final bool     // this is a satisfied destination
	pos   src.XPos // source position of use of the value
}

type dstRecord struct {
	loc    Location // register or stack slot
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // place to store reference to the generating instruction
	pos    src.XPos // source position of use of this location
}

// setup initializes the edge state for shuffling.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state.
	for _, vid := range e.cachedVals {
		delete(e.cache, vid)
	}
	e.cachedVals = e.cachedVals[:0]
	for k := range e.contents {
		delete(e.contents, k)
	}
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0

	// Live registers can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care the position of the source
	}
	// So can all of the spill locations.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.isAncestorEq(spill.Block, e.p) {
			// Spills were placed that only dominate the uses found
			// during the first regalloc pass. The edge fixup code
			// can't use a spill location if the spill doesn't dominate
			// the edge.
			// We are guaranteed that if the spill doesn't dominate this edge,
			// then the value is available in a register (because we called
			// makeSpill for every value not in a register at the start
			// of an edge).
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care the position of the source
	}

	// Figure out all the destinations we need.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis need their args to end up in a specific location.
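	// The phi itself was given a home (register or stack slot) when its block
	// was processed; the argument flowing in along this edge must be moved there.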
	for _, v := range e.b.Values {
		if v.Op == OpRegKill {
			continue
		}
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}

// process generates code to move all the values to the right destination locations.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations we generated.
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress. That means that any
		// remaining unsatisfied moves are in simple cycles.
		// For example, A -> B -> C -> D -> A.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C

		// To break the cycle, we pick an unused register, say R,
		// and put a copy of B there.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C <---- R=copyofB
		// When we resume the outer loop, the A->B move can now proceed,
		// and eventually the whole cycle completes.

		// Copy any cycle location to a temp register. This duplicates
		// one of the cycle entries, allowing the just duplicated value
		// to be overwritten and the cycle to proceed.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(d.pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(d.pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, d.pos)
	}
}

// processDest generates code to put value vid into location loc. Returns true
// if progress was made.
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the correct place.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// Note: if splice==nil then c will appear dead. This is
		// non-SSA-form code, so be careful after this pass not to run
		// deadcode elimination.
		if _, ok := e.s.copies[occupant.c]; ok {
			// The copy at occupant.c was used to avoid spill.
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location.
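	// Editor's note (illustrative, hypothetical values): suppose we want v5 in AX,
	// but AX currently holds the only copy of v9 and v9 is not rematerializeable.
	// The check below fails the move for now (return false); process keeps this
	// dstRecord on its worklist and retries once v9 has been copied elsewhere,
	// either by another destination that wants v9 or by the cycle-breaking copy.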
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
		// We can't overwrite the last copy
		// of a value that needs to survive.
		return false
	}

	// Copy from a source of v, register preferred.
	v := e.s.orig[vid]
	var c *Value
	var src Location
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc)
		fmt.Printf("sources of v%d:", vid)
	}
	for _, w := range e.cache[vid] {
		h := e.s.f.getHome(w.ID)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf(" %s:%s", h, w)
		}
		_, isreg := h.(*Register)
		if src == nil || isreg {
			c = w
			src = h
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src)
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)

	// Pre-clobber destination. This avoids the
	// following situation:
	//   - v is currently held in R0 and stacktmp0.
	//   - We want to copy stacktmp1 to stacktmp0.
	//   - We choose R0 as the temporary register.
	// During the copy, both R0 and stacktmp0 are
	// clobbered, losing both copies of v. Oops!
	// Erasing the destination early means R0 will not
	// be chosen as the temp register, as it will then
	// be the last copy of v.
	e.erase(loc)
	var x *Value
	if c == nil {
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyIntoNoXPos(e.p)
		} else {
			// Rematerialize into stack slot. Need a free
			// register to accomplish this.
			r := e.findRegFor(v.Type)
			e.erase(r)
			x = v.copyIntoNoXPos(e.p)
			e.set(r, vid, x, false, pos)
			// Make sure we spill with the size of the slot, not the
			// size of x (which might be wider due to our dropping
			// of narrowing conversions).
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit move from src to dst.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem. Use temp register.
				r := e.findRegFor(c.Type)
				e.erase(r)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}

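// Editor's summary of the moves processDest can emit for "put vid into loc"
// (illustrative only; c is the chosen cached copy of the value):
//
//	register -> register:   x = Copy c     : loc
//	register -> stack slot: x = StoreReg c
//	stack    -> register:   x = LoadReg c  : loc
//	stack    -> stack:      t = LoadReg c : tmp; x = StoreReg t
//	no copy available:      rematerialize the value with copyIntoNoXPos,
//	                        routing through a temporary register if loc is a slot.
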
// set changes the contents of location loc to hold the given value and its cached representative.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}

// erase removes whatever cached value currently occupies loc.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Make sure it gets added to the tail of the destination queue
		// so we make progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove c from the list of cached values.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			if e.s.f.Config.ctxt.Flag_locationlists {
				if _, isReg := loc.(*Register); isReg && int(c.ID) < len(e.s.valueNames) && len(e.s.valueNames[c.ID]) != 0 {
					kill := e.p.NewValue0(src.NoXPos, OpRegKill, types.TypeVoid)
					e.s.f.setHome(kill, loc)
					for _, name := range e.s.valueNames[c.ID] {
						e.s.f.NamedValues[name] = append(e.s.f.NamedValues[name], kill)
					}
				}
			}

			break
		}
	}
	e.cache[vid] = a

	// Update register masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
	}
	if len(a) == 1 {
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}

// findRegFor finds a register we can use to make a temp copy of type typ.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Which registers are possibilities.
	var m regMask
	types := &e.s.f.Config.Types
	if typ.IsFloat() {
		m = e.s.compatRegs(types.Float64)
	} else {
		m = e.s.compatRegs(types.Int64)
	}

	// Pick a register. In priority order:
	// 1) an unused register
	// 2) a non-unique register not holding a final value
	// 3) a non-unique register
	// 4) TODO: a register holding a rematerializeable value
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is available.
	// Pick a register to spill.
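	// Editor's note (illustrative): if, say, every allowed register holds the
	// unique copy of a final value, the loop below picks one of them - e.g. DX
	// holding v12 - stores v12 into a freshly allocated int64 auto slot (unless
	// v12 is rematerializeable, in which case no store is needed), records the
	// slot as another home for v12, and returns DX for the caller to clobber.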
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a temp location to spill a register to.
					// The type of the slot is immaterial - it will not be live across
					// any safepoint. Just use a type big enough to hold any register.
					t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64}
					// TODO: reuse these slots. They'll need to be erased first.
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r will now be overwritten by the caller. At some point
				// later, the newly saved value will be moved back to its
				// final destination in processDest.
				return r
			}
		}
	}

	fmt.Printf("m:%d unique:%d final:%d\n", m, e.uniqueRegs, e.finalRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}

// rematerializeable reports whether the register allocator should recompute
// a value instead of spilling/restoring it.
func (v *Value) rematerializeable() bool {
	if !opcodeTable[v.Op].rematerializeable {
		return false
	}
	for _, a := range v.Args {
		// SP and SB (generated by OpSP and OpSB) are always available.
		if a.Op != OpSP && a.Op != OpSB {
			return false
		}
	}
	return true
}

type liveInfo struct {
	ID   ID       // ID of value
	dist int32    // # of instructions before next use
	pos  src.XPos // source position of next use
}

// dblock contains information about desired & avoid registers at the end of a block.
type dblock struct {
	prefers []desiredStateEntry
	avoid   regMask
}

// computeLive computes a map from block ID to a list of value IDs live at the end
// of that block. Each value ID is paired with a count of how many instructions
// remain until its next use. The resulting map is stored in s.live.
// computeLive also computes the desired register information at the end of each block.
// This desired register information is stored in s.desired.
// TODO: this could be quadratic if lots of variables are live across lots of
// basic blocks. Figure out a way to make this function (or, more precisely, the user
// of this function) require only linear size & time.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := newSparseMap(f.NumValues())
	t := newSparseMap(f.NumValues())

	// Keep track of which value we want in each register.
	var desired desiredState

	// Instead of iterating over f.Blocks, iterate over their postordering.
	// Liveness information flows backward, so starting at the end
	// increases the probability that we will stabilize quickly.
	// TODO: Do a better job yet. Here's one possibility:
	// Calculate the dominator tree and locate all strongly connected components.
	// If a value is live in one block of an SCC, it is live in all.
	// Walk the dominator tree from end to beginning, just once, treating SCC
	// components as single blocks, duplicating the calculated liveness information
	// out to all of them.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.calculateDepths()
	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			// Add len(b.Values) to adjust from end-of-block distance
			// to beginning-of-block distance.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Mark control value as live.
			if b.Control != nil && s.values[b.Control.ID].needReg {
				live.set(b.Control.ID, int32(len(b.Values)), b.Pos)
			}

			// Propagate backwards to the start of the block.
			// Assumes Values have been scheduled.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// save phi ops for later
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}
			// Propagate desired registers backwards.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// TODO: if v is a phi, save desired register for phi inputs.
					// For now, we just drop it and don't propagate
					// desired registers back through phi nodes.
					continue
				}
				// Cancel desired registers if they get clobbered.
				desired.clobber(opcodeTable[v.Op].reg.clobbers)
				// Update desired registers if there are any fixed register inputs.
				for _, j := range opcodeTable[v.Op].reg.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Set desired register of input 0 if this is a 2-operand instruction.
				if opcodeTable[v.Op].resultInArg0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its list of live-at-end values.
			// invariant: live contains the values live at the start of b (excluding phi inputs)
			for i, e := range b.Preds {
				p := e.b
				// Compute additional distance for the edge.
				// Note: delta must be at least 1 to distinguish the control
				// value use from the first user in a successor block.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Update any desired registers at the end of p.
				s.desired[p.ID].merge(&desired)

				// Start t off with the previously known live values at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values from scanning this block.
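				// Editor's note (illustrative numbers): if v7 is live at the
				// start of b with distance 3 and this edge's delta is
				// normalDistance (10), the candidate distance at the end of p
				// is 13; it is recorded only if p does not already know about
				// v7, or currently has it at a larger distance.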
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.aux)
					}
				}
				// Also add the correct arg from the saved phi values.
				// All phis are at distance delta (we consider them
				// simultaneously happening at the start of the block).
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// The live set has changed, update it.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.aux})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf(" %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d", x.ID)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(&s.registers[r])
						first = false
					}
					fmt.Printf("]")
				}
			}
			fmt.Printf(" avoid=%x", int64(s.desired[b.ID].avoid))
			fmt.Println()
		}
	}
}

// A desiredState represents desired register assignments.
type desiredState struct {
	// Desired assignments will be small, so we just use a list
	// of valueID+registers entries.
	entries []desiredStateEntry
	// Registers that other values want to be in. This value will
	// contain at least the union of the regs fields of entries, but
	// may also contain registers for values that were once in
	// this data structure but are no longer.
	avoid regMask
}
type desiredStateEntry struct {
	// (pre-regalloc) value
	ID ID
	// Registers it would like to be in, in priority order.
	// Unused slots are filled with noRegister.
	regs [4]register
}

func (d *desiredState) clear() {
	d.entries = d.entries[:0]
	d.avoid = 0
}

// get returns a list of desired registers for value vid.
func (d *desiredState) get(vid ID) [4]register {
	for _, e := range d.entries {
		if e.ID == vid {
			return e.regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

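// Editor's sketch of the shape of a desiredState (illustrative values and
// amd64 register names):
//
//	entries: [{ID: 7, regs: [AX, CX, noRegister, noRegister]},
//	          {ID: 9, regs: [DX, noRegister, noRegister, noRegister]}]
//	avoid:   mask containing at least AX|CX|DX
//
// get(7) then returns [AX, CX, noRegister, noRegister], while get(8), an
// unknown value, returns four noRegister slots so callers can iterate
// without a presence check.
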
// add records that we'd like value vid to be in register r.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and highest priority
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Move from lower priority to top priority
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}

func (d *desiredState) addList(vid ID, regs [4]register) {
	// regs is in priority order, so iterate in reverse order.
	for i := len(regs) - 1; i >= 0; i-- {
		r := regs[i]
		if r != noRegister {
			d.add(vid, r)
		}
	}
}

// clobber erases any desired registers in the set m.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No more desired registers for this value.
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}

// copy copies a desired state from another desiredState x.
func (d *desiredState) copy(x *desiredState) {
	d.entries = append(d.entries[:0], x.entries...)
	d.avoid = x.avoid
}

// remove removes the desired registers for vid and returns them.
func (d *desiredState) remove(vid ID) [4]register {
	for i := range d.entries {
		if d.entries[i].ID == vid {
			regs := d.entries[i].regs
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			return regs
		}
	}
	return [4]register{noRegister, noRegister, noRegister, noRegister}
}

// merge merges another desired state x into d.
func (d *desiredState) merge(x *desiredState) {
	d.avoid |= x.avoid
	// There should only be a few desired registers, so
	// linear insert is ok.
	for _, e := range x.entries {
		d.addList(e.ID, e.regs)
	}
}

func min32(x, y int32) int32 {
	if x < y {
		return x
	}
	return y
}
func max32(x, y int32) int32 {
	if x > y {
		return x
	}
	return y
}
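
// Editor's worked example (illustrative, not part of the allocator) of the
// desiredState priority rules defined above, using hypothetical value ID 7
// and register numbers 0 and 3:
//
//	var d desiredState
//	d.add(7, 3)       // get(7) == [3, noRegister, noRegister, noRegister]
//	d.add(7, 0)       // r0 becomes top priority: [0, 3, noRegister, noRegister]
//	d.clobber(1 << 0) // r0 is clobbered: [3, noRegister, noRegister, noRegister]
//	d.add(7, 3)       // already top priority: no change
//
// merge applies the same addList/add logic, so repeated merging keeps at most
// four distinct preferred registers per value, highest priority first.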