github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/compile/internal/gc/reg.go (about) 1 // Derived from Inferno utils/6c/reg.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
package gc

import (
	"bytes"
	"cmd/internal/obj"
	"fmt"
	"sort"
	"strings"
)

// A Var represents a single variable that may be stored in a register.
// That variable may itself correspond to a hardware register,
// to represent the use of registers in the unoptimized instruction stream.
type Var struct {
	offset     int64 // memory offset of this word within node
	node       *Node // the Go variable (or pseudo-register node) this word belongs to
	nextinnode *Var  // next tracked word in the same node (linked list headed at node.Opt())
	width      int   // width in bytes, used for overlap checks
	id         int   // index in vars
	name       int8  // address class (an obj.NAME_* value; see mkvar)
	etype      EType
	addr       int8 // nonzero disables registerization (1 = address taken/overlap, 2 = set by setaddrs)
}

// Bits represents a set of Vars, stored as a bit set of var numbers
// (the index in vars, or equivalently v.id).
type Bits struct {
	b [BITS]uint64
}

const (
	BITS = 3          // number of 64-bit words in a Bits set
	NVAR = BITS * 64  // maximum number of variables tracked per function
)

var (
	vars [NVAR]Var // variables under consideration
	nvar int       // number of vars

	regbits uint64 // bits for hardware registers

	zbits   Bits // zero
	externs Bits // global variables
	params  Bits // function parameters and results
	ivar    Bits // function parameters (inputs)
	ovar    Bits // function results (outputs)
	consts  Bits // constant values
	addrs   Bits // variables with address taken
)

// A Reg is a wrapper around a single Prog (one instruction) that holds
// register optimization information while the optimizer runs.
// r->prog is the instruction.
type Reg struct {
	set  Bits // regopt variables written by this instruction.
	use1 Bits // regopt variables read by prog->from.
	use2 Bits // regopt variables read by prog->to.

	// refahead/refbehind are the regopt variables whose current
	// value may be used in the following/preceding instructions
	// up to a CALL (or the value is clobbered).
	refbehind Bits
	refahead  Bits

	// calahead/calbehind are similar, but for variables in
	// instructions that are reachable after hitting at least one
	// CALL.
	calbehind Bits
	calahead  Bits

	regdiff Bits   // maintained by synch (pass 4): register/memory synchrony info
	act     Bits   // scratch bit set, reused by several passes (see pass 2.5 and pass 5)
	regu    uint64 // register used bitmap
}

// A Rgn represents a single regopt variable over a region of code
// where a register could potentially be dedicated to that variable.
// The code encompassed by a Rgn is defined by the flow graph,
// starting at enter, flood-filling forward while varno is refahead
// and backward while varno is refbehind, and following branches.
// A single variable may be represented by multiple disjoint Rgns and
// each Rgn may choose a different register for that variable.
// Registers are allocated to regions greedily in order of descending
// cost.
type Rgn struct {
	enter *Flow
	cost  int16
	varno int16
	regno int16
}

// The Plan 9 C compilers used a limit of 600 regions,
// but the yacc-generated parser in y.go has 3100 regions.
// We set MaxRgn large enough to handle that.
// There's not a huge cost to having too many regions:
// the main processing traces the live area for each variable,
// which is limited by the number of variables times the area,
// not the raw region count. If there are many regions, they
// are almost certainly small and easy to trace.
// The only operation that scales with region count is the
// sorting by cost, which uses sort.Sort and is therefore
// guaranteed n log n.
133 const MaxRgn = 6000 134 135 var ( 136 region []Rgn 137 nregion int 138 ) 139 140 type rcmp []Rgn 141 142 func (x rcmp) Len() int { 143 return len(x) 144 } 145 146 func (x rcmp) Swap(i, j int) { 147 x[i], x[j] = x[j], x[i] 148 } 149 150 func (x rcmp) Less(i, j int) bool { 151 p1 := &x[i] 152 p2 := &x[j] 153 if p1.cost != p2.cost { 154 return int(p2.cost)-int(p1.cost) < 0 155 } 156 if p1.varno != p2.varno { 157 return int(p2.varno)-int(p1.varno) < 0 158 } 159 if p1.enter != p2.enter { 160 return int(p2.enter.Id-p1.enter.Id) < 0 161 } 162 return false 163 } 164 165 func setaddrs(bit Bits) { 166 var i int 167 var n int 168 var v *Var 169 var node *Node 170 171 for bany(&bit) { 172 // convert each bit to a variable 173 i = bnum(&bit) 174 175 node = vars[i].node 176 n = int(vars[i].name) 177 biclr(&bit, uint(i)) 178 179 // disable all pieces of that variable 180 for i = 0; i < nvar; i++ { 181 v = &vars[i] 182 if v.node == node && int(v.name) == n { 183 v.addr = 2 184 } 185 } 186 } 187 } 188 189 var regnodes [64]*Node 190 191 func walkvardef(n *Node, f *Flow, active int) { 192 var f1 *Flow 193 var bn int 194 var v *Var 195 196 for f1 = f; f1 != nil; f1 = f1.S1 { 197 if f1.Active == int32(active) { 198 break 199 } 200 f1.Active = int32(active) 201 if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n { 202 break 203 } 204 for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode { 205 bn = v.id 206 biset(&(f1.Data.(*Reg)).act, uint(bn)) 207 } 208 209 if f1.Prog.As == obj.ACALL { 210 break 211 } 212 } 213 214 for f2 := f; f2 != f1; f2 = f2.S1 { 215 if f2.S2 != nil { 216 walkvardef(n, f2.S2, active) 217 } 218 } 219 } 220 221 // add mov b,rn 222 // just after r 223 func addmove(r *Flow, bn int, rn int, f int) { 224 p1 := Ctxt.NewProg() 225 Clearp(p1) 226 p1.Pc = 9999 227 228 p := r.Prog 229 p1.Link = p.Link 230 p.Link = p1 231 p1.Lineno = p.Lineno 232 233 v := &vars[bn] 234 235 a := &p1.To 236 a.Offset = v.offset 237 a.Etype = uint8(v.etype) 238 a.Type = obj.TYPE_MEM 239 
a.Name = v.name 240 a.Node = v.node 241 a.Sym = Linksym(v.node.Sym) 242 243 /* NOTE(rsc): 9g did 244 if(a->etype == TARRAY) 245 a->type = TYPE_ADDR; 246 else if(a->sym == nil) 247 a->type = TYPE_CONST; 248 */ 249 p1.As = int16(Thearch.Optoas(OAS, Types[uint8(v.etype)])) 250 251 // TODO(rsc): Remove special case here. 252 if (Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9') && v.etype == TBOOL { 253 p1.As = int16(Thearch.Optoas(OAS, Types[TUINT8])) 254 } 255 p1.From.Type = obj.TYPE_REG 256 p1.From.Reg = int16(rn) 257 p1.From.Name = obj.NAME_NONE 258 if f == 0 { 259 p1.From = *a 260 *a = obj.Addr{} 261 a.Type = obj.TYPE_REG 262 a.Reg = int16(rn) 263 } 264 265 if Debug['R'] != 0 && Debug['v'] != 0 { 266 fmt.Printf("%v ===add=== %v\n", p, p1) 267 } 268 Ostats.Nspill++ 269 } 270 271 func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool { 272 t1 := o1 + int64(w1) 273 t2 := o2 + int64(w2) 274 275 if t1 <= o2 || t2 <= o1 { 276 return false 277 } 278 279 return true 280 } 281 282 func mkvar(f *Flow, a *obj.Addr) Bits { 283 // mark registers used 284 if a.Type == obj.TYPE_NONE { 285 return zbits 286 } 287 288 r := f.Data.(*Reg) 289 r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB 290 291 var n int 292 switch a.Type { 293 default: 294 regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB 295 if regu == 0 { 296 return zbits 297 } 298 bit := zbits 299 bit.b[0] = regu 300 return bit 301 302 // TODO(rsc): Remove special case here. 
303 case obj.TYPE_ADDR: 304 var bit Bits 305 if Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9' { 306 goto memcase 307 } 308 a.Type = obj.TYPE_MEM 309 bit = mkvar(f, a) 310 setaddrs(bit) 311 a.Type = obj.TYPE_ADDR 312 Ostats.Naddr++ 313 return zbits 314 315 memcase: 316 fallthrough 317 318 case obj.TYPE_MEM: 319 if r != nil { 320 r.use1.b[0] |= Thearch.RtoB(int(a.Reg)) 321 } 322 323 /* NOTE: 5g did 324 if(r->f.prog->scond & (C_PBIT|C_WBIT)) 325 r->set.b[0] |= RtoB(a->reg); 326 */ 327 switch a.Name { 328 default: 329 // Note: This case handles NAME_EXTERN and NAME_STATIC. 330 // We treat these as requiring eager writes to memory, due to 331 // the possibility of a fault handler looking at them, so there is 332 // not much point in registerizing the loads. 333 // If we later choose the set of candidate variables from a 334 // larger list, these cases could be deprioritized instead of 335 // removed entirely. 336 return zbits 337 338 case obj.NAME_PARAM, 339 obj.NAME_AUTO: 340 n = int(a.Name) 341 } 342 } 343 344 node, _ := a.Node.(*Node) 345 if node == nil || node.Op != ONAME || node.Orig == nil { 346 return zbits 347 } 348 node = node.Orig 349 if node.Orig != node { 350 Fatalf("%v: bad node", Ctxt.Dconv(a)) 351 } 352 if node.Sym == nil || node.Sym.Name[0] == '.' { 353 return zbits 354 } 355 et := EType(a.Etype) 356 o := a.Offset 357 w := a.Width 358 if w < 0 { 359 Fatalf("bad width %d for %v", w, Ctxt.Dconv(a)) 360 } 361 362 flag := 0 363 var v *Var 364 for i := 0; i < nvar; i++ { 365 v = &vars[i] 366 if v.node == node && int(v.name) == n { 367 if v.offset == o { 368 if v.etype == et { 369 if int64(v.width) == w { 370 // TODO(rsc): Remove special case for arm here. 
371 if flag == 0 || Thearch.Thechar != '5' { 372 return blsh(uint(i)) 373 } 374 } 375 } 376 } 377 378 // if they overlap, disable both 379 if overlap_reg(v.offset, v.width, o, int(w)) { 380 // print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et); 381 v.addr = 1 382 383 flag = 1 384 } 385 } 386 } 387 388 switch et { 389 case 0, TFUNC: 390 return zbits 391 } 392 393 if nvar >= NVAR { 394 if Debug['w'] > 1 && node != nil { 395 Fatalf("variable not optimized: %v", Nconv(node, obj.FmtSharp)) 396 } 397 if Debug['v'] > 0 { 398 Warn("variable not optimized: %v", Nconv(node, obj.FmtSharp)) 399 } 400 401 // If we're not tracking a word in a variable, mark the rest as 402 // having its address taken, so that we keep the whole thing 403 // live at all calls. otherwise we might optimize away part of 404 // a variable but not all of it. 405 var v *Var 406 for i := 0; i < nvar; i++ { 407 v = &vars[i] 408 if v.node == node { 409 v.addr = 1 410 } 411 } 412 413 return zbits 414 } 415 416 i := nvar 417 nvar++ 418 v = &vars[i] 419 v.id = i 420 v.offset = o 421 v.name = int8(n) 422 v.etype = et 423 v.width = int(w) 424 v.addr = int8(flag) // funny punning 425 v.node = node 426 427 // node->opt is the head of a linked list 428 // of Vars within the given Node, so that 429 // we can start at a Var and find all the other 430 // Vars in the same Go variable. 
431 v.nextinnode, _ = node.Opt().(*Var) 432 433 node.SetOpt(v) 434 435 bit := blsh(uint(i)) 436 if n == obj.NAME_EXTERN || n == obj.NAME_STATIC { 437 for z := 0; z < BITS; z++ { 438 externs.b[z] |= bit.b[z] 439 } 440 } 441 if n == obj.NAME_PARAM { 442 for z := 0; z < BITS; z++ { 443 params.b[z] |= bit.b[z] 444 } 445 } 446 447 if node.Class == PPARAM { 448 for z := 0; z < BITS; z++ { 449 ivar.b[z] |= bit.b[z] 450 } 451 } 452 if node.Class == PPARAMOUT { 453 for z := 0; z < BITS; z++ { 454 ovar.b[z] |= bit.b[z] 455 } 456 } 457 458 // Treat values with their address taken as live at calls, 459 // because the garbage collector's liveness analysis in plive.go does. 460 // These must be consistent or else we will elide stores and the garbage 461 // collector will see uninitialized data. 462 // The typical case where our own analysis is out of sync is when the 463 // node appears to have its address taken but that code doesn't actually 464 // get generated and therefore doesn't show up as an address being 465 // taken when we analyze the instruction stream. 466 // One instance of this case is when a closure uses the same name as 467 // an outer variable for one of its own variables declared with :=. 468 // The parser flags the outer variable as possibly shared, and therefore 469 // sets addrtaken, even though it ends up not being actually shared. 470 // If we were better about _ elision, _ = &x would suffice too. 471 // The broader := in a closure problem is mentioned in a comment in 472 // closure.go:/^typecheckclosure and dcl.go:/^oldname. 473 if node.Addrtaken { 474 v.addr = 1 475 } 476 477 // Disable registerization for globals, because: 478 // (1) we might panic at any time and we want the recovery code 479 // to see the latest values (issue 1304). 480 // (2) we don't know what pointers might point at them and we want 481 // loads via those pointers to see updated values and vice versa (issue 7995). 
482 // 483 // Disable registerization for results if using defer, because the deferred func 484 // might recover and return, causing the current values to be used. 485 if node.Class == PEXTERN || (hasdefer && node.Class == PPARAMOUT) { 486 v.addr = 1 487 } 488 489 if Debug['R'] != 0 { 490 fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, Econv(et), o, w, Nconv(node, obj.FmtSharp), Ctxt.Dconv(a), v.addr) 491 } 492 Ostats.Nvar++ 493 494 return bit 495 } 496 497 var change int 498 499 func prop(f *Flow, ref Bits, cal Bits) { 500 var f1 *Flow 501 var r1 *Reg 502 var z int 503 var i int 504 var v *Var 505 var v1 *Var 506 507 for f1 = f; f1 != nil; f1 = f1.P1 { 508 r1 = f1.Data.(*Reg) 509 for z = 0; z < BITS; z++ { 510 ref.b[z] |= r1.refahead.b[z] 511 if ref.b[z] != r1.refahead.b[z] { 512 r1.refahead.b[z] = ref.b[z] 513 change = 1 514 } 515 516 cal.b[z] |= r1.calahead.b[z] 517 if cal.b[z] != r1.calahead.b[z] { 518 r1.calahead.b[z] = cal.b[z] 519 change = 1 520 } 521 } 522 523 switch f1.Prog.As { 524 case obj.ACALL: 525 if Noreturn(f1.Prog) { 526 break 527 } 528 529 // Mark all input variables (ivar) as used, because that's what the 530 // liveness bitmaps say. The liveness bitmaps say that so that a 531 // panic will not show stale values in the parameter dump. 532 // Mark variables with a recent VARDEF (r1->act) as used, 533 // so that the optimizer flushes initializations to memory, 534 // so that if a garbage collection happens during this CALL, 535 // the collector will see initialized memory. Again this is to 536 // match what the liveness bitmaps say. 537 for z = 0; z < BITS; z++ { 538 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z] 539 ref.b[z] = 0 540 } 541 542 // cal.b is the current approximation of what's live across the call. 543 // Every bit in cal.b is a single stack word. For each such word, 544 // find all the other tracked stack words in the same Go variable 545 // (struct/slice/string/interface) and mark them live too. 
546 // This is necessary because the liveness analysis for the garbage 547 // collector works at variable granularity, not at word granularity. 548 // It is fundamental for slice/string/interface: the garbage collector 549 // needs the whole value, not just some of the words, in order to 550 // interpret the other bits correctly. Specifically, slice needs a consistent 551 // ptr and cap, string needs a consistent ptr and len, and interface 552 // needs a consistent type word and data word. 553 for z = 0; z < BITS; z++ { 554 if cal.b[z] == 0 { 555 continue 556 } 557 for i = 0; i < 64; i++ { 558 if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 { 559 continue 560 } 561 v = &vars[z*64+i] 562 if v.node.Opt() == nil { // v represents fixed register, not Go variable 563 continue 564 } 565 566 // v->node->opt is the head of a linked list of Vars 567 // corresponding to tracked words from the Go variable v->node. 568 // Walk the list and set all the bits. 569 // For a large struct this could end up being quadratic: 570 // after the first setting, the outer loop (for z, i) would see a 1 bit 571 // for all of the remaining words in the struct, and for each such 572 // word would go through and turn on all the bits again. 573 // To avoid the quadratic behavior, we only turn on the bits if 574 // v is the head of the list or if the head's bit is not yet turned on. 575 // This will set the bits at most twice, keeping the overall loop linear. 
576 v1, _ = v.node.Opt().(*Var) 577 578 if v == v1 || !btest(&cal, uint(v1.id)) { 579 for ; v1 != nil; v1 = v1.nextinnode { 580 biset(&cal, uint(v1.id)) 581 } 582 } 583 } 584 } 585 586 case obj.ATEXT: 587 for z = 0; z < BITS; z++ { 588 cal.b[z] = 0 589 ref.b[z] = 0 590 } 591 592 case obj.ARET: 593 for z = 0; z < BITS; z++ { 594 cal.b[z] = externs.b[z] | ovar.b[z] 595 ref.b[z] = 0 596 } 597 } 598 599 for z = 0; z < BITS; z++ { 600 ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z] 601 cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]) 602 r1.refbehind.b[z] = ref.b[z] 603 r1.calbehind.b[z] = cal.b[z] 604 } 605 606 if f1.Active != 0 { 607 break 608 } 609 f1.Active = 1 610 } 611 612 var r *Reg 613 var f2 *Flow 614 for ; f != f1; f = f.P1 { 615 r = f.Data.(*Reg) 616 for f2 = f.P2; f2 != nil; f2 = f2.P2link { 617 prop(f2, r.refbehind, r.calbehind) 618 } 619 } 620 } 621 622 func synch(f *Flow, dif Bits) { 623 var r1 *Reg 624 var z int 625 626 for f1 := f; f1 != nil; f1 = f1.S1 { 627 r1 = f1.Data.(*Reg) 628 for z = 0; z < BITS; z++ { 629 dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z] 630 if dif.b[z] != r1.regdiff.b[z] { 631 r1.regdiff.b[z] = dif.b[z] 632 change = 1 633 } 634 } 635 636 if f1.Active != 0 { 637 break 638 } 639 f1.Active = 1 640 for z = 0; z < BITS; z++ { 641 dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z]) 642 } 643 if f1.S2 != nil { 644 synch(f1.S2, dif) 645 } 646 } 647 } 648 649 func allreg(b uint64, r *Rgn) uint64 { 650 v := &vars[r.varno] 651 r.regno = 0 652 switch v.etype { 653 default: 654 Fatalf("unknown etype %d/%v", Bitno(b), Econv(v.etype)) 655 656 case TINT8, 657 TUINT8, 658 TINT16, 659 TUINT16, 660 TINT32, 661 TUINT32, 662 TINT64, 663 TUINT64, 664 TINT, 665 TUINT, 666 TUINTPTR, 667 TBOOL, 668 TPTR32, 669 TPTR64: 670 i := Thearch.BtoR(^b) 671 if i != 0 && r.cost > 0 { 672 r.regno = int16(i) 673 return Thearch.RtoB(i) 674 } 675 676 case TFLOAT32, TFLOAT64: 677 i := Thearch.BtoF(^b) 
678 if i != 0 && r.cost > 0 { 679 r.regno = int16(i) 680 return Thearch.FtoB(i) 681 } 682 } 683 684 return 0 685 } 686 687 func LOAD(r *Reg, z int) uint64 { 688 return ^r.refbehind.b[z] & r.refahead.b[z] 689 } 690 691 func STORE(r *Reg, z int) uint64 { 692 return ^r.calbehind.b[z] & r.calahead.b[z] 693 } 694 695 // Cost parameters 696 const ( 697 CLOAD = 5 // cost of load 698 CREF = 5 // cost of reference if not registerized 699 LOOP = 3 // loop execution count (applied in popt.go) 700 ) 701 702 func paint1(f *Flow, bn int) { 703 z := bn / 64 704 bb := uint64(1 << uint(bn%64)) 705 r := f.Data.(*Reg) 706 if r.act.b[z]&bb != 0 { 707 return 708 } 709 var f1 *Flow 710 var r1 *Reg 711 for { 712 if r.refbehind.b[z]&bb == 0 { 713 break 714 } 715 f1 = f.P1 716 if f1 == nil { 717 break 718 } 719 r1 = f1.Data.(*Reg) 720 if r1.refahead.b[z]&bb == 0 { 721 break 722 } 723 if r1.act.b[z]&bb != 0 { 724 break 725 } 726 f = f1 727 r = r1 728 } 729 730 if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 { 731 change -= CLOAD * int(f.Loop) 732 } 733 734 for { 735 r.act.b[z] |= bb 736 737 if f.Prog.As != obj.ANOP { // don't give credit for NOPs 738 if r.use1.b[z]&bb != 0 { 739 change += CREF * int(f.Loop) 740 } 741 if (r.use2.b[z]|r.set.b[z])&bb != 0 { 742 change += CREF * int(f.Loop) 743 } 744 } 745 746 if STORE(r, z)&r.regdiff.b[z]&bb != 0 { 747 change -= CLOAD * int(f.Loop) 748 } 749 750 if r.refbehind.b[z]&bb != 0 { 751 for f1 = f.P2; f1 != nil; f1 = f1.P2link { 752 if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 { 753 paint1(f1, bn) 754 } 755 } 756 } 757 758 if r.refahead.b[z]&bb == 0 { 759 break 760 } 761 f1 = f.S2 762 if f1 != nil { 763 if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 { 764 paint1(f1, bn) 765 } 766 } 767 f = f.S1 768 if f == nil { 769 break 770 } 771 r = f.Data.(*Reg) 772 if r.act.b[z]&bb != 0 { 773 break 774 } 775 if r.refbehind.b[z]&bb == 0 { 776 break 777 } 778 } 779 } 780 781 func paint2(f *Flow, bn int, depth int) uint64 { 782 z := bn / 64 783 bb := 
uint64(1 << uint(bn%64)) 784 vreg := regbits 785 r := f.Data.(*Reg) 786 if r.act.b[z]&bb == 0 { 787 return vreg 788 } 789 var r1 *Reg 790 var f1 *Flow 791 for { 792 if r.refbehind.b[z]&bb == 0 { 793 break 794 } 795 f1 = f.P1 796 if f1 == nil { 797 break 798 } 799 r1 = f1.Data.(*Reg) 800 if r1.refahead.b[z]&bb == 0 { 801 break 802 } 803 if r1.act.b[z]&bb == 0 { 804 break 805 } 806 f = f1 807 r = r1 808 } 809 810 for { 811 if Debug['R'] != 0 && Debug['v'] != 0 { 812 fmt.Printf(" paint2 %d %v\n", depth, f.Prog) 813 } 814 815 r.act.b[z] &^= bb 816 817 vreg |= r.regu 818 819 if r.refbehind.b[z]&bb != 0 { 820 for f1 = f.P2; f1 != nil; f1 = f1.P2link { 821 if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 { 822 vreg |= paint2(f1, bn, depth+1) 823 } 824 } 825 } 826 827 if r.refahead.b[z]&bb == 0 { 828 break 829 } 830 f1 = f.S2 831 if f1 != nil { 832 if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 { 833 vreg |= paint2(f1, bn, depth+1) 834 } 835 } 836 f = f.S1 837 if f == nil { 838 break 839 } 840 r = f.Data.(*Reg) 841 if r.act.b[z]&bb == 0 { 842 break 843 } 844 if r.refbehind.b[z]&bb == 0 { 845 break 846 } 847 } 848 849 return vreg 850 } 851 852 func paint3(f *Flow, bn int, rb uint64, rn int) { 853 z := bn / 64 854 bb := uint64(1 << uint(bn%64)) 855 r := f.Data.(*Reg) 856 if r.act.b[z]&bb != 0 { 857 return 858 } 859 var r1 *Reg 860 var f1 *Flow 861 for { 862 if r.refbehind.b[z]&bb == 0 { 863 break 864 } 865 f1 = f.P1 866 if f1 == nil { 867 break 868 } 869 r1 = f1.Data.(*Reg) 870 if r1.refahead.b[z]&bb == 0 { 871 break 872 } 873 if r1.act.b[z]&bb != 0 { 874 break 875 } 876 f = f1 877 r = r1 878 } 879 880 if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 { 881 addmove(f, bn, rn, 0) 882 } 883 var p *obj.Prog 884 for { 885 r.act.b[z] |= bb 886 p = f.Prog 887 888 if r.use1.b[z]&bb != 0 { 889 if Debug['R'] != 0 && Debug['v'] != 0 { 890 fmt.Printf("%v", p) 891 } 892 addreg(&p.From, rn) 893 if Debug['R'] != 0 && Debug['v'] != 0 { 894 fmt.Printf(" ===change== %v\n", p) 895 } 896 } 
897 898 if (r.use2.b[z]|r.set.b[z])&bb != 0 { 899 if Debug['R'] != 0 && Debug['v'] != 0 { 900 fmt.Printf("%v", p) 901 } 902 addreg(&p.To, rn) 903 if Debug['R'] != 0 && Debug['v'] != 0 { 904 fmt.Printf(" ===change== %v\n", p) 905 } 906 } 907 908 if STORE(r, z)&r.regdiff.b[z]&bb != 0 { 909 addmove(f, bn, rn, 1) 910 } 911 r.regu |= rb 912 913 if r.refbehind.b[z]&bb != 0 { 914 for f1 = f.P2; f1 != nil; f1 = f1.P2link { 915 if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 { 916 paint3(f1, bn, rb, rn) 917 } 918 } 919 } 920 921 if r.refahead.b[z]&bb == 0 { 922 break 923 } 924 f1 = f.S2 925 if f1 != nil { 926 if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 { 927 paint3(f1, bn, rb, rn) 928 } 929 } 930 f = f.S1 931 if f == nil { 932 break 933 } 934 r = f.Data.(*Reg) 935 if r.act.b[z]&bb != 0 { 936 break 937 } 938 if r.refbehind.b[z]&bb == 0 { 939 break 940 } 941 } 942 } 943 944 func addreg(a *obj.Addr, rn int) { 945 a.Sym = nil 946 a.Node = nil 947 a.Offset = 0 948 a.Type = obj.TYPE_REG 949 a.Reg = int16(rn) 950 a.Name = 0 951 952 Ostats.Ncvtreg++ 953 } 954 955 func dumpone(f *Flow, isreg int) { 956 fmt.Printf("%d:%v", f.Loop, f.Prog) 957 if isreg != 0 { 958 r := f.Data.(*Reg) 959 var bit Bits 960 for z := 0; z < BITS; z++ { 961 bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0 962 } 963 if bany(&bit) { 964 fmt.Printf("\t") 965 if bany(&r.set) { 966 fmt.Printf(" s:%v", &r.set) 967 } 968 if bany(&r.use1) { 969 fmt.Printf(" u1:%v", &r.use1) 970 } 971 if bany(&r.use2) { 972 fmt.Printf(" u2:%v", &r.use2) 973 } 974 if bany(&r.refbehind) { 975 fmt.Printf(" rb:%v ", &r.refbehind) 976 } 977 if bany(&r.refahead) { 978 fmt.Printf(" ra:%v ", &r.refahead) 979 } 980 if bany(&r.calbehind) { 981 fmt.Printf(" cb:%v ", &r.calbehind) 982 } 983 if bany(&r.calahead) { 984 fmt.Printf(" ca:%v ", &r.calahead) 985 } 986 if bany(&r.regdiff) { 987 fmt.Printf(" d:%v ", &r.regdiff) 988 } 989 if bany(&r.act) 
{ 990 fmt.Printf(" a:%v ", &r.act) 991 } 992 } 993 } 994 995 fmt.Printf("\n") 996 } 997 998 func Dumpit(str string, r0 *Flow, isreg int) { 999 var r1 *Flow 1000 1001 fmt.Printf("\n%s\n", str) 1002 for r := r0; r != nil; r = r.Link { 1003 dumpone(r, isreg) 1004 r1 = r.P2 1005 if r1 != nil { 1006 fmt.Printf("\tpred:") 1007 for ; r1 != nil; r1 = r1.P2link { 1008 fmt.Printf(" %.4d", uint(int(r1.Prog.Pc))) 1009 } 1010 if r.P1 != nil { 1011 fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc))) 1012 } else { 1013 fmt.Printf(" (only)") 1014 } 1015 fmt.Printf("\n") 1016 } 1017 1018 // Print successors if it's not just the next one 1019 if r.S1 != r.Link || r.S2 != nil { 1020 fmt.Printf("\tsucc:") 1021 if r.S1 != nil { 1022 fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc))) 1023 } 1024 if r.S2 != nil { 1025 fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc))) 1026 } 1027 fmt.Printf("\n") 1028 } 1029 } 1030 } 1031 1032 func regopt(firstp *obj.Prog) { 1033 mergetemp(firstp) 1034 1035 // control flow is more complicated in generated go code 1036 // than in generated c code. define pseudo-variables for 1037 // registers, so we have complete register usage information. 
1038 var nreg int 1039 regnames := Thearch.Regnames(&nreg) 1040 1041 nvar = nreg 1042 for i := 0; i < nreg; i++ { 1043 vars[i] = Var{} 1044 } 1045 for i := 0; i < nreg; i++ { 1046 if regnodes[i] == nil { 1047 regnodes[i] = newname(Lookup(regnames[i])) 1048 } 1049 vars[i].node = regnodes[i] 1050 } 1051 1052 regbits = Thearch.Excludedregs() 1053 externs = zbits 1054 params = zbits 1055 consts = zbits 1056 addrs = zbits 1057 ivar = zbits 1058 ovar = zbits 1059 1060 // pass 1 1061 // build aux data structure 1062 // allocate pcs 1063 // find use and set of variables 1064 g := Flowstart(firstp, func() interface{} { return new(Reg) }) 1065 if g == nil { 1066 for i := 0; i < nvar; i++ { 1067 vars[i].node.SetOpt(nil) 1068 } 1069 return 1070 } 1071 1072 firstf := g.Start 1073 1074 for f := firstf; f != nil; f = f.Link { 1075 p := f.Prog 1076 if p.As == obj.AVARDEF || p.As == obj.AVARKILL { 1077 continue 1078 } 1079 1080 // Avoid making variables for direct-called functions. 1081 if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN { 1082 continue 1083 } 1084 1085 // from vs to doesn't matter for registers. 1086 r := f.Data.(*Reg) 1087 r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex 1088 r.set.b[0] |= p.Info.Regset 1089 1090 bit := mkvar(f, &p.From) 1091 if bany(&bit) { 1092 if p.Info.Flags&LeftAddr != 0 { 1093 setaddrs(bit) 1094 } 1095 if p.Info.Flags&LeftRead != 0 { 1096 for z := 0; z < BITS; z++ { 1097 r.use1.b[z] |= bit.b[z] 1098 } 1099 } 1100 if p.Info.Flags&LeftWrite != 0 { 1101 for z := 0; z < BITS; z++ { 1102 r.set.b[z] |= bit.b[z] 1103 } 1104 } 1105 } 1106 1107 // Compute used register for reg 1108 if p.Info.Flags&RegRead != 0 { 1109 r.use1.b[0] |= Thearch.RtoB(int(p.Reg)) 1110 } 1111 1112 // Currently we never generate three register forms. 1113 // If we do, this will need to change. 
1114 if p.From3Type() != obj.TYPE_NONE { 1115 Fatalf("regopt not implemented for from3") 1116 } 1117 1118 bit = mkvar(f, &p.To) 1119 if bany(&bit) { 1120 if p.Info.Flags&RightAddr != 0 { 1121 setaddrs(bit) 1122 } 1123 if p.Info.Flags&RightRead != 0 { 1124 for z := 0; z < BITS; z++ { 1125 r.use2.b[z] |= bit.b[z] 1126 } 1127 } 1128 if p.Info.Flags&RightWrite != 0 { 1129 for z := 0; z < BITS; z++ { 1130 r.set.b[z] |= bit.b[z] 1131 } 1132 } 1133 } 1134 } 1135 1136 for i := 0; i < nvar; i++ { 1137 v := &vars[i] 1138 if v.addr != 0 { 1139 bit := blsh(uint(i)) 1140 for z := 0; z < BITS; z++ { 1141 addrs.b[z] |= bit.b[z] 1142 } 1143 } 1144 1145 if Debug['R'] != 0 && Debug['v'] != 0 { 1146 fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, Econv(v.etype), v.width, v.node, v.offset) 1147 } 1148 } 1149 1150 if Debug['R'] != 0 && Debug['v'] != 0 { 1151 Dumpit("pass1", firstf, 1) 1152 } 1153 1154 // pass 2 1155 // find looping structure 1156 flowrpo(g) 1157 1158 if Debug['R'] != 0 && Debug['v'] != 0 { 1159 Dumpit("pass2", firstf, 1) 1160 } 1161 1162 // pass 2.5 1163 // iterate propagating fat vardef covering forward 1164 // r->act records vars with a VARDEF since the last CALL. 1165 // (r->act will be reused in pass 5 for something else, 1166 // but we'll be done with it by then.) 
1167 active := 0 1168 1169 for f := firstf; f != nil; f = f.Link { 1170 f.Active = 0 1171 r := f.Data.(*Reg) 1172 r.act = zbits 1173 } 1174 1175 for f := firstf; f != nil; f = f.Link { 1176 p := f.Prog 1177 if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil { 1178 active++ 1179 walkvardef(p.To.Node.(*Node), f, active) 1180 } 1181 } 1182 1183 // pass 3 1184 // iterate propagating usage 1185 // back until flow graph is complete 1186 var f1 *Flow 1187 var i int 1188 var f *Flow 1189 loop1: 1190 change = 0 1191 1192 for f = firstf; f != nil; f = f.Link { 1193 f.Active = 0 1194 } 1195 for f = firstf; f != nil; f = f.Link { 1196 if f.Prog.As == obj.ARET { 1197 prop(f, zbits, zbits) 1198 } 1199 } 1200 1201 // pick up unreachable code 1202 loop11: 1203 i = 0 1204 1205 for f = firstf; f != nil; f = f1 { 1206 f1 = f.Link 1207 if f1 != nil && f1.Active != 0 && f.Active == 0 { 1208 prop(f, zbits, zbits) 1209 i = 1 1210 } 1211 } 1212 1213 if i != 0 { 1214 goto loop11 1215 } 1216 if change != 0 { 1217 goto loop1 1218 } 1219 1220 if Debug['R'] != 0 && Debug['v'] != 0 { 1221 Dumpit("pass3", firstf, 1) 1222 } 1223 1224 // pass 4 1225 // iterate propagating register/variable synchrony 1226 // forward until graph is complete 1227 loop2: 1228 change = 0 1229 1230 for f = firstf; f != nil; f = f.Link { 1231 f.Active = 0 1232 } 1233 synch(firstf, zbits) 1234 if change != 0 { 1235 goto loop2 1236 } 1237 1238 if Debug['R'] != 0 && Debug['v'] != 0 { 1239 Dumpit("pass4", firstf, 1) 1240 } 1241 1242 // pass 4.5 1243 // move register pseudo-variables into regu. 
	// Strip the low nreg variable ids out of every dataflow set,
	// recording their union in r.regu instead. (These ids appear to
	// correspond to hardware registers -- cf. the Var doc comment --
	// so the variable passes below see only real variables.
	// TODO(review): confirm ids 0..nreg-1 are the hardware registers.)
	mask := uint64((1 << uint(nreg)) - 1)
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)
		r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
		r.set.b[0] &^= mask
		r.use1.b[0] &^= mask
		r.use2.b[0] &^= mask
		r.refbehind.b[0] &^= mask
		r.refahead.b[0] &^= mask
		r.calbehind.b[0] &^= mask
		r.calahead.b[0] &^= mask
		r.regdiff.b[0] &^= mask
		r.act.b[0] &^= mask
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass4.5", firstf, 1)
	}

	// pass 5
	// isolate regions
	// calculate costs (paint1)
	var bit Bits
	if f := firstf; f != nil {
		// Sanity check at function entry: any variable live into the
		// first instruction that is not an extern, parameter,
		// address-taken, or constant was used before being set.
		r := f.Data.(*Reg)
		for z := 0; z < BITS; z++ {
			bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
		}
		if bany(&bit) && !f.Refset {
			// should never happen - all variables are preset
			if Debug['w'] != 0 {
				fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = true
		}
	}

	// Clear the active-region marks before region discovery.
	for f := firstf; f != nil; f = f.Link {
		(f.Data.(*Reg)).act = zbits
	}
	nregion = 0
	region = region[:0]
	var rgp *Rgn
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)

		// A variable written here but never read ahead (and not
		// address-taken) is a dead store; excise the instruction.
		for z := 0; z < BITS; z++ {
			bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
		}
		if bany(&bit) && !f.Refset {
			if Debug['w'] != 0 {
				fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = true
			Thearch.Excise(f)
		}

		// Start a candidate region at each variable read here that is
		// not already covered by an active region and not address-taken.
		for z := 0; z < BITS; z++ {
			bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
		}
		for bany(&bit) {
			i = bnum(&bit)
			change = 0
			// paint1 reports the region's estimated benefit via the
			// global 'change' (reset just above).
			paint1(f, i)
			biclr(&bit, uint(i))
			if change <= 0 {
				// Not profitable to registerize.
				continue
			}
			if nregion >= MaxRgn {
				// Table full; keep counting so the overflow can be
				// reported below.
				nregion++
				continue
			}

			region = append(region, Rgn{
				enter: f,
				cost:  int16(change),
				varno: int16(i),
			})
			nregion++
		}
	}

	if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
		Warn("regions: %d\n", nregion)
	}
	if nregion >= MaxRgn {
		if Debug['v'] != 0 {
			Warn("too many regions: %d\n", nregion)
		}
		// Clamp back to the table size after the overflow count above.
		nregion = MaxRgn
	}

	// Order the surviving regions (rcmp order) before allocating.
	sort.Sort(rcmp(region[:nregion]))

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass5", firstf, 1)
	}

	// pass 6
	// determine used registers (paint2)
	// replace code (paint3)
	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\nregisterizing\n")
	}
	var usedreg uint64
	var vreg uint64
	for i := 0; i < nregion; i++ {
		rgp = &region[i]
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
		}
		bit = blsh(uint(rgp.varno))
		// paint2 walks the region to find which registers are already
		// in use; allreg then picks a free one (recorded in rgp.regno).
		usedreg = paint2(rgp.enter, int(rgp.varno), 0)
		vreg = allreg(usedreg, rgp)
		if rgp.regno != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				v := &vars[rgp.varno]
				fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, Econv(v.etype), obj.Rconv(int(rgp.regno)), usedreg, vreg)
			}

			// Rewrite the region's instructions to use the register.
			paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
		}
	}

	// free aux structures. peep allocates new ones.
	// Detach the per-variable optimizer state from each Node; peep
	// (below) allocates fresh aux structures of its own.
	for i := 0; i < nvar; i++ {
		vars[i].node.SetOpt(nil)
	}
	Flowend(g)
	firstf = nil

	if Debug['R'] != 0 && Debug['v'] != 0 {
		// Rebuild flow graph, since we inserted instructions
		g := Flowstart(firstp, nil)
		firstf = g.Start
		Dumpit("pass6", firstf, 0)
		Flowend(g)
		firstf = nil
	}

	// pass 7
	// peep-hole on basic block
	if Debug['R'] == 0 || Debug['P'] != 0 {
		Thearch.Peep(firstp)
	}

	// eliminate nops
	for p := firstp; p != nil; p = p.Link {
		// Splice runs of ANOP instructions out of the linked list.
		for p.Link != nil && p.Link.As == obj.ANOP {
			p.Link = p.Link.Link
		}
		// Retarget branches so they do not land on a NOP.
		if p.To.Type == obj.TYPE_BRANCH {
			for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
				p.To.Val = p.To.Val.(*obj.Prog).Link
			}
		}
	}

	if Debug['R'] != 0 {
		// Dump optimizer statistics, then reset the counters so the
		// next compiled function starts from zero.
		if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
			fmt.Printf("\nstats\n")
		}

		if Ostats.Ncvtreg != 0 {
			fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
		}
		if Ostats.Nspill != 0 {
			fmt.Printf("\t%4d spill\n", Ostats.Nspill)
		}
		if Ostats.Nreload != 0 {
			fmt.Printf("\t%4d reload\n", Ostats.Nreload)
		}
		if Ostats.Ndelmov != 0 {
			fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
		}
		if Ostats.Nvar != 0 {
			fmt.Printf("\t%4d var\n", Ostats.Nvar)
		}
		if Ostats.Naddr != 0 {
			fmt.Printf("\t%4d addr\n", Ostats.Naddr)
		}

		Ostats = OptStats{}
	}
}

// bany reports whether any bits in a are set.
func bany(a *Bits) bool {
	for _, x := range &a.b { // & to avoid making a copy of a.b
		if x != 0 {
			return true
		}
	}
	return false
}

// bnum reports the lowest index of a 1 bit in a.
1440 func bnum(a *Bits) int { 1441 for i, x := range &a.b { // & to avoid making a copy of a.b 1442 if x != 0 { 1443 return 64*i + Bitno(x) 1444 } 1445 } 1446 1447 Fatalf("bad in bnum") 1448 return 0 1449 } 1450 1451 // blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n). 1452 func blsh(n uint) Bits { 1453 c := zbits 1454 c.b[n/64] = 1 << (n % 64) 1455 return c 1456 } 1457 1458 // btest reports whether bit n is 1. 1459 func btest(a *Bits, n uint) bool { 1460 return a.b[n/64]&(1<<(n%64)) != 0 1461 } 1462 1463 // biset sets bit n to 1. 1464 func biset(a *Bits, n uint) { 1465 a.b[n/64] |= 1 << (n % 64) 1466 } 1467 1468 // biclr sets bit n to 0. 1469 func biclr(a *Bits, n uint) { 1470 a.b[n/64] &^= (1 << (n % 64)) 1471 } 1472 1473 // Bitno reports the lowest index of a 1 bit in b. 1474 // It calls Fatalf if there is no 1 bit. 1475 func Bitno(b uint64) int { 1476 if b == 0 { 1477 Fatalf("bad in bitno") 1478 } 1479 n := 0 1480 if b&(1<<32-1) == 0 { 1481 n += 32 1482 b >>= 32 1483 } 1484 if b&(1<<16-1) == 0 { 1485 n += 16 1486 b >>= 16 1487 } 1488 if b&(1<<8-1) == 0 { 1489 n += 8 1490 b >>= 8 1491 } 1492 if b&(1<<4-1) == 0 { 1493 n += 4 1494 b >>= 4 1495 } 1496 if b&(1<<2-1) == 0 { 1497 n += 2 1498 b >>= 2 1499 } 1500 if b&1 == 0 { 1501 n++ 1502 } 1503 return n 1504 } 1505 1506 // String returns a space-separated list of the variables represented by bits. 1507 func (bits Bits) String() string { 1508 // Note: This method takes a value receiver, both for convenience 1509 // and to make it safe to modify the bits as we process them. 1510 // Even so, most prints above use &bits, because then the value 1511 // being stored in the interface{} is a pointer and does not require 1512 // an allocation and copy to create the interface{}. 
1513 var buf bytes.Buffer 1514 sep := "" 1515 for bany(&bits) { 1516 i := bnum(&bits) 1517 buf.WriteString(sep) 1518 sep = " " 1519 v := &vars[i] 1520 if v.node == nil || v.node.Sym == nil { 1521 fmt.Fprintf(&buf, "$%d", i) 1522 } else { 1523 fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i) 1524 if v.offset != 0 { 1525 fmt.Fprintf(&buf, "%+d", int64(v.offset)) 1526 } 1527 } 1528 biclr(&bits, uint(i)) 1529 } 1530 return buf.String() 1531 }