github.com/aloncn/graphics-go@v0.0.1/src/cmd/compile/internal/gc/reg.go

// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package gc

import (
	"bytes"
	"cmd/internal/obj"
	"fmt"
	"sort"
	"strings"
)

// A Var represents a single variable that may be stored in a register.
// That variable may itself correspond to a hardware register,
// to represent the use of registers in the unoptimized instruction stream.
type Var struct {
	offset     int64
	node       *Node
	nextinnode *Var
	width      int
	id         int // index in vars
	name       int8
	etype      EType
	addr       int8
}

// Bits represents a set of Vars, stored as a bit set of var numbers
// (the index in vars, or equivalently v.id).
type Bits struct {
	b [BITS]uint64
}

const (
	BITS = 3
	NVAR = BITS * 64
)

var (
	vars [NVAR]Var // variables under consideration
	nvar int       // number of vars

	regbits uint64 // bits for hardware registers

	zbits   Bits // zero
	externs Bits // global variables
	params  Bits // function parameters and results
	ivar    Bits // function parameters (inputs)
	ovar    Bits // function results (outputs)
	consts  Bits // constant values
	addrs   Bits // variables with address taken
)
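
// Illustrative sketch (not part of the original source): with BITS = 3,
// a Bits value tracks up to NVAR = 192 variables. Var number n lives in
// word n/64 at bit n%64, so the helpers defined at the end of this file
// behave like:
//
//	var s Bits
//	biset(&s, 70)   // s.b[1] |= 1 << 6
//	btest(&s, 70)   // true
//	s = blsh(70)    // the same single-bit set, returned as a value
//	biclr(&s, 70)   // s.b[1] &^= 1 << 6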

// A Reg is a wrapper around a single Prog (one instruction) that holds
// register optimization information while the optimizer runs.
// r->prog is the instruction.
type Reg struct {
	set  Bits // regopt variables written by this instruction.
	use1 Bits // regopt variables read by prog->from.
	use2 Bits // regopt variables read by prog->to.

	// refahead/refbehind are the regopt variables whose current
	// value may be used in the following/preceding instructions
	// up to a CALL (or the value is clobbered).
	refbehind Bits
	refahead  Bits

	// calahead/calbehind are similar, but for variables in
	// instructions that are reachable after hitting at least one
	// CALL.
	calbehind Bits
	calahead  Bits

	regdiff Bits
	act     Bits
	regu    uint64 // register used bitmap
}

// A Rgn represents a single regopt variable over a region of code
// where a register could potentially be dedicated to that variable.
// The code encompassed by a Rgn is defined by the flow graph,
// starting at enter, flood-filling forward while varno is refahead
// and backward while varno is refbehind, and following branches.
// A single variable may be represented by multiple disjoint Rgns and
// each Rgn may choose a different register for that variable.
// Registers are allocated to regions greedily in order of descending
// cost.
type Rgn struct {
	enter *Flow
	cost  int16
	varno int16
	regno int16
}

// The Plan 9 C compilers used a limit of 600 regions,
// but the yacc-generated parser in y.go has 3100 regions.
// We set MaxRgn large enough to handle that.
// There's not a huge cost to having too many regions:
// the main processing traces the live area for each variable,
// which is limited by the number of variables times the area,
// not the raw region count. If there are many regions, they
// are almost certainly small and easy to trace.
// The only operation that scales with region count is the
// sorting by cost, which uses sort.Sort and is therefore
// guaranteed n log n.
const MaxRgn = 6000

var (
	region  []Rgn
	nregion int
)

type rcmp []Rgn

func (x rcmp) Len() int {
	return len(x)
}

func (x rcmp) Swap(i, j int) {
	x[i], x[j] = x[j], x[i]
}

func (x rcmp) Less(i, j int) bool {
	p1 := &x[i]
	p2 := &x[j]
	if p1.cost != p2.cost {
		return int(p2.cost)-int(p1.cost) < 0
	}
	if p1.varno != p2.varno {
		return int(p2.varno)-int(p1.varno) < 0
	}
	if p1.enter != p2.enter {
		return int(p2.enter.Id-p1.enter.Id) < 0
	}
	return false
}
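
// Illustrative sketch (not part of the original source): rcmp orders regions
// by descending cost, breaking ties by descending varno and then descending
// enter.Id, so that sort.Sort(rcmp(region[:nregion])) in regopt considers the
// most profitable regions first:
//
//	region := []Rgn{{cost: 10}, {cost: 30}, {cost: 20}}
//	sort.Sort(rcmp(region)) // costs now ordered 30, 20, 10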

func setaddrs(bit Bits) {
	var i int
	var n int
	var v *Var
	var node *Node

	for bany(&bit) {
		// convert each bit to a variable
		i = bnum(&bit)

		node = vars[i].node
		n = int(vars[i].name)
		biclr(&bit, uint(i))

		// disable all pieces of that variable
		for i = 0; i < nvar; i++ {
			v = &vars[i]
			if v.node == node && int(v.name) == n {
				v.addr = 2
			}
		}
	}
}

var regnodes [64]*Node

func walkvardef(n *Node, f *Flow, active int) {
	var f1 *Flow
	var bn int
	var v *Var

	for f1 = f; f1 != nil; f1 = f1.S1 {
		if f1.Active == int32(active) {
			break
		}
		f1.Active = int32(active)
		if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
			break
		}
		for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode {
			bn = v.id
			biset(&(f1.Data.(*Reg)).act, uint(bn))
		}

		if f1.Prog.As == obj.ACALL {
			break
		}
	}

	for f2 := f; f2 != f1; f2 = f2.S1 {
		if f2.S2 != nil {
			walkvardef(n, f2.S2, active)
		}
	}
}

// add mov b,rn
// just after r
func addmove(r *Flow, bn int, rn int, f int) {
	p1 := Ctxt.NewProg()
	Clearp(p1)
	p1.Pc = 9999

	p := r.Prog
	p1.Link = p.Link
	p.Link = p1
	p1.Lineno = p.Lineno

	v := &vars[bn]

	a := &p1.To
	a.Offset = v.offset
	a.Etype = uint8(v.etype)
	a.Type = obj.TYPE_MEM
	a.Name = v.name
	a.Node = v.node
	a.Sym = Linksym(v.node.Sym)

	/* NOTE(rsc): 9g did
	if(a->etype == TARRAY)
		a->type = TYPE_ADDR;
	else if(a->sym == nil)
		a->type = TYPE_CONST;
	*/
	p1.As = int16(Thearch.Optoas(OAS, Types[uint8(v.etype)]))

	// TODO(rsc): Remove special case here.
	if (Thearch.Thechar == '0' || Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9') && v.etype == TBOOL {
		p1.As = int16(Thearch.Optoas(OAS, Types[TUINT8]))
	}
	p1.From.Type = obj.TYPE_REG
	p1.From.Reg = int16(rn)
	p1.From.Name = obj.NAME_NONE
	if f == 0 {
		p1.From = *a
		*a = obj.Addr{}
		a.Type = obj.TYPE_REG
		a.Reg = int16(rn)
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("%v ===add=== %v\n", p, p1)
	}
	Ostats.Nspill++
}

func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
	t1 := o1 + int64(w1)
	t2 := o2 + int64(w2)

	if t1 <= o2 || t2 <= o1 {
		return false
	}

	return true
}
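
// Illustrative sketch (not part of the original source): overlap_reg treats
// each argument pair as a half-open byte interval [o, o+w) and reports
// whether the two intervals intersect:
//
//	overlap_reg(0, 8, 8, 8) // false: [0,8) and [8,16) only touch
//	overlap_reg(0, 8, 4, 8) // true:  [0,8) and [4,12) share [4,8)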

func mkvar(f *Flow, a *obj.Addr) Bits {
	// mark registers used
	if a.Type == obj.TYPE_NONE {
		return zbits
	}

	r := f.Data.(*Reg)
	r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB

	var n int
	switch a.Type {
	default:
		regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
		if regu == 0 {
			return zbits
		}
		bit := zbits
		bit.b[0] = regu
		return bit

	// TODO(rsc): Remove special case here.
	case obj.TYPE_ADDR:
		var bit Bits
		if Thearch.Thechar == '0' || Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9' {
			goto memcase
		}
		a.Type = obj.TYPE_MEM
		bit = mkvar(f, a)
		setaddrs(bit)
		a.Type = obj.TYPE_ADDR
		Ostats.Naddr++
		return zbits

	memcase:
		fallthrough

	case obj.TYPE_MEM:
		if r != nil {
			r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
		}

		/* NOTE: 5g did
		if(r->f.prog->scond & (C_PBIT|C_WBIT))
			r->set.b[0] |= RtoB(a->reg);
		*/
		switch a.Name {
		default:
			// Note: This case handles NAME_EXTERN and NAME_STATIC.
			// We treat these as requiring eager writes to memory, due to
			// the possibility of a fault handler looking at them, so there is
			// not much point in registerizing the loads.
			// If we later choose the set of candidate variables from a
			// larger list, these cases could be deprioritized instead of
			// removed entirely.
			return zbits

		case obj.NAME_PARAM,
			obj.NAME_AUTO:
			n = int(a.Name)
		}
	}

	node, _ := a.Node.(*Node)
	if node == nil || node.Op != ONAME || node.Orig == nil {
		return zbits
	}
	node = node.Orig
	if node.Orig != node {
		Fatalf("%v: bad node", Ctxt.Dconv(a))
	}
	if node.Sym == nil || node.Sym.Name[0] == '.' {
		return zbits
	}
	et := EType(a.Etype)
	o := a.Offset
	w := a.Width
	if w < 0 {
		Fatalf("bad width %d for %v", w, Ctxt.Dconv(a))
	}

	flag := 0
	var v *Var
	for i := 0; i < nvar; i++ {
		v = &vars[i]
		if v.node == node && int(v.name) == n {
			if v.offset == o {
				if v.etype == et {
					if int64(v.width) == w {
						// TODO(rsc): Remove special case for arm here.
						if flag == 0 || Thearch.Thechar != '5' {
							return blsh(uint(i))
						}
					}
				}
			}

			// if they overlap, disable both
			if overlap_reg(v.offset, v.width, o, int(w)) {
				// print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
				v.addr = 1

				flag = 1
			}
		}
	}

	switch et {
	case 0, TFUNC:
		return zbits
	}

	if nvar >= NVAR {
		if Debug['w'] > 1 && node != nil {
			Fatalf("variable not optimized: %v", Nconv(node, obj.FmtSharp))
		}
		if Debug['v'] > 0 {
			Warn("variable not optimized: %v", Nconv(node, obj.FmtSharp))
		}

		// If we're not tracking a word in a variable, mark the rest as
		// having its address taken, so that we keep the whole thing
		// live at all calls. otherwise we might optimize away part of
		// a variable but not all of it.
		var v *Var
		for i := 0; i < nvar; i++ {
			v = &vars[i]
			if v.node == node {
				v.addr = 1
			}
		}

		return zbits
	}

	i := nvar
	nvar++
	v = &vars[i]
	v.id = i
	v.offset = o
	v.name = int8(n)
	v.etype = et
	v.width = int(w)
	v.addr = int8(flag) // funny punning
	v.node = node

	// node->opt is the head of a linked list
	// of Vars within the given Node, so that
	// we can start at a Var and find all the other
	// Vars in the same Go variable.
	v.nextinnode, _ = node.Opt().(*Var)

	node.SetOpt(v)

	bit := blsh(uint(i))
	if n == obj.NAME_EXTERN || n == obj.NAME_STATIC {
		for z := 0; z < BITS; z++ {
			externs.b[z] |= bit.b[z]
		}
	}
	if n == obj.NAME_PARAM {
		for z := 0; z < BITS; z++ {
			params.b[z] |= bit.b[z]
		}
	}

	if node.Class == PPARAM {
		for z := 0; z < BITS; z++ {
			ivar.b[z] |= bit.b[z]
		}
	}
	if node.Class == PPARAMOUT {
		for z := 0; z < BITS; z++ {
			ovar.b[z] |= bit.b[z]
		}
	}

	// Treat values with their address taken as live at calls,
	// because the garbage collector's liveness analysis in plive.go does.
	// These must be consistent or else we will elide stores and the garbage
	// collector will see uninitialized data.
	// The typical case where our own analysis is out of sync is when the
	// node appears to have its address taken but that code doesn't actually
	// get generated and therefore doesn't show up as an address being
	// taken when we analyze the instruction stream.
	// One instance of this case is when a closure uses the same name as
	// an outer variable for one of its own variables declared with :=.
	// The parser flags the outer variable as possibly shared, and therefore
	// sets addrtaken, even though it ends up not being actually shared.
	// If we were better about _ elision, _ = &x would suffice too.
	// The broader := in a closure problem is mentioned in a comment in
	// closure.go:/^typecheckclosure and dcl.go:/^oldname.
	if node.Addrtaken {
		v.addr = 1
	}

	// Disable registerization for globals, because:
	// (1) we might panic at any time and we want the recovery code
	// to see the latest values (issue 1304).
	// (2) we don't know what pointers might point at them and we want
	// loads via those pointers to see updated values and vice versa (issue 7995).
	//
	// Disable registerization for results if using defer, because the deferred func
	// might recover and return, causing the current values to be used.
	if node.Class == PEXTERN || (hasdefer && node.Class == PPARAMOUT) {
		v.addr = 1
	}

	if Debug['R'] != 0 {
		fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, Econv(et), o, w, Nconv(node, obj.FmtSharp), Ctxt.Dconv(a), v.addr)
	}
	Ostats.Nvar++

	return bit
}
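
// Illustrative sketch (not part of the original source): mkvar tracks each
// stack word separately, so a multiword Go variable becomes several Vars
// chained through nextinnode off node.Opt(). For a two-word string header
// (ptr at offset 0, len at offset 8) the list might look like:
//
//	node.Opt() -> Var{offset: 8} -> nextinnode -> Var{offset: 0}
//
// Insertion is at the head, so the most recently created word comes first.
// prop walks this list to keep whole variables live across calls.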

var change int

func prop(f *Flow, ref Bits, cal Bits) {
	var f1 *Flow
	var r1 *Reg
	var z int
	var i int
	var v *Var
	var v1 *Var

	for f1 = f; f1 != nil; f1 = f1.P1 {
		r1 = f1.Data.(*Reg)
		for z = 0; z < BITS; z++ {
			ref.b[z] |= r1.refahead.b[z]
			if ref.b[z] != r1.refahead.b[z] {
				r1.refahead.b[z] = ref.b[z]
				change = 1
			}

			cal.b[z] |= r1.calahead.b[z]
			if cal.b[z] != r1.calahead.b[z] {
				r1.calahead.b[z] = cal.b[z]
				change = 1
			}
		}

		switch f1.Prog.As {
		case obj.ACALL:
			if Noreturn(f1.Prog) {
				break
			}

			// Mark all input variables (ivar) as used, because that's what the
			// liveness bitmaps say. The liveness bitmaps say that so that a
			// panic will not show stale values in the parameter dump.
			// Mark variables with a recent VARDEF (r1->act) as used,
			// so that the optimizer flushes initializations to memory,
			// so that if a garbage collection happens during this CALL,
			// the collector will see initialized memory. Again this is to
			// match what the liveness bitmaps say.
			for z = 0; z < BITS; z++ {
				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
				ref.b[z] = 0
			}

			// cal.b is the current approximation of what's live across the call.
			// Every bit in cal.b is a single stack word. For each such word,
			// find all the other tracked stack words in the same Go variable
			// (struct/slice/string/interface) and mark them live too.
			// This is necessary because the liveness analysis for the garbage
			// collector works at variable granularity, not at word granularity.
			// It is fundamental for slice/string/interface: the garbage collector
			// needs the whole value, not just some of the words, in order to
			// interpret the other bits correctly. Specifically, slice needs a consistent
			// ptr and cap, string needs a consistent ptr and len, and interface
			// needs a consistent type word and data word.
			for z = 0; z < BITS; z++ {
				if cal.b[z] == 0 {
					continue
				}
				for i = 0; i < 64; i++ {
					if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
						continue
					}
					v = &vars[z*64+i]
					if v.node.Opt() == nil { // v represents fixed register, not Go variable
						continue
					}

					// v->node->opt is the head of a linked list of Vars
					// corresponding to tracked words from the Go variable v->node.
					// Walk the list and set all the bits.
					// For a large struct this could end up being quadratic:
					// after the first setting, the outer loop (for z, i) would see a 1 bit
					// for all of the remaining words in the struct, and for each such
					// word would go through and turn on all the bits again.
					// To avoid the quadratic behavior, we only turn on the bits if
					// v is the head of the list or if the head's bit is not yet turned on.
					// This will set the bits at most twice, keeping the overall loop linear.
					v1, _ = v.node.Opt().(*Var)

					if v == v1 || !btest(&cal, uint(v1.id)) {
						for ; v1 != nil; v1 = v1.nextinnode {
							biset(&cal, uint(v1.id))
						}
					}
				}
			}

		case obj.ATEXT:
			for z = 0; z < BITS; z++ {
				cal.b[z] = 0
				ref.b[z] = 0
			}

		case obj.ARET:
			for z = 0; z < BITS; z++ {
				cal.b[z] = externs.b[z] | ovar.b[z]
				ref.b[z] = 0
			}
		}

		for z = 0; z < BITS; z++ {
			ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]
			cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z])
			r1.refbehind.b[z] = ref.b[z]
			r1.calbehind.b[z] = cal.b[z]
		}

		if f1.Active != 0 {
			break
		}
		f1.Active = 1
	}

	var r *Reg
	var f2 *Flow
	for ; f != f1; f = f.P1 {
		r = f.Data.(*Reg)
		for f2 = f.P2; f2 != nil; f2 = f2.P2link {
			prop(f2, r.refbehind, r.calbehind)
		}
	}
}
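
// Illustrative sketch (not part of the original source): prop is a backward
// dataflow pass. At each instruction the sets are updated by the usual
// liveness transfer function, written here over each 64-bit word z:
//
//	ref' = (ref &^ set) | use1 | use2 // kill writes, add reads
//	cal' = cal &^ (set | use1 | use2) // any touch ends "live across call"
//
// change is set whenever a refahead/calahead word grows, and regopt reruns
// the pass until a fixed point is reached.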

func synch(f *Flow, dif Bits) {
	var r1 *Reg
	var z int

	for f1 := f; f1 != nil; f1 = f1.S1 {
		r1 = f1.Data.(*Reg)
		for z = 0; z < BITS; z++ {
			dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
			if dif.b[z] != r1.regdiff.b[z] {
				r1.regdiff.b[z] = dif.b[z]
				change = 1
			}
		}

		if f1.Active != 0 {
			break
		}
		f1.Active = 1
		for z = 0; z < BITS; z++ {
			dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
		}
		if f1.S2 != nil {
			synch(f1.S2, dif)
		}
	}
}

func allreg(b uint64, r *Rgn) uint64 {
	v := &vars[r.varno]
	r.regno = 0
	switch v.etype {
	default:
		Fatalf("unknown etype %d/%v", Bitno(b), Econv(v.etype))

	case TINT8,
		TUINT8,
		TINT16,
		TUINT16,
		TINT32,
		TUINT32,
		TINT64,
		TUINT64,
		TINT,
		TUINT,
		TUINTPTR,
		TBOOL,
		TPTR32,
		TPTR64:
		i := Thearch.BtoR(^b)
		if i != 0 && r.cost > 0 {
			r.regno = int16(i)
			return Thearch.RtoB(i)
		}

	case TFLOAT32, TFLOAT64:
		i := Thearch.BtoF(^b)
		if i != 0 && r.cost > 0 {
			r.regno = int16(i)
			return Thearch.FtoB(i)
		}
	}

	return 0
}

func LOAD(r *Reg, z int) uint64 {
	return ^r.refbehind.b[z] & r.refahead.b[z]
}

func STORE(r *Reg, z int) uint64 {
	return ^r.calbehind.b[z] & r.calahead.b[z]
}

// Cost parameters
const (
	CLOAD = 5 // cost of load
	CREF  = 5 // cost of reference if not registerized
	LOOP  = 3 // loop execution count (applied in popt.go)
)
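
// Illustrative sketch (not part of the original source): paint1 below
// accumulates a region's net benefit in the global change counter, weighted
// by f.Loop (which popt.go scales by LOOP for each enclosing loop). For a
// variable inside a single loop, f.Loop = 3, so roughly:
//
//	two references:  change += 2 * CREF * 3 // +30
//	a load at entry: change -= CLOAD * 3    // -15
//	a store at exit: change -= CLOAD * 3    // -15
//
// regopt keeps the region only if the resulting change is positive.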

func paint1(f *Flow, bn int) {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	r := f.Data.(*Reg)
	if r.act.b[z]&bb != 0 {
		return
	}
	var f1 *Flow
	var r1 *Reg
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb != 0 {
			break
		}
		f = f1
		r = r1
	}

	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
		change -= CLOAD * int(f.Loop)
	}

	for {
		r.act.b[z] |= bb

		if f.Prog.As != obj.ANOP { // don't give credit for NOPs
			if r.use1.b[z]&bb != 0 {
				change += CREF * int(f.Loop)
			}
			if (r.use2.b[z]|r.set.b[z])&bb != 0 {
				change += CREF * int(f.Loop)
			}
		}

		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
			change -= CLOAD * int(f.Loop)
		}

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					paint1(f1, bn)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				paint1(f1, bn)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb != 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}
}

func paint2(f *Flow, bn int, depth int) uint64 {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	vreg := regbits
	r := f.Data.(*Reg)
	if r.act.b[z]&bb == 0 {
		return vreg
	}
	var r1 *Reg
	var f1 *Flow
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb == 0 {
			break
		}
		f = f1
		r = r1
	}

	for {
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf(" paint2 %d %v\n", depth, f.Prog)
		}

		r.act.b[z] &^= bb

		vreg |= r.regu

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					vreg |= paint2(f1, bn, depth+1)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				vreg |= paint2(f1, bn, depth+1)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb == 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}

	return vreg
}

func paint3(f *Flow, bn int, rb uint64, rn int) {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	r := f.Data.(*Reg)
	if r.act.b[z]&bb != 0 {
		return
	}
	var r1 *Reg
	var f1 *Flow
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb != 0 {
			break
		}
		f = f1
		r = r1
	}

	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
		addmove(f, bn, rn, 0)
	}
	var p *obj.Prog
	for {
		r.act.b[z] |= bb
		p = f.Prog

		if r.use1.b[z]&bb != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf("%v", p)
			}
			addreg(&p.From, rn)
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf(" ===change== %v\n", p)
			}
		}

		if (r.use2.b[z]|r.set.b[z])&bb != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf("%v", p)
			}
			addreg(&p.To, rn)
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf(" ===change== %v\n", p)
			}
		}

		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
			addmove(f, bn, rn, 1)
		}
		r.regu |= rb

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					paint3(f1, bn, rb, rn)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				paint3(f1, bn, rb, rn)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb != 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}
}

func addreg(a *obj.Addr, rn int) {
	a.Sym = nil
	a.Node = nil
	a.Offset = 0
	a.Type = obj.TYPE_REG
	a.Reg = int16(rn)
	a.Name = 0

	Ostats.Ncvtreg++
}
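
// Illustrative sketch (not part of the original source): the three paint
// passes flood-fill the same region of the flow graph with matching
// traversal logic but do different work, roughly as regopt drives them:
//
//	change = 0
//	paint1(rgp.enter, bn)              // pass 5: price the region
//	usedreg = paint2(rgp.enter, bn, 0) // pass 6: registers already busy
//	vreg = allreg(usedreg, rgp)        // pick a free one; sets rgp.regno
//	if rgp.regno != 0 {
//		paint3(rgp.enter, bn, vreg, int(rgp.regno)) // rewrite, add spills
//	}
//
// paint1 and paint3 set bits in r.act as they visit; paint2 clears them,
// which is what allows paint3 to repaint the same region afterwards.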

func dumpone(f *Flow, isreg int) {
	fmt.Printf("%d:%v", f.Loop, f.Prog)
	if isreg != 0 {
		r := f.Data.(*Reg)
		var bit Bits
		for z := 0; z < BITS; z++ {
			bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
		}
		if bany(&bit) {
			fmt.Printf("\t")
			if bany(&r.set) {
				fmt.Printf(" s:%v", &r.set)
			}
			if bany(&r.use1) {
				fmt.Printf(" u1:%v", &r.use1)
			}
			if bany(&r.use2) {
				fmt.Printf(" u2:%v", &r.use2)
			}
			if bany(&r.refbehind) {
				fmt.Printf(" rb:%v ", &r.refbehind)
			}
			if bany(&r.refahead) {
				fmt.Printf(" ra:%v ", &r.refahead)
			}
			if bany(&r.calbehind) {
				fmt.Printf(" cb:%v ", &r.calbehind)
			}
			if bany(&r.calahead) {
				fmt.Printf(" ca:%v ", &r.calahead)
			}
			if bany(&r.regdiff) {
				fmt.Printf(" d:%v ", &r.regdiff)
			}
			if bany(&r.act) {
				fmt.Printf(" a:%v ", &r.act)
			}
		}
	}

	fmt.Printf("\n")
}

func Dumpit(str string, r0 *Flow, isreg int) {
	var r1 *Flow

	fmt.Printf("\n%s\n", str)
	for r := r0; r != nil; r = r.Link {
		dumpone(r, isreg)
		r1 = r.P2
		if r1 != nil {
			fmt.Printf("\tpred:")
			for ; r1 != nil; r1 = r1.P2link {
				fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
			}
			if r.P1 != nil {
				fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
			} else {
				fmt.Printf(" (only)")
			}
			fmt.Printf("\n")
		}

		// Print successors if it's not just the next one
		if r.S1 != r.Link || r.S2 != nil {
			fmt.Printf("\tsucc:")
			if r.S1 != nil {
				fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
			}
			if r.S2 != nil {
				fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
			}
			fmt.Printf("\n")
		}
	}
}
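
// Illustrative sketch (not part of the original source): in -R -v dumps,
// each instruction line from dumpone is "loop:prog" followed by the
// non-empty bit sets, abbreviated as
//
//	s:  set        u1: use1      u2: use2
//	rb: refbehind  ra: refahead
//	cb: calbehind  ca: calahead
//	d:  regdiff    a:  act
//
// with each set printed by Bits.String at the end of this file.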

func regopt(firstp *obj.Prog) {
	mergetemp(firstp)

	// control flow is more complicated in generated go code
	// than in generated c code. define pseudo-variables for
	// registers, so we have complete register usage information.
	var nreg int
	regnames := Thearch.Regnames(&nreg)

	nvar = nreg
	for i := 0; i < nreg; i++ {
		vars[i] = Var{}
	}
	for i := 0; i < nreg; i++ {
		if regnodes[i] == nil {
			regnodes[i] = newname(Lookup(regnames[i]))
		}
		vars[i].node = regnodes[i]
	}

	regbits = Thearch.Excludedregs()
	externs = zbits
	params = zbits
	consts = zbits
	addrs = zbits
	ivar = zbits
	ovar = zbits

	// pass 1
	// build aux data structure
	// allocate pcs
	// find use and set of variables
	g := Flowstart(firstp, func() interface{} { return new(Reg) })
	if g == nil {
		for i := 0; i < nvar; i++ {
			vars[i].node.SetOpt(nil)
		}
		return
	}

	firstf := g.Start

	for f := firstf; f != nil; f = f.Link {
		p := f.Prog
		// AVARLIVE must be considered a use, do not skip it.
		// Otherwise the variable will be optimized away,
		// and the whole point of AVARLIVE is to keep it on the stack.
		if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
			continue
		}

		// Avoid making variables for direct-called functions.
		if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
			continue
		}

		// from vs to doesn't matter for registers.
		r := f.Data.(*Reg)
		r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
		r.set.b[0] |= p.Info.Regset

		bit := mkvar(f, &p.From)
		if bany(&bit) {
			if p.Info.Flags&LeftAddr != 0 {
				setaddrs(bit)
			}
			if p.Info.Flags&LeftRead != 0 {
				for z := 0; z < BITS; z++ {
					r.use1.b[z] |= bit.b[z]
				}
			}
			if p.Info.Flags&LeftWrite != 0 {
				for z := 0; z < BITS; z++ {
					r.set.b[z] |= bit.b[z]
				}
			}
		}

		// Compute used register for reg
		if p.Info.Flags&RegRead != 0 {
			r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
		}

		// Currently we never generate three register forms.
		// If we do, this will need to change.
		if p.From3Type() != obj.TYPE_NONE {
			Fatalf("regopt not implemented for from3")
		}

		bit = mkvar(f, &p.To)
		if bany(&bit) {
			if p.Info.Flags&RightAddr != 0 {
				setaddrs(bit)
			}
			if p.Info.Flags&RightRead != 0 {
				for z := 0; z < BITS; z++ {
					r.use2.b[z] |= bit.b[z]
				}
			}
			if p.Info.Flags&RightWrite != 0 {
				for z := 0; z < BITS; z++ {
					r.set.b[z] |= bit.b[z]
				}
			}
		}
	}

	for i := 0; i < nvar; i++ {
		v := &vars[i]
		if v.addr != 0 {
			bit := blsh(uint(i))
			for z := 0; z < BITS; z++ {
				addrs.b[z] |= bit.b[z]
			}
		}

		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, Econv(v.etype), v.width, v.node, v.offset)
		}
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass1", firstf, 1)
	}

	// pass 2
	// find looping structure
	flowrpo(g)

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass2", firstf, 1)
	}
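
	// Illustrative note (not part of the original source), inferred from the
	// code in this file: the passes below navigate the Flow graph with a
	// fixed convention,
	//
	//	f.S1, f.S2     // successors: usually fallthrough, then branch target
	//	f.P1           // lone straight-line predecessor
	//	f.P2, f.P2link // list of remaining (branch) predecessors
	//
	// prop walks P1/P2 backward; synch and walkvardef walk S1/S2 forward.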

	// pass 2.5
	// iterate propagating fat vardef covering forward
	// r->act records vars with a VARDEF since the last CALL.
	// (r->act will be reused in pass 5 for something else,
	// but we'll be done with it by then.)
	active := 0

	for f := firstf; f != nil; f = f.Link {
		f.Active = 0
		r := f.Data.(*Reg)
		r.act = zbits
	}

	for f := firstf; f != nil; f = f.Link {
		p := f.Prog
		if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil {
			active++
			walkvardef(p.To.Node.(*Node), f, active)
		}
	}

	// pass 3
	// iterate propagating usage
	// back until flow graph is complete
	var f1 *Flow
	var i int
	var f *Flow
loop1:
	change = 0

	for f = firstf; f != nil; f = f.Link {
		f.Active = 0
	}
	for f = firstf; f != nil; f = f.Link {
		if f.Prog.As == obj.ARET {
			prop(f, zbits, zbits)
		}
	}

	// pick up unreachable code
loop11:
	i = 0

	for f = firstf; f != nil; f = f1 {
		f1 = f.Link
		if f1 != nil && f1.Active != 0 && f.Active == 0 {
			prop(f, zbits, zbits)
			i = 1
		}
	}

	if i != 0 {
		goto loop11
	}
	if change != 0 {
		goto loop1
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass3", firstf, 1)
	}

	// pass 4
	// iterate propagating register/variable synchrony
	// forward until graph is complete
loop2:
	change = 0

	for f = firstf; f != nil; f = f.Link {
		f.Active = 0
	}
	synch(firstf, zbits)
	if change != 0 {
		goto loop2
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass4", firstf, 1)
	}

	// pass 4.5
	// move register pseudo-variables into regu.
	mask := uint64((1 << uint(nreg)) - 1)
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)
		r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
		r.set.b[0] &^= mask
		r.use1.b[0] &^= mask
		r.use2.b[0] &^= mask
		r.refbehind.b[0] &^= mask
		r.refahead.b[0] &^= mask
		r.calbehind.b[0] &^= mask
		r.calahead.b[0] &^= mask
		r.regdiff.b[0] &^= mask
		r.act.b[0] &^= mask
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass4.5", firstf, 1)
	}
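
	// Illustrative note (not part of the original source): pass 4.5 relies
	// on the register pseudo-variables created at the top of regopt
	// occupying var numbers 0..nreg-1, i.e. the low bits of word 0. If nreg
	// were 16, the mask would be (1<<16)-1 = 0xffff; r.regu then keeps only
	// the hardware-register bits, and the &^= lines strip those same bits
	// out of all the variable sets.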

	// pass 5
	// isolate regions
	// calculate costs (paint1)
	var bit Bits
	if f := firstf; f != nil {
		r := f.Data.(*Reg)
		for z := 0; z < BITS; z++ {
			bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
		}
		if bany(&bit) && !f.Refset {
			// should never happen - all variables are preset
			if Debug['w'] != 0 {
				fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = true
		}
	}

	for f := firstf; f != nil; f = f.Link {
		(f.Data.(*Reg)).act = zbits
	}
	nregion = 0
	region = region[:0]
	var rgp *Rgn
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)
		for z := 0; z < BITS; z++ {
			bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
		}
		if bany(&bit) && !f.Refset {
			if Debug['w'] != 0 {
				fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = true
			Thearch.Excise(f)
		}

		for z := 0; z < BITS; z++ {
			bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
		}
		for bany(&bit) {
			i = bnum(&bit)
			change = 0
			paint1(f, i)
			biclr(&bit, uint(i))
			if change <= 0 {
				continue
			}
			if nregion >= MaxRgn {
				nregion++
				continue
			}

			region = append(region, Rgn{
				enter: f,
				cost:  int16(change),
				varno: int16(i),
			})
			nregion++
		}
	}

	if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
		Warn("regions: %d\n", nregion)
	}
	if nregion >= MaxRgn {
		if Debug['v'] != 0 {
			Warn("too many regions: %d\n", nregion)
		}
		nregion = MaxRgn
	}

	sort.Sort(rcmp(region[:nregion]))

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass5", firstf, 1)
	}

	// pass 6
	// determine used registers (paint2)
	// replace code (paint3)
	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\nregisterizing\n")
	}
	var usedreg uint64
	var vreg uint64
	for i := 0; i < nregion; i++ {
		rgp = &region[i]
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
		}
		bit = blsh(uint(rgp.varno))
		usedreg = paint2(rgp.enter, int(rgp.varno), 0)
		vreg = allreg(usedreg, rgp)
		if rgp.regno != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				v := &vars[rgp.varno]
				fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, Econv(v.etype), obj.Rconv(int(rgp.regno)), usedreg, vreg)
			}

			paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
		}
	}

	// free aux structures. peep allocates new ones.
	for i := 0; i < nvar; i++ {
		vars[i].node.SetOpt(nil)
	}
	Flowend(g)
	firstf = nil

	if Debug['R'] != 0 && Debug['v'] != 0 {
		// Rebuild flow graph, since we inserted instructions
		g := Flowstart(firstp, nil)
		firstf = g.Start
		Dumpit("pass6", firstf, 0)
		Flowend(g)
		firstf = nil
	}

	// pass 7
	// peep-hole on basic block
	if Debug['R'] == 0 || Debug['P'] != 0 {
		Thearch.Peep(firstp)
	}

	// eliminate nops
	for p := firstp; p != nil; p = p.Link {
		for p.Link != nil && p.Link.As == obj.ANOP {
			p.Link = p.Link.Link
		}
		if p.To.Type == obj.TYPE_BRANCH {
			for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
				p.To.Val = p.To.Val.(*obj.Prog).Link
			}
		}
	}

	if Debug['R'] != 0 {
		if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
			fmt.Printf("\nstats\n")
		}

		if Ostats.Ncvtreg != 0 {
			fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
		}
		if Ostats.Nspill != 0 {
			fmt.Printf("\t%4d spill\n", Ostats.Nspill)
		}
		if Ostats.Nreload != 0 {
			fmt.Printf("\t%4d reload\n", Ostats.Nreload)
		}
		if Ostats.Ndelmov != 0 {
			fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
		}
		if Ostats.Nvar != 0 {
			fmt.Printf("\t%4d var\n", Ostats.Nvar)
		}
		if Ostats.Naddr != 0 {
			fmt.Printf("\t%4d addr\n", Ostats.Naddr)
		}

		Ostats = OptStats{}
	}
}

// bany reports whether any bits in a are set.
func bany(a *Bits) bool {
	for _, x := range &a.b { // & to avoid making a copy of a.b
		if x != 0 {
			return true
		}
	}
	return false
}

// bnum reports the lowest index of a 1 bit in a.
func bnum(a *Bits) int {
	for i, x := range &a.b { // & to avoid making a copy of a.b
		if x != 0 {
			return 64*i + Bitno(x)
		}
	}

	Fatalf("bad in bnum")
	return 0
}

// blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n).
func blsh(n uint) Bits {
	c := zbits
	c.b[n/64] = 1 << (n % 64)
	return c
}

// btest reports whether bit n is 1.
func btest(a *Bits, n uint) bool {
	return a.b[n/64]&(1<<(n%64)) != 0
}

// biset sets bit n to 1.
func biset(a *Bits, n uint) {
	a.b[n/64] |= 1 << (n % 64)
}

// biclr sets bit n to 0.
func biclr(a *Bits, n uint) {
	a.b[n/64] &^= (1 << (n % 64))
}

// Bitno reports the lowest index of a 1 bit in b.
// It calls Fatalf if there is no 1 bit.
func Bitno(b uint64) int {
	if b == 0 {
		Fatalf("bad in bitno")
	}
	n := 0
	if b&(1<<32-1) == 0 {
		n += 32
		b >>= 32
	}
	if b&(1<<16-1) == 0 {
		n += 16
		b >>= 16
	}
	if b&(1<<8-1) == 0 {
		n += 8
		b >>= 8
	}
	if b&(1<<4-1) == 0 {
		n += 4
		b >>= 4
	}
	if b&(1<<2-1) == 0 {
		n += 2
		b >>= 2
	}
	if b&1 == 0 {
		n++
	}
	return n
}

// String returns a space-separated list of the variables represented by bits.
func (bits Bits) String() string {
	// Note: This method takes a value receiver, both for convenience
	// and to make it safe to modify the bits as we process them.
	// Even so, most prints above use &bits, because then the value
	// being stored in the interface{} is a pointer and does not require
	// an allocation and copy to create the interface{}.
	var buf bytes.Buffer
	sep := ""
	for bany(&bits) {
		i := bnum(&bits)
		buf.WriteString(sep)
		sep = " "
		v := &vars[i]
		if v.node == nil || v.node.Sym == nil {
			fmt.Fprintf(&buf, "$%d", i)
		} else {
			fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i)
			if v.offset != 0 {
				fmt.Fprintf(&buf, "%+d", int64(v.offset))
			}
		}
		biclr(&bits, uint(i))
	}
	return buf.String()
}
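
// Illustrative sketch (not part of the original source): Bitno binary-searches
// halves of the word, so it finds the lowest set bit in six constant-time
// steps (for nonzero b it matches math/bits.TrailingZeros64):
//
//	Bitno(0x1)   == 0
//	Bitno(0x10)  == 4
//	Bitno(1<<63) == 63
//
// Bits.String renders each variable as name(id) plus any nonzero offset: a
// Var for symbol x with id 3 and offset 8 prints as "x(3)+8", and a Var with
// no node or Sym prints as "$3".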