// Source: github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/cmd/9g/reg.c
//
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 64 /* 32 general + 32 floating */ 37 #define REGBITS ((uint64)0xffffffffffffffffull) 38 /*c2go enum { 39 NREGVAR = 64, 40 REGBITS = 0xffffffffffffffff, 41 }; 42 */ 43 44 static Reg* firstr; 45 static int first = 1; 46 47 int 48 rcmp(const void *a1, const void *a2) 49 { 50 Rgn *p1, *p2; 51 int c1, c2; 52 53 p1 = (Rgn*)a1; 54 p2 = (Rgn*)a2; 55 c1 = p2->cost; 56 c2 = p1->cost; 57 if(c1 -= c2) 58 return c1; 59 return p2->varno - p1->varno; 60 } 61 62 static void 63 setaddrs(Bits bit) 64 { 65 int i, n; 66 Var *v; 67 Node *node; 68 69 while(bany(&bit)) { 70 // convert each bit to a variable 71 i = bnum(bit); 72 node = var[i].node; 73 n = var[i].name; 74 biclr(&bit, i); 75 76 // disable all pieces of that variable 77 for(i=0; i<nvar; i++) { 78 v = var+i; 79 if(v->node == node && v->name == n) 80 v->addr = 2; 81 } 82 } 83 } 84 85 static char* regname[] = { 86 ".R0", 87 ".R1", 88 ".R2", 89 ".R3", 90 ".R4", 91 ".R5", 92 ".R6", 93 ".R7", 94 ".R8", 95 ".R9", 96 ".R10", 97 ".R11", 98 ".R12", 99 ".R13", 100 ".R14", 101 ".R15", 102 ".R16", 103 ".R17", 104 ".R18", 105 ".R19", 106 ".R20", 107 ".R21", 108 ".R22", 109 ".R23", 110 ".R24", 111 ".R25", 112 ".R26", 113 ".R27", 114 ".R28", 115 ".R29", 116 ".R30", 117 ".R31", 118 ".F0", 119 ".F1", 120 ".F2", 121 ".F3", 122 ".F4", 123 ".F5", 124 ".F6", 125 ".F7", 126 ".F8", 127 ".F9", 128 ".F10", 129 ".F11", 130 ".F12", 131 ".F13", 132 ".F14", 133 ".F15", 134 ".F16", 135 ".F17", 136 ".F18", 137 ".F19", 138 ".F20", 139 ".F21", 140 ".F22", 141 ".F23", 142 ".F24", 143 ".F25", 144 ".F26", 145 ".F27", 146 ".F28", 147 ".F29", 148 ".F30", 149 ".F31", 150 }; 151 152 static Node* regnodes[NREGVAR]; 153 154 static void walkvardef(Node *n, Reg *r, int active); 155 156 void 157 regopt(Prog *firstp) 158 { 159 Reg *r, *r1; 160 Prog *p; 161 Graph *g; 162 ProgInfo info; 163 int i, z, active; 164 uint64 vreg, usedreg; 165 Bits bit; 166 167 if(first) { 168 
fmtinstall('Q', Qconv); 169 first = 0; 170 } 171 172 mergetemp(firstp); 173 174 /* 175 * control flow is more complicated in generated go code 176 * than in generated c code. define pseudo-variables for 177 * registers, so we have complete register usage information. 178 */ 179 nvar = NREGVAR; 180 memset(var, 0, NREGVAR*sizeof var[0]); 181 for(i=0; i<NREGVAR; i++) { 182 if(regnodes[i] == N) 183 regnodes[i] = newname(lookup(regname[i])); 184 var[i].node = regnodes[i]; 185 } 186 187 // Exclude registers with fixed functions 188 regbits = (1<<D_R0)|RtoB(REGSP)|RtoB(REGG)|RtoB(REGTLS); 189 // Also exclude floating point registers with fixed constants 190 regbits |= FtoB(D_F0+27)|FtoB(D_F0+28)|FtoB(D_F0+29)|FtoB(D_F0+30)|FtoB(D_F0+31); 191 externs = zbits; 192 params = zbits; 193 consts = zbits; 194 addrs = zbits; 195 ivar = zbits; 196 ovar = zbits; 197 198 /* 199 * pass 1 200 * build aux data structure 201 * allocate pcs 202 * find use and set of variables 203 */ 204 g = flowstart(firstp, sizeof(Reg)); 205 if(g == nil) { 206 for(i=0; i<nvar; i++) 207 var[i].node->opt = nil; 208 return; 209 } 210 211 firstr = (Reg*)g->start; 212 213 for(r = firstr; r != R; r = (Reg*)r->f.link) { 214 p = r->f.prog; 215 if(p->as == AVARDEF || p->as == AVARKILL) 216 continue; 217 proginfo(&info, p); 218 219 // Avoid making variables for direct-called functions. 
220 if(p->as == ABL && p->to.name == D_EXTERN) 221 continue; 222 223 // from vs to doesn't matter for registers 224 r->use1.b[0] |= info.reguse | info.regindex; 225 r->set.b[0] |= info.regset; 226 227 // Compute used register for from 228 bit = mkvar(r, &p->from); 229 if(info.flags & LeftAddr) 230 setaddrs(bit); 231 if(info.flags & LeftRead) 232 for(z=0; z<BITS; z++) 233 r->use1.b[z] |= bit.b[z]; 234 235 // Compute used register for reg 236 if(info.flags & RegRead) { 237 if(p->from.type != D_FREG) 238 r->use1.b[0] |= RtoB(p->reg); 239 else 240 r->use1.b[0] |= FtoB(D_F0+p->reg); 241 } 242 243 // Currently we never generate three register forms. 244 // If we do, this will need to change. 245 if(p->from3.type != D_NONE) 246 fatal("regopt not implemented for from3"); 247 248 // Compute used register for to 249 bit = mkvar(r, &p->to); 250 if(info.flags & RightAddr) 251 setaddrs(bit); 252 if(info.flags & RightRead) 253 for(z=0; z<BITS; z++) 254 r->use2.b[z] |= bit.b[z]; 255 if(info.flags & RightWrite) 256 for(z=0; z<BITS; z++) 257 r->set.b[z] |= bit.b[z]; 258 } 259 260 for(i=0; i<nvar; i++) { 261 Var *v = var+i; 262 if(v->addr) { 263 bit = blsh(i); 264 for(z=0; z<BITS; z++) 265 addrs.b[z] |= bit.b[z]; 266 } 267 268 if(debug['R'] && debug['v']) 269 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 270 i, v->addr, v->etype, v->width, v->node, v->offset); 271 } 272 273 if(debug['R'] && debug['v']) 274 dumpit("pass1", &firstr->f, 1); 275 276 /* 277 * pass 2 278 * find looping structure 279 */ 280 flowrpo(g); 281 282 if(debug['R'] && debug['v']) 283 dumpit("pass2", &firstr->f, 1); 284 285 /* 286 * pass 2.5 287 * iterate propagating fat vardef covering forward 288 * r->act records vars with a VARDEF since the last CALL. 289 * (r->act will be reused in pass 5 for something else, 290 * but we'll be done with it by then.) 
291 */ 292 active = 0; 293 for(r = firstr; r != R; r = (Reg*)r->f.link) { 294 r->f.active = 0; 295 r->act = zbits; 296 } 297 for(r = firstr; r != R; r = (Reg*)r->f.link) { 298 p = r->f.prog; 299 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 300 active++; 301 walkvardef(p->to.node, r, active); 302 } 303 } 304 305 /* 306 * pass 3 307 * iterate propagating usage 308 * back until flow graph is complete 309 */ 310 loop1: 311 change = 0; 312 for(r = firstr; r != R; r = (Reg*)r->f.link) 313 r->f.active = 0; 314 for(r = firstr; r != R; r = (Reg*)r->f.link) 315 if(r->f.prog->as == ARET) 316 prop(r, zbits, zbits); 317 loop11: 318 /* pick up unreachable code */ 319 i = 0; 320 for(r = firstr; r != R; r = r1) { 321 r1 = (Reg*)r->f.link; 322 if(r1 && r1->f.active && !r->f.active) { 323 prop(r, zbits, zbits); 324 i = 1; 325 } 326 } 327 if(i) 328 goto loop11; 329 if(change) 330 goto loop1; 331 332 if(debug['R'] && debug['v']) 333 dumpit("pass3", &firstr->f, 1); 334 335 /* 336 * pass 4 337 * iterate propagating register/variable synchrony 338 * forward until graph is complete 339 */ 340 loop2: 341 change = 0; 342 for(r = firstr; r != R; r = (Reg*)r->f.link) 343 r->f.active = 0; 344 synch(firstr, zbits); 345 if(change) 346 goto loop2; 347 348 if(debug['R'] && debug['v']) 349 dumpit("pass4", &firstr->f, 1); 350 351 /* 352 * pass 4.5 353 * move register pseudo-variables into regu. 
354 */ 355 for(r = firstr; r != R; r = (Reg*)r->f.link) { 356 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 357 358 r->set.b[0] &= ~REGBITS; 359 r->use1.b[0] &= ~REGBITS; 360 r->use2.b[0] &= ~REGBITS; 361 r->refbehind.b[0] &= ~REGBITS; 362 r->refahead.b[0] &= ~REGBITS; 363 r->calbehind.b[0] &= ~REGBITS; 364 r->calahead.b[0] &= ~REGBITS; 365 r->regdiff.b[0] &= ~REGBITS; 366 r->act.b[0] &= ~REGBITS; 367 } 368 369 if(debug['R'] && debug['v']) 370 dumpit("pass4.5", &firstr->f, 1); 371 372 /* 373 * pass 5 374 * isolate regions 375 * calculate costs (paint1) 376 */ 377 r = firstr; 378 if(r) { 379 for(z=0; z<BITS; z++) 380 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 381 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 382 if(bany(&bit) && !r->f.refset) { 383 // should never happen - all variables are preset 384 if(debug['w']) 385 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 386 r->f.refset = 1; 387 } 388 } 389 for(r = firstr; r != R; r = (Reg*)r->f.link) 390 r->act = zbits; 391 rgp = region; 392 nregion = 0; 393 for(r = firstr; r != R; r = (Reg*)r->f.link) { 394 for(z=0; z<BITS; z++) 395 bit.b[z] = r->set.b[z] & 396 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 397 if(bany(&bit) && !r->f.refset) { 398 if(debug['w']) 399 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 400 r->f.refset = 1; 401 excise(&r->f); 402 } 403 for(z=0; z<BITS; z++) 404 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 405 while(bany(&bit)) { 406 i = bnum(bit); 407 rgp->enter = r; 408 rgp->varno = i; 409 change = 0; 410 paint1(r, i); 411 biclr(&bit, i); 412 if(change <= 0) 413 continue; 414 rgp->cost = change; 415 nregion++; 416 if(nregion >= NRGN) { 417 if(debug['R'] && debug['v']) 418 print("too many regions\n"); 419 goto brk; 420 } 421 rgp++; 422 } 423 } 424 brk: 425 qsort(region, nregion, sizeof(region[0]), rcmp); 426 427 if(debug['R'] && debug['v']) 428 dumpit("pass5", &firstr->f, 1); 429 430 /* 431 * pass 6 432 * determine used 
registers (paint2) 433 * replace code (paint3) 434 */ 435 rgp = region; 436 if(debug['R'] && debug['v']) 437 print("\nregisterizing\n"); 438 for(i=0; i<nregion; i++) { 439 if(debug['R'] && debug['v']) 440 print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc); 441 bit = blsh(rgp->varno); 442 usedreg = paint2(rgp->enter, rgp->varno, 0); 443 vreg = allreg(usedreg, rgp); 444 if(rgp->regno != 0) { 445 if(debug['R'] && debug['v']) { 446 Var *v; 447 448 v = var + rgp->varno; 449 print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n", 450 v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg); 451 } 452 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 453 } 454 rgp++; 455 } 456 457 /* 458 * free aux structures. peep allocates new ones. 459 */ 460 for(i=0; i<nvar; i++) 461 var[i].node->opt = nil; 462 flowend(g); 463 firstr = R; 464 465 if(debug['R'] && debug['v']) { 466 // Rebuild flow graph, since we inserted instructions 467 g = flowstart(firstp, sizeof(Reg)); 468 firstr = (Reg*)g->start; 469 dumpit("pass6", &firstr->f, 1); 470 flowend(g); 471 firstr = R; 472 } 473 474 /* 475 * pass 7 476 * peep-hole on basic block 477 */ 478 if(!debug['R'] || debug['P']) 479 peep(firstp); 480 481 /* 482 * eliminate nops 483 */ 484 for(p=firstp; p!=P; p=p->link) { 485 while(p->link != P && p->link->as == ANOP) 486 p->link = p->link->link; 487 if(p->to.type == D_BRANCH) 488 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 489 p->to.u.branch = p->to.u.branch->link; 490 } 491 492 if(debug['R']) { 493 if(ostats.ncvtreg || 494 ostats.nspill || 495 ostats.ndelmov || 496 ostats.nvar || 497 0) 498 print("\nstats\n"); 499 500 if(ostats.ncvtreg) 501 print(" %4d cvtreg\n", ostats.ncvtreg); 502 if(ostats.nspill) 503 print(" %4d spill\n", ostats.nspill); 504 if(ostats.ndelmov) 505 print(" %4d delmov\n", ostats.ndelmov); 506 if(ostats.nvar) 507 print(" %4d var\n", ostats.nvar); 508 509 memset(&ostats, 0, 
sizeof(ostats)); 510 } 511 512 return; 513 } 514 515 static void 516 walkvardef(Node *n, Reg *r, int active) 517 { 518 Reg *r1, *r2; 519 int bn; 520 Var *v; 521 522 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 523 if(r1->f.active == active) 524 break; 525 r1->f.active = active; 526 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) 527 break; 528 for(v=n->opt; v!=nil; v=v->nextinnode) { 529 bn = v - var; 530 biset(&r1->act, bn); 531 } 532 if(r1->f.prog->as == ABL) 533 break; 534 } 535 536 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 537 if(r2->f.s2 != nil) 538 walkvardef(n, (Reg*)r2->f.s2, active); 539 } 540 541 /* 542 * add mov b,rn 543 * just after r 544 */ 545 void 546 addmove(Reg *r, int bn, int rn, int f) 547 { 548 Prog *p, *p1, *p2; 549 Adr *a; 550 Var *v; 551 552 p1 = mal(sizeof(*p1)); 553 *p1 = zprog; 554 p = r->f.prog; 555 556 // If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc), 557 // delay the load until after the fixup. 558 p2 = p->link; 559 if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == D_REG) 560 p = p2; 561 562 p1->link = p->link; 563 p->link = p1; 564 p1->lineno = p->lineno; 565 566 v = var + bn; 567 568 a = &p1->to; 569 a->name = v->name; 570 a->node = v->node; 571 a->sym = linksym(v->node->sym); 572 a->offset = v->offset; 573 a->etype = v->etype; 574 a->type = D_OREG; 575 if(a->etype == TARRAY || a->sym == nil) 576 a->type = D_CONST; 577 578 if(v->addr) 579 fatal("addmove: shouldn't be doing this %A\n", a); 580 581 switch(v->etype) { 582 default: 583 print("What is this %E\n", v->etype); 584 585 case TINT8: 586 p1->as = AMOVB; 587 break; 588 case TBOOL: 589 case TUINT8: 590 //print("movbu %E %d %S\n", v->etype, bn, v->sym); 591 p1->as = AMOVBZ; 592 break; 593 case TINT16: 594 p1->as = AMOVH; 595 break; 596 case TUINT16: 597 p1->as = AMOVHZ; 598 break; 599 case TINT32: 600 p1->as = AMOVW; 601 break; 602 case TUINT32: 603 case TPTR32: 604 p1->as = AMOVWZ; 605 break; 606 case TINT64: 607 case TUINT64: 608 case 
TPTR64: 609 p1->as = AMOVD; 610 break; 611 case TFLOAT32: 612 p1->as = AFMOVS; 613 break; 614 case TFLOAT64: 615 p1->as = AFMOVD; 616 break; 617 } 618 619 p1->from.type = D_REG; 620 p1->from.reg = rn; 621 if(rn >= NREG) { 622 p1->from.type = D_FREG; 623 p1->from.reg = rn-NREG; 624 } 625 if(!f) { 626 p1->from = *a; 627 *a = zprog.from; 628 a->type = D_REG; 629 a->reg = rn; 630 if(rn >= NREG) { 631 a->type = D_FREG; 632 a->reg = rn-NREG; 633 } 634 if(v->etype == TUINT8 || v->etype == TBOOL) 635 p1->as = AMOVBZ; 636 if(v->etype == TUINT16) 637 p1->as = AMOVHZ; 638 } 639 if(debug['R']) 640 print("%P\t.a%P\n", p, p1); 641 ostats.nspill++; 642 } 643 644 static int 645 overlap(int64 o1, int w1, int64 o2, int w2) 646 { 647 int64 t1, t2; 648 649 t1 = o1+w1; 650 t2 = o2+w2; 651 652 if(!(t1 > o2 && t2 > o1)) 653 return 0; 654 655 return 1; 656 } 657 658 Bits 659 mkvar(Reg *r, Adr *a) 660 { 661 USED(r); 662 Var *v; 663 int i, t, n, et, z, flag; 664 int64 w; 665 int64 o; 666 Bits bit; 667 Node *node; 668 669 // mark registers used 670 t = a->type; 671 switch(t) { 672 default: 673 print("type %d %d %D\n", t, a->name, a); 674 goto none; 675 676 case D_NONE: 677 goto none; 678 679 case D_BRANCH: 680 case D_CONST: 681 case D_FCONST: 682 case D_SCONST: 683 case D_SPR: 684 case D_OREG: 685 break; 686 687 case D_REG: 688 if(a->reg != NREG) { 689 bit = zbits; 690 bit.b[0] = RtoB(a->reg); 691 return bit; 692 } 693 break; 694 695 case D_FREG: 696 if(a->reg != NREG) { 697 bit = zbits; 698 bit.b[0] = FtoB(D_F0+a->reg); 699 return bit; 700 } 701 break; 702 } 703 704 switch(a->name) { 705 default: 706 goto none; 707 708 case D_EXTERN: 709 case D_STATIC: 710 case D_AUTO: 711 case D_PARAM: 712 n = a->name; 713 break; 714 } 715 716 node = a->node; 717 if(node == N || node->op != ONAME || node->orig == N) 718 goto none; 719 node = node->orig; 720 if(node->orig != node) 721 fatal("%D: bad node", a); 722 if(node->sym == S || node->sym->name[0] == '.') 723 goto none; 724 et = a->etype; 725 o = 
a->offset; 726 w = a->width; 727 if(w < 0) 728 fatal("bad width %lld for %D", w, a); 729 730 flag = 0; 731 for(i=0; i<nvar; i++) { 732 v = var+i; 733 if(v->node == node && v->name == n) { 734 if(v->offset == o) 735 if(v->etype == et) 736 if(v->width == w) 737 return blsh(i); 738 739 // if they overlap, disable both 740 if(overlap(v->offset, v->width, o, w)) { 741 v->addr = 1; 742 flag = 1; 743 } 744 } 745 } 746 747 switch(et) { 748 case 0: 749 case TFUNC: 750 goto none; 751 } 752 753 if(nvar >= NVAR) { 754 if(debug['w'] > 1 && node != N) 755 fatal("variable not optimized: %#N", node); 756 757 // If we're not tracking a word in a variable, mark the rest as 758 // having its address taken, so that we keep the whole thing 759 // live at all calls. otherwise we might optimize away part of 760 // a variable but not all of it. 761 for(i=0; i<nvar; i++) { 762 v = var+i; 763 if(v->node == node) 764 v->addr = 1; 765 } 766 goto none; 767 } 768 769 i = nvar; 770 nvar++; 771 v = var+i; 772 v->offset = o; 773 v->name = n; 774 v->etype = et; 775 v->width = w; 776 v->addr = flag; // funny punning 777 v->node = node; 778 779 // node->opt is the head of a linked list 780 // of Vars within the given Node, so that 781 // we can start at a Var and find all the other 782 // Vars in the same Go variable. 783 v->nextinnode = node->opt; 784 node->opt = v; 785 786 bit = blsh(i); 787 if(n == D_EXTERN || n == D_STATIC) 788 for(z=0; z<BITS; z++) 789 externs.b[z] |= bit.b[z]; 790 if(n == D_PARAM) 791 for(z=0; z<BITS; z++) 792 params.b[z] |= bit.b[z]; 793 794 if(node->class == PPARAM) 795 for(z=0; z<BITS; z++) 796 ivar.b[z] |= bit.b[z]; 797 if(node->class == PPARAMOUT) 798 for(z=0; z<BITS; z++) 799 ovar.b[z] |= bit.b[z]; 800 801 // Treat values with their address taken as live at calls, 802 // because the garbage collector's liveness analysis in ../gc/plive.c does. 803 // These must be consistent or else we will elide stores and the garbage 804 // collector will see uninitialized data. 
805 // The typical case where our own analysis is out of sync is when the 806 // node appears to have its address taken but that code doesn't actually 807 // get generated and therefore doesn't show up as an address being 808 // taken when we analyze the instruction stream. 809 // One instance of this case is when a closure uses the same name as 810 // an outer variable for one of its own variables declared with :=. 811 // The parser flags the outer variable as possibly shared, and therefore 812 // sets addrtaken, even though it ends up not being actually shared. 813 // If we were better about _ elision, _ = &x would suffice too. 814 // The broader := in a closure problem is mentioned in a comment in 815 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 816 if(node->addrtaken) 817 v->addr = 1; 818 819 // Disable registerization for globals, because: 820 // (1) we might panic at any time and we want the recovery code 821 // to see the latest values (issue 1304). 822 // (2) we don't know what pointers might point at them and we want 823 // loads via those pointers to see updated values and vice versa (issue 7995). 824 // 825 // Disable registerization for results if using defer, because the deferred func 826 // might recover and return, causing the current values to be used. 
827 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 828 v->addr = 1; 829 830 if(debug['R']) 831 print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 832 ostats.nvar++; 833 834 return bit; 835 836 none: 837 return zbits; 838 } 839 840 void 841 prop(Reg *r, Bits ref, Bits cal) 842 { 843 Reg *r1, *r2; 844 int z, i, j; 845 Var *v, *v1; 846 847 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 848 for(z=0; z<BITS; z++) { 849 ref.b[z] |= r1->refahead.b[z]; 850 if(ref.b[z] != r1->refahead.b[z]) { 851 r1->refahead.b[z] = ref.b[z]; 852 change++; 853 } 854 cal.b[z] |= r1->calahead.b[z]; 855 if(cal.b[z] != r1->calahead.b[z]) { 856 r1->calahead.b[z] = cal.b[z]; 857 change++; 858 } 859 } 860 switch(r1->f.prog->as) { 861 case ABL: 862 if(noreturn(r1->f.prog)) 863 break; 864 865 // Mark all input variables (ivar) as used, because that's what the 866 // liveness bitmaps say. The liveness bitmaps say that so that a 867 // panic will not show stale values in the parameter dump. 868 // Mark variables with a recent VARDEF (r1->act) as used, 869 // so that the optimizer flushes initializations to memory, 870 // so that if a garbage collection happens during this CALL, 871 // the collector will see initialized memory. Again this is to 872 // match what the liveness bitmaps say. 873 for(z=0; z<BITS; z++) { 874 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 875 ref.b[z] = 0; 876 } 877 878 // cal.b is the current approximation of what's live across the call. 879 // Every bit in cal.b is a single stack word. For each such word, 880 // find all the other tracked stack words in the same Go variable 881 // (struct/slice/string/interface) and mark them live too. 882 // This is necessary because the liveness analysis for the garbage 883 // collector works at variable granularity, not at word granularity. 
884 // It is fundamental for slice/string/interface: the garbage collector 885 // needs the whole value, not just some of the words, in order to 886 // interpret the other bits correctly. Specifically, slice needs a consistent 887 // ptr and cap, string needs a consistent ptr and len, and interface 888 // needs a consistent type word and data word. 889 for(z=0; z<BITS; z++) { 890 if(cal.b[z] == 0) 891 continue; 892 for(i=0; i<64; i++) { 893 if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) 894 continue; 895 v = var+z*64+i; 896 if(v->node->opt == nil) // v represents fixed register, not Go variable 897 continue; 898 899 // v->node->opt is the head of a linked list of Vars 900 // corresponding to tracked words from the Go variable v->node. 901 // Walk the list and set all the bits. 902 // For a large struct this could end up being quadratic: 903 // after the first setting, the outer loop (for z, i) would see a 1 bit 904 // for all of the remaining words in the struct, and for each such 905 // word would go through and turn on all the bits again. 906 // To avoid the quadratic behavior, we only turn on the bits if 907 // v is the head of the list or if the head's bit is not yet turned on. 908 // This will set the bits at most twice, keeping the overall loop linear. 
909 v1 = v->node->opt; 910 j = v1 - var; 911 if(v == v1 || !btest(&cal, j)) { 912 for(; v1 != nil; v1 = v1->nextinnode) { 913 j = v1 - var; 914 biset(&cal, j); 915 } 916 } 917 } 918 } 919 break; 920 921 case ATEXT: 922 for(z=0; z<BITS; z++) { 923 cal.b[z] = 0; 924 ref.b[z] = 0; 925 } 926 break; 927 928 case ARET: 929 for(z=0; z<BITS; z++) { 930 cal.b[z] = externs.b[z] | ovar.b[z]; 931 ref.b[z] = 0; 932 } 933 break; 934 } 935 for(z=0; z<BITS; z++) { 936 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 937 r1->use1.b[z] | r1->use2.b[z]; 938 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 939 r1->refbehind.b[z] = ref.b[z]; 940 r1->calbehind.b[z] = cal.b[z]; 941 } 942 if(r1->f.active) 943 break; 944 r1->f.active = 1; 945 } 946 for(; r != r1; r = (Reg*)r->f.p1) 947 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 948 prop(r2, r->refbehind, r->calbehind); 949 } 950 951 void 952 synch(Reg *r, Bits dif) 953 { 954 Reg *r1; 955 int z; 956 957 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 958 for(z=0; z<BITS; z++) { 959 dif.b[z] = (dif.b[z] & 960 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 961 r1->set.b[z] | r1->regdiff.b[z]; 962 if(dif.b[z] != r1->regdiff.b[z]) { 963 r1->regdiff.b[z] = dif.b[z]; 964 change++; 965 } 966 } 967 if(r1->f.active) 968 break; 969 r1->f.active = 1; 970 for(z=0; z<BITS; z++) 971 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 972 if(r1->f.s2 != nil) 973 synch((Reg*)r1->f.s2, dif); 974 } 975 } 976 977 uint64 978 allreg(uint64 b, Rgn *r) 979 { 980 Var *v; 981 int i; 982 983 v = var + r->varno; 984 r->regno = 0; 985 switch(v->etype) { 986 987 default: 988 fatal("unknown etype %d/%E", bitno(b), v->etype); 989 break; 990 991 case TINT8: 992 case TUINT8: 993 case TINT16: 994 case TUINT16: 995 case TINT32: 996 case TUINT32: 997 case TINT64: 998 case TUINT64: 999 case TINT: 1000 case TUINT: 1001 case TUINTPTR: 1002 case TBOOL: 1003 case TPTR32: 1004 case TPTR64: 1005 i = BtoR(~b); 1006 if(i && r->cost > 0) { 1007 r->regno = i; 1008 
return RtoB(i); 1009 } 1010 break; 1011 1012 case TFLOAT32: 1013 case TFLOAT64: 1014 i = BtoF(~b); 1015 if(i && r->cost > 0) { 1016 r->regno = i; 1017 return FtoB(i); 1018 } 1019 break; 1020 } 1021 return 0; 1022 } 1023 1024 void 1025 paint1(Reg *r, int bn) 1026 { 1027 Reg *r1; 1028 int z; 1029 uint64 bb; 1030 1031 z = bn/64; 1032 bb = 1LL<<(bn%64); 1033 if(r->act.b[z] & bb) 1034 return; 1035 for(;;) { 1036 if(!(r->refbehind.b[z] & bb)) 1037 break; 1038 r1 = (Reg*)r->f.p1; 1039 if(r1 == R) 1040 break; 1041 if(!(r1->refahead.b[z] & bb)) 1042 break; 1043 if(r1->act.b[z] & bb) 1044 break; 1045 r = r1; 1046 } 1047 1048 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 1049 change -= CLOAD * r->f.loop; 1050 } 1051 for(;;) { 1052 r->act.b[z] |= bb; 1053 1054 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 1055 if(r->use1.b[z] & bb) 1056 change += CREF * r->f.loop; 1057 if((r->use2.b[z]|r->set.b[z]) & bb) 1058 change += CREF * r->f.loop; 1059 } 1060 1061 if(STORE(r) & r->regdiff.b[z] & bb) { 1062 change -= CLOAD * r->f.loop; 1063 } 1064 1065 if(r->refbehind.b[z] & bb) 1066 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1067 if(r1->refahead.b[z] & bb) 1068 paint1(r1, bn); 1069 1070 if(!(r->refahead.b[z] & bb)) 1071 break; 1072 r1 = (Reg*)r->f.s2; 1073 if(r1 != R) 1074 if(r1->refbehind.b[z] & bb) 1075 paint1(r1, bn); 1076 r = (Reg*)r->f.s1; 1077 if(r == R) 1078 break; 1079 if(r->act.b[z] & bb) 1080 break; 1081 if(!(r->refbehind.b[z] & bb)) 1082 break; 1083 } 1084 } 1085 1086 uint64 1087 paint2(Reg *r, int bn, int depth) 1088 { 1089 Reg *r1; 1090 int z; 1091 uint64 bb, vreg; 1092 1093 z = bn/64; 1094 bb = 1LL << (bn%64); 1095 vreg = regbits; 1096 if(!(r->act.b[z] & bb)) 1097 return vreg; 1098 for(;;) { 1099 if(!(r->refbehind.b[z] & bb)) 1100 break; 1101 r1 = (Reg*)r->f.p1; 1102 if(r1 == R) 1103 break; 1104 if(!(r1->refahead.b[z] & bb)) 1105 break; 1106 if(!(r1->act.b[z] & bb)) 1107 break; 1108 r = r1; 1109 } 1110 for(;;) { 1111 
if(debug['R'] && debug['v']) 1112 print(" paint2 %d %P\n", depth, r->f.prog); 1113 1114 r->act.b[z] &= ~bb; 1115 1116 vreg |= r->regu; 1117 1118 if(r->refbehind.b[z] & bb) 1119 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1120 if(r1->refahead.b[z] & bb) 1121 vreg |= paint2(r1, bn, depth+1); 1122 1123 if(!(r->refahead.b[z] & bb)) 1124 break; 1125 r1 = (Reg*)r->f.s2; 1126 if(r1 != R) 1127 if(r1->refbehind.b[z] & bb) 1128 vreg |= paint2(r1, bn, depth+1); 1129 r = (Reg*)r->f.s1; 1130 if(r == R) 1131 break; 1132 if(!(r->act.b[z] & bb)) 1133 break; 1134 if(!(r->refbehind.b[z] & bb)) 1135 break; 1136 } 1137 return vreg; 1138 } 1139 1140 void 1141 paint3(Reg *r, int bn, uint64 rb, int rn) 1142 { 1143 Reg *r1; 1144 Prog *p; 1145 int z; 1146 uint64 bb; 1147 1148 z = bn/64; 1149 bb = 1LL << (bn%64); 1150 if(r->act.b[z] & bb) 1151 return; 1152 for(;;) { 1153 if(!(r->refbehind.b[z] & bb)) 1154 break; 1155 r1 = (Reg*)r->f.p1; 1156 if(r1 == R) 1157 break; 1158 if(!(r1->refahead.b[z] & bb)) 1159 break; 1160 if(r1->act.b[z] & bb) 1161 break; 1162 r = r1; 1163 } 1164 1165 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1166 addmove(r, bn, rn, 0); 1167 for(;;) { 1168 r->act.b[z] |= bb; 1169 p = r->f.prog; 1170 1171 if(r->use1.b[z] & bb) { 1172 if(debug['R'] && debug['v']) 1173 print("%P", p); 1174 addreg(&p->from, rn); 1175 if(debug['R'] && debug['v']) 1176 print(" ===change== %P\n", p); 1177 } 1178 if((r->use2.b[z]|r->set.b[z]) & bb) { 1179 if(debug['R'] && debug['v']) 1180 print("%P", p); 1181 addreg(&p->to, rn); 1182 if(debug['R'] && debug['v']) 1183 print(" ===change== %P\n", p); 1184 } 1185 1186 if(STORE(r) & r->regdiff.b[z] & bb) 1187 addmove(r, bn, rn, 1); 1188 r->regu |= rb; 1189 1190 if(r->refbehind.b[z] & bb) 1191 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1192 if(r1->refahead.b[z] & bb) 1193 paint3(r1, bn, rb, rn); 1194 1195 if(!(r->refahead.b[z] & bb)) 1196 break; 1197 r1 = (Reg*)r->f.s2; 1198 if(r1 != R) 1199 
if(r1->refbehind.b[z] & bb) 1200 paint3(r1, bn, rb, rn); 1201 r = (Reg*)r->f.s1; 1202 if(r == R) 1203 break; 1204 if(r->act.b[z] & bb) 1205 break; 1206 if(!(r->refbehind.b[z] & bb)) 1207 break; 1208 } 1209 } 1210 1211 void 1212 addreg(Adr *a, int rn) 1213 { 1214 a->sym = nil; 1215 a->node = nil; 1216 a->name = D_NONE; 1217 a->type = D_REG; 1218 a->reg = rn; 1219 if(rn >= NREG) { 1220 a->type = D_FREG; 1221 a->reg = rn-NREG; 1222 } 1223 1224 ostats.ncvtreg++; 1225 } 1226 1227 /* 1228 * track register variables including external registers: 1229 * bit reg 1230 * 0 R0 1231 * 1 R1 1232 * ... ... 1233 * 31 R31 1234 * 32+0 F0 1235 * 32+1 F1 1236 * ... ... 1237 * 32+31 F31 1238 */ 1239 uint64 1240 RtoB(int r) 1241 { 1242 if(r > D_R0 && r <= D_R0+31) 1243 return 1ULL << (r - D_R0); 1244 return 0; 1245 } 1246 1247 int 1248 BtoR(uint64 b) 1249 { 1250 b &= 0xffffffffull; 1251 if(b == 0) 1252 return 0; 1253 return bitno(b) + D_R0; 1254 } 1255 1256 uint64 1257 FtoB(int r) 1258 { 1259 if(r >= D_F0 && r <= D_F0+31) 1260 return 1ULL << (32 + r - D_F0); 1261 return 0; 1262 } 1263 1264 int 1265 BtoF(uint64 b) 1266 { 1267 b >>= 32; 1268 if(b == 0) 1269 return 0; 1270 return bitno(b) + D_F0; 1271 } 1272 1273 void 1274 dumpone(Flow *f, int isreg) 1275 { 1276 int z; 1277 Bits bit; 1278 Reg *r; 1279 1280 print("%d:%P", f->loop, f->prog); 1281 if(isreg) { 1282 r = (Reg*)f; 1283 for(z=0; z<BITS; z++) 1284 bit.b[z] = 1285 r->set.b[z] | 1286 r->use1.b[z] | 1287 r->use2.b[z] | 1288 r->refbehind.b[z] | 1289 r->refahead.b[z] | 1290 r->calbehind.b[z] | 1291 r->calahead.b[z] | 1292 r->regdiff.b[z] | 1293 r->act.b[z] | 1294 0; 1295 if(bany(&bit)) { 1296 print("\t"); 1297 if(bany(&r->set)) 1298 print(" s:%Q", r->set); 1299 if(bany(&r->use1)) 1300 print(" u1:%Q", r->use1); 1301 if(bany(&r->use2)) 1302 print(" u2:%Q", r->use2); 1303 if(bany(&r->refbehind)) 1304 print(" rb:%Q ", r->refbehind); 1305 if(bany(&r->refahead)) 1306 print(" ra:%Q ", r->refahead); 1307 if(bany(&r->calbehind)) 1308 print(" 
cb:%Q ", r->calbehind); 1309 if(bany(&r->calahead)) 1310 print(" ca:%Q ", r->calahead); 1311 if(bany(&r->regdiff)) 1312 print(" d:%Q ", r->regdiff); 1313 if(bany(&r->act)) 1314 print(" a:%Q ", r->act); 1315 } 1316 } 1317 print("\n"); 1318 } 1319 1320 1321 void 1322 dumpit(char *str, Flow *r0, int isreg) 1323 { 1324 Flow *r, *r1; 1325 1326 print("\n%s\n", str); 1327 for(r = r0; r != nil; r = r->link) { 1328 dumpone(r, isreg); 1329 r1 = r->p2; 1330 if(r1 != nil) { 1331 print(" pred:"); 1332 for(; r1 != nil; r1 = r1->p2link) 1333 print(" %.4ud", (int)r1->prog->pc); 1334 print("\n"); 1335 } 1336 // Print successors if it's not just the next one 1337 if(r->s1 != r->link || r->s2 != nil) { 1338 print(" succ:"); 1339 if(r->s1 != nil) 1340 print(" %.4ud", (int)r->s1->prog->pc); 1341 if(r->s2 != nil) 1342 print(" %.4ud", (int)r->s2->prog->pc); 1343 print("\n"); 1344 } 1345 } 1346 }