github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/cmd/6g/reg.c (about) 1 // Derived from Inferno utils/6c/reg.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 32 /* 16 general + 16 floating */ 37 #define REGBITS ((uint64)0xffffffffull) 38 /*c2go enum { 39 NREGVAR = 32, 40 REGBITS = 0xffffffff, 41 }; 42 */ 43 44 static Reg* firstr; 45 static int first = 1; 46 47 int 48 rcmp(const void *a1, const void *a2) 49 { 50 Rgn *p1, *p2; 51 int c1, c2; 52 53 p1 = (Rgn*)a1; 54 p2 = (Rgn*)a2; 55 c1 = p2->cost; 56 c2 = p1->cost; 57 if(c1 -= c2) 58 return c1; 59 return p2->varno - p1->varno; 60 } 61 62 static void 63 setaddrs(Bits bit) 64 { 65 int i, n; 66 Var *v; 67 Node *node; 68 69 while(bany(&bit)) { 70 // convert each bit to a variable 71 i = bnum(bit); 72 node = var[i].node; 73 n = var[i].name; 74 biclr(&bit, i); 75 76 // disable all pieces of that variable 77 for(i=0; i<nvar; i++) { 78 v = var+i; 79 if(v->node == node && v->name == n) 80 v->addr = 2; 81 } 82 } 83 } 84 85 static char* regname[] = { 86 ".AX", 87 ".CX", 88 ".DX", 89 ".BX", 90 ".SP", 91 ".BP", 92 ".SI", 93 ".DI", 94 ".R8", 95 ".R9", 96 ".R10", 97 ".R11", 98 ".R12", 99 ".R13", 100 ".R14", 101 ".R15", 102 ".X0", 103 ".X1", 104 ".X2", 105 ".X3", 106 ".X4", 107 ".X5", 108 ".X6", 109 ".X7", 110 ".X8", 111 ".X9", 112 ".X10", 113 ".X11", 114 ".X12", 115 ".X13", 116 ".X14", 117 ".X15", 118 }; 119 120 static Node* regnodes[NREGVAR]; 121 122 static void walkvardef(Node *n, Reg *r, int active); 123 124 void 125 regopt(Prog *firstp) 126 { 127 Reg *r, *r1; 128 Prog *p; 129 Graph *g; 130 ProgInfo info; 131 int i, z, active; 132 uint32 vreg; 133 Bits bit; 134 135 if(first) { 136 fmtinstall('Q', Qconv); 137 exregoffset = D_R15; 138 first = 0; 139 } 140 141 mergetemp(firstp); 142 143 /* 144 * control flow is more complicated in generated go code 145 * than in generated c code. define pseudo-variables for 146 * registers, so we have complete register usage information. 147 */ 148 nvar = NREGVAR; 149 memset(var, 0, NREGVAR*sizeof var[0]); 150 for(i=0; i<NREGVAR; i++) { 151 if(regnodes[i] == N) 152 regnodes[i] = newname(lookup(regname[i])); 153 var[i].node = regnodes[i]; 154 } 155 156 regbits = RtoB(D_SP); 157 for(z=0; z<BITS; z++) { 158 externs.b[z] = 0; 159 params.b[z] = 0; 160 consts.b[z] = 0; 161 addrs.b[z] = 0; 162 ivar.b[z] = 0; 163 ovar.b[z] = 0; 164 } 165 166 /* 167 * pass 1 168 * build aux data structure 169 * allocate pcs 170 * find use and set of variables 171 */ 172 g = flowstart(firstp, sizeof(Reg)); 173 if(g == nil) { 174 for(i=0; i<nvar; i++) 175 var[i].node->opt = nil; 176 return; 177 } 178 179 firstr = (Reg*)g->start; 180 181 for(r = firstr; r != R; r = (Reg*)r->f.link) { 182 p = r->f.prog; 183 if(p->as == AVARDEF || p->as == AVARKILL) 184 continue; 185 proginfo(&info, p); 186 187 // Avoid making variables for direct-called functions. 188 if(p->as == ACALL && p->to.type == D_EXTERN) 189 continue; 190 191 r->use1.b[0] |= info.reguse | info.regindex; 192 r->set.b[0] |= info.regset; 193 194 bit = mkvar(r, &p->from); 195 if(bany(&bit)) { 196 if(info.flags & LeftAddr) 197 setaddrs(bit); 198 if(info.flags & LeftRead) 199 for(z=0; z<BITS; z++) 200 r->use1.b[z] |= bit.b[z]; 201 if(info.flags & LeftWrite) 202 for(z=0; z<BITS; z++) 203 r->set.b[z] |= bit.b[z]; 204 } 205 206 bit = mkvar(r, &p->to); 207 if(bany(&bit)) { 208 if(info.flags & RightAddr) 209 setaddrs(bit); 210 if(info.flags & RightRead) 211 for(z=0; z<BITS; z++) 212 r->use2.b[z] |= bit.b[z]; 213 if(info.flags & RightWrite) 214 for(z=0; z<BITS; z++) 215 r->set.b[z] |= bit.b[z]; 216 } 217 } 218 219 for(i=0; i<nvar; i++) { 220 Var *v = var+i; 221 if(v->addr) { 222 bit = blsh(i); 223 for(z=0; z<BITS; z++) 224 addrs.b[z] |= bit.b[z]; 225 } 226 227 if(debug['R'] && debug['v']) 228 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 229 i, v->addr, v->etype, v->width, v->node, v->offset); 230 } 231 232 if(debug['R'] && debug['v']) 233 dumpit("pass1", &firstr->f, 1); 234 235 /* 236 * pass 2 237 * find looping structure 238 */ 239 flowrpo(g); 240 241 if(debug['R'] && debug['v']) 242 dumpit("pass2", &firstr->f, 1); 243 244 /* 245 * pass 2.5 246 * iterate propagating fat vardef covering forward 247 * r->act records vars with a VARDEF since the last CALL. 248 * (r->act will be reused in pass 5 for something else, 249 * but we'll be done with it by then.) 250 */ 251 active = 0; 252 for(r = firstr; r != R; r = (Reg*)r->f.link) { 253 r->f.active = 0; 254 r->act = zbits; 255 } 256 for(r = firstr; r != R; r = (Reg*)r->f.link) { 257 p = r->f.prog; 258 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 259 active++; 260 walkvardef(p->to.node, r, active); 261 } 262 } 263 264 /* 265 * pass 3 266 * iterate propagating usage 267 * back until flow graph is complete 268 */ 269 loop1: 270 change = 0; 271 for(r = firstr; r != R; r = (Reg*)r->f.link) 272 r->f.active = 0; 273 for(r = firstr; r != R; r = (Reg*)r->f.link) 274 if(r->f.prog->as == ARET) 275 prop(r, zbits, zbits); 276 loop11: 277 /* pick up unreachable code */ 278 i = 0; 279 for(r = firstr; r != R; r = r1) { 280 r1 = (Reg*)r->f.link; 281 if(r1 && r1->f.active && !r->f.active) { 282 prop(r, zbits, zbits); 283 i = 1; 284 } 285 } 286 if(i) 287 goto loop11; 288 if(change) 289 goto loop1; 290 291 if(debug['R'] && debug['v']) 292 dumpit("pass3", &firstr->f, 1); 293 294 /* 295 * pass 4 296 * iterate propagating register/variable synchrony 297 * forward until graph is complete 298 */ 299 loop2: 300 change = 0; 301 for(r = firstr; r != R; r = (Reg*)r->f.link) 302 r->f.active = 0; 303 synch(firstr, zbits); 304 if(change) 305 goto loop2; 306 307 if(debug['R'] && debug['v']) 308 dumpit("pass4", &firstr->f, 1); 309 310 /* 311 * pass 4.5 312 * move register pseudo-variables into regu. 313 */ 314 for(r = firstr; r != R; r = (Reg*)r->f.link) { 315 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 316 317 r->set.b[0] &= ~REGBITS; 318 r->use1.b[0] &= ~REGBITS; 319 r->use2.b[0] &= ~REGBITS; 320 r->refbehind.b[0] &= ~REGBITS; 321 r->refahead.b[0] &= ~REGBITS; 322 r->calbehind.b[0] &= ~REGBITS; 323 r->calahead.b[0] &= ~REGBITS; 324 r->regdiff.b[0] &= ~REGBITS; 325 r->act.b[0] &= ~REGBITS; 326 } 327 328 /* 329 * pass 5 330 * isolate regions 331 * calculate costs (paint1) 332 */ 333 r = firstr; 334 if(r) { 335 for(z=0; z<BITS; z++) 336 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 337 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 338 if(bany(&bit) && !r->f.refset) { 339 // should never happen - all variables are preset 340 if(debug['w']) 341 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 342 r->f.refset = 1; 343 } 344 } 345 for(r = firstr; r != R; r = (Reg*)r->f.link) 346 r->act = zbits; 347 rgp = region; 348 nregion = 0; 349 for(r = firstr; r != R; r = (Reg*)r->f.link) { 350 for(z=0; z<BITS; z++) 351 bit.b[z] = r->set.b[z] & 352 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 353 if(bany(&bit) && !r->f.refset) { 354 if(debug['w']) 355 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 356 r->f.refset = 1; 357 excise(&r->f); 358 } 359 for(z=0; z<BITS; z++) 360 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 361 while(bany(&bit)) { 362 i = bnum(bit); 363 rgp->enter = r; 364 rgp->varno = i; 365 change = 0; 366 paint1(r, i); 367 biclr(&bit, i); 368 if(change <= 0) 369 continue; 370 rgp->cost = change; 371 nregion++; 372 if(nregion >= NRGN) { 373 if(debug['R'] && debug['v']) 374 print("too many regions\n"); 375 goto brk; 376 } 377 rgp++; 378 } 379 } 380 brk: 381 qsort(region, nregion, sizeof(region[0]), rcmp); 382 383 if(debug['R'] && debug['v']) 384 dumpit("pass5", &firstr->f, 1); 385 386 /* 387 * pass 6 388 * determine used registers (paint2) 389 * replace code (paint3) 390 */ 391 rgp = region; 392 if(debug['R'] && debug['v']) 393 print("\nregisterizing\n"); 394 for(i=0; i<nregion; i++) { 395 if(debug['R'] && debug['v']) 396 print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc); 397 bit = blsh(rgp->varno); 398 vreg = paint2(rgp->enter, rgp->varno, 0); 399 vreg = allreg(vreg, rgp); 400 if(rgp->regno != 0) { 401 if(debug['R'] && debug['v']) { 402 Var *v; 403 404 v = var + rgp->varno; 405 print("registerize %N+%lld (bit=%2d et=%2E) in %R\n", 406 v->node, v->offset, rgp->varno, v->etype, rgp->regno); 407 } 408 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 409 } 410 rgp++; 411 } 412 413 /* 414 * free aux structures. peep allocates new ones. 415 */ 416 for(i=0; i<nvar; i++) 417 var[i].node->opt = nil; 418 flowend(g); 419 firstr = R; 420 421 if(debug['R'] && debug['v']) { 422 // Rebuild flow graph, since we inserted instructions 423 g = flowstart(firstp, sizeof(Reg)); 424 firstr = (Reg*)g->start; 425 dumpit("pass6", &firstr->f, 1); 426 flowend(g); 427 firstr = R; 428 } 429 430 /* 431 * pass 7 432 * peep-hole on basic block 433 */ 434 if(!debug['R'] || debug['P']) 435 peep(firstp); 436 437 /* 438 * eliminate nops 439 */ 440 for(p=firstp; p!=P; p=p->link) { 441 while(p->link != P && p->link->as == ANOP) 442 p->link = p->link->link; 443 if(p->to.type == D_BRANCH) 444 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 445 p->to.u.branch = p->to.u.branch->link; 446 } 447 448 if(debug['R']) { 449 if(ostats.ncvtreg || 450 ostats.nspill || 451 ostats.nreload || 452 ostats.ndelmov || 453 ostats.nvar || 454 ostats.naddr || 455 0) 456 print("\nstats\n"); 457 458 if(ostats.ncvtreg) 459 print(" %4d cvtreg\n", ostats.ncvtreg); 460 if(ostats.nspill) 461 print(" %4d spill\n", ostats.nspill); 462 if(ostats.nreload) 463 print(" %4d reload\n", ostats.nreload); 464 if(ostats.ndelmov) 465 print(" %4d delmov\n", ostats.ndelmov); 466 if(ostats.nvar) 467 print(" %4d var\n", ostats.nvar); 468 if(ostats.naddr) 469 print(" %4d addr\n", ostats.naddr); 470 471 memset(&ostats, 0, sizeof(ostats)); 472 } 473 } 474 475 static void 476 walkvardef(Node *n, Reg *r, int active) 477 { 478 Reg *r1, *r2; 479 int bn; 480 Var *v; 481 482 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 483 if(r1->f.active == active) 484 break; 485 r1->f.active = active; 486 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) 487 break; 488 for(v=n->opt; v!=nil; v=v->nextinnode) { 489 bn = v - var; 490 biset(&r1->act, bn); 491 } 492 if(r1->f.prog->as == ACALL) 493 break; 494 } 495 496 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 497 if(r2->f.s2 != nil) 498 walkvardef(n, (Reg*)r2->f.s2, active); 499 } 500 501 /* 502 * add mov b,rn 503 * just after r 504 */ 505 void 506 addmove(Reg *r, int bn, int rn, int f) 507 { 508 Prog *p, *p1; 509 Adr *a; 510 Var *v; 511 512 p1 = mal(sizeof(*p1)); 513 clearp(p1); 514 p1->pc = 9999; 515 516 p = r->f.prog; 517 p1->link = p->link; 518 p->link = p1; 519 p1->lineno = p->lineno; 520 521 v = var + bn; 522 523 a = &p1->to; 524 a->offset = v->offset; 525 a->etype = v->etype; 526 a->type = v->name; 527 a->node = v->node; 528 a->sym = linksym(v->node->sym); 529 530 // need to clean this up with wptr and 531 // some of the defaults 532 p1->as = AMOVL; 533 switch(simtype[(uchar)v->etype]) { 534 default: 535 fatal("unknown type %E", v->etype); 536 case TINT8: 537 case TUINT8: 538 case TBOOL: 539 p1->as = AMOVB; 540 break; 541 case TINT16: 542 case TUINT16: 543 p1->as = AMOVW; 544 break; 545 case TINT64: 546 case TUINT64: 547 case TPTR64: 548 p1->as = AMOVQ; 549 break; 550 case TFLOAT32: 551 p1->as = AMOVSS; 552 break; 553 case TFLOAT64: 554 p1->as = AMOVSD; 555 break; 556 case TINT32: 557 case TUINT32: 558 case TPTR32: 559 break; 560 } 561 562 p1->from.type = rn; 563 if(!f) { 564 p1->from = *a; 565 *a = zprog.from; 566 a->type = rn; 567 if(v->etype == TUINT8) 568 p1->as = AMOVB; 569 if(v->etype == TUINT16) 570 p1->as = AMOVW; 571 } 572 if(debug['R'] && debug['v']) 573 print("%P ===add=== %P\n", p, p1); 574 ostats.nspill++; 575 } 576 577 uint32 578 doregbits(int r) 579 { 580 uint32 b; 581 582 b = 0; 583 if(r >= D_INDIR) 584 r -= D_INDIR; 585 if(r >= D_AX && r <= D_R15) 586 b |= RtoB(r); 587 else 588 if(r >= D_AL && r <= D_R15B) 589 b |= RtoB(r-D_AL+D_AX); 590 else 591 if(r >= D_AH && r <= D_BH) 592 b |= RtoB(r-D_AH+D_AX); 593 else 594 if(r >= D_X0 && r <= D_X0+15) 595 b |= FtoB(r); 596 return b; 597 } 598 599 static int 600 overlap(int64 o1, int w1, int64 o2, int w2) 601 { 602 int64 t1, t2; 603 604 t1 = o1+w1; 605 t2 = o2+w2; 606 607 if(!(t1 > o2 && t2 > o1)) 608 return 0; 609 610 return 1; 611 } 612 613 Bits 614 mkvar(Reg *r, Adr *a) 615 { 616 Var *v; 617 int i, t, n, et, z, flag; 618 int64 w; 619 uint32 regu; 620 int64 o; 621 Bits bit; 622 Node *node; 623 624 /* 625 * mark registers used 626 */ 627 t = a->type; 628 if(t == D_NONE) 629 goto none; 630 631 if(r != R) 632 r->use1.b[0] |= doregbits(a->index); 633 634 if(t >= D_INDIR && t < 2*D_INDIR) 635 goto none; 636 637 switch(t) { 638 default: 639 regu = doregbits(t); 640 if(regu == 0) 641 goto none; 642 bit = zbits; 643 bit.b[0] = regu; 644 return bit; 645 646 case D_ADDR: 647 a->type = a->index; 648 bit = mkvar(r, a); 649 setaddrs(bit); 650 a->type = t; 651 ostats.naddr++; 652 goto none; 653 654 case D_EXTERN: 655 case D_STATIC: 656 case D_PARAM: 657 case D_AUTO: 658 n = t; 659 break; 660 } 661 662 node = a->node; 663 if(node == N || node->op != ONAME || node->orig == N) 664 goto none; 665 node = node->orig; 666 if(node->orig != node) 667 fatal("%D: bad node", a); 668 if(node->sym == S || node->sym->name[0] == '.') 669 goto none; 670 et = a->etype; 671 o = a->offset; 672 w = a->width; 673 if(w < 0) 674 fatal("bad width %lld for %D", w, a); 675 676 flag = 0; 677 for(i=0; i<nvar; i++) { 678 v = var+i; 679 if(v->node == node && v->name == n) { 680 if(v->offset == o) 681 if(v->etype == et) 682 if(v->width == w) 683 return blsh(i); 684 685 // if they overlaps, disable both 686 if(overlap(v->offset, v->width, o, w)) { 687 // print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et); 688 v->addr = 1; 689 flag = 1; 690 } 691 } 692 } 693 switch(et) { 694 case 0: 695 case TFUNC: 696 goto none; 697 } 698 699 if(nvar >= NVAR) { 700 if(debug['w'] > 1 && node != N) 701 fatal("variable not optimized: %#N", node); 702 703 // If we're not tracking a word in a variable, mark the rest as 704 // having its address taken, so that we keep the whole thing 705 // live at all calls. otherwise we might optimize away part of 706 // a variable but not all of it. 707 for(i=0; i<nvar; i++) { 708 v = var+i; 709 if(v->node == node) 710 v->addr = 1; 711 } 712 goto none; 713 } 714 715 i = nvar; 716 nvar++; 717 v = var+i; 718 v->offset = o; 719 v->name = n; 720 v->etype = et; 721 v->width = w; 722 v->addr = flag; // funny punning 723 v->node = node; 724 725 // node->opt is the head of a linked list 726 // of Vars within the given Node, so that 727 // we can start at a Var and find all the other 728 // Vars in the same Go variable. 729 v->nextinnode = node->opt; 730 node->opt = v; 731 732 bit = blsh(i); 733 if(n == D_EXTERN || n == D_STATIC) 734 for(z=0; z<BITS; z++) 735 externs.b[z] |= bit.b[z]; 736 if(n == D_PARAM) 737 for(z=0; z<BITS; z++) 738 params.b[z] |= bit.b[z]; 739 740 if(node->class == PPARAM) 741 for(z=0; z<BITS; z++) 742 ivar.b[z] |= bit.b[z]; 743 if(node->class == PPARAMOUT) 744 for(z=0; z<BITS; z++) 745 ovar.b[z] |= bit.b[z]; 746 747 // Treat values with their address taken as live at calls, 748 // because the garbage collector's liveness analysis in ../gc/plive.c does. 749 // These must be consistent or else we will elide stores and the garbage 750 // collector will see uninitialized data. 751 // The typical case where our own analysis is out of sync is when the 752 // node appears to have its address taken but that code doesn't actually 753 // get generated and therefore doesn't show up as an address being 754 // taken when we analyze the instruction stream. 755 // One instance of this case is when a closure uses the same name as 756 // an outer variable for one of its own variables declared with :=. 757 // The parser flags the outer variable as possibly shared, and therefore 758 // sets addrtaken, even though it ends up not being actually shared. 759 // If we were better about _ elision, _ = &x would suffice too. 760 // The broader := in a closure problem is mentioned in a comment in 761 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 762 if(node->addrtaken) 763 v->addr = 1; 764 765 // Disable registerization for globals, because: 766 // (1) we might panic at any time and we want the recovery code 767 // to see the latest values (issue 1304). 768 // (2) we don't know what pointers might point at them and we want 769 // loads via those pointers to see updated values and vice versa (issue 7995). 770 // 771 // Disable registerization for results if using defer, because the deferred func 772 // might recover and return, causing the current values to be used. 773 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 774 v->addr = 1; 775 776 if(debug['R']) 777 print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 778 ostats.nvar++; 779 780 return bit; 781 782 none: 783 return zbits; 784 } 785 786 void 787 prop(Reg *r, Bits ref, Bits cal) 788 { 789 Reg *r1, *r2; 790 int z, i, j; 791 Var *v, *v1; 792 793 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 794 for(z=0; z<BITS; z++) { 795 ref.b[z] |= r1->refahead.b[z]; 796 if(ref.b[z] != r1->refahead.b[z]) { 797 r1->refahead.b[z] = ref.b[z]; 798 change++; 799 } 800 cal.b[z] |= r1->calahead.b[z]; 801 if(cal.b[z] != r1->calahead.b[z]) { 802 r1->calahead.b[z] = cal.b[z]; 803 change++; 804 } 805 } 806 switch(r1->f.prog->as) { 807 case ACALL: 808 if(noreturn(r1->f.prog)) 809 break; 810 811 // Mark all input variables (ivar) as used, because that's what the 812 // liveness bitmaps say. The liveness bitmaps say that so that a 813 // panic will not show stale values in the parameter dump. 814 // Mark variables with a recent VARDEF (r1->act) as used, 815 // so that the optimizer flushes initializations to memory, 816 // so that if a garbage collection happens during this CALL, 817 // the collector will see initialized memory. Again this is to 818 // match what the liveness bitmaps say. 819 for(z=0; z<BITS; z++) { 820 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 821 ref.b[z] = 0; 822 } 823 824 // cal.b is the current approximation of what's live across the call. 825 // Every bit in cal.b is a single stack word. For each such word, 826 // find all the other tracked stack words in the same Go variable 827 // (struct/slice/string/interface) and mark them live too. 828 // This is necessary because the liveness analysis for the garbage 829 // collector works at variable granularity, not at word granularity. 830 // It is fundamental for slice/string/interface: the garbage collector 831 // needs the whole value, not just some of the words, in order to 832 // interpret the other bits correctly. Specifically, slice needs a consistent 833 // ptr and cap, string needs a consistent ptr and len, and interface 834 // needs a consistent type word and data word. 835 for(z=0; z<BITS; z++) { 836 if(cal.b[z] == 0) 837 continue; 838 for(i=0; i<64; i++) { 839 if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) 840 continue; 841 v = var+z*64+i; 842 if(v->node->opt == nil) // v represents fixed register, not Go variable 843 continue; 844 845 // v->node->opt is the head of a linked list of Vars 846 // corresponding to tracked words from the Go variable v->node. 847 // Walk the list and set all the bits. 848 // For a large struct this could end up being quadratic: 849 // after the first setting, the outer loop (for z, i) would see a 1 bit 850 // for all of the remaining words in the struct, and for each such 851 // word would go through and turn on all the bits again. 852 // To avoid the quadratic behavior, we only turn on the bits if 853 // v is the head of the list or if the head's bit is not yet turned on. 854 // This will set the bits at most twice, keeping the overall loop linear. 855 v1 = v->node->opt; 856 j = v1 - var; 857 if(v == v1 || !btest(&cal, j)) { 858 for(; v1 != nil; v1 = v1->nextinnode) { 859 j = v1 - var; 860 biset(&cal, j); 861 } 862 } 863 } 864 } 865 break; 866 867 case ATEXT: 868 for(z=0; z<BITS; z++) { 869 cal.b[z] = 0; 870 ref.b[z] = 0; 871 } 872 break; 873 874 case ARET: 875 for(z=0; z<BITS; z++) { 876 cal.b[z] = externs.b[z] | ovar.b[z]; 877 ref.b[z] = 0; 878 } 879 break; 880 } 881 for(z=0; z<BITS; z++) { 882 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 883 r1->use1.b[z] | r1->use2.b[z]; 884 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 885 r1->refbehind.b[z] = ref.b[z]; 886 r1->calbehind.b[z] = cal.b[z]; 887 } 888 if(r1->f.active) 889 break; 890 r1->f.active = 1; 891 } 892 for(; r != r1; r = (Reg*)r->f.p1) 893 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 894 prop(r2, r->refbehind, r->calbehind); 895 } 896 897 void 898 synch(Reg *r, Bits dif) 899 { 900 Reg *r1; 901 int z; 902 903 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 904 for(z=0; z<BITS; z++) { 905 dif.b[z] = (dif.b[z] & 906 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 907 r1->set.b[z] | r1->regdiff.b[z]; 908 if(dif.b[z] != r1->regdiff.b[z]) { 909 r1->regdiff.b[z] = dif.b[z]; 910 change++; 911 } 912 } 913 if(r1->f.active) 914 break; 915 r1->f.active = 1; 916 for(z=0; z<BITS; z++) 917 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 918 if(r1->f.s2 != nil) 919 synch((Reg*)r1->f.s2, dif); 920 } 921 } 922 923 uint32 924 allreg(uint32 b, Rgn *r) 925 { 926 Var *v; 927 int i; 928 929 v = var + r->varno; 930 r->regno = 0; 931 switch(v->etype) { 932 933 default: 934 fatal("unknown etype %d/%E", bitno(b), v->etype); 935 break; 936 937 case TINT8: 938 case TUINT8: 939 case TINT16: 940 case TUINT16: 941 case TINT32: 942 case TUINT32: 943 case TINT64: 944 case TUINT64: 945 case TINT: 946 case TUINT: 947 case TUINTPTR: 948 case TBOOL: 949 case TPTR32: 950 case TPTR64: 951 i = BtoR(~b); 952 if(i && r->cost > 0) { 953 r->regno = i; 954 return RtoB(i); 955 } 956 break; 957 958 case TFLOAT32: 959 case TFLOAT64: 960 i = BtoF(~b); 961 if(i && r->cost > 0) { 962 r->regno = i; 963 return FtoB(i); 964 } 965 break; 966 } 967 return 0; 968 } 969 970 void 971 paint1(Reg *r, int bn) 972 { 973 Reg *r1; 974 int z; 975 uint64 bb; 976 977 z = bn/64; 978 bb = 1LL<<(bn%64); 979 if(r->act.b[z] & bb) 980 return; 981 for(;;) { 982 if(!(r->refbehind.b[z] & bb)) 983 break; 984 r1 = (Reg*)r->f.p1; 985 if(r1 == R) 986 break; 987 if(!(r1->refahead.b[z] & bb)) 988 break; 989 if(r1->act.b[z] & bb) 990 break; 991 r = r1; 992 } 993 994 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 995 change -= CLOAD * r->f.loop; 996 } 997 for(;;) { 998 r->act.b[z] |= bb; 999 1000 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 1001 if(r->use1.b[z] & bb) 1002 change += CREF * r->f.loop; 1003 if((r->use2.b[z]|r->set.b[z]) & bb) 1004 change += CREF * r->f.loop; 1005 } 1006 1007 if(STORE(r) & r->regdiff.b[z] & bb) { 1008 change -= CLOAD * r->f.loop; 1009 } 1010 1011 if(r->refbehind.b[z] & bb) 1012 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1013 if(r1->refahead.b[z] & bb) 1014 paint1(r1, bn); 1015 1016 if(!(r->refahead.b[z] & bb)) 1017 break; 1018 r1 = (Reg*)r->f.s2; 1019 if(r1 != R) 1020 if(r1->refbehind.b[z] & bb) 1021 paint1(r1, bn); 1022 r = (Reg*)r->f.s1; 1023 if(r == R) 1024 break; 1025 if(r->act.b[z] & bb) 1026 break; 1027 if(!(r->refbehind.b[z] & bb)) 1028 break; 1029 } 1030 } 1031 1032 uint32 1033 paint2(Reg *r, int bn, int depth) 1034 { 1035 Reg *r1; 1036 int z; 1037 uint64 bb, vreg; 1038 1039 z = bn/64; 1040 bb = 1LL << (bn%64); 1041 vreg = regbits; 1042 if(!(r->act.b[z] & bb)) 1043 return vreg; 1044 for(;;) { 1045 if(!(r->refbehind.b[z] & bb)) 1046 break; 1047 r1 = (Reg*)r->f.p1; 1048 if(r1 == R) 1049 break; 1050 if(!(r1->refahead.b[z] & bb)) 1051 break; 1052 if(!(r1->act.b[z] & bb)) 1053 break; 1054 r = r1; 1055 } 1056 for(;;) { 1057 if(debug['R'] && debug['v']) 1058 print(" paint2 %d %P\n", depth, r->f.prog); 1059 1060 r->act.b[z] &= ~bb; 1061 1062 vreg |= r->regu; 1063 1064 if(r->refbehind.b[z] & bb) 1065 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1066 if(r1->refahead.b[z] & bb) 1067 vreg |= paint2(r1, bn, depth+1); 1068 1069 if(!(r->refahead.b[z] & bb)) 1070 break; 1071 r1 = (Reg*)r->f.s2; 1072 if(r1 != R) 1073 if(r1->refbehind.b[z] & bb) 1074 vreg |= paint2(r1, bn, depth+1); 1075 r = (Reg*)r->f.s1; 1076 if(r == R) 1077 break; 1078 if(!(r->act.b[z] & bb)) 1079 break; 1080 if(!(r->refbehind.b[z] & bb)) 1081 break; 1082 } 1083 1084 return vreg; 1085 } 1086 1087 void 1088 paint3(Reg *r, int bn, uint32 rb, int rn) 1089 { 1090 Reg *r1; 1091 Prog *p; 1092 int z; 1093 uint64 bb; 1094 1095 z = bn/64; 1096 bb = 1LL << (bn%64); 1097 if(r->act.b[z] & bb) 1098 return; 1099 for(;;) { 1100 if(!(r->refbehind.b[z] & bb)) 1101 break; 1102 r1 = (Reg*)r->f.p1; 1103 if(r1 == R) 1104 break; 1105 if(!(r1->refahead.b[z] & bb)) 1106 break; 1107 if(r1->act.b[z] & bb) 1108 break; 1109 r = r1; 1110 } 1111 1112 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1113 addmove(r, bn, rn, 0); 1114 for(;;) { 1115 r->act.b[z] |= bb; 1116 p = r->f.prog; 1117 1118 if(r->use1.b[z] & bb) { 1119 if(debug['R'] && debug['v']) 1120 print("%P", p); 1121 addreg(&p->from, rn); 1122 if(debug['R'] && debug['v']) 1123 print(" ===change== %P\n", p); 1124 } 1125 if((r->use2.b[z]|r->set.b[z]) & bb) { 1126 if(debug['R'] && debug['v']) 1127 print("%P", p); 1128 addreg(&p->to, rn); 1129 if(debug['R'] && debug['v']) 1130 print(" ===change== %P\n", p); 1131 } 1132 1133 if(STORE(r) & r->regdiff.b[z] & bb) 1134 addmove(r, bn, rn, 1); 1135 r->regu |= rb; 1136 1137 if(r->refbehind.b[z] & bb) 1138 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1139 if(r1->refahead.b[z] & bb) 1140 paint3(r1, bn, rb, rn); 1141 1142 if(!(r->refahead.b[z] & bb)) 1143 break; 1144 r1 = (Reg*)r->f.s2; 1145 if(r1 != R) 1146 if(r1->refbehind.b[z] & bb) 1147 paint3(r1, bn, rb, rn); 1148 r = (Reg*)r->f.s1; 1149 if(r == R) 1150 break; 1151 if(r->act.b[z] & bb) 1152 break; 1153 if(!(r->refbehind.b[z] & bb)) 1154 break; 1155 } 1156 } 1157 1158 void 1159 addreg(Adr *a, int rn) 1160 { 1161 a->sym = nil; 1162 a->node = nil; 1163 a->offset = 0; 1164 a->type = rn; 1165 1166 ostats.ncvtreg++; 1167 } 1168 1169 uint32 1170 RtoB(int r) 1171 { 1172 1173 if(r < D_AX || r > D_R15) 1174 return 0; 1175 return 1L << (r-D_AX); 1176 } 1177 1178 int 1179 BtoR(uint32 b) 1180 { 1181 b &= 0xffffL; 1182 if(nacl) 1183 b &= ~((1<<(D_BP-D_AX)) | (1<<(D_R15-D_AX))); 1184 if(b == 0) 1185 return 0; 1186 return bitno(b) + D_AX; 1187 } 1188 1189 /* 1190 * bit reg 1191 * 16 X0 1192 * ... 1193 * 31 X15 1194 */ 1195 uint32 1196 FtoB(int f) 1197 { 1198 if(f < D_X0 || f > D_X15) 1199 return 0; 1200 return 1L << (f - D_X0 + 16); 1201 } 1202 1203 int 1204 BtoF(uint32 b) 1205 { 1206 1207 b &= 0xFFFF0000L; 1208 if(b == 0) 1209 return 0; 1210 return bitno(b) - 16 + D_X0; 1211 } 1212 1213 void 1214 dumpone(Flow *f, int isreg) 1215 { 1216 int z; 1217 Bits bit; 1218 Reg *r; 1219 1220 print("%d:%P", f->loop, f->prog); 1221 if(isreg) { 1222 r = (Reg*)f; 1223 for(z=0; z<BITS; z++) 1224 bit.b[z] = 1225 r->set.b[z] | 1226 r->use1.b[z] | 1227 r->use2.b[z] | 1228 r->refbehind.b[z] | 1229 r->refahead.b[z] | 1230 r->calbehind.b[z] | 1231 r->calahead.b[z] | 1232 r->regdiff.b[z] | 1233 r->act.b[z] | 1234 0; 1235 if(bany(&bit)) { 1236 print("\t"); 1237 if(bany(&r->set)) 1238 print(" s:%Q", r->set); 1239 if(bany(&r->use1)) 1240 print(" u1:%Q", r->use1); 1241 if(bany(&r->use2)) 1242 print(" u2:%Q", r->use2); 1243 if(bany(&r->refbehind)) 1244 print(" rb:%Q ", r->refbehind); 1245 if(bany(&r->refahead)) 1246 print(" ra:%Q ", r->refahead); 1247 if(bany(&r->calbehind)) 1248 print(" cb:%Q ", r->calbehind); 1249 if(bany(&r->calahead)) 1250 print(" ca:%Q ", r->calahead); 1251 if(bany(&r->regdiff)) 1252 print(" d:%Q ", r->regdiff); 1253 if(bany(&r->act)) 1254 print(" a:%Q ", r->act); 1255 } 1256 } 1257 print("\n"); 1258 } 1259 1260 void 1261 dumpit(char *str, Flow *r0, int isreg) 1262 { 1263 Flow *r, *r1; 1264 1265 print("\n%s\n", str); 1266 for(r = r0; r != nil; r = r->link) { 1267 dumpone(r, isreg); 1268 r1 = r->p2; 1269 if(r1 != nil) { 1270 print(" pred:"); 1271 for(; r1 != nil; r1 = r1->p2link) 1272 print(" %.4ud", (int)r1->prog->pc); 1273 print("\n"); 1274 } 1275 // Print successors if it's not just the next one 1276 if(r->s1 != r->link || r->s2 != nil) { 1277 print(" succ:"); 1278 if(r->s1 != nil) 1279 print(" %.4ud", (int)r->s1->prog->pc); 1280 if(r->s2 != nil) 1281 print(" %.4ud", (int)r->s2->prog->pc); 1282 print("\n"); 1283 } 1284 } 1285 }