// Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include "gc.h" 32 33 static void fixjmp(Reg*); 34 35 Reg* 36 rega(void) 37 { 38 Reg *r; 39 40 r = freer; 41 if(r == R) { 42 r = alloc(sizeof(*r)); 43 } else 44 freer = r->link; 45 46 *r = zreg; 47 return r; 48 } 49 50 int 51 rcmp(const void *a1, const void *a2) 52 { 53 Rgn *p1, *p2; 54 int c1, c2; 55 56 p1 = (Rgn*)a1; 57 p2 = (Rgn*)a2; 58 c1 = p2->cost; 59 c2 = p1->cost; 60 if(c1 -= c2) 61 return c1; 62 return p2->varno - p1->varno; 63 } 64 65 void 66 regopt(Prog *p) 67 { 68 Reg *r, *r1, *r2; 69 Prog *p1; 70 int i, z; 71 int32 initpc, val, npc; 72 uint32 vreg; 73 Bits bit; 74 struct 75 { 76 int32 m; 77 int32 c; 78 Reg* p; 79 } log5[6], *lp; 80 81 firstr = R; 82 lastr = R; 83 nvar = 0; 84 regbits = RtoB(D_SP) | RtoB(D_AX) | RtoB(D_X0); 85 for(z=0; z<BITS; z++) { 86 externs.b[z] = 0; 87 params.b[z] = 0; 88 consts.b[z] = 0; 89 addrs.b[z] = 0; 90 } 91 92 /* 93 * pass 1 94 * build aux data structure 95 * allocate pcs 96 * find use and set of variables 97 */ 98 val = 5L * 5L * 5L * 5L * 5L; 99 lp = log5; 100 for(i=0; i<5; i++) { 101 lp->m = val; 102 lp->c = 0; 103 lp->p = R; 104 val /= 5L; 105 lp++; 106 } 107 val = 0; 108 for(; p != P; p = p->link) { 109 switch(p->as) { 110 case ADATA: 111 case AGLOBL: 112 case ANAME: 113 case ASIGNAME: 114 case AFUNCDATA: 115 continue; 116 } 117 r = rega(); 118 if(firstr == R) { 119 firstr = r; 120 lastr = r; 121 } else { 122 lastr->link = r; 123 r->p1 = lastr; 124 lastr->s1 = r; 125 lastr = r; 126 } 127 r->prog = p; 128 r->pc = val; 129 val++; 130 131 lp = log5; 132 for(i=0; i<5; i++) { 133 lp->c--; 134 if(lp->c <= 0) { 135 lp->c = lp->m; 136 if(lp->p != R) 137 lp->p->log5 = r; 138 lp->p = r; 139 (lp+1)->c = 0; 140 break; 141 } 142 lp++; 143 } 144 145 r1 = r->p1; 146 if(r1 != R) 147 switch(r1->prog->as) { 148 case ARET: 149 case AJMP: 150 case AIRETL: 151 case AIRETQ: 152 r->p1 = R; 153 r1->s1 = R; 154 } 155 156 bit = mkvar(r, &p->from); 157 if(bany(&bit)) 158 switch(p->as) { 159 /* 160 * funny 161 */ 162 case ALEAL: 163 case 
ALEAQ: 164 for(z=0; z<BITS; z++) 165 addrs.b[z] |= bit.b[z]; 166 break; 167 168 /* 169 * left side read 170 */ 171 default: 172 for(z=0; z<BITS; z++) 173 r->use1.b[z] |= bit.b[z]; 174 break; 175 } 176 177 bit = mkvar(r, &p->to); 178 if(bany(&bit)) 179 switch(p->as) { 180 default: 181 diag(Z, "reg: unknown op: %A", p->as); 182 break; 183 184 /* 185 * right side read 186 */ 187 case ACMPB: 188 case ACMPL: 189 case ACMPQ: 190 case ACMPW: 191 case APREFETCHT0: 192 case APREFETCHT1: 193 case APREFETCHT2: 194 case APREFETCHNTA: 195 case ACOMISS: 196 case ACOMISD: 197 case AUCOMISS: 198 case AUCOMISD: 199 for(z=0; z<BITS; z++) 200 r->use2.b[z] |= bit.b[z]; 201 break; 202 203 /* 204 * right side write 205 */ 206 case ANOP: 207 case AMOVL: 208 case AMOVQ: 209 case AMOVB: 210 case AMOVW: 211 case AMOVBLSX: 212 case AMOVBLZX: 213 case AMOVBQSX: 214 case AMOVBQZX: 215 case AMOVLQSX: 216 case AMOVLQZX: 217 case AMOVWLSX: 218 case AMOVWLZX: 219 case AMOVWQSX: 220 case AMOVWQZX: 221 case AMOVQL: 222 223 case AMOVSS: 224 case AMOVSD: 225 case ACVTSD2SL: 226 case ACVTSD2SQ: 227 case ACVTSD2SS: 228 case ACVTSL2SD: 229 case ACVTSL2SS: 230 case ACVTSQ2SD: 231 case ACVTSQ2SS: 232 case ACVTSS2SD: 233 case ACVTSS2SL: 234 case ACVTSS2SQ: 235 case ACVTTSD2SL: 236 case ACVTTSD2SQ: 237 case ACVTTSS2SL: 238 case ACVTTSS2SQ: 239 for(z=0; z<BITS; z++) 240 r->set.b[z] |= bit.b[z]; 241 break; 242 243 /* 244 * right side read+write 245 */ 246 case AADDB: 247 case AADDL: 248 case AADDQ: 249 case AADDW: 250 case AANDB: 251 case AANDL: 252 case AANDQ: 253 case AANDW: 254 case ASUBB: 255 case ASUBL: 256 case ASUBQ: 257 case ASUBW: 258 case AORB: 259 case AORL: 260 case AORQ: 261 case AORW: 262 case AXORB: 263 case AXORL: 264 case AXORQ: 265 case AXORW: 266 case ASALB: 267 case ASALL: 268 case ASALQ: 269 case ASALW: 270 case ASARB: 271 case ASARL: 272 case ASARQ: 273 case ASARW: 274 case AROLB: 275 case AROLL: 276 case AROLQ: 277 case AROLW: 278 case ARORB: 279 case ARORL: 280 case ARORQ: 281 case 
ARORW: 282 case ASHLB: 283 case ASHLL: 284 case ASHLQ: 285 case ASHLW: 286 case ASHRB: 287 case ASHRL: 288 case ASHRQ: 289 case ASHRW: 290 case AIMULL: 291 case AIMULQ: 292 case AIMULW: 293 case ANEGL: 294 case ANEGQ: 295 case ANOTL: 296 case ANOTQ: 297 case AADCL: 298 case AADCQ: 299 case ASBBL: 300 case ASBBQ: 301 302 case AADDSD: 303 case AADDSS: 304 case ACMPSD: 305 case ACMPSS: 306 case ADIVSD: 307 case ADIVSS: 308 case AMAXSD: 309 case AMAXSS: 310 case AMINSD: 311 case AMINSS: 312 case AMULSD: 313 case AMULSS: 314 case ARCPSS: 315 case ARSQRTSS: 316 case ASQRTSD: 317 case ASQRTSS: 318 case ASUBSD: 319 case ASUBSS: 320 case AXORPD: 321 for(z=0; z<BITS; z++) { 322 r->set.b[z] |= bit.b[z]; 323 r->use2.b[z] |= bit.b[z]; 324 } 325 break; 326 327 /* 328 * funny 329 */ 330 case ACALL: 331 for(z=0; z<BITS; z++) 332 addrs.b[z] |= bit.b[z]; 333 break; 334 } 335 336 switch(p->as) { 337 case AIMULL: 338 case AIMULQ: 339 case AIMULW: 340 if(p->to.type != D_NONE) 341 break; 342 343 case AIDIVB: 344 case AIDIVL: 345 case AIDIVQ: 346 case AIDIVW: 347 case AIMULB: 348 case ADIVB: 349 case ADIVL: 350 case ADIVQ: 351 case ADIVW: 352 case AMULB: 353 case AMULL: 354 case AMULQ: 355 case AMULW: 356 357 case ACWD: 358 case ACDQ: 359 case ACQO: 360 r->regu |= RtoB(D_AX) | RtoB(D_DX); 361 break; 362 363 case AREP: 364 case AREPN: 365 case ALOOP: 366 case ALOOPEQ: 367 case ALOOPNE: 368 r->regu |= RtoB(D_CX); 369 break; 370 371 case AMOVSB: 372 case AMOVSL: 373 case AMOVSQ: 374 case AMOVSW: 375 case ACMPSB: 376 case ACMPSL: 377 case ACMPSQ: 378 case ACMPSW: 379 r->regu |= RtoB(D_SI) | RtoB(D_DI); 380 break; 381 382 case ASTOSB: 383 case ASTOSL: 384 case ASTOSQ: 385 case ASTOSW: 386 case ASCASB: 387 case ASCASL: 388 case ASCASQ: 389 case ASCASW: 390 r->regu |= RtoB(D_AX) | RtoB(D_DI); 391 break; 392 393 case AINSB: 394 case AINSL: 395 case AINSW: 396 case AOUTSB: 397 case AOUTSL: 398 case AOUTSW: 399 r->regu |= RtoB(D_DI) | RtoB(D_DX); 400 break; 401 } 402 } 403 if(firstr == R) 404 
return; 405 initpc = pc - val; 406 npc = val; 407 408 /* 409 * pass 2 410 * turn branch references to pointers 411 * build back pointers 412 */ 413 for(r = firstr; r != R; r = r->link) { 414 p = r->prog; 415 if(p->to.type == D_BRANCH) { 416 val = p->to.offset - initpc; 417 r1 = firstr; 418 while(r1 != R) { 419 r2 = r1->log5; 420 if(r2 != R && val >= r2->pc) { 421 r1 = r2; 422 continue; 423 } 424 if(r1->pc == val) 425 break; 426 r1 = r1->link; 427 } 428 if(r1 == R) { 429 nearln = p->lineno; 430 diag(Z, "ref not found\n%P", p); 431 continue; 432 } 433 if(r1 == r) { 434 nearln = p->lineno; 435 diag(Z, "ref to self\n%P", p); 436 continue; 437 } 438 r->s2 = r1; 439 r->p2link = r1->p2; 440 r1->p2 = r; 441 } 442 } 443 if(debug['R']) { 444 p = firstr->prog; 445 print("\n%L %D\n", p->lineno, &p->from); 446 } 447 448 /* 449 * pass 2.1 450 * fix jumps 451 */ 452 fixjmp(firstr); 453 454 /* 455 * pass 2.5 456 * find looping structure 457 */ 458 for(r = firstr; r != R; r = r->link) 459 r->active = 0; 460 change = 0; 461 loopit(firstr, npc); 462 if(debug['R'] && debug['v']) { 463 print("\nlooping structure:\n"); 464 for(r = firstr; r != R; r = r->link) { 465 print("%d:%P", r->loop, r->prog); 466 for(z=0; z<BITS; z++) 467 bit.b[z] = r->use1.b[z] | 468 r->use2.b[z] | 469 r->set.b[z]; 470 if(bany(&bit)) { 471 print("\t"); 472 if(bany(&r->use1)) 473 print(" u1=%B", r->use1); 474 if(bany(&r->use2)) 475 print(" u2=%B", r->use2); 476 if(bany(&r->set)) 477 print(" st=%B", r->set); 478 } 479 print("\n"); 480 } 481 } 482 483 /* 484 * pass 3 485 * iterate propagating usage 486 * back until flow graph is complete 487 */ 488 loop1: 489 change = 0; 490 for(r = firstr; r != R; r = r->link) 491 r->active = 0; 492 for(r = firstr; r != R; r = r->link) 493 if(r->prog->as == ARET) 494 prop(r, zbits, zbits); 495 loop11: 496 /* pick up unreachable code */ 497 i = 0; 498 for(r = firstr; r != R; r = r1) { 499 r1 = r->link; 500 if(r1 && r1->active && !r->active) { 501 prop(r, zbits, zbits); 502 i = 1; 
503 } 504 } 505 if(i) 506 goto loop11; 507 if(change) 508 goto loop1; 509 510 511 /* 512 * pass 4 513 * iterate propagating register/variable synchrony 514 * forward until graph is complete 515 */ 516 loop2: 517 change = 0; 518 for(r = firstr; r != R; r = r->link) 519 r->active = 0; 520 synch(firstr, zbits); 521 if(change) 522 goto loop2; 523 524 525 /* 526 * pass 5 527 * isolate regions 528 * calculate costs (paint1) 529 */ 530 r = firstr; 531 if(r) { 532 for(z=0; z<BITS; z++) 533 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 534 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 535 if(bany(&bit)) { 536 nearln = r->prog->lineno; 537 warn(Z, "used and not set: %B", bit); 538 if(debug['R'] && !debug['w']) 539 print("used and not set: %B\n", bit); 540 } 541 } 542 if(debug['R'] && debug['v']) 543 print("\nprop structure:\n"); 544 for(r = firstr; r != R; r = r->link) 545 r->act = zbits; 546 rgp = region; 547 nregion = 0; 548 for(r = firstr; r != R; r = r->link) { 549 if(debug['R'] && debug['v']) { 550 print("%P\t", r->prog); 551 if(bany(&r->set)) 552 print("s:%B ", r->set); 553 if(bany(&r->refahead)) 554 print("ra:%B ", r->refahead); 555 if(bany(&r->calahead)) 556 print("ca:%B ", r->calahead); 557 print("\n"); 558 } 559 for(z=0; z<BITS; z++) 560 bit.b[z] = r->set.b[z] & 561 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 562 if(bany(&bit)) { 563 nearln = r->prog->lineno; 564 warn(Z, "set and not used: %B", bit); 565 if(debug['R']) 566 print("set and not used: %B\n", bit); 567 excise(r); 568 } 569 for(z=0; z<BITS; z++) 570 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 571 while(bany(&bit)) { 572 i = bnum(bit); 573 rgp->enter = r; 574 rgp->varno = i; 575 change = 0; 576 if(debug['R'] && debug['v']) 577 print("\n"); 578 paint1(r, i); 579 bit.b[i/32] &= ~(1L<<(i%32)); 580 if(change <= 0) { 581 if(debug['R']) 582 print("%L$%d: %B\n", 583 r->prog->lineno, change, blsh(i)); 584 continue; 585 } 586 rgp->cost = change; 587 nregion++; 588 if(nregion >= 
NRGN) { 589 warn(Z, "too many regions"); 590 goto brk; 591 } 592 rgp++; 593 } 594 } 595 brk: 596 qsort(region, nregion, sizeof(region[0]), rcmp); 597 598 /* 599 * pass 6 600 * determine used registers (paint2) 601 * replace code (paint3) 602 */ 603 rgp = region; 604 for(i=0; i<nregion; i++) { 605 bit = blsh(rgp->varno); 606 vreg = paint2(rgp->enter, rgp->varno); 607 vreg = allreg(vreg, rgp); 608 if(debug['R']) { 609 print("%L$%d %R: %B\n", 610 rgp->enter->prog->lineno, 611 rgp->cost, 612 rgp->regno, 613 bit); 614 } 615 if(rgp->regno != 0) 616 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 617 rgp++; 618 } 619 /* 620 * pass 7 621 * peep-hole on basic block 622 */ 623 if(!debug['R'] || debug['P']) 624 peep(); 625 626 /* 627 * pass 8 628 * recalculate pc 629 */ 630 val = initpc; 631 for(r = firstr; r != R; r = r1) { 632 r->pc = val; 633 p = r->prog; 634 p1 = P; 635 r1 = r->link; 636 if(r1 != R) 637 p1 = r1->prog; 638 for(; p != p1; p = p->link) { 639 switch(p->as) { 640 default: 641 val++; 642 break; 643 644 case ANOP: 645 case ADATA: 646 case AGLOBL: 647 case ANAME: 648 case ASIGNAME: 649 case AFUNCDATA: 650 break; 651 } 652 } 653 } 654 pc = val; 655 656 /* 657 * fix up branches 658 */ 659 if(debug['R']) 660 if(bany(&addrs)) 661 print("addrs: %B\n", addrs); 662 663 r1 = 0; /* set */ 664 for(r = firstr; r != R; r = r->link) { 665 p = r->prog; 666 if(p->to.type == D_BRANCH) { 667 p->to.offset = r->s2->pc; 668 p->to.u.branch = r->s2->prog; 669 } 670 r1 = r; 671 } 672 673 /* 674 * last pass 675 * eliminate nops 676 * free aux structures 677 */ 678 for(p = firstr->prog; p != P; p = p->link){ 679 while(p->link && p->link->as == ANOP) 680 p->link = p->link->link; 681 } 682 if(r1 != R) { 683 r1->link = freer; 684 freer = firstr; 685 } 686 } 687 688 /* 689 * add mov b,rn 690 * just after r 691 */ 692 void 693 addmove(Reg *r, int bn, int rn, int f) 694 { 695 Prog *p, *p1; 696 Addr *a; 697 Var *v; 698 699 p1 = alloc(sizeof(*p1)); 700 *p1 = zprog; 701 p = r->prog; 702 703 
p1->link = p->link; 704 p->link = p1; 705 p1->lineno = p->lineno; 706 707 v = var + bn; 708 709 a = &p1->to; 710 a->sym = v->sym; 711 a->offset = v->offset; 712 a->etype = v->etype; 713 a->type = v->name; 714 715 p1->as = AMOVL; 716 if(v->etype == TCHAR || v->etype == TUCHAR) 717 p1->as = AMOVB; 718 if(v->etype == TSHORT || v->etype == TUSHORT) 719 p1->as = AMOVW; 720 if(v->etype == TVLONG || v->etype == TUVLONG || (v->etype == TIND && ewidth[TIND] == 8)) 721 p1->as = AMOVQ; 722 if(v->etype == TFLOAT) 723 p1->as = AMOVSS; 724 if(v->etype == TDOUBLE) 725 p1->as = AMOVSD; 726 727 p1->from.type = rn; 728 if(!f) { 729 p1->from = *a; 730 *a = zprog.from; 731 a->type = rn; 732 if(v->etype == TUCHAR) 733 p1->as = AMOVB; 734 if(v->etype == TUSHORT) 735 p1->as = AMOVW; 736 } 737 if(debug['R']) 738 print("%P\t.a%P\n", p, p1); 739 } 740 741 uint32 742 doregbits(int r) 743 { 744 uint32 b; 745 746 b = 0; 747 if(r >= D_INDIR) 748 r -= D_INDIR; 749 if(r >= D_AX && r <= D_R15) 750 b |= RtoB(r); 751 else 752 if(r >= D_AL && r <= D_R15B) 753 b |= RtoB(r-D_AL+D_AX); 754 else 755 if(r >= D_AH && r <= D_BH) 756 b |= RtoB(r-D_AH+D_AX); 757 else 758 if(r >= D_X0 && r <= D_X0+15) 759 b |= FtoB(r); 760 return b; 761 } 762 763 Bits 764 mkvar(Reg *r, Addr *a) 765 { 766 Var *v; 767 int i, t, n, et, z; 768 int32 o; 769 Bits bit; 770 LSym *s; 771 772 /* 773 * mark registers used 774 */ 775 t = a->type; 776 r->regu |= doregbits(t); 777 r->regu |= doregbits(a->index); 778 779 switch(t) { 780 default: 781 goto none; 782 case D_ADDR: 783 a->type = a->index; 784 bit = mkvar(r, a); 785 for(z=0; z<BITS; z++) 786 addrs.b[z] |= bit.b[z]; 787 a->type = t; 788 goto none; 789 case D_EXTERN: 790 case D_STATIC: 791 case D_PARAM: 792 case D_AUTO: 793 n = t; 794 break; 795 } 796 s = a->sym; 797 if(s == nil) 798 goto none; 799 if(s->name[0] == '.') 800 goto none; 801 et = a->etype; 802 o = a->offset; 803 v = var; 804 for(i=0; i<nvar; i++) { 805 if(s == v->sym) 806 if(n == v->name) 807 if(o == v->offset) 808 
goto out; 809 v++; 810 } 811 if(nvar >= NVAR) { 812 if(debug['w'] > 1 && s) 813 warn(Z, "variable not optimized: %s", s->name); 814 goto none; 815 } 816 i = nvar; 817 nvar++; 818 v = &var[i]; 819 v->sym = s; 820 v->offset = o; 821 v->name = n; 822 v->etype = et; 823 if(debug['R']) 824 print("bit=%2d et=%2d %D\n", i, et, a); 825 826 out: 827 bit = blsh(i); 828 if(n == D_EXTERN || n == D_STATIC) 829 for(z=0; z<BITS; z++) 830 externs.b[z] |= bit.b[z]; 831 if(n == D_PARAM) 832 for(z=0; z<BITS; z++) 833 params.b[z] |= bit.b[z]; 834 if(v->etype != et || !(typechlpfd[et] || typev[et])) /* funny punning */ 835 for(z=0; z<BITS; z++) 836 addrs.b[z] |= bit.b[z]; 837 return bit; 838 839 none: 840 return zbits; 841 } 842 843 void 844 prop(Reg *r, Bits ref, Bits cal) 845 { 846 Reg *r1, *r2; 847 int z; 848 849 for(r1 = r; r1 != R; r1 = r1->p1) { 850 for(z=0; z<BITS; z++) { 851 ref.b[z] |= r1->refahead.b[z]; 852 if(ref.b[z] != r1->refahead.b[z]) { 853 r1->refahead.b[z] = ref.b[z]; 854 change++; 855 } 856 cal.b[z] |= r1->calahead.b[z]; 857 if(cal.b[z] != r1->calahead.b[z]) { 858 r1->calahead.b[z] = cal.b[z]; 859 change++; 860 } 861 } 862 switch(r1->prog->as) { 863 case ACALL: 864 for(z=0; z<BITS; z++) { 865 cal.b[z] |= ref.b[z] | externs.b[z]; 866 ref.b[z] = 0; 867 } 868 break; 869 870 case ATEXT: 871 for(z=0; z<BITS; z++) { 872 cal.b[z] = 0; 873 ref.b[z] = 0; 874 } 875 break; 876 877 case ARET: 878 for(z=0; z<BITS; z++) { 879 cal.b[z] = externs.b[z]; 880 ref.b[z] = 0; 881 } 882 } 883 for(z=0; z<BITS; z++) { 884 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 885 r1->use1.b[z] | r1->use2.b[z]; 886 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 887 r1->refbehind.b[z] = ref.b[z]; 888 r1->calbehind.b[z] = cal.b[z]; 889 } 890 if(r1->active) 891 break; 892 r1->active = 1; 893 } 894 for(; r != r1; r = r->p1) 895 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 896 prop(r2, r->refbehind, r->calbehind); 897 } 898 899 /* 900 * find looping structure 901 * 902 * 1) find reverse postordering 
903 * 2) find approximate dominators, 904 * the actual dominators if the flow graph is reducible 905 * otherwise, dominators plus some other non-dominators. 906 * See Matthew S. Hecht and Jeffrey D. Ullman, 907 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 908 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 909 * Oct. 1-3, 1973, pp. 207-217. 910 * 3) find all nodes with a predecessor dominated by the current node. 911 * such a node is a loop head. 912 * recursively, all preds with a greater rpo number are in the loop 913 */ 914 int32 915 postorder(Reg *r, Reg **rpo2r, int32 n) 916 { 917 Reg *r1; 918 919 r->rpo = 1; 920 r1 = r->s1; 921 if(r1 && !r1->rpo) 922 n = postorder(r1, rpo2r, n); 923 r1 = r->s2; 924 if(r1 && !r1->rpo) 925 n = postorder(r1, rpo2r, n); 926 rpo2r[n] = r; 927 n++; 928 return n; 929 } 930 931 int32 932 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 933 { 934 int32 t; 935 936 if(rpo1 == -1) 937 return rpo2; 938 while(rpo1 != rpo2){ 939 if(rpo1 > rpo2){ 940 t = rpo2; 941 rpo2 = rpo1; 942 rpo1 = t; 943 } 944 while(rpo1 < rpo2){ 945 t = idom[rpo2]; 946 if(t >= rpo2) 947 fatal(Z, "bad idom"); 948 rpo2 = t; 949 } 950 } 951 return rpo1; 952 } 953 954 int 955 doms(int32 *idom, int32 r, int32 s) 956 { 957 while(s > r) 958 s = idom[s]; 959 return s == r; 960 } 961 962 int 963 loophead(int32 *idom, Reg *r) 964 { 965 int32 src; 966 967 src = r->rpo; 968 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 969 return 1; 970 for(r = r->p2; r != R; r = r->p2link) 971 if(doms(idom, src, r->rpo)) 972 return 1; 973 return 0; 974 } 975 976 void 977 loopmark(Reg **rpo2r, int32 head, Reg *r) 978 { 979 if(r->rpo < head || r->active == head) 980 return; 981 r->active = head; 982 r->loop += LOOP; 983 if(r->p1 != R) 984 loopmark(rpo2r, head, r->p1); 985 for(r = r->p2; r != R; r = r->p2link) 986 loopmark(rpo2r, head, r); 987 } 988 989 void 990 loopit(Reg *r, int32 nr) 991 { 992 Reg *r1; 993 int32 i, d, me; 994 995 if(nr > maxnr) { 
996 rpo2r = alloc(nr * sizeof(Reg*)); 997 idom = alloc(nr * sizeof(int32)); 998 maxnr = nr; 999 } 1000 1001 d = postorder(r, rpo2r, 0); 1002 if(d > nr) 1003 fatal(Z, "too many reg nodes"); 1004 nr = d; 1005 for(i = 0; i < nr / 2; i++){ 1006 r1 = rpo2r[i]; 1007 rpo2r[i] = rpo2r[nr - 1 - i]; 1008 rpo2r[nr - 1 - i] = r1; 1009 } 1010 for(i = 0; i < nr; i++) 1011 rpo2r[i]->rpo = i; 1012 1013 idom[0] = 0; 1014 for(i = 0; i < nr; i++){ 1015 r1 = rpo2r[i]; 1016 me = r1->rpo; 1017 d = -1; 1018 if(r1->p1 != R && r1->p1->rpo < me) 1019 d = r1->p1->rpo; 1020 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 1021 if(r1->rpo < me) 1022 d = rpolca(idom, d, r1->rpo); 1023 idom[i] = d; 1024 } 1025 1026 for(i = 0; i < nr; i++){ 1027 r1 = rpo2r[i]; 1028 r1->loop++; 1029 if(r1->p2 != R && loophead(idom, r1)) 1030 loopmark(rpo2r, i, r1); 1031 } 1032 } 1033 1034 void 1035 synch(Reg *r, Bits dif) 1036 { 1037 Reg *r1; 1038 int z; 1039 1040 for(r1 = r; r1 != R; r1 = r1->s1) { 1041 for(z=0; z<BITS; z++) { 1042 dif.b[z] = (dif.b[z] & 1043 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 1044 r1->set.b[z] | r1->regdiff.b[z]; 1045 if(dif.b[z] != r1->regdiff.b[z]) { 1046 r1->regdiff.b[z] = dif.b[z]; 1047 change++; 1048 } 1049 } 1050 if(r1->active) 1051 break; 1052 r1->active = 1; 1053 for(z=0; z<BITS; z++) 1054 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 1055 if(r1->s2 != R) 1056 synch(r1->s2, dif); 1057 } 1058 } 1059 1060 uint32 1061 allreg(uint32 b, Rgn *r) 1062 { 1063 Var *v; 1064 int i; 1065 1066 v = var + r->varno; 1067 r->regno = 0; 1068 switch(v->etype) { 1069 1070 default: 1071 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 1072 break; 1073 1074 case TCHAR: 1075 case TUCHAR: 1076 case TSHORT: 1077 case TUSHORT: 1078 case TINT: 1079 case TUINT: 1080 case TLONG: 1081 case TULONG: 1082 case TVLONG: 1083 case TUVLONG: 1084 case TIND: 1085 case TARRAY: 1086 i = BtoR(~b); 1087 if(i && r->cost > 0) { 1088 r->regno = i; 1089 return RtoB(i); 1090 } 1091 break; 1092 1093 case TDOUBLE: 
1094 case TFLOAT: 1095 i = BtoF(~b); 1096 if(i && r->cost > 0) { 1097 r->regno = i; 1098 return FtoB(i); 1099 } 1100 break; 1101 } 1102 return 0; 1103 } 1104 1105 void 1106 paint1(Reg *r, int bn) 1107 { 1108 Reg *r1; 1109 Prog *p; 1110 int z; 1111 uint32 bb; 1112 1113 z = bn/32; 1114 bb = 1L<<(bn%32); 1115 if(r->act.b[z] & bb) 1116 return; 1117 for(;;) { 1118 if(!(r->refbehind.b[z] & bb)) 1119 break; 1120 r1 = r->p1; 1121 if(r1 == R) 1122 break; 1123 if(!(r1->refahead.b[z] & bb)) 1124 break; 1125 if(r1->act.b[z] & bb) 1126 break; 1127 r = r1; 1128 } 1129 1130 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 1131 change -= CLOAD * r->loop; 1132 if(debug['R'] && debug['v']) 1133 print("%d%P\td %B $%d\n", r->loop, 1134 r->prog, blsh(bn), change); 1135 } 1136 for(;;) { 1137 r->act.b[z] |= bb; 1138 p = r->prog; 1139 1140 if(r->use1.b[z] & bb) { 1141 change += CREF * r->loop; 1142 if(debug['R'] && debug['v']) 1143 print("%d%P\tu1 %B $%d\n", r->loop, 1144 p, blsh(bn), change); 1145 } 1146 1147 if((r->use2.b[z]|r->set.b[z]) & bb) { 1148 change += CREF * r->loop; 1149 if(debug['R'] && debug['v']) 1150 print("%d%P\tu2 %B $%d\n", r->loop, 1151 p, blsh(bn), change); 1152 } 1153 1154 if(STORE(r) & r->regdiff.b[z] & bb) { 1155 change -= CLOAD * r->loop; 1156 if(debug['R'] && debug['v']) 1157 print("%d%P\tst %B $%d\n", r->loop, 1158 p, blsh(bn), change); 1159 } 1160 1161 if(r->refbehind.b[z] & bb) 1162 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1163 if(r1->refahead.b[z] & bb) 1164 paint1(r1, bn); 1165 1166 if(!(r->refahead.b[z] & bb)) 1167 break; 1168 r1 = r->s2; 1169 if(r1 != R) 1170 if(r1->refbehind.b[z] & bb) 1171 paint1(r1, bn); 1172 r = r->s1; 1173 if(r == R) 1174 break; 1175 if(r->act.b[z] & bb) 1176 break; 1177 if(!(r->refbehind.b[z] & bb)) 1178 break; 1179 } 1180 } 1181 1182 uint32 1183 regset(Reg *r, uint32 bb) 1184 { 1185 uint32 b, set; 1186 Addr v; 1187 int c; 1188 1189 set = 0; 1190 v = zprog.from; 1191 while(b = bb & ~(bb-1)) { 1192 v.type = b & 
0xFFFF? BtoR(b): BtoF(b); 1193 if(v.type == 0) 1194 diag(Z, "zero v.type for %#ux", b); 1195 c = copyu(r->prog, &v, A); 1196 if(c == 3) 1197 set |= b; 1198 bb &= ~b; 1199 } 1200 return set; 1201 } 1202 1203 uint32 1204 reguse(Reg *r, uint32 bb) 1205 { 1206 uint32 b, set; 1207 Addr v; 1208 int c; 1209 1210 set = 0; 1211 v = zprog.from; 1212 while(b = bb & ~(bb-1)) { 1213 v.type = b & 0xFFFF? BtoR(b): BtoF(b); 1214 c = copyu(r->prog, &v, A); 1215 if(c == 1 || c == 2 || c == 4) 1216 set |= b; 1217 bb &= ~b; 1218 } 1219 return set; 1220 } 1221 1222 uint32 1223 paint2(Reg *r, int bn) 1224 { 1225 Reg *r1; 1226 int z; 1227 uint32 bb, vreg, x; 1228 1229 z = bn/32; 1230 bb = 1L << (bn%32); 1231 vreg = regbits; 1232 if(!(r->act.b[z] & bb)) 1233 return vreg; 1234 for(;;) { 1235 if(!(r->refbehind.b[z] & bb)) 1236 break; 1237 r1 = r->p1; 1238 if(r1 == R) 1239 break; 1240 if(!(r1->refahead.b[z] & bb)) 1241 break; 1242 if(!(r1->act.b[z] & bb)) 1243 break; 1244 r = r1; 1245 } 1246 for(;;) { 1247 r->act.b[z] &= ~bb; 1248 1249 vreg |= r->regu; 1250 1251 if(r->refbehind.b[z] & bb) 1252 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1253 if(r1->refahead.b[z] & bb) 1254 vreg |= paint2(r1, bn); 1255 1256 if(!(r->refahead.b[z] & bb)) 1257 break; 1258 r1 = r->s2; 1259 if(r1 != R) 1260 if(r1->refbehind.b[z] & bb) 1261 vreg |= paint2(r1, bn); 1262 r = r->s1; 1263 if(r == R) 1264 break; 1265 if(!(r->act.b[z] & bb)) 1266 break; 1267 if(!(r->refbehind.b[z] & bb)) 1268 break; 1269 } 1270 1271 bb = vreg; 1272 for(; r; r=r->s1) { 1273 x = r->regu & ~bb; 1274 if(x) { 1275 vreg |= reguse(r, x); 1276 bb |= regset(r, x); 1277 } 1278 } 1279 return vreg; 1280 } 1281 1282 void 1283 paint3(Reg *r, int bn, int32 rb, int rn) 1284 { 1285 Reg *r1; 1286 Prog *p; 1287 int z; 1288 uint32 bb; 1289 1290 z = bn/32; 1291 bb = 1L << (bn%32); 1292 if(r->act.b[z] & bb) 1293 return; 1294 for(;;) { 1295 if(!(r->refbehind.b[z] & bb)) 1296 break; 1297 r1 = r->p1; 1298 if(r1 == R) 1299 break; 1300 if(!(r1->refahead.b[z] & bb)) 
1301 break; 1302 if(r1->act.b[z] & bb) 1303 break; 1304 r = r1; 1305 } 1306 1307 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1308 addmove(r, bn, rn, 0); 1309 for(;;) { 1310 r->act.b[z] |= bb; 1311 p = r->prog; 1312 1313 if(r->use1.b[z] & bb) { 1314 if(debug['R']) 1315 print("%P", p); 1316 addreg(&p->from, rn); 1317 if(debug['R']) 1318 print("\t.c%P\n", p); 1319 } 1320 if((r->use2.b[z]|r->set.b[z]) & bb) { 1321 if(debug['R']) 1322 print("%P", p); 1323 addreg(&p->to, rn); 1324 if(debug['R']) 1325 print("\t.c%P\n", p); 1326 } 1327 1328 if(STORE(r) & r->regdiff.b[z] & bb) 1329 addmove(r, bn, rn, 1); 1330 r->regu |= rb; 1331 1332 if(r->refbehind.b[z] & bb) 1333 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1334 if(r1->refahead.b[z] & bb) 1335 paint3(r1, bn, rb, rn); 1336 1337 if(!(r->refahead.b[z] & bb)) 1338 break; 1339 r1 = r->s2; 1340 if(r1 != R) 1341 if(r1->refbehind.b[z] & bb) 1342 paint3(r1, bn, rb, rn); 1343 r = r->s1; 1344 if(r == R) 1345 break; 1346 if(r->act.b[z] & bb) 1347 break; 1348 if(!(r->refbehind.b[z] & bb)) 1349 break; 1350 } 1351 } 1352 1353 void 1354 addreg(Addr *a, int rn) 1355 { 1356 1357 a->sym = 0; 1358 a->offset = 0; 1359 a->type = rn; 1360 } 1361 1362 int32 1363 RtoB(int r) 1364 { 1365 1366 if(r < D_AX || r > D_R15) 1367 return 0; 1368 return 1L << (r-D_AX); 1369 } 1370 1371 int 1372 BtoR(int32 b) 1373 { 1374 1375 b &= 0xffffL; 1376 if(nacl) 1377 b &= ~((1<<(D_BP-D_AX)) | (1<<(D_R15-D_AX))); 1378 if(b == 0) 1379 return 0; 1380 return bitno(b) + D_AX; 1381 } 1382 1383 /* 1384 * bit reg 1385 * 16 X5 1386 * 17 X6 1387 * 18 X7 1388 */ 1389 int32 1390 FtoB(int f) 1391 { 1392 if(f < FREGMIN || f > FREGEXT) 1393 return 0; 1394 return 1L << (f - FREGMIN + 16); 1395 } 1396 1397 int 1398 BtoF(int32 b) 1399 { 1400 1401 b &= 0x70000L; 1402 if(b == 0) 1403 return 0; 1404 return bitno(b) - 16 + FREGMIN; 1405 } 1406 1407 /* what instruction does a JMP to p eventually land on? 
*/ 1408 static Reg* 1409 chasejmp(Reg *r, int *jmploop) 1410 { 1411 int n; 1412 1413 n = 0; 1414 for(; r; r=r->s2) { 1415 if(r->prog->as != AJMP || r->prog->to.type != D_BRANCH) 1416 break; 1417 if(++n > 10) { 1418 *jmploop = 1; 1419 break; 1420 } 1421 } 1422 return r; 1423 } 1424 1425 /* mark all code reachable from firstp as alive */ 1426 static void 1427 mark(Reg *firstr) 1428 { 1429 Reg *r; 1430 Prog *p; 1431 1432 for(r=firstr; r; r=r->link) { 1433 if(r->active) 1434 break; 1435 r->active = 1; 1436 p = r->prog; 1437 if(p->as != ACALL && p->to.type == D_BRANCH) 1438 mark(r->s2); 1439 if(p->as == AJMP || p->as == ARET || p->as == AUNDEF) 1440 break; 1441 } 1442 } 1443 1444 /* 1445 * the code generator depends on being able to write out JMP 1446 * instructions that it can jump to now but fill in later. 1447 * the linker will resolve them nicely, but they make the code 1448 * longer and more difficult to follow during debugging. 1449 * remove them. 1450 */ 1451 static void 1452 fixjmp(Reg *firstr) 1453 { 1454 int jmploop; 1455 Reg *r; 1456 Prog *p; 1457 1458 if(debug['R'] && debug['v']) 1459 print("\nfixjmp\n"); 1460 1461 // pass 1: resolve jump to AJMP, mark all code as dead. 1462 jmploop = 0; 1463 for(r=firstr; r; r=r->link) { 1464 p = r->prog; 1465 if(debug['R'] && debug['v']) 1466 print("%04d %P\n", (int)r->pc, p); 1467 if(p->as != ACALL && p->to.type == D_BRANCH && r->s2 && r->s2->prog->as == AJMP) { 1468 r->s2 = chasejmp(r->s2, &jmploop); 1469 p->to.offset = r->s2->pc; 1470 p->to.u.branch = r->s2->prog; 1471 if(debug['R'] && debug['v']) 1472 print("->%P\n", p); 1473 } 1474 r->active = 0; 1475 } 1476 if(debug['R'] && debug['v']) 1477 print("\n"); 1478 1479 // pass 2: mark all reachable code alive 1480 mark(firstr); 1481 1482 // pass 3: delete dead code (mostly JMPs). 
1483 for(r=firstr; r; r=r->link) { 1484 if(!r->active) { 1485 p = r->prog; 1486 if(p->link == P && p->as == ARET && r->p1 && r->p1->prog->as != ARET) { 1487 // This is the final ARET, and the code so far doesn't have one. 1488 // Let it stay. 1489 } else { 1490 if(debug['R'] && debug['v']) 1491 print("del %04d %P\n", (int)r->pc, p); 1492 p->as = ANOP; 1493 } 1494 } 1495 } 1496 1497 // pass 4: elide JMP to next instruction. 1498 // only safe if there are no jumps to JMPs anymore. 1499 if(!jmploop) { 1500 for(r=firstr; r; r=r->link) { 1501 p = r->prog; 1502 if(p->as == AJMP && p->to.type == D_BRANCH && r->s2 == r->link) { 1503 if(debug['R'] && debug['v']) 1504 print("del %04d %P\n", (int)r->pc, p); 1505 p->as = ANOP; 1506 } 1507 } 1508 } 1509 1510 // fix back pointers. 1511 for(r=firstr; r; r=r->link) { 1512 r->p2 = R; 1513 r->p2link = R; 1514 } 1515 for(r=firstr; r; r=r->link) { 1516 if(r->s2) { 1517 r->p2link = r->s2->p2; 1518 r->s2->p2 = r; 1519 } 1520 } 1521 1522 if(debug['R'] && debug['v']) { 1523 print("\n"); 1524 for(r=firstr; r; r=r->link) 1525 print("%04d %P\n", (int)r->pc, r->prog); 1526 print("\n"); 1527 } 1528 } 1529