// Source: github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/6c/reg.c
//
// Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include "gc.h" 32 33 static void fixjmp(Reg*); 34 35 Reg* 36 rega(void) 37 { 38 Reg *r; 39 40 r = freer; 41 if(r == R) { 42 r = alloc(sizeof(*r)); 43 } else 44 freer = r->link; 45 46 *r = zreg; 47 return r; 48 } 49 50 int 51 rcmp(const void *a1, const void *a2) 52 { 53 Rgn *p1, *p2; 54 int c1, c2; 55 56 p1 = (Rgn*)a1; 57 p2 = (Rgn*)a2; 58 c1 = p2->cost; 59 c2 = p1->cost; 60 if(c1 -= c2) 61 return c1; 62 return p2->varno - p1->varno; 63 } 64 65 void 66 regopt(Prog *p) 67 { 68 Reg *r, *r1, *r2; 69 Prog *p1; 70 int i, z; 71 int32 initpc, val, npc; 72 uint32 vreg; 73 Bits bit; 74 struct 75 { 76 int32 m; 77 int32 c; 78 Reg* p; 79 } log5[6], *lp; 80 81 firstr = R; 82 lastr = R; 83 nvar = 0; 84 regbits = RtoB(D_SP) | RtoB(D_AX) | RtoB(D_X0); 85 for(z=0; z<BITS; z++) { 86 externs.b[z] = 0; 87 params.b[z] = 0; 88 consts.b[z] = 0; 89 addrs.b[z] = 0; 90 } 91 92 /* 93 * pass 1 94 * build aux data structure 95 * allocate pcs 96 * find use and set of variables 97 */ 98 val = 5L * 5L * 5L * 5L * 5L; 99 lp = log5; 100 for(i=0; i<5; i++) { 101 lp->m = val; 102 lp->c = 0; 103 lp->p = R; 104 val /= 5L; 105 lp++; 106 } 107 val = 0; 108 for(; p != P; p = p->link) { 109 switch(p->as) { 110 case ADATA: 111 case AGLOBL: 112 case ANAME: 113 case ASIGNAME: 114 continue; 115 } 116 r = rega(); 117 if(firstr == R) { 118 firstr = r; 119 lastr = r; 120 } else { 121 lastr->link = r; 122 r->p1 = lastr; 123 lastr->s1 = r; 124 lastr = r; 125 } 126 r->prog = p; 127 r->pc = val; 128 val++; 129 130 lp = log5; 131 for(i=0; i<5; i++) { 132 lp->c--; 133 if(lp->c <= 0) { 134 lp->c = lp->m; 135 if(lp->p != R) 136 lp->p->log5 = r; 137 lp->p = r; 138 (lp+1)->c = 0; 139 break; 140 } 141 lp++; 142 } 143 144 r1 = r->p1; 145 if(r1 != R) 146 switch(r1->prog->as) { 147 case ARET: 148 case AJMP: 149 case AIRETL: 150 case AIRETQ: 151 r->p1 = R; 152 r1->s1 = R; 153 } 154 155 bit = mkvar(r, &p->from); 156 if(bany(&bit)) 157 switch(p->as) { 158 /* 159 * funny 160 */ 161 case ALEAL: 162 case ALEAQ: 163 for(z=0; 
z<BITS; z++) 164 addrs.b[z] |= bit.b[z]; 165 break; 166 167 /* 168 * left side read 169 */ 170 default: 171 for(z=0; z<BITS; z++) 172 r->use1.b[z] |= bit.b[z]; 173 break; 174 } 175 176 bit = mkvar(r, &p->to); 177 if(bany(&bit)) 178 switch(p->as) { 179 default: 180 diag(Z, "reg: unknown op: %A", p->as); 181 break; 182 183 /* 184 * right side read 185 */ 186 case ACMPB: 187 case ACMPL: 188 case ACMPQ: 189 case ACMPW: 190 case APREFETCHT0: 191 case APREFETCHT1: 192 case APREFETCHT2: 193 case APREFETCHNTA: 194 case ACOMISS: 195 case ACOMISD: 196 case AUCOMISS: 197 case AUCOMISD: 198 for(z=0; z<BITS; z++) 199 r->use2.b[z] |= bit.b[z]; 200 break; 201 202 /* 203 * right side write 204 */ 205 case ANOP: 206 case AMOVL: 207 case AMOVQ: 208 case AMOVB: 209 case AMOVW: 210 case AMOVBLSX: 211 case AMOVBLZX: 212 case AMOVBQSX: 213 case AMOVBQZX: 214 case AMOVLQSX: 215 case AMOVLQZX: 216 case AMOVWLSX: 217 case AMOVWLZX: 218 case AMOVWQSX: 219 case AMOVWQZX: 220 case AMOVQL: 221 222 case AMOVSS: 223 case AMOVSD: 224 case ACVTSD2SL: 225 case ACVTSD2SQ: 226 case ACVTSD2SS: 227 case ACVTSL2SD: 228 case ACVTSL2SS: 229 case ACVTSQ2SD: 230 case ACVTSQ2SS: 231 case ACVTSS2SD: 232 case ACVTSS2SL: 233 case ACVTSS2SQ: 234 case ACVTTSD2SL: 235 case ACVTTSD2SQ: 236 case ACVTTSS2SL: 237 case ACVTTSS2SQ: 238 for(z=0; z<BITS; z++) 239 r->set.b[z] |= bit.b[z]; 240 break; 241 242 /* 243 * right side read+write 244 */ 245 case AADDB: 246 case AADDL: 247 case AADDQ: 248 case AADDW: 249 case AANDB: 250 case AANDL: 251 case AANDQ: 252 case AANDW: 253 case ASUBB: 254 case ASUBL: 255 case ASUBQ: 256 case ASUBW: 257 case AORB: 258 case AORL: 259 case AORQ: 260 case AORW: 261 case AXORB: 262 case AXORL: 263 case AXORQ: 264 case AXORW: 265 case ASALB: 266 case ASALL: 267 case ASALQ: 268 case ASALW: 269 case ASARB: 270 case ASARL: 271 case ASARQ: 272 case ASARW: 273 case AROLB: 274 case AROLL: 275 case AROLQ: 276 case AROLW: 277 case ARORB: 278 case ARORL: 279 case ARORQ: 280 case ARORW: 281 case ASHLB: 
282 case ASHLL: 283 case ASHLQ: 284 case ASHLW: 285 case ASHRB: 286 case ASHRL: 287 case ASHRQ: 288 case ASHRW: 289 case AIMULL: 290 case AIMULQ: 291 case AIMULW: 292 case ANEGL: 293 case ANEGQ: 294 case ANOTL: 295 case ANOTQ: 296 case AADCL: 297 case AADCQ: 298 case ASBBL: 299 case ASBBQ: 300 301 case AADDSD: 302 case AADDSS: 303 case ACMPSD: 304 case ACMPSS: 305 case ADIVSD: 306 case ADIVSS: 307 case AMAXSD: 308 case AMAXSS: 309 case AMINSD: 310 case AMINSS: 311 case AMULSD: 312 case AMULSS: 313 case ARCPSS: 314 case ARSQRTSS: 315 case ASQRTSD: 316 case ASQRTSS: 317 case ASUBSD: 318 case ASUBSS: 319 case AXORPD: 320 for(z=0; z<BITS; z++) { 321 r->set.b[z] |= bit.b[z]; 322 r->use2.b[z] |= bit.b[z]; 323 } 324 break; 325 326 /* 327 * funny 328 */ 329 case ACALL: 330 for(z=0; z<BITS; z++) 331 addrs.b[z] |= bit.b[z]; 332 break; 333 } 334 335 switch(p->as) { 336 case AIMULL: 337 case AIMULQ: 338 case AIMULW: 339 if(p->to.type != D_NONE) 340 break; 341 342 case AIDIVB: 343 case AIDIVL: 344 case AIDIVQ: 345 case AIDIVW: 346 case AIMULB: 347 case ADIVB: 348 case ADIVL: 349 case ADIVQ: 350 case ADIVW: 351 case AMULB: 352 case AMULL: 353 case AMULQ: 354 case AMULW: 355 356 case ACWD: 357 case ACDQ: 358 case ACQO: 359 r->regu |= RtoB(D_AX) | RtoB(D_DX); 360 break; 361 362 case AREP: 363 case AREPN: 364 case ALOOP: 365 case ALOOPEQ: 366 case ALOOPNE: 367 r->regu |= RtoB(D_CX); 368 break; 369 370 case AMOVSB: 371 case AMOVSL: 372 case AMOVSQ: 373 case AMOVSW: 374 case ACMPSB: 375 case ACMPSL: 376 case ACMPSQ: 377 case ACMPSW: 378 r->regu |= RtoB(D_SI) | RtoB(D_DI); 379 break; 380 381 case ASTOSB: 382 case ASTOSL: 383 case ASTOSQ: 384 case ASTOSW: 385 case ASCASB: 386 case ASCASL: 387 case ASCASQ: 388 case ASCASW: 389 r->regu |= RtoB(D_AX) | RtoB(D_DI); 390 break; 391 392 case AINSB: 393 case AINSL: 394 case AINSW: 395 case AOUTSB: 396 case AOUTSL: 397 case AOUTSW: 398 r->regu |= RtoB(D_DI) | RtoB(D_DX); 399 break; 400 } 401 } 402 if(firstr == R) 403 return; 404 initpc = pc - 
val; 405 npc = val; 406 407 /* 408 * pass 2 409 * turn branch references to pointers 410 * build back pointers 411 */ 412 for(r = firstr; r != R; r = r->link) { 413 p = r->prog; 414 if(p->to.type == D_BRANCH) { 415 val = p->to.offset - initpc; 416 r1 = firstr; 417 while(r1 != R) { 418 r2 = r1->log5; 419 if(r2 != R && val >= r2->pc) { 420 r1 = r2; 421 continue; 422 } 423 if(r1->pc == val) 424 break; 425 r1 = r1->link; 426 } 427 if(r1 == R) { 428 nearln = p->lineno; 429 diag(Z, "ref not found\n%P", p); 430 continue; 431 } 432 if(r1 == r) { 433 nearln = p->lineno; 434 diag(Z, "ref to self\n%P", p); 435 continue; 436 } 437 r->s2 = r1; 438 r->p2link = r1->p2; 439 r1->p2 = r; 440 } 441 } 442 if(debug['R']) { 443 p = firstr->prog; 444 print("\n%L %D\n", p->lineno, &p->from); 445 } 446 447 /* 448 * pass 2.1 449 * fix jumps 450 */ 451 fixjmp(firstr); 452 453 /* 454 * pass 2.5 455 * find looping structure 456 */ 457 for(r = firstr; r != R; r = r->link) 458 r->active = 0; 459 change = 0; 460 loopit(firstr, npc); 461 if(debug['R'] && debug['v']) { 462 print("\nlooping structure:\n"); 463 for(r = firstr; r != R; r = r->link) { 464 print("%d:%P", r->loop, r->prog); 465 for(z=0; z<BITS; z++) 466 bit.b[z] = r->use1.b[z] | 467 r->use2.b[z] | 468 r->set.b[z]; 469 if(bany(&bit)) { 470 print("\t"); 471 if(bany(&r->use1)) 472 print(" u1=%B", r->use1); 473 if(bany(&r->use2)) 474 print(" u2=%B", r->use2); 475 if(bany(&r->set)) 476 print(" st=%B", r->set); 477 } 478 print("\n"); 479 } 480 } 481 482 /* 483 * pass 3 484 * iterate propagating usage 485 * back until flow graph is complete 486 */ 487 loop1: 488 change = 0; 489 for(r = firstr; r != R; r = r->link) 490 r->active = 0; 491 for(r = firstr; r != R; r = r->link) 492 if(r->prog->as == ARET) 493 prop(r, zbits, zbits); 494 loop11: 495 /* pick up unreachable code */ 496 i = 0; 497 for(r = firstr; r != R; r = r1) { 498 r1 = r->link; 499 if(r1 && r1->active && !r->active) { 500 prop(r, zbits, zbits); 501 i = 1; 502 } 503 } 504 if(i) 505 
goto loop11; 506 if(change) 507 goto loop1; 508 509 510 /* 511 * pass 4 512 * iterate propagating register/variable synchrony 513 * forward until graph is complete 514 */ 515 loop2: 516 change = 0; 517 for(r = firstr; r != R; r = r->link) 518 r->active = 0; 519 synch(firstr, zbits); 520 if(change) 521 goto loop2; 522 523 524 /* 525 * pass 5 526 * isolate regions 527 * calculate costs (paint1) 528 */ 529 r = firstr; 530 if(r) { 531 for(z=0; z<BITS; z++) 532 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 533 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 534 if(bany(&bit)) { 535 nearln = r->prog->lineno; 536 warn(Z, "used and not set: %B", bit); 537 if(debug['R'] && !debug['w']) 538 print("used and not set: %B\n", bit); 539 } 540 } 541 if(debug['R'] && debug['v']) 542 print("\nprop structure:\n"); 543 for(r = firstr; r != R; r = r->link) 544 r->act = zbits; 545 rgp = region; 546 nregion = 0; 547 for(r = firstr; r != R; r = r->link) { 548 if(debug['R'] && debug['v']) { 549 print("%P\t", r->prog); 550 if(bany(&r->set)) 551 print("s:%B ", r->set); 552 if(bany(&r->refahead)) 553 print("ra:%B ", r->refahead); 554 if(bany(&r->calahead)) 555 print("ca:%B ", r->calahead); 556 print("\n"); 557 } 558 for(z=0; z<BITS; z++) 559 bit.b[z] = r->set.b[z] & 560 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 561 if(bany(&bit)) { 562 nearln = r->prog->lineno; 563 warn(Z, "set and not used: %B", bit); 564 if(debug['R']) 565 print("set and not used: %B\n", bit); 566 excise(r); 567 } 568 for(z=0; z<BITS; z++) 569 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 570 while(bany(&bit)) { 571 i = bnum(bit); 572 rgp->enter = r; 573 rgp->varno = i; 574 change = 0; 575 if(debug['R'] && debug['v']) 576 print("\n"); 577 paint1(r, i); 578 bit.b[i/32] &= ~(1L<<(i%32)); 579 if(change <= 0) { 580 if(debug['R']) 581 print("%L$%d: %B\n", 582 r->prog->lineno, change, blsh(i)); 583 continue; 584 } 585 rgp->cost = change; 586 nregion++; 587 if(nregion >= NRGN) { 588 warn(Z, "too many 
regions"); 589 goto brk; 590 } 591 rgp++; 592 } 593 } 594 brk: 595 qsort(region, nregion, sizeof(region[0]), rcmp); 596 597 /* 598 * pass 6 599 * determine used registers (paint2) 600 * replace code (paint3) 601 */ 602 rgp = region; 603 for(i=0; i<nregion; i++) { 604 bit = blsh(rgp->varno); 605 vreg = paint2(rgp->enter, rgp->varno); 606 vreg = allreg(vreg, rgp); 607 if(debug['R']) { 608 print("%L$%d %R: %B\n", 609 rgp->enter->prog->lineno, 610 rgp->cost, 611 rgp->regno, 612 bit); 613 } 614 if(rgp->regno != 0) 615 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 616 rgp++; 617 } 618 /* 619 * pass 7 620 * peep-hole on basic block 621 */ 622 if(!debug['R'] || debug['P']) 623 peep(); 624 625 /* 626 * pass 8 627 * recalculate pc 628 */ 629 val = initpc; 630 for(r = firstr; r != R; r = r1) { 631 r->pc = val; 632 p = r->prog; 633 p1 = P; 634 r1 = r->link; 635 if(r1 != R) 636 p1 = r1->prog; 637 for(; p != p1; p = p->link) { 638 switch(p->as) { 639 default: 640 val++; 641 break; 642 643 case ANOP: 644 case ADATA: 645 case AGLOBL: 646 case ANAME: 647 case ASIGNAME: 648 break; 649 } 650 } 651 } 652 pc = val; 653 654 /* 655 * fix up branches 656 */ 657 if(debug['R']) 658 if(bany(&addrs)) 659 print("addrs: %B\n", addrs); 660 661 r1 = 0; /* set */ 662 for(r = firstr; r != R; r = r->link) { 663 p = r->prog; 664 if(p->to.type == D_BRANCH) 665 p->to.offset = r->s2->pc; 666 r1 = r; 667 } 668 669 /* 670 * last pass 671 * eliminate nops 672 * free aux structures 673 */ 674 for(p = firstr->prog; p != P; p = p->link){ 675 while(p->link && p->link->as == ANOP) 676 p->link = p->link->link; 677 } 678 if(r1 != R) { 679 r1->link = freer; 680 freer = firstr; 681 } 682 } 683 684 /* 685 * add mov b,rn 686 * just after r 687 */ 688 void 689 addmove(Reg *r, int bn, int rn, int f) 690 { 691 Prog *p, *p1; 692 Adr *a; 693 Var *v; 694 695 p1 = alloc(sizeof(*p1)); 696 *p1 = zprog; 697 p = r->prog; 698 699 p1->link = p->link; 700 p->link = p1; 701 p1->lineno = p->lineno; 702 703 v = var + bn; 704 705 
a = &p1->to; 706 a->sym = v->sym; 707 a->offset = v->offset; 708 a->etype = v->etype; 709 a->type = v->name; 710 711 p1->as = AMOVL; 712 if(v->etype == TCHAR || v->etype == TUCHAR) 713 p1->as = AMOVB; 714 if(v->etype == TSHORT || v->etype == TUSHORT) 715 p1->as = AMOVW; 716 if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND) 717 p1->as = AMOVQ; 718 if(v->etype == TFLOAT) 719 p1->as = AMOVSS; 720 if(v->etype == TDOUBLE) 721 p1->as = AMOVSD; 722 723 p1->from.type = rn; 724 if(!f) { 725 p1->from = *a; 726 *a = zprog.from; 727 a->type = rn; 728 if(v->etype == TUCHAR) 729 p1->as = AMOVB; 730 if(v->etype == TUSHORT) 731 p1->as = AMOVW; 732 } 733 if(debug['R']) 734 print("%P\t.a%P\n", p, p1); 735 } 736 737 uint32 738 doregbits(int r) 739 { 740 uint32 b; 741 742 b = 0; 743 if(r >= D_INDIR) 744 r -= D_INDIR; 745 if(r >= D_AX && r <= D_R15) 746 b |= RtoB(r); 747 else 748 if(r >= D_AL && r <= D_R15B) 749 b |= RtoB(r-D_AL+D_AX); 750 else 751 if(r >= D_AH && r <= D_BH) 752 b |= RtoB(r-D_AH+D_AX); 753 else 754 if(r >= D_X0 && r <= D_X0+15) 755 b |= FtoB(r); 756 return b; 757 } 758 759 Bits 760 mkvar(Reg *r, Adr *a) 761 { 762 Var *v; 763 int i, t, n, et, z; 764 int32 o; 765 Bits bit; 766 Sym *s; 767 768 /* 769 * mark registers used 770 */ 771 t = a->type; 772 r->regu |= doregbits(t); 773 r->regu |= doregbits(a->index); 774 775 switch(t) { 776 default: 777 goto none; 778 case D_ADDR: 779 a->type = a->index; 780 bit = mkvar(r, a); 781 for(z=0; z<BITS; z++) 782 addrs.b[z] |= bit.b[z]; 783 a->type = t; 784 goto none; 785 case D_EXTERN: 786 case D_STATIC: 787 case D_PARAM: 788 case D_AUTO: 789 n = t; 790 break; 791 } 792 s = a->sym; 793 if(s == S) 794 goto none; 795 if(s->name[0] == '.') 796 goto none; 797 et = a->etype; 798 o = a->offset; 799 v = var; 800 for(i=0; i<nvar; i++) { 801 if(s == v->sym) 802 if(n == v->name) 803 if(o == v->offset) 804 goto out; 805 v++; 806 } 807 if(nvar >= NVAR) { 808 if(debug['w'] > 1 && s) 809 warn(Z, "variable not optimized: %s", 
s->name); 810 goto none; 811 } 812 i = nvar; 813 nvar++; 814 v = &var[i]; 815 v->sym = s; 816 v->offset = o; 817 v->name = n; 818 v->etype = et; 819 if(debug['R']) 820 print("bit=%2d et=%2d %D\n", i, et, a); 821 822 out: 823 bit = blsh(i); 824 if(n == D_EXTERN || n == D_STATIC) 825 for(z=0; z<BITS; z++) 826 externs.b[z] |= bit.b[z]; 827 if(n == D_PARAM) 828 for(z=0; z<BITS; z++) 829 params.b[z] |= bit.b[z]; 830 if(v->etype != et || !(typechlpfd[et] || typev[et])) /* funny punning */ 831 for(z=0; z<BITS; z++) 832 addrs.b[z] |= bit.b[z]; 833 return bit; 834 835 none: 836 return zbits; 837 } 838 839 void 840 prop(Reg *r, Bits ref, Bits cal) 841 { 842 Reg *r1, *r2; 843 int z; 844 845 for(r1 = r; r1 != R; r1 = r1->p1) { 846 for(z=0; z<BITS; z++) { 847 ref.b[z] |= r1->refahead.b[z]; 848 if(ref.b[z] != r1->refahead.b[z]) { 849 r1->refahead.b[z] = ref.b[z]; 850 change++; 851 } 852 cal.b[z] |= r1->calahead.b[z]; 853 if(cal.b[z] != r1->calahead.b[z]) { 854 r1->calahead.b[z] = cal.b[z]; 855 change++; 856 } 857 } 858 switch(r1->prog->as) { 859 case ACALL: 860 for(z=0; z<BITS; z++) { 861 cal.b[z] |= ref.b[z] | externs.b[z]; 862 ref.b[z] = 0; 863 } 864 break; 865 866 case ATEXT: 867 for(z=0; z<BITS; z++) { 868 cal.b[z] = 0; 869 ref.b[z] = 0; 870 } 871 break; 872 873 case ARET: 874 for(z=0; z<BITS; z++) { 875 cal.b[z] = externs.b[z]; 876 ref.b[z] = 0; 877 } 878 } 879 for(z=0; z<BITS; z++) { 880 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 881 r1->use1.b[z] | r1->use2.b[z]; 882 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 883 r1->refbehind.b[z] = ref.b[z]; 884 r1->calbehind.b[z] = cal.b[z]; 885 } 886 if(r1->active) 887 break; 888 r1->active = 1; 889 } 890 for(; r != r1; r = r->p1) 891 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 892 prop(r2, r->refbehind, r->calbehind); 893 } 894 895 /* 896 * find looping structure 897 * 898 * 1) find reverse postordering 899 * 2) find approximate dominators, 900 * the actual dominators if the flow graph is reducible 901 * otherwise, 
dominators plus some other non-dominators. 902 * See Matthew S. Hecht and Jeffrey D. Ullman, 903 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 904 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 905 * Oct. 1-3, 1973, pp. 207-217. 906 * 3) find all nodes with a predecessor dominated by the current node. 907 * such a node is a loop head. 908 * recursively, all preds with a greater rpo number are in the loop 909 */ 910 int32 911 postorder(Reg *r, Reg **rpo2r, int32 n) 912 { 913 Reg *r1; 914 915 r->rpo = 1; 916 r1 = r->s1; 917 if(r1 && !r1->rpo) 918 n = postorder(r1, rpo2r, n); 919 r1 = r->s2; 920 if(r1 && !r1->rpo) 921 n = postorder(r1, rpo2r, n); 922 rpo2r[n] = r; 923 n++; 924 return n; 925 } 926 927 int32 928 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 929 { 930 int32 t; 931 932 if(rpo1 == -1) 933 return rpo2; 934 while(rpo1 != rpo2){ 935 if(rpo1 > rpo2){ 936 t = rpo2; 937 rpo2 = rpo1; 938 rpo1 = t; 939 } 940 while(rpo1 < rpo2){ 941 t = idom[rpo2]; 942 if(t >= rpo2) 943 fatal(Z, "bad idom"); 944 rpo2 = t; 945 } 946 } 947 return rpo1; 948 } 949 950 int 951 doms(int32 *idom, int32 r, int32 s) 952 { 953 while(s > r) 954 s = idom[s]; 955 return s == r; 956 } 957 958 int 959 loophead(int32 *idom, Reg *r) 960 { 961 int32 src; 962 963 src = r->rpo; 964 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 965 return 1; 966 for(r = r->p2; r != R; r = r->p2link) 967 if(doms(idom, src, r->rpo)) 968 return 1; 969 return 0; 970 } 971 972 void 973 loopmark(Reg **rpo2r, int32 head, Reg *r) 974 { 975 if(r->rpo < head || r->active == head) 976 return; 977 r->active = head; 978 r->loop += LOOP; 979 if(r->p1 != R) 980 loopmark(rpo2r, head, r->p1); 981 for(r = r->p2; r != R; r = r->p2link) 982 loopmark(rpo2r, head, r); 983 } 984 985 void 986 loopit(Reg *r, int32 nr) 987 { 988 Reg *r1; 989 int32 i, d, me; 990 991 if(nr > maxnr) { 992 rpo2r = alloc(nr * sizeof(Reg*)); 993 idom = alloc(nr * sizeof(int32)); 994 maxnr = nr; 995 } 996 997 d = 
postorder(r, rpo2r, 0); 998 if(d > nr) 999 fatal(Z, "too many reg nodes"); 1000 nr = d; 1001 for(i = 0; i < nr / 2; i++){ 1002 r1 = rpo2r[i]; 1003 rpo2r[i] = rpo2r[nr - 1 - i]; 1004 rpo2r[nr - 1 - i] = r1; 1005 } 1006 for(i = 0; i < nr; i++) 1007 rpo2r[i]->rpo = i; 1008 1009 idom[0] = 0; 1010 for(i = 0; i < nr; i++){ 1011 r1 = rpo2r[i]; 1012 me = r1->rpo; 1013 d = -1; 1014 if(r1->p1 != R && r1->p1->rpo < me) 1015 d = r1->p1->rpo; 1016 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 1017 if(r1->rpo < me) 1018 d = rpolca(idom, d, r1->rpo); 1019 idom[i] = d; 1020 } 1021 1022 for(i = 0; i < nr; i++){ 1023 r1 = rpo2r[i]; 1024 r1->loop++; 1025 if(r1->p2 != R && loophead(idom, r1)) 1026 loopmark(rpo2r, i, r1); 1027 } 1028 } 1029 1030 void 1031 synch(Reg *r, Bits dif) 1032 { 1033 Reg *r1; 1034 int z; 1035 1036 for(r1 = r; r1 != R; r1 = r1->s1) { 1037 for(z=0; z<BITS; z++) { 1038 dif.b[z] = (dif.b[z] & 1039 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 1040 r1->set.b[z] | r1->regdiff.b[z]; 1041 if(dif.b[z] != r1->regdiff.b[z]) { 1042 r1->regdiff.b[z] = dif.b[z]; 1043 change++; 1044 } 1045 } 1046 if(r1->active) 1047 break; 1048 r1->active = 1; 1049 for(z=0; z<BITS; z++) 1050 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 1051 if(r1->s2 != R) 1052 synch(r1->s2, dif); 1053 } 1054 } 1055 1056 uint32 1057 allreg(uint32 b, Rgn *r) 1058 { 1059 Var *v; 1060 int i; 1061 1062 v = var + r->varno; 1063 r->regno = 0; 1064 switch(v->etype) { 1065 1066 default: 1067 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 1068 break; 1069 1070 case TCHAR: 1071 case TUCHAR: 1072 case TSHORT: 1073 case TUSHORT: 1074 case TINT: 1075 case TUINT: 1076 case TLONG: 1077 case TULONG: 1078 case TVLONG: 1079 case TUVLONG: 1080 case TIND: 1081 case TARRAY: 1082 i = BtoR(~b); 1083 if(i && r->cost > 0) { 1084 r->regno = i; 1085 return RtoB(i); 1086 } 1087 break; 1088 1089 case TDOUBLE: 1090 case TFLOAT: 1091 i = BtoF(~b); 1092 if(i && r->cost > 0) { 1093 r->regno = i; 1094 return FtoB(i); 1095 } 1096 
break; 1097 } 1098 return 0; 1099 } 1100 1101 void 1102 paint1(Reg *r, int bn) 1103 { 1104 Reg *r1; 1105 Prog *p; 1106 int z; 1107 uint32 bb; 1108 1109 z = bn/32; 1110 bb = 1L<<(bn%32); 1111 if(r->act.b[z] & bb) 1112 return; 1113 for(;;) { 1114 if(!(r->refbehind.b[z] & bb)) 1115 break; 1116 r1 = r->p1; 1117 if(r1 == R) 1118 break; 1119 if(!(r1->refahead.b[z] & bb)) 1120 break; 1121 if(r1->act.b[z] & bb) 1122 break; 1123 r = r1; 1124 } 1125 1126 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 1127 change -= CLOAD * r->loop; 1128 if(debug['R'] && debug['v']) 1129 print("%d%P\td %B $%d\n", r->loop, 1130 r->prog, blsh(bn), change); 1131 } 1132 for(;;) { 1133 r->act.b[z] |= bb; 1134 p = r->prog; 1135 1136 if(r->use1.b[z] & bb) { 1137 change += CREF * r->loop; 1138 if(debug['R'] && debug['v']) 1139 print("%d%P\tu1 %B $%d\n", r->loop, 1140 p, blsh(bn), change); 1141 } 1142 1143 if((r->use2.b[z]|r->set.b[z]) & bb) { 1144 change += CREF * r->loop; 1145 if(debug['R'] && debug['v']) 1146 print("%d%P\tu2 %B $%d\n", r->loop, 1147 p, blsh(bn), change); 1148 } 1149 1150 if(STORE(r) & r->regdiff.b[z] & bb) { 1151 change -= CLOAD * r->loop; 1152 if(debug['R'] && debug['v']) 1153 print("%d%P\tst %B $%d\n", r->loop, 1154 p, blsh(bn), change); 1155 } 1156 1157 if(r->refbehind.b[z] & bb) 1158 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1159 if(r1->refahead.b[z] & bb) 1160 paint1(r1, bn); 1161 1162 if(!(r->refahead.b[z] & bb)) 1163 break; 1164 r1 = r->s2; 1165 if(r1 != R) 1166 if(r1->refbehind.b[z] & bb) 1167 paint1(r1, bn); 1168 r = r->s1; 1169 if(r == R) 1170 break; 1171 if(r->act.b[z] & bb) 1172 break; 1173 if(!(r->refbehind.b[z] & bb)) 1174 break; 1175 } 1176 } 1177 1178 uint32 1179 regset(Reg *r, uint32 bb) 1180 { 1181 uint32 b, set; 1182 Adr v; 1183 int c; 1184 1185 set = 0; 1186 v = zprog.from; 1187 while(b = bb & ~(bb-1)) { 1188 v.type = b & 0xFFFF? 
BtoR(b): BtoF(b); 1189 if(v.type == 0) 1190 diag(Z, "zero v.type for %#ux", b); 1191 c = copyu(r->prog, &v, A); 1192 if(c == 3) 1193 set |= b; 1194 bb &= ~b; 1195 } 1196 return set; 1197 } 1198 1199 uint32 1200 reguse(Reg *r, uint32 bb) 1201 { 1202 uint32 b, set; 1203 Adr v; 1204 int c; 1205 1206 set = 0; 1207 v = zprog.from; 1208 while(b = bb & ~(bb-1)) { 1209 v.type = b & 0xFFFF? BtoR(b): BtoF(b); 1210 c = copyu(r->prog, &v, A); 1211 if(c == 1 || c == 2 || c == 4) 1212 set |= b; 1213 bb &= ~b; 1214 } 1215 return set; 1216 } 1217 1218 uint32 1219 paint2(Reg *r, int bn) 1220 { 1221 Reg *r1; 1222 int z; 1223 uint32 bb, vreg, x; 1224 1225 z = bn/32; 1226 bb = 1L << (bn%32); 1227 vreg = regbits; 1228 if(!(r->act.b[z] & bb)) 1229 return vreg; 1230 for(;;) { 1231 if(!(r->refbehind.b[z] & bb)) 1232 break; 1233 r1 = r->p1; 1234 if(r1 == R) 1235 break; 1236 if(!(r1->refahead.b[z] & bb)) 1237 break; 1238 if(!(r1->act.b[z] & bb)) 1239 break; 1240 r = r1; 1241 } 1242 for(;;) { 1243 r->act.b[z] &= ~bb; 1244 1245 vreg |= r->regu; 1246 1247 if(r->refbehind.b[z] & bb) 1248 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1249 if(r1->refahead.b[z] & bb) 1250 vreg |= paint2(r1, bn); 1251 1252 if(!(r->refahead.b[z] & bb)) 1253 break; 1254 r1 = r->s2; 1255 if(r1 != R) 1256 if(r1->refbehind.b[z] & bb) 1257 vreg |= paint2(r1, bn); 1258 r = r->s1; 1259 if(r == R) 1260 break; 1261 if(!(r->act.b[z] & bb)) 1262 break; 1263 if(!(r->refbehind.b[z] & bb)) 1264 break; 1265 } 1266 1267 bb = vreg; 1268 for(; r; r=r->s1) { 1269 x = r->regu & ~bb; 1270 if(x) { 1271 vreg |= reguse(r, x); 1272 bb |= regset(r, x); 1273 } 1274 } 1275 return vreg; 1276 } 1277 1278 void 1279 paint3(Reg *r, int bn, int32 rb, int rn) 1280 { 1281 Reg *r1; 1282 Prog *p; 1283 int z; 1284 uint32 bb; 1285 1286 z = bn/32; 1287 bb = 1L << (bn%32); 1288 if(r->act.b[z] & bb) 1289 return; 1290 for(;;) { 1291 if(!(r->refbehind.b[z] & bb)) 1292 break; 1293 r1 = r->p1; 1294 if(r1 == R) 1295 break; 1296 if(!(r1->refahead.b[z] & bb)) 1297 
break; 1298 if(r1->act.b[z] & bb) 1299 break; 1300 r = r1; 1301 } 1302 1303 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1304 addmove(r, bn, rn, 0); 1305 for(;;) { 1306 r->act.b[z] |= bb; 1307 p = r->prog; 1308 1309 if(r->use1.b[z] & bb) { 1310 if(debug['R']) 1311 print("%P", p); 1312 addreg(&p->from, rn); 1313 if(debug['R']) 1314 print("\t.c%P\n", p); 1315 } 1316 if((r->use2.b[z]|r->set.b[z]) & bb) { 1317 if(debug['R']) 1318 print("%P", p); 1319 addreg(&p->to, rn); 1320 if(debug['R']) 1321 print("\t.c%P\n", p); 1322 } 1323 1324 if(STORE(r) & r->regdiff.b[z] & bb) 1325 addmove(r, bn, rn, 1); 1326 r->regu |= rb; 1327 1328 if(r->refbehind.b[z] & bb) 1329 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1330 if(r1->refahead.b[z] & bb) 1331 paint3(r1, bn, rb, rn); 1332 1333 if(!(r->refahead.b[z] & bb)) 1334 break; 1335 r1 = r->s2; 1336 if(r1 != R) 1337 if(r1->refbehind.b[z] & bb) 1338 paint3(r1, bn, rb, rn); 1339 r = r->s1; 1340 if(r == R) 1341 break; 1342 if(r->act.b[z] & bb) 1343 break; 1344 if(!(r->refbehind.b[z] & bb)) 1345 break; 1346 } 1347 } 1348 1349 void 1350 addreg(Adr *a, int rn) 1351 { 1352 1353 a->sym = 0; 1354 a->offset = 0; 1355 a->type = rn; 1356 } 1357 1358 int32 1359 RtoB(int r) 1360 { 1361 1362 if(r < D_AX || r > D_R15) 1363 return 0; 1364 return 1L << (r-D_AX); 1365 } 1366 1367 int 1368 BtoR(int32 b) 1369 { 1370 1371 b &= 0xffffL; 1372 if(b == 0) 1373 return 0; 1374 return bitno(b) + D_AX; 1375 } 1376 1377 /* 1378 * bit reg 1379 * 16 X5 1380 * 17 X6 1381 * 18 X7 1382 */ 1383 int32 1384 FtoB(int f) 1385 { 1386 if(f < FREGMIN || f > FREGEXT) 1387 return 0; 1388 return 1L << (f - FREGMIN + 16); 1389 } 1390 1391 int 1392 BtoF(int32 b) 1393 { 1394 1395 b &= 0x70000L; 1396 if(b == 0) 1397 return 0; 1398 return bitno(b) - 16 + FREGMIN; 1399 } 1400 1401 /* what instruction does a JMP to p eventually land on? 
*/ 1402 static Reg* 1403 chasejmp(Reg *r, int *jmploop) 1404 { 1405 int n; 1406 1407 n = 0; 1408 for(; r; r=r->s2) { 1409 if(r->prog->as != AJMP || r->prog->to.type != D_BRANCH) 1410 break; 1411 if(++n > 10) { 1412 *jmploop = 1; 1413 break; 1414 } 1415 } 1416 return r; 1417 } 1418 1419 /* mark all code reachable from firstp as alive */ 1420 static void 1421 mark(Reg *firstr) 1422 { 1423 Reg *r; 1424 Prog *p; 1425 1426 for(r=firstr; r; r=r->link) { 1427 if(r->active) 1428 break; 1429 r->active = 1; 1430 p = r->prog; 1431 if(p->as != ACALL && p->to.type == D_BRANCH) 1432 mark(r->s2); 1433 if(p->as == AJMP || p->as == ARET || p->as == AUNDEF) 1434 break; 1435 } 1436 } 1437 1438 /* 1439 * the code generator depends on being able to write out JMP 1440 * instructions that it can jump to now but fill in later. 1441 * the linker will resolve them nicely, but they make the code 1442 * longer and more difficult to follow during debugging. 1443 * remove them. 1444 */ 1445 static void 1446 fixjmp(Reg *firstr) 1447 { 1448 int jmploop; 1449 Reg *r; 1450 Prog *p; 1451 1452 if(debug['R'] && debug['v']) 1453 print("\nfixjmp\n"); 1454 1455 // pass 1: resolve jump to AJMP, mark all code as dead. 1456 jmploop = 0; 1457 for(r=firstr; r; r=r->link) { 1458 p = r->prog; 1459 if(debug['R'] && debug['v']) 1460 print("%04d %P\n", r->pc, p); 1461 if(p->as != ACALL && p->to.type == D_BRANCH && r->s2 && r->s2->prog->as == AJMP) { 1462 r->s2 = chasejmp(r->s2, &jmploop); 1463 p->to.offset = r->s2->pc; 1464 if(debug['R'] && debug['v']) 1465 print("->%P\n", p); 1466 } 1467 r->active = 0; 1468 } 1469 if(debug['R'] && debug['v']) 1470 print("\n"); 1471 1472 // pass 2: mark all reachable code alive 1473 mark(firstr); 1474 1475 // pass 3: delete dead code (mostly JMPs). 1476 for(r=firstr; r; r=r->link) { 1477 if(!r->active) { 1478 p = r->prog; 1479 if(p->link == P && p->as == ARET && r->p1 && r->p1->prog->as != ARET) { 1480 // This is the final ARET, and the code so far doesn't have one. 
1481 // Let it stay. 1482 } else { 1483 if(debug['R'] && debug['v']) 1484 print("del %04d %P\n", r->pc, p); 1485 p->as = ANOP; 1486 } 1487 } 1488 } 1489 1490 // pass 4: elide JMP to next instruction. 1491 // only safe if there are no jumps to JMPs anymore. 1492 if(!jmploop) { 1493 for(r=firstr; r; r=r->link) { 1494 p = r->prog; 1495 if(p->as == AJMP && p->to.type == D_BRANCH && r->s2 == r->link) { 1496 if(debug['R'] && debug['v']) 1497 print("del %04d %P\n", r->pc, p); 1498 p->as = ANOP; 1499 } 1500 } 1501 } 1502 1503 // fix back pointers. 1504 for(r=firstr; r; r=r->link) { 1505 r->p2 = R; 1506 r->p2link = R; 1507 } 1508 for(r=firstr; r; r=r->link) { 1509 if(r->s2) { 1510 r->p2link = r->s2->p2; 1511 r->s2->p2 = r; 1512 } 1513 } 1514 1515 if(debug['R'] && debug['v']) { 1516 print("\n"); 1517 for(r=firstr; r; r=r->link) 1518 print("%04d %P\n", r->pc, r->prog); 1519 print("\n"); 1520 } 1521 } 1522