// Source: github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/8c/reg.c
//
// Inferno utils/8c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/8c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include "gc.h" 32 33 static void fixjmp(Reg*); 34 35 Reg* 36 rega(void) 37 { 38 Reg *r; 39 40 r = freer; 41 if(r == R) { 42 r = alloc(sizeof(*r)); 43 } else 44 freer = r->link; 45 46 *r = zreg; 47 return r; 48 } 49 50 int 51 rcmp(const void *a1, const void *a2) 52 { 53 Rgn *p1, *p2; 54 int c1, c2; 55 56 p1 = (Rgn*)a1; 57 p2 = (Rgn*)a2; 58 c1 = p2->cost; 59 c2 = p1->cost; 60 if(c1 -= c2) 61 return c1; 62 return p2->varno - p1->varno; 63 } 64 65 void 66 regopt(Prog *p) 67 { 68 Reg *r, *r1, *r2; 69 Prog *p1; 70 int i, z; 71 int32 initpc, val, npc; 72 uint32 vreg; 73 Bits bit; 74 struct 75 { 76 int32 m; 77 int32 c; 78 Reg* p; 79 } log5[6], *lp; 80 81 firstr = R; 82 lastr = R; 83 nvar = 0; 84 regbits = RtoB(D_SP) | RtoB(D_AX); 85 for(z=0; z<BITS; z++) { 86 externs.b[z] = 0; 87 params.b[z] = 0; 88 consts.b[z] = 0; 89 addrs.b[z] = 0; 90 } 91 92 /* 93 * pass 1 94 * build aux data structure 95 * allocate pcs 96 * find use and set of variables 97 */ 98 val = 5L * 5L * 5L * 5L * 5L; 99 lp = log5; 100 for(i=0; i<5; i++) { 101 lp->m = val; 102 lp->c = 0; 103 lp->p = R; 104 val /= 5L; 105 lp++; 106 } 107 val = 0; 108 for(; p != P; p = p->link) { 109 switch(p->as) { 110 case ADATA: 111 case AGLOBL: 112 case ANAME: 113 case ASIGNAME: 114 continue; 115 } 116 r = rega(); 117 if(firstr == R) { 118 firstr = r; 119 lastr = r; 120 } else { 121 lastr->link = r; 122 r->p1 = lastr; 123 lastr->s1 = r; 124 lastr = r; 125 } 126 r->prog = p; 127 r->pc = val; 128 val++; 129 130 lp = log5; 131 for(i=0; i<5; i++) { 132 lp->c--; 133 if(lp->c <= 0) { 134 lp->c = lp->m; 135 if(lp->p != R) 136 lp->p->log5 = r; 137 lp->p = r; 138 (lp+1)->c = 0; 139 break; 140 } 141 lp++; 142 } 143 144 r1 = r->p1; 145 if(r1 != R) 146 switch(r1->prog->as) { 147 case ARET: 148 case AJMP: 149 case AIRETL: 150 r->p1 = R; 151 r1->s1 = R; 152 } 153 bit = mkvar(r, &p->from); 154 if(bany(&bit)) 155 switch(p->as) { 156 /* 157 * funny 158 */ 159 case ALEAL: 160 for(z=0; z<BITS; z++) 161 addrs.b[z] |= bit.b[z]; 162 break; 
163 164 /* 165 * left side read 166 */ 167 default: 168 for(z=0; z<BITS; z++) 169 r->use1.b[z] |= bit.b[z]; 170 break; 171 } 172 173 bit = mkvar(r, &p->to); 174 if(bany(&bit)) 175 switch(p->as) { 176 default: 177 diag(Z, "reg: unknown op: %A", p->as); 178 break; 179 180 /* 181 * right side read 182 */ 183 case ACMPB: 184 case ACMPL: 185 case ACMPW: 186 case APREFETCHT0: 187 case APREFETCHT1: 188 case APREFETCHT2: 189 case APREFETCHNTA: 190 for(z=0; z<BITS; z++) 191 r->use2.b[z] |= bit.b[z]; 192 break; 193 194 /* 195 * right side write 196 */ 197 case ANOP: 198 case AMOVL: 199 case AMOVB: 200 case AMOVW: 201 case AMOVBLSX: 202 case AMOVBLZX: 203 case AMOVWLSX: 204 case AMOVWLZX: 205 for(z=0; z<BITS; z++) 206 r->set.b[z] |= bit.b[z]; 207 break; 208 209 /* 210 * right side read+write 211 */ 212 case AADDB: 213 case AADDL: 214 case AADDW: 215 case AANDB: 216 case AANDL: 217 case AANDW: 218 case ASUBB: 219 case ASUBL: 220 case ASUBW: 221 case AORB: 222 case AORL: 223 case AORW: 224 case AXORB: 225 case AXORL: 226 case AXORW: 227 case ASALB: 228 case ASALL: 229 case ASALW: 230 case ASARB: 231 case ASARL: 232 case ASARW: 233 case AROLB: 234 case AROLL: 235 case AROLW: 236 case ARORB: 237 case ARORL: 238 case ARORW: 239 case ASHLB: 240 case ASHLL: 241 case ASHLW: 242 case ASHRB: 243 case ASHRL: 244 case ASHRW: 245 case AIMULL: 246 case AIMULW: 247 case ANEGL: 248 case ANOTL: 249 case AADCL: 250 case ASBBL: 251 for(z=0; z<BITS; z++) { 252 r->set.b[z] |= bit.b[z]; 253 r->use2.b[z] |= bit.b[z]; 254 } 255 break; 256 257 /* 258 * funny 259 */ 260 case AFMOVDP: 261 case AFMOVFP: 262 case AFMOVLP: 263 case AFMOVVP: 264 case AFMOVWP: 265 case ACALL: 266 for(z=0; z<BITS; z++) 267 addrs.b[z] |= bit.b[z]; 268 break; 269 } 270 271 switch(p->as) { 272 case AIMULL: 273 case AIMULW: 274 if(p->to.type != D_NONE) 275 break; 276 277 case AIDIVB: 278 case AIDIVL: 279 case AIDIVW: 280 case AIMULB: 281 case ADIVB: 282 case ADIVL: 283 case ADIVW: 284 case AMULB: 285 case AMULL: 286 case AMULW: 
287 288 case ACWD: 289 case ACDQ: 290 r->regu |= RtoB(D_AX) | RtoB(D_DX); 291 break; 292 293 case AREP: 294 case AREPN: 295 case ALOOP: 296 case ALOOPEQ: 297 case ALOOPNE: 298 r->regu |= RtoB(D_CX); 299 break; 300 301 case AMOVSB: 302 case AMOVSL: 303 case AMOVSW: 304 case ACMPSB: 305 case ACMPSL: 306 case ACMPSW: 307 r->regu |= RtoB(D_SI) | RtoB(D_DI); 308 break; 309 310 case ASTOSB: 311 case ASTOSL: 312 case ASTOSW: 313 case ASCASB: 314 case ASCASL: 315 case ASCASW: 316 r->regu |= RtoB(D_AX) | RtoB(D_DI); 317 break; 318 319 case AINSB: 320 case AINSL: 321 case AINSW: 322 case AOUTSB: 323 case AOUTSL: 324 case AOUTSW: 325 r->regu |= RtoB(D_DI) | RtoB(D_DX); 326 break; 327 328 case AFSTSW: 329 case ASAHF: 330 r->regu |= RtoB(D_AX); 331 break; 332 } 333 } 334 if(firstr == R) 335 return; 336 initpc = pc - val; 337 npc = val; 338 339 /* 340 * pass 2 341 * turn branch references to pointers 342 * build back pointers 343 */ 344 for(r = firstr; r != R; r = r->link) { 345 p = r->prog; 346 if(p->to.type == D_BRANCH) { 347 val = p->to.offset - initpc; 348 r1 = firstr; 349 while(r1 != R) { 350 r2 = r1->log5; 351 if(r2 != R && val >= r2->pc) { 352 r1 = r2; 353 continue; 354 } 355 if(r1->pc == val) 356 break; 357 r1 = r1->link; 358 } 359 if(r1 == R) { 360 nearln = p->lineno; 361 diag(Z, "ref not found\n%P", p); 362 continue; 363 } 364 if(r1 == r) { 365 nearln = p->lineno; 366 diag(Z, "ref to self\n%P", p); 367 continue; 368 } 369 r->s2 = r1; 370 r->p2link = r1->p2; 371 r1->p2 = r; 372 } 373 } 374 if(debug['R']) { 375 p = firstr->prog; 376 print("\n%L %D\n", p->lineno, &p->from); 377 } 378 379 /* 380 * pass 2.1 381 * fix jumps 382 */ 383 fixjmp(firstr); 384 385 /* 386 * pass 2.5 387 * find looping structure 388 */ 389 for(r = firstr; r != R; r = r->link) 390 r->active = 0; 391 change = 0; 392 loopit(firstr, npc); 393 if(debug['R'] && debug['v']) { 394 print("\nlooping structure:\n"); 395 for(r = firstr; r != R; r = r->link) { 396 print("%d:%P", r->loop, r->prog); 397 for(z=0; 
z<BITS; z++) 398 bit.b[z] = r->use1.b[z] | 399 r->use2.b[z] | 400 r->set.b[z]; 401 if(bany(&bit)) { 402 print("\t"); 403 if(bany(&r->use1)) 404 print(" u1=%B", r->use1); 405 if(bany(&r->use2)) 406 print(" u2=%B", r->use2); 407 if(bany(&r->set)) 408 print(" st=%B", r->set); 409 } 410 print("\n"); 411 } 412 } 413 414 /* 415 * pass 3 416 * iterate propagating usage 417 * back until flow graph is complete 418 */ 419 loop1: 420 change = 0; 421 for(r = firstr; r != R; r = r->link) 422 r->active = 0; 423 for(r = firstr; r != R; r = r->link) 424 if(r->prog->as == ARET) 425 prop(r, zbits, zbits); 426 loop11: 427 /* pick up unreachable code */ 428 i = 0; 429 for(r = firstr; r != R; r = r1) { 430 r1 = r->link; 431 if(r1 && r1->active && !r->active) { 432 prop(r, zbits, zbits); 433 i = 1; 434 } 435 } 436 if(i) 437 goto loop11; 438 if(change) 439 goto loop1; 440 441 442 /* 443 * pass 4 444 * iterate propagating register/variable synchrony 445 * forward until graph is complete 446 */ 447 loop2: 448 change = 0; 449 for(r = firstr; r != R; r = r->link) 450 r->active = 0; 451 synch(firstr, zbits); 452 if(change) 453 goto loop2; 454 455 456 /* 457 * pass 5 458 * isolate regions 459 * calculate costs (paint1) 460 */ 461 r = firstr; 462 if(r) { 463 for(z=0; z<BITS; z++) 464 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 465 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 466 if(bany(&bit)) { 467 nearln = r->prog->lineno; 468 warn(Z, "used and not set: %B", bit); 469 if(debug['R'] && !debug['w']) 470 print("used and not set: %B\n", bit); 471 } 472 } 473 if(debug['R'] && debug['v']) 474 print("\nprop structure:\n"); 475 for(r = firstr; r != R; r = r->link) 476 r->act = zbits; 477 rgp = region; 478 nregion = 0; 479 for(r = firstr; r != R; r = r->link) { 480 if(debug['R'] && debug['v']) { 481 print("%P\t", r->prog); 482 if(bany(&r->set)) 483 print("s:%B ", r->set); 484 if(bany(&r->refahead)) 485 print("ra:%B ", r->refahead); 486 if(bany(&r->calahead)) 487 print("ca:%B ", 
r->calahead); 488 print("\n"); 489 } 490 for(z=0; z<BITS; z++) 491 bit.b[z] = r->set.b[z] & 492 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 493 if(bany(&bit)) { 494 nearln = r->prog->lineno; 495 warn(Z, "set and not used: %B", bit); 496 if(debug['R']) 497 print("set and not used: %B\n", bit); 498 excise(r); 499 } 500 for(z=0; z<BITS; z++) 501 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 502 while(bany(&bit)) { 503 i = bnum(bit); 504 rgp->enter = r; 505 rgp->varno = i; 506 change = 0; 507 if(debug['R'] && debug['v']) 508 print("\n"); 509 paint1(r, i); 510 bit.b[i/32] &= ~(1L<<(i%32)); 511 if(change <= 0) { 512 if(debug['R']) 513 print("%L$%d: %B\n", 514 r->prog->lineno, change, blsh(i)); 515 continue; 516 } 517 rgp->cost = change; 518 nregion++; 519 if(nregion >= NRGN) { 520 warn(Z, "too many regions"); 521 goto brk; 522 } 523 rgp++; 524 } 525 } 526 brk: 527 qsort(region, nregion, sizeof(region[0]), rcmp); 528 529 /* 530 * pass 6 531 * determine used registers (paint2) 532 * replace code (paint3) 533 */ 534 rgp = region; 535 for(i=0; i<nregion; i++) { 536 bit = blsh(rgp->varno); 537 vreg = paint2(rgp->enter, rgp->varno); 538 vreg = allreg(vreg, rgp); 539 if(debug['R']) { 540 print("%L$%d %R: %B\n", 541 rgp->enter->prog->lineno, 542 rgp->cost, 543 rgp->regno, 544 bit); 545 } 546 if(rgp->regno != 0) 547 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 548 rgp++; 549 } 550 /* 551 * pass 7 552 * peep-hole on basic block 553 */ 554 if(!debug['R'] || debug['P']) 555 peep(); 556 557 if(debug['R'] && debug['v']) { 558 print("after pass 7 (peep)\n"); 559 for(r=firstr; r; r=r->link) 560 print("%04d %P\n", r->pc, r->prog); 561 print("\n"); 562 } 563 564 /* 565 * pass 8 566 * recalculate pc 567 */ 568 val = initpc; 569 for(r = firstr; r != R; r = r1) { 570 r->pc = val; 571 p = r->prog; 572 p1 = P; 573 r1 = r->link; 574 if(r1 != R) 575 p1 = r1->prog; 576 for(; p != p1; p = p->link) { 577 switch(p->as) { 578 default: 579 val++; 580 break; 581 582 case ANOP: 583 
case ADATA: 584 case AGLOBL: 585 case ANAME: 586 case ASIGNAME: 587 break; 588 } 589 } 590 } 591 pc = val; 592 593 /* 594 * fix up branches 595 */ 596 if(debug['R']) 597 if(bany(&addrs)) 598 print("addrs: %B\n", addrs); 599 600 r1 = 0; /* set */ 601 for(r = firstr; r != R; r = r->link) { 602 p = r->prog; 603 if(p->to.type == D_BRANCH) 604 p->to.offset = r->s2->pc; 605 r1 = r; 606 } 607 608 /* 609 * last pass 610 * eliminate nops 611 * free aux structures 612 */ 613 for(p = firstr->prog; p != P; p = p->link){ 614 while(p->link && p->link->as == ANOP) 615 p->link = p->link->link; 616 } 617 618 if(debug['R'] && debug['v']) { 619 print("after pass 8 (fixup pc)\n"); 620 for(p1=firstr->prog; p1!=P; p1=p1->link) 621 print("%P\n", p1); 622 print("\n"); 623 } 624 625 if(r1 != R) { 626 r1->link = freer; 627 freer = firstr; 628 } 629 } 630 631 /* 632 * add mov b,rn 633 * just after r 634 */ 635 void 636 addmove(Reg *r, int bn, int rn, int f) 637 { 638 Prog *p, *p1; 639 Adr *a; 640 Var *v; 641 642 p1 = alloc(sizeof(*p1)); 643 *p1 = zprog; 644 p = r->prog; 645 646 p1->link = p->link; 647 p->link = p1; 648 p1->lineno = p->lineno; 649 650 v = var + bn; 651 652 a = &p1->to; 653 a->sym = v->sym; 654 a->offset = v->offset; 655 a->etype = v->etype; 656 a->type = v->name; 657 658 p1->as = AMOVL; 659 if(v->etype == TCHAR || v->etype == TUCHAR) 660 p1->as = AMOVB; 661 if(v->etype == TSHORT || v->etype == TUSHORT) 662 p1->as = AMOVW; 663 664 p1->from.type = rn; 665 if(!f) { 666 p1->from = *a; 667 *a = zprog.from; 668 a->type = rn; 669 if(v->etype == TUCHAR) 670 p1->as = AMOVB; 671 if(v->etype == TUSHORT) 672 p1->as = AMOVW; 673 } 674 if(debug['R']) 675 print("%P\t.a%P\n", p, p1); 676 } 677 678 uint32 679 doregbits(int r) 680 { 681 uint32 b; 682 683 b = 0; 684 if(r >= D_INDIR) 685 r -= D_INDIR; 686 if(r >= D_AX && r <= D_DI) 687 b |= RtoB(r); 688 else 689 if(r >= D_AL && r <= D_BL) 690 b |= RtoB(r-D_AL+D_AX); 691 else 692 if(r >= D_AH && r <= D_BH) 693 b |= RtoB(r-D_AH+D_AX); 694 return 
b; 695 } 696 697 Bits 698 mkvar(Reg *r, Adr *a) 699 { 700 Var *v; 701 int i, t, n, et, z; 702 int32 o; 703 Bits bit; 704 Sym *s; 705 706 /* 707 * mark registers used 708 */ 709 t = a->type; 710 r->regu |= doregbits(t); 711 r->regu |= doregbits(a->index); 712 713 switch(t) { 714 default: 715 goto none; 716 case D_ADDR: 717 a->type = a->index; 718 bit = mkvar(r, a); 719 for(z=0; z<BITS; z++) 720 addrs.b[z] |= bit.b[z]; 721 a->type = t; 722 goto none; 723 case D_EXTERN: 724 case D_STATIC: 725 case D_PARAM: 726 case D_AUTO: 727 n = t; 728 break; 729 } 730 s = a->sym; 731 if(s == S) 732 goto none; 733 if(s->name[0] == '.') 734 goto none; 735 et = a->etype; 736 o = a->offset; 737 v = var; 738 for(i=0; i<nvar; i++) { 739 if(s == v->sym) 740 if(n == v->name) 741 if(o == v->offset) 742 goto out; 743 v++; 744 } 745 if(nvar >= NVAR) { 746 if(debug['w'] > 1 && s) 747 warn(Z, "variable not optimized: %s", s->name); 748 goto none; 749 } 750 i = nvar; 751 nvar++; 752 v = &var[i]; 753 v->sym = s; 754 v->offset = o; 755 v->name = n; 756 v->etype = et; 757 if(debug['R']) 758 print("bit=%2d et=%2d %D\n", i, et, a); 759 760 out: 761 bit = blsh(i); 762 if(n == D_EXTERN || n == D_STATIC) 763 for(z=0; z<BITS; z++) 764 externs.b[z] |= bit.b[z]; 765 if(n == D_PARAM) 766 for(z=0; z<BITS; z++) 767 params.b[z] |= bit.b[z]; 768 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 769 for(z=0; z<BITS; z++) 770 addrs.b[z] |= bit.b[z]; 771 return bit; 772 773 none: 774 return zbits; 775 } 776 777 void 778 prop(Reg *r, Bits ref, Bits cal) 779 { 780 Reg *r1, *r2; 781 int z; 782 783 for(r1 = r; r1 != R; r1 = r1->p1) { 784 for(z=0; z<BITS; z++) { 785 ref.b[z] |= r1->refahead.b[z]; 786 if(ref.b[z] != r1->refahead.b[z]) { 787 r1->refahead.b[z] = ref.b[z]; 788 change++; 789 } 790 cal.b[z] |= r1->calahead.b[z]; 791 if(cal.b[z] != r1->calahead.b[z]) { 792 r1->calahead.b[z] = cal.b[z]; 793 change++; 794 } 795 } 796 switch(r1->prog->as) { 797 case ACALL: 798 for(z=0; z<BITS; z++) { 799 cal.b[z] |= 
ref.b[z] | externs.b[z]; 800 ref.b[z] = 0; 801 } 802 break; 803 804 case ATEXT: 805 for(z=0; z<BITS; z++) { 806 cal.b[z] = 0; 807 ref.b[z] = 0; 808 } 809 break; 810 811 case ARET: 812 for(z=0; z<BITS; z++) { 813 cal.b[z] = externs.b[z]; 814 ref.b[z] = 0; 815 } 816 } 817 for(z=0; z<BITS; z++) { 818 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 819 r1->use1.b[z] | r1->use2.b[z]; 820 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 821 r1->refbehind.b[z] = ref.b[z]; 822 r1->calbehind.b[z] = cal.b[z]; 823 } 824 if(r1->active) 825 break; 826 r1->active = 1; 827 } 828 for(; r != r1; r = r->p1) 829 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 830 prop(r2, r->refbehind, r->calbehind); 831 } 832 833 /* 834 * find looping structure 835 * 836 * 1) find reverse postordering 837 * 2) find approximate dominators, 838 * the actual dominators if the flow graph is reducible 839 * otherwise, dominators plus some other non-dominators. 840 * See Matthew S. Hecht and Jeffrey D. Ullman, 841 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 842 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 843 * Oct. 1-3, 1973, pp. 207-217. 844 * 3) find all nodes with a predecessor dominated by the current node. 845 * such a node is a loop head. 
846 * recursively, all preds with a greater rpo number are in the loop 847 */ 848 int32 849 postorder(Reg *r, Reg **rpo2r, int32 n) 850 { 851 Reg *r1; 852 853 r->rpo = 1; 854 r1 = r->s1; 855 if(r1 && !r1->rpo) 856 n = postorder(r1, rpo2r, n); 857 r1 = r->s2; 858 if(r1 && !r1->rpo) 859 n = postorder(r1, rpo2r, n); 860 rpo2r[n] = r; 861 n++; 862 return n; 863 } 864 865 int32 866 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 867 { 868 int32 t; 869 870 if(rpo1 == -1) 871 return rpo2; 872 while(rpo1 != rpo2){ 873 if(rpo1 > rpo2){ 874 t = rpo2; 875 rpo2 = rpo1; 876 rpo1 = t; 877 } 878 while(rpo1 < rpo2){ 879 t = idom[rpo2]; 880 if(t >= rpo2) 881 fatal(Z, "bad idom"); 882 rpo2 = t; 883 } 884 } 885 return rpo1; 886 } 887 888 int 889 doms(int32 *idom, int32 r, int32 s) 890 { 891 while(s > r) 892 s = idom[s]; 893 return s == r; 894 } 895 896 int 897 loophead(int32 *idom, Reg *r) 898 { 899 int32 src; 900 901 src = r->rpo; 902 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 903 return 1; 904 for(r = r->p2; r != R; r = r->p2link) 905 if(doms(idom, src, r->rpo)) 906 return 1; 907 return 0; 908 } 909 910 void 911 loopmark(Reg **rpo2r, int32 head, Reg *r) 912 { 913 if(r->rpo < head || r->active == head) 914 return; 915 r->active = head; 916 r->loop += LOOP; 917 if(r->p1 != R) 918 loopmark(rpo2r, head, r->p1); 919 for(r = r->p2; r != R; r = r->p2link) 920 loopmark(rpo2r, head, r); 921 } 922 923 void 924 loopit(Reg *r, int32 nr) 925 { 926 Reg *r1; 927 int32 i, d, me; 928 929 if(nr > maxnr) { 930 rpo2r = alloc(nr * sizeof(Reg*)); 931 idom = alloc(nr * sizeof(int32)); 932 maxnr = nr; 933 } 934 935 d = postorder(r, rpo2r, 0); 936 if(d > nr) 937 fatal(Z, "too many reg nodes"); 938 nr = d; 939 for(i = 0; i < nr / 2; i++){ 940 r1 = rpo2r[i]; 941 rpo2r[i] = rpo2r[nr - 1 - i]; 942 rpo2r[nr - 1 - i] = r1; 943 } 944 for(i = 0; i < nr; i++) 945 rpo2r[i]->rpo = i; 946 947 idom[0] = 0; 948 for(i = 0; i < nr; i++){ 949 r1 = rpo2r[i]; 950 me = r1->rpo; 951 d = -1; 952 if(r1->p1 != R && r1->p1->rpo < 
me) 953 d = r1->p1->rpo; 954 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 955 if(r1->rpo < me) 956 d = rpolca(idom, d, r1->rpo); 957 idom[i] = d; 958 } 959 960 for(i = 0; i < nr; i++){ 961 r1 = rpo2r[i]; 962 r1->loop++; 963 if(r1->p2 != R && loophead(idom, r1)) 964 loopmark(rpo2r, i, r1); 965 } 966 } 967 968 void 969 synch(Reg *r, Bits dif) 970 { 971 Reg *r1; 972 int z; 973 974 for(r1 = r; r1 != R; r1 = r1->s1) { 975 for(z=0; z<BITS; z++) { 976 dif.b[z] = (dif.b[z] & 977 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 978 r1->set.b[z] | r1->regdiff.b[z]; 979 if(dif.b[z] != r1->regdiff.b[z]) { 980 r1->regdiff.b[z] = dif.b[z]; 981 change++; 982 } 983 } 984 if(r1->active) 985 break; 986 r1->active = 1; 987 for(z=0; z<BITS; z++) 988 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 989 if(r1->s2 != R) 990 synch(r1->s2, dif); 991 } 992 } 993 994 uint32 995 allreg(uint32 b, Rgn *r) 996 { 997 Var *v; 998 int i; 999 1000 v = var + r->varno; 1001 r->regno = 0; 1002 switch(v->etype) { 1003 1004 default: 1005 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 1006 break; 1007 1008 case TCHAR: 1009 case TUCHAR: 1010 case TSHORT: 1011 case TUSHORT: 1012 case TINT: 1013 case TUINT: 1014 case TLONG: 1015 case TULONG: 1016 case TIND: 1017 case TARRAY: 1018 i = BtoR(~b); 1019 if(i && r->cost > 0) { 1020 r->regno = i; 1021 return RtoB(i); 1022 } 1023 break; 1024 1025 case TDOUBLE: 1026 case TFLOAT: 1027 break; 1028 } 1029 return 0; 1030 } 1031 1032 void 1033 paint1(Reg *r, int bn) 1034 { 1035 Reg *r1; 1036 Prog *p; 1037 int z; 1038 uint32 bb; 1039 1040 z = bn/32; 1041 bb = 1L<<(bn%32); 1042 if(r->act.b[z] & bb) 1043 return; 1044 for(;;) { 1045 if(!(r->refbehind.b[z] & bb)) 1046 break; 1047 r1 = r->p1; 1048 if(r1 == R) 1049 break; 1050 if(!(r1->refahead.b[z] & bb)) 1051 break; 1052 if(r1->act.b[z] & bb) 1053 break; 1054 r = r1; 1055 } 1056 1057 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 1058 change -= CLOAD * r->loop; 1059 if(debug['R'] && debug['v']) 1060 
print("%d%P\td %B $%d\n", r->loop, 1061 r->prog, blsh(bn), change); 1062 } 1063 for(;;) { 1064 r->act.b[z] |= bb; 1065 p = r->prog; 1066 1067 if(r->use1.b[z] & bb) { 1068 change += CREF * r->loop; 1069 if(p->as == AFMOVL) 1070 if(BtoR(bb) != D_F0) 1071 change = -CINF; 1072 if(debug['R'] && debug['v']) 1073 print("%d%P\tu1 %B $%d\n", r->loop, 1074 p, blsh(bn), change); 1075 } 1076 1077 if((r->use2.b[z]|r->set.b[z]) & bb) { 1078 change += CREF * r->loop; 1079 if(p->as == AFMOVL) 1080 if(BtoR(bb) != D_F0) 1081 change = -CINF; 1082 if(debug['R'] && debug['v']) 1083 print("%d%P\tu2 %B $%d\n", r->loop, 1084 p, blsh(bn), change); 1085 } 1086 1087 if(STORE(r) & r->regdiff.b[z] & bb) { 1088 change -= CLOAD * r->loop; 1089 if(p->as == AFMOVL) 1090 if(BtoR(bb) != D_F0) 1091 change = -CINF; 1092 if(debug['R'] && debug['v']) 1093 print("%d%P\tst %B $%d\n", r->loop, 1094 p, blsh(bn), change); 1095 } 1096 1097 if(r->refbehind.b[z] & bb) 1098 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1099 if(r1->refahead.b[z] & bb) 1100 paint1(r1, bn); 1101 1102 if(!(r->refahead.b[z] & bb)) 1103 break; 1104 r1 = r->s2; 1105 if(r1 != R) 1106 if(r1->refbehind.b[z] & bb) 1107 paint1(r1, bn); 1108 r = r->s1; 1109 if(r == R) 1110 break; 1111 if(r->act.b[z] & bb) 1112 break; 1113 if(!(r->refbehind.b[z] & bb)) 1114 break; 1115 } 1116 } 1117 1118 uint32 1119 regset(Reg *r, uint32 bb) 1120 { 1121 uint32 b, set; 1122 Adr v; 1123 int c; 1124 1125 set = 0; 1126 v = zprog.from; 1127 while(b = bb & ~(bb-1)) { 1128 v.type = BtoR(b); 1129 c = copyu(r->prog, &v, A); 1130 if(c == 3) 1131 set |= b; 1132 bb &= ~b; 1133 } 1134 return set; 1135 } 1136 1137 uint32 1138 reguse(Reg *r, uint32 bb) 1139 { 1140 uint32 b, set; 1141 Adr v; 1142 int c; 1143 1144 set = 0; 1145 v = zprog.from; 1146 while(b = bb & ~(bb-1)) { 1147 v.type = BtoR(b); 1148 c = copyu(r->prog, &v, A); 1149 if(c == 1 || c == 2 || c == 4) 1150 set |= b; 1151 bb &= ~b; 1152 } 1153 return set; 1154 } 1155 1156 uint32 1157 paint2(Reg *r, int bn) 1158 { 1159 
Reg *r1; 1160 int z; 1161 uint32 bb, vreg, x; 1162 1163 z = bn/32; 1164 bb = 1L << (bn%32); 1165 vreg = regbits; 1166 if(!(r->act.b[z] & bb)) 1167 return vreg; 1168 for(;;) { 1169 if(!(r->refbehind.b[z] & bb)) 1170 break; 1171 r1 = r->p1; 1172 if(r1 == R) 1173 break; 1174 if(!(r1->refahead.b[z] & bb)) 1175 break; 1176 if(!(r1->act.b[z] & bb)) 1177 break; 1178 r = r1; 1179 } 1180 for(;;) { 1181 r->act.b[z] &= ~bb; 1182 1183 vreg |= r->regu; 1184 1185 if(r->refbehind.b[z] & bb) 1186 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1187 if(r1->refahead.b[z] & bb) 1188 vreg |= paint2(r1, bn); 1189 1190 if(!(r->refahead.b[z] & bb)) 1191 break; 1192 r1 = r->s2; 1193 if(r1 != R) 1194 if(r1->refbehind.b[z] & bb) 1195 vreg |= paint2(r1, bn); 1196 r = r->s1; 1197 if(r == R) 1198 break; 1199 if(!(r->act.b[z] & bb)) 1200 break; 1201 if(!(r->refbehind.b[z] & bb)) 1202 break; 1203 } 1204 1205 bb = vreg; 1206 for(; r; r=r->s1) { 1207 x = r->regu & ~bb; 1208 if(x) { 1209 vreg |= reguse(r, x); 1210 bb |= regset(r, x); 1211 } 1212 } 1213 return vreg; 1214 } 1215 1216 void 1217 paint3(Reg *r, int bn, int32 rb, int rn) 1218 { 1219 Reg *r1; 1220 Prog *p; 1221 int z; 1222 uint32 bb; 1223 1224 z = bn/32; 1225 bb = 1L << (bn%32); 1226 if(r->act.b[z] & bb) 1227 return; 1228 for(;;) { 1229 if(!(r->refbehind.b[z] & bb)) 1230 break; 1231 r1 = r->p1; 1232 if(r1 == R) 1233 break; 1234 if(!(r1->refahead.b[z] & bb)) 1235 break; 1236 if(r1->act.b[z] & bb) 1237 break; 1238 r = r1; 1239 } 1240 1241 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1242 addmove(r, bn, rn, 0); 1243 for(;;) { 1244 r->act.b[z] |= bb; 1245 p = r->prog; 1246 1247 if(r->use1.b[z] & bb) { 1248 if(debug['R']) 1249 print("%P", p); 1250 addreg(&p->from, rn); 1251 if(debug['R']) 1252 print("\t.c%P\n", p); 1253 } 1254 if((r->use2.b[z]|r->set.b[z]) & bb) { 1255 if(debug['R']) 1256 print("%P", p); 1257 addreg(&p->to, rn); 1258 if(debug['R']) 1259 print("\t.c%P\n", p); 1260 } 1261 1262 if(STORE(r) & r->regdiff.b[z] & bb) 
1263 addmove(r, bn, rn, 1); 1264 r->regu |= rb; 1265 1266 if(r->refbehind.b[z] & bb) 1267 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1268 if(r1->refahead.b[z] & bb) 1269 paint3(r1, bn, rb, rn); 1270 1271 if(!(r->refahead.b[z] & bb)) 1272 break; 1273 r1 = r->s2; 1274 if(r1 != R) 1275 if(r1->refbehind.b[z] & bb) 1276 paint3(r1, bn, rb, rn); 1277 r = r->s1; 1278 if(r == R) 1279 break; 1280 if(r->act.b[z] & bb) 1281 break; 1282 if(!(r->refbehind.b[z] & bb)) 1283 break; 1284 } 1285 } 1286 1287 void 1288 addreg(Adr *a, int rn) 1289 { 1290 1291 a->sym = 0; 1292 a->offset = 0; 1293 a->type = rn; 1294 } 1295 1296 int32 1297 RtoB(int r) 1298 { 1299 1300 if(r < D_AX || r > D_DI) 1301 return 0; 1302 return 1L << (r-D_AX); 1303 } 1304 1305 int 1306 BtoR(int32 b) 1307 { 1308 1309 b &= 0xffL; 1310 if(b == 0) 1311 return 0; 1312 return bitno(b) + D_AX; 1313 } 1314 1315 /* what instruction does a JMP to p eventually land on? */ 1316 static Reg* 1317 chasejmp(Reg *r, int *jmploop) 1318 { 1319 int n; 1320 1321 n = 0; 1322 for(; r; r=r->s2) { 1323 if(r->prog->as != AJMP || r->prog->to.type != D_BRANCH) 1324 break; 1325 if(++n > 10) { 1326 *jmploop = 1; 1327 break; 1328 } 1329 } 1330 return r; 1331 } 1332 1333 /* mark all code reachable from firstp as alive */ 1334 static void 1335 mark(Reg *firstr) 1336 { 1337 Reg *r; 1338 Prog *p; 1339 1340 for(r=firstr; r; r=r->link) { 1341 if(r->active) 1342 break; 1343 r->active = 1; 1344 p = r->prog; 1345 if(p->as != ACALL && p->to.type == D_BRANCH) 1346 mark(r->s2); 1347 if(p->as == AJMP || p->as == ARET || p->as == AUNDEF) 1348 break; 1349 } 1350 } 1351 1352 /* 1353 * the code generator depends on being able to write out JMP 1354 * instructions that it can jump to now but fill in later. 1355 * the linker will resolve them nicely, but they make the code 1356 * longer and more difficult to follow during debugging. 1357 * remove them. 
1358 */ 1359 static void 1360 fixjmp(Reg *firstr) 1361 { 1362 int jmploop; 1363 Reg *r; 1364 Prog *p; 1365 1366 if(debug['R'] && debug['v']) 1367 print("\nfixjmp\n"); 1368 1369 // pass 1: resolve jump to AJMP, mark all code as dead. 1370 jmploop = 0; 1371 for(r=firstr; r; r=r->link) { 1372 p = r->prog; 1373 if(debug['R'] && debug['v']) 1374 print("%04d %P\n", r->pc, p); 1375 if(p->as != ACALL && p->to.type == D_BRANCH && r->s2 && r->s2->prog->as == AJMP) { 1376 r->s2 = chasejmp(r->s2, &jmploop); 1377 p->to.offset = r->s2->pc; 1378 if(debug['R'] && debug['v']) 1379 print("->%P\n", p); 1380 } 1381 r->active = 0; 1382 } 1383 if(debug['R'] && debug['v']) 1384 print("\n"); 1385 1386 // pass 2: mark all reachable code alive 1387 mark(firstr); 1388 1389 // pass 3: delete dead code (mostly JMPs). 1390 for(r=firstr; r; r=r->link) { 1391 if(!r->active) { 1392 p = r->prog; 1393 if(p->link == P && p->as == ARET && r->p1 && r->p1->prog->as != ARET) { 1394 // This is the final ARET, and the code so far doesn't have one. 1395 // Let it stay. 1396 } else { 1397 if(debug['R'] && debug['v']) 1398 print("del %04d %P\n", r->pc, p); 1399 p->as = ANOP; 1400 } 1401 } 1402 } 1403 1404 // pass 4: elide JMP to next instruction. 1405 // only safe if there are no jumps to JMPs anymore. 1406 if(!jmploop) { 1407 for(r=firstr; r; r=r->link) { 1408 p = r->prog; 1409 if(p->as == AJMP && p->to.type == D_BRANCH && r->s2 == r->link) { 1410 if(debug['R'] && debug['v']) 1411 print("del %04d %P\n", r->pc, p); 1412 p->as = ANOP; 1413 } 1414 } 1415 } 1416 1417 // fix back pointers. 1418 for(r=firstr; r; r=r->link) { 1419 r->p2 = R; 1420 r->p2link = R; 1421 } 1422 for(r=firstr; r; r=r->link) { 1423 if(r->s2) { 1424 r->p2link = r->s2->p2; 1425 r->s2->p2 = r; 1426 } 1427 } 1428 1429 if(debug['R'] && debug['v']) { 1430 print("\n"); 1431 for(r=firstr; r; r=r->link) 1432 print("%04d %P\n", r->pc, r->prog); 1433 print("\n"); 1434 } 1435 } 1436