// Inferno utils/8c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/8c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include "gc.h" 32 33 static void fixjmp(Reg*); 34 35 Reg* 36 rega(void) 37 { 38 Reg *r; 39 40 r = freer; 41 if(r == R) { 42 r = alloc(sizeof(*r)); 43 } else 44 freer = r->link; 45 46 *r = zreg; 47 return r; 48 } 49 50 int 51 rcmp(const void *a1, const void *a2) 52 { 53 Rgn *p1, *p2; 54 int c1, c2; 55 56 p1 = (Rgn*)a1; 57 p2 = (Rgn*)a2; 58 c1 = p2->cost; 59 c2 = p1->cost; 60 if(c1 -= c2) 61 return c1; 62 return p2->varno - p1->varno; 63 } 64 65 void 66 regopt(Prog *p) 67 { 68 Reg *r, *r1, *r2; 69 Prog *p1; 70 int i, z; 71 int32 initpc, val, npc; 72 uint32 vreg; 73 Bits bit; 74 struct 75 { 76 int32 m; 77 int32 c; 78 Reg* p; 79 } log5[6], *lp; 80 81 firstr = R; 82 lastr = R; 83 nvar = 0; 84 regbits = RtoB(D_SP) | RtoB(D_AX); 85 for(z=0; z<BITS; z++) { 86 externs.b[z] = 0; 87 params.b[z] = 0; 88 consts.b[z] = 0; 89 addrs.b[z] = 0; 90 } 91 92 /* 93 * pass 1 94 * build aux data structure 95 * allocate pcs 96 * find use and set of variables 97 */ 98 val = 5L * 5L * 5L * 5L * 5L; 99 lp = log5; 100 for(i=0; i<5; i++) { 101 lp->m = val; 102 lp->c = 0; 103 lp->p = R; 104 val /= 5L; 105 lp++; 106 } 107 val = 0; 108 for(; p != P; p = p->link) { 109 switch(p->as) { 110 case ADATA: 111 case AGLOBL: 112 case ANAME: 113 case ASIGNAME: 114 case AFUNCDATA: 115 continue; 116 } 117 r = rega(); 118 if(firstr == R) { 119 firstr = r; 120 lastr = r; 121 } else { 122 lastr->link = r; 123 r->p1 = lastr; 124 lastr->s1 = r; 125 lastr = r; 126 } 127 r->prog = p; 128 r->pc = val; 129 val++; 130 131 lp = log5; 132 for(i=0; i<5; i++) { 133 lp->c--; 134 if(lp->c <= 0) { 135 lp->c = lp->m; 136 if(lp->p != R) 137 lp->p->log5 = r; 138 lp->p = r; 139 (lp+1)->c = 0; 140 break; 141 } 142 lp++; 143 } 144 145 r1 = r->p1; 146 if(r1 != R) 147 switch(r1->prog->as) { 148 case ARET: 149 case AJMP: 150 case AIRETL: 151 r->p1 = R; 152 r1->s1 = R; 153 } 154 bit = mkvar(r, &p->from); 155 if(bany(&bit)) 156 switch(p->as) { 157 /* 158 * funny 159 */ 160 case ALEAL: 161 for(z=0; z<BITS; z++) 162 addrs.b[z] |= 
bit.b[z]; 163 break; 164 165 /* 166 * left side read 167 */ 168 default: 169 for(z=0; z<BITS; z++) 170 r->use1.b[z] |= bit.b[z]; 171 break; 172 } 173 174 bit = mkvar(r, &p->to); 175 if(bany(&bit)) 176 switch(p->as) { 177 default: 178 diag(Z, "reg: unknown op: %A", p->as); 179 break; 180 181 /* 182 * right side read 183 */ 184 case ACMPB: 185 case ACMPL: 186 case ACMPW: 187 case APREFETCHT0: 188 case APREFETCHT1: 189 case APREFETCHT2: 190 case APREFETCHNTA: 191 for(z=0; z<BITS; z++) 192 r->use2.b[z] |= bit.b[z]; 193 break; 194 195 /* 196 * right side write 197 */ 198 case ANOP: 199 case AMOVL: 200 case AMOVB: 201 case AMOVW: 202 case AMOVBLSX: 203 case AMOVBLZX: 204 case AMOVWLSX: 205 case AMOVWLZX: 206 for(z=0; z<BITS; z++) 207 r->set.b[z] |= bit.b[z]; 208 break; 209 210 /* 211 * right side read+write 212 */ 213 case AADDB: 214 case AADDL: 215 case AADDW: 216 case AANDB: 217 case AANDL: 218 case AANDW: 219 case ASUBB: 220 case ASUBL: 221 case ASUBW: 222 case AORB: 223 case AORL: 224 case AORW: 225 case AXORB: 226 case AXORL: 227 case AXORW: 228 case ASALB: 229 case ASALL: 230 case ASALW: 231 case ASARB: 232 case ASARL: 233 case ASARW: 234 case AROLB: 235 case AROLL: 236 case AROLW: 237 case ARORB: 238 case ARORL: 239 case ARORW: 240 case ASHLB: 241 case ASHLL: 242 case ASHLW: 243 case ASHRB: 244 case ASHRL: 245 case ASHRW: 246 case AIMULL: 247 case AIMULW: 248 case ANEGL: 249 case ANOTL: 250 case AADCL: 251 case ASBBL: 252 for(z=0; z<BITS; z++) { 253 r->set.b[z] |= bit.b[z]; 254 r->use2.b[z] |= bit.b[z]; 255 } 256 break; 257 258 /* 259 * funny 260 */ 261 case AFMOVDP: 262 case AFMOVFP: 263 case AFMOVLP: 264 case AFMOVVP: 265 case AFMOVWP: 266 case ACALL: 267 for(z=0; z<BITS; z++) 268 addrs.b[z] |= bit.b[z]; 269 break; 270 } 271 272 switch(p->as) { 273 case AIMULL: 274 case AIMULW: 275 if(p->to.type != D_NONE) 276 break; 277 278 case AIDIVB: 279 case AIDIVL: 280 case AIDIVW: 281 case AIMULB: 282 case ADIVB: 283 case ADIVL: 284 case ADIVW: 285 case AMULB: 286 case 
AMULL: 287 case AMULW: 288 289 case ACWD: 290 case ACDQ: 291 r->regu |= RtoB(D_AX) | RtoB(D_DX); 292 break; 293 294 case AREP: 295 case AREPN: 296 case ALOOP: 297 case ALOOPEQ: 298 case ALOOPNE: 299 r->regu |= RtoB(D_CX); 300 break; 301 302 case AMOVSB: 303 case AMOVSL: 304 case AMOVSW: 305 case ACMPSB: 306 case ACMPSL: 307 case ACMPSW: 308 r->regu |= RtoB(D_SI) | RtoB(D_DI); 309 break; 310 311 case ASTOSB: 312 case ASTOSL: 313 case ASTOSW: 314 case ASCASB: 315 case ASCASL: 316 case ASCASW: 317 r->regu |= RtoB(D_AX) | RtoB(D_DI); 318 break; 319 320 case AINSB: 321 case AINSL: 322 case AINSW: 323 case AOUTSB: 324 case AOUTSL: 325 case AOUTSW: 326 r->regu |= RtoB(D_DI) | RtoB(D_DX); 327 break; 328 329 case AFSTSW: 330 case ASAHF: 331 r->regu |= RtoB(D_AX); 332 break; 333 } 334 } 335 if(firstr == R) 336 return; 337 initpc = pc - val; 338 npc = val; 339 340 /* 341 * pass 2 342 * turn branch references to pointers 343 * build back pointers 344 */ 345 for(r = firstr; r != R; r = r->link) { 346 p = r->prog; 347 if(p->to.type == D_BRANCH) { 348 val = p->to.offset - initpc; 349 r1 = firstr; 350 while(r1 != R) { 351 r2 = r1->log5; 352 if(r2 != R && val >= r2->pc) { 353 r1 = r2; 354 continue; 355 } 356 if(r1->pc == val) 357 break; 358 r1 = r1->link; 359 } 360 if(r1 == R) { 361 nearln = p->lineno; 362 diag(Z, "ref not found\n%P", p); 363 continue; 364 } 365 if(r1 == r) { 366 nearln = p->lineno; 367 diag(Z, "ref to self\n%P", p); 368 continue; 369 } 370 r->s2 = r1; 371 r->p2link = r1->p2; 372 r1->p2 = r; 373 } 374 } 375 if(debug['R']) { 376 p = firstr->prog; 377 print("\n%L %D\n", p->lineno, &p->from); 378 } 379 380 /* 381 * pass 2.1 382 * fix jumps 383 */ 384 fixjmp(firstr); 385 386 /* 387 * pass 2.5 388 * find looping structure 389 */ 390 for(r = firstr; r != R; r = r->link) 391 r->active = 0; 392 change = 0; 393 loopit(firstr, npc); 394 if(debug['R'] && debug['v']) { 395 print("\nlooping structure:\n"); 396 for(r = firstr; r != R; r = r->link) { 397 print("%d:%P", r->loop, 
r->prog); 398 for(z=0; z<BITS; z++) 399 bit.b[z] = r->use1.b[z] | 400 r->use2.b[z] | 401 r->set.b[z]; 402 if(bany(&bit)) { 403 print("\t"); 404 if(bany(&r->use1)) 405 print(" u1=%B", r->use1); 406 if(bany(&r->use2)) 407 print(" u2=%B", r->use2); 408 if(bany(&r->set)) 409 print(" st=%B", r->set); 410 } 411 print("\n"); 412 } 413 } 414 415 /* 416 * pass 3 417 * iterate propagating usage 418 * back until flow graph is complete 419 */ 420 loop1: 421 change = 0; 422 for(r = firstr; r != R; r = r->link) 423 r->active = 0; 424 for(r = firstr; r != R; r = r->link) 425 if(r->prog->as == ARET) 426 prop(r, zbits, zbits); 427 loop11: 428 /* pick up unreachable code */ 429 i = 0; 430 for(r = firstr; r != R; r = r1) { 431 r1 = r->link; 432 if(r1 && r1->active && !r->active) { 433 prop(r, zbits, zbits); 434 i = 1; 435 } 436 } 437 if(i) 438 goto loop11; 439 if(change) 440 goto loop1; 441 442 443 /* 444 * pass 4 445 * iterate propagating register/variable synchrony 446 * forward until graph is complete 447 */ 448 loop2: 449 change = 0; 450 for(r = firstr; r != R; r = r->link) 451 r->active = 0; 452 synch(firstr, zbits); 453 if(change) 454 goto loop2; 455 456 457 /* 458 * pass 5 459 * isolate regions 460 * calculate costs (paint1) 461 */ 462 r = firstr; 463 if(r) { 464 for(z=0; z<BITS; z++) 465 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 466 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 467 if(bany(&bit)) { 468 nearln = r->prog->lineno; 469 warn(Z, "used and not set: %B", bit); 470 if(debug['R'] && !debug['w']) 471 print("used and not set: %B\n", bit); 472 } 473 } 474 if(debug['R'] && debug['v']) 475 print("\nprop structure:\n"); 476 for(r = firstr; r != R; r = r->link) 477 r->act = zbits; 478 rgp = region; 479 nregion = 0; 480 for(r = firstr; r != R; r = r->link) { 481 if(debug['R'] && debug['v']) { 482 print("%P\t", r->prog); 483 if(bany(&r->set)) 484 print("s:%B ", r->set); 485 if(bany(&r->refahead)) 486 print("ra:%B ", r->refahead); 487 if(bany(&r->calahead)) 
488 print("ca:%B ", r->calahead); 489 print("\n"); 490 } 491 for(z=0; z<BITS; z++) 492 bit.b[z] = r->set.b[z] & 493 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 494 if(bany(&bit)) { 495 nearln = r->prog->lineno; 496 warn(Z, "set and not used: %B", bit); 497 if(debug['R']) 498 print("set and not used: %B\n", bit); 499 excise(r); 500 } 501 for(z=0; z<BITS; z++) 502 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 503 while(bany(&bit)) { 504 i = bnum(bit); 505 rgp->enter = r; 506 rgp->varno = i; 507 change = 0; 508 if(debug['R'] && debug['v']) 509 print("\n"); 510 paint1(r, i); 511 bit.b[i/32] &= ~(1L<<(i%32)); 512 if(change <= 0) { 513 if(debug['R']) 514 print("%L$%d: %B\n", 515 r->prog->lineno, change, blsh(i)); 516 continue; 517 } 518 rgp->cost = change; 519 nregion++; 520 if(nregion >= NRGN) { 521 fatal(Z, "too many regions"); 522 goto brk; 523 } 524 rgp++; 525 } 526 } 527 brk: 528 qsort(region, nregion, sizeof(region[0]), rcmp); 529 530 /* 531 * pass 6 532 * determine used registers (paint2) 533 * replace code (paint3) 534 */ 535 rgp = region; 536 for(i=0; i<nregion; i++) { 537 bit = blsh(rgp->varno); 538 vreg = paint2(rgp->enter, rgp->varno); 539 vreg = allreg(vreg, rgp); 540 if(debug['R']) { 541 print("%L$%d %R: %B\n", 542 rgp->enter->prog->lineno, 543 rgp->cost, 544 rgp->regno, 545 bit); 546 } 547 if(rgp->regno != 0) 548 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 549 rgp++; 550 } 551 /* 552 * pass 7 553 * peep-hole on basic block 554 */ 555 if(!debug['R'] || debug['P']) 556 peep(); 557 558 if(debug['R'] && debug['v']) { 559 print("after pass 7 (peep)\n"); 560 for(r=firstr; r; r=r->link) 561 print("%04d %P\n", (int)r->pc, r->prog); 562 print("\n"); 563 } 564 565 /* 566 * pass 8 567 * recalculate pc 568 */ 569 val = initpc; 570 for(r = firstr; r != R; r = r1) { 571 r->pc = val; 572 p = r->prog; 573 p1 = P; 574 r1 = r->link; 575 if(r1 != R) 576 p1 = r1->prog; 577 for(; p != p1; p = p->link) { 578 switch(p->as) { 579 default: 580 val++; 581 break; 
582 583 case ANOP: 584 case ADATA: 585 case AGLOBL: 586 case ANAME: 587 case ASIGNAME: 588 case AFUNCDATA: 589 break; 590 } 591 } 592 } 593 pc = val; 594 595 /* 596 * fix up branches 597 */ 598 if(debug['R']) 599 if(bany(&addrs)) 600 print("addrs: %B\n", addrs); 601 602 r1 = 0; /* set */ 603 for(r = firstr; r != R; r = r->link) { 604 p = r->prog; 605 if(p->to.type == D_BRANCH) { 606 p->to.offset = r->s2->pc; 607 p->to.u.branch = r->s2->prog; 608 } 609 r1 = r; 610 } 611 612 /* 613 * last pass 614 * eliminate nops 615 * free aux structures 616 */ 617 for(p = firstr->prog; p != P; p = p->link){ 618 while(p->link && p->link->as == ANOP) 619 p->link = p->link->link; 620 } 621 622 if(debug['R'] && debug['v']) { 623 print("after pass 8 (fixup pc)\n"); 624 for(p1=firstr->prog; p1!=P; p1=p1->link) 625 print("%P\n", p1); 626 print("\n"); 627 } 628 629 if(r1 != R) { 630 r1->link = freer; 631 freer = firstr; 632 } 633 } 634 635 /* 636 * add mov b,rn 637 * just after r 638 */ 639 void 640 addmove(Reg *r, int bn, int rn, int f) 641 { 642 Prog *p, *p1; 643 Addr *a; 644 Var *v; 645 646 p1 = alloc(sizeof(*p1)); 647 *p1 = zprog; 648 p = r->prog; 649 650 p1->link = p->link; 651 p->link = p1; 652 p1->lineno = p->lineno; 653 654 v = var + bn; 655 656 a = &p1->to; 657 a->sym = v->sym; 658 a->offset = v->offset; 659 a->etype = v->etype; 660 a->type = v->name; 661 662 p1->as = AMOVL; 663 if(v->etype == TCHAR || v->etype == TUCHAR) 664 p1->as = AMOVB; 665 if(v->etype == TSHORT || v->etype == TUSHORT) 666 p1->as = AMOVW; 667 668 p1->from.type = rn; 669 if(!f) { 670 p1->from = *a; 671 *a = zprog.from; 672 a->type = rn; 673 if(v->etype == TUCHAR) 674 p1->as = AMOVB; 675 if(v->etype == TUSHORT) 676 p1->as = AMOVW; 677 } 678 if(debug['R']) 679 print("%P\t.a%P\n", p, p1); 680 } 681 682 uint32 683 doregbits(int r) 684 { 685 uint32 b; 686 687 b = 0; 688 if(r >= D_INDIR) 689 r -= D_INDIR; 690 if(r >= D_AX && r <= D_DI) 691 b |= RtoB(r); 692 else 693 if(r >= D_AL && r <= D_BL) 694 b |= 
RtoB(r-D_AL+D_AX); 695 else 696 if(r >= D_AH && r <= D_BH) 697 b |= RtoB(r-D_AH+D_AX); 698 return b; 699 } 700 701 Bits 702 mkvar(Reg *r, Addr *a) 703 { 704 Var *v; 705 int i, t, n, et, z; 706 int32 o; 707 Bits bit; 708 LSym *s; 709 710 /* 711 * mark registers used 712 */ 713 t = a->type; 714 r->regu |= doregbits(t); 715 r->regu |= doregbits(a->index); 716 717 switch(t) { 718 default: 719 goto none; 720 case D_ADDR: 721 a->type = a->index; 722 bit = mkvar(r, a); 723 for(z=0; z<BITS; z++) 724 addrs.b[z] |= bit.b[z]; 725 a->type = t; 726 goto none; 727 case D_EXTERN: 728 case D_STATIC: 729 case D_PARAM: 730 case D_AUTO: 731 n = t; 732 break; 733 } 734 s = a->sym; 735 if(s == nil) 736 goto none; 737 if(s->name[0] == '.') 738 goto none; 739 et = a->etype; 740 o = a->offset; 741 v = var; 742 for(i=0; i<nvar; i++) { 743 if(s == v->sym) 744 if(n == v->name) 745 if(o == v->offset) 746 goto out; 747 v++; 748 } 749 if(nvar >= NVAR) 750 fatal(Z, "variable not optimized: %s", s->name); 751 i = nvar; 752 nvar++; 753 v = &var[i]; 754 v->sym = s; 755 v->offset = o; 756 v->name = n; 757 v->etype = et; 758 if(debug['R']) 759 print("bit=%2d et=%2d %D\n", i, et, a); 760 761 out: 762 bit = blsh(i); 763 if(n == D_EXTERN || n == D_STATIC) 764 for(z=0; z<BITS; z++) 765 externs.b[z] |= bit.b[z]; 766 if(n == D_PARAM) 767 for(z=0; z<BITS; z++) 768 params.b[z] |= bit.b[z]; 769 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 770 for(z=0; z<BITS; z++) 771 addrs.b[z] |= bit.b[z]; 772 return bit; 773 774 none: 775 return zbits; 776 } 777 778 void 779 prop(Reg *r, Bits ref, Bits cal) 780 { 781 Reg *r1, *r2; 782 int z; 783 784 for(r1 = r; r1 != R; r1 = r1->p1) { 785 for(z=0; z<BITS; z++) { 786 ref.b[z] |= r1->refahead.b[z]; 787 if(ref.b[z] != r1->refahead.b[z]) { 788 r1->refahead.b[z] = ref.b[z]; 789 change++; 790 } 791 cal.b[z] |= r1->calahead.b[z]; 792 if(cal.b[z] != r1->calahead.b[z]) { 793 r1->calahead.b[z] = cal.b[z]; 794 change++; 795 } 796 } 797 switch(r1->prog->as) { 798 case 
ACALL: 799 for(z=0; z<BITS; z++) { 800 cal.b[z] |= ref.b[z] | externs.b[z]; 801 ref.b[z] = 0; 802 } 803 break; 804 805 case ATEXT: 806 for(z=0; z<BITS; z++) { 807 cal.b[z] = 0; 808 ref.b[z] = 0; 809 } 810 break; 811 812 case ARET: 813 for(z=0; z<BITS; z++) { 814 cal.b[z] = externs.b[z]; 815 ref.b[z] = 0; 816 } 817 } 818 for(z=0; z<BITS; z++) { 819 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 820 r1->use1.b[z] | r1->use2.b[z]; 821 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 822 r1->refbehind.b[z] = ref.b[z]; 823 r1->calbehind.b[z] = cal.b[z]; 824 } 825 if(r1->active) 826 break; 827 r1->active = 1; 828 } 829 for(; r != r1; r = r->p1) 830 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 831 prop(r2, r->refbehind, r->calbehind); 832 } 833 834 /* 835 * find looping structure 836 * 837 * 1) find reverse postordering 838 * 2) find approximate dominators, 839 * the actual dominators if the flow graph is reducible 840 * otherwise, dominators plus some other non-dominators. 841 * See Matthew S. Hecht and Jeffrey D. Ullman, 842 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 843 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 844 * Oct. 1-3, 1973, pp. 207-217. 845 * 3) find all nodes with a predecessor dominated by the current node. 846 * such a node is a loop head. 
847 * recursively, all preds with a greater rpo number are in the loop 848 */ 849 int32 850 postorder(Reg *r, Reg **rpo2r, int32 n) 851 { 852 Reg *r1; 853 854 r->rpo = 1; 855 r1 = r->s1; 856 if(r1 && !r1->rpo) 857 n = postorder(r1, rpo2r, n); 858 r1 = r->s2; 859 if(r1 && !r1->rpo) 860 n = postorder(r1, rpo2r, n); 861 rpo2r[n] = r; 862 n++; 863 return n; 864 } 865 866 int32 867 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 868 { 869 int32 t; 870 871 if(rpo1 == -1) 872 return rpo2; 873 while(rpo1 != rpo2){ 874 if(rpo1 > rpo2){ 875 t = rpo2; 876 rpo2 = rpo1; 877 rpo1 = t; 878 } 879 while(rpo1 < rpo2){ 880 t = idom[rpo2]; 881 if(t >= rpo2) 882 fatal(Z, "bad idom"); 883 rpo2 = t; 884 } 885 } 886 return rpo1; 887 } 888 889 int 890 doms(int32 *idom, int32 r, int32 s) 891 { 892 while(s > r) 893 s = idom[s]; 894 return s == r; 895 } 896 897 int 898 loophead(int32 *idom, Reg *r) 899 { 900 int32 src; 901 902 src = r->rpo; 903 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 904 return 1; 905 for(r = r->p2; r != R; r = r->p2link) 906 if(doms(idom, src, r->rpo)) 907 return 1; 908 return 0; 909 } 910 911 void 912 loopmark(Reg **rpo2r, int32 head, Reg *r) 913 { 914 if(r->rpo < head || r->active == head) 915 return; 916 r->active = head; 917 r->loop += LOOP; 918 if(r->p1 != R) 919 loopmark(rpo2r, head, r->p1); 920 for(r = r->p2; r != R; r = r->p2link) 921 loopmark(rpo2r, head, r); 922 } 923 924 void 925 loopit(Reg *r, int32 nr) 926 { 927 Reg *r1; 928 int32 i, d, me; 929 930 if(nr > maxnr) { 931 rpo2r = alloc(nr * sizeof(Reg*)); 932 idom = alloc(nr * sizeof(int32)); 933 maxnr = nr; 934 } 935 936 d = postorder(r, rpo2r, 0); 937 if(d > nr) 938 fatal(Z, "too many reg nodes"); 939 nr = d; 940 for(i = 0; i < nr / 2; i++){ 941 r1 = rpo2r[i]; 942 rpo2r[i] = rpo2r[nr - 1 - i]; 943 rpo2r[nr - 1 - i] = r1; 944 } 945 for(i = 0; i < nr; i++) 946 rpo2r[i]->rpo = i; 947 948 idom[0] = 0; 949 for(i = 0; i < nr; i++){ 950 r1 = rpo2r[i]; 951 me = r1->rpo; 952 d = -1; 953 if(r1->p1 != R && r1->p1->rpo < 
me) 954 d = r1->p1->rpo; 955 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 956 if(r1->rpo < me) 957 d = rpolca(idom, d, r1->rpo); 958 idom[i] = d; 959 } 960 961 for(i = 0; i < nr; i++){ 962 r1 = rpo2r[i]; 963 r1->loop++; 964 if(r1->p2 != R && loophead(idom, r1)) 965 loopmark(rpo2r, i, r1); 966 } 967 } 968 969 void 970 synch(Reg *r, Bits dif) 971 { 972 Reg *r1; 973 int z; 974 975 for(r1 = r; r1 != R; r1 = r1->s1) { 976 for(z=0; z<BITS; z++) { 977 dif.b[z] = (dif.b[z] & 978 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 979 r1->set.b[z] | r1->regdiff.b[z]; 980 if(dif.b[z] != r1->regdiff.b[z]) { 981 r1->regdiff.b[z] = dif.b[z]; 982 change++; 983 } 984 } 985 if(r1->active) 986 break; 987 r1->active = 1; 988 for(z=0; z<BITS; z++) 989 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 990 if(r1->s2 != R) 991 synch(r1->s2, dif); 992 } 993 } 994 995 uint32 996 allreg(uint32 b, Rgn *r) 997 { 998 Var *v; 999 int i; 1000 1001 v = var + r->varno; 1002 r->regno = 0; 1003 switch(v->etype) { 1004 1005 default: 1006 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 1007 break; 1008 1009 case TCHAR: 1010 case TUCHAR: 1011 case TSHORT: 1012 case TUSHORT: 1013 case TINT: 1014 case TUINT: 1015 case TLONG: 1016 case TULONG: 1017 case TIND: 1018 case TARRAY: 1019 i = BtoR(~b); 1020 if(i && r->cost > 0) { 1021 r->regno = i; 1022 return RtoB(i); 1023 } 1024 break; 1025 1026 case TDOUBLE: 1027 case TFLOAT: 1028 break; 1029 } 1030 return 0; 1031 } 1032 1033 void 1034 paint1(Reg *r, int bn) 1035 { 1036 Reg *r1; 1037 Prog *p; 1038 int z; 1039 uint32 bb; 1040 1041 z = bn/32; 1042 bb = 1L<<(bn%32); 1043 if(r->act.b[z] & bb) 1044 return; 1045 for(;;) { 1046 if(!(r->refbehind.b[z] & bb)) 1047 break; 1048 r1 = r->p1; 1049 if(r1 == R) 1050 break; 1051 if(!(r1->refahead.b[z] & bb)) 1052 break; 1053 if(r1->act.b[z] & bb) 1054 break; 1055 r = r1; 1056 } 1057 1058 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 1059 change -= CLOAD * r->loop; 1060 if(debug['R'] && debug['v']) 1061 
print("%d%P\td %B $%d\n", r->loop, 1062 r->prog, blsh(bn), change); 1063 } 1064 for(;;) { 1065 r->act.b[z] |= bb; 1066 p = r->prog; 1067 1068 if(r->use1.b[z] & bb) { 1069 change += CREF * r->loop; 1070 if(p->as == AFMOVL) 1071 if(BtoR(bb) != D_F0) 1072 change = -CINF; 1073 if(debug['R'] && debug['v']) 1074 print("%d%P\tu1 %B $%d\n", r->loop, 1075 p, blsh(bn), change); 1076 } 1077 1078 if((r->use2.b[z]|r->set.b[z]) & bb) { 1079 change += CREF * r->loop; 1080 if(p->as == AFMOVL) 1081 if(BtoR(bb) != D_F0) 1082 change = -CINF; 1083 if(debug['R'] && debug['v']) 1084 print("%d%P\tu2 %B $%d\n", r->loop, 1085 p, blsh(bn), change); 1086 } 1087 1088 if(STORE(r) & r->regdiff.b[z] & bb) { 1089 change -= CLOAD * r->loop; 1090 if(p->as == AFMOVL) 1091 if(BtoR(bb) != D_F0) 1092 change = -CINF; 1093 if(debug['R'] && debug['v']) 1094 print("%d%P\tst %B $%d\n", r->loop, 1095 p, blsh(bn), change); 1096 } 1097 1098 if(r->refbehind.b[z] & bb) 1099 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1100 if(r1->refahead.b[z] & bb) 1101 paint1(r1, bn); 1102 1103 if(!(r->refahead.b[z] & bb)) 1104 break; 1105 r1 = r->s2; 1106 if(r1 != R) 1107 if(r1->refbehind.b[z] & bb) 1108 paint1(r1, bn); 1109 r = r->s1; 1110 if(r == R) 1111 break; 1112 if(r->act.b[z] & bb) 1113 break; 1114 if(!(r->refbehind.b[z] & bb)) 1115 break; 1116 } 1117 } 1118 1119 uint32 1120 regset(Reg *r, uint32 bb) 1121 { 1122 uint32 b, set; 1123 Addr v; 1124 int c; 1125 1126 set = 0; 1127 v = zprog.from; 1128 while(b = bb & ~(bb-1)) { 1129 v.type = BtoR(b); 1130 c = copyu(r->prog, &v, A); 1131 if(c == 3) 1132 set |= b; 1133 bb &= ~b; 1134 } 1135 return set; 1136 } 1137 1138 uint32 1139 reguse(Reg *r, uint32 bb) 1140 { 1141 uint32 b, set; 1142 Addr v; 1143 int c; 1144 1145 set = 0; 1146 v = zprog.from; 1147 while(b = bb & ~(bb-1)) { 1148 v.type = BtoR(b); 1149 c = copyu(r->prog, &v, A); 1150 if(c == 1 || c == 2 || c == 4) 1151 set |= b; 1152 bb &= ~b; 1153 } 1154 return set; 1155 } 1156 1157 uint32 1158 paint2(Reg *r, int bn) 1159 { 
1160 Reg *r1; 1161 int z; 1162 uint32 bb, vreg, x; 1163 1164 z = bn/32; 1165 bb = 1L << (bn%32); 1166 vreg = regbits; 1167 if(!(r->act.b[z] & bb)) 1168 return vreg; 1169 for(;;) { 1170 if(!(r->refbehind.b[z] & bb)) 1171 break; 1172 r1 = r->p1; 1173 if(r1 == R) 1174 break; 1175 if(!(r1->refahead.b[z] & bb)) 1176 break; 1177 if(!(r1->act.b[z] & bb)) 1178 break; 1179 r = r1; 1180 } 1181 for(;;) { 1182 r->act.b[z] &= ~bb; 1183 1184 vreg |= r->regu; 1185 1186 if(r->refbehind.b[z] & bb) 1187 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1188 if(r1->refahead.b[z] & bb) 1189 vreg |= paint2(r1, bn); 1190 1191 if(!(r->refahead.b[z] & bb)) 1192 break; 1193 r1 = r->s2; 1194 if(r1 != R) 1195 if(r1->refbehind.b[z] & bb) 1196 vreg |= paint2(r1, bn); 1197 r = r->s1; 1198 if(r == R) 1199 break; 1200 if(!(r->act.b[z] & bb)) 1201 break; 1202 if(!(r->refbehind.b[z] & bb)) 1203 break; 1204 } 1205 1206 bb = vreg; 1207 for(; r; r=r->s1) { 1208 x = r->regu & ~bb; 1209 if(x) { 1210 vreg |= reguse(r, x); 1211 bb |= regset(r, x); 1212 } 1213 } 1214 return vreg; 1215 } 1216 1217 void 1218 paint3(Reg *r, int bn, int32 rb, int rn) 1219 { 1220 Reg *r1; 1221 Prog *p; 1222 int z; 1223 uint32 bb; 1224 1225 z = bn/32; 1226 bb = 1L << (bn%32); 1227 if(r->act.b[z] & bb) 1228 return; 1229 for(;;) { 1230 if(!(r->refbehind.b[z] & bb)) 1231 break; 1232 r1 = r->p1; 1233 if(r1 == R) 1234 break; 1235 if(!(r1->refahead.b[z] & bb)) 1236 break; 1237 if(r1->act.b[z] & bb) 1238 break; 1239 r = r1; 1240 } 1241 1242 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1243 addmove(r, bn, rn, 0); 1244 for(;;) { 1245 r->act.b[z] |= bb; 1246 p = r->prog; 1247 1248 if(r->use1.b[z] & bb) { 1249 if(debug['R']) 1250 print("%P", p); 1251 addreg(&p->from, rn); 1252 if(debug['R']) 1253 print("\t.c%P\n", p); 1254 } 1255 if((r->use2.b[z]|r->set.b[z]) & bb) { 1256 if(debug['R']) 1257 print("%P", p); 1258 addreg(&p->to, rn); 1259 if(debug['R']) 1260 print("\t.c%P\n", p); 1261 } 1262 1263 if(STORE(r) & r->regdiff.b[z] & 
bb) 1264 addmove(r, bn, rn, 1); 1265 r->regu |= rb; 1266 1267 if(r->refbehind.b[z] & bb) 1268 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1269 if(r1->refahead.b[z] & bb) 1270 paint3(r1, bn, rb, rn); 1271 1272 if(!(r->refahead.b[z] & bb)) 1273 break; 1274 r1 = r->s2; 1275 if(r1 != R) 1276 if(r1->refbehind.b[z] & bb) 1277 paint3(r1, bn, rb, rn); 1278 r = r->s1; 1279 if(r == R) 1280 break; 1281 if(r->act.b[z] & bb) 1282 break; 1283 if(!(r->refbehind.b[z] & bb)) 1284 break; 1285 } 1286 } 1287 1288 void 1289 addreg(Addr *a, int rn) 1290 { 1291 1292 a->sym = 0; 1293 a->offset = 0; 1294 a->type = rn; 1295 } 1296 1297 int32 1298 RtoB(int r) 1299 { 1300 1301 if(r < D_AX || r > D_DI) 1302 return 0; 1303 return 1L << (r-D_AX); 1304 } 1305 1306 int 1307 BtoR(int32 b) 1308 { 1309 1310 b &= 0xffL; 1311 if(b == 0) 1312 return 0; 1313 return bitno(b) + D_AX; 1314 } 1315 1316 /* what instruction does a JMP to p eventually land on? */ 1317 static Reg* 1318 chasejmp(Reg *r, int *jmploop) 1319 { 1320 int n; 1321 1322 n = 0; 1323 for(; r; r=r->s2) { 1324 if(r->prog->as != AJMP || r->prog->to.type != D_BRANCH) 1325 break; 1326 if(++n > 10) { 1327 *jmploop = 1; 1328 break; 1329 } 1330 } 1331 return r; 1332 } 1333 1334 /* mark all code reachable from firstp as alive */ 1335 static void 1336 mark(Reg *firstr) 1337 { 1338 Reg *r; 1339 Prog *p; 1340 1341 for(r=firstr; r; r=r->link) { 1342 if(r->active) 1343 break; 1344 r->active = 1; 1345 p = r->prog; 1346 if(p->as != ACALL && p->to.type == D_BRANCH) 1347 mark(r->s2); 1348 if(p->as == AJMP || p->as == ARET || p->as == AUNDEF) 1349 break; 1350 } 1351 } 1352 1353 /* 1354 * the code generator depends on being able to write out JMP 1355 * instructions that it can jump to now but fill in later. 1356 * the linker will resolve them nicely, but they make the code 1357 * longer and more difficult to follow during debugging. 1358 * remove them. 
1359 */ 1360 static void 1361 fixjmp(Reg *firstr) 1362 { 1363 int jmploop; 1364 Reg *r; 1365 Prog *p; 1366 1367 if(debug['R'] && debug['v']) 1368 print("\nfixjmp\n"); 1369 1370 // pass 1: resolve jump to AJMP, mark all code as dead. 1371 jmploop = 0; 1372 for(r=firstr; r; r=r->link) { 1373 p = r->prog; 1374 if(debug['R'] && debug['v']) 1375 print("%04d %P\n", (int)r->pc, p); 1376 if(p->as != ACALL && p->to.type == D_BRANCH && r->s2 && r->s2->prog->as == AJMP) { 1377 r->s2 = chasejmp(r->s2, &jmploop); 1378 p->to.offset = r->s2->pc; 1379 p->to.u.branch = r->s2->prog; 1380 if(debug['R'] && debug['v']) 1381 print("->%P\n", p); 1382 } 1383 r->active = 0; 1384 } 1385 if(debug['R'] && debug['v']) 1386 print("\n"); 1387 1388 // pass 2: mark all reachable code alive 1389 mark(firstr); 1390 1391 // pass 3: delete dead code (mostly JMPs). 1392 for(r=firstr; r; r=r->link) { 1393 if(!r->active) { 1394 p = r->prog; 1395 if(p->link == P && p->as == ARET && r->p1 && r->p1->prog->as != ARET) { 1396 // This is the final ARET, and the code so far doesn't have one. 1397 // Let it stay. 1398 } else { 1399 if(debug['R'] && debug['v']) 1400 print("del %04d %P\n", (int)r->pc, p); 1401 p->as = ANOP; 1402 } 1403 } 1404 } 1405 1406 // pass 4: elide JMP to next instruction. 1407 // only safe if there are no jumps to JMPs anymore. 1408 if(!jmploop) { 1409 for(r=firstr; r; r=r->link) { 1410 p = r->prog; 1411 if(p->as == AJMP && p->to.type == D_BRANCH && r->s2 == r->link) { 1412 if(debug['R'] && debug['v']) 1413 print("del %04d %P\n", (int)r->pc, p); 1414 p->as = ANOP; 1415 } 1416 } 1417 } 1418 1419 // fix back pointers. 1420 for(r=firstr; r; r=r->link) { 1421 r->p2 = R; 1422 r->p2link = R; 1423 } 1424 for(r=firstr; r; r=r->link) { 1425 if(r->s2) { 1426 r->p2link = r->s2->p2; 1427 r->s2->p2 = r; 1428 } 1429 } 1430 1431 if(debug['R'] && debug['v']) { 1432 print("\n"); 1433 for(r=firstr; r; r=r->link) 1434 print("%04d %P\n", (int)r->pc, r->prog); 1435 print("\n"); 1436 } 1437 } 1438