// github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/5c/reg.c
//
// Inferno utils/5c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 32 #include "gc.h" 33 34 void addsplits(void); 35 36 Reg* 37 rega(void) 38 { 39 Reg *r; 40 41 r = freer; 42 if(r == R) { 43 r = alloc(sizeof(*r)); 44 } else 45 freer = r->link; 46 47 *r = zreg; 48 return r; 49 } 50 51 int 52 rcmp(const void *a1, const void *a2) 53 { 54 Rgn *p1, *p2; 55 int c1, c2; 56 57 p1 = (Rgn*)a1; 58 p2 = (Rgn*)a2; 59 c1 = p2->cost; 60 c2 = p1->cost; 61 if(c1 -= c2) 62 return c1; 63 return p2->varno - p1->varno; 64 } 65 66 void 67 regopt(Prog *p) 68 { 69 Reg *r, *r1, *r2; 70 Prog *p1; 71 int i, z; 72 int32 initpc, val, npc; 73 uint32 vreg; 74 Bits bit; 75 struct 76 { 77 int32 m; 78 int32 c; 79 Reg* p; 80 } log5[6], *lp; 81 82 firstr = R; 83 lastr = R; 84 nvar = 0; 85 regbits = 0; 86 for(z=0; z<BITS; z++) { 87 externs.b[z] = 0; 88 params.b[z] = 0; 89 consts.b[z] = 0; 90 addrs.b[z] = 0; 91 } 92 93 /* 94 * pass 1 95 * build aux data structure 96 * allocate pcs 97 * find use and set of variables 98 */ 99 val = 5L * 5L * 5L * 5L * 5L; 100 lp = log5; 101 for(i=0; i<5; i++) { 102 lp->m = val; 103 lp->c = 0; 104 lp->p = R; 105 val /= 5L; 106 lp++; 107 } 108 val = 0; 109 for(; p != P; p = p->link) { 110 switch(p->as) { 111 case ADATA: 112 case AGLOBL: 113 case ANAME: 114 case ASIGNAME: 115 continue; 116 } 117 r = rega(); 118 if(firstr == R) { 119 firstr = r; 120 lastr = r; 121 } else { 122 lastr->link = r; 123 r->p1 = lastr; 124 lastr->s1 = r; 125 lastr = r; 126 } 127 r->prog = p; 128 r->pc = val; 129 val++; 130 131 lp = log5; 132 for(i=0; i<5; i++) { 133 lp->c--; 134 if(lp->c <= 0) { 135 lp->c = lp->m; 136 if(lp->p != R) 137 lp->p->log5 = r; 138 lp->p = r; 139 (lp+1)->c = 0; 140 break; 141 } 142 lp++; 143 } 144 145 r1 = r->p1; 146 if(r1 != R) 147 switch(r1->prog->as) { 148 case ARET: 149 case AB: 150 case ARFE: 151 r->p1 = R; 152 r1->s1 = R; 153 } 154 155 /* 156 * left side always read 157 */ 158 bit = mkvar(&p->from, p->as==AMOVW); 159 for(z=0; z<BITS; z++) 160 r->use1.b[z] |= bit.b[z]; 161 162 /* 163 * right side depends on opcode 164 */ 165 
bit = mkvar(&p->to, 0); 166 if(bany(&bit)) 167 switch(p->as) { 168 default: 169 diag(Z, "reg: unknown asop: %A", p->as); 170 break; 171 172 /* 173 * right side write 174 */ 175 case ANOP: 176 case AMOVB: 177 case AMOVBU: 178 case AMOVH: 179 case AMOVHU: 180 case AMOVW: 181 case AMOVF: 182 case AMOVD: 183 for(z=0; z<BITS; z++) 184 r->set.b[z] |= bit.b[z]; 185 break; 186 187 /* 188 * right side read 189 */ 190 case APLD: 191 for(z=0; z<BITS; z++) 192 r->use2.b[z] |= bit.b[z]; 193 break; 194 195 /* 196 * funny 197 */ 198 case ABL: 199 for(z=0; z<BITS; z++) 200 addrs.b[z] |= bit.b[z]; 201 break; 202 } 203 204 if(p->as == AMOVM) { 205 if(p->from.type == D_CONST) 206 z = p->from.offset; 207 else 208 z = p->to.offset; 209 for(i=0; z; i++) { 210 if(z&1) 211 regbits |= RtoB(i); 212 z >>= 1; 213 } 214 } 215 } 216 if(firstr == R) 217 return; 218 initpc = pc - val; 219 npc = val; 220 221 /* 222 * pass 2 223 * turn branch references to pointers 224 * build back pointers 225 */ 226 for(r = firstr; r != R; r = r->link) { 227 p = r->prog; 228 if(p->to.type == D_BRANCH) { 229 val = p->to.offset - initpc; 230 r1 = firstr; 231 while(r1 != R) { 232 r2 = r1->log5; 233 if(r2 != R && val >= r2->pc) { 234 r1 = r2; 235 continue; 236 } 237 if(r1->pc == val) 238 break; 239 r1 = r1->link; 240 } 241 if(r1 == R) { 242 nearln = p->lineno; 243 diag(Z, "ref not found\n%P", p); 244 continue; 245 } 246 if(r1 == r) { 247 nearln = p->lineno; 248 diag(Z, "ref to self\n%P", p); 249 continue; 250 } 251 r->s2 = r1; 252 r->p2link = r1->p2; 253 r1->p2 = r; 254 } 255 } 256 if(debug['R']) { 257 p = firstr->prog; 258 print("\n%L %D\n", p->lineno, &p->from); 259 } 260 261 /* 262 * pass 2.5 263 * find looping structure 264 */ 265 for(r = firstr; r != R; r = r->link) 266 r->active = 0; 267 change = 0; 268 loopit(firstr, npc); 269 270 /* 271 * pass 3 272 * iterate propagating usage 273 * back until flow graph is complete 274 */ 275 loop1: 276 change = 0; 277 for(r = firstr; r != R; r = r->link) 278 r->active = 0; 
279 for(r = firstr; r != R; r = r->link) 280 if(r->prog->as == ARET) 281 prop(r, zbits, zbits); 282 loop11: 283 /* pick up unreachable code */ 284 i = 0; 285 for(r = firstr; r != R; r = r1) { 286 r1 = r->link; 287 if(r1 && r1->active && !r->active) { 288 prop(r, zbits, zbits); 289 i = 1; 290 } 291 } 292 if(i) 293 goto loop11; 294 if(change) 295 goto loop1; 296 297 298 /* 299 * pass 4 300 * iterate propagating register/variable synchrony 301 * forward until graph is complete 302 */ 303 loop2: 304 change = 0; 305 for(r = firstr; r != R; r = r->link) 306 r->active = 0; 307 synch(firstr, zbits); 308 if(change) 309 goto loop2; 310 311 addsplits(); 312 313 if(debug['R'] && debug['v']) { 314 print("\nprop structure:\n"); 315 for(r = firstr; r != R; r = r->link) { 316 print("%d:%P", r->loop, r->prog); 317 for(z=0; z<BITS; z++) 318 bit.b[z] = r->set.b[z] | 319 r->refahead.b[z] | r->calahead.b[z] | 320 r->refbehind.b[z] | r->calbehind.b[z] | 321 r->use1.b[z] | r->use2.b[z]; 322 if(bany(&bit)) { 323 print("\t"); 324 if(bany(&r->use1)) 325 print(" u1=%B", r->use1); 326 if(bany(&r->use2)) 327 print(" u2=%B", r->use2); 328 if(bany(&r->set)) 329 print(" st=%B", r->set); 330 if(bany(&r->refahead)) 331 print(" ra=%B", r->refahead); 332 if(bany(&r->calahead)) 333 print(" ca=%B", r->calahead); 334 if(bany(&r->refbehind)) 335 print(" rb=%B", r->refbehind); 336 if(bany(&r->calbehind)) 337 print(" cb=%B", r->calbehind); 338 } 339 print("\n"); 340 } 341 } 342 343 /* 344 * pass 5 345 * isolate regions 346 * calculate costs (paint1) 347 */ 348 r = firstr; 349 if(r) { 350 for(z=0; z<BITS; z++) 351 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 352 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 353 if(bany(&bit)) { 354 nearln = r->prog->lineno; 355 warn(Z, "used and not set: %B", bit); 356 if(debug['R'] && !debug['w']) 357 print("used and not set: %B\n", bit); 358 } 359 } 360 361 for(r = firstr; r != R; r = r->link) 362 r->act = zbits; 363 rgp = region; 364 nregion = 0; 365 
for(r = firstr; r != R; r = r->link) { 366 for(z=0; z<BITS; z++) 367 bit.b[z] = r->set.b[z] & 368 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 369 if(bany(&bit)) { 370 nearln = r->prog->lineno; 371 warn(Z, "set and not used: %B", bit); 372 if(debug['R']) 373 print("set and not used: %B\n", bit); 374 excise(r); 375 } 376 for(z=0; z<BITS; z++) 377 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 378 while(bany(&bit)) { 379 i = bnum(bit); 380 rgp->enter = r; 381 rgp->varno = i; 382 change = 0; 383 if(debug['R'] && debug['v']) 384 print("\n"); 385 paint1(r, i); 386 bit.b[i/32] &= ~(1L<<(i%32)); 387 if(change <= 0) { 388 if(debug['R']) 389 print("%L $%d: %B\n", 390 r->prog->lineno, change, blsh(i)); 391 continue; 392 } 393 rgp->cost = change; 394 nregion++; 395 if(nregion >= NRGN) { 396 warn(Z, "too many regions"); 397 goto brk; 398 } 399 rgp++; 400 } 401 } 402 brk: 403 qsort(region, nregion, sizeof(region[0]), rcmp); 404 405 /* 406 * pass 6 407 * determine used registers (paint2) 408 * replace code (paint3) 409 */ 410 rgp = region; 411 for(i=0; i<nregion; i++) { 412 bit = blsh(rgp->varno); 413 vreg = paint2(rgp->enter, rgp->varno); 414 vreg = allreg(vreg, rgp); 415 if(debug['R']) { 416 if(rgp->regno >= NREG) 417 print("%L $%d F%d: %B\n", 418 rgp->enter->prog->lineno, 419 rgp->cost, 420 rgp->regno-NREG, 421 bit); 422 else 423 print("%L $%d R%d: %B\n", 424 rgp->enter->prog->lineno, 425 rgp->cost, 426 rgp->regno, 427 bit); 428 } 429 if(rgp->regno != 0) 430 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 431 rgp++; 432 } 433 /* 434 * pass 7 435 * peep-hole on basic block 436 */ 437 if(!debug['R'] || debug['P']) 438 peep(); 439 440 /* 441 * pass 8 442 * recalculate pc 443 */ 444 val = initpc; 445 for(r = firstr; r != R; r = r1) { 446 r->pc = val; 447 p = r->prog; 448 p1 = P; 449 r1 = r->link; 450 if(r1 != R) 451 p1 = r1->prog; 452 for(; p != p1; p = p->link) { 453 switch(p->as) { 454 default: 455 val++; 456 break; 457 458 case ANOP: 459 case ADATA: 460 case 
AGLOBL: 461 case ANAME: 462 case ASIGNAME: 463 break; 464 } 465 } 466 } 467 pc = val; 468 469 /* 470 * fix up branches 471 */ 472 if(debug['R']) 473 if(bany(&addrs)) 474 print("addrs: %B\n", addrs); 475 476 r1 = 0; /* set */ 477 for(r = firstr; r != R; r = r->link) { 478 p = r->prog; 479 if(p->to.type == D_BRANCH) 480 p->to.offset = r->s2->pc; 481 r1 = r; 482 } 483 484 /* 485 * last pass 486 * eliminate nops 487 * free aux structures 488 */ 489 for(p = firstr->prog; p != P; p = p->link){ 490 while(p->link && p->link->as == ANOP) 491 p->link = p->link->link; 492 } 493 if(r1 != R) { 494 r1->link = freer; 495 freer = firstr; 496 } 497 } 498 499 void 500 addsplits(void) 501 { 502 Reg *r, *r1; 503 int z, i; 504 Bits bit; 505 506 for(r = firstr; r != R; r = r->link) { 507 if(r->loop > 1) 508 continue; 509 if(r->prog->as == ABL) 510 continue; 511 for(r1 = r->p2; r1 != R; r1 = r1->p2link) { 512 if(r1->loop <= 1) 513 continue; 514 for(z=0; z<BITS; z++) 515 bit.b[z] = r1->calbehind.b[z] & 516 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 517 ~(r->calahead.b[z] & addrs.b[z]); 518 while(bany(&bit)) { 519 i = bnum(bit); 520 bit.b[i/32] &= ~(1L << (i%32)); 521 } 522 } 523 } 524 } 525 526 /* 527 * add mov b,rn 528 * just after r 529 */ 530 void 531 addmove(Reg *r, int bn, int rn, int f) 532 { 533 Prog *p, *p1; 534 Adr *a; 535 Var *v; 536 537 p1 = alloc(sizeof(*p1)); 538 *p1 = zprog; 539 p = r->prog; 540 541 p1->link = p->link; 542 p->link = p1; 543 p1->lineno = p->lineno; 544 545 v = var + bn; 546 547 a = &p1->to; 548 a->sym = v->sym; 549 a->name = v->name; 550 a->offset = v->offset; 551 a->etype = v->etype; 552 a->type = D_OREG; 553 if(a->etype == TARRAY || a->sym == S) 554 a->type = D_CONST; 555 556 p1->as = AMOVW; 557 if(v->etype == TCHAR || v->etype == TUCHAR) 558 p1->as = AMOVB; 559 if(v->etype == TSHORT || v->etype == TUSHORT) 560 p1->as = AMOVH; 561 if(v->etype == TFLOAT) 562 p1->as = AMOVF; 563 if(v->etype == TDOUBLE) 564 p1->as = AMOVD; 565 566 p1->from.type = 
D_REG; 567 p1->from.reg = rn; 568 if(rn >= NREG) { 569 p1->from.type = D_FREG; 570 p1->from.reg = rn-NREG; 571 } 572 if(!f) { 573 p1->from = *a; 574 *a = zprog.from; 575 a->type = D_REG; 576 a->reg = rn; 577 if(rn >= NREG) { 578 a->type = D_FREG; 579 a->reg = rn-NREG; 580 } 581 if(v->etype == TUCHAR) 582 p1->as = AMOVBU; 583 if(v->etype == TUSHORT) 584 p1->as = AMOVHU; 585 } 586 if(debug['R']) 587 print("%P\t.a%P\n", p, p1); 588 } 589 590 Bits 591 mkvar(Adr *a, int docon) 592 { 593 Var *v; 594 int i, t, n, et, z; 595 int32 o; 596 Bits bit; 597 Sym *s; 598 599 t = a->type; 600 if(t == D_REG && a->reg != NREG) 601 regbits |= RtoB(a->reg); 602 if(t == D_FREG && a->reg != NREG) 603 regbits |= FtoB(a->reg); 604 s = a->sym; 605 o = a->offset; 606 et = a->etype; 607 if(s == S) { 608 if(t != D_CONST || !docon || a->reg != NREG) 609 goto none; 610 et = TLONG; 611 } 612 if(t == D_CONST) { 613 if(s == S && sval(o)) 614 goto none; 615 } 616 617 n = a->name; 618 v = var; 619 for(i=0; i<nvar; i++) { 620 if(s == v->sym) 621 if(n == v->name) 622 if(o == v->offset) 623 goto out; 624 v++; 625 } 626 if(s) 627 if(s->name[0] == '.') 628 goto none; 629 if(nvar >= NVAR) { 630 if(debug['w'] > 1 && s) 631 warn(Z, "variable not optimized: %s", s->name); 632 goto none; 633 } 634 i = nvar; 635 nvar++; 636 v = &var[i]; 637 v->sym = s; 638 v->offset = o; 639 v->etype = et; 640 v->name = n; 641 if(debug['R']) 642 print("bit=%2d et=%2d %D\n", i, et, a); 643 out: 644 bit = blsh(i); 645 if(n == D_EXTERN || n == D_STATIC) 646 for(z=0; z<BITS; z++) 647 externs.b[z] |= bit.b[z]; 648 if(n == D_PARAM) 649 for(z=0; z<BITS; z++) 650 params.b[z] |= bit.b[z]; 651 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 652 for(z=0; z<BITS; z++) 653 addrs.b[z] |= bit.b[z]; 654 if(t == D_CONST) { 655 if(s == S) { 656 for(z=0; z<BITS; z++) 657 consts.b[z] |= bit.b[z]; 658 return bit; 659 } 660 if(et != TARRAY) 661 for(z=0; z<BITS; z++) 662 addrs.b[z] |= bit.b[z]; 663 for(z=0; z<BITS; z++) 664 params.b[z] |= 
bit.b[z]; 665 return bit; 666 } 667 if(t == D_OREG) 668 return bit; 669 670 none: 671 return zbits; 672 } 673 674 void 675 prop(Reg *r, Bits ref, Bits cal) 676 { 677 Reg *r1, *r2; 678 int z; 679 680 for(r1 = r; r1 != R; r1 = r1->p1) { 681 for(z=0; z<BITS; z++) { 682 ref.b[z] |= r1->refahead.b[z]; 683 if(ref.b[z] != r1->refahead.b[z]) { 684 r1->refahead.b[z] = ref.b[z]; 685 change++; 686 } 687 cal.b[z] |= r1->calahead.b[z]; 688 if(cal.b[z] != r1->calahead.b[z]) { 689 r1->calahead.b[z] = cal.b[z]; 690 change++; 691 } 692 } 693 switch(r1->prog->as) { 694 case ABL: 695 for(z=0; z<BITS; z++) { 696 cal.b[z] |= ref.b[z] | externs.b[z]; 697 ref.b[z] = 0; 698 } 699 break; 700 701 case ATEXT: 702 for(z=0; z<BITS; z++) { 703 cal.b[z] = 0; 704 ref.b[z] = 0; 705 } 706 break; 707 708 case ARET: 709 for(z=0; z<BITS; z++) { 710 cal.b[z] = externs.b[z]; 711 ref.b[z] = 0; 712 } 713 } 714 for(z=0; z<BITS; z++) { 715 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 716 r1->use1.b[z] | r1->use2.b[z]; 717 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 718 r1->refbehind.b[z] = ref.b[z]; 719 r1->calbehind.b[z] = cal.b[z]; 720 } 721 if(r1->active) 722 break; 723 r1->active = 1; 724 } 725 for(; r != r1; r = r->p1) 726 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 727 prop(r2, r->refbehind, r->calbehind); 728 } 729 730 /* 731 * find looping structure 732 * 733 * 1) find reverse postordering 734 * 2) find approximate dominators, 735 * the actual dominators if the flow graph is reducible 736 * otherwise, dominators plus some other non-dominators. 737 * See Matthew S. Hecht and Jeffrey D. Ullman, 738 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 739 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 740 * Oct. 1-3, 1973, pp. 207-217. 741 * 3) find all nodes with a predecessor dominated by the current node. 742 * such a node is a loop head. 
743 * recursively, all preds with a greater rpo number are in the loop 744 */ 745 int32 746 postorder(Reg *r, Reg **rpo2r, int32 n) 747 { 748 Reg *r1; 749 750 r->rpo = 1; 751 r1 = r->s1; 752 if(r1 && !r1->rpo) 753 n = postorder(r1, rpo2r, n); 754 r1 = r->s2; 755 if(r1 && !r1->rpo) 756 n = postorder(r1, rpo2r, n); 757 rpo2r[n] = r; 758 n++; 759 return n; 760 } 761 762 int32 763 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 764 { 765 int32 t; 766 767 if(rpo1 == -1) 768 return rpo2; 769 while(rpo1 != rpo2){ 770 if(rpo1 > rpo2){ 771 t = rpo2; 772 rpo2 = rpo1; 773 rpo1 = t; 774 } 775 while(rpo1 < rpo2){ 776 t = idom[rpo2]; 777 if(t >= rpo2) 778 fatal(Z, "bad idom"); 779 rpo2 = t; 780 } 781 } 782 return rpo1; 783 } 784 785 int 786 doms(int32 *idom, int32 r, int32 s) 787 { 788 while(s > r) 789 s = idom[s]; 790 return s == r; 791 } 792 793 int 794 loophead(int32 *idom, Reg *r) 795 { 796 int32 src; 797 798 src = r->rpo; 799 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 800 return 1; 801 for(r = r->p2; r != R; r = r->p2link) 802 if(doms(idom, src, r->rpo)) 803 return 1; 804 return 0; 805 } 806 807 void 808 loopmark(Reg **rpo2r, int32 head, Reg *r) 809 { 810 if(r->rpo < head || r->active == head) 811 return; 812 r->active = head; 813 r->loop += LOOP; 814 if(r->p1 != R) 815 loopmark(rpo2r, head, r->p1); 816 for(r = r->p2; r != R; r = r->p2link) 817 loopmark(rpo2r, head, r); 818 } 819 820 void 821 loopit(Reg *r, int32 nr) 822 { 823 Reg *r1; 824 int32 i, d, me; 825 826 if(nr > maxnr) { 827 rpo2r = alloc(nr * sizeof(Reg*)); 828 idom = alloc(nr * sizeof(int32)); 829 maxnr = nr; 830 } 831 d = postorder(r, rpo2r, 0); 832 if(d > nr) 833 fatal(Z, "too many reg nodes"); 834 nr = d; 835 for(i = 0; i < nr / 2; i++){ 836 r1 = rpo2r[i]; 837 rpo2r[i] = rpo2r[nr - 1 - i]; 838 rpo2r[nr - 1 - i] = r1; 839 } 840 for(i = 0; i < nr; i++) 841 rpo2r[i]->rpo = i; 842 843 idom[0] = 0; 844 for(i = 0; i < nr; i++){ 845 r1 = rpo2r[i]; 846 me = r1->rpo; 847 d = -1; 848 if(r1->p1 != R && r1->p1->rpo < me) 
849 d = r1->p1->rpo; 850 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 851 if(r1->rpo < me) 852 d = rpolca(idom, d, r1->rpo); 853 idom[i] = d; 854 } 855 856 for(i = 0; i < nr; i++){ 857 r1 = rpo2r[i]; 858 r1->loop++; 859 if(r1->p2 != R && loophead(idom, r1)) 860 loopmark(rpo2r, i, r1); 861 } 862 } 863 864 void 865 synch(Reg *r, Bits dif) 866 { 867 Reg *r1; 868 int z; 869 870 for(r1 = r; r1 != R; r1 = r1->s1) { 871 for(z=0; z<BITS; z++) { 872 dif.b[z] = (dif.b[z] & 873 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 874 r1->set.b[z] | r1->regdiff.b[z]; 875 if(dif.b[z] != r1->regdiff.b[z]) { 876 r1->regdiff.b[z] = dif.b[z]; 877 change++; 878 } 879 } 880 if(r1->active) 881 break; 882 r1->active = 1; 883 for(z=0; z<BITS; z++) 884 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 885 if(r1->s2 != R) 886 synch(r1->s2, dif); 887 } 888 } 889 890 uint32 891 allreg(uint32 b, Rgn *r) 892 { 893 Var *v; 894 int i; 895 896 v = var + r->varno; 897 r->regno = 0; 898 switch(v->etype) { 899 900 default: 901 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 902 break; 903 904 case TCHAR: 905 case TUCHAR: 906 case TSHORT: 907 case TUSHORT: 908 case TINT: 909 case TUINT: 910 case TLONG: 911 case TULONG: 912 case TIND: 913 case TARRAY: 914 i = BtoR(~b); 915 if(i && r->cost >= 0) { 916 r->regno = i; 917 return RtoB(i); 918 } 919 break; 920 921 case TVLONG: 922 case TDOUBLE: 923 case TFLOAT: 924 i = BtoF(~b); 925 if(i && r->cost >= 0) { 926 r->regno = i+NREG; 927 return FtoB(i); 928 } 929 break; 930 } 931 return 0; 932 } 933 934 void 935 paint1(Reg *r, int bn) 936 { 937 Reg *r1; 938 Prog *p; 939 int z; 940 uint32 bb; 941 942 z = bn/32; 943 bb = 1L<<(bn%32); 944 if(r->act.b[z] & bb) 945 return; 946 for(;;) { 947 if(!(r->refbehind.b[z] & bb)) 948 break; 949 r1 = r->p1; 950 if(r1 == R) 951 break; 952 if(!(r1->refahead.b[z] & bb)) 953 break; 954 if(r1->act.b[z] & bb) 955 break; 956 r = r1; 957 } 958 959 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 960 change -= 
CLOAD * r->loop; 961 if(debug['R'] && debug['v']) 962 print("%d%P\td %B $%d\n", r->loop, 963 r->prog, blsh(bn), change); 964 } 965 for(;;) { 966 r->act.b[z] |= bb; 967 p = r->prog; 968 969 if(r->use1.b[z] & bb) { 970 change += CREF * r->loop; 971 if(debug['R'] && debug['v']) 972 print("%d%P\tu1 %B $%d\n", r->loop, 973 p, blsh(bn), change); 974 } 975 976 if((r->use2.b[z]|r->set.b[z]) & bb) { 977 change += CREF * r->loop; 978 if(debug['R'] && debug['v']) 979 print("%d%P\tu2 %B $%d\n", r->loop, 980 p, blsh(bn), change); 981 } 982 983 if(STORE(r) & r->regdiff.b[z] & bb) { 984 change -= CLOAD * r->loop; 985 if(debug['R'] && debug['v']) 986 print("%d%P\tst %B $%d\n", r->loop, 987 p, blsh(bn), change); 988 } 989 990 if(r->refbehind.b[z] & bb) 991 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 992 if(r1->refahead.b[z] & bb) 993 paint1(r1, bn); 994 995 if(!(r->refahead.b[z] & bb)) 996 break; 997 r1 = r->s2; 998 if(r1 != R) 999 if(r1->refbehind.b[z] & bb) 1000 paint1(r1, bn); 1001 r = r->s1; 1002 if(r == R) 1003 break; 1004 if(r->act.b[z] & bb) 1005 break; 1006 if(!(r->refbehind.b[z] & bb)) 1007 break; 1008 } 1009 } 1010 1011 uint32 1012 paint2(Reg *r, int bn) 1013 { 1014 Reg *r1; 1015 int z; 1016 uint32 bb, vreg; 1017 1018 z = bn/32; 1019 bb = 1L << (bn%32); 1020 vreg = regbits; 1021 if(!(r->act.b[z] & bb)) 1022 return vreg; 1023 for(;;) { 1024 if(!(r->refbehind.b[z] & bb)) 1025 break; 1026 r1 = r->p1; 1027 if(r1 == R) 1028 break; 1029 if(!(r1->refahead.b[z] & bb)) 1030 break; 1031 if(!(r1->act.b[z] & bb)) 1032 break; 1033 r = r1; 1034 } 1035 for(;;) { 1036 r->act.b[z] &= ~bb; 1037 1038 vreg |= r->regu; 1039 1040 if(r->refbehind.b[z] & bb) 1041 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1042 if(r1->refahead.b[z] & bb) 1043 vreg |= paint2(r1, bn); 1044 1045 if(!(r->refahead.b[z] & bb)) 1046 break; 1047 r1 = r->s2; 1048 if(r1 != R) 1049 if(r1->refbehind.b[z] & bb) 1050 vreg |= paint2(r1, bn); 1051 r = r->s1; 1052 if(r == R) 1053 break; 1054 if(!(r->act.b[z] & bb)) 1055 break; 1056 
if(!(r->refbehind.b[z] & bb)) 1057 break; 1058 } 1059 return vreg; 1060 } 1061 1062 void 1063 paint3(Reg *r, int bn, int32 rb, int rn) 1064 { 1065 Reg *r1; 1066 Prog *p; 1067 int z; 1068 uint32 bb; 1069 1070 z = bn/32; 1071 bb = 1L << (bn%32); 1072 if(r->act.b[z] & bb) 1073 return; 1074 for(;;) { 1075 if(!(r->refbehind.b[z] & bb)) 1076 break; 1077 r1 = r->p1; 1078 if(r1 == R) 1079 break; 1080 if(!(r1->refahead.b[z] & bb)) 1081 break; 1082 if(r1->act.b[z] & bb) 1083 break; 1084 r = r1; 1085 } 1086 1087 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1088 addmove(r, bn, rn, 0); 1089 for(;;) { 1090 r->act.b[z] |= bb; 1091 p = r->prog; 1092 1093 if(r->use1.b[z] & bb) { 1094 if(debug['R']) 1095 print("%P", p); 1096 addreg(&p->from, rn); 1097 if(debug['R']) 1098 print("\t.c%P\n", p); 1099 } 1100 if((r->use2.b[z]|r->set.b[z]) & bb) { 1101 if(debug['R']) 1102 print("%P", p); 1103 addreg(&p->to, rn); 1104 if(debug['R']) 1105 print("\t.c%P\n", p); 1106 } 1107 1108 if(STORE(r) & r->regdiff.b[z] & bb) 1109 addmove(r, bn, rn, 1); 1110 r->regu |= rb; 1111 1112 if(r->refbehind.b[z] & bb) 1113 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1114 if(r1->refahead.b[z] & bb) 1115 paint3(r1, bn, rb, rn); 1116 1117 if(!(r->refahead.b[z] & bb)) 1118 break; 1119 r1 = r->s2; 1120 if(r1 != R) 1121 if(r1->refbehind.b[z] & bb) 1122 paint3(r1, bn, rb, rn); 1123 r = r->s1; 1124 if(r == R) 1125 break; 1126 if(r->act.b[z] & bb) 1127 break; 1128 if(!(r->refbehind.b[z] & bb)) 1129 break; 1130 } 1131 } 1132 1133 void 1134 addreg(Adr *a, int rn) 1135 { 1136 1137 a->sym = 0; 1138 a->name = D_NONE; 1139 a->type = D_REG; 1140 a->reg = rn; 1141 if(rn >= NREG) { 1142 a->type = D_FREG; 1143 a->reg = rn-NREG; 1144 } 1145 } 1146 1147 /* 1148 * bit reg 1149 * 0 R0 1150 * 1 R1 1151 * ... ... 
1152 * 10 R10 1153 * 12 R12 1154 */ 1155 int32 1156 RtoB(int r) 1157 { 1158 1159 if(r < 2 || (r >= REGTMP-2 && r != 12)) // excluded R9 and R10 for m and g, but not R12 1160 return 0; 1161 return 1L << r; 1162 } 1163 1164 int 1165 BtoR(int32 b) 1166 { 1167 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 1168 if(b == 0) 1169 return 0; 1170 return bitno(b); 1171 } 1172 1173 /* 1174 * bit reg 1175 * 18 F2 1176 * 19 F3 1177 * ... ... 1178 * 31 F15 1179 */ 1180 int32 1181 FtoB(int f) 1182 { 1183 1184 if(f < 2 || f > NFREG-1) 1185 return 0; 1186 return 1L << (f + 16); 1187 } 1188 1189 int 1190 BtoF(int32 b) 1191 { 1192 1193 b &= 0xfffc0000L; 1194 if(b == 0) 1195 return 0; 1196 return bitno(b) - 16; 1197 }