// Source: github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/5c/reg.c
// Inferno utils/5c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 32 #include "gc.h" 33 34 void addsplits(void); 35 36 Reg* 37 rega(void) 38 { 39 Reg *r; 40 41 r = freer; 42 if(r == R) { 43 r = alloc(sizeof(*r)); 44 } else 45 freer = r->link; 46 47 *r = zreg; 48 return r; 49 } 50 51 int 52 rcmp(const void *a1, const void *a2) 53 { 54 Rgn *p1, *p2; 55 int c1, c2; 56 57 p1 = (Rgn*)a1; 58 p2 = (Rgn*)a2; 59 c1 = p2->cost; 60 c2 = p1->cost; 61 if(c1 -= c2) 62 return c1; 63 return p2->varno - p1->varno; 64 } 65 66 void 67 regopt(Prog *p) 68 { 69 Reg *r, *r1, *r2; 70 Prog *p1; 71 int i, z; 72 int32 initpc, val, npc; 73 uint32 vreg; 74 Bits bit; 75 struct 76 { 77 int32 m; 78 int32 c; 79 Reg* p; 80 } log5[6], *lp; 81 82 firstr = R; 83 lastr = R; 84 nvar = 0; 85 regbits = 0; 86 for(z=0; z<BITS; z++) { 87 externs.b[z] = 0; 88 params.b[z] = 0; 89 consts.b[z] = 0; 90 addrs.b[z] = 0; 91 } 92 93 /* 94 * pass 1 95 * build aux data structure 96 * allocate pcs 97 * find use and set of variables 98 */ 99 val = 5L * 5L * 5L * 5L * 5L; 100 lp = log5; 101 for(i=0; i<5; i++) { 102 lp->m = val; 103 lp->c = 0; 104 lp->p = R; 105 val /= 5L; 106 lp++; 107 } 108 val = 0; 109 for(; p != P; p = p->link) { 110 switch(p->as) { 111 case ADATA: 112 case AGLOBL: 113 case ANAME: 114 case ASIGNAME: 115 case AFUNCDATA: 116 continue; 117 } 118 r = rega(); 119 if(firstr == R) { 120 firstr = r; 121 lastr = r; 122 } else { 123 lastr->link = r; 124 r->p1 = lastr; 125 lastr->s1 = r; 126 lastr = r; 127 } 128 r->prog = p; 129 r->pc = val; 130 val++; 131 132 lp = log5; 133 for(i=0; i<5; i++) { 134 lp->c--; 135 if(lp->c <= 0) { 136 lp->c = lp->m; 137 if(lp->p != R) 138 lp->p->log5 = r; 139 lp->p = r; 140 (lp+1)->c = 0; 141 break; 142 } 143 lp++; 144 } 145 146 r1 = r->p1; 147 if(r1 != R) 148 switch(r1->prog->as) { 149 case ARET: 150 case AB: 151 case ARFE: 152 r->p1 = R; 153 r1->s1 = R; 154 } 155 156 /* 157 * left side always read 158 */ 159 bit = mkvar(&p->from, p->as==AMOVW); 160 for(z=0; z<BITS; z++) 161 r->use1.b[z] |= bit.b[z]; 162 163 /* 164 * right side depends on 
opcode 165 */ 166 bit = mkvar(&p->to, 0); 167 if(bany(&bit)) 168 switch(p->as) { 169 default: 170 diag(Z, "reg: unknown asop: %A", p->as); 171 break; 172 173 /* 174 * right side write 175 */ 176 case ANOP: 177 case AMOVB: 178 case AMOVBS: 179 case AMOVBU: 180 case AMOVH: 181 case AMOVHS: 182 case AMOVHU: 183 case AMOVW: 184 case AMOVF: 185 case AMOVD: 186 for(z=0; z<BITS; z++) 187 r->set.b[z] |= bit.b[z]; 188 break; 189 190 /* 191 * right side read 192 */ 193 case APLD: 194 for(z=0; z<BITS; z++) 195 r->use2.b[z] |= bit.b[z]; 196 break; 197 198 /* 199 * funny 200 */ 201 case ABL: 202 for(z=0; z<BITS; z++) 203 addrs.b[z] |= bit.b[z]; 204 break; 205 } 206 207 if(p->as == AMOVM) { 208 if(p->from.type == D_CONST) 209 z = p->from.offset; 210 else 211 z = p->to.offset; 212 for(i=0; z; i++) { 213 if(z&1) 214 regbits |= RtoB(i); 215 z >>= 1; 216 } 217 } 218 } 219 if(firstr == R) 220 return; 221 initpc = pc - val; 222 npc = val; 223 224 /* 225 * pass 2 226 * turn branch references to pointers 227 * build back pointers 228 */ 229 for(r = firstr; r != R; r = r->link) { 230 p = r->prog; 231 if(p->to.type == D_BRANCH) { 232 val = p->to.offset - initpc; 233 r1 = firstr; 234 while(r1 != R) { 235 r2 = r1->log5; 236 if(r2 != R && val >= r2->pc) { 237 r1 = r2; 238 continue; 239 } 240 if(r1->pc == val) 241 break; 242 r1 = r1->link; 243 } 244 if(r1 == R) { 245 nearln = p->lineno; 246 diag(Z, "ref not found\n%P", p); 247 continue; 248 } 249 if(r1 == r) { 250 nearln = p->lineno; 251 diag(Z, "ref to self\n%P", p); 252 continue; 253 } 254 r->s2 = r1; 255 r->p2link = r1->p2; 256 r1->p2 = r; 257 } 258 } 259 if(debug['R']) { 260 p = firstr->prog; 261 print("\n%L %D\n", p->lineno, &p->from); 262 } 263 264 /* 265 * pass 2.5 266 * find looping structure 267 */ 268 for(r = firstr; r != R; r = r->link) 269 r->active = 0; 270 change = 0; 271 loopit(firstr, npc); 272 273 /* 274 * pass 3 275 * iterate propagating usage 276 * back until flow graph is complete 277 */ 278 loop1: 279 change = 0; 280 
for(r = firstr; r != R; r = r->link) 281 r->active = 0; 282 for(r = firstr; r != R; r = r->link) 283 if(r->prog->as == ARET) 284 prop(r, zbits, zbits); 285 loop11: 286 /* pick up unreachable code */ 287 i = 0; 288 for(r = firstr; r != R; r = r1) { 289 r1 = r->link; 290 if(r1 && r1->active && !r->active) { 291 prop(r, zbits, zbits); 292 i = 1; 293 } 294 } 295 if(i) 296 goto loop11; 297 if(change) 298 goto loop1; 299 300 301 /* 302 * pass 4 303 * iterate propagating register/variable synchrony 304 * forward until graph is complete 305 */ 306 loop2: 307 change = 0; 308 for(r = firstr; r != R; r = r->link) 309 r->active = 0; 310 synch(firstr, zbits); 311 if(change) 312 goto loop2; 313 314 addsplits(); 315 316 if(debug['R'] && debug['v']) { 317 print("\nprop structure:\n"); 318 for(r = firstr; r != R; r = r->link) { 319 print("%d:%P", r->loop, r->prog); 320 for(z=0; z<BITS; z++) 321 bit.b[z] = r->set.b[z] | 322 r->refahead.b[z] | r->calahead.b[z] | 323 r->refbehind.b[z] | r->calbehind.b[z] | 324 r->use1.b[z] | r->use2.b[z]; 325 if(bany(&bit)) { 326 print("\t"); 327 if(bany(&r->use1)) 328 print(" u1=%B", r->use1); 329 if(bany(&r->use2)) 330 print(" u2=%B", r->use2); 331 if(bany(&r->set)) 332 print(" st=%B", r->set); 333 if(bany(&r->refahead)) 334 print(" ra=%B", r->refahead); 335 if(bany(&r->calahead)) 336 print(" ca=%B", r->calahead); 337 if(bany(&r->refbehind)) 338 print(" rb=%B", r->refbehind); 339 if(bany(&r->calbehind)) 340 print(" cb=%B", r->calbehind); 341 } 342 print("\n"); 343 } 344 } 345 346 /* 347 * pass 5 348 * isolate regions 349 * calculate costs (paint1) 350 */ 351 r = firstr; 352 if(r) { 353 for(z=0; z<BITS; z++) 354 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 355 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 356 if(bany(&bit)) { 357 nearln = r->prog->lineno; 358 warn(Z, "used and not set: %B", bit); 359 if(debug['R'] && !debug['w']) 360 print("used and not set: %B\n", bit); 361 } 362 } 363 364 for(r = firstr; r != R; r = r->link) 365 
r->act = zbits; 366 rgp = region; 367 nregion = 0; 368 for(r = firstr; r != R; r = r->link) { 369 for(z=0; z<BITS; z++) 370 bit.b[z] = r->set.b[z] & 371 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 372 if(bany(&bit)) { 373 nearln = r->prog->lineno; 374 warn(Z, "set and not used: %B", bit); 375 if(debug['R']) 376 print("set and not used: %B\n", bit); 377 excise(r); 378 } 379 for(z=0; z<BITS; z++) 380 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 381 while(bany(&bit)) { 382 i = bnum(bit); 383 rgp->enter = r; 384 rgp->varno = i; 385 change = 0; 386 if(debug['R'] && debug['v']) 387 print("\n"); 388 paint1(r, i); 389 bit.b[i/32] &= ~(1L<<(i%32)); 390 if(change <= 0) { 391 if(debug['R']) 392 print("%L $%d: %B\n", 393 r->prog->lineno, change, blsh(i)); 394 continue; 395 } 396 rgp->cost = change; 397 nregion++; 398 if(nregion >= NRGN) { 399 warn(Z, "too many regions"); 400 goto brk; 401 } 402 rgp++; 403 } 404 } 405 brk: 406 qsort(region, nregion, sizeof(region[0]), rcmp); 407 408 /* 409 * pass 6 410 * determine used registers (paint2) 411 * replace code (paint3) 412 */ 413 rgp = region; 414 for(i=0; i<nregion; i++) { 415 bit = blsh(rgp->varno); 416 vreg = paint2(rgp->enter, rgp->varno); 417 vreg = allreg(vreg, rgp); 418 if(debug['R']) { 419 if(rgp->regno >= NREG) 420 print("%L $%d F%d: %B\n", 421 rgp->enter->prog->lineno, 422 rgp->cost, 423 rgp->regno-NREG, 424 bit); 425 else 426 print("%L $%d R%d: %B\n", 427 rgp->enter->prog->lineno, 428 rgp->cost, 429 rgp->regno, 430 bit); 431 } 432 if(rgp->regno != 0) 433 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 434 rgp++; 435 } 436 /* 437 * pass 7 438 * peep-hole on basic block 439 */ 440 if(!debug['R'] || debug['P']) 441 peep(); 442 443 /* 444 * pass 8 445 * recalculate pc 446 */ 447 val = initpc; 448 for(r = firstr; r != R; r = r1) { 449 r->pc = val; 450 p = r->prog; 451 p1 = P; 452 r1 = r->link; 453 if(r1 != R) 454 p1 = r1->prog; 455 for(; p != p1; p = p->link) { 456 switch(p->as) { 457 default: 458 val++; 459 
break; 460 461 case ANOP: 462 case ADATA: 463 case AGLOBL: 464 case ANAME: 465 case ASIGNAME: 466 case AFUNCDATA: 467 break; 468 } 469 } 470 } 471 pc = val; 472 473 /* 474 * fix up branches 475 */ 476 if(debug['R']) 477 if(bany(&addrs)) 478 print("addrs: %B\n", addrs); 479 480 r1 = 0; /* set */ 481 for(r = firstr; r != R; r = r->link) { 482 p = r->prog; 483 if(p->to.type == D_BRANCH) { 484 p->to.offset = r->s2->pc; 485 p->to.u.branch = r->s2->prog; 486 } 487 r1 = r; 488 } 489 490 /* 491 * last pass 492 * eliminate nops 493 * free aux structures 494 */ 495 for(p = firstr->prog; p != P; p = p->link){ 496 while(p->link && p->link->as == ANOP) 497 p->link = p->link->link; 498 } 499 if(r1 != R) { 500 r1->link = freer; 501 freer = firstr; 502 } 503 } 504 505 void 506 addsplits(void) 507 { 508 Reg *r, *r1; 509 int z, i; 510 Bits bit; 511 512 for(r = firstr; r != R; r = r->link) { 513 if(r->loop > 1) 514 continue; 515 if(r->prog->as == ABL) 516 continue; 517 for(r1 = r->p2; r1 != R; r1 = r1->p2link) { 518 if(r1->loop <= 1) 519 continue; 520 for(z=0; z<BITS; z++) 521 bit.b[z] = r1->calbehind.b[z] & 522 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 523 ~(r->calahead.b[z] & addrs.b[z]); 524 while(bany(&bit)) { 525 i = bnum(bit); 526 bit.b[i/32] &= ~(1L << (i%32)); 527 } 528 } 529 } 530 } 531 532 /* 533 * add mov b,rn 534 * just after r 535 */ 536 void 537 addmove(Reg *r, int bn, int rn, int f) 538 { 539 Prog *p, *p1; 540 Addr *a; 541 Var *v; 542 543 p1 = alloc(sizeof(*p1)); 544 *p1 = zprog; 545 p = r->prog; 546 547 p1->link = p->link; 548 p->link = p1; 549 p1->lineno = p->lineno; 550 551 v = var + bn; 552 553 a = &p1->to; 554 a->sym = v->sym; 555 a->name = v->name; 556 a->offset = v->offset; 557 a->etype = v->etype; 558 a->type = D_OREG; 559 if(a->etype == TARRAY || a->sym == nil) 560 a->type = D_CONST; 561 562 p1->as = AMOVW; 563 if(v->etype == TCHAR || v->etype == TUCHAR) 564 p1->as = AMOVBS; 565 if(v->etype == TSHORT || v->etype == TUSHORT) 566 p1->as = AMOVHS; 567 
if(v->etype == TFLOAT) 568 p1->as = AMOVF; 569 if(v->etype == TDOUBLE) 570 p1->as = AMOVD; 571 572 p1->from.type = D_REG; 573 p1->from.reg = rn; 574 if(rn >= NREG) { 575 p1->from.type = D_FREG; 576 p1->from.reg = rn-NREG; 577 } 578 if(!f) { 579 p1->from = *a; 580 *a = zprog.from; 581 a->type = D_REG; 582 a->reg = rn; 583 if(rn >= NREG) { 584 a->type = D_FREG; 585 a->reg = rn-NREG; 586 } 587 if(v->etype == TUCHAR) 588 p1->as = AMOVBU; 589 if(v->etype == TUSHORT) 590 p1->as = AMOVHU; 591 } 592 if(debug['R']) 593 print("%P\t.a%P\n", p, p1); 594 } 595 596 Bits 597 mkvar(Addr *a, int docon) 598 { 599 Var *v; 600 int i, t, n, et, z; 601 int32 o; 602 Bits bit; 603 LSym *s; 604 605 t = a->type; 606 if(t == D_REG && a->reg != NREG) 607 regbits |= RtoB(a->reg); 608 if(t == D_FREG && a->reg != NREG) 609 regbits |= FtoB(a->reg); 610 s = a->sym; 611 o = a->offset; 612 et = a->etype; 613 if(s == nil) { 614 if(t != D_CONST || !docon || a->reg != NREG) 615 goto none; 616 et = TLONG; 617 } 618 if(t == D_CONST) { 619 if(s == nil && sval(o)) 620 goto none; 621 } 622 623 n = a->name; 624 v = var; 625 for(i=0; i<nvar; i++) { 626 if(s == v->sym) 627 if(n == v->name) 628 if(o == v->offset) 629 goto out; 630 v++; 631 } 632 if(s) 633 if(s->name[0] == '.') 634 goto none; 635 if(nvar >= NVAR) { 636 if(debug['w'] > 1 && s) 637 warn(Z, "variable not optimized: %s", s->name); 638 goto none; 639 } 640 i = nvar; 641 nvar++; 642 v = &var[i]; 643 v->sym = s; 644 v->offset = o; 645 v->etype = et; 646 v->name = n; 647 if(debug['R']) 648 print("bit=%2d et=%2d %D\n", i, et, a); 649 out: 650 bit = blsh(i); 651 if(n == D_EXTERN || n == D_STATIC) 652 for(z=0; z<BITS; z++) 653 externs.b[z] |= bit.b[z]; 654 if(n == D_PARAM) 655 for(z=0; z<BITS; z++) 656 params.b[z] |= bit.b[z]; 657 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 658 for(z=0; z<BITS; z++) 659 addrs.b[z] |= bit.b[z]; 660 if(t == D_CONST) { 661 if(s == nil) { 662 for(z=0; z<BITS; z++) 663 consts.b[z] |= bit.b[z]; 664 return bit; 665 
} 666 if(et != TARRAY) 667 for(z=0; z<BITS; z++) 668 addrs.b[z] |= bit.b[z]; 669 for(z=0; z<BITS; z++) 670 params.b[z] |= bit.b[z]; 671 return bit; 672 } 673 if(t == D_OREG) 674 return bit; 675 676 none: 677 return zbits; 678 } 679 680 void 681 prop(Reg *r, Bits ref, Bits cal) 682 { 683 Reg *r1, *r2; 684 int z; 685 686 for(r1 = r; r1 != R; r1 = r1->p1) { 687 for(z=0; z<BITS; z++) { 688 ref.b[z] |= r1->refahead.b[z]; 689 if(ref.b[z] != r1->refahead.b[z]) { 690 r1->refahead.b[z] = ref.b[z]; 691 change++; 692 } 693 cal.b[z] |= r1->calahead.b[z]; 694 if(cal.b[z] != r1->calahead.b[z]) { 695 r1->calahead.b[z] = cal.b[z]; 696 change++; 697 } 698 } 699 switch(r1->prog->as) { 700 case ABL: 701 for(z=0; z<BITS; z++) { 702 cal.b[z] |= ref.b[z] | externs.b[z]; 703 ref.b[z] = 0; 704 } 705 break; 706 707 case ATEXT: 708 for(z=0; z<BITS; z++) { 709 cal.b[z] = 0; 710 ref.b[z] = 0; 711 } 712 break; 713 714 case ARET: 715 for(z=0; z<BITS; z++) { 716 cal.b[z] = externs.b[z]; 717 ref.b[z] = 0; 718 } 719 } 720 for(z=0; z<BITS; z++) { 721 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 722 r1->use1.b[z] | r1->use2.b[z]; 723 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 724 r1->refbehind.b[z] = ref.b[z]; 725 r1->calbehind.b[z] = cal.b[z]; 726 } 727 if(r1->active) 728 break; 729 r1->active = 1; 730 } 731 for(; r != r1; r = r->p1) 732 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 733 prop(r2, r->refbehind, r->calbehind); 734 } 735 736 /* 737 * find looping structure 738 * 739 * 1) find reverse postordering 740 * 2) find approximate dominators, 741 * the actual dominators if the flow graph is reducible 742 * otherwise, dominators plus some other non-dominators. 743 * See Matthew S. Hecht and Jeffrey D. Ullman, 744 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 745 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 746 * Oct. 1-3, 1973, pp. 207-217. 747 * 3) find all nodes with a predecessor dominated by the current node. 
748 * such a node is a loop head. 749 * recursively, all preds with a greater rpo number are in the loop 750 */ 751 int32 752 postorder(Reg *r, Reg **rpo2r, int32 n) 753 { 754 Reg *r1; 755 756 r->rpo = 1; 757 r1 = r->s1; 758 if(r1 && !r1->rpo) 759 n = postorder(r1, rpo2r, n); 760 r1 = r->s2; 761 if(r1 && !r1->rpo) 762 n = postorder(r1, rpo2r, n); 763 rpo2r[n] = r; 764 n++; 765 return n; 766 } 767 768 int32 769 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 770 { 771 int32 t; 772 773 if(rpo1 == -1) 774 return rpo2; 775 while(rpo1 != rpo2){ 776 if(rpo1 > rpo2){ 777 t = rpo2; 778 rpo2 = rpo1; 779 rpo1 = t; 780 } 781 while(rpo1 < rpo2){ 782 t = idom[rpo2]; 783 if(t >= rpo2) 784 fatal(Z, "bad idom"); 785 rpo2 = t; 786 } 787 } 788 return rpo1; 789 } 790 791 int 792 doms(int32 *idom, int32 r, int32 s) 793 { 794 while(s > r) 795 s = idom[s]; 796 return s == r; 797 } 798 799 int 800 loophead(int32 *idom, Reg *r) 801 { 802 int32 src; 803 804 src = r->rpo; 805 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 806 return 1; 807 for(r = r->p2; r != R; r = r->p2link) 808 if(doms(idom, src, r->rpo)) 809 return 1; 810 return 0; 811 } 812 813 void 814 loopmark(Reg **rpo2r, int32 head, Reg *r) 815 { 816 if(r->rpo < head || r->active == head) 817 return; 818 r->active = head; 819 r->loop += LOOP; 820 if(r->p1 != R) 821 loopmark(rpo2r, head, r->p1); 822 for(r = r->p2; r != R; r = r->p2link) 823 loopmark(rpo2r, head, r); 824 } 825 826 void 827 loopit(Reg *r, int32 nr) 828 { 829 Reg *r1; 830 int32 i, d, me; 831 832 if(nr > maxnr) { 833 rpo2r = alloc(nr * sizeof(Reg*)); 834 idom = alloc(nr * sizeof(int32)); 835 maxnr = nr; 836 } 837 d = postorder(r, rpo2r, 0); 838 if(d > nr) 839 fatal(Z, "too many reg nodes"); 840 nr = d; 841 for(i = 0; i < nr / 2; i++){ 842 r1 = rpo2r[i]; 843 rpo2r[i] = rpo2r[nr - 1 - i]; 844 rpo2r[nr - 1 - i] = r1; 845 } 846 for(i = 0; i < nr; i++) 847 rpo2r[i]->rpo = i; 848 849 idom[0] = 0; 850 for(i = 0; i < nr; i++){ 851 r1 = rpo2r[i]; 852 me = r1->rpo; 853 d = -1; 854 
if(r1->p1 != R && r1->p1->rpo < me) 855 d = r1->p1->rpo; 856 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 857 if(r1->rpo < me) 858 d = rpolca(idom, d, r1->rpo); 859 idom[i] = d; 860 } 861 862 for(i = 0; i < nr; i++){ 863 r1 = rpo2r[i]; 864 r1->loop++; 865 if(r1->p2 != R && loophead(idom, r1)) 866 loopmark(rpo2r, i, r1); 867 } 868 } 869 870 void 871 synch(Reg *r, Bits dif) 872 { 873 Reg *r1; 874 int z; 875 876 for(r1 = r; r1 != R; r1 = r1->s1) { 877 for(z=0; z<BITS; z++) { 878 dif.b[z] = (dif.b[z] & 879 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 880 r1->set.b[z] | r1->regdiff.b[z]; 881 if(dif.b[z] != r1->regdiff.b[z]) { 882 r1->regdiff.b[z] = dif.b[z]; 883 change++; 884 } 885 } 886 if(r1->active) 887 break; 888 r1->active = 1; 889 for(z=0; z<BITS; z++) 890 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 891 if(r1->s2 != R) 892 synch(r1->s2, dif); 893 } 894 } 895 896 uint32 897 allreg(uint32 b, Rgn *r) 898 { 899 Var *v; 900 int i; 901 902 v = var + r->varno; 903 r->regno = 0; 904 switch(v->etype) { 905 906 default: 907 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 908 break; 909 910 case TCHAR: 911 case TUCHAR: 912 case TSHORT: 913 case TUSHORT: 914 case TINT: 915 case TUINT: 916 case TLONG: 917 case TULONG: 918 case TIND: 919 case TARRAY: 920 i = BtoR(~b); 921 if(i && r->cost >= 0) { 922 r->regno = i; 923 return RtoB(i); 924 } 925 break; 926 927 case TVLONG: 928 case TDOUBLE: 929 case TFLOAT: 930 i = BtoF(~b); 931 if(i && r->cost >= 0) { 932 r->regno = i+NREG; 933 return FtoB(i); 934 } 935 break; 936 } 937 return 0; 938 } 939 940 void 941 paint1(Reg *r, int bn) 942 { 943 Reg *r1; 944 Prog *p; 945 int z; 946 uint32 bb; 947 948 z = bn/32; 949 bb = 1L<<(bn%32); 950 if(r->act.b[z] & bb) 951 return; 952 for(;;) { 953 if(!(r->refbehind.b[z] & bb)) 954 break; 955 r1 = r->p1; 956 if(r1 == R) 957 break; 958 if(!(r1->refahead.b[z] & bb)) 959 break; 960 if(r1->act.b[z] & bb) 961 break; 962 r = r1; 963 } 964 965 if(LOAD(r) & ~(r->set.b[z] & 
~(r->use1.b[z]|r->use2.b[z])) & bb) { 966 change -= CLOAD * r->loop; 967 if(debug['R'] && debug['v']) 968 print("%d%P\td %B $%d\n", r->loop, 969 r->prog, blsh(bn), change); 970 } 971 for(;;) { 972 r->act.b[z] |= bb; 973 p = r->prog; 974 975 if(r->use1.b[z] & bb) { 976 change += CREF * r->loop; 977 if(debug['R'] && debug['v']) 978 print("%d%P\tu1 %B $%d\n", r->loop, 979 p, blsh(bn), change); 980 } 981 982 if((r->use2.b[z]|r->set.b[z]) & bb) { 983 change += CREF * r->loop; 984 if(debug['R'] && debug['v']) 985 print("%d%P\tu2 %B $%d\n", r->loop, 986 p, blsh(bn), change); 987 } 988 989 if(STORE(r) & r->regdiff.b[z] & bb) { 990 change -= CLOAD * r->loop; 991 if(debug['R'] && debug['v']) 992 print("%d%P\tst %B $%d\n", r->loop, 993 p, blsh(bn), change); 994 } 995 996 if(r->refbehind.b[z] & bb) 997 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 998 if(r1->refahead.b[z] & bb) 999 paint1(r1, bn); 1000 1001 if(!(r->refahead.b[z] & bb)) 1002 break; 1003 r1 = r->s2; 1004 if(r1 != R) 1005 if(r1->refbehind.b[z] & bb) 1006 paint1(r1, bn); 1007 r = r->s1; 1008 if(r == R) 1009 break; 1010 if(r->act.b[z] & bb) 1011 break; 1012 if(!(r->refbehind.b[z] & bb)) 1013 break; 1014 } 1015 } 1016 1017 uint32 1018 paint2(Reg *r, int bn) 1019 { 1020 Reg *r1; 1021 int z; 1022 uint32 bb, vreg; 1023 1024 z = bn/32; 1025 bb = 1L << (bn%32); 1026 vreg = regbits; 1027 if(!(r->act.b[z] & bb)) 1028 return vreg; 1029 for(;;) { 1030 if(!(r->refbehind.b[z] & bb)) 1031 break; 1032 r1 = r->p1; 1033 if(r1 == R) 1034 break; 1035 if(!(r1->refahead.b[z] & bb)) 1036 break; 1037 if(!(r1->act.b[z] & bb)) 1038 break; 1039 r = r1; 1040 } 1041 for(;;) { 1042 r->act.b[z] &= ~bb; 1043 1044 vreg |= r->regu; 1045 1046 if(r->refbehind.b[z] & bb) 1047 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1048 if(r1->refahead.b[z] & bb) 1049 vreg |= paint2(r1, bn); 1050 1051 if(!(r->refahead.b[z] & bb)) 1052 break; 1053 r1 = r->s2; 1054 if(r1 != R) 1055 if(r1->refbehind.b[z] & bb) 1056 vreg |= paint2(r1, bn); 1057 r = r->s1; 1058 if(r == R) 
1059 break; 1060 if(!(r->act.b[z] & bb)) 1061 break; 1062 if(!(r->refbehind.b[z] & bb)) 1063 break; 1064 } 1065 return vreg; 1066 } 1067 1068 void 1069 paint3(Reg *r, int bn, int32 rb, int rn) 1070 { 1071 Reg *r1; 1072 Prog *p; 1073 int z; 1074 uint32 bb; 1075 1076 z = bn/32; 1077 bb = 1L << (bn%32); 1078 if(r->act.b[z] & bb) 1079 return; 1080 for(;;) { 1081 if(!(r->refbehind.b[z] & bb)) 1082 break; 1083 r1 = r->p1; 1084 if(r1 == R) 1085 break; 1086 if(!(r1->refahead.b[z] & bb)) 1087 break; 1088 if(r1->act.b[z] & bb) 1089 break; 1090 r = r1; 1091 } 1092 1093 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1094 addmove(r, bn, rn, 0); 1095 for(;;) { 1096 r->act.b[z] |= bb; 1097 p = r->prog; 1098 1099 if(r->use1.b[z] & bb) { 1100 if(debug['R']) 1101 print("%P", p); 1102 addreg(&p->from, rn); 1103 if(debug['R']) 1104 print("\t.c%P\n", p); 1105 } 1106 if((r->use2.b[z]|r->set.b[z]) & bb) { 1107 if(debug['R']) 1108 print("%P", p); 1109 addreg(&p->to, rn); 1110 if(debug['R']) 1111 print("\t.c%P\n", p); 1112 } 1113 1114 if(STORE(r) & r->regdiff.b[z] & bb) 1115 addmove(r, bn, rn, 1); 1116 r->regu |= rb; 1117 1118 if(r->refbehind.b[z] & bb) 1119 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1120 if(r1->refahead.b[z] & bb) 1121 paint3(r1, bn, rb, rn); 1122 1123 if(!(r->refahead.b[z] & bb)) 1124 break; 1125 r1 = r->s2; 1126 if(r1 != R) 1127 if(r1->refbehind.b[z] & bb) 1128 paint3(r1, bn, rb, rn); 1129 r = r->s1; 1130 if(r == R) 1131 break; 1132 if(r->act.b[z] & bb) 1133 break; 1134 if(!(r->refbehind.b[z] & bb)) 1135 break; 1136 } 1137 } 1138 1139 void 1140 addreg(Addr *a, int rn) 1141 { 1142 1143 a->sym = 0; 1144 a->name = D_NONE; 1145 a->type = D_REG; 1146 a->reg = rn; 1147 if(rn >= NREG) { 1148 a->type = D_FREG; 1149 a->reg = rn-NREG; 1150 } 1151 } 1152 1153 /* 1154 * bit reg 1155 * 0 R0 1156 * 1 R1 1157 * ... ... 
1158 * 10 R10 1159 * 12 R12 1160 */ 1161 int32 1162 RtoB(int r) 1163 { 1164 1165 if(r < 2 || (r >= REGTMP-2 && r != 12)) // excluded R9 and R10 for m and g, but not R12 1166 return 0; 1167 return 1L << r; 1168 } 1169 1170 int 1171 BtoR(int32 b) 1172 { 1173 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 1174 if(b == 0) 1175 return 0; 1176 return bitno(b); 1177 } 1178 1179 /* 1180 * bit reg 1181 * 18 F2 1182 * 19 F3 1183 * ... ... 1184 * 31 F15 1185 */ 1186 int32 1187 FtoB(int f) 1188 { 1189 1190 if(f < 2 || f > NFREG-1) 1191 return 0; 1192 return 1L << (f + 16); 1193 } 1194 1195 int 1196 BtoF(int32 b) 1197 { 1198 1199 b &= 0xfffc0000L; 1200 if(b == 0) 1201 return 0; 1202 return bitno(b) - 16; 1203 }