// Source: github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/cmd/5c/reg.c
//
// Inferno utils/5c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 32 #include "gc.h" 33 34 void addsplits(void); 35 36 Reg* 37 rega(void) 38 { 39 Reg *r; 40 41 r = freer; 42 if(r == R) { 43 r = alloc(sizeof(*r)); 44 } else 45 freer = r->link; 46 47 *r = zreg; 48 return r; 49 } 50 51 int 52 rcmp(const void *a1, const void *a2) 53 { 54 Rgn *p1, *p2; 55 int c1, c2; 56 57 p1 = (Rgn*)a1; 58 p2 = (Rgn*)a2; 59 c1 = p2->cost; 60 c2 = p1->cost; 61 if(c1 -= c2) 62 return c1; 63 return p2->varno - p1->varno; 64 } 65 66 void 67 regopt(Prog *p) 68 { 69 Reg *r, *r1, *r2; 70 Prog *p1; 71 int i, z; 72 int32 initpc, val, npc; 73 uint32 vreg; 74 Bits bit; 75 struct 76 { 77 int32 m; 78 int32 c; 79 Reg* p; 80 } log5[6], *lp; 81 82 firstr = R; 83 lastr = R; 84 nvar = 0; 85 regbits = 0; 86 for(z=0; z<BITS; z++) { 87 externs.b[z] = 0; 88 params.b[z] = 0; 89 consts.b[z] = 0; 90 addrs.b[z] = 0; 91 } 92 93 /* 94 * pass 1 95 * build aux data structure 96 * allocate pcs 97 * find use and set of variables 98 */ 99 val = 5L * 5L * 5L * 5L * 5L; 100 lp = log5; 101 for(i=0; i<5; i++) { 102 lp->m = val; 103 lp->c = 0; 104 lp->p = R; 105 val /= 5L; 106 lp++; 107 } 108 val = 0; 109 for(; p != P; p = p->link) { 110 switch(p->as) { 111 case ADATA: 112 case AGLOBL: 113 case ANAME: 114 case ASIGNAME: 115 case AFUNCDATA: 116 continue; 117 } 118 r = rega(); 119 if(firstr == R) { 120 firstr = r; 121 lastr = r; 122 } else { 123 lastr->link = r; 124 r->p1 = lastr; 125 lastr->s1 = r; 126 lastr = r; 127 } 128 r->prog = p; 129 r->pc = val; 130 val++; 131 132 lp = log5; 133 for(i=0; i<5; i++) { 134 lp->c--; 135 if(lp->c <= 0) { 136 lp->c = lp->m; 137 if(lp->p != R) 138 lp->p->log5 = r; 139 lp->p = r; 140 (lp+1)->c = 0; 141 break; 142 } 143 lp++; 144 } 145 146 r1 = r->p1; 147 if(r1 != R) 148 switch(r1->prog->as) { 149 case ARET: 150 case AB: 151 case ARFE: 152 r->p1 = R; 153 r1->s1 = R; 154 } 155 156 /* 157 * left side always read 158 */ 159 bit = mkvar(&p->from, p->as==AMOVW); 160 for(z=0; z<BITS; z++) 161 r->use1.b[z] |= bit.b[z]; 162 163 /* 164 * right side depends on 
opcode 165 */ 166 bit = mkvar(&p->to, 0); 167 if(bany(&bit)) 168 switch(p->as) { 169 default: 170 diag(Z, "reg: unknown asop: %A", p->as); 171 break; 172 173 /* 174 * right side write 175 */ 176 case ANOP: 177 case AMOVB: 178 case AMOVBS: 179 case AMOVBU: 180 case AMOVH: 181 case AMOVHS: 182 case AMOVHU: 183 case AMOVW: 184 case AMOVF: 185 case AMOVD: 186 for(z=0; z<BITS; z++) 187 r->set.b[z] |= bit.b[z]; 188 break; 189 190 /* 191 * right side read 192 */ 193 case APLD: 194 for(z=0; z<BITS; z++) 195 r->use2.b[z] |= bit.b[z]; 196 break; 197 198 /* 199 * funny 200 */ 201 case ABL: 202 for(z=0; z<BITS; z++) 203 addrs.b[z] |= bit.b[z]; 204 break; 205 } 206 207 /* the mod/div runtime routines smash R12 */ 208 switch(p->as) { 209 case AMOD: 210 case AMODU: 211 case ADIV: 212 case ADIVU: 213 regbits |= RtoB(12); 214 break; 215 } 216 217 if(p->as == AMOVM) { 218 if(p->from.type == D_CONST) 219 z = p->from.offset; 220 else 221 z = p->to.offset; 222 for(i=0; z; i++) { 223 if(z&1) 224 regbits |= RtoB(i); 225 z >>= 1; 226 } 227 } 228 } 229 if(firstr == R) 230 return; 231 initpc = pc - val; 232 npc = val; 233 234 /* 235 * pass 2 236 * turn branch references to pointers 237 * build back pointers 238 */ 239 for(r = firstr; r != R; r = r->link) { 240 p = r->prog; 241 if(p->to.type == D_BRANCH) { 242 val = p->to.offset - initpc; 243 r1 = firstr; 244 while(r1 != R) { 245 r2 = r1->log5; 246 if(r2 != R && val >= r2->pc) { 247 r1 = r2; 248 continue; 249 } 250 if(r1->pc == val) 251 break; 252 r1 = r1->link; 253 } 254 if(r1 == R) { 255 nearln = p->lineno; 256 diag(Z, "ref not found\n%P", p); 257 continue; 258 } 259 if(r1 == r) { 260 nearln = p->lineno; 261 diag(Z, "ref to self\n%P", p); 262 continue; 263 } 264 r->s2 = r1; 265 r->p2link = r1->p2; 266 r1->p2 = r; 267 } 268 } 269 if(debug['R']) { 270 p = firstr->prog; 271 print("\n%L %D\n", p->lineno, &p->from); 272 } 273 274 /* 275 * pass 2.5 276 * find looping structure 277 */ 278 for(r = firstr; r != R; r = r->link) 279 r->active = 0; 
280 change = 0; 281 loopit(firstr, npc); 282 283 /* 284 * pass 3 285 * iterate propagating usage 286 * back until flow graph is complete 287 */ 288 loop1: 289 change = 0; 290 for(r = firstr; r != R; r = r->link) 291 r->active = 0; 292 for(r = firstr; r != R; r = r->link) 293 if(r->prog->as == ARET) 294 prop(r, zbits, zbits); 295 loop11: 296 /* pick up unreachable code */ 297 i = 0; 298 for(r = firstr; r != R; r = r1) { 299 r1 = r->link; 300 if(r1 && r1->active && !r->active) { 301 prop(r, zbits, zbits); 302 i = 1; 303 } 304 } 305 if(i) 306 goto loop11; 307 if(change) 308 goto loop1; 309 310 311 /* 312 * pass 4 313 * iterate propagating register/variable synchrony 314 * forward until graph is complete 315 */ 316 loop2: 317 change = 0; 318 for(r = firstr; r != R; r = r->link) 319 r->active = 0; 320 synch(firstr, zbits); 321 if(change) 322 goto loop2; 323 324 addsplits(); 325 326 if(debug['R'] && debug['v']) { 327 print("\nprop structure:\n"); 328 for(r = firstr; r != R; r = r->link) { 329 print("%d:%P", r->loop, r->prog); 330 for(z=0; z<BITS; z++) 331 bit.b[z] = r->set.b[z] | 332 r->refahead.b[z] | r->calahead.b[z] | 333 r->refbehind.b[z] | r->calbehind.b[z] | 334 r->use1.b[z] | r->use2.b[z]; 335 if(bany(&bit)) { 336 print("\t"); 337 if(bany(&r->use1)) 338 print(" u1=%B", r->use1); 339 if(bany(&r->use2)) 340 print(" u2=%B", r->use2); 341 if(bany(&r->set)) 342 print(" st=%B", r->set); 343 if(bany(&r->refahead)) 344 print(" ra=%B", r->refahead); 345 if(bany(&r->calahead)) 346 print(" ca=%B", r->calahead); 347 if(bany(&r->refbehind)) 348 print(" rb=%B", r->refbehind); 349 if(bany(&r->calbehind)) 350 print(" cb=%B", r->calbehind); 351 } 352 print("\n"); 353 } 354 } 355 356 /* 357 * pass 5 358 * isolate regions 359 * calculate costs (paint1) 360 */ 361 r = firstr; 362 if(r) { 363 for(z=0; z<BITS; z++) 364 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 365 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 366 if(bany(&bit)) { 367 nearln = r->prog->lineno; 368 
warn(Z, "used and not set: %B", bit); 369 if(debug['R'] && !debug['w']) 370 print("used and not set: %B\n", bit); 371 } 372 } 373 374 for(r = firstr; r != R; r = r->link) 375 r->act = zbits; 376 rgp = region; 377 nregion = 0; 378 for(r = firstr; r != R; r = r->link) { 379 for(z=0; z<BITS; z++) 380 bit.b[z] = r->set.b[z] & 381 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 382 if(bany(&bit)) { 383 nearln = r->prog->lineno; 384 warn(Z, "set and not used: %B", bit); 385 if(debug['R']) 386 print("set and not used: %B\n", bit); 387 excise(r); 388 } 389 for(z=0; z<BITS; z++) 390 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 391 while(bany(&bit)) { 392 i = bnum(bit); 393 rgp->enter = r; 394 rgp->varno = i; 395 change = 0; 396 if(debug['R'] && debug['v']) 397 print("\n"); 398 paint1(r, i); 399 bit.b[i/32] &= ~(1L<<(i%32)); 400 if(change <= 0) { 401 if(debug['R']) 402 print("%L $%d: %B\n", 403 r->prog->lineno, change, blsh(i)); 404 continue; 405 } 406 rgp->cost = change; 407 nregion++; 408 if(nregion >= NRGN) { 409 fatal(Z, "too many regions"); 410 goto brk; 411 } 412 rgp++; 413 } 414 } 415 brk: 416 qsort(region, nregion, sizeof(region[0]), rcmp); 417 418 /* 419 * pass 6 420 * determine used registers (paint2) 421 * replace code (paint3) 422 */ 423 rgp = region; 424 for(i=0; i<nregion; i++) { 425 bit = blsh(rgp->varno); 426 vreg = paint2(rgp->enter, rgp->varno); 427 vreg = allreg(vreg, rgp); 428 if(debug['R']) { 429 if(rgp->regno >= NREG) 430 print("%L $%d F%d: %B\n", 431 rgp->enter->prog->lineno, 432 rgp->cost, 433 rgp->regno-NREG, 434 bit); 435 else 436 print("%L $%d R%d: %B\n", 437 rgp->enter->prog->lineno, 438 rgp->cost, 439 rgp->regno, 440 bit); 441 } 442 if(rgp->regno != 0) 443 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 444 rgp++; 445 } 446 /* 447 * pass 7 448 * peep-hole on basic block 449 */ 450 if(!debug['R'] || debug['P']) 451 peep(); 452 453 /* 454 * pass 8 455 * recalculate pc 456 */ 457 val = initpc; 458 for(r = firstr; r != R; r = r1) { 459 
r->pc = val; 460 p = r->prog; 461 p1 = P; 462 r1 = r->link; 463 if(r1 != R) 464 p1 = r1->prog; 465 for(; p != p1; p = p->link) { 466 switch(p->as) { 467 default: 468 val++; 469 break; 470 471 case ANOP: 472 case ADATA: 473 case AGLOBL: 474 case ANAME: 475 case ASIGNAME: 476 case AFUNCDATA: 477 break; 478 } 479 } 480 } 481 pc = val; 482 483 /* 484 * fix up branches 485 */ 486 if(debug['R']) 487 if(bany(&addrs)) 488 print("addrs: %B\n", addrs); 489 490 r1 = 0; /* set */ 491 for(r = firstr; r != R; r = r->link) { 492 p = r->prog; 493 if(p->to.type == D_BRANCH) { 494 p->to.offset = r->s2->pc; 495 p->to.u.branch = r->s2->prog; 496 } 497 r1 = r; 498 } 499 500 /* 501 * last pass 502 * eliminate nops 503 * free aux structures 504 */ 505 for(p = firstr->prog; p != P; p = p->link){ 506 while(p->link && p->link->as == ANOP) 507 p->link = p->link->link; 508 } 509 if(r1 != R) { 510 r1->link = freer; 511 freer = firstr; 512 } 513 } 514 515 void 516 addsplits(void) 517 { 518 Reg *r, *r1; 519 int z, i; 520 Bits bit; 521 522 for(r = firstr; r != R; r = r->link) { 523 if(r->loop > 1) 524 continue; 525 if(r->prog->as == ABL) 526 continue; 527 for(r1 = r->p2; r1 != R; r1 = r1->p2link) { 528 if(r1->loop <= 1) 529 continue; 530 for(z=0; z<BITS; z++) 531 bit.b[z] = r1->calbehind.b[z] & 532 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 533 ~(r->calahead.b[z] & addrs.b[z]); 534 while(bany(&bit)) { 535 i = bnum(bit); 536 bit.b[i/32] &= ~(1L << (i%32)); 537 } 538 } 539 } 540 } 541 542 /* 543 * add mov b,rn 544 * just after r 545 */ 546 void 547 addmove(Reg *r, int bn, int rn, int f) 548 { 549 Prog *p, *p1; 550 Addr *a; 551 Var *v; 552 553 p1 = alloc(sizeof(*p1)); 554 *p1 = zprog; 555 p = r->prog; 556 557 p1->link = p->link; 558 p->link = p1; 559 p1->lineno = p->lineno; 560 561 v = var + bn; 562 563 a = &p1->to; 564 a->sym = v->sym; 565 a->name = v->name; 566 a->offset = v->offset; 567 a->etype = v->etype; 568 a->type = D_OREG; 569 if(a->etype == TARRAY || a->sym == nil) 570 a->type = 
D_CONST; 571 572 p1->as = AMOVW; 573 if(v->etype == TCHAR || v->etype == TUCHAR) 574 p1->as = AMOVBS; 575 if(v->etype == TSHORT || v->etype == TUSHORT) 576 p1->as = AMOVHS; 577 if(v->etype == TFLOAT) 578 p1->as = AMOVF; 579 if(v->etype == TDOUBLE) 580 p1->as = AMOVD; 581 582 p1->from.type = D_REG; 583 p1->from.reg = rn; 584 if(rn >= NREG) { 585 p1->from.type = D_FREG; 586 p1->from.reg = rn-NREG; 587 } 588 if(!f) { 589 p1->from = *a; 590 *a = zprog.from; 591 a->type = D_REG; 592 a->reg = rn; 593 if(rn >= NREG) { 594 a->type = D_FREG; 595 a->reg = rn-NREG; 596 } 597 if(v->etype == TUCHAR) 598 p1->as = AMOVBU; 599 if(v->etype == TUSHORT) 600 p1->as = AMOVHU; 601 } 602 if(debug['R']) 603 print("%P\t.a%P\n", p, p1); 604 } 605 606 Bits 607 mkvar(Addr *a, int docon) 608 { 609 Var *v; 610 int i, t, n, et, z; 611 int32 o; 612 Bits bit; 613 LSym *s; 614 615 t = a->type; 616 if(t == D_REG && a->reg != NREG) 617 regbits |= RtoB(a->reg); 618 if(t == D_FREG && a->reg != NREG) 619 regbits |= FtoB(a->reg); 620 s = a->sym; 621 o = a->offset; 622 et = a->etype; 623 if(s == nil) { 624 if(t != D_CONST || !docon || a->reg != NREG) 625 goto none; 626 et = TLONG; 627 } 628 if(t == D_CONST) { 629 if(s == nil && sval(o)) 630 goto none; 631 } 632 633 n = a->name; 634 v = var; 635 for(i=0; i<nvar; i++) { 636 if(s == v->sym) 637 if(n == v->name) 638 if(o == v->offset) 639 goto out; 640 v++; 641 } 642 if(s) 643 if(s->name[0] == '.') 644 goto none; 645 if(nvar >= NVAR) 646 fatal(Z, "variable not optimized: %s", s->name); 647 i = nvar; 648 nvar++; 649 v = &var[i]; 650 v->sym = s; 651 v->offset = o; 652 v->etype = et; 653 v->name = n; 654 if(debug['R']) 655 print("bit=%2d et=%2d %D\n", i, et, a); 656 out: 657 bit = blsh(i); 658 if(n == D_EXTERN || n == D_STATIC) 659 for(z=0; z<BITS; z++) 660 externs.b[z] |= bit.b[z]; 661 if(n == D_PARAM) 662 for(z=0; z<BITS; z++) 663 params.b[z] |= bit.b[z]; 664 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 665 for(z=0; z<BITS; z++) 666 addrs.b[z] |= 
bit.b[z]; 667 if(t == D_CONST) { 668 if(s == nil) { 669 for(z=0; z<BITS; z++) 670 consts.b[z] |= bit.b[z]; 671 return bit; 672 } 673 if(et != TARRAY) 674 for(z=0; z<BITS; z++) 675 addrs.b[z] |= bit.b[z]; 676 for(z=0; z<BITS; z++) 677 params.b[z] |= bit.b[z]; 678 return bit; 679 } 680 if(t == D_OREG) 681 return bit; 682 683 none: 684 return zbits; 685 } 686 687 void 688 prop(Reg *r, Bits ref, Bits cal) 689 { 690 Reg *r1, *r2; 691 int z; 692 693 for(r1 = r; r1 != R; r1 = r1->p1) { 694 for(z=0; z<BITS; z++) { 695 ref.b[z] |= r1->refahead.b[z]; 696 if(ref.b[z] != r1->refahead.b[z]) { 697 r1->refahead.b[z] = ref.b[z]; 698 change++; 699 } 700 cal.b[z] |= r1->calahead.b[z]; 701 if(cal.b[z] != r1->calahead.b[z]) { 702 r1->calahead.b[z] = cal.b[z]; 703 change++; 704 } 705 } 706 switch(r1->prog->as) { 707 case ABL: 708 for(z=0; z<BITS; z++) { 709 cal.b[z] |= ref.b[z] | externs.b[z]; 710 ref.b[z] = 0; 711 } 712 break; 713 714 case ATEXT: 715 for(z=0; z<BITS; z++) { 716 cal.b[z] = 0; 717 ref.b[z] = 0; 718 } 719 break; 720 721 case ARET: 722 for(z=0; z<BITS; z++) { 723 cal.b[z] = externs.b[z]; 724 ref.b[z] = 0; 725 } 726 } 727 for(z=0; z<BITS; z++) { 728 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 729 r1->use1.b[z] | r1->use2.b[z]; 730 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 731 r1->refbehind.b[z] = ref.b[z]; 732 r1->calbehind.b[z] = cal.b[z]; 733 } 734 if(r1->active) 735 break; 736 r1->active = 1; 737 } 738 for(; r != r1; r = r->p1) 739 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 740 prop(r2, r->refbehind, r->calbehind); 741 } 742 743 /* 744 * find looping structure 745 * 746 * 1) find reverse postordering 747 * 2) find approximate dominators, 748 * the actual dominators if the flow graph is reducible 749 * otherwise, dominators plus some other non-dominators. 750 * See Matthew S. Hecht and Jeffrey D. Ullman, 751 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 752 * Conf. Record of ACM Symp. on Principles of Prog. 
Langs, Boston, Massachusetts, 753 * Oct. 1-3, 1973, pp. 207-217. 754 * 3) find all nodes with a predecessor dominated by the current node. 755 * such a node is a loop head. 756 * recursively, all preds with a greater rpo number are in the loop 757 */ 758 int32 759 postorder(Reg *r, Reg **rpo2r, int32 n) 760 { 761 Reg *r1; 762 763 r->rpo = 1; 764 r1 = r->s1; 765 if(r1 && !r1->rpo) 766 n = postorder(r1, rpo2r, n); 767 r1 = r->s2; 768 if(r1 && !r1->rpo) 769 n = postorder(r1, rpo2r, n); 770 rpo2r[n] = r; 771 n++; 772 return n; 773 } 774 775 int32 776 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 777 { 778 int32 t; 779 780 if(rpo1 == -1) 781 return rpo2; 782 while(rpo1 != rpo2){ 783 if(rpo1 > rpo2){ 784 t = rpo2; 785 rpo2 = rpo1; 786 rpo1 = t; 787 } 788 while(rpo1 < rpo2){ 789 t = idom[rpo2]; 790 if(t >= rpo2) 791 fatal(Z, "bad idom"); 792 rpo2 = t; 793 } 794 } 795 return rpo1; 796 } 797 798 int 799 doms(int32 *idom, int32 r, int32 s) 800 { 801 while(s > r) 802 s = idom[s]; 803 return s == r; 804 } 805 806 int 807 loophead(int32 *idom, Reg *r) 808 { 809 int32 src; 810 811 src = r->rpo; 812 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 813 return 1; 814 for(r = r->p2; r != R; r = r->p2link) 815 if(doms(idom, src, r->rpo)) 816 return 1; 817 return 0; 818 } 819 820 void 821 loopmark(Reg **rpo2r, int32 head, Reg *r) 822 { 823 if(r->rpo < head || r->active == head) 824 return; 825 r->active = head; 826 r->loop += LOOP; 827 if(r->p1 != R) 828 loopmark(rpo2r, head, r->p1); 829 for(r = r->p2; r != R; r = r->p2link) 830 loopmark(rpo2r, head, r); 831 } 832 833 void 834 loopit(Reg *r, int32 nr) 835 { 836 Reg *r1; 837 int32 i, d, me; 838 839 if(nr > maxnr) { 840 rpo2r = alloc(nr * sizeof(Reg*)); 841 idom = alloc(nr * sizeof(int32)); 842 maxnr = nr; 843 } 844 d = postorder(r, rpo2r, 0); 845 if(d > nr) 846 fatal(Z, "too many reg nodes"); 847 nr = d; 848 for(i = 0; i < nr / 2; i++){ 849 r1 = rpo2r[i]; 850 rpo2r[i] = rpo2r[nr - 1 - i]; 851 rpo2r[nr - 1 - i] = r1; 852 } 853 for(i = 0; i < 
nr; i++) 854 rpo2r[i]->rpo = i; 855 856 idom[0] = 0; 857 for(i = 0; i < nr; i++){ 858 r1 = rpo2r[i]; 859 me = r1->rpo; 860 d = -1; 861 if(r1->p1 != R && r1->p1->rpo < me) 862 d = r1->p1->rpo; 863 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 864 if(r1->rpo < me) 865 d = rpolca(idom, d, r1->rpo); 866 idom[i] = d; 867 } 868 869 for(i = 0; i < nr; i++){ 870 r1 = rpo2r[i]; 871 r1->loop++; 872 if(r1->p2 != R && loophead(idom, r1)) 873 loopmark(rpo2r, i, r1); 874 } 875 } 876 877 void 878 synch(Reg *r, Bits dif) 879 { 880 Reg *r1; 881 int z; 882 883 for(r1 = r; r1 != R; r1 = r1->s1) { 884 for(z=0; z<BITS; z++) { 885 dif.b[z] = (dif.b[z] & 886 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 887 r1->set.b[z] | r1->regdiff.b[z]; 888 if(dif.b[z] != r1->regdiff.b[z]) { 889 r1->regdiff.b[z] = dif.b[z]; 890 change++; 891 } 892 } 893 if(r1->active) 894 break; 895 r1->active = 1; 896 for(z=0; z<BITS; z++) 897 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 898 if(r1->s2 != R) 899 synch(r1->s2, dif); 900 } 901 } 902 903 uint32 904 allreg(uint32 b, Rgn *r) 905 { 906 Var *v; 907 int i; 908 909 v = var + r->varno; 910 r->regno = 0; 911 switch(v->etype) { 912 913 default: 914 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 915 break; 916 917 case TCHAR: 918 case TUCHAR: 919 case TSHORT: 920 case TUSHORT: 921 case TINT: 922 case TUINT: 923 case TLONG: 924 case TULONG: 925 case TIND: 926 case TARRAY: 927 i = BtoR(~b); 928 if(i && r->cost >= 0) { 929 r->regno = i; 930 return RtoB(i); 931 } 932 break; 933 934 case TVLONG: 935 case TDOUBLE: 936 case TFLOAT: 937 i = BtoF(~b); 938 if(i && r->cost >= 0) { 939 r->regno = i+NREG; 940 return FtoB(i); 941 } 942 break; 943 } 944 return 0; 945 } 946 947 void 948 paint1(Reg *r, int bn) 949 { 950 Reg *r1; 951 Prog *p; 952 int z; 953 uint32 bb; 954 955 z = bn/32; 956 bb = 1L<<(bn%32); 957 if(r->act.b[z] & bb) 958 return; 959 for(;;) { 960 if(!(r->refbehind.b[z] & bb)) 961 break; 962 r1 = r->p1; 963 if(r1 == R) 964 break; 965 
if(!(r1->refahead.b[z] & bb)) 966 break; 967 if(r1->act.b[z] & bb) 968 break; 969 r = r1; 970 } 971 972 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 973 change -= CLOAD * r->loop; 974 if(debug['R'] && debug['v']) 975 print("%d%P\td %B $%d\n", r->loop, 976 r->prog, blsh(bn), change); 977 } 978 for(;;) { 979 r->act.b[z] |= bb; 980 p = r->prog; 981 982 if(r->use1.b[z] & bb) { 983 change += CREF * r->loop; 984 if(debug['R'] && debug['v']) 985 print("%d%P\tu1 %B $%d\n", r->loop, 986 p, blsh(bn), change); 987 } 988 989 if((r->use2.b[z]|r->set.b[z]) & bb) { 990 change += CREF * r->loop; 991 if(debug['R'] && debug['v']) 992 print("%d%P\tu2 %B $%d\n", r->loop, 993 p, blsh(bn), change); 994 } 995 996 if(STORE(r) & r->regdiff.b[z] & bb) { 997 change -= CLOAD * r->loop; 998 if(debug['R'] && debug['v']) 999 print("%d%P\tst %B $%d\n", r->loop, 1000 p, blsh(bn), change); 1001 } 1002 1003 if(r->refbehind.b[z] & bb) 1004 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1005 if(r1->refahead.b[z] & bb) 1006 paint1(r1, bn); 1007 1008 if(!(r->refahead.b[z] & bb)) 1009 break; 1010 r1 = r->s2; 1011 if(r1 != R) 1012 if(r1->refbehind.b[z] & bb) 1013 paint1(r1, bn); 1014 r = r->s1; 1015 if(r == R) 1016 break; 1017 if(r->act.b[z] & bb) 1018 break; 1019 if(!(r->refbehind.b[z] & bb)) 1020 break; 1021 } 1022 } 1023 1024 uint32 1025 paint2(Reg *r, int bn) 1026 { 1027 Reg *r1; 1028 int z; 1029 uint32 bb, vreg; 1030 1031 z = bn/32; 1032 bb = 1L << (bn%32); 1033 vreg = regbits; 1034 if(!(r->act.b[z] & bb)) 1035 return vreg; 1036 for(;;) { 1037 if(!(r->refbehind.b[z] & bb)) 1038 break; 1039 r1 = r->p1; 1040 if(r1 == R) 1041 break; 1042 if(!(r1->refahead.b[z] & bb)) 1043 break; 1044 if(!(r1->act.b[z] & bb)) 1045 break; 1046 r = r1; 1047 } 1048 for(;;) { 1049 r->act.b[z] &= ~bb; 1050 1051 vreg |= r->regu; 1052 1053 if(r->refbehind.b[z] & bb) 1054 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1055 if(r1->refahead.b[z] & bb) 1056 vreg |= paint2(r1, bn); 1057 1058 if(!(r->refahead.b[z] & bb)) 
1059 break; 1060 r1 = r->s2; 1061 if(r1 != R) 1062 if(r1->refbehind.b[z] & bb) 1063 vreg |= paint2(r1, bn); 1064 r = r->s1; 1065 if(r == R) 1066 break; 1067 if(!(r->act.b[z] & bb)) 1068 break; 1069 if(!(r->refbehind.b[z] & bb)) 1070 break; 1071 } 1072 return vreg; 1073 } 1074 1075 void 1076 paint3(Reg *r, int bn, int32 rb, int rn) 1077 { 1078 Reg *r1; 1079 Prog *p; 1080 int z; 1081 uint32 bb; 1082 1083 z = bn/32; 1084 bb = 1L << (bn%32); 1085 if(r->act.b[z] & bb) 1086 return; 1087 for(;;) { 1088 if(!(r->refbehind.b[z] & bb)) 1089 break; 1090 r1 = r->p1; 1091 if(r1 == R) 1092 break; 1093 if(!(r1->refahead.b[z] & bb)) 1094 break; 1095 if(r1->act.b[z] & bb) 1096 break; 1097 r = r1; 1098 } 1099 1100 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1101 addmove(r, bn, rn, 0); 1102 for(;;) { 1103 r->act.b[z] |= bb; 1104 p = r->prog; 1105 1106 if(r->use1.b[z] & bb) { 1107 if(debug['R']) 1108 print("%P", p); 1109 addreg(&p->from, rn); 1110 if(debug['R']) 1111 print("\t.c%P\n", p); 1112 } 1113 if((r->use2.b[z]|r->set.b[z]) & bb) { 1114 if(debug['R']) 1115 print("%P", p); 1116 addreg(&p->to, rn); 1117 if(debug['R']) 1118 print("\t.c%P\n", p); 1119 } 1120 1121 if(STORE(r) & r->regdiff.b[z] & bb) 1122 addmove(r, bn, rn, 1); 1123 r->regu |= rb; 1124 1125 if(r->refbehind.b[z] & bb) 1126 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1127 if(r1->refahead.b[z] & bb) 1128 paint3(r1, bn, rb, rn); 1129 1130 if(!(r->refahead.b[z] & bb)) 1131 break; 1132 r1 = r->s2; 1133 if(r1 != R) 1134 if(r1->refbehind.b[z] & bb) 1135 paint3(r1, bn, rb, rn); 1136 r = r->s1; 1137 if(r == R) 1138 break; 1139 if(r->act.b[z] & bb) 1140 break; 1141 if(!(r->refbehind.b[z] & bb)) 1142 break; 1143 } 1144 } 1145 1146 void 1147 addreg(Addr *a, int rn) 1148 { 1149 1150 a->sym = 0; 1151 a->name = D_NONE; 1152 a->type = D_REG; 1153 a->reg = rn; 1154 if(rn >= NREG) { 1155 a->type = D_FREG; 1156 a->reg = rn-NREG; 1157 } 1158 } 1159 1160 /* 1161 * bit reg 1162 * 0 R0 1163 * 1 R1 1164 * ... ... 
1165 * 10 R10 1166 * 12 R12 1167 */ 1168 int32 1169 RtoB(int r) 1170 { 1171 1172 if(r < 2 || (r >= REGTMP-2 && r != 12)) // excluded R9 and R10 for m and g, but not R12 1173 return 0; 1174 return 1L << r; 1175 } 1176 1177 int 1178 BtoR(int32 b) 1179 { 1180 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 1181 if(b == 0) 1182 return 0; 1183 return bitno(b); 1184 } 1185 1186 /* 1187 * bit reg 1188 * 18 F2 1189 * 19 F3 1190 * ... ... 1191 * 31 F15 1192 */ 1193 int32 1194 FtoB(int f) 1195 { 1196 1197 if(f < 2 || f > NFREG-1) 1198 return 0; 1199 return 1L << (f + 16); 1200 } 1201 1202 int 1203 BtoF(int32 b) 1204 { 1205 1206 b &= 0xfffc0000L; 1207 if(b == 0) 1208 return 0; 1209 return bitno(b) - 16; 1210 }