// github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/5g/reg.c
//
// Inferno utils/5c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 32 #include <u.h> 33 #include <libc.h> 34 #include "gg.h" 35 #include "opt.h" 36 37 #define NREGVAR 32 38 #define REGBITS ((uint32)0xffffffff) 39 #define P2R(p) (Reg*)(p->reg) 40 41 void addsplits(void); 42 int noreturn(Prog *p); 43 static int first = 0; 44 45 static void fixjmp(Prog*); 46 47 48 Reg* 49 rega(void) 50 { 51 Reg *r; 52 53 r = freer; 54 if(r == R) { 55 r = mal(sizeof(*r)); 56 } else 57 freer = r->link; 58 59 *r = zreg; 60 return r; 61 } 62 63 int 64 rcmp(const void *a1, const void *a2) 65 { 66 Rgn *p1, *p2; 67 int c1, c2; 68 69 p1 = (Rgn*)a1; 70 p2 = (Rgn*)a2; 71 c1 = p2->cost; 72 c2 = p1->cost; 73 if(c1 -= c2) 74 return c1; 75 return p2->varno - p1->varno; 76 } 77 78 static void 79 setoutvar(void) 80 { 81 Type *t; 82 Node *n; 83 Addr a; 84 Iter save; 85 Bits bit; 86 int z; 87 88 t = structfirst(&save, getoutarg(curfn->type)); 89 while(t != T) { 90 n = nodarg(t, 1); 91 a = zprog.from; 92 naddr(n, &a, 0); 93 bit = mkvar(R, &a); 94 for(z=0; z<BITS; z++) 95 ovar.b[z] |= bit.b[z]; 96 t = structnext(&save); 97 } 98 //if(bany(&ovar)) 99 //print("ovar = %Q\n", ovar); 100 } 101 102 void 103 excise(Reg *r) 104 { 105 Prog *p; 106 107 p = r->prog; 108 p->as = ANOP; 109 p->scond = zprog.scond; 110 p->from = zprog.from; 111 p->to = zprog.to; 112 p->reg = zprog.reg; 113 } 114 115 static void 116 setaddrs(Bits bit) 117 { 118 int i, n; 119 Var *v; 120 Node *node; 121 122 while(bany(&bit)) { 123 // convert each bit to a variable 124 i = bnum(bit); 125 node = var[i].node; 126 n = var[i].name; 127 bit.b[i/32] &= ~(1L<<(i%32)); 128 129 // disable all pieces of that variable 130 for(i=0; i<nvar; i++) { 131 v = var+i; 132 if(v->node == node && v->name == n) 133 v->addr = 2; 134 } 135 } 136 } 137 138 static char* regname[] = { 139 ".R0", 140 ".R1", 141 ".R2", 142 ".R3", 143 ".R4", 144 ".R5", 145 ".R6", 146 ".R7", 147 ".R8", 148 ".R9", 149 ".R10", 150 ".R11", 151 ".R12", 152 ".R13", 153 ".R14", 154 ".R15", 155 ".F0", 156 ".F1", 157 ".F2", 158 ".F3", 159 ".F4", 160 
".F5", 161 ".F6", 162 ".F7", 163 ".F8", 164 ".F9", 165 ".F10", 166 ".F11", 167 ".F12", 168 ".F13", 169 ".F14", 170 ".F15", 171 }; 172 173 static Node* regnodes[NREGVAR]; 174 175 void 176 regopt(Prog *firstp) 177 { 178 Reg *r, *r1; 179 Prog *p; 180 int i, z, nr; 181 uint32 vreg; 182 Bits bit; 183 184 if(first == 0) { 185 fmtinstall('Q', Qconv); 186 } 187 188 fixjmp(firstp); 189 190 first++; 191 if(debug['K']) { 192 if(first != 13) 193 return; 194 // debug['R'] = 2; 195 // debug['P'] = 2; 196 print("optimizing %S\n", curfn->nname->sym); 197 } 198 199 // count instructions 200 nr = 0; 201 for(p=firstp; p!=P; p=p->link) 202 nr++; 203 204 // if too big dont bother 205 if(nr >= 10000) { 206 // print("********** %S is too big (%d)\n", curfn->nname->sym, nr); 207 return; 208 } 209 210 firstr = R; 211 lastr = R; 212 213 /* 214 * control flow is more complicated in generated go code 215 * than in generated c code. define pseudo-variables for 216 * registers, so we have complete register usage information. 
217 */ 218 nvar = NREGVAR; 219 memset(var, 0, NREGVAR*sizeof var[0]); 220 for(i=0; i<NREGVAR; i++) { 221 if(regnodes[i] == N) 222 regnodes[i] = newname(lookup(regname[i])); 223 var[i].node = regnodes[i]; 224 } 225 226 regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); 227 for(z=0; z<BITS; z++) { 228 externs.b[z] = 0; 229 params.b[z] = 0; 230 consts.b[z] = 0; 231 addrs.b[z] = 0; 232 ovar.b[z] = 0; 233 } 234 235 // build list of return variables 236 setoutvar(); 237 238 /* 239 * pass 1 240 * build aux data structure 241 * allocate pcs 242 * find use and set of variables 243 */ 244 nr = 0; 245 for(p=firstp; p != P; p = p->link) { 246 switch(p->as) { 247 case ADATA: 248 case AGLOBL: 249 case ANAME: 250 case ASIGNAME: 251 case ALOCALS: 252 case ATYPE: 253 continue; 254 } 255 r = rega(); 256 nr++; 257 if(firstr == R) { 258 firstr = r; 259 lastr = r; 260 } else { 261 lastr->link = r; 262 r->p1 = lastr; 263 lastr->s1 = r; 264 lastr = r; 265 } 266 r->prog = p; 267 p->regp = r; 268 269 r1 = r->p1; 270 if(r1 != R) { 271 switch(r1->prog->as) { 272 case ARET: 273 case AB: 274 case ARFE: 275 r->p1 = R; 276 r1->s1 = R; 277 } 278 } 279 280 // Avoid making variables for direct-called functions. 
281 if(p->as == ABL && p->to.type == D_EXTERN) 282 continue; 283 284 /* 285 * left side always read 286 */ 287 bit = mkvar(r, &p->from); 288 for(z=0; z<BITS; z++) 289 r->use1.b[z] |= bit.b[z]; 290 291 /* 292 * middle always read when present 293 */ 294 if(p->reg != NREG) { 295 if(p->from.type != D_FREG) 296 r->use1.b[0] |= RtoB(p->reg); 297 else 298 r->use1.b[0] |= FtoB(p->reg); 299 } 300 301 /* 302 * right side depends on opcode 303 */ 304 bit = mkvar(r, &p->to); 305 if(bany(&bit)) 306 switch(p->as) { 307 default: 308 yyerror("reg: unknown op: %A", p->as); 309 break; 310 311 /* 312 * right side read 313 */ 314 case ATST: 315 case ATEQ: 316 case ACMP: 317 case ACMN: 318 case ACMPD: 319 case ACMPF: 320 rightread: 321 for(z=0; z<BITS; z++) 322 r->use2.b[z] |= bit.b[z]; 323 break; 324 325 /* 326 * right side read or read+write, depending on middle 327 * ADD x, z => z += x 328 * ADD x, y, z => z = x + y 329 */ 330 case AADD: 331 case AAND: 332 case AEOR: 333 case ASUB: 334 case ARSB: 335 case AADC: 336 case ASBC: 337 case ARSC: 338 case AORR: 339 case ABIC: 340 case ASLL: 341 case ASRL: 342 case ASRA: 343 case AMUL: 344 case AMULU: 345 case ADIV: 346 case AMOD: 347 case AMODU: 348 case ADIVU: 349 if(p->reg != NREG) 350 goto rightread; 351 // fall through 352 353 /* 354 * right side read+write 355 */ 356 case AADDF: 357 case AADDD: 358 case ASUBF: 359 case ASUBD: 360 case AMULF: 361 case AMULD: 362 case ADIVF: 363 case ADIVD: 364 case AMULA: 365 case AMULAL: 366 case AMULALU: 367 for(z=0; z<BITS; z++) { 368 r->use2.b[z] |= bit.b[z]; 369 r->set.b[z] |= bit.b[z]; 370 } 371 break; 372 373 /* 374 * right side write 375 */ 376 case ANOP: 377 case AMOVB: 378 case AMOVBU: 379 case AMOVD: 380 case AMOVDF: 381 case AMOVDW: 382 case AMOVF: 383 case AMOVFW: 384 case AMOVH: 385 case AMOVHU: 386 case AMOVW: 387 case AMOVWD: 388 case AMOVWF: 389 case AMVN: 390 case AMULL: 391 case AMULLU: 392 if((p->scond & C_SCOND) != C_SCOND_NONE) 393 for(z=0; z<BITS; z++) 394 r->use2.b[z] |= 
bit.b[z]; 395 for(z=0; z<BITS; z++) 396 r->set.b[z] |= bit.b[z]; 397 break; 398 399 /* 400 * funny 401 */ 402 case ABL: 403 setaddrs(bit); 404 break; 405 } 406 407 if(p->as == AMOVM) { 408 z = p->to.offset; 409 if(p->from.type == D_CONST) 410 z = p->from.offset; 411 for(i=0; z; i++) { 412 if(z&1) 413 regbits |= RtoB(i); 414 z >>= 1; 415 } 416 } 417 } 418 if(firstr == R) 419 return; 420 421 for(i=0; i<nvar; i++) { 422 Var *v = var+i; 423 if(v->addr) { 424 bit = blsh(i); 425 for(z=0; z<BITS; z++) 426 addrs.b[z] |= bit.b[z]; 427 } 428 429 if(debug['R'] && debug['v']) 430 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 431 i, v->addr, v->etype, v->width, v->node, v->offset); 432 } 433 434 if(debug['R'] && debug['v']) 435 dumpit("pass1", firstr); 436 437 /* 438 * pass 2 439 * turn branch references to pointers 440 * build back pointers 441 */ 442 for(r=firstr; r!=R; r=r->link) { 443 p = r->prog; 444 if(p->to.type == D_BRANCH) { 445 if(p->to.u.branch == P) 446 fatal("pnil %P", p); 447 r1 = p->to.u.branch->regp; 448 if(r1 == R) 449 fatal("rnil %P", p); 450 if(r1 == r) { 451 //fatal("ref to self %P", p); 452 continue; 453 } 454 r->s2 = r1; 455 r->p2link = r1->p2; 456 r1->p2 = r; 457 } 458 } 459 if(debug['R']) { 460 p = firstr->prog; 461 print("\n%L %D\n", p->lineno, &p->from); 462 print(" addr = %Q\n", addrs); 463 } 464 465 if(debug['R'] && debug['v']) 466 dumpit("pass2", firstr); 467 468 /* 469 * pass 2.5 470 * find looping structure 471 */ 472 for(r = firstr; r != R; r = r->link) 473 r->active = 0; 474 change = 0; 475 loopit(firstr, nr); 476 477 if(debug['R'] && debug['v']) 478 dumpit("pass2.5", firstr); 479 480 /* 481 * pass 3 482 * iterate propagating usage 483 * back until flow graph is complete 484 */ 485 loop1: 486 change = 0; 487 for(r = firstr; r != R; r = r->link) 488 r->active = 0; 489 for(r = firstr; r != R; r = r->link) 490 if(r->prog->as == ARET) 491 prop(r, zbits, zbits); 492 loop11: 493 /* pick up unreachable code */ 494 i = 0; 495 for(r = firstr; r 
!= R; r = r1) { 496 r1 = r->link; 497 if(r1 && r1->active && !r->active) { 498 prop(r, zbits, zbits); 499 i = 1; 500 } 501 } 502 if(i) 503 goto loop11; 504 if(change) 505 goto loop1; 506 507 if(debug['R'] && debug['v']) 508 dumpit("pass3", firstr); 509 510 511 /* 512 * pass 4 513 * iterate propagating register/variable synchrony 514 * forward until graph is complete 515 */ 516 loop2: 517 change = 0; 518 for(r = firstr; r != R; r = r->link) 519 r->active = 0; 520 synch(firstr, zbits); 521 if(change) 522 goto loop2; 523 524 addsplits(); 525 526 if(debug['R'] && debug['v']) 527 dumpit("pass4", firstr); 528 529 if(debug['R'] > 1) { 530 print("\nprop structure:\n"); 531 for(r = firstr; r != R; r = r->link) { 532 print("%d:%P", r->loop, r->prog); 533 for(z=0; z<BITS; z++) { 534 bit.b[z] = r->set.b[z] | 535 r->refahead.b[z] | r->calahead.b[z] | 536 r->refbehind.b[z] | r->calbehind.b[z] | 537 r->use1.b[z] | r->use2.b[z]; 538 bit.b[z] &= ~addrs.b[z]; 539 } 540 541 if(bany(&bit)) { 542 print("\t"); 543 if(bany(&r->use1)) 544 print(" u1=%Q", r->use1); 545 if(bany(&r->use2)) 546 print(" u2=%Q", r->use2); 547 if(bany(&r->set)) 548 print(" st=%Q", r->set); 549 if(bany(&r->refahead)) 550 print(" ra=%Q", r->refahead); 551 if(bany(&r->calahead)) 552 print(" ca=%Q", r->calahead); 553 if(bany(&r->refbehind)) 554 print(" rb=%Q", r->refbehind); 555 if(bany(&r->calbehind)) 556 print(" cb=%Q", r->calbehind); 557 } 558 print("\n"); 559 } 560 } 561 562 /* 563 * pass 4.5 564 * move register pseudo-variables into regu. 
565 */ 566 for(r = firstr; r != R; r = r->link) { 567 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 568 569 r->set.b[0] &= ~REGBITS; 570 r->use1.b[0] &= ~REGBITS; 571 r->use2.b[0] &= ~REGBITS; 572 r->refbehind.b[0] &= ~REGBITS; 573 r->refahead.b[0] &= ~REGBITS; 574 r->calbehind.b[0] &= ~REGBITS; 575 r->calahead.b[0] &= ~REGBITS; 576 r->regdiff.b[0] &= ~REGBITS; 577 r->act.b[0] &= ~REGBITS; 578 } 579 580 if(debug['R'] && debug['v']) 581 dumpit("pass4.5", firstr); 582 583 /* 584 * pass 5 585 * isolate regions 586 * calculate costs (paint1) 587 */ 588 r = firstr; 589 if(r) { 590 for(z=0; z<BITS; z++) 591 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 592 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 593 if(bany(&bit) & !r->refset) { 594 // should never happen - all variables are preset 595 if(debug['w']) 596 print("%L: used and not set: %Q\n", r->prog->lineno, bit); 597 r->refset = 1; 598 } 599 } 600 601 for(r = firstr; r != R; r = r->link) 602 r->act = zbits; 603 rgp = region; 604 nregion = 0; 605 for(r = firstr; r != R; r = r->link) { 606 for(z=0; z<BITS; z++) 607 bit.b[z] = r->set.b[z] & 608 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 609 if(bany(&bit) && !r->refset) { 610 if(debug['w']) 611 print("%L: set and not used: %Q\n", r->prog->lineno, bit); 612 r->refset = 1; 613 excise(r); 614 } 615 for(z=0; z<BITS; z++) 616 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 617 while(bany(&bit)) { 618 i = bnum(bit); 619 rgp->enter = r; 620 rgp->varno = i; 621 change = 0; 622 if(debug['R'] > 1) 623 print("\n"); 624 paint1(r, i); 625 bit.b[i/32] &= ~(1L<<(i%32)); 626 if(change <= 0) { 627 if(debug['R']) 628 print("%L $%d: %Q\n", 629 r->prog->lineno, change, blsh(i)); 630 continue; 631 } 632 rgp->cost = change; 633 nregion++; 634 if(nregion >= NRGN) { 635 if(debug['R'] > 1) 636 print("too many regions\n"); 637 goto brk; 638 } 639 rgp++; 640 } 641 } 642 brk: 643 qsort(region, nregion, sizeof(region[0]), rcmp); 644 645 if(debug['R'] && 
debug['v']) 646 dumpit("pass5", firstr); 647 648 /* 649 * pass 6 650 * determine used registers (paint2) 651 * replace code (paint3) 652 */ 653 rgp = region; 654 for(i=0; i<nregion; i++) { 655 bit = blsh(rgp->varno); 656 vreg = paint2(rgp->enter, rgp->varno); 657 vreg = allreg(vreg, rgp); 658 if(debug['R']) { 659 if(rgp->regno >= NREG) 660 print("%L $%d F%d: %Q\n", 661 rgp->enter->prog->lineno, 662 rgp->cost, 663 rgp->regno-NREG, 664 bit); 665 else 666 print("%L $%d R%d: %Q\n", 667 rgp->enter->prog->lineno, 668 rgp->cost, 669 rgp->regno, 670 bit); 671 } 672 if(rgp->regno != 0) 673 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 674 rgp++; 675 } 676 677 if(debug['R'] && debug['v']) 678 dumpit("pass6", firstr); 679 680 /* 681 * pass 7 682 * peep-hole on basic block 683 */ 684 if(!debug['R'] || debug['P']) { 685 peep(); 686 } 687 688 if(debug['R'] && debug['v']) 689 dumpit("pass7", firstr); 690 691 /* 692 * last pass 693 * eliminate nops 694 * free aux structures 695 * adjust the stack pointer 696 * MOVW.W R1,-12(R13) <<- start 697 * MOVW R0,R1 698 * MOVW R1,8(R13) 699 * MOVW $0,R1 700 * MOVW R1,4(R13) 701 * BL ,runtime.newproc+0(SB) 702 * MOVW &ft+-32(SP),R7 <<- adjust 703 * MOVW &j+-40(SP),R6 <<- adjust 704 * MOVW autotmp_0003+-24(SP),R5 <<- adjust 705 * MOVW $12(R13),R13 <<- finish 706 */ 707 vreg = 0; 708 for(p = firstp; p != P; p = p->link) { 709 while(p->link != P && p->link->as == ANOP) 710 p->link = p->link->link; 711 if(p->to.type == D_BRANCH) 712 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 713 p->to.u.branch = p->to.u.branch->link; 714 if(p->as == AMOVW && p->to.reg == 13) { 715 if(p->scond & C_WBIT) { 716 vreg = -p->to.offset; // in adjust region 717 // print("%P adjusting %d\n", p, vreg); 718 continue; 719 } 720 if(p->from.type == D_CONST && p->to.type == D_REG) { 721 if(p->from.offset != vreg) 722 print("in and out different\n"); 723 // print("%P finish %d\n", p, vreg); 724 vreg = 0; // done adjust region 725 continue; 726 } 727 728 // 
print("%P %d %d from type\n", p, p->from.type, D_CONST); 729 // print("%P %d %d to type\n\n", p, p->to.type, D_REG); 730 } 731 732 if(p->as == AMOVW && vreg != 0) { 733 if(p->from.sym != S) 734 if(p->from.name == D_AUTO || p->from.name == D_PARAM) { 735 p->from.offset += vreg; 736 // print("%P adjusting from %d %d\n", p, vreg, p->from.type); 737 } 738 if(p->to.sym != S) 739 if(p->to.name == D_AUTO || p->to.name == D_PARAM) { 740 p->to.offset += vreg; 741 // print("%P adjusting to %d %d\n", p, vreg, p->from.type); 742 } 743 } 744 } 745 if(lastr != R) { 746 lastr->link = freer; 747 freer = firstr; 748 } 749 750 } 751 752 void 753 addsplits(void) 754 { 755 Reg *r, *r1; 756 int z, i; 757 Bits bit; 758 759 for(r = firstr; r != R; r = r->link) { 760 if(r->loop > 1) 761 continue; 762 if(r->prog->as == ABL) 763 continue; 764 for(r1 = r->p2; r1 != R; r1 = r1->p2link) { 765 if(r1->loop <= 1) 766 continue; 767 for(z=0; z<BITS; z++) 768 bit.b[z] = r1->calbehind.b[z] & 769 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 770 ~(r->calahead.b[z] & addrs.b[z]); 771 while(bany(&bit)) { 772 i = bnum(bit); 773 bit.b[i/32] &= ~(1L << (i%32)); 774 } 775 } 776 } 777 } 778 779 /* 780 * add mov b,rn 781 * just after r 782 */ 783 void 784 addmove(Reg *r, int bn, int rn, int f) 785 { 786 Prog *p, *p1, *p2; 787 Adr *a; 788 Var *v; 789 790 p1 = mal(sizeof(*p1)); 791 *p1 = zprog; 792 p = r->prog; 793 794 // If there's a stack fixup coming (after BL newproc or BL deferproc), 795 // delay the load until after the fixup. 
796 p2 = p->link; 797 if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG) 798 p = p2; 799 800 p1->link = p->link; 801 p->link = p1; 802 p1->lineno = p->lineno; 803 804 v = var + bn; 805 806 a = &p1->to; 807 a->name = v->name; 808 a->node = v->node; 809 a->sym = v->node->sym; 810 a->offset = v->offset; 811 a->etype = v->etype; 812 a->type = D_OREG; 813 if(a->etype == TARRAY || a->sym == S) 814 a->type = D_CONST; 815 816 if(v->addr) 817 fatal("addmove: shouldnt be doing this %A\n", a); 818 819 switch(v->etype) { 820 default: 821 print("What is this %E\n", v->etype); 822 823 case TINT8: 824 p1->as = AMOVB; 825 break; 826 case TBOOL: 827 case TUINT8: 828 //print("movbu %E %d %S\n", v->etype, bn, v->sym); 829 p1->as = AMOVBU; 830 break; 831 case TINT16: 832 p1->as = AMOVH; 833 break; 834 case TUINT16: 835 p1->as = AMOVHU; 836 break; 837 case TINT32: 838 case TUINT32: 839 case TPTR32: 840 p1->as = AMOVW; 841 break; 842 case TFLOAT32: 843 p1->as = AMOVF; 844 break; 845 case TFLOAT64: 846 p1->as = AMOVD; 847 break; 848 } 849 850 p1->from.type = D_REG; 851 p1->from.reg = rn; 852 if(rn >= NREG) { 853 p1->from.type = D_FREG; 854 p1->from.reg = rn-NREG; 855 } 856 if(!f) { 857 p1->from = *a; 858 *a = zprog.from; 859 a->type = D_REG; 860 a->reg = rn; 861 if(rn >= NREG) { 862 a->type = D_FREG; 863 a->reg = rn-NREG; 864 } 865 if(v->etype == TUINT8 || v->etype == TBOOL) 866 p1->as = AMOVBU; 867 if(v->etype == TUINT16) 868 p1->as = AMOVHU; 869 } 870 if(debug['R']) 871 print("%P\t.a%P\n", p, p1); 872 } 873 874 static int 875 overlap(int32 o1, int w1, int32 o2, int w2) 876 { 877 int32 t1, t2; 878 879 t1 = o1+w1; 880 t2 = o2+w2; 881 882 if(!(t1 > o2 && t2 > o1)) 883 return 0; 884 885 return 1; 886 } 887 888 Bits 889 mkvar(Reg *r, Adr *a) 890 { 891 Var *v; 892 int i, t, n, et, z, w, flag; 893 int32 o; 894 Bits bit; 895 Node *node; 896 897 // mark registers used 898 t = a->type; 899 900 flag = 0; 901 switch(t) { 
902 default: 903 print("type %d %d %D\n", t, a->name, a); 904 goto none; 905 906 case D_NONE: 907 case D_FCONST: 908 case D_BRANCH: 909 break; 910 911 case D_CONST: 912 flag = 1; 913 goto onereg; 914 915 case D_REGREG: 916 case D_REGREG2: 917 bit = zbits; 918 if(a->offset != NREG) 919 bit.b[0] |= RtoB(a->offset); 920 if(a->reg != NREG) 921 bit.b[0] |= RtoB(a->reg); 922 return bit; 923 924 case D_REG: 925 case D_SHIFT: 926 onereg: 927 if(a->reg != NREG) { 928 bit = zbits; 929 bit.b[0] = RtoB(a->reg); 930 return bit; 931 } 932 break; 933 934 case D_OREG: 935 if(a->reg != NREG) { 936 if(a == &r->prog->from) 937 r->use1.b[0] |= RtoB(a->reg); 938 else 939 r->use2.b[0] |= RtoB(a->reg); 940 if(r->prog->scond & (C_PBIT|C_WBIT)) 941 r->set.b[0] |= RtoB(a->reg); 942 } 943 break; 944 945 case D_FREG: 946 if(a->reg != NREG) { 947 bit = zbits; 948 bit.b[0] = FtoB(a->reg); 949 return bit; 950 } 951 break; 952 } 953 954 switch(a->name) { 955 default: 956 goto none; 957 958 case D_EXTERN: 959 case D_STATIC: 960 case D_AUTO: 961 case D_PARAM: 962 n = a->name; 963 break; 964 } 965 966 node = a->node; 967 if(node == N || node->op != ONAME || node->orig == N) 968 goto none; 969 node = node->orig; 970 if(node->orig != node) 971 fatal("%D: bad node", a); 972 if(node->sym == S || node->sym->name[0] == '.') 973 goto none; 974 et = a->etype; 975 o = a->offset; 976 w = a->width; 977 if(w < 0) 978 fatal("bad width %d for %D", w, a); 979 980 for(i=0; i<nvar; i++) { 981 v = var+i; 982 if(v->node == node && v->name == n) { 983 if(v->offset == o) 984 if(v->etype == et) 985 if(v->width == w) 986 if(!flag) 987 return blsh(i); 988 989 // if they overlap, disable both 990 if(overlap(v->offset, v->width, o, w)) { 991 v->addr = 1; 992 flag = 1; 993 } 994 } 995 } 996 997 switch(et) { 998 case 0: 999 case TFUNC: 1000 goto none; 1001 } 1002 1003 if(nvar >= NVAR) { 1004 if(debug['w'] > 1 && node) 1005 fatal("variable not optimized: %D", a); 1006 goto none; 1007 } 1008 1009 i = nvar; 1010 nvar++; 1011 
//print("var %d %E %D %S\n", i, et, a, s); 1012 v = var+i; 1013 v->offset = o; 1014 v->name = n; 1015 v->etype = et; 1016 v->width = w; 1017 v->addr = flag; // funny punning 1018 v->node = node; 1019 1020 if(debug['R']) 1021 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 1022 1023 bit = blsh(i); 1024 if(n == D_EXTERN || n == D_STATIC) 1025 for(z=0; z<BITS; z++) 1026 externs.b[z] |= bit.b[z]; 1027 if(n == D_PARAM) 1028 for(z=0; z<BITS; z++) 1029 params.b[z] |= bit.b[z]; 1030 1031 return bit; 1032 1033 none: 1034 return zbits; 1035 } 1036 1037 void 1038 prop(Reg *r, Bits ref, Bits cal) 1039 { 1040 Reg *r1, *r2; 1041 int z; 1042 1043 for(r1 = r; r1 != R; r1 = r1->p1) { 1044 for(z=0; z<BITS; z++) { 1045 ref.b[z] |= r1->refahead.b[z]; 1046 if(ref.b[z] != r1->refahead.b[z]) { 1047 r1->refahead.b[z] = ref.b[z]; 1048 change++; 1049 } 1050 cal.b[z] |= r1->calahead.b[z]; 1051 if(cal.b[z] != r1->calahead.b[z]) { 1052 r1->calahead.b[z] = cal.b[z]; 1053 change++; 1054 } 1055 } 1056 switch(r1->prog->as) { 1057 case ABL: 1058 if(noreturn(r1->prog)) 1059 break; 1060 for(z=0; z<BITS; z++) { 1061 cal.b[z] |= ref.b[z] | externs.b[z]; 1062 ref.b[z] = 0; 1063 } 1064 break; 1065 1066 case ATEXT: 1067 for(z=0; z<BITS; z++) { 1068 cal.b[z] = 0; 1069 ref.b[z] = 0; 1070 } 1071 break; 1072 1073 case ARET: 1074 for(z=0; z<BITS; z++) { 1075 cal.b[z] = externs.b[z] | ovar.b[z]; 1076 ref.b[z] = 0; 1077 } 1078 break; 1079 1080 default: 1081 // Work around for issue 1304: 1082 // flush modified globals before each instruction. 
1083 for(z=0; z<BITS; z++) { 1084 cal.b[z] |= externs.b[z]; 1085 // issue 4066: flush modified return variables in case of panic 1086 if(hasdefer) 1087 cal.b[z] |= ovar.b[z]; 1088 } 1089 break; 1090 } 1091 for(z=0; z<BITS; z++) { 1092 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 1093 r1->use1.b[z] | r1->use2.b[z]; 1094 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 1095 r1->refbehind.b[z] = ref.b[z]; 1096 r1->calbehind.b[z] = cal.b[z]; 1097 } 1098 if(r1->active) 1099 break; 1100 r1->active = 1; 1101 } 1102 for(; r != r1; r = r->p1) 1103 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 1104 prop(r2, r->refbehind, r->calbehind); 1105 } 1106 1107 /* 1108 * find looping structure 1109 * 1110 * 1) find reverse postordering 1111 * 2) find approximate dominators, 1112 * the actual dominators if the flow graph is reducible 1113 * otherwise, dominators plus some other non-dominators. 1114 * See Matthew S. Hecht and Jeffrey D. Ullman, 1115 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 1116 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 1117 * Oct. 1-3, 1973, pp. 207-217. 1118 * 3) find all nodes with a predecessor dominated by the current node. 1119 * such a node is a loop head. 
1120 * recursively, all preds with a greater rpo number are in the loop 1121 */ 1122 int32 1123 postorder(Reg *r, Reg **rpo2r, int32 n) 1124 { 1125 Reg *r1; 1126 1127 r->rpo = 1; 1128 r1 = r->s1; 1129 if(r1 && !r1->rpo) 1130 n = postorder(r1, rpo2r, n); 1131 r1 = r->s2; 1132 if(r1 && !r1->rpo) 1133 n = postorder(r1, rpo2r, n); 1134 rpo2r[n] = r; 1135 n++; 1136 return n; 1137 } 1138 1139 int32 1140 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 1141 { 1142 int32 t; 1143 1144 if(rpo1 == -1) 1145 return rpo2; 1146 while(rpo1 != rpo2){ 1147 if(rpo1 > rpo2){ 1148 t = rpo2; 1149 rpo2 = rpo1; 1150 rpo1 = t; 1151 } 1152 while(rpo1 < rpo2){ 1153 t = idom[rpo2]; 1154 if(t >= rpo2) 1155 fatal("bad idom"); 1156 rpo2 = t; 1157 } 1158 } 1159 return rpo1; 1160 } 1161 1162 int 1163 doms(int32 *idom, int32 r, int32 s) 1164 { 1165 while(s > r) 1166 s = idom[s]; 1167 return s == r; 1168 } 1169 1170 int 1171 loophead(int32 *idom, Reg *r) 1172 { 1173 int32 src; 1174 1175 src = r->rpo; 1176 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 1177 return 1; 1178 for(r = r->p2; r != R; r = r->p2link) 1179 if(doms(idom, src, r->rpo)) 1180 return 1; 1181 return 0; 1182 } 1183 1184 void 1185 loopmark(Reg **rpo2r, int32 head, Reg *r) 1186 { 1187 if(r->rpo < head || r->active == head) 1188 return; 1189 r->active = head; 1190 r->loop += LOOP; 1191 if(r->p1 != R) 1192 loopmark(rpo2r, head, r->p1); 1193 for(r = r->p2; r != R; r = r->p2link) 1194 loopmark(rpo2r, head, r); 1195 } 1196 1197 void 1198 loopit(Reg *r, int32 nr) 1199 { 1200 Reg *r1; 1201 int32 i, d, me; 1202 1203 if(nr > maxnr) { 1204 rpo2r = mal(nr * sizeof(Reg*)); 1205 idom = mal(nr * sizeof(int32)); 1206 maxnr = nr; 1207 } 1208 d = postorder(r, rpo2r, 0); 1209 if(d > nr) 1210 fatal("too many reg nodes"); 1211 nr = d; 1212 for(i = 0; i < nr / 2; i++){ 1213 r1 = rpo2r[i]; 1214 rpo2r[i] = rpo2r[nr - 1 - i]; 1215 rpo2r[nr - 1 - i] = r1; 1216 } 1217 for(i = 0; i < nr; i++) 1218 rpo2r[i]->rpo = i; 1219 1220 idom[0] = 0; 1221 for(i = 0; i < nr; 
i++){ 1222 r1 = rpo2r[i]; 1223 me = r1->rpo; 1224 d = -1; 1225 // rpo2r[r->rpo] == r protects against considering dead code, 1226 // which has r->rpo == 0. 1227 if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me) 1228 d = r1->p1->rpo; 1229 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 1230 if(rpo2r[r1->rpo] == r1 && r1->rpo < me) 1231 d = rpolca(idom, d, r1->rpo); 1232 idom[i] = d; 1233 } 1234 1235 for(i = 0; i < nr; i++){ 1236 r1 = rpo2r[i]; 1237 r1->loop++; 1238 if(r1->p2 != R && loophead(idom, r1)) 1239 loopmark(rpo2r, i, r1); 1240 } 1241 } 1242 1243 void 1244 synch(Reg *r, Bits dif) 1245 { 1246 Reg *r1; 1247 int z; 1248 1249 for(r1 = r; r1 != R; r1 = r1->s1) { 1250 for(z=0; z<BITS; z++) { 1251 dif.b[z] = (dif.b[z] & 1252 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 1253 r1->set.b[z] | r1->regdiff.b[z]; 1254 if(dif.b[z] != r1->regdiff.b[z]) { 1255 r1->regdiff.b[z] = dif.b[z]; 1256 change++; 1257 } 1258 } 1259 if(r1->active) 1260 break; 1261 r1->active = 1; 1262 for(z=0; z<BITS; z++) 1263 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 1264 if(r1->s2 != R) 1265 synch(r1->s2, dif); 1266 } 1267 } 1268 1269 uint32 1270 allreg(uint32 b, Rgn *r) 1271 { 1272 Var *v; 1273 int i; 1274 1275 v = var + r->varno; 1276 r->regno = 0; 1277 switch(v->etype) { 1278 1279 default: 1280 fatal("unknown etype %d/%E", bitno(b), v->etype); 1281 break; 1282 1283 case TINT8: 1284 case TUINT8: 1285 case TINT16: 1286 case TUINT16: 1287 case TINT32: 1288 case TUINT32: 1289 case TINT: 1290 case TUINT: 1291 case TUINTPTR: 1292 case TBOOL: 1293 case TPTR32: 1294 i = BtoR(~b); 1295 if(i && r->cost >= 0) { 1296 r->regno = i; 1297 return RtoB(i); 1298 } 1299 break; 1300 1301 case TFLOAT32: 1302 case TFLOAT64: 1303 i = BtoF(~b); 1304 if(i && r->cost >= 0) { 1305 r->regno = i+NREG; 1306 return FtoB(i); 1307 } 1308 break; 1309 1310 case TINT64: 1311 case TUINT64: 1312 case TPTR64: 1313 case TINTER: 1314 case TSTRUCT: 1315 case TARRAY: 1316 break; 1317 } 1318 return 0; 1319 } 
1320 1321 void 1322 paint1(Reg *r, int bn) 1323 { 1324 Reg *r1; 1325 Prog *p; 1326 int z; 1327 uint32 bb; 1328 1329 z = bn/32; 1330 bb = 1L<<(bn%32); 1331 if(r->act.b[z] & bb) 1332 return; 1333 for(;;) { 1334 if(!(r->refbehind.b[z] & bb)) 1335 break; 1336 r1 = r->p1; 1337 if(r1 == R) 1338 break; 1339 if(!(r1->refahead.b[z] & bb)) 1340 break; 1341 if(r1->act.b[z] & bb) 1342 break; 1343 r = r1; 1344 } 1345 1346 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 1347 change -= CLOAD * r->loop; 1348 if(debug['R'] > 1) 1349 print("%d%P\td %Q $%d\n", r->loop, 1350 r->prog, blsh(bn), change); 1351 } 1352 for(;;) { 1353 r->act.b[z] |= bb; 1354 p = r->prog; 1355 1356 if(r->use1.b[z] & bb) { 1357 change += CREF * r->loop; 1358 if(debug['R'] > 1) 1359 print("%d%P\tu1 %Q $%d\n", r->loop, 1360 p, blsh(bn), change); 1361 } 1362 1363 if((r->use2.b[z]|r->set.b[z]) & bb) { 1364 change += CREF * r->loop; 1365 if(debug['R'] > 1) 1366 print("%d%P\tu2 %Q $%d\n", r->loop, 1367 p, blsh(bn), change); 1368 } 1369 1370 if(STORE(r) & r->regdiff.b[z] & bb) { 1371 change -= CLOAD * r->loop; 1372 if(debug['R'] > 1) 1373 print("%d%P\tst %Q $%d\n", r->loop, 1374 p, blsh(bn), change); 1375 } 1376 1377 if(r->refbehind.b[z] & bb) 1378 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1379 if(r1->refahead.b[z] & bb) 1380 paint1(r1, bn); 1381 1382 if(!(r->refahead.b[z] & bb)) 1383 break; 1384 r1 = r->s2; 1385 if(r1 != R) 1386 if(r1->refbehind.b[z] & bb) 1387 paint1(r1, bn); 1388 r = r->s1; 1389 if(r == R) 1390 break; 1391 if(r->act.b[z] & bb) 1392 break; 1393 if(!(r->refbehind.b[z] & bb)) 1394 break; 1395 } 1396 } 1397 1398 uint32 1399 paint2(Reg *r, int bn) 1400 { 1401 Reg *r1; 1402 int z; 1403 uint32 bb, vreg; 1404 1405 z = bn/32; 1406 bb = 1L << (bn%32); 1407 vreg = regbits; 1408 if(!(r->act.b[z] & bb)) 1409 return vreg; 1410 for(;;) { 1411 if(!(r->refbehind.b[z] & bb)) 1412 break; 1413 r1 = r->p1; 1414 if(r1 == R) 1415 break; 1416 if(!(r1->refahead.b[z] & bb)) 1417 break; 1418 
if(!(r1->act.b[z] & bb)) 1419 break; 1420 r = r1; 1421 } 1422 for(;;) { 1423 r->act.b[z] &= ~bb; 1424 1425 vreg |= r->regu; 1426 1427 if(r->refbehind.b[z] & bb) 1428 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1429 if(r1->refahead.b[z] & bb) 1430 vreg |= paint2(r1, bn); 1431 1432 if(!(r->refahead.b[z] & bb)) 1433 break; 1434 r1 = r->s2; 1435 if(r1 != R) 1436 if(r1->refbehind.b[z] & bb) 1437 vreg |= paint2(r1, bn); 1438 r = r->s1; 1439 if(r == R) 1440 break; 1441 if(!(r->act.b[z] & bb)) 1442 break; 1443 if(!(r->refbehind.b[z] & bb)) 1444 break; 1445 } 1446 return vreg; 1447 } 1448 1449 void 1450 paint3(Reg *r, int bn, int32 rb, int rn) 1451 { 1452 Reg *r1; 1453 Prog *p; 1454 int z; 1455 uint32 bb; 1456 1457 z = bn/32; 1458 bb = 1L << (bn%32); 1459 if(r->act.b[z] & bb) 1460 return; 1461 for(;;) { 1462 if(!(r->refbehind.b[z] & bb)) 1463 break; 1464 r1 = r->p1; 1465 if(r1 == R) 1466 break; 1467 if(!(r1->refahead.b[z] & bb)) 1468 break; 1469 if(r1->act.b[z] & bb) 1470 break; 1471 r = r1; 1472 } 1473 1474 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1475 addmove(r, bn, rn, 0); 1476 1477 for(;;) { 1478 r->act.b[z] |= bb; 1479 p = r->prog; 1480 1481 if(r->use1.b[z] & bb) { 1482 if(debug['R']) 1483 print("%P", p); 1484 addreg(&p->from, rn); 1485 if(debug['R']) 1486 print("\t.c%P\n", p); 1487 } 1488 if((r->use2.b[z]|r->set.b[z]) & bb) { 1489 if(debug['R']) 1490 print("%P", p); 1491 addreg(&p->to, rn); 1492 if(debug['R']) 1493 print("\t.c%P\n", p); 1494 } 1495 1496 if(STORE(r) & r->regdiff.b[z] & bb) 1497 addmove(r, bn, rn, 1); 1498 r->regu |= rb; 1499 1500 if(r->refbehind.b[z] & bb) 1501 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1502 if(r1->refahead.b[z] & bb) 1503 paint3(r1, bn, rb, rn); 1504 1505 if(!(r->refahead.b[z] & bb)) 1506 break; 1507 r1 = r->s2; 1508 if(r1 != R) 1509 if(r1->refbehind.b[z] & bb) 1510 paint3(r1, bn, rb, rn); 1511 r = r->s1; 1512 if(r == R) 1513 break; 1514 if(r->act.b[z] & bb) 1515 break; 1516 if(!(r->refbehind.b[z] & bb)) 1517 
break; 1518 } 1519 } 1520 1521 void 1522 addreg(Adr *a, int rn) 1523 { 1524 a->sym = 0; 1525 a->name = D_NONE; 1526 a->type = D_REG; 1527 a->reg = rn; 1528 if(rn >= NREG) { 1529 a->type = D_FREG; 1530 a->reg = rn-NREG; 1531 } 1532 } 1533 1534 /* 1535 * bit reg 1536 * 0 R0 1537 * 1 R1 1538 * ... ... 1539 * 10 R10 1540 * 12 R12 1541 */ 1542 int32 1543 RtoB(int r) 1544 { 1545 if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12 1546 return 0; 1547 return 1L << r; 1548 } 1549 1550 int 1551 BtoR(int32 b) 1552 { 1553 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 1554 if(b == 0) 1555 return 0; 1556 return bitno(b); 1557 } 1558 1559 /* 1560 * bit reg 1561 * 18 F2 1562 * 19 F3 1563 * ... ... 1564 * 31 F15 1565 */ 1566 int32 1567 FtoB(int f) 1568 { 1569 1570 if(f < 2 || f > NFREG-1) 1571 return 0; 1572 return 1L << (f + 16); 1573 } 1574 1575 int 1576 BtoF(int32 b) 1577 { 1578 1579 b &= 0xfffc0000L; 1580 if(b == 0) 1581 return 0; 1582 return bitno(b) - 16; 1583 } 1584 1585 static Sym* symlist[10]; 1586 1587 int 1588 noreturn(Prog *p) 1589 { 1590 Sym *s; 1591 int i; 1592 1593 if(symlist[0] == S) { 1594 symlist[0] = pkglookup("panicindex", runtimepkg); 1595 symlist[1] = pkglookup("panicslice", runtimepkg); 1596 symlist[2] = pkglookup("throwinit", runtimepkg); 1597 symlist[3] = pkglookup("panic", runtimepkg); 1598 symlist[4] = pkglookup("panicwrap", runtimepkg); 1599 } 1600 1601 s = p->to.sym; 1602 if(s == S) 1603 return 0; 1604 for(i=0; symlist[i]!=S; i++) 1605 if(s == symlist[i]) 1606 return 1; 1607 return 0; 1608 } 1609 1610 void 1611 dumpone(Reg *r) 1612 { 1613 int z; 1614 Bits bit; 1615 1616 print("%d:%P", r->loop, r->prog); 1617 for(z=0; z<BITS; z++) 1618 bit.b[z] = 1619 r->set.b[z] | 1620 r->use1.b[z] | 1621 r->use2.b[z] | 1622 r->refbehind.b[z] | 1623 r->refahead.b[z] | 1624 r->calbehind.b[z] | 1625 r->calahead.b[z] | 1626 r->regdiff.b[z] | 1627 r->act.b[z] | 1628 0; 1629 if(bany(&bit)) { 1630 print("\t"); 1631 if(bany(&r->set)) 
1632 print(" s:%Q", r->set); 1633 if(bany(&r->use1)) 1634 print(" u1:%Q", r->use1); 1635 if(bany(&r->use2)) 1636 print(" u2:%Q", r->use2); 1637 if(bany(&r->refbehind)) 1638 print(" rb:%Q ", r->refbehind); 1639 if(bany(&r->refahead)) 1640 print(" ra:%Q ", r->refahead); 1641 if(bany(&r->calbehind)) 1642 print(" cb:%Q ", r->calbehind); 1643 if(bany(&r->calahead)) 1644 print(" ca:%Q ", r->calahead); 1645 if(bany(&r->regdiff)) 1646 print(" d:%Q ", r->regdiff); 1647 if(bany(&r->act)) 1648 print(" a:%Q ", r->act); 1649 } 1650 print("\n"); 1651 } 1652 1653 void 1654 dumpit(char *str, Reg *r0) 1655 { 1656 Reg *r, *r1; 1657 1658 print("\n%s\n", str); 1659 for(r = r0; r != R; r = r->link) { 1660 dumpone(r); 1661 r1 = r->p2; 1662 if(r1 != R) { 1663 print(" pred:"); 1664 for(; r1 != R; r1 = r1->p2link) 1665 print(" %.4ud", r1->prog->loc); 1666 print("\n"); 1667 } 1668 // r1 = r->s1; 1669 // if(r1 != R) { 1670 // print(" succ:"); 1671 // for(; r1 != R; r1 = r1->s1) 1672 // print(" %.4ud", r1->prog->loc); 1673 // print("\n"); 1674 // } 1675 } 1676 } 1677 1678 /* 1679 * the code generator depends on being able to write out JMP (B) 1680 * instructions that it can jump to now but fill in later. 1681 * the linker will resolve them nicely, but they make the code 1682 * longer and more difficult to follow during debugging. 1683 * remove them. 1684 */ 1685 1686 /* what instruction does a JMP to p eventually land on? */ 1687 static Prog* 1688 chasejmp(Prog *p, int *jmploop) 1689 { 1690 int n; 1691 1692 n = 0; 1693 while(p != P && p->as == AB && p->to.type == D_BRANCH) { 1694 if(++n > 10) { 1695 *jmploop = 1; 1696 break; 1697 } 1698 p = p->to.u.branch; 1699 } 1700 return p; 1701 } 1702 1703 /* 1704 * reuse reg pointer for mark/sweep state. 1705 * leave reg==nil at end because alive==nil. 
1706 */ 1707 #define alive ((void*)0) 1708 #define dead ((void*)1) 1709 1710 /* mark all code reachable from firstp as alive */ 1711 static void 1712 mark(Prog *firstp) 1713 { 1714 Prog *p; 1715 1716 for(p=firstp; p; p=p->link) { 1717 if(p->regp != dead) 1718 break; 1719 p->regp = alive; 1720 if(p->as != ABL && p->to.type == D_BRANCH && p->to.u.branch) 1721 mark(p->to.u.branch); 1722 if(p->as == AB || p->as == ARET || (p->as == ABL && noreturn(p))) 1723 break; 1724 } 1725 } 1726 1727 static void 1728 fixjmp(Prog *firstp) 1729 { 1730 int jmploop; 1731 Prog *p, *last; 1732 1733 if(debug['R'] && debug['v']) 1734 print("\nfixjmp\n"); 1735 1736 // pass 1: resolve jump to B, mark all code as dead. 1737 jmploop = 0; 1738 for(p=firstp; p; p=p->link) { 1739 if(debug['R'] && debug['v']) 1740 print("%P\n", p); 1741 if(p->as != ABL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AB) { 1742 p->to.u.branch = chasejmp(p->to.u.branch, &jmploop); 1743 if(debug['R'] && debug['v']) 1744 print("->%P\n", p); 1745 } 1746 p->regp = dead; 1747 } 1748 if(debug['R'] && debug['v']) 1749 print("\n"); 1750 1751 // pass 2: mark all reachable code alive 1752 mark(firstp); 1753 1754 // pass 3: delete dead code (mostly JMPs). 1755 last = nil; 1756 for(p=firstp; p; p=p->link) { 1757 if(p->regp == dead) { 1758 if(p->link == P && p->as == ARET && last && last->as != ARET) { 1759 // This is the final ARET, and the code so far doesn't have one. 1760 // Let it stay. 1761 } else { 1762 if(debug['R'] && debug['v']) 1763 print("del %P\n", p); 1764 continue; 1765 } 1766 } 1767 if(last) 1768 last->link = p; 1769 last = p; 1770 } 1771 last->link = P; 1772 1773 // pass 4: elide JMP to next instruction. 1774 // only safe if there are no jumps to JMPs anymore. 
1775 if(!jmploop) { 1776 last = nil; 1777 for(p=firstp; p; p=p->link) { 1778 if(p->as == AB && p->to.type == D_BRANCH && p->to.u.branch == p->link) { 1779 if(debug['R'] && debug['v']) 1780 print("del %P\n", p); 1781 continue; 1782 } 1783 if(last) 1784 last->link = p; 1785 last = p; 1786 } 1787 last->link = P; 1788 } 1789 1790 if(debug['R'] && debug['v']) { 1791 print("\n"); 1792 for(p=firstp; p; p=p->link) 1793 print("%P\n", p); 1794 print("\n"); 1795 } 1796 }