github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/6g/reg.c (about) 1 // Derived from Inferno utils/6c/reg.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 32 /* 16 general + 16 floating */ 37 #define REGBITS ((uint32)0xffffffff) 38 #define P2R(p) (Reg*)(p->reg) 39 40 static int first = 1; 41 42 Reg* 43 rega(void) 44 { 45 Reg *r; 46 47 r = freer; 48 if(r == R) { 49 r = mal(sizeof(*r)); 50 } else 51 freer = r->link; 52 53 *r = zreg; 54 return r; 55 } 56 57 int 58 rcmp(const void *a1, const void *a2) 59 { 60 Rgn *p1, *p2; 61 int c1, c2; 62 63 p1 = (Rgn*)a1; 64 p2 = (Rgn*)a2; 65 c1 = p2->cost; 66 c2 = p1->cost; 67 if(c1 -= c2) 68 return c1; 69 return p2->varno - p1->varno; 70 } 71 72 static void 73 setoutvar(void) 74 { 75 Type *t; 76 Node *n; 77 Addr a; 78 Iter save; 79 Bits bit; 80 int z; 81 82 t = structfirst(&save, getoutarg(curfn->type)); 83 while(t != T) { 84 n = nodarg(t, 1); 85 a = zprog.from; 86 naddr(n, &a, 0); 87 bit = mkvar(R, &a); 88 for(z=0; z<BITS; z++) 89 ovar.b[z] |= bit.b[z]; 90 t = structnext(&save); 91 } 92 //if(bany(&ovar)) 93 //print("ovars = %Q\n", ovar); 94 } 95 96 static void 97 setaddrs(Bits bit) 98 { 99 int i, n; 100 Var *v; 101 Node *node; 102 103 while(bany(&bit)) { 104 // convert each bit to a variable 105 i = bnum(bit); 106 node = var[i].node; 107 n = var[i].name; 108 bit.b[i/32] &= ~(1L<<(i%32)); 109 110 // disable all pieces of that variable 111 for(i=0; i<nvar; i++) { 112 v = var+i; 113 if(v->node == node && v->name == n) 114 v->addr = 2; 115 } 116 } 117 } 118 119 static char* regname[] = { 120 ".AX", 121 ".CX", 122 ".DX", 123 ".BX", 124 ".SP", 125 ".BP", 126 ".SI", 127 ".DI", 128 ".R8", 129 ".R9", 130 ".R10", 131 ".R11", 132 ".R12", 133 ".R13", 134 ".R14", 135 ".R15", 136 ".X0", 137 ".X1", 138 ".X2", 139 ".X3", 140 ".X4", 141 ".X5", 142 ".X6", 143 ".X7", 144 ".X8", 145 ".X9", 146 ".X10", 147 ".X11", 148 ".X12", 149 ".X13", 150 ".X14", 151 ".X15", 152 }; 153 154 static Node* regnodes[NREGVAR]; 155 156 static void fixjmp(Prog*); 157 158 void 159 regopt(Prog *firstp) 160 { 161 Reg *r, *r1; 162 Prog *p; 163 int i, z, nr; 164 uint32 vreg; 165 Bits bit; 166 167 if(first) { 168 fmtinstall('Q', Qconv); 169 exregoffset = D_R15; 170 first = 0; 171 } 172 173 fixjmp(firstp); 174 175 // count instructions 176 nr = 0; 177 for(p=firstp; p!=P; p=p->link) 178 nr++; 179 // if too big dont bother 180 if(nr >= 10000) { 181 // print("********** %S is too big (%d)\n", curfn->nname->sym, nr); 182 return; 183 } 184 185 firstr = R; 186 lastr = R; 187 188 /* 189 * control flow is more complicated in generated go code 190 * than in generated c code. define pseudo-variables for 191 * registers, so we have complete register usage information. 192 */ 193 nvar = NREGVAR; 194 memset(var, 0, NREGVAR*sizeof var[0]); 195 for(i=0; i<NREGVAR; i++) { 196 if(regnodes[i] == N) 197 regnodes[i] = newname(lookup(regname[i])); 198 var[i].node = regnodes[i]; 199 } 200 201 regbits = RtoB(D_SP); 202 for(z=0; z<BITS; z++) { 203 externs.b[z] = 0; 204 params.b[z] = 0; 205 consts.b[z] = 0; 206 addrs.b[z] = 0; 207 ovar.b[z] = 0; 208 } 209 210 // build list of return variables 211 setoutvar(); 212 213 /* 214 * pass 1 215 * build aux data structure 216 * allocate pcs 217 * find use and set of variables 218 */ 219 nr = 0; 220 for(p=firstp; p!=P; p=p->link) { 221 switch(p->as) { 222 case ADATA: 223 case AGLOBL: 224 case ANAME: 225 case ASIGNAME: 226 case ALOCALS: 227 case ATYPE: 228 continue; 229 } 230 r = rega(); 231 nr++; 232 if(firstr == R) { 233 firstr = r; 234 lastr = r; 235 } else { 236 lastr->link = r; 237 r->p1 = lastr; 238 lastr->s1 = r; 239 lastr = r; 240 } 241 r->prog = p; 242 p->reg = r; 243 244 r1 = r->p1; 245 if(r1 != R) { 246 switch(r1->prog->as) { 247 case ARET: 248 case AJMP: 249 case AIRETL: 250 case AIRETQ: 251 r->p1 = R; 252 r1->s1 = R; 253 } 254 } 255 256 // Avoid making variables for direct-called functions. 257 if(p->as == ACALL && p->to.type == D_EXTERN) 258 continue; 259 260 // Addressing makes some registers used. 261 if(p->from.type >= D_INDIR) 262 r->use1.b[0] |= RtoB(p->from.type-D_INDIR); 263 if(p->from.index != D_NONE) 264 r->use1.b[0] |= RtoB(p->from.index); 265 if(p->to.type >= D_INDIR) 266 r->use2.b[0] |= RtoB(p->to.type-D_INDIR); 267 if(p->to.index != D_NONE) 268 r->use2.b[0] |= RtoB(p->to.index); 269 270 bit = mkvar(r, &p->from); 271 if(bany(&bit)) 272 switch(p->as) { 273 /* 274 * funny 275 */ 276 case ALEAL: 277 case ALEAQ: 278 setaddrs(bit); 279 break; 280 281 /* 282 * left side read 283 */ 284 default: 285 for(z=0; z<BITS; z++) 286 r->use1.b[z] |= bit.b[z]; 287 break; 288 289 /* 290 * left side read+write 291 */ 292 case AXCHGB: 293 case AXCHGW: 294 case AXCHGL: 295 case AXCHGQ: 296 for(z=0; z<BITS; z++) { 297 r->use1.b[z] |= bit.b[z]; 298 r->set.b[z] |= bit.b[z]; 299 } 300 break; 301 } 302 303 bit = mkvar(r, &p->to); 304 if(bany(&bit)) 305 switch(p->as) { 306 default: 307 yyerror("reg: unknown op: %A", p->as); 308 break; 309 310 /* 311 * right side read 312 */ 313 case ACMPB: 314 case ACMPL: 315 case ACMPQ: 316 case ACMPW: 317 case ACOMISS: 318 case ACOMISD: 319 case AUCOMISS: 320 case AUCOMISD: 321 case ATESTB: 322 case ATESTL: 323 case ATESTQ: 324 for(z=0; z<BITS; z++) 325 r->use2.b[z] |= bit.b[z]; 326 break; 327 328 /* 329 * right side write 330 */ 331 case ALEAQ: 332 case ANOP: 333 case AMOVL: 334 case AMOVQ: 335 case AMOVB: 336 case AMOVW: 337 case AMOVBLSX: 338 case AMOVBLZX: 339 case AMOVBWSX: 340 case AMOVBWZX: 341 case AMOVBQSX: 342 case AMOVBQZX: 343 case AMOVLQSX: 344 case AMOVLQZX: 345 case AMOVWLSX: 346 case AMOVWLZX: 347 case AMOVWQSX: 348 case AMOVWQZX: 349 case AMOVQL: 350 case APOPQ: 351 352 case AMOVSS: 353 case AMOVSD: 354 case ACVTSD2SL: 355 case ACVTSD2SQ: 356 case ACVTSD2SS: 357 case ACVTSL2SD: 358 case ACVTSL2SS: 359 case ACVTSQ2SD: 360 case ACVTSQ2SS: 361 case ACVTSS2SD: 362 case ACVTSS2SL: 363 case ACVTSS2SQ: 364 case ACVTTSD2SL: 365 case ACVTTSD2SQ: 366 case ACVTTSS2SL: 367 case ACVTTSS2SQ: 368 for(z=0; z<BITS; z++) 369 r->set.b[z] |= bit.b[z]; 370 break; 371 372 /* 373 * right side read+write 374 */ 375 case AINCB: 376 case AINCL: 377 case AINCQ: 378 case AINCW: 379 case ADECB: 380 case ADECL: 381 case ADECQ: 382 case ADECW: 383 384 case AADDB: 385 case AADDL: 386 case AADDQ: 387 case AADDW: 388 case AANDB: 389 case AANDL: 390 case AANDQ: 391 case AANDW: 392 case ASUBB: 393 case ASUBL: 394 case ASUBQ: 395 case ASUBW: 396 case AORB: 397 case AORL: 398 case AORQ: 399 case AORW: 400 case AXORB: 401 case AXORL: 402 case AXORQ: 403 case AXORW: 404 case ASALB: 405 case ASALL: 406 case ASALQ: 407 case ASALW: 408 case ASARB: 409 case ASARL: 410 case ASARQ: 411 case ASARW: 412 case ARCLB: 413 case ARCLL: 414 case ARCLQ: 415 case ARCLW: 416 case ARCRB: 417 case ARCRL: 418 case ARCRQ: 419 case ARCRW: 420 case AROLB: 421 case AROLL: 422 case AROLQ: 423 case AROLW: 424 case ARORB: 425 case ARORL: 426 case ARORQ: 427 case ARORW: 428 case ASHLB: 429 case ASHLL: 430 case ASHLQ: 431 case ASHLW: 432 case ASHRB: 433 case ASHRL: 434 case ASHRQ: 435 case ASHRW: 436 case AIMULL: 437 case AIMULQ: 438 case AIMULW: 439 case ANEGB: 440 case ANEGW: 441 case ANEGL: 442 case ANEGQ: 443 case ANOTL: 444 case ANOTQ: 445 case AADCL: 446 case AADCQ: 447 case ASBBL: 448 case ASBBQ: 449 450 case ASETCC: 451 case ASETCS: 452 case ASETEQ: 453 case ASETGE: 454 case ASETGT: 455 case ASETHI: 456 case ASETLE: 457 case ASETLS: 458 case ASETLT: 459 case ASETMI: 460 case ASETNE: 461 case ASETOC: 462 case ASETOS: 463 case ASETPC: 464 case ASETPL: 465 case ASETPS: 466 467 case AXCHGB: 468 case AXCHGW: 469 case AXCHGL: 470 case AXCHGQ: 471 472 case AADDSD: 473 case AADDSS: 474 case ACMPSD: 475 case ACMPSS: 476 case ADIVSD: 477 case ADIVSS: 478 case AMAXSD: 479 case AMAXSS: 480 case AMINSD: 481 case AMINSS: 482 case AMULSD: 483 case AMULSS: 484 case ARCPSS: 485 case ARSQRTSS: 486 case ASQRTSD: 487 case ASQRTSS: 488 case ASUBSD: 489 case ASUBSS: 490 case AXORPD: 491 for(z=0; z<BITS; z++) { 492 r->set.b[z] |= bit.b[z]; 493 r->use2.b[z] |= bit.b[z]; 494 } 495 break; 496 497 /* 498 * funny 499 */ 500 case ACALL: 501 setaddrs(bit); 502 break; 503 } 504 505 switch(p->as) { 506 case AIMULL: 507 case AIMULQ: 508 case AIMULW: 509 if(p->to.type != D_NONE) 510 break; 511 512 case AIDIVL: 513 case AIDIVW: 514 case AIDIVQ: 515 case ADIVL: 516 case ADIVW: 517 case ADIVQ: 518 case AMULL: 519 case AMULW: 520 case AMULQ: 521 r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX); 522 r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX); 523 break; 524 525 case AIDIVB: 526 case AIMULB: 527 case ADIVB: 528 case AMULB: 529 r->set.b[0] |= RtoB(D_AX); 530 r->use1.b[0] |= RtoB(D_AX); 531 break; 532 533 case ACWD: 534 r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX); 535 r->use1.b[0] |= RtoB(D_AX); 536 break; 537 538 case ACDQ: 539 r->set.b[0] |= RtoB(D_DX); 540 r->use1.b[0] |= RtoB(D_AX); 541 break; 542 543 case AREP: 544 case AREPN: 545 case ALOOP: 546 case ALOOPEQ: 547 case ALOOPNE: 548 r->set.b[0] |= RtoB(D_CX); 549 r->use1.b[0] |= RtoB(D_CX); 550 break; 551 552 case AMOVSB: 553 case AMOVSL: 554 case AMOVSQ: 555 case AMOVSW: 556 case ACMPSB: 557 case ACMPSL: 558 case ACMPSQ: 559 case ACMPSW: 560 r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI); 561 r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI); 562 break; 563 564 case ASTOSB: 565 case ASTOSL: 566 case ASTOSQ: 567 case ASTOSW: 568 case ASCASB: 569 case ASCASL: 570 case ASCASQ: 571 case ASCASW: 572 r->set.b[0] |= RtoB(D_DI); 573 r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI); 574 break; 575 576 case AINSB: 577 case AINSL: 578 case AINSW: 579 r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI); 580 r->use1.b[0] |= RtoB(D_DI); 581 break; 582 583 case AOUTSB: 584 case AOUTSL: 585 case AOUTSW: 586 r->set.b[0] |= RtoB(D_DI); 587 r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI); 588 break; 589 } 590 } 591 if(firstr == R) 592 return; 593 594 for(i=0; i<nvar; i++) { 595 Var *v = var+i; 596 if(v->addr) { 597 bit = blsh(i); 598 for(z=0; z<BITS; z++) 599 addrs.b[z] |= bit.b[z]; 600 } 601 602 if(debug['R'] && debug['v']) 603 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 604 i, v->addr, v->etype, v->width, v->node, v->offset); 605 } 606 607 if(debug['R'] && debug['v']) 608 dumpit("pass1", firstr); 609 610 /* 611 * pass 2 612 * turn branch references to pointers 613 * build back pointers 614 */ 615 for(r=firstr; r!=R; r=r->link) { 616 p = r->prog; 617 if(p->to.type == D_BRANCH) { 618 if(p->to.u.branch == P) 619 fatal("pnil %P", p); 620 r1 = p->to.u.branch->reg; 621 if(r1 == R) 622 fatal("rnil %P", p); 623 if(r1 == r) { 624 //fatal("ref to self %P", p); 625 continue; 626 } 627 r->s2 = r1; 628 r->p2link = r1->p2; 629 r1->p2 = r; 630 } 631 } 632 633 if(debug['R'] && debug['v']) 634 dumpit("pass2", firstr); 635 636 /* 637 * pass 2.5 638 * find looping structure 639 */ 640 for(r = firstr; r != R; r = r->link) 641 r->active = 0; 642 change = 0; 643 loopit(firstr, nr); 644 645 if(debug['R'] && debug['v']) 646 dumpit("pass2.5", firstr); 647 648 /* 649 * pass 3 650 * iterate propagating usage 651 * back until flow graph is complete 652 */ 653 loop1: 654 change = 0; 655 for(r = firstr; r != R; r = r->link) 656 r->active = 0; 657 for(r = firstr; r != R; r = r->link) 658 if(r->prog->as == ARET) 659 prop(r, zbits, zbits); 660 loop11: 661 /* pick up unreachable code */ 662 i = 0; 663 for(r = firstr; r != R; r = r1) { 664 r1 = r->link; 665 if(r1 && r1->active && !r->active) { 666 prop(r, zbits, zbits); 667 i = 1; 668 } 669 } 670 if(i) 671 goto loop11; 672 if(change) 673 goto loop1; 674 675 if(debug['R'] && debug['v']) 676 dumpit("pass3", firstr); 677 678 /* 679 * pass 4 680 * iterate propagating register/variable synchrony 681 * forward until graph is complete 682 */ 683 loop2: 684 change = 0; 685 for(r = firstr; r != R; r = r->link) 686 r->active = 0; 687 synch(firstr, zbits); 688 if(change) 689 goto loop2; 690 691 if(debug['R'] && debug['v']) 692 dumpit("pass4", firstr); 693 694 /* 695 * pass 4.5 696 * move register pseudo-variables into regu. 697 */ 698 for(r = firstr; r != R; r = r->link) { 699 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 700 701 r->set.b[0] &= ~REGBITS; 702 r->use1.b[0] &= ~REGBITS; 703 r->use2.b[0] &= ~REGBITS; 704 r->refbehind.b[0] &= ~REGBITS; 705 r->refahead.b[0] &= ~REGBITS; 706 r->calbehind.b[0] &= ~REGBITS; 707 r->calahead.b[0] &= ~REGBITS; 708 r->regdiff.b[0] &= ~REGBITS; 709 r->act.b[0] &= ~REGBITS; 710 } 711 712 /* 713 * pass 5 714 * isolate regions 715 * calculate costs (paint1) 716 */ 717 r = firstr; 718 if(r) { 719 for(z=0; z<BITS; z++) 720 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 721 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 722 if(bany(&bit) && !r->refset) { 723 // should never happen - all variables are preset 724 if(debug['w']) 725 print("%L: used and not set: %Q\n", r->prog->lineno, bit); 726 r->refset = 1; 727 } 728 } 729 for(r = firstr; r != R; r = r->link) 730 r->act = zbits; 731 rgp = region; 732 nregion = 0; 733 for(r = firstr; r != R; r = r->link) { 734 for(z=0; z<BITS; z++) 735 bit.b[z] = r->set.b[z] & 736 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 737 if(bany(&bit) && !r->refset) { 738 if(debug['w']) 739 print("%L: set and not used: %Q\n", r->prog->lineno, bit); 740 r->refset = 1; 741 excise(r); 742 } 743 for(z=0; z<BITS; z++) 744 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 745 while(bany(&bit)) { 746 i = bnum(bit); 747 rgp->enter = r; 748 rgp->varno = i; 749 change = 0; 750 paint1(r, i); 751 bit.b[i/32] &= ~(1L<<(i%32)); 752 if(change <= 0) 753 continue; 754 rgp->cost = change; 755 nregion++; 756 if(nregion >= NRGN) { 757 if(debug['R'] && debug['v']) 758 print("too many regions\n"); 759 goto brk; 760 } 761 rgp++; 762 } 763 } 764 brk: 765 qsort(region, nregion, sizeof(region[0]), rcmp); 766 767 if(debug['R'] && debug['v']) 768 dumpit("pass5", firstr); 769 770 /* 771 * pass 6 772 * determine used registers (paint2) 773 * replace code (paint3) 774 */ 775 rgp = region; 776 for(i=0; i<nregion; i++) { 777 bit = blsh(rgp->varno); 778 vreg = paint2(rgp->enter, rgp->varno); 779 vreg = allreg(vreg, rgp); 780 if(rgp->regno != 0) { 781 if(debug['R'] && debug['v']) { 782 Var *v; 783 784 v = var + rgp->varno; 785 print("registerize %N+%lld (bit=%2d et=%2E) in %R\n", 786 v->node, v->offset, rgp->varno, v->etype, rgp->regno); 787 } 788 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 789 } 790 rgp++; 791 } 792 793 if(debug['R'] && debug['v']) 794 dumpit("pass6", firstr); 795 796 /* 797 * pass 7 798 * peep-hole on basic block 799 */ 800 if(!debug['R'] || debug['P']) { 801 peep(); 802 } 803 804 /* 805 * eliminate nops 806 * free aux structures 807 */ 808 for(p=firstp; p!=P; p=p->link) { 809 while(p->link != P && p->link->as == ANOP) 810 p->link = p->link->link; 811 if(p->to.type == D_BRANCH) 812 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 813 p->to.u.branch = p->to.u.branch->link; 814 } 815 816 if(lastr != R) { 817 lastr->link = freer; 818 freer = firstr; 819 } 820 821 if(debug['R']) { 822 if(ostats.ncvtreg || 823 ostats.nspill || 824 ostats.nreload || 825 ostats.ndelmov || 826 ostats.nvar || 827 ostats.naddr || 828 0) 829 print("\nstats\n"); 830 831 if(ostats.ncvtreg) 832 print(" %4d cvtreg\n", ostats.ncvtreg); 833 if(ostats.nspill) 834 print(" %4d spill\n", ostats.nspill); 835 if(ostats.nreload) 836 print(" %4d reload\n", ostats.nreload); 837 if(ostats.ndelmov) 838 print(" %4d delmov\n", ostats.ndelmov); 839 if(ostats.nvar) 840 print(" %4d var\n", ostats.nvar); 841 if(ostats.naddr) 842 print(" %4d addr\n", ostats.naddr); 843 844 memset(&ostats, 0, sizeof(ostats)); 845 } 846 } 847 848 /* 849 * add mov b,rn 850 * just after r 851 */ 852 void 853 addmove(Reg *r, int bn, int rn, int f) 854 { 855 Prog *p, *p1; 856 Adr *a; 857 Var *v; 858 859 p1 = mal(sizeof(*p1)); 860 clearp(p1); 861 p1->loc = 9999; 862 863 p = r->prog; 864 p1->link = p->link; 865 p->link = p1; 866 p1->lineno = p->lineno; 867 868 v = var + bn; 869 870 a = &p1->to; 871 a->offset = v->offset; 872 a->etype = v->etype; 873 a->type = v->name; 874 a->node = v->node; 875 a->sym = v->node->sym; 876 877 // need to clean this up with wptr and 878 // some of the defaults 879 p1->as = AMOVL; 880 switch(v->etype) { 881 default: 882 fatal("unknown type %E", v->etype); 883 case TINT8: 884 case TUINT8: 885 case TBOOL: 886 p1->as = AMOVB; 887 break; 888 case TINT16: 889 case TUINT16: 890 p1->as = AMOVW; 891 break; 892 case TINT64: 893 case TUINT64: 894 case TUINTPTR: 895 case TPTR64: 896 p1->as = AMOVQ; 897 break; 898 case TFLOAT32: 899 p1->as = AMOVSS; 900 break; 901 case TFLOAT64: 902 p1->as = AMOVSD; 903 break; 904 case TINT: 905 case TUINT: 906 case TINT32: 907 case TUINT32: 908 case TPTR32: 909 break; 910 } 911 912 p1->from.type = rn; 913 if(!f) { 914 p1->from = *a; 915 *a = zprog.from; 916 a->type = rn; 917 if(v->etype == TUINT8) 918 p1->as = AMOVB; 919 if(v->etype == TUINT16) 920 p1->as = AMOVW; 921 } 922 if(debug['R'] && debug['v']) 923 print("%P ===add=== %P\n", p, p1); 924 ostats.nspill++; 925 } 926 927 uint32 928 doregbits(int r) 929 { 930 uint32 b; 931 932 b = 0; 933 if(r >= D_INDIR) 934 r -= D_INDIR; 935 if(r >= D_AX && r <= D_R15) 936 b |= RtoB(r); 937 else 938 if(r >= D_AL && r <= D_R15B) 939 b |= RtoB(r-D_AL+D_AX); 940 else 941 if(r >= D_AH && r <= D_BH) 942 b |= RtoB(r-D_AH+D_AX); 943 else 944 if(r >= D_X0 && r <= D_X0+15) 945 b |= FtoB(r); 946 return b; 947 } 948 949 static int 950 overlap(int64 o1, int w1, int64 o2, int w2) 951 { 952 int64 t1, t2; 953 954 t1 = o1+w1; 955 t2 = o2+w2; 956 957 if(!(t1 > o2 && t2 > o1)) 958 return 0; 959 960 return 1; 961 } 962 963 Bits 964 mkvar(Reg *r, Adr *a) 965 { 966 Var *v; 967 int i, t, n, et, z, flag; 968 int64 w; 969 uint32 regu; 970 int64 o; 971 Bits bit; 972 Node *node; 973 974 /* 975 * mark registers used 976 */ 977 t = a->type; 978 if(t == D_NONE) 979 goto none; 980 981 if(r != R) 982 r->use1.b[0] |= doregbits(a->index); 983 984 switch(t) { 985 default: 986 regu = doregbits(t); 987 if(regu == 0) 988 goto none; 989 bit = zbits; 990 bit.b[0] = regu; 991 return bit; 992 993 case D_ADDR: 994 a->type = a->index; 995 bit = mkvar(r, a); 996 setaddrs(bit); 997 a->type = t; 998 ostats.naddr++; 999 goto none; 1000 1001 case D_EXTERN: 1002 case D_STATIC: 1003 case D_PARAM: 1004 case D_AUTO: 1005 n = t; 1006 break; 1007 } 1008 1009 node = a->node; 1010 if(node == N || node->op != ONAME || node->orig == N) 1011 goto none; 1012 node = node->orig; 1013 if(node->orig != node) 1014 fatal("%D: bad node", a); 1015 if(node->sym == S || node->sym->name[0] == '.') 1016 goto none; 1017 et = a->etype; 1018 o = a->offset; 1019 w = a->width; 1020 if(w < 0) 1021 fatal("bad width %lld for %D", w, a); 1022 1023 flag = 0; 1024 for(i=0; i<nvar; i++) { 1025 v = var+i; 1026 if(v->node == node && v->name == n) { 1027 if(v->offset == o) 1028 if(v->etype == et) 1029 if(v->width == w) 1030 return blsh(i); 1031 1032 // if they overlaps, disable both 1033 if(overlap(v->offset, v->width, o, w)) { 1034 // print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et); 1035 v->addr = 1; 1036 flag = 1; 1037 } 1038 } 1039 } 1040 switch(et) { 1041 case 0: 1042 case TFUNC: 1043 goto none; 1044 } 1045 1046 if(nvar >= NVAR) { 1047 if(debug['w'] > 1 && node != N) 1048 fatal("variable not optimized: %#N", node); 1049 goto none; 1050 } 1051 1052 i = nvar; 1053 nvar++; 1054 v = var+i; 1055 v->offset = o; 1056 v->name = n; 1057 v->etype = et; 1058 v->width = w; 1059 v->addr = flag; // funny punning 1060 v->node = node; 1061 1062 if(debug['R']) 1063 print("bit=%2d et=%2E w=%d+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 1064 1065 ostats.nvar++; 1066 1067 bit = blsh(i); 1068 if(n == D_EXTERN || n == D_STATIC) 1069 for(z=0; z<BITS; z++) 1070 externs.b[z] |= bit.b[z]; 1071 if(n == D_PARAM) 1072 for(z=0; z<BITS; z++) 1073 params.b[z] |= bit.b[z]; 1074 1075 return bit; 1076 1077 none: 1078 return zbits; 1079 } 1080 1081 void 1082 prop(Reg *r, Bits ref, Bits cal) 1083 { 1084 Reg *r1, *r2; 1085 int z; 1086 1087 for(r1 = r; r1 != R; r1 = r1->p1) { 1088 for(z=0; z<BITS; z++) { 1089 ref.b[z] |= r1->refahead.b[z]; 1090 if(ref.b[z] != r1->refahead.b[z]) { 1091 r1->refahead.b[z] = ref.b[z]; 1092 change++; 1093 } 1094 cal.b[z] |= r1->calahead.b[z]; 1095 if(cal.b[z] != r1->calahead.b[z]) { 1096 r1->calahead.b[z] = cal.b[z]; 1097 change++; 1098 } 1099 } 1100 switch(r1->prog->as) { 1101 case ACALL: 1102 if(noreturn(r1->prog)) 1103 break; 1104 for(z=0; z<BITS; z++) { 1105 cal.b[z] |= ref.b[z] | externs.b[z]; 1106 ref.b[z] = 0; 1107 } 1108 break; 1109 1110 case ATEXT: 1111 for(z=0; z<BITS; z++) { 1112 cal.b[z] = 0; 1113 ref.b[z] = 0; 1114 } 1115 break; 1116 1117 case ARET: 1118 for(z=0; z<BITS; z++) { 1119 cal.b[z] = externs.b[z] | ovar.b[z]; 1120 ref.b[z] = 0; 1121 } 1122 break; 1123 1124 default: 1125 // Work around for issue 1304: 1126 // flush modified globals before each instruction. 1127 for(z=0; z<BITS; z++) { 1128 cal.b[z] |= externs.b[z]; 1129 // issue 4066: flush modified return variables in case of panic 1130 if(hasdefer) 1131 cal.b[z] |= ovar.b[z]; 1132 } 1133 break; 1134 } 1135 for(z=0; z<BITS; z++) { 1136 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 1137 r1->use1.b[z] | r1->use2.b[z]; 1138 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 1139 r1->refbehind.b[z] = ref.b[z]; 1140 r1->calbehind.b[z] = cal.b[z]; 1141 } 1142 if(r1->active) 1143 break; 1144 r1->active = 1; 1145 } 1146 for(; r != r1; r = r->p1) 1147 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 1148 prop(r2, r->refbehind, r->calbehind); 1149 } 1150 1151 /* 1152 * find looping structure 1153 * 1154 * 1) find reverse postordering 1155 * 2) find approximate dominators, 1156 * the actual dominators if the flow graph is reducible 1157 * otherwise, dominators plus some other non-dominators. 1158 * See Matthew S. Hecht and Jeffrey D. Ullman, 1159 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 1160 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 1161 * Oct. 1-3, 1973, pp. 207-217. 1162 * 3) find all nodes with a predecessor dominated by the current node. 1163 * such a node is a loop head. 1164 * recursively, all preds with a greater rpo number are in the loop 1165 */ 1166 int32 1167 postorder(Reg *r, Reg **rpo2r, int32 n) 1168 { 1169 Reg *r1; 1170 1171 r->rpo = 1; 1172 r1 = r->s1; 1173 if(r1 && !r1->rpo) 1174 n = postorder(r1, rpo2r, n); 1175 r1 = r->s2; 1176 if(r1 && !r1->rpo) 1177 n = postorder(r1, rpo2r, n); 1178 rpo2r[n] = r; 1179 n++; 1180 return n; 1181 } 1182 1183 int32 1184 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 1185 { 1186 int32 t; 1187 1188 if(rpo1 == -1) 1189 return rpo2; 1190 while(rpo1 != rpo2){ 1191 if(rpo1 > rpo2){ 1192 t = rpo2; 1193 rpo2 = rpo1; 1194 rpo1 = t; 1195 } 1196 while(rpo1 < rpo2){ 1197 t = idom[rpo2]; 1198 if(t >= rpo2) 1199 fatal("bad idom"); 1200 rpo2 = t; 1201 } 1202 } 1203 return rpo1; 1204 } 1205 1206 int 1207 doms(int32 *idom, int32 r, int32 s) 1208 { 1209 while(s > r) 1210 s = idom[s]; 1211 return s == r; 1212 } 1213 1214 int 1215 loophead(int32 *idom, Reg *r) 1216 { 1217 int32 src; 1218 1219 src = r->rpo; 1220 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 1221 return 1; 1222 for(r = r->p2; r != R; r = r->p2link) 1223 if(doms(idom, src, r->rpo)) 1224 return 1; 1225 return 0; 1226 } 1227 1228 void 1229 loopmark(Reg **rpo2r, int32 head, Reg *r) 1230 { 1231 if(r->rpo < head || r->active == head) 1232 return; 1233 r->active = head; 1234 r->loop += LOOP; 1235 if(r->p1 != R) 1236 loopmark(rpo2r, head, r->p1); 1237 for(r = r->p2; r != R; r = r->p2link) 1238 loopmark(rpo2r, head, r); 1239 } 1240 1241 void 1242 loopit(Reg *r, int32 nr) 1243 { 1244 Reg *r1; 1245 int32 i, d, me; 1246 1247 if(nr > maxnr) { 1248 rpo2r = mal(nr * sizeof(Reg*)); 1249 idom = mal(nr * sizeof(int32)); 1250 maxnr = nr; 1251 } 1252 1253 d = postorder(r, rpo2r, 0); 1254 if(d > nr) 1255 fatal("too many reg nodes %d %d", d, nr); 1256 nr = d; 1257 for(i = 0; i < nr / 2; i++) { 1258 r1 = rpo2r[i]; 1259 rpo2r[i] = rpo2r[nr - 1 - i]; 1260 rpo2r[nr - 1 - i] = r1; 1261 } 1262 for(i = 0; i < nr; i++) 1263 rpo2r[i]->rpo = i; 1264 1265 idom[0] = 0; 1266 for(i = 0; i < nr; i++) { 1267 r1 = rpo2r[i]; 1268 me = r1->rpo; 1269 d = -1; 1270 // rpo2r[r->rpo] == r protects against considering dead code, 1271 // which has r->rpo == 0. 1272 if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me) 1273 d = r1->p1->rpo; 1274 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 1275 if(rpo2r[r1->rpo] == r1 && r1->rpo < me) 1276 d = rpolca(idom, d, r1->rpo); 1277 idom[i] = d; 1278 } 1279 1280 for(i = 0; i < nr; i++) { 1281 r1 = rpo2r[i]; 1282 r1->loop++; 1283 if(r1->p2 != R && loophead(idom, r1)) 1284 loopmark(rpo2r, i, r1); 1285 } 1286 } 1287 1288 void 1289 synch(Reg *r, Bits dif) 1290 { 1291 Reg *r1; 1292 int z; 1293 1294 for(r1 = r; r1 != R; r1 = r1->s1) { 1295 for(z=0; z<BITS; z++) { 1296 dif.b[z] = (dif.b[z] & 1297 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 1298 r1->set.b[z] | r1->regdiff.b[z]; 1299 if(dif.b[z] != r1->regdiff.b[z]) { 1300 r1->regdiff.b[z] = dif.b[z]; 1301 change++; 1302 } 1303 } 1304 if(r1->active) 1305 break; 1306 r1->active = 1; 1307 for(z=0; z<BITS; z++) 1308 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 1309 if(r1->s2 != R) 1310 synch(r1->s2, dif); 1311 } 1312 } 1313 1314 uint32 1315 allreg(uint32 b, Rgn *r) 1316 { 1317 Var *v; 1318 int i; 1319 1320 v = var + r->varno; 1321 r->regno = 0; 1322 switch(v->etype) { 1323 1324 default: 1325 fatal("unknown etype %d/%E", bitno(b), v->etype); 1326 break; 1327 1328 case TINT8: 1329 case TUINT8: 1330 case TINT16: 1331 case TUINT16: 1332 case TINT32: 1333 case TUINT32: 1334 case TINT64: 1335 case TUINT64: 1336 case TINT: 1337 case TUINT: 1338 case TUINTPTR: 1339 case TBOOL: 1340 case TPTR32: 1341 case TPTR64: 1342 i = BtoR(~b); 1343 if(i && r->cost > 0) { 1344 r->regno = i; 1345 return RtoB(i); 1346 } 1347 break; 1348 1349 case TFLOAT32: 1350 case TFLOAT64: 1351 i = BtoF(~b); 1352 if(i && r->cost > 0) { 1353 r->regno = i; 1354 return FtoB(i); 1355 } 1356 break; 1357 } 1358 return 0; 1359 } 1360 1361 void 1362 paint1(Reg *r, int bn) 1363 { 1364 Reg *r1; 1365 int z; 1366 uint32 bb; 1367 1368 z = bn/32; 1369 bb = 1L<<(bn%32); 1370 if(r->act.b[z] & bb) 1371 return; 1372 for(;;) { 1373 if(!(r->refbehind.b[z] & bb)) 1374 break; 1375 r1 = r->p1; 1376 if(r1 == R) 1377 break; 1378 if(!(r1->refahead.b[z] & bb)) 1379 break; 1380 if(r1->act.b[z] & bb) 1381 break; 1382 r = r1; 1383 } 1384 1385 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 1386 change -= CLOAD * r->loop; 1387 } 1388 for(;;) { 1389 r->act.b[z] |= bb; 1390 1391 if(r->use1.b[z] & bb) { 1392 change += CREF * r->loop; 1393 } 1394 1395 if((r->use2.b[z]|r->set.b[z]) & bb) { 1396 change += CREF * r->loop; 1397 } 1398 1399 if(STORE(r) & r->regdiff.b[z] & bb) { 1400 change -= CLOAD * r->loop; 1401 } 1402 1403 if(r->refbehind.b[z] & bb) 1404 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1405 if(r1->refahead.b[z] & bb) 1406 paint1(r1, bn); 1407 1408 if(!(r->refahead.b[z] & bb)) 1409 break; 1410 r1 = r->s2; 1411 if(r1 != R) 1412 if(r1->refbehind.b[z] & bb) 1413 paint1(r1, bn); 1414 r = r->s1; 1415 if(r == R) 1416 break; 1417 if(r->act.b[z] & bb) 1418 break; 1419 if(!(r->refbehind.b[z] & bb)) 1420 break; 1421 } 1422 } 1423 1424 uint32 1425 regset(Reg *r, uint32 bb) 1426 { 1427 uint32 b, set; 1428 Adr v; 1429 int c; 1430 1431 set = 0; 1432 v = zprog.from; 1433 while(b = bb & ~(bb-1)) { 1434 v.type = b & 0xFFFF? BtoR(b): BtoF(b); 1435 if(v.type == 0) 1436 fatal("zero v.type for %#ux", b); 1437 c = copyu(r->prog, &v, A); 1438 if(c == 3) 1439 set |= b; 1440 bb &= ~b; 1441 } 1442 return set; 1443 } 1444 1445 uint32 1446 reguse(Reg *r, uint32 bb) 1447 { 1448 uint32 b, set; 1449 Adr v; 1450 int c; 1451 1452 set = 0; 1453 v = zprog.from; 1454 while(b = bb & ~(bb-1)) { 1455 v.type = b & 0xFFFF? BtoR(b): BtoF(b); 1456 c = copyu(r->prog, &v, A); 1457 if(c == 1 || c == 2 || c == 4) 1458 set |= b; 1459 bb &= ~b; 1460 } 1461 return set; 1462 } 1463 1464 uint32 1465 paint2(Reg *r, int bn) 1466 { 1467 Reg *r1; 1468 int z; 1469 uint32 bb, vreg, x; 1470 1471 z = bn/32; 1472 bb = 1L << (bn%32); 1473 vreg = regbits; 1474 if(!(r->act.b[z] & bb)) 1475 return vreg; 1476 for(;;) { 1477 if(!(r->refbehind.b[z] & bb)) 1478 break; 1479 r1 = r->p1; 1480 if(r1 == R) 1481 break; 1482 if(!(r1->refahead.b[z] & bb)) 1483 break; 1484 if(!(r1->act.b[z] & bb)) 1485 break; 1486 r = r1; 1487 } 1488 for(;;) { 1489 r->act.b[z] &= ~bb; 1490 1491 vreg |= r->regu; 1492 1493 if(r->refbehind.b[z] & bb) 1494 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1495 if(r1->refahead.b[z] & bb) 1496 vreg |= paint2(r1, bn); 1497 1498 if(!(r->refahead.b[z] & bb)) 1499 break; 1500 r1 = r->s2; 1501 if(r1 != R) 1502 if(r1->refbehind.b[z] & bb) 1503 vreg |= paint2(r1, bn); 1504 r = r->s1; 1505 if(r == R) 1506 break; 1507 if(!(r->act.b[z] & bb)) 1508 break; 1509 if(!(r->refbehind.b[z] & bb)) 1510 break; 1511 } 1512 1513 bb = vreg; 1514 for(; r; r=r->s1) { 1515 x = r->regu & ~bb; 1516 if(x) { 1517 vreg |= reguse(r, x); 1518 bb |= regset(r, x); 1519 } 1520 } 1521 return vreg; 1522 } 1523 1524 void 1525 paint3(Reg *r, int bn, int32 rb, int rn) 1526 { 1527 Reg *r1; 1528 Prog *p; 1529 int z; 1530 uint32 bb; 1531 1532 z = bn/32; 1533 bb = 1L << (bn%32); 1534 if(r->act.b[z] & bb) 1535 return; 1536 for(;;) { 1537 if(!(r->refbehind.b[z] & bb)) 1538 break; 1539 r1 = r->p1; 1540 if(r1 == R) 1541 break; 1542 if(!(r1->refahead.b[z] & bb)) 1543 break; 1544 if(r1->act.b[z] & bb) 1545 break; 1546 r = r1; 1547 } 1548 1549 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1550 addmove(r, bn, rn, 0); 1551 for(;;) { 1552 r->act.b[z] |= bb; 1553 p = r->prog; 1554 1555 if(r->use1.b[z] & bb) { 1556 if(debug['R'] && debug['v']) 1557 print("%P", p); 1558 addreg(&p->from, rn); 1559 if(debug['R'] && debug['v']) 1560 print(" ===change== %P\n", p); 1561 } 1562 if((r->use2.b[z]|r->set.b[z]) & bb) { 1563 if(debug['R'] && debug['v']) 1564 print("%P", p); 1565 addreg(&p->to, rn); 1566 if(debug['R'] && debug['v']) 1567 print(" ===change== %P\n", p); 1568 } 1569 1570 if(STORE(r) & r->regdiff.b[z] & bb) 1571 addmove(r, bn, rn, 1); 1572 r->regu |= rb; 1573 1574 if(r->refbehind.b[z] & bb) 1575 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1576 if(r1->refahead.b[z] & bb) 1577 paint3(r1, bn, rb, rn); 1578 1579 if(!(r->refahead.b[z] & bb)) 1580 break; 1581 r1 = r->s2; 1582 if(r1 != R) 1583 if(r1->refbehind.b[z] & bb) 1584 paint3(r1, bn, rb, rn); 1585 r = r->s1; 1586 if(r == R) 1587 break; 1588 if(r->act.b[z] & bb) 1589 break; 1590 if(!(r->refbehind.b[z] & bb)) 1591 break; 1592 } 1593 } 1594 1595 void 1596 addreg(Adr *a, int rn) 1597 { 1598 1599 a->sym = 0; 1600 a->offset = 0; 1601 a->type = rn; 1602 1603 ostats.ncvtreg++; 1604 } 1605 1606 int32 1607 RtoB(int r) 1608 { 1609 1610 if(r < D_AX || r > D_R15) 1611 return 0; 1612 return 1L << (r-D_AX); 1613 } 1614 1615 int 1616 BtoR(int32 b) 1617 { 1618 b &= 0xffffL; 1619 if(b == 0) 1620 return 0; 1621 return bitno(b) + D_AX; 1622 } 1623 1624 /* 1625 * bit reg 1626 * 16 X0 1627 * ... 1628 * 31 X15 1629 */ 1630 int32 1631 FtoB(int f) 1632 { 1633 if(f < D_X0 || f > D_X15) 1634 return 0; 1635 return 1L << (f - D_X0 + 16); 1636 } 1637 1638 int 1639 BtoF(int32 b) 1640 { 1641 1642 b &= 0xFFFF0000L; 1643 if(b == 0) 1644 return 0; 1645 return bitno(b) - 16 + D_X0; 1646 } 1647 1648 void 1649 dumpone(Reg *r) 1650 { 1651 int z; 1652 Bits bit; 1653 1654 print("%d:%P", r->loop, r->prog); 1655 for(z=0; z<BITS; z++) 1656 bit.b[z] = 1657 r->set.b[z] | 1658 r->use1.b[z] | 1659 r->use2.b[z] | 1660 r->refbehind.b[z] | 1661 r->refahead.b[z] | 1662 r->calbehind.b[z] | 1663 r->calahead.b[z] | 1664 r->regdiff.b[z] | 1665 r->act.b[z] | 1666 0; 1667 if(bany(&bit)) { 1668 print("\t"); 1669 if(bany(&r->set)) 1670 print(" s:%Q", r->set); 1671 if(bany(&r->use1)) 1672 print(" u1:%Q", r->use1); 1673 if(bany(&r->use2)) 1674 print(" u2:%Q", r->use2); 1675 if(bany(&r->refbehind)) 1676 print(" rb:%Q ", r->refbehind); 1677 if(bany(&r->refahead)) 1678 print(" ra:%Q ", r->refahead); 1679 if(bany(&r->calbehind)) 1680 print(" cb:%Q ", r->calbehind); 1681 if(bany(&r->calahead)) 1682 print(" ca:%Q ", r->calahead); 1683 if(bany(&r->regdiff)) 1684 print(" d:%Q ", r->regdiff); 1685 if(bany(&r->act)) 1686 print(" a:%Q ", r->act); 1687 } 1688 print("\n"); 1689 } 1690 1691 void 1692 dumpit(char *str, Reg *r0) 1693 { 1694 Reg *r, *r1; 1695 1696 print("\n%s\n", str); 1697 for(r = r0; r != R; r = r->link) { 1698 dumpone(r); 1699 r1 = r->p2; 1700 if(r1 != R) { 1701 print(" pred:"); 1702 for(; r1 != R; r1 = r1->p2link) 1703 print(" %.4ud", r1->prog->loc); 1704 print("\n"); 1705 } 1706 // r1 = r->s1; 1707 // if(r1 != R) { 1708 // print(" succ:"); 1709 // for(; r1 != R; r1 = r1->s1) 1710 // print(" %.4ud", r1->prog->loc); 1711 // print("\n"); 1712 // } 1713 } 1714 } 1715 1716 static Sym* symlist[10]; 1717 1718 int 1719 noreturn(Prog *p) 1720 { 1721 Sym *s; 1722 int i; 1723 1724 if(symlist[0] == S) { 1725 symlist[0] = pkglookup("panicindex", runtimepkg); 1726 symlist[1] = pkglookup("panicslice", runtimepkg); 1727 symlist[2] = pkglookup("throwinit", runtimepkg); 1728 symlist[3] = pkglookup("panic", runtimepkg); 1729 symlist[4] = pkglookup("panicwrap", runtimepkg); 1730 } 1731 1732 s = p->to.sym; 1733 if(s == S) 1734 return 0; 1735 for(i=0; symlist[i]!=S; i++) 1736 if(s == symlist[i]) 1737 return 1; 1738 return 0; 1739 } 1740 1741 /* 1742 * the code generator depends on being able to write out JMP 1743 * instructions that it can jump to now but fill in later. 1744 * the linker will resolve them nicely, but they make the code 1745 * longer and more difficult to follow during debugging. 1746 * remove them. 1747 */ 1748 1749 /* what instruction does a JMP to p eventually land on? */ 1750 static Prog* 1751 chasejmp(Prog *p, int *jmploop) 1752 { 1753 int n; 1754 1755 n = 0; 1756 while(p != P && p->as == AJMP && p->to.type == D_BRANCH) { 1757 if(++n > 10) { 1758 *jmploop = 1; 1759 break; 1760 } 1761 p = p->to.u.branch; 1762 } 1763 return p; 1764 } 1765 1766 /* 1767 * reuse reg pointer for mark/sweep state. 1768 * leave reg==nil at end because alive==nil. 1769 */ 1770 #define alive ((void*)0) 1771 #define dead ((void*)1) 1772 1773 /* mark all code reachable from firstp as alive */ 1774 static void 1775 mark(Prog *firstp) 1776 { 1777 Prog *p; 1778 1779 for(p=firstp; p; p=p->link) { 1780 if(p->reg != dead) 1781 break; 1782 p->reg = alive; 1783 if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch) 1784 mark(p->to.u.branch); 1785 if(p->as == AJMP || p->as == ARET || p->as == AUNDEF) 1786 break; 1787 } 1788 } 1789 1790 static void 1791 fixjmp(Prog *firstp) 1792 { 1793 int jmploop; 1794 Prog *p, *last; 1795 1796 if(debug['R'] && debug['v']) 1797 print("\nfixjmp\n"); 1798 1799 // pass 1: resolve jump to AJMP, mark all code as dead. 1800 jmploop = 0; 1801 for(p=firstp; p; p=p->link) { 1802 if(debug['R'] && debug['v']) 1803 print("%P\n", p); 1804 if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) { 1805 p->to.u.branch = chasejmp(p->to.u.branch, &jmploop); 1806 if(debug['R'] && debug['v']) 1807 print("->%P\n", p); 1808 } 1809 p->reg = dead; 1810 } 1811 if(debug['R'] && debug['v']) 1812 print("\n"); 1813 1814 // pass 2: mark all reachable code alive 1815 mark(firstp); 1816 1817 // pass 3: delete dead code (mostly JMPs). 1818 last = nil; 1819 for(p=firstp; p; p=p->link) { 1820 if(p->reg == dead) { 1821 if(p->link == P && p->as == ARET && last && last->as != ARET) { 1822 // This is the final ARET, and the code so far doesn't have one. 1823 // Let it stay. 1824 } else { 1825 if(debug['R'] && debug['v']) 1826 print("del %P\n", p); 1827 continue; 1828 } 1829 } 1830 if(last) 1831 last->link = p; 1832 last = p; 1833 } 1834 last->link = P; 1835 1836 // pass 4: elide JMP to next instruction. 1837 // only safe if there are no jumps to JMPs anymore. 1838 if(!jmploop) { 1839 last = nil; 1840 for(p=firstp; p; p=p->link) { 1841 if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) { 1842 if(debug['R'] && debug['v']) 1843 print("del %P\n", p); 1844 continue; 1845 } 1846 if(last) 1847 last->link = p; 1848 last = p; 1849 } 1850 last->link = P; 1851 } 1852 1853 if(debug['R'] && debug['v']) { 1854 print("\n"); 1855 for(p=firstp; p; p=p->link) 1856 print("%P\n", p); 1857 print("\n"); 1858 } 1859 }