// Source listing: github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/8g/reg.c
//
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 16 /* 8 integer + 8 floating */ 37 #define REGBITS ((uint32)0xffff) 38 #define P2R(p) (Reg*)(p->reg) 39 40 static int first = 1; 41 42 static void fixjmp(Prog*); 43 static void fixtemp(Prog*); 44 45 Reg* 46 rega(void) 47 { 48 Reg *r; 49 50 r = freer; 51 if(r == R) { 52 r = mal(sizeof(*r)); 53 } else 54 freer = r->link; 55 56 *r = zreg; 57 return r; 58 } 59 60 int 61 rcmp(const void *a1, const void *a2) 62 { 63 Rgn *p1, *p2; 64 int c1, c2; 65 66 p1 = (Rgn*)a1; 67 p2 = (Rgn*)a2; 68 c1 = p2->cost; 69 c2 = p1->cost; 70 if(c1 -= c2) 71 return c1; 72 return p2->varno - p1->varno; 73 } 74 75 static void 76 setoutvar(void) 77 { 78 Type *t; 79 Node *n; 80 Addr a; 81 Iter save; 82 Bits bit; 83 int z; 84 85 t = structfirst(&save, getoutarg(curfn->type)); 86 while(t != T) { 87 n = nodarg(t, 1); 88 a = zprog.from; 89 naddr(n, &a, 0); 90 bit = mkvar(R, &a); 91 for(z=0; z<BITS; z++) 92 ovar.b[z] |= bit.b[z]; 93 t = structnext(&save); 94 } 95 //if(bany(ovar)) 96 //print("ovars = %Q\n", ovar); 97 } 98 99 static void 100 setaddrs(Bits bit) 101 { 102 int i, n; 103 Var *v; 104 Node *node; 105 106 while(bany(&bit)) { 107 // convert each bit to a variable 108 i = bnum(bit); 109 node = var[i].node; 110 n = var[i].name; 111 bit.b[i/32] &= ~(1L<<(i%32)); 112 113 // disable all pieces of that variable 114 for(i=0; i<nvar; i++) { 115 v = var+i; 116 if(v->node == node && v->name == n) 117 v->addr = 2; 118 } 119 } 120 } 121 122 static char* regname[] = { 123 ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di", 124 ".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7", 125 }; 126 127 static Node* regnodes[NREGVAR]; 128 129 void 130 regopt(Prog *firstp) 131 { 132 Reg *r, *r1; 133 Prog *p; 134 int i, z, nr; 135 uint32 vreg; 136 Bits bit; 137 138 if(first) { 139 fmtinstall('Q', Qconv); 140 exregoffset = D_DI; // no externals 141 first = 0; 142 } 143 144 fixtemp(firstp); 145 fixjmp(firstp); 
146 147 // count instructions 148 nr = 0; 149 for(p=firstp; p!=P; p=p->link) 150 nr++; 151 // if too big dont bother 152 if(nr >= 10000) { 153 // print("********** %S is too big (%d)\n", curfn->nname->sym, nr); 154 return; 155 } 156 157 firstr = R; 158 lastr = R; 159 160 /* 161 * control flow is more complicated in generated go code 162 * than in generated c code. define pseudo-variables for 163 * registers, so we have complete register usage information. 164 */ 165 nvar = NREGVAR; 166 memset(var, 0, NREGVAR*sizeof var[0]); 167 for(i=0; i<NREGVAR; i++) { 168 if(regnodes[i] == N) 169 regnodes[i] = newname(lookup(regname[i])); 170 var[i].node = regnodes[i]; 171 } 172 173 regbits = RtoB(D_SP); 174 for(z=0; z<BITS; z++) { 175 externs.b[z] = 0; 176 params.b[z] = 0; 177 consts.b[z] = 0; 178 addrs.b[z] = 0; 179 ovar.b[z] = 0; 180 } 181 182 // build list of return variables 183 setoutvar(); 184 185 /* 186 * pass 1 187 * build aux data structure 188 * allocate pcs 189 * find use and set of variables 190 */ 191 nr = 0; 192 for(p=firstp; p!=P; p=p->link) { 193 switch(p->as) { 194 case ADATA: 195 case AGLOBL: 196 case ANAME: 197 case ASIGNAME: 198 case ALOCALS: 199 case ATYPE: 200 continue; 201 } 202 r = rega(); 203 nr++; 204 if(firstr == R) { 205 firstr = r; 206 lastr = r; 207 } else { 208 lastr->link = r; 209 r->p1 = lastr; 210 lastr->s1 = r; 211 lastr = r; 212 } 213 r->prog = p; 214 p->reg = r; 215 216 r1 = r->p1; 217 if(r1 != R) { 218 switch(r1->prog->as) { 219 case ARET: 220 case AJMP: 221 case AIRETL: 222 r->p1 = R; 223 r1->s1 = R; 224 } 225 } 226 227 // Avoid making variables for direct-called functions. 228 if(p->as == ACALL && p->to.type == D_EXTERN) 229 continue; 230 231 // Addressing makes some registers used. 
232 if(p->from.type >= D_INDIR) 233 r->use1.b[0] |= RtoB(p->from.type-D_INDIR); 234 if(p->from.index != D_NONE) 235 r->use1.b[0] |= RtoB(p->from.index); 236 if(p->to.type >= D_INDIR) 237 r->use2.b[0] |= RtoB(p->to.type-D_INDIR); 238 if(p->to.index != D_NONE) 239 r->use2.b[0] |= RtoB(p->to.index); 240 241 bit = mkvar(r, &p->from); 242 if(bany(&bit)) 243 switch(p->as) { 244 /* 245 * funny 246 */ 247 case ALEAL: 248 case AFMOVD: 249 case AFMOVF: 250 case AFMOVL: 251 case AFMOVW: 252 case AFMOVV: 253 setaddrs(bit); 254 break; 255 256 /* 257 * left side read 258 */ 259 default: 260 for(z=0; z<BITS; z++) 261 r->use1.b[z] |= bit.b[z]; 262 break; 263 264 /* 265 * left side read+write 266 */ 267 case AXCHGB: 268 case AXCHGW: 269 case AXCHGL: 270 for(z=0; z<BITS; z++) { 271 r->use1.b[z] |= bit.b[z]; 272 r->set.b[z] |= bit.b[z]; 273 } 274 break; 275 } 276 277 bit = mkvar(r, &p->to); 278 if(bany(&bit)) 279 switch(p->as) { 280 default: 281 yyerror("reg: unknown op: %A", p->as); 282 break; 283 284 /* 285 * right side read 286 */ 287 case ACMPB: 288 case ACMPL: 289 case ACMPW: 290 case ACOMISS: 291 case ACOMISD: 292 case AUCOMISS: 293 case AUCOMISD: 294 case ATESTB: 295 case ATESTL: 296 case ATESTW: 297 for(z=0; z<BITS; z++) 298 r->use2.b[z] |= bit.b[z]; 299 break; 300 301 /* 302 * right side write 303 */ 304 case AFSTSW: 305 case ALEAL: 306 case ANOP: 307 case AMOVL: 308 case AMOVB: 309 case AMOVW: 310 case AMOVBLSX: 311 case AMOVBLZX: 312 case AMOVBWSX: 313 case AMOVBWZX: 314 case AMOVWLSX: 315 case AMOVWLZX: 316 case APOPL: 317 318 case AMOVSS: 319 case AMOVSD: 320 case ACVTSD2SL: 321 case ACVTSD2SS: 322 case ACVTSL2SD: 323 case ACVTSL2SS: 324 case ACVTSS2SD: 325 case ACVTSS2SL: 326 case ACVTTSD2SL: 327 case ACVTTSS2SL: 328 for(z=0; z<BITS; z++) 329 r->set.b[z] |= bit.b[z]; 330 break; 331 332 /* 333 * right side read+write 334 */ 335 case AINCB: 336 case AINCL: 337 case AINCW: 338 case ADECB: 339 case ADECL: 340 case ADECW: 341 342 case AADDB: 343 case AADDL: 344 case AADDW: 
345 case AANDB: 346 case AANDL: 347 case AANDW: 348 case ASUBB: 349 case ASUBL: 350 case ASUBW: 351 case AORB: 352 case AORL: 353 case AORW: 354 case AXORB: 355 case AXORL: 356 case AXORW: 357 case ASALB: 358 case ASALL: 359 case ASALW: 360 case ASARB: 361 case ASARL: 362 case ASARW: 363 case ARCLB: 364 case ARCLL: 365 case ARCLW: 366 case ARCRB: 367 case ARCRL: 368 case ARCRW: 369 case AROLB: 370 case AROLL: 371 case AROLW: 372 case ARORB: 373 case ARORL: 374 case ARORW: 375 case ASHLB: 376 case ASHLL: 377 case ASHLW: 378 case ASHRB: 379 case ASHRL: 380 case ASHRW: 381 case AIMULL: 382 case AIMULW: 383 case ANEGB: 384 case ANEGL: 385 case ANEGW: 386 case ANOTB: 387 case ANOTL: 388 case ANOTW: 389 case AADCL: 390 case ASBBL: 391 392 case ASETCC: 393 case ASETCS: 394 case ASETEQ: 395 case ASETGE: 396 case ASETGT: 397 case ASETHI: 398 case ASETLE: 399 case ASETLS: 400 case ASETLT: 401 case ASETMI: 402 case ASETNE: 403 case ASETOC: 404 case ASETOS: 405 case ASETPC: 406 case ASETPL: 407 case ASETPS: 408 409 case AXCHGB: 410 case AXCHGW: 411 case AXCHGL: 412 413 case AADDSD: 414 case AADDSS: 415 case ACMPSD: 416 case ACMPSS: 417 case ADIVSD: 418 case ADIVSS: 419 case AMAXSD: 420 case AMAXSS: 421 case AMINSD: 422 case AMINSS: 423 case AMULSD: 424 case AMULSS: 425 case ARCPSS: 426 case ARSQRTSS: 427 case ASQRTSD: 428 case ASQRTSS: 429 case ASUBSD: 430 case ASUBSS: 431 case AXORPD: 432 for(z=0; z<BITS; z++) { 433 r->set.b[z] |= bit.b[z]; 434 r->use2.b[z] |= bit.b[z]; 435 } 436 break; 437 438 /* 439 * funny 440 */ 441 case AFMOVDP: 442 case AFMOVFP: 443 case AFMOVLP: 444 case AFMOVVP: 445 case AFMOVWP: 446 case ACALL: 447 setaddrs(bit); 448 break; 449 } 450 451 switch(p->as) { 452 case AIMULL: 453 case AIMULW: 454 if(p->to.type != D_NONE) 455 break; 456 457 case AIDIVL: 458 case AIDIVW: 459 case ADIVL: 460 case ADIVW: 461 case AMULL: 462 case AMULW: 463 r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX); 464 r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX); 465 break; 466 467 case AIDIVB: 468 
case AIMULB: 469 case ADIVB: 470 case AMULB: 471 r->set.b[0] |= RtoB(D_AX); 472 r->use1.b[0] |= RtoB(D_AX); 473 break; 474 475 case ACWD: 476 r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX); 477 r->use1.b[0] |= RtoB(D_AX); 478 break; 479 480 case ACDQ: 481 r->set.b[0] |= RtoB(D_DX); 482 r->use1.b[0] |= RtoB(D_AX); 483 break; 484 485 case AREP: 486 case AREPN: 487 case ALOOP: 488 case ALOOPEQ: 489 case ALOOPNE: 490 r->set.b[0] |= RtoB(D_CX); 491 r->use1.b[0] |= RtoB(D_CX); 492 break; 493 494 case AMOVSB: 495 case AMOVSL: 496 case AMOVSW: 497 case ACMPSB: 498 case ACMPSL: 499 case ACMPSW: 500 r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI); 501 r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI); 502 break; 503 504 case ASTOSB: 505 case ASTOSL: 506 case ASTOSW: 507 case ASCASB: 508 case ASCASL: 509 case ASCASW: 510 r->set.b[0] |= RtoB(D_DI); 511 r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI); 512 break; 513 514 case AINSB: 515 case AINSL: 516 case AINSW: 517 r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI); 518 r->use1.b[0] |= RtoB(D_DI); 519 break; 520 521 case AOUTSB: 522 case AOUTSL: 523 case AOUTSW: 524 r->set.b[0] |= RtoB(D_DI); 525 r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI); 526 break; 527 } 528 } 529 if(firstr == R) 530 return; 531 532 for(i=0; i<nvar; i++) { 533 Var *v = var+i; 534 if(v->addr) { 535 bit = blsh(i); 536 for(z=0; z<BITS; z++) 537 addrs.b[z] |= bit.b[z]; 538 } 539 540 if(debug['R'] && debug['v']) 541 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 542 i, v->addr, v->etype, v->width, v->node, v->offset); 543 } 544 545 if(debug['R'] && debug['v']) 546 dumpit("pass1", firstr); 547 548 /* 549 * pass 2 550 * turn branch references to pointers 551 * build back pointers 552 */ 553 for(r=firstr; r!=R; r=r->link) { 554 p = r->prog; 555 if(p->to.type == D_BRANCH) { 556 if(p->to.u.branch == P) 557 fatal("pnil %P", p); 558 r1 = p->to.u.branch->reg; 559 if(r1 == R) 560 fatal("rnil %P", p); 561 if(r1 == r) { 562 //fatal("ref to self %P", p); 563 continue; 564 } 565 r->s2 = r1; 566 r->p2link = r1->p2; 
567 r1->p2 = r; 568 } 569 } 570 571 if(debug['R'] && debug['v']) 572 dumpit("pass2", firstr); 573 574 /* 575 * pass 2.5 576 * find looping structure 577 */ 578 for(r = firstr; r != R; r = r->link) 579 r->active = 0; 580 change = 0; 581 loopit(firstr, nr); 582 583 if(debug['R'] && debug['v']) 584 dumpit("pass2.5", firstr); 585 586 /* 587 * pass 3 588 * iterate propagating usage 589 * back until flow graph is complete 590 */ 591 loop1: 592 change = 0; 593 for(r = firstr; r != R; r = r->link) 594 r->active = 0; 595 for(r = firstr; r != R; r = r->link) 596 if(r->prog->as == ARET) 597 prop(r, zbits, zbits); 598 loop11: 599 /* pick up unreachable code */ 600 i = 0; 601 for(r = firstr; r != R; r = r1) { 602 r1 = r->link; 603 if(r1 && r1->active && !r->active) { 604 prop(r, zbits, zbits); 605 i = 1; 606 } 607 } 608 if(i) 609 goto loop11; 610 if(change) 611 goto loop1; 612 613 if(debug['R'] && debug['v']) 614 dumpit("pass3", firstr); 615 616 /* 617 * pass 4 618 * iterate propagating register/variable synchrony 619 * forward until graph is complete 620 */ 621 loop2: 622 change = 0; 623 for(r = firstr; r != R; r = r->link) 624 r->active = 0; 625 synch(firstr, zbits); 626 if(change) 627 goto loop2; 628 629 if(debug['R'] && debug['v']) 630 dumpit("pass4", firstr); 631 632 /* 633 * pass 4.5 634 * move register pseudo-variables into regu. 
635 */ 636 for(r = firstr; r != R; r = r->link) { 637 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 638 639 r->set.b[0] &= ~REGBITS; 640 r->use1.b[0] &= ~REGBITS; 641 r->use2.b[0] &= ~REGBITS; 642 r->refbehind.b[0] &= ~REGBITS; 643 r->refahead.b[0] &= ~REGBITS; 644 r->calbehind.b[0] &= ~REGBITS; 645 r->calahead.b[0] &= ~REGBITS; 646 r->regdiff.b[0] &= ~REGBITS; 647 r->act.b[0] &= ~REGBITS; 648 } 649 650 /* 651 * pass 5 652 * isolate regions 653 * calculate costs (paint1) 654 */ 655 r = firstr; 656 if(r) { 657 for(z=0; z<BITS; z++) 658 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 659 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 660 if(bany(&bit) && !r->refset) { 661 // should never happen - all variables are preset 662 if(debug['w']) 663 print("%L: used and not set: %Q\n", r->prog->lineno, bit); 664 r->refset = 1; 665 } 666 } 667 for(r = firstr; r != R; r = r->link) 668 r->act = zbits; 669 rgp = region; 670 nregion = 0; 671 for(r = firstr; r != R; r = r->link) { 672 for(z=0; z<BITS; z++) 673 bit.b[z] = r->set.b[z] & 674 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 675 if(bany(&bit) && !r->refset) { 676 if(debug['w']) 677 print("%L: set and not used: %Q\n", r->prog->lineno, bit); 678 r->refset = 1; 679 excise(r); 680 } 681 for(z=0; z<BITS; z++) 682 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 683 while(bany(&bit)) { 684 i = bnum(bit); 685 rgp->enter = r; 686 rgp->varno = i; 687 change = 0; 688 paint1(r, i); 689 bit.b[i/32] &= ~(1L<<(i%32)); 690 if(change <= 0) 691 continue; 692 rgp->cost = change; 693 nregion++; 694 if(nregion >= NRGN) { 695 if(debug['R'] && debug['v']) 696 print("too many regions\n"); 697 goto brk; 698 } 699 rgp++; 700 } 701 } 702 brk: 703 qsort(region, nregion, sizeof(region[0]), rcmp); 704 705 /* 706 * pass 6 707 * determine used registers (paint2) 708 * replace code (paint3) 709 */ 710 rgp = region; 711 for(i=0; i<nregion; i++) { 712 bit = blsh(rgp->varno); 713 vreg = paint2(rgp->enter, rgp->varno); 714 
vreg = allreg(vreg, rgp); 715 if(rgp->regno != 0) 716 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 717 rgp++; 718 } 719 720 if(debug['R'] && debug['v']) 721 dumpit("pass6", firstr); 722 723 /* 724 * pass 7 725 * peep-hole on basic block 726 */ 727 if(!debug['R'] || debug['P']) { 728 peep(); 729 } 730 731 /* 732 * eliminate nops 733 * free aux structures 734 */ 735 for(p=firstp; p!=P; p=p->link) { 736 while(p->link != P && p->link->as == ANOP) 737 p->link = p->link->link; 738 if(p->to.type == D_BRANCH) 739 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 740 p->to.u.branch = p->to.u.branch->link; 741 } 742 743 if(!use_sse) 744 for(p=firstp; p!=P; p=p->link) { 745 if(p->from.type >= D_X0 && p->from.type <= D_X7) 746 fatal("invalid use of %R with GO386=387: %P", p->from.type, p); 747 if(p->to.type >= D_X0 && p->to.type <= D_X7) 748 fatal("invalid use of %R with GO386=387: %P", p->to.type, p); 749 } 750 751 if(lastr != R) { 752 lastr->link = freer; 753 freer = firstr; 754 } 755 756 if(debug['R']) { 757 if(ostats.ncvtreg || 758 ostats.nspill || 759 ostats.nreload || 760 ostats.ndelmov || 761 ostats.nvar || 762 ostats.naddr || 763 0) 764 print("\nstats\n"); 765 766 if(ostats.ncvtreg) 767 print(" %4d cvtreg\n", ostats.ncvtreg); 768 if(ostats.nspill) 769 print(" %4d spill\n", ostats.nspill); 770 if(ostats.nreload) 771 print(" %4d reload\n", ostats.nreload); 772 if(ostats.ndelmov) 773 print(" %4d delmov\n", ostats.ndelmov); 774 if(ostats.nvar) 775 print(" %4d var\n", ostats.nvar); 776 if(ostats.naddr) 777 print(" %4d addr\n", ostats.naddr); 778 779 memset(&ostats, 0, sizeof(ostats)); 780 } 781 } 782 783 /* 784 * add mov b,rn 785 * just after r 786 */ 787 void 788 addmove(Reg *r, int bn, int rn, int f) 789 { 790 Prog *p, *p1; 791 Adr *a; 792 Var *v; 793 794 p1 = mal(sizeof(*p1)); 795 clearp(p1); 796 p1->loc = 9999; 797 798 p = r->prog; 799 p1->link = p->link; 800 p->link = p1; 801 p1->lineno = p->lineno; 802 803 v = var + bn; 804 805 a = &p1->to; 806 a->offset = 
v->offset; 807 a->etype = v->etype; 808 a->type = v->name; 809 a->node = v->node; 810 a->sym = v->node->sym; 811 812 // need to clean this up with wptr and 813 // some of the defaults 814 p1->as = AMOVL; 815 switch(v->etype) { 816 default: 817 fatal("unknown type %E", v->etype); 818 case TINT8: 819 case TUINT8: 820 case TBOOL: 821 p1->as = AMOVB; 822 break; 823 case TINT16: 824 case TUINT16: 825 p1->as = AMOVW; 826 break; 827 case TFLOAT32: 828 p1->as = AMOVSS; 829 break; 830 case TFLOAT64: 831 p1->as = AMOVSD; 832 break; 833 case TINT: 834 case TUINT: 835 case TINT32: 836 case TUINT32: 837 case TPTR32: 838 break; 839 } 840 841 p1->from.type = rn; 842 if(!f) { 843 p1->from = *a; 844 *a = zprog.from; 845 a->type = rn; 846 if(v->etype == TUINT8) 847 p1->as = AMOVB; 848 if(v->etype == TUINT16) 849 p1->as = AMOVW; 850 } 851 if(debug['R'] && debug['v']) 852 print("%P ===add=== %P\n", p, p1); 853 ostats.nspill++; 854 } 855 856 uint32 857 doregbits(int r) 858 { 859 uint32 b; 860 861 b = 0; 862 if(r >= D_INDIR) 863 r -= D_INDIR; 864 if(r >= D_AX && r <= D_DI) 865 b |= RtoB(r); 866 else 867 if(r >= D_AL && r <= D_BL) 868 b |= RtoB(r-D_AL+D_AX); 869 else 870 if(r >= D_AH && r <= D_BH) 871 b |= RtoB(r-D_AH+D_AX); 872 else 873 if(r >= D_X0 && r <= D_X0+7) 874 b |= FtoB(r); 875 return b; 876 } 877 878 static int 879 overlap(int32 o1, int w1, int32 o2, int w2) 880 { 881 int32 t1, t2; 882 883 t1 = o1+w1; 884 t2 = o2+w2; 885 886 if(!(t1 > o2 && t2 > o1)) 887 return 0; 888 889 return 1; 890 } 891 892 Bits 893 mkvar(Reg *r, Adr *a) 894 { 895 Var *v; 896 int i, t, n, et, z, w, flag, regu; 897 int32 o; 898 Bits bit; 899 Node *node; 900 901 /* 902 * mark registers used 903 */ 904 t = a->type; 905 if(t == D_NONE) 906 goto none; 907 908 if(r != R) 909 r->use1.b[0] |= doregbits(a->index); 910 911 switch(t) { 912 default: 913 regu = doregbits(t); 914 if(regu == 0) 915 goto none; 916 bit = zbits; 917 bit.b[0] = regu; 918 return bit; 919 920 case D_ADDR: 921 a->type = a->index; 922 bit = 
mkvar(r, a); 923 setaddrs(bit); 924 a->type = t; 925 ostats.naddr++; 926 goto none; 927 928 case D_EXTERN: 929 case D_STATIC: 930 case D_PARAM: 931 case D_AUTO: 932 n = t; 933 break; 934 } 935 936 node = a->node; 937 if(node == N || node->op != ONAME || node->orig == N) 938 goto none; 939 node = node->orig; 940 if(node->orig != node) 941 fatal("%D: bad node", a); 942 if(node->sym == S || node->sym->name[0] == '.') 943 goto none; 944 et = a->etype; 945 o = a->offset; 946 w = a->width; 947 if(w < 0) 948 fatal("bad width %d for %D", w, a); 949 950 flag = 0; 951 for(i=0; i<nvar; i++) { 952 v = var+i; 953 if(v->node == node && v->name == n) { 954 if(v->offset == o) 955 if(v->etype == et) 956 if(v->width == w) 957 return blsh(i); 958 959 // if they overlap, disable both 960 if(overlap(v->offset, v->width, o, w)) { 961 if(debug['R']) 962 print("disable %s\n", node->sym->name); 963 v->addr = 1; 964 flag = 1; 965 } 966 } 967 } 968 969 switch(et) { 970 case 0: 971 case TFUNC: 972 goto none; 973 } 974 975 if(nvar >= NVAR) { 976 if(debug['w'] > 1 && node != N) 977 fatal("variable not optimized: %D", a); 978 goto none; 979 } 980 981 i = nvar; 982 nvar++; 983 v = var+i; 984 v->offset = o; 985 v->name = n; 986 v->etype = et; 987 v->width = w; 988 v->addr = flag; // funny punning 989 v->node = node; 990 991 if(debug['R']) 992 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 993 ostats.nvar++; 994 995 bit = blsh(i); 996 if(n == D_EXTERN || n == D_STATIC) 997 for(z=0; z<BITS; z++) 998 externs.b[z] |= bit.b[z]; 999 if(n == D_PARAM) 1000 for(z=0; z<BITS; z++) 1001 params.b[z] |= bit.b[z]; 1002 1003 return bit; 1004 1005 none: 1006 return zbits; 1007 } 1008 1009 void 1010 prop(Reg *r, Bits ref, Bits cal) 1011 { 1012 Reg *r1, *r2; 1013 int z; 1014 1015 for(r1 = r; r1 != R; r1 = r1->p1) { 1016 for(z=0; z<BITS; z++) { 1017 ref.b[z] |= r1->refahead.b[z]; 1018 if(ref.b[z] != r1->refahead.b[z]) { 1019 r1->refahead.b[z] = ref.b[z]; 1020 change++; 1021 } 1022 
cal.b[z] |= r1->calahead.b[z]; 1023 if(cal.b[z] != r1->calahead.b[z]) { 1024 r1->calahead.b[z] = cal.b[z]; 1025 change++; 1026 } 1027 } 1028 switch(r1->prog->as) { 1029 case ACALL: 1030 if(noreturn(r1->prog)) 1031 break; 1032 for(z=0; z<BITS; z++) { 1033 cal.b[z] |= ref.b[z] | externs.b[z]; 1034 ref.b[z] = 0; 1035 } 1036 break; 1037 1038 case ATEXT: 1039 for(z=0; z<BITS; z++) { 1040 cal.b[z] = 0; 1041 ref.b[z] = 0; 1042 } 1043 break; 1044 1045 case ARET: 1046 for(z=0; z<BITS; z++) { 1047 cal.b[z] = externs.b[z] | ovar.b[z]; 1048 ref.b[z] = 0; 1049 } 1050 break; 1051 1052 default: 1053 // Work around for issue 1304: 1054 // flush modified globals before each instruction. 1055 for(z=0; z<BITS; z++) { 1056 cal.b[z] |= externs.b[z]; 1057 // issue 4066: flush modified return variables in case of panic 1058 if(hasdefer) 1059 cal.b[z] |= ovar.b[z]; 1060 } 1061 break; 1062 } 1063 for(z=0; z<BITS; z++) { 1064 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 1065 r1->use1.b[z] | r1->use2.b[z]; 1066 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 1067 r1->refbehind.b[z] = ref.b[z]; 1068 r1->calbehind.b[z] = cal.b[z]; 1069 } 1070 if(r1->active) 1071 break; 1072 r1->active = 1; 1073 } 1074 for(; r != r1; r = r->p1) 1075 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 1076 prop(r2, r->refbehind, r->calbehind); 1077 } 1078 1079 /* 1080 * find looping structure 1081 * 1082 * 1) find reverse postordering 1083 * 2) find approximate dominators, 1084 * the actual dominators if the flow graph is reducible 1085 * otherwise, dominators plus some other non-dominators. 1086 * See Matthew S. Hecht and Jeffrey D. Ullman, 1087 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 1088 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 1089 * Oct. 1-3, 1973, pp. 207-217. 1090 * 3) find all nodes with a predecessor dominated by the current node. 1091 * such a node is a loop head. 
1092 * recursively, all preds with a greater rpo number are in the loop 1093 */ 1094 int32 1095 postorder(Reg *r, Reg **rpo2r, int32 n) 1096 { 1097 Reg *r1; 1098 1099 r->rpo = 1; 1100 r1 = r->s1; 1101 if(r1 && !r1->rpo) 1102 n = postorder(r1, rpo2r, n); 1103 r1 = r->s2; 1104 if(r1 && !r1->rpo) 1105 n = postorder(r1, rpo2r, n); 1106 rpo2r[n] = r; 1107 n++; 1108 return n; 1109 } 1110 1111 int32 1112 rpolca(int32 *idom, int32 rpo1, int32 rpo2) 1113 { 1114 int32 t; 1115 1116 if(rpo1 == -1) 1117 return rpo2; 1118 while(rpo1 != rpo2){ 1119 if(rpo1 > rpo2){ 1120 t = rpo2; 1121 rpo2 = rpo1; 1122 rpo1 = t; 1123 } 1124 while(rpo1 < rpo2){ 1125 t = idom[rpo2]; 1126 if(t >= rpo2) 1127 fatal("bad idom"); 1128 rpo2 = t; 1129 } 1130 } 1131 return rpo1; 1132 } 1133 1134 int 1135 doms(int32 *idom, int32 r, int32 s) 1136 { 1137 while(s > r) 1138 s = idom[s]; 1139 return s == r; 1140 } 1141 1142 int 1143 loophead(int32 *idom, Reg *r) 1144 { 1145 int32 src; 1146 1147 src = r->rpo; 1148 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 1149 return 1; 1150 for(r = r->p2; r != R; r = r->p2link) 1151 if(doms(idom, src, r->rpo)) 1152 return 1; 1153 return 0; 1154 } 1155 1156 void 1157 loopmark(Reg **rpo2r, int32 head, Reg *r) 1158 { 1159 if(r->rpo < head || r->active == head) 1160 return; 1161 r->active = head; 1162 r->loop += LOOP; 1163 if(r->p1 != R) 1164 loopmark(rpo2r, head, r->p1); 1165 for(r = r->p2; r != R; r = r->p2link) 1166 loopmark(rpo2r, head, r); 1167 } 1168 1169 void 1170 loopit(Reg *r, int32 nr) 1171 { 1172 Reg *r1; 1173 int32 i, d, me; 1174 1175 if(nr > maxnr) { 1176 rpo2r = mal(nr * sizeof(Reg*)); 1177 idom = mal(nr * sizeof(int32)); 1178 maxnr = nr; 1179 } 1180 1181 d = postorder(r, rpo2r, 0); 1182 if(d > nr) 1183 fatal("too many reg nodes %d %d", d, nr); 1184 nr = d; 1185 for(i = 0; i < nr / 2; i++) { 1186 r1 = rpo2r[i]; 1187 rpo2r[i] = rpo2r[nr - 1 - i]; 1188 rpo2r[nr - 1 - i] = r1; 1189 } 1190 for(i = 0; i < nr; i++) 1191 rpo2r[i]->rpo = i; 1192 1193 idom[0] = 0; 1194 
for(i = 0; i < nr; i++) { 1195 r1 = rpo2r[i]; 1196 me = r1->rpo; 1197 d = -1; 1198 // rpo2r[r->rpo] == r protects against considering dead code, 1199 // which has r->rpo == 0. 1200 if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me) 1201 d = r1->p1->rpo; 1202 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 1203 if(rpo2r[r1->rpo] == r1 && r1->rpo < me) 1204 d = rpolca(idom, d, r1->rpo); 1205 idom[i] = d; 1206 } 1207 1208 for(i = 0; i < nr; i++) { 1209 r1 = rpo2r[i]; 1210 r1->loop++; 1211 if(r1->p2 != R && loophead(idom, r1)) 1212 loopmark(rpo2r, i, r1); 1213 } 1214 } 1215 1216 void 1217 synch(Reg *r, Bits dif) 1218 { 1219 Reg *r1; 1220 int z; 1221 1222 for(r1 = r; r1 != R; r1 = r1->s1) { 1223 for(z=0; z<BITS; z++) { 1224 dif.b[z] = (dif.b[z] & 1225 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 1226 r1->set.b[z] | r1->regdiff.b[z]; 1227 if(dif.b[z] != r1->regdiff.b[z]) { 1228 r1->regdiff.b[z] = dif.b[z]; 1229 change++; 1230 } 1231 } 1232 if(r1->active) 1233 break; 1234 r1->active = 1; 1235 for(z=0; z<BITS; z++) 1236 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 1237 if(r1->s2 != R) 1238 synch(r1->s2, dif); 1239 } 1240 } 1241 1242 uint32 1243 allreg(uint32 b, Rgn *r) 1244 { 1245 Var *v; 1246 int i; 1247 1248 v = var + r->varno; 1249 r->regno = 0; 1250 switch(v->etype) { 1251 1252 default: 1253 fatal("unknown etype %d/%E", bitno(b), v->etype); 1254 break; 1255 1256 case TINT8: 1257 case TUINT8: 1258 case TINT16: 1259 case TUINT16: 1260 case TINT32: 1261 case TUINT32: 1262 case TINT64: 1263 case TINT: 1264 case TUINT: 1265 case TUINTPTR: 1266 case TBOOL: 1267 case TPTR32: 1268 i = BtoR(~b); 1269 if(i && r->cost > 0) { 1270 r->regno = i; 1271 return RtoB(i); 1272 } 1273 break; 1274 1275 case TFLOAT32: 1276 case TFLOAT64: 1277 if(!use_sse) 1278 break; 1279 i = BtoF(~b); 1280 if(i && r->cost > 0) { 1281 r->regno = i; 1282 return FtoB(i); 1283 } 1284 break; 1285 } 1286 return 0; 1287 } 1288 1289 void 1290 paint1(Reg *r, int bn) 1291 { 1292 Reg *r1; 
1293 Prog *p; 1294 int z; 1295 uint32 bb; 1296 1297 z = bn/32; 1298 bb = 1L<<(bn%32); 1299 if(r->act.b[z] & bb) 1300 return; 1301 for(;;) { 1302 if(!(r->refbehind.b[z] & bb)) 1303 break; 1304 r1 = r->p1; 1305 if(r1 == R) 1306 break; 1307 if(!(r1->refahead.b[z] & bb)) 1308 break; 1309 if(r1->act.b[z] & bb) 1310 break; 1311 r = r1; 1312 } 1313 1314 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 1315 change -= CLOAD * r->loop; 1316 } 1317 for(;;) { 1318 r->act.b[z] |= bb; 1319 p = r->prog; 1320 1321 if(r->use1.b[z] & bb) { 1322 change += CREF * r->loop; 1323 if(p->as == AFMOVL || p->as == AFMOVW) 1324 if(BtoR(bb) != D_F0) 1325 change = -CINF; 1326 } 1327 1328 if((r->use2.b[z]|r->set.b[z]) & bb) { 1329 change += CREF * r->loop; 1330 if(p->as == AFMOVL || p->as == AFMOVW) 1331 if(BtoR(bb) != D_F0) 1332 change = -CINF; 1333 } 1334 1335 if(STORE(r) & r->regdiff.b[z] & bb) { 1336 change -= CLOAD * r->loop; 1337 if(p->as == AFMOVL || p->as == AFMOVW) 1338 if(BtoR(bb) != D_F0) 1339 change = -CINF; 1340 } 1341 1342 if(r->refbehind.b[z] & bb) 1343 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1344 if(r1->refahead.b[z] & bb) 1345 paint1(r1, bn); 1346 1347 if(!(r->refahead.b[z] & bb)) 1348 break; 1349 r1 = r->s2; 1350 if(r1 != R) 1351 if(r1->refbehind.b[z] & bb) 1352 paint1(r1, bn); 1353 r = r->s1; 1354 if(r == R) 1355 break; 1356 if(r->act.b[z] & bb) 1357 break; 1358 if(!(r->refbehind.b[z] & bb)) 1359 break; 1360 } 1361 } 1362 1363 uint32 1364 regset(Reg *r, uint32 bb) 1365 { 1366 uint32 b, set; 1367 Adr v; 1368 int c; 1369 1370 set = 0; 1371 v = zprog.from; 1372 while(b = bb & ~(bb-1)) { 1373 v.type = b & 0xFF ? BtoR(b): BtoF(b); 1374 c = copyu(r->prog, &v, A); 1375 if(c == 3) 1376 set |= b; 1377 bb &= ~b; 1378 } 1379 return set; 1380 } 1381 1382 uint32 1383 reguse(Reg *r, uint32 bb) 1384 { 1385 uint32 b, set; 1386 Adr v; 1387 int c; 1388 1389 set = 0; 1390 v = zprog.from; 1391 while(b = bb & ~(bb-1)) { 1392 v.type = b & 0xFF ? 
BtoR(b): BtoF(b); 1393 c = copyu(r->prog, &v, A); 1394 if(c == 1 || c == 2 || c == 4) 1395 set |= b; 1396 bb &= ~b; 1397 } 1398 return set; 1399 } 1400 1401 uint32 1402 paint2(Reg *r, int bn) 1403 { 1404 Reg *r1; 1405 int z; 1406 uint32 bb, vreg, x; 1407 1408 z = bn/32; 1409 bb = 1L << (bn%32); 1410 vreg = regbits; 1411 if(!(r->act.b[z] & bb)) 1412 return vreg; 1413 for(;;) { 1414 if(!(r->refbehind.b[z] & bb)) 1415 break; 1416 r1 = r->p1; 1417 if(r1 == R) 1418 break; 1419 if(!(r1->refahead.b[z] & bb)) 1420 break; 1421 if(!(r1->act.b[z] & bb)) 1422 break; 1423 r = r1; 1424 } 1425 for(;;) { 1426 r->act.b[z] &= ~bb; 1427 1428 vreg |= r->regu; 1429 1430 if(r->refbehind.b[z] & bb) 1431 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1432 if(r1->refahead.b[z] & bb) 1433 vreg |= paint2(r1, bn); 1434 1435 if(!(r->refahead.b[z] & bb)) 1436 break; 1437 r1 = r->s2; 1438 if(r1 != R) 1439 if(r1->refbehind.b[z] & bb) 1440 vreg |= paint2(r1, bn); 1441 r = r->s1; 1442 if(r == R) 1443 break; 1444 if(!(r->act.b[z] & bb)) 1445 break; 1446 if(!(r->refbehind.b[z] & bb)) 1447 break; 1448 } 1449 1450 bb = vreg; 1451 for(; r; r=r->s1) { 1452 x = r->regu & ~bb; 1453 if(x) { 1454 vreg |= reguse(r, x); 1455 bb |= regset(r, x); 1456 } 1457 } 1458 return vreg; 1459 } 1460 1461 void 1462 paint3(Reg *r, int bn, int32 rb, int rn) 1463 { 1464 Reg *r1; 1465 Prog *p; 1466 int z; 1467 uint32 bb; 1468 1469 z = bn/32; 1470 bb = 1L << (bn%32); 1471 if(r->act.b[z] & bb) 1472 return; 1473 for(;;) { 1474 if(!(r->refbehind.b[z] & bb)) 1475 break; 1476 r1 = r->p1; 1477 if(r1 == R) 1478 break; 1479 if(!(r1->refahead.b[z] & bb)) 1480 break; 1481 if(r1->act.b[z] & bb) 1482 break; 1483 r = r1; 1484 } 1485 1486 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1487 addmove(r, bn, rn, 0); 1488 for(;;) { 1489 r->act.b[z] |= bb; 1490 p = r->prog; 1491 1492 if(r->use1.b[z] & bb) { 1493 if(debug['R'] && debug['v']) 1494 print("%P", p); 1495 addreg(&p->from, rn); 1496 if(debug['R'] && debug['v']) 1497 print(" 
===change== %P\n", p); 1498 } 1499 if((r->use2.b[z]|r->set.b[z]) & bb) { 1500 if(debug['R'] && debug['v']) 1501 print("%P", p); 1502 addreg(&p->to, rn); 1503 if(debug['R'] && debug['v']) 1504 print(" ===change== %P\n", p); 1505 } 1506 1507 if(STORE(r) & r->regdiff.b[z] & bb) 1508 addmove(r, bn, rn, 1); 1509 r->regu |= rb; 1510 1511 if(r->refbehind.b[z] & bb) 1512 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1513 if(r1->refahead.b[z] & bb) 1514 paint3(r1, bn, rb, rn); 1515 1516 if(!(r->refahead.b[z] & bb)) 1517 break; 1518 r1 = r->s2; 1519 if(r1 != R) 1520 if(r1->refbehind.b[z] & bb) 1521 paint3(r1, bn, rb, rn); 1522 r = r->s1; 1523 if(r == R) 1524 break; 1525 if(r->act.b[z] & bb) 1526 break; 1527 if(!(r->refbehind.b[z] & bb)) 1528 break; 1529 } 1530 } 1531 1532 void 1533 addreg(Adr *a, int rn) 1534 { 1535 1536 a->sym = 0; 1537 a->offset = 0; 1538 a->type = rn; 1539 1540 ostats.ncvtreg++; 1541 } 1542 1543 int32 1544 RtoB(int r) 1545 { 1546 1547 if(r < D_AX || r > D_DI) 1548 return 0; 1549 return 1L << (r-D_AX); 1550 } 1551 1552 int 1553 BtoR(int32 b) 1554 { 1555 1556 b &= 0xffL; 1557 if(b == 0) 1558 return 0; 1559 return bitno(b) + D_AX; 1560 } 1561 1562 int32 1563 FtoB(int f) 1564 { 1565 if(f < D_X0 || f > D_X7) 1566 return 0; 1567 return 1L << (f - D_X0 + 8); 1568 } 1569 1570 int 1571 BtoF(int32 b) 1572 { 1573 b &= 0xFF00L; 1574 if(b == 0) 1575 return 0; 1576 return bitno(b) - 8 + D_X0; 1577 } 1578 1579 void 1580 dumpone(Reg *r) 1581 { 1582 int z; 1583 Bits bit; 1584 1585 print("%d:%P", r->loop, r->prog); 1586 for(z=0; z<BITS; z++) 1587 bit.b[z] = 1588 r->set.b[z] | 1589 r->use1.b[z] | 1590 r->use2.b[z] | 1591 r->refbehind.b[z] | 1592 r->refahead.b[z] | 1593 r->calbehind.b[z] | 1594 r->calahead.b[z] | 1595 r->regdiff.b[z] | 1596 r->act.b[z] | 1597 0; 1598 if(bany(&bit)) { 1599 print("\t"); 1600 if(bany(&r->set)) 1601 print(" s:%Q", r->set); 1602 if(bany(&r->use1)) 1603 print(" u1:%Q", r->use1); 1604 if(bany(&r->use2)) 1605 print(" u2:%Q", r->use2); 1606 
if(bany(&r->refbehind)) 1607 print(" rb:%Q ", r->refbehind); 1608 if(bany(&r->refahead)) 1609 print(" ra:%Q ", r->refahead); 1610 if(bany(&r->calbehind)) 1611 print(" cb:%Q ", r->calbehind); 1612 if(bany(&r->calahead)) 1613 print(" ca:%Q ", r->calahead); 1614 if(bany(&r->regdiff)) 1615 print(" d:%Q ", r->regdiff); 1616 if(bany(&r->act)) 1617 print(" a:%Q ", r->act); 1618 } 1619 print("\n"); 1620 } 1621 1622 void 1623 dumpit(char *str, Reg *r0) 1624 { 1625 Reg *r, *r1; 1626 1627 print("\n%s\n", str); 1628 for(r = r0; r != R; r = r->link) { 1629 dumpone(r); 1630 r1 = r->p2; 1631 if(r1 != R) { 1632 print(" pred:"); 1633 for(; r1 != R; r1 = r1->p2link) 1634 print(" %.4ud", r1->prog->loc); 1635 print("\n"); 1636 } 1637 // r1 = r->s1; 1638 // if(r1 != R) { 1639 // print(" succ:"); 1640 // for(; r1 != R; r1 = r1->s1) 1641 // print(" %.4ud", r1->prog->loc); 1642 // print("\n"); 1643 // } 1644 } 1645 } 1646 1647 static Sym* symlist[10]; 1648 1649 int 1650 noreturn(Prog *p) 1651 { 1652 Sym *s; 1653 int i; 1654 1655 if(symlist[0] == S) { 1656 symlist[0] = pkglookup("panicindex", runtimepkg); 1657 symlist[1] = pkglookup("panicslice", runtimepkg); 1658 symlist[2] = pkglookup("throwinit", runtimepkg); 1659 symlist[3] = pkglookup("panic", runtimepkg); 1660 symlist[4] = pkglookup("panicwrap", runtimepkg); 1661 } 1662 1663 s = p->to.sym; 1664 if(s == S) 1665 return 0; 1666 for(i=0; symlist[i]!=S; i++) 1667 if(s == symlist[i]) 1668 return 1; 1669 return 0; 1670 } 1671 1672 /* 1673 * the code generator depends on being able to write out JMP 1674 * instructions that it can jump to now but fill in later. 1675 * the linker will resolve them nicely, but they make the code 1676 * longer and more difficult to follow during debugging. 1677 * remove them. 1678 */ 1679 1680 /* what instruction does a JMP to p eventually land on? 
*/ 1681 static Prog* 1682 chasejmp(Prog *p, int *jmploop) 1683 { 1684 int n; 1685 1686 n = 0; 1687 while(p != P && p->as == AJMP && p->to.type == D_BRANCH) { 1688 if(++n > 10) { 1689 *jmploop = 1; 1690 break; 1691 } 1692 p = p->to.u.branch; 1693 } 1694 return p; 1695 } 1696 1697 /* 1698 * reuse reg pointer for mark/sweep state. 1699 * leave reg==nil at end because alive==nil. 1700 */ 1701 #define alive ((void*)0) 1702 #define dead ((void*)1) 1703 1704 /* mark all code reachable from firstp as alive */ 1705 static void 1706 mark(Prog *firstp) 1707 { 1708 Prog *p; 1709 1710 for(p=firstp; p; p=p->link) { 1711 if(p->reg != dead) 1712 break; 1713 p->reg = alive; 1714 if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch) 1715 mark(p->to.u.branch); 1716 if(p->as == AJMP || p->as == ARET || p->as == AUNDEF) 1717 break; 1718 } 1719 } 1720 1721 static void 1722 fixjmp(Prog *firstp) 1723 { 1724 int jmploop; 1725 Prog *p, *last; 1726 1727 if(debug['R'] && debug['v']) 1728 print("\nfixjmp\n"); 1729 1730 // pass 1: resolve jump to AJMP, mark all code as dead. 1731 jmploop = 0; 1732 for(p=firstp; p; p=p->link) { 1733 if(debug['R'] && debug['v']) 1734 print("%P\n", p); 1735 if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) { 1736 p->to.u.branch = chasejmp(p->to.u.branch, &jmploop); 1737 if(debug['R'] && debug['v']) 1738 print("->%P\n", p); 1739 } 1740 p->reg = dead; 1741 } 1742 if(debug['R'] && debug['v']) 1743 print("\n"); 1744 1745 // pass 2: mark all reachable code alive 1746 mark(firstp); 1747 1748 // pass 3: delete dead code (mostly JMPs). 1749 last = nil; 1750 for(p=firstp; p; p=p->link) { 1751 if(p->reg == dead) { 1752 if(p->link == P && p->as == ARET && last && last->as != ARET) { 1753 // This is the final ARET, and the code so far doesn't have one. 1754 // Let it stay. 
1755 } else { 1756 if(debug['R'] && debug['v']) 1757 print("del %P\n", p); 1758 continue; 1759 } 1760 } 1761 if(last) 1762 last->link = p; 1763 last = p; 1764 } 1765 last->link = P; 1766 1767 // pass 4: elide JMP to next instruction. 1768 // only safe if there are no jumps to JMPs anymore. 1769 if(!jmploop) { 1770 last = nil; 1771 for(p=firstp; p; p=p->link) { 1772 if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) { 1773 if(debug['R'] && debug['v']) 1774 print("del %P\n", p); 1775 continue; 1776 } 1777 if(last) 1778 last->link = p; 1779 last = p; 1780 } 1781 last->link = P; 1782 } 1783 1784 if(debug['R'] && debug['v']) { 1785 print("\n"); 1786 for(p=firstp; p; p=p->link) 1787 print("%P\n", p); 1788 print("\n"); 1789 } 1790 } 1791 1792 static uint32 1793 fnv1(Sym *sym) 1794 { 1795 uint32 h; 1796 char *s; 1797 1798 h = 2166136261U; 1799 for(s=sym->name;*s;s++) { 1800 h = (16777619 * h) ^ (uint32)(uint8)(*s); 1801 } 1802 return h; 1803 } 1804 1805 static uint16 1806 hash32to16(uint32 h) 1807 { 1808 return (h & 0xffff) ^ (h >> 16); 1809 } 1810 1811 /* 1812 * fixtemp eliminates sequences like: 1813 * MOV reg1, mem 1814 * OP mem, reg2 1815 * when mem is a stack variable which is not mentioned 1816 * anywhere else. The instructions are replaced by 1817 * OP reg1, reg2 1818 * this reduces the number of variables that the register optimizer 1819 * sees, which lets it do a better job and makes it less likely to turn 1820 * itself off. 1821 */ 1822 static void 1823 fixtemp(Prog *firstp) 1824 { 1825 static uint8 counts[1<<16]; // A hash table to count variable occurences. 1826 int i; 1827 Prog *p, *p2; 1828 uint32 h; 1829 1830 if(debug['R'] && debug['v']) 1831 print("\nfixtemp\n"); 1832 1833 // Count variable references. We actually use a hashtable so this 1834 // is only approximate. 
1835 for(i=0; i<nelem(counts); i++) 1836 counts[i] = 0; 1837 for(p=firstp; p!=P; p=p->link) { 1838 if(p->from.type == D_AUTO) { 1839 h = hash32to16(fnv1(p->from.sym)); 1840 //print("seen %S hash %d\n", p->from.sym, hash32to16(h)); 1841 if(counts[h] < 10) 1842 counts[h]++; 1843 } 1844 if(p->to.type == D_AUTO) { 1845 h = hash32to16(fnv1(p->to.sym)); 1846 //print("seen %S hash %d\n", p->to.sym, hash32to16(h)); 1847 if(counts[h] < 10) 1848 counts[h]++; 1849 } 1850 } 1851 1852 // Eliminate single-write, single-read stack variables. 1853 for(p=firstp; p!=P; p=p->link) { 1854 if(debug['R'] && debug['v']) 1855 print("%P\n", p); 1856 if(p->link == P || p->to.type != D_AUTO) 1857 continue; 1858 if(isfloat[p->to.etype] && FtoB(p->from.type)) { 1859 switch(p->as) { 1860 case AMOVSS: 1861 case AMOVSD: 1862 break; 1863 default: 1864 continue; 1865 } 1866 } else if(!isfloat[p->to.etype] && RtoB(p->from.type)) { 1867 switch(p->as) { 1868 case AMOVB: 1869 if(p->to.width == 1) 1870 break; 1871 case AMOVW: 1872 if(p->to.width == 2) 1873 break; 1874 case AMOVL: 1875 if(p->to.width == 4) 1876 break; 1877 default: 1878 continue; 1879 } 1880 } else 1881 continue; 1882 // p is a MOV reg, mem. 1883 p2 = p->link; 1884 h = hash32to16(fnv1(p->to.sym)); 1885 if(counts[h] != 2) { 1886 continue; 1887 } 1888 switch(p2->as) { 1889 case ALEAL: 1890 case AFMOVD: 1891 case AFMOVF: 1892 case AFMOVL: 1893 case AFMOVW: 1894 case AFMOVV: 1895 // funny 1896 continue; 1897 } 1898 // p2 is OP mem, reg2 1899 // and OP is not a funny instruction. 1900 if(p2->from.sym == p->to.sym 1901 && p2->from.offset == p->to.offset 1902 && p2->from.type == p->to.type) { 1903 if(debug['R'] && debug['v']) { 1904 print(" ===elide== %D\n", &p->to); 1905 print("%P", p2); 1906 } 1907 // p2 is OP mem, reg2. 1908 // change to OP reg, reg2 and 1909 // eliminate the mov. 1910 p2->from = p->from; 1911 *p = *p2; 1912 p->link = p2->link; 1913 if(debug['R'] && debug['v']) { 1914 print(" ===change== %P\n", p); 1915 } 1916 } 1917 } 1918 }