// Source: github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/6g/reg.c
//
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 32 /* 16 general + 16 floating */ 37 #define REGBITS ((uint32)0xffffffff) 38 39 static Reg* firstr; 40 static int first = 1; 41 42 int 43 rcmp(const void *a1, const void *a2) 44 { 45 Rgn *p1, *p2; 46 int c1, c2; 47 48 p1 = (Rgn*)a1; 49 p2 = (Rgn*)a2; 50 c1 = p2->cost; 51 c2 = p1->cost; 52 if(c1 -= c2) 53 return c1; 54 return p2->varno - p1->varno; 55 } 56 57 static void 58 setoutvar(void) 59 { 60 Type *t; 61 Node *n; 62 Addr a; 63 Iter save; 64 Bits bit; 65 int z; 66 67 t = structfirst(&save, getoutarg(curfn->type)); 68 while(t != T) { 69 n = nodarg(t, 1); 70 a = zprog.from; 71 naddr(n, &a, 0); 72 bit = mkvar(R, &a); 73 for(z=0; z<BITS; z++) 74 ovar.b[z] |= bit.b[z]; 75 t = structnext(&save); 76 } 77 //if(bany(&ovar)) 78 //print("ovars = %Q\n", ovar); 79 } 80 81 static void 82 setaddrs(Bits bit) 83 { 84 int i, n; 85 Var *v; 86 Node *node; 87 88 while(bany(&bit)) { 89 // convert each bit to a variable 90 i = bnum(bit); 91 node = var[i].node; 92 n = var[i].name; 93 bit.b[i/32] &= ~(1L<<(i%32)); 94 95 // disable all pieces of that variable 96 for(i=0; i<nvar; i++) { 97 v = var+i; 98 if(v->node == node && v->name == n) 99 v->addr = 2; 100 } 101 } 102 } 103 104 static char* regname[] = { 105 ".AX", 106 ".CX", 107 ".DX", 108 ".BX", 109 ".SP", 110 ".BP", 111 ".SI", 112 ".DI", 113 ".R8", 114 ".R9", 115 ".R10", 116 ".R11", 117 ".R12", 118 ".R13", 119 ".R14", 120 ".R15", 121 ".X0", 122 ".X1", 123 ".X2", 124 ".X3", 125 ".X4", 126 ".X5", 127 ".X6", 128 ".X7", 129 ".X8", 130 ".X9", 131 ".X10", 132 ".X11", 133 ".X12", 134 ".X13", 135 ".X14", 136 ".X15", 137 }; 138 139 static Node* regnodes[NREGVAR]; 140 141 void 142 regopt(Prog *firstp) 143 { 144 Reg *r, *r1; 145 Prog *p; 146 Graph *g; 147 ProgInfo info; 148 int i, z; 149 uint32 vreg; 150 Bits bit; 151 152 if(first) { 153 fmtinstall('Q', Qconv); 154 exregoffset = D_R15; 155 first = 0; 156 } 157 158 fixjmp(firstp); 159 
mergetemp(firstp); 160 161 /* 162 * control flow is more complicated in generated go code 163 * than in generated c code. define pseudo-variables for 164 * registers, so we have complete register usage information. 165 */ 166 nvar = NREGVAR; 167 memset(var, 0, NREGVAR*sizeof var[0]); 168 for(i=0; i<NREGVAR; i++) { 169 if(regnodes[i] == N) 170 regnodes[i] = newname(lookup(regname[i])); 171 var[i].node = regnodes[i]; 172 } 173 174 regbits = RtoB(D_SP); 175 for(z=0; z<BITS; z++) { 176 externs.b[z] = 0; 177 params.b[z] = 0; 178 consts.b[z] = 0; 179 addrs.b[z] = 0; 180 ovar.b[z] = 0; 181 } 182 183 // build list of return variables 184 setoutvar(); 185 186 /* 187 * pass 1 188 * build aux data structure 189 * allocate pcs 190 * find use and set of variables 191 */ 192 g = flowstart(firstp, sizeof(Reg)); 193 if(g == nil) 194 return; 195 firstr = (Reg*)g->start; 196 197 for(r = firstr; r != R; r = (Reg*)r->f.link) { 198 p = r->f.prog; 199 proginfo(&info, p); 200 201 // Avoid making variables for direct-called functions. 
202 if(p->as == ACALL && p->to.type == D_EXTERN) 203 continue; 204 205 r->use1.b[0] |= info.reguse | info.regindex; 206 r->set.b[0] |= info.regset; 207 208 bit = mkvar(r, &p->from); 209 if(bany(&bit)) { 210 if(info.flags & LeftAddr) 211 setaddrs(bit); 212 if(info.flags & LeftRead) 213 for(z=0; z<BITS; z++) 214 r->use1.b[z] |= bit.b[z]; 215 if(info.flags & LeftWrite) 216 for(z=0; z<BITS; z++) 217 r->set.b[z] |= bit.b[z]; 218 } 219 220 bit = mkvar(r, &p->to); 221 if(bany(&bit)) { 222 if(info.flags & RightAddr) 223 setaddrs(bit); 224 if(info.flags & RightRead) 225 for(z=0; z<BITS; z++) 226 r->use2.b[z] |= bit.b[z]; 227 if(info.flags & RightWrite) 228 for(z=0; z<BITS; z++) 229 r->set.b[z] |= bit.b[z]; 230 } 231 } 232 233 for(i=0; i<nvar; i++) { 234 Var *v = var+i; 235 if(v->addr) { 236 bit = blsh(i); 237 for(z=0; z<BITS; z++) 238 addrs.b[z] |= bit.b[z]; 239 } 240 241 if(debug['R'] && debug['v']) 242 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 243 i, v->addr, v->etype, v->width, v->node, v->offset); 244 } 245 246 if(debug['R'] && debug['v']) 247 dumpit("pass1", &firstr->f, 1); 248 249 /* 250 * pass 2 251 * find looping structure 252 */ 253 flowrpo(g); 254 255 if(debug['R'] && debug['v']) 256 dumpit("pass2", &firstr->f, 1); 257 258 /* 259 * pass 3 260 * iterate propagating usage 261 * back until flow graph is complete 262 */ 263 loop1: 264 change = 0; 265 for(r = firstr; r != R; r = (Reg*)r->f.link) 266 r->f.active = 0; 267 for(r = firstr; r != R; r = (Reg*)r->f.link) 268 if(r->f.prog->as == ARET) 269 prop(r, zbits, zbits); 270 loop11: 271 /* pick up unreachable code */ 272 i = 0; 273 for(r = firstr; r != R; r = r1) { 274 r1 = (Reg*)r->f.link; 275 if(r1 && r1->f.active && !r->f.active) { 276 prop(r, zbits, zbits); 277 i = 1; 278 } 279 } 280 if(i) 281 goto loop11; 282 if(change) 283 goto loop1; 284 285 if(debug['R'] && debug['v']) 286 dumpit("pass3", &firstr->f, 1); 287 288 /* 289 * pass 4 290 * iterate propagating register/variable synchrony 291 * forward until 
graph is complete 292 */ 293 loop2: 294 change = 0; 295 for(r = firstr; r != R; r = (Reg*)r->f.link) 296 r->f.active = 0; 297 synch(firstr, zbits); 298 if(change) 299 goto loop2; 300 301 if(debug['R'] && debug['v']) 302 dumpit("pass4", &firstr->f, 1); 303 304 /* 305 * pass 4.5 306 * move register pseudo-variables into regu. 307 */ 308 for(r = firstr; r != R; r = (Reg*)r->f.link) { 309 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 310 311 r->set.b[0] &= ~REGBITS; 312 r->use1.b[0] &= ~REGBITS; 313 r->use2.b[0] &= ~REGBITS; 314 r->refbehind.b[0] &= ~REGBITS; 315 r->refahead.b[0] &= ~REGBITS; 316 r->calbehind.b[0] &= ~REGBITS; 317 r->calahead.b[0] &= ~REGBITS; 318 r->regdiff.b[0] &= ~REGBITS; 319 r->act.b[0] &= ~REGBITS; 320 } 321 322 /* 323 * pass 5 324 * isolate regions 325 * calculate costs (paint1) 326 */ 327 r = firstr; 328 if(r) { 329 for(z=0; z<BITS; z++) 330 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 331 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 332 if(bany(&bit) && !r->f.refset) { 333 // should never happen - all variables are preset 334 if(debug['w']) 335 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 336 r->f.refset = 1; 337 } 338 } 339 for(r = firstr; r != R; r = (Reg*)r->f.link) 340 r->act = zbits; 341 rgp = region; 342 nregion = 0; 343 for(r = firstr; r != R; r = (Reg*)r->f.link) { 344 for(z=0; z<BITS; z++) 345 bit.b[z] = r->set.b[z] & 346 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 347 if(bany(&bit) && !r->f.refset) { 348 if(debug['w']) 349 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 350 r->f.refset = 1; 351 excise(&r->f); 352 } 353 for(z=0; z<BITS; z++) 354 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 355 while(bany(&bit)) { 356 i = bnum(bit); 357 rgp->enter = r; 358 rgp->varno = i; 359 change = 0; 360 paint1(r, i); 361 bit.b[i/32] &= ~(1L<<(i%32)); 362 if(change <= 0) 363 continue; 364 rgp->cost = change; 365 nregion++; 366 if(nregion >= NRGN) { 367 if(debug['R'] && 
debug['v']) 368 print("too many regions\n"); 369 goto brk; 370 } 371 rgp++; 372 } 373 } 374 brk: 375 qsort(region, nregion, sizeof(region[0]), rcmp); 376 377 if(debug['R'] && debug['v']) 378 dumpit("pass5", &firstr->f, 1); 379 380 /* 381 * pass 6 382 * determine used registers (paint2) 383 * replace code (paint3) 384 */ 385 rgp = region; 386 for(i=0; i<nregion; i++) { 387 bit = blsh(rgp->varno); 388 vreg = paint2(rgp->enter, rgp->varno); 389 vreg = allreg(vreg, rgp); 390 if(rgp->regno != 0) { 391 if(debug['R'] && debug['v']) { 392 Var *v; 393 394 v = var + rgp->varno; 395 print("registerize %N+%lld (bit=%2d et=%2E) in %R\n", 396 v->node, v->offset, rgp->varno, v->etype, rgp->regno); 397 } 398 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 399 } 400 rgp++; 401 } 402 403 if(debug['R'] && debug['v']) 404 dumpit("pass6", &firstr->f, 1); 405 406 /* 407 * free aux structures. peep allocates new ones. 408 */ 409 flowend(g); 410 firstr = R; 411 412 /* 413 * pass 7 414 * peep-hole on basic block 415 */ 416 if(!debug['R'] || debug['P']) 417 peep(firstp); 418 419 /* 420 * eliminate nops 421 */ 422 for(p=firstp; p!=P; p=p->link) { 423 while(p->link != P && p->link->as == ANOP) 424 p->link = p->link->link; 425 if(p->to.type == D_BRANCH) 426 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 427 p->to.u.branch = p->to.u.branch->link; 428 } 429 430 if(debug['R']) { 431 if(ostats.ncvtreg || 432 ostats.nspill || 433 ostats.nreload || 434 ostats.ndelmov || 435 ostats.nvar || 436 ostats.naddr || 437 0) 438 print("\nstats\n"); 439 440 if(ostats.ncvtreg) 441 print(" %4d cvtreg\n", ostats.ncvtreg); 442 if(ostats.nspill) 443 print(" %4d spill\n", ostats.nspill); 444 if(ostats.nreload) 445 print(" %4d reload\n", ostats.nreload); 446 if(ostats.ndelmov) 447 print(" %4d delmov\n", ostats.ndelmov); 448 if(ostats.nvar) 449 print(" %4d var\n", ostats.nvar); 450 if(ostats.naddr) 451 print(" %4d addr\n", ostats.naddr); 452 453 memset(&ostats, 0, sizeof(ostats)); 454 } 455 } 456 457 /* 
458 * add mov b,rn 459 * just after r 460 */ 461 void 462 addmove(Reg *r, int bn, int rn, int f) 463 { 464 Prog *p, *p1; 465 Adr *a; 466 Var *v; 467 468 p1 = mal(sizeof(*p1)); 469 clearp(p1); 470 p1->loc = 9999; 471 472 p = r->f.prog; 473 p1->link = p->link; 474 p->link = p1; 475 p1->lineno = p->lineno; 476 477 v = var + bn; 478 479 a = &p1->to; 480 a->offset = v->offset; 481 a->etype = v->etype; 482 a->type = v->name; 483 a->node = v->node; 484 a->sym = v->node->sym; 485 486 // need to clean this up with wptr and 487 // some of the defaults 488 p1->as = AMOVL; 489 switch(v->etype) { 490 default: 491 fatal("unknown type %E", v->etype); 492 case TINT8: 493 case TUINT8: 494 case TBOOL: 495 p1->as = AMOVB; 496 break; 497 case TINT16: 498 case TUINT16: 499 p1->as = AMOVW; 500 break; 501 case TINT64: 502 case TUINT64: 503 case TUINTPTR: 504 case TPTR64: 505 p1->as = AMOVQ; 506 break; 507 case TFLOAT32: 508 p1->as = AMOVSS; 509 break; 510 case TFLOAT64: 511 p1->as = AMOVSD; 512 break; 513 case TINT: 514 case TUINT: 515 case TINT32: 516 case TUINT32: 517 case TPTR32: 518 break; 519 } 520 521 p1->from.type = rn; 522 if(!f) { 523 p1->from = *a; 524 *a = zprog.from; 525 a->type = rn; 526 if(v->etype == TUINT8) 527 p1->as = AMOVB; 528 if(v->etype == TUINT16) 529 p1->as = AMOVW; 530 } 531 if(debug['R'] && debug['v']) 532 print("%P ===add=== %P\n", p, p1); 533 ostats.nspill++; 534 } 535 536 uint32 537 doregbits(int r) 538 { 539 uint32 b; 540 541 b = 0; 542 if(r >= D_INDIR) 543 r -= D_INDIR; 544 if(r >= D_AX && r <= D_R15) 545 b |= RtoB(r); 546 else 547 if(r >= D_AL && r <= D_R15B) 548 b |= RtoB(r-D_AL+D_AX); 549 else 550 if(r >= D_AH && r <= D_BH) 551 b |= RtoB(r-D_AH+D_AX); 552 else 553 if(r >= D_X0 && r <= D_X0+15) 554 b |= FtoB(r); 555 return b; 556 } 557 558 static int 559 overlap(int64 o1, int w1, int64 o2, int w2) 560 { 561 int64 t1, t2; 562 563 t1 = o1+w1; 564 t2 = o2+w2; 565 566 if(!(t1 > o2 && t2 > o1)) 567 return 0; 568 569 return 1; 570 } 571 572 Bits 573 mkvar(Reg 
*r, Adr *a) 574 { 575 Var *v; 576 int i, t, n, et, z, flag; 577 int64 w; 578 uint32 regu; 579 int64 o; 580 Bits bit; 581 Node *node; 582 583 /* 584 * mark registers used 585 */ 586 t = a->type; 587 if(t == D_NONE) 588 goto none; 589 590 if(r != R) 591 r->use1.b[0] |= doregbits(a->index); 592 593 switch(t) { 594 default: 595 regu = doregbits(t); 596 if(regu == 0) 597 goto none; 598 bit = zbits; 599 bit.b[0] = regu; 600 return bit; 601 602 case D_ADDR: 603 a->type = a->index; 604 bit = mkvar(r, a); 605 setaddrs(bit); 606 a->type = t; 607 ostats.naddr++; 608 goto none; 609 610 case D_EXTERN: 611 case D_STATIC: 612 case D_PARAM: 613 case D_AUTO: 614 n = t; 615 break; 616 } 617 618 node = a->node; 619 if(node == N || node->op != ONAME || node->orig == N) 620 goto none; 621 node = node->orig; 622 if(node->orig != node) 623 fatal("%D: bad node", a); 624 if(node->sym == S || node->sym->name[0] == '.') 625 goto none; 626 et = a->etype; 627 o = a->offset; 628 w = a->width; 629 if(w < 0) 630 fatal("bad width %lld for %D", w, a); 631 632 flag = 0; 633 for(i=0; i<nvar; i++) { 634 v = var+i; 635 if(v->node == node && v->name == n) { 636 if(v->offset == o) 637 if(v->etype == et) 638 if(v->width == w) 639 return blsh(i); 640 641 // if they overlaps, disable both 642 if(overlap(v->offset, v->width, o, w)) { 643 // print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et); 644 v->addr = 1; 645 flag = 1; 646 } 647 } 648 } 649 switch(et) { 650 case 0: 651 case TFUNC: 652 goto none; 653 } 654 655 if(nvar >= NVAR) { 656 if(debug['w'] > 1 && node != N) 657 fatal("variable not optimized: %#N", node); 658 goto none; 659 } 660 661 i = nvar; 662 nvar++; 663 v = var+i; 664 v->offset = o; 665 v->name = n; 666 v->etype = et; 667 v->width = w; 668 v->addr = flag; // funny punning 669 v->node = node; 670 671 if(debug['R']) 672 print("bit=%2d et=%2E w=%d+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 673 674 ostats.nvar++; 675 676 bit = blsh(i); 
677 if(n == D_EXTERN || n == D_STATIC) 678 for(z=0; z<BITS; z++) 679 externs.b[z] |= bit.b[z]; 680 if(n == D_PARAM) 681 for(z=0; z<BITS; z++) 682 params.b[z] |= bit.b[z]; 683 684 return bit; 685 686 none: 687 return zbits; 688 } 689 690 void 691 prop(Reg *r, Bits ref, Bits cal) 692 { 693 Reg *r1, *r2; 694 int z; 695 696 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 697 for(z=0; z<BITS; z++) { 698 ref.b[z] |= r1->refahead.b[z]; 699 if(ref.b[z] != r1->refahead.b[z]) { 700 r1->refahead.b[z] = ref.b[z]; 701 change++; 702 } 703 cal.b[z] |= r1->calahead.b[z]; 704 if(cal.b[z] != r1->calahead.b[z]) { 705 r1->calahead.b[z] = cal.b[z]; 706 change++; 707 } 708 } 709 switch(r1->f.prog->as) { 710 case ACALL: 711 if(noreturn(r1->f.prog)) 712 break; 713 for(z=0; z<BITS; z++) { 714 cal.b[z] |= ref.b[z] | externs.b[z]; 715 ref.b[z] = 0; 716 } 717 break; 718 719 case ATEXT: 720 for(z=0; z<BITS; z++) { 721 cal.b[z] = 0; 722 ref.b[z] = 0; 723 } 724 break; 725 726 case ARET: 727 for(z=0; z<BITS; z++) { 728 cal.b[z] = externs.b[z] | ovar.b[z]; 729 ref.b[z] = 0; 730 } 731 break; 732 733 default: 734 // Work around for issue 1304: 735 // flush modified globals before each instruction. 
736 for(z=0; z<BITS; z++) { 737 cal.b[z] |= externs.b[z]; 738 // issue 4066: flush modified return variables in case of panic 739 if(hasdefer) 740 cal.b[z] |= ovar.b[z]; 741 } 742 break; 743 } 744 for(z=0; z<BITS; z++) { 745 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 746 r1->use1.b[z] | r1->use2.b[z]; 747 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 748 r1->refbehind.b[z] = ref.b[z]; 749 r1->calbehind.b[z] = cal.b[z]; 750 } 751 if(r1->f.active) 752 break; 753 r1->f.active = 1; 754 } 755 for(; r != r1; r = (Reg*)r->f.p1) 756 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 757 prop(r2, r->refbehind, r->calbehind); 758 } 759 760 void 761 synch(Reg *r, Bits dif) 762 { 763 Reg *r1; 764 int z; 765 766 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 767 for(z=0; z<BITS; z++) { 768 dif.b[z] = (dif.b[z] & 769 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 770 r1->set.b[z] | r1->regdiff.b[z]; 771 if(dif.b[z] != r1->regdiff.b[z]) { 772 r1->regdiff.b[z] = dif.b[z]; 773 change++; 774 } 775 } 776 if(r1->f.active) 777 break; 778 r1->f.active = 1; 779 for(z=0; z<BITS; z++) 780 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 781 if(r1->f.s2 != nil) 782 synch((Reg*)r1->f.s2, dif); 783 } 784 } 785 786 uint32 787 allreg(uint32 b, Rgn *r) 788 { 789 Var *v; 790 int i; 791 792 v = var + r->varno; 793 r->regno = 0; 794 switch(v->etype) { 795 796 default: 797 fatal("unknown etype %d/%E", bitno(b), v->etype); 798 break; 799 800 case TINT8: 801 case TUINT8: 802 case TINT16: 803 case TUINT16: 804 case TINT32: 805 case TUINT32: 806 case TINT64: 807 case TUINT64: 808 case TINT: 809 case TUINT: 810 case TUINTPTR: 811 case TBOOL: 812 case TPTR32: 813 case TPTR64: 814 i = BtoR(~b); 815 if(i && r->cost > 0) { 816 r->regno = i; 817 return RtoB(i); 818 } 819 break; 820 821 case TFLOAT32: 822 case TFLOAT64: 823 i = BtoF(~b); 824 if(i && r->cost > 0) { 825 r->regno = i; 826 return FtoB(i); 827 } 828 break; 829 } 830 return 0; 831 } 832 833 void 834 paint1(Reg *r, int bn) 835 { 
836 Reg *r1; 837 int z; 838 uint32 bb; 839 840 z = bn/32; 841 bb = 1L<<(bn%32); 842 if(r->act.b[z] & bb) 843 return; 844 for(;;) { 845 if(!(r->refbehind.b[z] & bb)) 846 break; 847 r1 = (Reg*)r->f.p1; 848 if(r1 == R) 849 break; 850 if(!(r1->refahead.b[z] & bb)) 851 break; 852 if(r1->act.b[z] & bb) 853 break; 854 r = r1; 855 } 856 857 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 858 change -= CLOAD * r->f.loop; 859 } 860 for(;;) { 861 r->act.b[z] |= bb; 862 863 if(r->use1.b[z] & bb) { 864 change += CREF * r->f.loop; 865 } 866 867 if((r->use2.b[z]|r->set.b[z]) & bb) { 868 change += CREF * r->f.loop; 869 } 870 871 if(STORE(r) & r->regdiff.b[z] & bb) { 872 change -= CLOAD * r->f.loop; 873 } 874 875 if(r->refbehind.b[z] & bb) 876 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 877 if(r1->refahead.b[z] & bb) 878 paint1(r1, bn); 879 880 if(!(r->refahead.b[z] & bb)) 881 break; 882 r1 = (Reg*)r->f.s2; 883 if(r1 != R) 884 if(r1->refbehind.b[z] & bb) 885 paint1(r1, bn); 886 r = (Reg*)r->f.s1; 887 if(r == R) 888 break; 889 if(r->act.b[z] & bb) 890 break; 891 if(!(r->refbehind.b[z] & bb)) 892 break; 893 } 894 } 895 896 uint32 897 regset(Reg *r, uint32 bb) 898 { 899 uint32 b, set; 900 Adr v; 901 int c; 902 903 set = 0; 904 v = zprog.from; 905 while(b = bb & ~(bb-1)) { 906 v.type = b & 0xFFFF? BtoR(b): BtoF(b); 907 if(v.type == 0) 908 fatal("zero v.type for %#ux", b); 909 c = copyu(r->f.prog, &v, A); 910 if(c == 3) 911 set |= b; 912 bb &= ~b; 913 } 914 return set; 915 } 916 917 uint32 918 reguse(Reg *r, uint32 bb) 919 { 920 uint32 b, set; 921 Adr v; 922 int c; 923 924 set = 0; 925 v = zprog.from; 926 while(b = bb & ~(bb-1)) { 927 v.type = b & 0xFFFF? 
BtoR(b): BtoF(b); 928 c = copyu(r->f.prog, &v, A); 929 if(c == 1 || c == 2 || c == 4) 930 set |= b; 931 bb &= ~b; 932 } 933 return set; 934 } 935 936 uint32 937 paint2(Reg *r, int bn) 938 { 939 Reg *r1; 940 int z; 941 uint32 bb, vreg, x; 942 943 z = bn/32; 944 bb = 1L << (bn%32); 945 vreg = regbits; 946 if(!(r->act.b[z] & bb)) 947 return vreg; 948 for(;;) { 949 if(!(r->refbehind.b[z] & bb)) 950 break; 951 r1 = (Reg*)r->f.p1; 952 if(r1 == R) 953 break; 954 if(!(r1->refahead.b[z] & bb)) 955 break; 956 if(!(r1->act.b[z] & bb)) 957 break; 958 r = r1; 959 } 960 for(;;) { 961 r->act.b[z] &= ~bb; 962 963 vreg |= r->regu; 964 965 if(r->refbehind.b[z] & bb) 966 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 967 if(r1->refahead.b[z] & bb) 968 vreg |= paint2(r1, bn); 969 970 if(!(r->refahead.b[z] & bb)) 971 break; 972 r1 = (Reg*)r->f.s2; 973 if(r1 != R) 974 if(r1->refbehind.b[z] & bb) 975 vreg |= paint2(r1, bn); 976 r = (Reg*)r->f.s1; 977 if(r == R) 978 break; 979 if(!(r->act.b[z] & bb)) 980 break; 981 if(!(r->refbehind.b[z] & bb)) 982 break; 983 } 984 985 bb = vreg; 986 for(; r; r=(Reg*)r->f.s1) { 987 x = r->regu & ~bb; 988 if(x) { 989 vreg |= reguse(r, x); 990 bb |= regset(r, x); 991 } 992 } 993 return vreg; 994 } 995 996 void 997 paint3(Reg *r, int bn, int32 rb, int rn) 998 { 999 Reg *r1; 1000 Prog *p; 1001 int z; 1002 uint32 bb; 1003 1004 z = bn/32; 1005 bb = 1L << (bn%32); 1006 if(r->act.b[z] & bb) 1007 return; 1008 for(;;) { 1009 if(!(r->refbehind.b[z] & bb)) 1010 break; 1011 r1 = (Reg*)r->f.p1; 1012 if(r1 == R) 1013 break; 1014 if(!(r1->refahead.b[z] & bb)) 1015 break; 1016 if(r1->act.b[z] & bb) 1017 break; 1018 r = r1; 1019 } 1020 1021 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1022 addmove(r, bn, rn, 0); 1023 for(;;) { 1024 r->act.b[z] |= bb; 1025 p = r->f.prog; 1026 1027 if(r->use1.b[z] & bb) { 1028 if(debug['R'] && debug['v']) 1029 print("%P", p); 1030 addreg(&p->from, rn); 1031 if(debug['R'] && debug['v']) 1032 print(" ===change== 
%P\n", p); 1033 } 1034 if((r->use2.b[z]|r->set.b[z]) & bb) { 1035 if(debug['R'] && debug['v']) 1036 print("%P", p); 1037 addreg(&p->to, rn); 1038 if(debug['R'] && debug['v']) 1039 print(" ===change== %P\n", p); 1040 } 1041 1042 if(STORE(r) & r->regdiff.b[z] & bb) 1043 addmove(r, bn, rn, 1); 1044 r->regu |= rb; 1045 1046 if(r->refbehind.b[z] & bb) 1047 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1048 if(r1->refahead.b[z] & bb) 1049 paint3(r1, bn, rb, rn); 1050 1051 if(!(r->refahead.b[z] & bb)) 1052 break; 1053 r1 = (Reg*)r->f.s2; 1054 if(r1 != R) 1055 if(r1->refbehind.b[z] & bb) 1056 paint3(r1, bn, rb, rn); 1057 r = (Reg*)r->f.s1; 1058 if(r == R) 1059 break; 1060 if(r->act.b[z] & bb) 1061 break; 1062 if(!(r->refbehind.b[z] & bb)) 1063 break; 1064 } 1065 } 1066 1067 void 1068 addreg(Adr *a, int rn) 1069 { 1070 1071 a->sym = 0; 1072 a->offset = 0; 1073 a->type = rn; 1074 1075 ostats.ncvtreg++; 1076 } 1077 1078 int32 1079 RtoB(int r) 1080 { 1081 1082 if(r < D_AX || r > D_R15) 1083 return 0; 1084 return 1L << (r-D_AX); 1085 } 1086 1087 int 1088 BtoR(int32 b) 1089 { 1090 b &= 0xffffL; 1091 if(b == 0) 1092 return 0; 1093 return bitno(b) + D_AX; 1094 } 1095 1096 /* 1097 * bit reg 1098 * 16 X0 1099 * ... 
1100 * 31 X15 1101 */ 1102 int32 1103 FtoB(int f) 1104 { 1105 if(f < D_X0 || f > D_X15) 1106 return 0; 1107 return 1L << (f - D_X0 + 16); 1108 } 1109 1110 int 1111 BtoF(int32 b) 1112 { 1113 1114 b &= 0xFFFF0000L; 1115 if(b == 0) 1116 return 0; 1117 return bitno(b) - 16 + D_X0; 1118 } 1119 1120 void 1121 dumpone(Flow *f, int isreg) 1122 { 1123 int z; 1124 Bits bit; 1125 Reg *r; 1126 1127 print("%d:%P", f->loop, f->prog); 1128 if(isreg) { 1129 r = (Reg*)f; 1130 for(z=0; z<BITS; z++) 1131 bit.b[z] = 1132 r->set.b[z] | 1133 r->use1.b[z] | 1134 r->use2.b[z] | 1135 r->refbehind.b[z] | 1136 r->refahead.b[z] | 1137 r->calbehind.b[z] | 1138 r->calahead.b[z] | 1139 r->regdiff.b[z] | 1140 r->act.b[z] | 1141 0; 1142 if(bany(&bit)) { 1143 print("\t"); 1144 if(bany(&r->set)) 1145 print(" s:%Q", r->set); 1146 if(bany(&r->use1)) 1147 print(" u1:%Q", r->use1); 1148 if(bany(&r->use2)) 1149 print(" u2:%Q", r->use2); 1150 if(bany(&r->refbehind)) 1151 print(" rb:%Q ", r->refbehind); 1152 if(bany(&r->refahead)) 1153 print(" ra:%Q ", r->refahead); 1154 if(bany(&r->calbehind)) 1155 print(" cb:%Q ", r->calbehind); 1156 if(bany(&r->calahead)) 1157 print(" ca:%Q ", r->calahead); 1158 if(bany(&r->regdiff)) 1159 print(" d:%Q ", r->regdiff); 1160 if(bany(&r->act)) 1161 print(" a:%Q ", r->act); 1162 } 1163 } 1164 print("\n"); 1165 } 1166 1167 void 1168 dumpit(char *str, Flow *r0, int isreg) 1169 { 1170 Flow *r, *r1; 1171 1172 print("\n%s\n", str); 1173 for(r = r0; r != nil; r = r->link) { 1174 dumpone(r, isreg); 1175 r1 = r->p2; 1176 if(r1 != nil) { 1177 print(" pred:"); 1178 for(; r1 != nil; r1 = r1->p2link) 1179 print(" %.4ud", r1->prog->loc); 1180 print("\n"); 1181 } 1182 // r1 = r->s1; 1183 // if(r1 != R) { 1184 // print(" succ:"); 1185 // for(; r1 != R; r1 = r1->s1) 1186 // print(" %.4ud", r1->prog->loc); 1187 // print("\n"); 1188 // } 1189 } 1190 }