github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/8g/reg.c (about) 1 // Derived from Inferno utils/6c/reg.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 16 /* 8 integer + 8 floating */ 37 #define REGBITS ((uint32)0xffff) 38 39 static Reg* firstr; 40 static int first = 1; 41 42 int 43 rcmp(const void *a1, const void *a2) 44 { 45 Rgn *p1, *p2; 46 int c1, c2; 47 48 p1 = (Rgn*)a1; 49 p2 = (Rgn*)a2; 50 c1 = p2->cost; 51 c2 = p1->cost; 52 if(c1 -= c2) 53 return c1; 54 return p2->varno - p1->varno; 55 } 56 57 static void 58 setoutvar(void) 59 { 60 Type *t; 61 Node *n; 62 Addr a; 63 Iter save; 64 Bits bit; 65 int z; 66 67 t = structfirst(&save, getoutarg(curfn->type)); 68 while(t != T) { 69 n = nodarg(t, 1); 70 a = zprog.from; 71 naddr(n, &a, 0); 72 bit = mkvar(R, &a); 73 for(z=0; z<BITS; z++) 74 ovar.b[z] |= bit.b[z]; 75 t = structnext(&save); 76 } 77 //if(bany(ovar)) 78 //print("ovars = %Q\n", ovar); 79 } 80 81 static void 82 setaddrs(Bits bit) 83 { 84 int i, n; 85 Var *v; 86 Node *node; 87 88 while(bany(&bit)) { 89 // convert each bit to a variable 90 i = bnum(bit); 91 node = var[i].node; 92 n = var[i].name; 93 bit.b[i/32] &= ~(1L<<(i%32)); 94 95 // disable all pieces of that variable 96 for(i=0; i<nvar; i++) { 97 v = var+i; 98 if(v->node == node && v->name == n) 99 v->addr = 2; 100 } 101 } 102 } 103 104 static char* regname[] = { 105 ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di", 106 ".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7", 107 }; 108 109 static Node* regnodes[NREGVAR]; 110 111 void 112 regopt(Prog *firstp) 113 { 114 Reg *r, *r1; 115 Prog *p; 116 Graph *g; 117 ProgInfo info; 118 int i, z; 119 uint32 vreg; 120 Bits bit; 121 122 if(first) { 123 fmtinstall('Q', Qconv); 124 exregoffset = D_DI; // no externals 125 first = 0; 126 } 127 128 fixjmp(firstp); 129 mergetemp(firstp); 130 131 /* 132 * control flow is more complicated in generated go code 133 * than in generated c code. define pseudo-variables for 134 * registers, so we have complete register usage information. 135 */ 136 nvar = NREGVAR; 137 memset(var, 0, NREGVAR*sizeof var[0]); 138 for(i=0; i<NREGVAR; i++) { 139 if(regnodes[i] == N) 140 regnodes[i] = newname(lookup(regname[i])); 141 var[i].node = regnodes[i]; 142 } 143 144 regbits = RtoB(D_SP); 145 for(z=0; z<BITS; z++) { 146 externs.b[z] = 0; 147 params.b[z] = 0; 148 consts.b[z] = 0; 149 addrs.b[z] = 0; 150 ovar.b[z] = 0; 151 } 152 153 // build list of return variables 154 setoutvar(); 155 156 /* 157 * pass 1 158 * build aux data structure 159 * allocate pcs 160 * find use and set of variables 161 */ 162 g = flowstart(firstp, sizeof(Reg)); 163 if(g == nil) 164 return; 165 firstr = (Reg*)g->start; 166 167 for(r = firstr; r != R; r = (Reg*)r->f.link) { 168 p = r->f.prog; 169 proginfo(&info, p); 170 171 // Avoid making variables for direct-called functions. 172 if(p->as == ACALL && p->to.type == D_EXTERN) 173 continue; 174 175 r->use1.b[0] |= info.reguse | info.regindex; 176 r->set.b[0] |= info.regset; 177 178 bit = mkvar(r, &p->from); 179 if(bany(&bit)) { 180 if(info.flags & LeftAddr) 181 setaddrs(bit); 182 if(info.flags & LeftRead) 183 for(z=0; z<BITS; z++) 184 r->use1.b[z] |= bit.b[z]; 185 if(info.flags & LeftWrite) 186 for(z=0; z<BITS; z++) 187 r->set.b[z] |= bit.b[z]; 188 } 189 190 bit = mkvar(r, &p->to); 191 if(bany(&bit)) { 192 if(info.flags & RightAddr) 193 setaddrs(bit); 194 if(info.flags & RightRead) 195 for(z=0; z<BITS; z++) 196 r->use2.b[z] |= bit.b[z]; 197 if(info.flags & RightWrite) 198 for(z=0; z<BITS; z++) 199 r->set.b[z] |= bit.b[z]; 200 } 201 } 202 if(firstr == R) 203 return; 204 205 for(i=0; i<nvar; i++) { 206 Var *v = var+i; 207 if(v->addr) { 208 bit = blsh(i); 209 for(z=0; z<BITS; z++) 210 addrs.b[z] |= bit.b[z]; 211 } 212 213 if(debug['R'] && debug['v']) 214 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 215 i, v->addr, v->etype, v->width, v->node, v->offset); 216 } 217 218 if(debug['R'] && debug['v']) 219 dumpit("pass1", &firstr->f, 1); 220 221 /* 222 * pass 2 223 * find looping structure 224 */ 225 flowrpo(g); 226 227 if(debug['R'] && debug['v']) 228 dumpit("pass2", &firstr->f, 1); 229 230 /* 231 * pass 3 232 * iterate propagating usage 233 * back until flow graph is complete 234 */ 235 loop1: 236 change = 0; 237 for(r = firstr; r != R; r = (Reg*)r->f.link) 238 r->f.active = 0; 239 for(r = firstr; r != R; r = (Reg*)r->f.link) 240 if(r->f.prog->as == ARET) 241 prop(r, zbits, zbits); 242 loop11: 243 /* pick up unreachable code */ 244 i = 0; 245 for(r = firstr; r != R; r = r1) { 246 r1 = (Reg*)r->f.link; 247 if(r1 && r1->f.active && !r->f.active) { 248 prop(r, zbits, zbits); 249 i = 1; 250 } 251 } 252 if(i) 253 goto loop11; 254 if(change) 255 goto loop1; 256 257 if(debug['R'] && debug['v']) 258 dumpit("pass3", &firstr->f, 1); 259 260 /* 261 * pass 4 262 * iterate propagating register/variable synchrony 263 * forward until graph is complete 264 */ 265 loop2: 266 change = 0; 267 for(r = firstr; r != R; r = (Reg*)r->f.link) 268 r->f.active = 0; 269 synch(firstr, zbits); 270 if(change) 271 goto loop2; 272 273 if(debug['R'] && debug['v']) 274 dumpit("pass4", &firstr->f, 1); 275 276 /* 277 * pass 4.5 278 * move register pseudo-variables into regu. 279 */ 280 for(r = firstr; r != R; r = (Reg*)r->f.link) { 281 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 282 283 r->set.b[0] &= ~REGBITS; 284 r->use1.b[0] &= ~REGBITS; 285 r->use2.b[0] &= ~REGBITS; 286 r->refbehind.b[0] &= ~REGBITS; 287 r->refahead.b[0] &= ~REGBITS; 288 r->calbehind.b[0] &= ~REGBITS; 289 r->calahead.b[0] &= ~REGBITS; 290 r->regdiff.b[0] &= ~REGBITS; 291 r->act.b[0] &= ~REGBITS; 292 } 293 294 /* 295 * pass 5 296 * isolate regions 297 * calculate costs (paint1) 298 */ 299 r = firstr; 300 if(r) { 301 for(z=0; z<BITS; z++) 302 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 303 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 304 if(bany(&bit) && !r->f.refset) { 305 // should never happen - all variables are preset 306 if(debug['w']) 307 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 308 r->f.refset = 1; 309 } 310 } 311 for(r = firstr; r != R; r = (Reg*)r->f.link) 312 r->act = zbits; 313 rgp = region; 314 nregion = 0; 315 for(r = firstr; r != R; r = (Reg*)r->f.link) { 316 for(z=0; z<BITS; z++) 317 bit.b[z] = r->set.b[z] & 318 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 319 if(bany(&bit) && !r->f.refset) { 320 if(debug['w']) 321 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 322 r->f.refset = 1; 323 excise(&r->f); 324 } 325 for(z=0; z<BITS; z++) 326 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 327 while(bany(&bit)) { 328 i = bnum(bit); 329 rgp->enter = r; 330 rgp->varno = i; 331 change = 0; 332 paint1(r, i); 333 bit.b[i/32] &= ~(1L<<(i%32)); 334 if(change <= 0) 335 continue; 336 rgp->cost = change; 337 nregion++; 338 if(nregion >= NRGN) { 339 if(debug['R'] && debug['v']) 340 print("too many regions\n"); 341 goto brk; 342 } 343 rgp++; 344 } 345 } 346 brk: 347 qsort(region, nregion, sizeof(region[0]), rcmp); 348 349 /* 350 * pass 6 351 * determine used registers (paint2) 352 * replace code (paint3) 353 */ 354 rgp = region; 355 for(i=0; i<nregion; i++) { 356 bit = blsh(rgp->varno); 357 vreg = paint2(rgp->enter, rgp->varno); 358 vreg = allreg(vreg, rgp); 359 if(rgp->regno != 0) 360 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 361 rgp++; 362 } 363 364 if(debug['R'] && debug['v']) 365 dumpit("pass6", &firstr->f, 1); 366 367 /* 368 * free aux structures. peep allocates new ones. 369 */ 370 flowend(g); 371 firstr = R; 372 373 /* 374 * pass 7 375 * peep-hole on basic block 376 */ 377 if(!debug['R'] || debug['P']) 378 peep(firstp); 379 380 /* 381 * eliminate nops 382 */ 383 for(p=firstp; p!=P; p=p->link) { 384 while(p->link != P && p->link->as == ANOP) 385 p->link = p->link->link; 386 if(p->to.type == D_BRANCH) 387 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 388 p->to.u.branch = p->to.u.branch->link; 389 } 390 391 if(!use_sse) 392 for(p=firstp; p!=P; p=p->link) { 393 if(p->from.type >= D_X0 && p->from.type <= D_X7) 394 fatal("invalid use of %R with GO386=387: %P", p->from.type, p); 395 if(p->to.type >= D_X0 && p->to.type <= D_X7) 396 fatal("invalid use of %R with GO386=387: %P", p->to.type, p); 397 } 398 399 if(debug['R']) { 400 if(ostats.ncvtreg || 401 ostats.nspill || 402 ostats.nreload || 403 ostats.ndelmov || 404 ostats.nvar || 405 ostats.naddr || 406 0) 407 print("\nstats\n"); 408 409 if(ostats.ncvtreg) 410 print(" %4d cvtreg\n", ostats.ncvtreg); 411 if(ostats.nspill) 412 print(" %4d spill\n", ostats.nspill); 413 if(ostats.nreload) 414 print(" %4d reload\n", ostats.nreload); 415 if(ostats.ndelmov) 416 print(" %4d delmov\n", ostats.ndelmov); 417 if(ostats.nvar) 418 print(" %4d var\n", ostats.nvar); 419 if(ostats.naddr) 420 print(" %4d addr\n", ostats.naddr); 421 422 memset(&ostats, 0, sizeof(ostats)); 423 } 424 } 425 426 /* 427 * add mov b,rn 428 * just after r 429 */ 430 void 431 addmove(Reg *r, int bn, int rn, int f) 432 { 433 Prog *p, *p1; 434 Adr *a; 435 Var *v; 436 437 p1 = mal(sizeof(*p1)); 438 clearp(p1); 439 p1->loc = 9999; 440 441 p = r->f.prog; 442 p1->link = p->link; 443 p->link = p1; 444 p1->lineno = p->lineno; 445 446 v = var + bn; 447 448 a = &p1->to; 449 a->offset = v->offset; 450 a->etype = v->etype; 451 a->type = v->name; 452 a->node = v->node; 453 a->sym = v->node->sym; 454 455 // need to clean this up with wptr and 456 // some of the defaults 457 p1->as = AMOVL; 458 switch(v->etype) { 459 default: 460 fatal("unknown type %E", v->etype); 461 case TINT8: 462 case TUINT8: 463 case TBOOL: 464 p1->as = AMOVB; 465 break; 466 case TINT16: 467 case TUINT16: 468 p1->as = AMOVW; 469 break; 470 case TFLOAT32: 471 p1->as = AMOVSS; 472 break; 473 case TFLOAT64: 474 p1->as = AMOVSD; 475 break; 476 case TINT: 477 case TUINT: 478 case TINT32: 479 case TUINT32: 480 case TPTR32: 481 break; 482 } 483 484 p1->from.type = rn; 485 if(!f) { 486 p1->from = *a; 487 *a = zprog.from; 488 a->type = rn; 489 if(v->etype == TUINT8) 490 p1->as = AMOVB; 491 if(v->etype == TUINT16) 492 p1->as = AMOVW; 493 } 494 if(debug['R'] && debug['v']) 495 print("%P ===add=== %P\n", p, p1); 496 ostats.nspill++; 497 } 498 499 uint32 500 doregbits(int r) 501 { 502 uint32 b; 503 504 b = 0; 505 if(r >= D_INDIR) 506 r -= D_INDIR; 507 if(r >= D_AX && r <= D_DI) 508 b |= RtoB(r); 509 else 510 if(r >= D_AL && r <= D_BL) 511 b |= RtoB(r-D_AL+D_AX); 512 else 513 if(r >= D_AH && r <= D_BH) 514 b |= RtoB(r-D_AH+D_AX); 515 else 516 if(r >= D_X0 && r <= D_X0+7) 517 b |= FtoB(r); 518 return b; 519 } 520 521 static int 522 overlap(int32 o1, int w1, int32 o2, int w2) 523 { 524 int32 t1, t2; 525 526 t1 = o1+w1; 527 t2 = o2+w2; 528 529 if(!(t1 > o2 && t2 > o1)) 530 return 0; 531 532 return 1; 533 } 534 535 Bits 536 mkvar(Reg *r, Adr *a) 537 { 538 Var *v; 539 int i, t, n, et, z, w, flag, regu; 540 int32 o; 541 Bits bit; 542 Node *node; 543 544 /* 545 * mark registers used 546 */ 547 t = a->type; 548 if(t == D_NONE) 549 goto none; 550 551 if(r != R) 552 r->use1.b[0] |= doregbits(a->index); 553 554 switch(t) { 555 default: 556 regu = doregbits(t); 557 if(regu == 0) 558 goto none; 559 bit = zbits; 560 bit.b[0] = regu; 561 return bit; 562 563 case D_ADDR: 564 a->type = a->index; 565 bit = mkvar(r, a); 566 setaddrs(bit); 567 a->type = t; 568 ostats.naddr++; 569 goto none; 570 571 case D_EXTERN: 572 case D_STATIC: 573 case D_PARAM: 574 case D_AUTO: 575 n = t; 576 break; 577 } 578 579 node = a->node; 580 if(node == N || node->op != ONAME || node->orig == N) 581 goto none; 582 node = node->orig; 583 if(node->orig != node) 584 fatal("%D: bad node", a); 585 if(node->sym == S || node->sym->name[0] == '.') 586 goto none; 587 et = a->etype; 588 o = a->offset; 589 w = a->width; 590 if(w < 0) 591 fatal("bad width %d for %D", w, a); 592 593 flag = 0; 594 for(i=0; i<nvar; i++) { 595 v = var+i; 596 if(v->node == node && v->name == n) { 597 if(v->offset == o) 598 if(v->etype == et) 599 if(v->width == w) 600 return blsh(i); 601 602 // if they overlap, disable both 603 if(overlap(v->offset, v->width, o, w)) { 604 if(debug['R']) 605 print("disable %s\n", node->sym->name); 606 v->addr = 1; 607 flag = 1; 608 } 609 } 610 } 611 612 switch(et) { 613 case 0: 614 case TFUNC: 615 goto none; 616 } 617 618 if(nvar >= NVAR) { 619 if(debug['w'] > 1 && node != N) 620 fatal("variable not optimized: %D", a); 621 goto none; 622 } 623 624 i = nvar; 625 nvar++; 626 v = var+i; 627 v->offset = o; 628 v->name = n; 629 v->etype = et; 630 v->width = w; 631 v->addr = flag; // funny punning 632 v->node = node; 633 634 if(debug['R']) 635 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 636 ostats.nvar++; 637 638 bit = blsh(i); 639 if(n == D_EXTERN || n == D_STATIC) 640 for(z=0; z<BITS; z++) 641 externs.b[z] |= bit.b[z]; 642 if(n == D_PARAM) 643 for(z=0; z<BITS; z++) 644 params.b[z] |= bit.b[z]; 645 646 return bit; 647 648 none: 649 return zbits; 650 } 651 652 void 653 prop(Reg *r, Bits ref, Bits cal) 654 { 655 Reg *r1, *r2; 656 int z; 657 658 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 659 for(z=0; z<BITS; z++) { 660 ref.b[z] |= r1->refahead.b[z]; 661 if(ref.b[z] != r1->refahead.b[z]) { 662 r1->refahead.b[z] = ref.b[z]; 663 change++; 664 } 665 cal.b[z] |= r1->calahead.b[z]; 666 if(cal.b[z] != r1->calahead.b[z]) { 667 r1->calahead.b[z] = cal.b[z]; 668 change++; 669 } 670 } 671 switch(r1->f.prog->as) { 672 case ACALL: 673 if(noreturn(r1->f.prog)) 674 break; 675 for(z=0; z<BITS; z++) { 676 cal.b[z] |= ref.b[z] | externs.b[z]; 677 ref.b[z] = 0; 678 } 679 break; 680 681 case ATEXT: 682 for(z=0; z<BITS; z++) { 683 cal.b[z] = 0; 684 ref.b[z] = 0; 685 } 686 break; 687 688 case ARET: 689 for(z=0; z<BITS; z++) { 690 cal.b[z] = externs.b[z] | ovar.b[z]; 691 ref.b[z] = 0; 692 } 693 break; 694 695 default: 696 // Work around for issue 1304: 697 // flush modified globals before each instruction. 698 for(z=0; z<BITS; z++) { 699 cal.b[z] |= externs.b[z]; 700 // issue 4066: flush modified return variables in case of panic 701 if(hasdefer) 702 cal.b[z] |= ovar.b[z]; 703 } 704 break; 705 } 706 for(z=0; z<BITS; z++) { 707 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 708 r1->use1.b[z] | r1->use2.b[z]; 709 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 710 r1->refbehind.b[z] = ref.b[z]; 711 r1->calbehind.b[z] = cal.b[z]; 712 } 713 if(r1->f.active) 714 break; 715 r1->f.active = 1; 716 } 717 for(; r != r1; r = (Reg*)r->f.p1) 718 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 719 prop(r2, r->refbehind, r->calbehind); 720 } 721 722 void 723 synch(Reg *r, Bits dif) 724 { 725 Reg *r1; 726 int z; 727 728 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 729 for(z=0; z<BITS; z++) { 730 dif.b[z] = (dif.b[z] & 731 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 732 r1->set.b[z] | r1->regdiff.b[z]; 733 if(dif.b[z] != r1->regdiff.b[z]) { 734 r1->regdiff.b[z] = dif.b[z]; 735 change++; 736 } 737 } 738 if(r1->f.active) 739 break; 740 r1->f.active = 1; 741 for(z=0; z<BITS; z++) 742 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 743 if((Reg*)r1->f.s2 != R) 744 synch((Reg*)r1->f.s2, dif); 745 } 746 } 747 748 uint32 749 allreg(uint32 b, Rgn *r) 750 { 751 Var *v; 752 int i; 753 754 v = var + r->varno; 755 r->regno = 0; 756 switch(v->etype) { 757 758 default: 759 fatal("unknown etype %d/%E", bitno(b), v->etype); 760 break; 761 762 case TINT8: 763 case TUINT8: 764 case TINT16: 765 case TUINT16: 766 case TINT32: 767 case TUINT32: 768 case TINT64: 769 case TINT: 770 case TUINT: 771 case TUINTPTR: 772 case TBOOL: 773 case TPTR32: 774 i = BtoR(~b); 775 if(i && r->cost > 0) { 776 r->regno = i; 777 return RtoB(i); 778 } 779 break; 780 781 case TFLOAT32: 782 case TFLOAT64: 783 if(!use_sse) 784 break; 785 i = BtoF(~b); 786 if(i && r->cost > 0) { 787 r->regno = i; 788 return FtoB(i); 789 } 790 break; 791 } 792 return 0; 793 } 794 795 void 796 paint1(Reg *r, int bn) 797 { 798 Reg *r1; 799 Prog *p; 800 int z; 801 uint32 bb; 802 803 z = bn/32; 804 bb = 1L<<(bn%32); 805 if(r->act.b[z] & bb) 806 return; 807 for(;;) { 808 if(!(r->refbehind.b[z] & bb)) 809 break; 810 r1 = (Reg*)r->f.p1; 811 if(r1 == R) 812 break; 813 if(!(r1->refahead.b[z] & bb)) 814 break; 815 if(r1->act.b[z] & bb) 816 break; 817 r = r1; 818 } 819 820 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 821 change -= CLOAD * r->f.loop; 822 } 823 for(;;) { 824 r->act.b[z] |= bb; 825 p = r->f.prog; 826 827 if(r->use1.b[z] & bb) { 828 change += CREF * r->f.loop; 829 if(p->as == AFMOVL || p->as == AFMOVW) 830 if(BtoR(bb) != D_F0) 831 change = -CINF; 832 } 833 834 if((r->use2.b[z]|r->set.b[z]) & bb) { 835 change += CREF * r->f.loop; 836 if(p->as == AFMOVL || p->as == AFMOVW) 837 if(BtoR(bb) != D_F0) 838 change = -CINF; 839 } 840 841 if(STORE(r) & r->regdiff.b[z] & bb) { 842 change -= CLOAD * r->f.loop; 843 if(p->as == AFMOVL || p->as == AFMOVW) 844 if(BtoR(bb) != D_F0) 845 change = -CINF; 846 } 847 848 if(r->refbehind.b[z] & bb) 849 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 850 if(r1->refahead.b[z] & bb) 851 paint1(r1, bn); 852 853 if(!(r->refahead.b[z] & bb)) 854 break; 855 r1 = (Reg*)r->f.s2; 856 if(r1 != R) 857 if(r1->refbehind.b[z] & bb) 858 paint1(r1, bn); 859 r = (Reg*)r->f.s1; 860 if(r == R) 861 break; 862 if(r->act.b[z] & bb) 863 break; 864 if(!(r->refbehind.b[z] & bb)) 865 break; 866 } 867 } 868 869 uint32 870 regset(Reg *r, uint32 bb) 871 { 872 uint32 b, set; 873 Adr v; 874 int c; 875 876 set = 0; 877 v = zprog.from; 878 while(b = bb & ~(bb-1)) { 879 v.type = b & 0xFF ? BtoR(b): BtoF(b); 880 c = copyu(r->f.prog, &v, A); 881 if(c == 3) 882 set |= b; 883 bb &= ~b; 884 } 885 return set; 886 } 887 888 uint32 889 reguse(Reg *r, uint32 bb) 890 { 891 uint32 b, set; 892 Adr v; 893 int c; 894 895 set = 0; 896 v = zprog.from; 897 while(b = bb & ~(bb-1)) { 898 v.type = b & 0xFF ? BtoR(b): BtoF(b); 899 c = copyu(r->f.prog, &v, A); 900 if(c == 1 || c == 2 || c == 4) 901 set |= b; 902 bb &= ~b; 903 } 904 return set; 905 } 906 907 uint32 908 paint2(Reg *r, int bn) 909 { 910 Reg *r1; 911 int z; 912 uint32 bb, vreg, x; 913 914 z = bn/32; 915 bb = 1L << (bn%32); 916 vreg = regbits; 917 if(!(r->act.b[z] & bb)) 918 return vreg; 919 for(;;) { 920 if(!(r->refbehind.b[z] & bb)) 921 break; 922 r1 = (Reg*)r->f.p1; 923 if(r1 == R) 924 break; 925 if(!(r1->refahead.b[z] & bb)) 926 break; 927 if(!(r1->act.b[z] & bb)) 928 break; 929 r = r1; 930 } 931 for(;;) { 932 r->act.b[z] &= ~bb; 933 934 vreg |= r->regu; 935 936 if(r->refbehind.b[z] & bb) 937 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 938 if(r1->refahead.b[z] & bb) 939 vreg |= paint2(r1, bn); 940 941 if(!(r->refahead.b[z] & bb)) 942 break; 943 r1 = (Reg*)r->f.s2; 944 if(r1 != R) 945 if(r1->refbehind.b[z] & bb) 946 vreg |= paint2(r1, bn); 947 r = (Reg*)r->f.s1; 948 if(r == R) 949 break; 950 if(!(r->act.b[z] & bb)) 951 break; 952 if(!(r->refbehind.b[z] & bb)) 953 break; 954 } 955 956 bb = vreg; 957 for(; r; r=(Reg*)r->f.s1) { 958 x = r->regu & ~bb; 959 if(x) { 960 vreg |= reguse(r, x); 961 bb |= regset(r, x); 962 } 963 } 964 return vreg; 965 } 966 967 void 968 paint3(Reg *r, int bn, int32 rb, int rn) 969 { 970 Reg *r1; 971 Prog *p; 972 int z; 973 uint32 bb; 974 975 z = bn/32; 976 bb = 1L << (bn%32); 977 if(r->act.b[z] & bb) 978 return; 979 for(;;) { 980 if(!(r->refbehind.b[z] & bb)) 981 break; 982 r1 = (Reg*)r->f.p1; 983 if(r1 == R) 984 break; 985 if(!(r1->refahead.b[z] & bb)) 986 break; 987 if(r1->act.b[z] & bb) 988 break; 989 r = r1; 990 } 991 992 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 993 addmove(r, bn, rn, 0); 994 for(;;) { 995 r->act.b[z] |= bb; 996 p = r->f.prog; 997 998 if(r->use1.b[z] & bb) { 999 if(debug['R'] && debug['v']) 1000 print("%P", p); 1001 addreg(&p->from, rn); 1002 if(debug['R'] && debug['v']) 1003 print(" ===change== %P\n", p); 1004 } 1005 if((r->use2.b[z]|r->set.b[z]) & bb) { 1006 if(debug['R'] && debug['v']) 1007 print("%P", p); 1008 addreg(&p->to, rn); 1009 if(debug['R'] && debug['v']) 1010 print(" ===change== %P\n", p); 1011 } 1012 1013 if(STORE(r) & r->regdiff.b[z] & bb) 1014 addmove(r, bn, rn, 1); 1015 r->regu |= rb; 1016 1017 if(r->refbehind.b[z] & bb) 1018 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1019 if(r1->refahead.b[z] & bb) 1020 paint3(r1, bn, rb, rn); 1021 1022 if(!(r->refahead.b[z] & bb)) 1023 break; 1024 r1 = (Reg*)r->f.s2; 1025 if(r1 != R) 1026 if(r1->refbehind.b[z] & bb) 1027 paint3(r1, bn, rb, rn); 1028 r = (Reg*)r->f.s1; 1029 if(r == R) 1030 break; 1031 if(r->act.b[z] & bb) 1032 break; 1033 if(!(r->refbehind.b[z] & bb)) 1034 break; 1035 } 1036 } 1037 1038 void 1039 addreg(Adr *a, int rn) 1040 { 1041 1042 a->sym = 0; 1043 a->offset = 0; 1044 a->type = rn; 1045 1046 ostats.ncvtreg++; 1047 } 1048 1049 int32 1050 RtoB(int r) 1051 { 1052 1053 if(r < D_AX || r > D_DI) 1054 return 0; 1055 return 1L << (r-D_AX); 1056 } 1057 1058 int 1059 BtoR(int32 b) 1060 { 1061 1062 b &= 0xffL; 1063 if(b == 0) 1064 return 0; 1065 return bitno(b) + D_AX; 1066 } 1067 1068 int32 1069 FtoB(int f) 1070 { 1071 if(f < D_X0 || f > D_X7) 1072 return 0; 1073 return 1L << (f - D_X0 + 8); 1074 } 1075 1076 int 1077 BtoF(int32 b) 1078 { 1079 b &= 0xFF00L; 1080 if(b == 0) 1081 return 0; 1082 return bitno(b) - 8 + D_X0; 1083 } 1084 1085 void 1086 dumpone(Flow *f, int isreg) 1087 { 1088 int z; 1089 Bits bit; 1090 Reg *r; 1091 1092 print("%d:%P", f->loop, f->prog); 1093 if(isreg) { 1094 r = (Reg*)f; 1095 for(z=0; z<BITS; z++) 1096 bit.b[z] = 1097 r->set.b[z] | 1098 r->use1.b[z] | 1099 r->use2.b[z] | 1100 r->refbehind.b[z] | 1101 r->refahead.b[z] | 1102 r->calbehind.b[z] | 1103 r->calahead.b[z] | 1104 r->regdiff.b[z] | 1105 r->act.b[z] | 1106 0; 1107 if(bany(&bit)) { 1108 print("\t"); 1109 if(bany(&r->set)) 1110 print(" s:%Q", r->set); 1111 if(bany(&r->use1)) 1112 print(" u1:%Q", r->use1); 1113 if(bany(&r->use2)) 1114 print(" u2:%Q", r->use2); 1115 if(bany(&r->refbehind)) 1116 print(" rb:%Q ", r->refbehind); 1117 if(bany(&r->refahead)) 1118 print(" ra:%Q ", r->refahead); 1119 if(bany(&r->calbehind)) 1120 print(" cb:%Q ", r->calbehind); 1121 if(bany(&r->calahead)) 1122 print(" ca:%Q ", r->calahead); 1123 if(bany(&r->regdiff)) 1124 print(" d:%Q ", r->regdiff); 1125 if(bany(&r->act)) 1126 print(" a:%Q ", r->act); 1127 } 1128 } 1129 print("\n"); 1130 } 1131 1132 void 1133 dumpit(char *str, Flow *r0, int isreg) 1134 { 1135 Flow *r, *r1; 1136 1137 print("\n%s\n", str); 1138 for(r = r0; r != nil; r = r->link) { 1139 dumpone(r, isreg); 1140 r1 = r->p2; 1141 if(r1 != nil) { 1142 print(" pred:"); 1143 for(; r1 != nil; r1 = r->p2link) 1144 print(" %.4ud", r1->prog->loc); 1145 print("\n"); 1146 } 1147 // r1 = r->s1; 1148 // if(r1 != nil) { 1149 // print(" succ:"); 1150 // for(; r1 != R; r1 = r1->s1) 1151 // print(" %.4ud", r1->prog->loc); 1152 // print("\n"); 1153 // } 1154 } 1155 }