// github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/5g/reg.c
//
// Inferno utils/5c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 32 #include <u.h> 33 #include <libc.h> 34 #include "gg.h" 35 #include "opt.h" 36 37 #define NREGVAR 32 38 #define REGBITS ((uint32)0xffffffff) 39 40 void addsplits(void); 41 static Reg* firstr; 42 static int first = 1; 43 44 int 45 rcmp(const void *a1, const void *a2) 46 { 47 Rgn *p1, *p2; 48 int c1, c2; 49 50 p1 = (Rgn*)a1; 51 p2 = (Rgn*)a2; 52 c1 = p2->cost; 53 c2 = p1->cost; 54 if(c1 -= c2) 55 return c1; 56 return p2->varno - p1->varno; 57 } 58 59 static void 60 setoutvar(void) 61 { 62 Type *t; 63 Node *n; 64 Addr a; 65 Iter save; 66 Bits bit; 67 int z; 68 69 t = structfirst(&save, getoutarg(curfn->type)); 70 while(t != T) { 71 n = nodarg(t, 1); 72 a = zprog.from; 73 naddr(n, &a, 0); 74 bit = mkvar(R, &a); 75 for(z=0; z<BITS; z++) 76 ovar.b[z] |= bit.b[z]; 77 t = structnext(&save); 78 } 79 //if(bany(&ovar)) 80 //print("ovar = %Q\n", ovar); 81 } 82 83 void 84 excise(Flow *r) 85 { 86 Prog *p; 87 88 p = r->prog; 89 p->as = ANOP; 90 p->scond = zprog.scond; 91 p->from = zprog.from; 92 p->to = zprog.to; 93 p->reg = zprog.reg; 94 } 95 96 static void 97 setaddrs(Bits bit) 98 { 99 int i, n; 100 Var *v; 101 Node *node; 102 103 while(bany(&bit)) { 104 // convert each bit to a variable 105 i = bnum(bit); 106 node = var[i].node; 107 n = var[i].name; 108 bit.b[i/32] &= ~(1L<<(i%32)); 109 110 // disable all pieces of that variable 111 for(i=0; i<nvar; i++) { 112 v = var+i; 113 if(v->node == node && v->name == n) 114 v->addr = 2; 115 } 116 } 117 } 118 119 static char* regname[] = { 120 ".R0", 121 ".R1", 122 ".R2", 123 ".R3", 124 ".R4", 125 ".R5", 126 ".R6", 127 ".R7", 128 ".R8", 129 ".R9", 130 ".R10", 131 ".R11", 132 ".R12", 133 ".R13", 134 ".R14", 135 ".R15", 136 ".F0", 137 ".F1", 138 ".F2", 139 ".F3", 140 ".F4", 141 ".F5", 142 ".F6", 143 ".F7", 144 ".F8", 145 ".F9", 146 ".F10", 147 ".F11", 148 ".F12", 149 ".F13", 150 ".F14", 151 ".F15", 152 }; 153 154 static Node* regnodes[NREGVAR]; 155 156 void 157 regopt(Prog *firstp) 158 { 159 Reg *r, *r1; 160 Prog *p; 161 Graph *g; 
162 int i, z; 163 uint32 vreg; 164 Bits bit; 165 ProgInfo info; 166 167 if(first) { 168 fmtinstall('Q', Qconv); 169 first = 0; 170 } 171 172 fixjmp(firstp); 173 mergetemp(firstp); 174 175 /* 176 * control flow is more complicated in generated go code 177 * than in generated c code. define pseudo-variables for 178 * registers, so we have complete register usage information. 179 */ 180 nvar = NREGVAR; 181 memset(var, 0, NREGVAR*sizeof var[0]); 182 for(i=0; i<NREGVAR; i++) { 183 if(regnodes[i] == N) 184 regnodes[i] = newname(lookup(regname[i])); 185 var[i].node = regnodes[i]; 186 } 187 188 regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); 189 for(z=0; z<BITS; z++) { 190 externs.b[z] = 0; 191 params.b[z] = 0; 192 consts.b[z] = 0; 193 addrs.b[z] = 0; 194 ovar.b[z] = 0; 195 } 196 197 // build list of return variables 198 setoutvar(); 199 200 /* 201 * pass 1 202 * build aux data structure 203 * allocate pcs 204 * find use and set of variables 205 */ 206 g = flowstart(firstp, sizeof(Reg)); 207 if(g == nil) 208 return; 209 firstr = (Reg*)g->start; 210 211 for(r = firstr; r != R; r = (Reg*)r->f.link) { 212 p = r->f.prog; 213 proginfo(&info, p); 214 215 // Avoid making variables for direct-called functions. 
216 if(p->as == ABL && p->to.type == D_EXTERN) 217 continue; 218 219 bit = mkvar(r, &p->from); 220 if(info.flags & LeftRead) 221 for(z=0; z<BITS; z++) 222 r->use1.b[z] |= bit.b[z]; 223 if(info.flags & LeftAddr) 224 setaddrs(bit); 225 226 if(info.flags & RegRead) { 227 if(p->from.type != D_FREG) 228 r->use1.b[0] |= RtoB(p->reg); 229 else 230 r->use1.b[0] |= FtoB(p->reg); 231 } 232 233 if(info.flags & (RightAddr | RightRead | RightWrite)) { 234 bit = mkvar(r, &p->to); 235 if(info.flags & RightAddr) 236 setaddrs(bit); 237 if(info.flags & RightRead) 238 for(z=0; z<BITS; z++) 239 r->use2.b[z] |= bit.b[z]; 240 if(info.flags & RightWrite) 241 for(z=0; z<BITS; z++) 242 r->set.b[z] |= bit.b[z]; 243 } 244 } 245 if(firstr == R) 246 return; 247 248 for(i=0; i<nvar; i++) { 249 Var *v = var+i; 250 if(v->addr) { 251 bit = blsh(i); 252 for(z=0; z<BITS; z++) 253 addrs.b[z] |= bit.b[z]; 254 } 255 256 if(debug['R'] && debug['v']) 257 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 258 i, v->addr, v->etype, v->width, v->node, v->offset); 259 } 260 261 if(debug['R'] && debug['v']) 262 dumpit("pass1", &firstr->f, 1); 263 264 /* 265 * pass 2 266 * find looping structure 267 */ 268 flowrpo(g); 269 270 if(debug['R'] && debug['v']) 271 dumpit("pass2", &firstr->f, 1); 272 273 /* 274 * pass 3 275 * iterate propagating usage 276 * back until flow graph is complete 277 */ 278 loop1: 279 change = 0; 280 for(r = firstr; r != R; r = (Reg*)r->f.link) 281 r->f.active = 0; 282 for(r = firstr; r != R; r = (Reg*)r->f.link) 283 if(r->f.prog->as == ARET) 284 prop(r, zbits, zbits); 285 loop11: 286 /* pick up unreachable code */ 287 i = 0; 288 for(r = firstr; r != R; r = r1) { 289 r1 = (Reg*)r->f.link; 290 if(r1 && r1->f.active && !r->f.active) { 291 prop(r, zbits, zbits); 292 i = 1; 293 } 294 } 295 if(i) 296 goto loop11; 297 if(change) 298 goto loop1; 299 300 if(debug['R'] && debug['v']) 301 dumpit("pass3", &firstr->f, 1); 302 303 304 /* 305 * pass 4 306 * iterate propagating register/variable 
synchrony 307 * forward until graph is complete 308 */ 309 loop2: 310 change = 0; 311 for(r = firstr; r != R; r = (Reg*)r->f.link) 312 r->f.active = 0; 313 synch(firstr, zbits); 314 if(change) 315 goto loop2; 316 317 addsplits(); 318 319 if(debug['R'] && debug['v']) 320 dumpit("pass4", &firstr->f, 1); 321 322 if(debug['R'] > 1) { 323 print("\nprop structure:\n"); 324 for(r = firstr; r != R; r = (Reg*)r->f.link) { 325 print("%d:%P", r->f.loop, r->f.prog); 326 for(z=0; z<BITS; z++) { 327 bit.b[z] = r->set.b[z] | 328 r->refahead.b[z] | r->calahead.b[z] | 329 r->refbehind.b[z] | r->calbehind.b[z] | 330 r->use1.b[z] | r->use2.b[z]; 331 bit.b[z] &= ~addrs.b[z]; 332 } 333 334 if(bany(&bit)) { 335 print("\t"); 336 if(bany(&r->use1)) 337 print(" u1=%Q", r->use1); 338 if(bany(&r->use2)) 339 print(" u2=%Q", r->use2); 340 if(bany(&r->set)) 341 print(" st=%Q", r->set); 342 if(bany(&r->refahead)) 343 print(" ra=%Q", r->refahead); 344 if(bany(&r->calahead)) 345 print(" ca=%Q", r->calahead); 346 if(bany(&r->refbehind)) 347 print(" rb=%Q", r->refbehind); 348 if(bany(&r->calbehind)) 349 print(" cb=%Q", r->calbehind); 350 } 351 print("\n"); 352 } 353 } 354 355 /* 356 * pass 4.5 357 * move register pseudo-variables into regu. 
358 */ 359 for(r = firstr; r != R; r = (Reg*)r->f.link) { 360 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 361 362 r->set.b[0] &= ~REGBITS; 363 r->use1.b[0] &= ~REGBITS; 364 r->use2.b[0] &= ~REGBITS; 365 r->refbehind.b[0] &= ~REGBITS; 366 r->refahead.b[0] &= ~REGBITS; 367 r->calbehind.b[0] &= ~REGBITS; 368 r->calahead.b[0] &= ~REGBITS; 369 r->regdiff.b[0] &= ~REGBITS; 370 r->act.b[0] &= ~REGBITS; 371 } 372 373 if(debug['R'] && debug['v']) 374 dumpit("pass4.5", &firstr->f, 1); 375 376 /* 377 * pass 5 378 * isolate regions 379 * calculate costs (paint1) 380 */ 381 r = firstr; 382 if(r) { 383 for(z=0; z<BITS; z++) 384 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 385 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 386 if(bany(&bit) & !r->f.refset) { 387 // should never happen - all variables are preset 388 if(debug['w']) 389 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 390 r->f.refset = 1; 391 } 392 } 393 394 for(r = firstr; r != R; r = (Reg*)r->f.link) 395 r->act = zbits; 396 rgp = region; 397 nregion = 0; 398 for(r = firstr; r != R; r = (Reg*)r->f.link) { 399 for(z=0; z<BITS; z++) 400 bit.b[z] = r->set.b[z] & 401 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 402 if(bany(&bit) && !r->f.refset) { 403 if(debug['w']) 404 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 405 r->f.refset = 1; 406 excise(&r->f); 407 } 408 for(z=0; z<BITS; z++) 409 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 410 while(bany(&bit)) { 411 i = bnum(bit); 412 rgp->enter = r; 413 rgp->varno = i; 414 change = 0; 415 if(debug['R'] > 1) 416 print("\n"); 417 paint1(r, i); 418 bit.b[i/32] &= ~(1L<<(i%32)); 419 if(change <= 0) { 420 if(debug['R']) 421 print("%L $%d: %Q\n", 422 r->f.prog->lineno, change, blsh(i)); 423 continue; 424 } 425 rgp->cost = change; 426 nregion++; 427 if(nregion >= NRGN) { 428 if(debug['R'] > 1) 429 print("too many regions\n"); 430 goto brk; 431 } 432 rgp++; 433 } 434 } 435 brk: 436 qsort(region, nregion, 
sizeof(region[0]), rcmp); 437 438 if(debug['R'] && debug['v']) 439 dumpit("pass5", &firstr->f, 1); 440 441 /* 442 * pass 6 443 * determine used registers (paint2) 444 * replace code (paint3) 445 */ 446 rgp = region; 447 for(i=0; i<nregion; i++) { 448 bit = blsh(rgp->varno); 449 vreg = paint2(rgp->enter, rgp->varno); 450 vreg = allreg(vreg, rgp); 451 if(debug['R']) { 452 if(rgp->regno >= NREG) 453 print("%L $%d F%d: %Q\n", 454 rgp->enter->f.prog->lineno, 455 rgp->cost, 456 rgp->regno-NREG, 457 bit); 458 else 459 print("%L $%d R%d: %Q\n", 460 rgp->enter->f.prog->lineno, 461 rgp->cost, 462 rgp->regno, 463 bit); 464 } 465 if(rgp->regno != 0) 466 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 467 rgp++; 468 } 469 470 if(debug['R'] && debug['v']) 471 dumpit("pass6", &firstr->f, 1); 472 473 /* 474 * pass 7 475 * peep-hole on basic block 476 */ 477 if(!debug['R'] || debug['P']) { 478 peep(firstp); 479 } 480 481 if(debug['R'] && debug['v']) 482 dumpit("pass7", &firstr->f, 1); 483 484 /* 485 * last pass 486 * eliminate nops 487 * free aux structures 488 * adjust the stack pointer 489 * MOVW.W R1,-12(R13) <<- start 490 * MOVW R0,R1 491 * MOVW R1,8(R13) 492 * MOVW $0,R1 493 * MOVW R1,4(R13) 494 * BL ,runtime.newproc+0(SB) 495 * MOVW &ft+-32(SP),R7 <<- adjust 496 * MOVW &j+-40(SP),R6 <<- adjust 497 * MOVW autotmp_0003+-24(SP),R5 <<- adjust 498 * MOVW $12(R13),R13 <<- finish 499 */ 500 vreg = 0; 501 for(p = firstp; p != P; p = p->link) { 502 while(p->link != P && p->link->as == ANOP) 503 p->link = p->link->link; 504 if(p->to.type == D_BRANCH) 505 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 506 p->to.u.branch = p->to.u.branch->link; 507 if(p->as == AMOVW && p->to.reg == 13) { 508 if(p->scond & C_WBIT) { 509 vreg = -p->to.offset; // in adjust region 510 // print("%P adjusting %d\n", p, vreg); 511 continue; 512 } 513 if(p->from.type == D_CONST && p->to.type == D_REG) { 514 if(p->from.offset != vreg) 515 print("in and out different\n"); 516 // print("%P finish %d\n", 
p, vreg); 517 vreg = 0; // done adjust region 518 continue; 519 } 520 521 // print("%P %d %d from type\n", p, p->from.type, D_CONST); 522 // print("%P %d %d to type\n\n", p, p->to.type, D_REG); 523 } 524 525 if(p->as == AMOVW && vreg != 0) { 526 if(p->from.sym != S) 527 if(p->from.name == D_AUTO || p->from.name == D_PARAM) { 528 p->from.offset += vreg; 529 // print("%P adjusting from %d %d\n", p, vreg, p->from.type); 530 } 531 if(p->to.sym != S) 532 if(p->to.name == D_AUTO || p->to.name == D_PARAM) { 533 p->to.offset += vreg; 534 // print("%P adjusting to %d %d\n", p, vreg, p->from.type); 535 } 536 } 537 } 538 539 flowend(g); 540 } 541 542 void 543 addsplits(void) 544 { 545 Reg *r, *r1; 546 int z, i; 547 Bits bit; 548 549 for(r = firstr; r != R; r = (Reg*)r->f.link) { 550 if(r->f.loop > 1) 551 continue; 552 if(r->f.prog->as == ABL) 553 continue; 554 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) { 555 if(r1->f.loop <= 1) 556 continue; 557 for(z=0; z<BITS; z++) 558 bit.b[z] = r1->calbehind.b[z] & 559 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 560 ~(r->calahead.b[z] & addrs.b[z]); 561 while(bany(&bit)) { 562 i = bnum(bit); 563 bit.b[i/32] &= ~(1L << (i%32)); 564 } 565 } 566 } 567 } 568 569 /* 570 * add mov b,rn 571 * just after r 572 */ 573 void 574 addmove(Reg *r, int bn, int rn, int f) 575 { 576 Prog *p, *p1, *p2; 577 Adr *a; 578 Var *v; 579 580 p1 = mal(sizeof(*p1)); 581 *p1 = zprog; 582 p = r->f.prog; 583 584 // If there's a stack fixup coming (after BL newproc or BL deferproc), 585 // delay the load until after the fixup. 
586 p2 = p->link; 587 if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG) 588 p = p2; 589 590 p1->link = p->link; 591 p->link = p1; 592 p1->lineno = p->lineno; 593 594 v = var + bn; 595 596 a = &p1->to; 597 a->name = v->name; 598 a->node = v->node; 599 a->sym = v->node->sym; 600 a->offset = v->offset; 601 a->etype = v->etype; 602 a->type = D_OREG; 603 if(a->etype == TARRAY || a->sym == S) 604 a->type = D_CONST; 605 606 if(v->addr) 607 fatal("addmove: shouldn't be doing this %A\n", a); 608 609 switch(v->etype) { 610 default: 611 print("What is this %E\n", v->etype); 612 613 case TINT8: 614 p1->as = AMOVBS; 615 break; 616 case TBOOL: 617 case TUINT8: 618 //print("movbu %E %d %S\n", v->etype, bn, v->sym); 619 p1->as = AMOVBU; 620 break; 621 case TINT16: 622 p1->as = AMOVHS; 623 break; 624 case TUINT16: 625 p1->as = AMOVHU; 626 break; 627 case TINT32: 628 case TUINT32: 629 case TPTR32: 630 p1->as = AMOVW; 631 break; 632 case TFLOAT32: 633 p1->as = AMOVF; 634 break; 635 case TFLOAT64: 636 p1->as = AMOVD; 637 break; 638 } 639 640 p1->from.type = D_REG; 641 p1->from.reg = rn; 642 if(rn >= NREG) { 643 p1->from.type = D_FREG; 644 p1->from.reg = rn-NREG; 645 } 646 if(!f) { 647 p1->from = *a; 648 *a = zprog.from; 649 a->type = D_REG; 650 a->reg = rn; 651 if(rn >= NREG) { 652 a->type = D_FREG; 653 a->reg = rn-NREG; 654 } 655 if(v->etype == TUINT8 || v->etype == TBOOL) 656 p1->as = AMOVBU; 657 if(v->etype == TUINT16) 658 p1->as = AMOVHU; 659 } 660 if(debug['R']) 661 print("%P\t.a%P\n", p, p1); 662 } 663 664 static int 665 overlap(int32 o1, int w1, int32 o2, int w2) 666 { 667 int32 t1, t2; 668 669 t1 = o1+w1; 670 t2 = o2+w2; 671 672 if(!(t1 > o2 && t2 > o1)) 673 return 0; 674 675 return 1; 676 } 677 678 Bits 679 mkvar(Reg *r, Adr *a) 680 { 681 Var *v; 682 int i, t, n, et, z, w, flag; 683 int32 o; 684 Bits bit; 685 Node *node; 686 687 // mark registers used 688 t = a->type; 689 690 flag = 0; 691 switch(t) { 
692 default: 693 print("type %d %d %D\n", t, a->name, a); 694 goto none; 695 696 case D_NONE: 697 case D_FCONST: 698 case D_BRANCH: 699 break; 700 701 702 case D_REGREG: 703 case D_REGREG2: 704 bit = zbits; 705 if(a->offset != NREG) 706 bit.b[0] |= RtoB(a->offset); 707 if(a->reg != NREG) 708 bit.b[0] |= RtoB(a->reg); 709 return bit; 710 711 case D_CONST: 712 case D_REG: 713 case D_SHIFT: 714 if(a->reg != NREG) { 715 bit = zbits; 716 bit.b[0] = RtoB(a->reg); 717 return bit; 718 } 719 break; 720 721 case D_OREG: 722 if(a->reg != NREG) { 723 if(a == &r->f.prog->from) 724 r->use1.b[0] |= RtoB(a->reg); 725 else 726 r->use2.b[0] |= RtoB(a->reg); 727 if(r->f.prog->scond & (C_PBIT|C_WBIT)) 728 r->set.b[0] |= RtoB(a->reg); 729 } 730 break; 731 732 case D_FREG: 733 if(a->reg != NREG) { 734 bit = zbits; 735 bit.b[0] = FtoB(a->reg); 736 return bit; 737 } 738 break; 739 } 740 741 switch(a->name) { 742 default: 743 goto none; 744 745 case D_EXTERN: 746 case D_STATIC: 747 case D_AUTO: 748 case D_PARAM: 749 n = a->name; 750 break; 751 } 752 753 node = a->node; 754 if(node == N || node->op != ONAME || node->orig == N) 755 goto none; 756 node = node->orig; 757 if(node->orig != node) 758 fatal("%D: bad node", a); 759 if(node->sym == S || node->sym->name[0] == '.') 760 goto none; 761 et = a->etype; 762 o = a->offset; 763 w = a->width; 764 if(w < 0) 765 fatal("bad width %d for %D", w, a); 766 767 for(i=0; i<nvar; i++) { 768 v = var+i; 769 if(v->node == node && v->name == n) { 770 if(v->offset == o) 771 if(v->etype == et) 772 if(v->width == w) 773 if(!flag) 774 return blsh(i); 775 776 // if they overlap, disable both 777 if(overlap(v->offset, v->width, o, w)) { 778 v->addr = 1; 779 flag = 1; 780 } 781 } 782 } 783 784 switch(et) { 785 case 0: 786 case TFUNC: 787 goto none; 788 } 789 790 if(nvar >= NVAR) { 791 if(debug['w'] > 1 && node) 792 fatal("variable not optimized: %D", a); 793 goto none; 794 } 795 796 i = nvar; 797 nvar++; 798 //print("var %d %E %D %S\n", i, et, a, s); 799 v = 
var+i; 800 v->offset = o; 801 v->name = n; 802 v->etype = et; 803 v->width = w; 804 v->addr = flag; // funny punning 805 v->node = node; 806 807 if(debug['R']) 808 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 809 810 bit = blsh(i); 811 if(n == D_EXTERN || n == D_STATIC) 812 for(z=0; z<BITS; z++) 813 externs.b[z] |= bit.b[z]; 814 if(n == D_PARAM) 815 for(z=0; z<BITS; z++) 816 params.b[z] |= bit.b[z]; 817 818 return bit; 819 820 none: 821 return zbits; 822 } 823 824 void 825 prop(Reg *r, Bits ref, Bits cal) 826 { 827 Reg *r1, *r2; 828 int z; 829 830 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 831 for(z=0; z<BITS; z++) { 832 ref.b[z] |= r1->refahead.b[z]; 833 if(ref.b[z] != r1->refahead.b[z]) { 834 r1->refahead.b[z] = ref.b[z]; 835 change++; 836 } 837 cal.b[z] |= r1->calahead.b[z]; 838 if(cal.b[z] != r1->calahead.b[z]) { 839 r1->calahead.b[z] = cal.b[z]; 840 change++; 841 } 842 } 843 switch(r1->f.prog->as) { 844 case ABL: 845 if(noreturn(r1->f.prog)) 846 break; 847 for(z=0; z<BITS; z++) { 848 cal.b[z] |= ref.b[z] | externs.b[z]; 849 ref.b[z] = 0; 850 } 851 break; 852 853 case ATEXT: 854 for(z=0; z<BITS; z++) { 855 cal.b[z] = 0; 856 ref.b[z] = 0; 857 } 858 break; 859 860 case ARET: 861 for(z=0; z<BITS; z++) { 862 cal.b[z] = externs.b[z] | ovar.b[z]; 863 ref.b[z] = 0; 864 } 865 break; 866 867 default: 868 // Work around for issue 1304: 869 // flush modified globals before each instruction. 
870 for(z=0; z<BITS; z++) { 871 cal.b[z] |= externs.b[z]; 872 // issue 4066: flush modified return variables in case of panic 873 if(hasdefer) 874 cal.b[z] |= ovar.b[z]; 875 } 876 break; 877 } 878 for(z=0; z<BITS; z++) { 879 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 880 r1->use1.b[z] | r1->use2.b[z]; 881 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 882 r1->refbehind.b[z] = ref.b[z]; 883 r1->calbehind.b[z] = cal.b[z]; 884 } 885 if(r1->f.active) 886 break; 887 r1->f.active = 1; 888 } 889 for(; r != r1; r = (Reg*)r->f.p1) 890 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 891 prop(r2, r->refbehind, r->calbehind); 892 } 893 894 void 895 synch(Reg *r, Bits dif) 896 { 897 Reg *r1; 898 int z; 899 900 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 901 for(z=0; z<BITS; z++) { 902 dif.b[z] = (dif.b[z] & 903 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 904 r1->set.b[z] | r1->regdiff.b[z]; 905 if(dif.b[z] != r1->regdiff.b[z]) { 906 r1->regdiff.b[z] = dif.b[z]; 907 change++; 908 } 909 } 910 if(r1->f.active) 911 break; 912 r1->f.active = 1; 913 for(z=0; z<BITS; z++) 914 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 915 if(r1->f.s2 != nil) 916 synch((Reg*)r1->f.s2, dif); 917 } 918 } 919 920 uint32 921 allreg(uint32 b, Rgn *r) 922 { 923 Var *v; 924 int i; 925 926 v = var + r->varno; 927 r->regno = 0; 928 switch(v->etype) { 929 930 default: 931 fatal("unknown etype %d/%E", bitno(b), v->etype); 932 break; 933 934 case TINT8: 935 case TUINT8: 936 case TINT16: 937 case TUINT16: 938 case TINT32: 939 case TUINT32: 940 case TINT: 941 case TUINT: 942 case TUINTPTR: 943 case TBOOL: 944 case TPTR32: 945 i = BtoR(~b); 946 if(i && r->cost >= 0) { 947 r->regno = i; 948 return RtoB(i); 949 } 950 break; 951 952 case TFLOAT32: 953 case TFLOAT64: 954 i = BtoF(~b); 955 if(i && r->cost >= 0) { 956 r->regno = i+NREG; 957 return FtoB(i); 958 } 959 break; 960 961 case TINT64: 962 case TUINT64: 963 case TPTR64: 964 case TINTER: 965 case TSTRUCT: 966 case TARRAY: 967 
break; 968 } 969 return 0; 970 } 971 972 void 973 paint1(Reg *r, int bn) 974 { 975 Reg *r1; 976 Prog *p; 977 int z; 978 uint32 bb; 979 980 z = bn/32; 981 bb = 1L<<(bn%32); 982 if(r->act.b[z] & bb) 983 return; 984 for(;;) { 985 if(!(r->refbehind.b[z] & bb)) 986 break; 987 r1 = (Reg*)r->f.p1; 988 if(r1 == R) 989 break; 990 if(!(r1->refahead.b[z] & bb)) 991 break; 992 if(r1->act.b[z] & bb) 993 break; 994 r = r1; 995 } 996 997 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 998 change -= CLOAD * r->f.loop; 999 if(debug['R'] > 1) 1000 print("%d%P\td %Q $%d\n", r->f.loop, 1001 r->f.prog, blsh(bn), change); 1002 } 1003 for(;;) { 1004 r->act.b[z] |= bb; 1005 p = r->f.prog; 1006 1007 if(r->use1.b[z] & bb) { 1008 change += CREF * r->f.loop; 1009 if(debug['R'] > 1) 1010 print("%d%P\tu1 %Q $%d\n", r->f.loop, 1011 p, blsh(bn), change); 1012 } 1013 1014 if((r->use2.b[z]|r->set.b[z]) & bb) { 1015 change += CREF * r->f.loop; 1016 if(debug['R'] > 1) 1017 print("%d%P\tu2 %Q $%d\n", r->f.loop, 1018 p, blsh(bn), change); 1019 } 1020 1021 if(STORE(r) & r->regdiff.b[z] & bb) { 1022 change -= CLOAD * r->f.loop; 1023 if(debug['R'] > 1) 1024 print("%d%P\tst %Q $%d\n", r->f.loop, 1025 p, blsh(bn), change); 1026 } 1027 1028 if(r->refbehind.b[z] & bb) 1029 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1030 if(r1->refahead.b[z] & bb) 1031 paint1(r1, bn); 1032 1033 if(!(r->refahead.b[z] & bb)) 1034 break; 1035 r1 = (Reg*)r->f.s2; 1036 if(r1 != R) 1037 if(r1->refbehind.b[z] & bb) 1038 paint1(r1, bn); 1039 r = (Reg*)r->f.s1; 1040 if(r == R) 1041 break; 1042 if(r->act.b[z] & bb) 1043 break; 1044 if(!(r->refbehind.b[z] & bb)) 1045 break; 1046 } 1047 } 1048 1049 uint32 1050 paint2(Reg *r, int bn) 1051 { 1052 Reg *r1; 1053 int z; 1054 uint32 bb, vreg; 1055 1056 z = bn/32; 1057 bb = 1L << (bn%32); 1058 vreg = regbits; 1059 if(!(r->act.b[z] & bb)) 1060 return vreg; 1061 for(;;) { 1062 if(!(r->refbehind.b[z] & bb)) 1063 break; 1064 r1 = (Reg*)r->f.p1; 1065 if(r1 == R) 1066 
break; 1067 if(!(r1->refahead.b[z] & bb)) 1068 break; 1069 if(!(r1->act.b[z] & bb)) 1070 break; 1071 r = r1; 1072 } 1073 for(;;) { 1074 r->act.b[z] &= ~bb; 1075 1076 vreg |= r->regu; 1077 1078 if(r->refbehind.b[z] & bb) 1079 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1080 if(r1->refahead.b[z] & bb) 1081 vreg |= paint2(r1, bn); 1082 1083 if(!(r->refahead.b[z] & bb)) 1084 break; 1085 r1 = (Reg*)r->f.s2; 1086 if(r1 != R) 1087 if(r1->refbehind.b[z] & bb) 1088 vreg |= paint2(r1, bn); 1089 r = (Reg*)r->f.s1; 1090 if(r == R) 1091 break; 1092 if(!(r->act.b[z] & bb)) 1093 break; 1094 if(!(r->refbehind.b[z] & bb)) 1095 break; 1096 } 1097 return vreg; 1098 } 1099 1100 void 1101 paint3(Reg *r, int bn, int32 rb, int rn) 1102 { 1103 Reg *r1; 1104 Prog *p; 1105 int z; 1106 uint32 bb; 1107 1108 z = bn/32; 1109 bb = 1L << (bn%32); 1110 if(r->act.b[z] & bb) 1111 return; 1112 for(;;) { 1113 if(!(r->refbehind.b[z] & bb)) 1114 break; 1115 r1 = (Reg*)r->f.p1; 1116 if(r1 == R) 1117 break; 1118 if(!(r1->refahead.b[z] & bb)) 1119 break; 1120 if(r1->act.b[z] & bb) 1121 break; 1122 r = r1; 1123 } 1124 1125 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1126 addmove(r, bn, rn, 0); 1127 1128 for(;;) { 1129 r->act.b[z] |= bb; 1130 p = r->f.prog; 1131 1132 if(r->use1.b[z] & bb) { 1133 if(debug['R']) 1134 print("%P", p); 1135 addreg(&p->from, rn); 1136 if(debug['R']) 1137 print("\t.c%P\n", p); 1138 } 1139 if((r->use2.b[z]|r->set.b[z]) & bb) { 1140 if(debug['R']) 1141 print("%P", p); 1142 addreg(&p->to, rn); 1143 if(debug['R']) 1144 print("\t.c%P\n", p); 1145 } 1146 1147 if(STORE(r) & r->regdiff.b[z] & bb) 1148 addmove(r, bn, rn, 1); 1149 r->regu |= rb; 1150 1151 if(r->refbehind.b[z] & bb) 1152 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1153 if(r1->refahead.b[z] & bb) 1154 paint3(r1, bn, rb, rn); 1155 1156 if(!(r->refahead.b[z] & bb)) 1157 break; 1158 r1 = (Reg*)r->f.s2; 1159 if(r1 != R) 1160 if(r1->refbehind.b[z] & bb) 1161 paint3(r1, bn, rb, rn); 
1162 r = (Reg*)r->f.s1; 1163 if(r == R) 1164 break; 1165 if(r->act.b[z] & bb) 1166 break; 1167 if(!(r->refbehind.b[z] & bb)) 1168 break; 1169 } 1170 } 1171 1172 void 1173 addreg(Adr *a, int rn) 1174 { 1175 a->sym = 0; 1176 a->name = D_NONE; 1177 a->type = D_REG; 1178 a->reg = rn; 1179 if(rn >= NREG) { 1180 a->type = D_FREG; 1181 a->reg = rn-NREG; 1182 } 1183 } 1184 1185 /* 1186 * bit reg 1187 * 0 R0 1188 * 1 R1 1189 * ... ... 1190 * 10 R10 1191 * 12 R12 1192 */ 1193 int32 1194 RtoB(int r) 1195 { 1196 if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12 1197 return 0; 1198 return 1L << r; 1199 } 1200 1201 int 1202 BtoR(int32 b) 1203 { 1204 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 1205 if(b == 0) 1206 return 0; 1207 return bitno(b); 1208 } 1209 1210 /* 1211 * bit reg 1212 * 18 F2 1213 * 19 F3 1214 * ... ... 1215 * 31 F15 1216 */ 1217 int32 1218 FtoB(int f) 1219 { 1220 1221 if(f < 2 || f > NFREG-1) 1222 return 0; 1223 return 1L << (f + 16); 1224 } 1225 1226 int 1227 BtoF(int32 b) 1228 { 1229 1230 b &= 0xfffc0000L; 1231 if(b == 0) 1232 return 0; 1233 return bitno(b) - 16; 1234 } 1235 1236 void 1237 dumpone(Flow *f, int isreg) 1238 { 1239 int z; 1240 Bits bit; 1241 Reg *r; 1242 1243 print("%d:%P", f->loop, f->prog); 1244 if(isreg) { 1245 r = (Reg*)f; 1246 for(z=0; z<BITS; z++) 1247 bit.b[z] = 1248 r->set.b[z] | 1249 r->use1.b[z] | 1250 r->use2.b[z] | 1251 r->refbehind.b[z] | 1252 r->refahead.b[z] | 1253 r->calbehind.b[z] | 1254 r->calahead.b[z] | 1255 r->regdiff.b[z] | 1256 r->act.b[z] | 1257 0; 1258 if(bany(&bit)) { 1259 print("\t"); 1260 if(bany(&r->set)) 1261 print(" s:%Q", r->set); 1262 if(bany(&r->use1)) 1263 print(" u1:%Q", r->use1); 1264 if(bany(&r->use2)) 1265 print(" u2:%Q", r->use2); 1266 if(bany(&r->refbehind)) 1267 print(" rb:%Q ", r->refbehind); 1268 if(bany(&r->refahead)) 1269 print(" ra:%Q ", r->refahead); 1270 if(bany(&r->calbehind)) 1271 print(" cb:%Q ", r->calbehind); 1272 if(bany(&r->calahead)) 1273 print(" 
ca:%Q ", r->calahead); 1274 if(bany(&r->regdiff)) 1275 print(" d:%Q ", r->regdiff); 1276 if(bany(&r->act)) 1277 print(" a:%Q ", r->act); 1278 } 1279 } 1280 print("\n"); 1281 } 1282 1283 void 1284 dumpit(char *str, Flow *r0, int isreg) 1285 { 1286 Flow *r, *r1; 1287 1288 print("\n%s\n", str); 1289 for(r = r0; r != nil; r = r->link) { 1290 dumpone(r, isreg); 1291 r1 = r->p2; 1292 if(r1 != nil) { 1293 print(" pred:"); 1294 for(; r1 != nil; r1 = r1->p2link) 1295 print(" %.4ud", r1->prog->loc); 1296 if(r->p1 != nil) 1297 print(" (and %.4ud)", r->p1->prog->loc); 1298 else 1299 print(" (only)"); 1300 print("\n"); 1301 } 1302 // r1 = r->s1; 1303 // if(r1 != nil) { 1304 // print(" succ:"); 1305 // for(; r1 != R; r1 = r1->s1) 1306 // print(" %.4ud", r1->prog->loc); 1307 // print("\n"); 1308 // } 1309 } 1310 }