// github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/cmd/5g/reg.c (about)
//
// Inferno utils/5c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 32 #include <u.h> 33 #include <libc.h> 34 #include "gg.h" 35 #include "opt.h" 36 37 #define NREGVAR 32 38 #define REGBITS ((uint64)0xffffffffull) 39 /*c2go enum { 40 NREGVAR = 32, 41 REGBITS = 0xffffffff, 42 }; 43 */ 44 45 void addsplits(void); 46 static Reg* firstr; 47 static int first = 1; 48 49 int 50 rcmp(const void *a1, const void *a2) 51 { 52 Rgn *p1, *p2; 53 int c1, c2; 54 55 p1 = (Rgn*)a1; 56 p2 = (Rgn*)a2; 57 c1 = p2->cost; 58 c2 = p1->cost; 59 if(c1 -= c2) 60 return c1; 61 return p2->varno - p1->varno; 62 } 63 64 void 65 excise(Flow *r) 66 { 67 Prog *p; 68 69 p = r->prog; 70 p->as = ANOP; 71 p->scond = zprog.scond; 72 p->from = zprog.from; 73 p->to = zprog.to; 74 p->reg = zprog.reg; 75 } 76 77 static void 78 setaddrs(Bits bit) 79 { 80 int i, n; 81 Var *v; 82 Node *node; 83 84 while(bany(&bit)) { 85 // convert each bit to a variable 86 i = bnum(bit); 87 node = var[i].node; 88 n = var[i].name; 89 biclr(&bit, i); 90 91 // disable all pieces of that variable 92 for(i=0; i<nvar; i++) { 93 v = var+i; 94 if(v->node == node && v->name == n) 95 v->addr = 2; 96 } 97 } 98 } 99 100 static char* regname[] = { 101 ".R0", 102 ".R1", 103 ".R2", 104 ".R3", 105 ".R4", 106 ".R5", 107 ".R6", 108 ".R7", 109 ".R8", 110 ".R9", 111 ".R10", 112 ".R11", 113 ".R12", 114 ".R13", 115 ".R14", 116 ".R15", 117 ".F0", 118 ".F1", 119 ".F2", 120 ".F3", 121 ".F4", 122 ".F5", 123 ".F6", 124 ".F7", 125 ".F8", 126 ".F9", 127 ".F10", 128 ".F11", 129 ".F12", 130 ".F13", 131 ".F14", 132 ".F15", 133 }; 134 135 static Node* regnodes[NREGVAR]; 136 137 static void walkvardef(Node *n, Reg *r, int active); 138 139 void 140 regopt(Prog *firstp) 141 { 142 Reg *r, *r1; 143 Prog *p; 144 Graph *g; 145 int i, z, active; 146 uint32 vreg; 147 Bits bit; 148 ProgInfo info; 149 150 if(first) { 151 fmtinstall('Q', Qconv); 152 first = 0; 153 } 154 155 mergetemp(firstp); 156 157 /* 158 * control flow is more complicated in generated go code 159 * than in generated c code. 
define pseudo-variables for 160 * registers, so we have complete register usage information. 161 */ 162 nvar = NREGVAR; 163 memset(var, 0, NREGVAR*sizeof var[0]); 164 for(i=0; i<NREGVAR; i++) { 165 if(regnodes[i] == N) 166 regnodes[i] = newname(lookup(regname[i])); 167 var[i].node = regnodes[i]; 168 } 169 170 regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); 171 for(z=0; z<BITS; z++) { 172 externs.b[z] = 0; 173 params.b[z] = 0; 174 consts.b[z] = 0; 175 addrs.b[z] = 0; 176 ivar.b[z] = 0; 177 ovar.b[z] = 0; 178 } 179 180 /* 181 * pass 1 182 * build aux data structure 183 * allocate pcs 184 * find use and set of variables 185 */ 186 g = flowstart(firstp, sizeof(Reg)); 187 if(g == nil) { 188 for(i=0; i<nvar; i++) 189 var[i].node->opt = nil; 190 return; 191 } 192 193 firstr = (Reg*)g->start; 194 195 for(r = firstr; r != R; r = (Reg*)r->f.link) { 196 p = r->f.prog; 197 if(p->as == AVARDEF || p->as == AVARKILL) 198 continue; 199 proginfo(&info, p); 200 201 // Avoid making variables for direct-called functions. 
202 if(p->as == ABL && p->to.name == D_EXTERN) 203 continue; 204 205 bit = mkvar(r, &p->from); 206 if(info.flags & LeftRead) 207 for(z=0; z<BITS; z++) 208 r->use1.b[z] |= bit.b[z]; 209 if(info.flags & LeftAddr) 210 setaddrs(bit); 211 212 if(info.flags & RegRead) { 213 if(p->from.type != D_FREG) 214 r->use1.b[0] |= RtoB(p->reg); 215 else 216 r->use1.b[0] |= FtoB(p->reg); 217 } 218 219 if(info.flags & (RightAddr | RightRead | RightWrite)) { 220 bit = mkvar(r, &p->to); 221 if(info.flags & RightAddr) 222 setaddrs(bit); 223 if(info.flags & RightRead) 224 for(z=0; z<BITS; z++) 225 r->use2.b[z] |= bit.b[z]; 226 if(info.flags & RightWrite) 227 for(z=0; z<BITS; z++) 228 r->set.b[z] |= bit.b[z]; 229 } 230 231 /* the mod/div runtime routines smash R12 */ 232 if(p->as == ADIV || p->as == ADIVU || p->as == AMOD || p->as == AMODU) 233 r->set.b[0] |= RtoB(12); 234 } 235 if(firstr == R) 236 return; 237 238 for(i=0; i<nvar; i++) { 239 Var *v = var+i; 240 if(v->addr) { 241 bit = blsh(i); 242 for(z=0; z<BITS; z++) 243 addrs.b[z] |= bit.b[z]; 244 } 245 246 if(debug['R'] && debug['v']) 247 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 248 i, v->addr, v->etype, v->width, v->node, v->offset); 249 } 250 251 if(debug['R'] && debug['v']) 252 dumpit("pass1", &firstr->f, 1); 253 254 /* 255 * pass 2 256 * find looping structure 257 */ 258 flowrpo(g); 259 260 if(debug['R'] && debug['v']) 261 dumpit("pass2", &firstr->f, 1); 262 263 /* 264 * pass 2.5 265 * iterate propagating fat vardef covering forward 266 * r->act records vars with a VARDEF since the last CALL. 267 * (r->act will be reused in pass 5 for something else, 268 * but we'll be done with it by then.) 
269 */ 270 active = 0; 271 for(r = firstr; r != R; r = (Reg*)r->f.link) { 272 r->f.active = 0; 273 r->act = zbits; 274 } 275 for(r = firstr; r != R; r = (Reg*)r->f.link) { 276 p = r->f.prog; 277 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 278 active++; 279 walkvardef(p->to.node, r, active); 280 } 281 } 282 283 /* 284 * pass 3 285 * iterate propagating usage 286 * back until flow graph is complete 287 */ 288 loop1: 289 change = 0; 290 for(r = firstr; r != R; r = (Reg*)r->f.link) 291 r->f.active = 0; 292 for(r = firstr; r != R; r = (Reg*)r->f.link) 293 if(r->f.prog->as == ARET) 294 prop(r, zbits, zbits); 295 loop11: 296 /* pick up unreachable code */ 297 i = 0; 298 for(r = firstr; r != R; r = r1) { 299 r1 = (Reg*)r->f.link; 300 if(r1 && r1->f.active && !r->f.active) { 301 prop(r, zbits, zbits); 302 i = 1; 303 } 304 } 305 if(i) 306 goto loop11; 307 if(change) 308 goto loop1; 309 310 if(debug['R'] && debug['v']) 311 dumpit("pass3", &firstr->f, 1); 312 313 314 /* 315 * pass 4 316 * iterate propagating register/variable synchrony 317 * forward until graph is complete 318 */ 319 loop2: 320 change = 0; 321 for(r = firstr; r != R; r = (Reg*)r->f.link) 322 r->f.active = 0; 323 synch(firstr, zbits); 324 if(change) 325 goto loop2; 326 327 addsplits(); 328 329 if(debug['R'] && debug['v']) 330 dumpit("pass4", &firstr->f, 1); 331 332 if(debug['R'] > 1) { 333 print("\nprop structure:\n"); 334 for(r = firstr; r != R; r = (Reg*)r->f.link) { 335 print("%d:%P", r->f.loop, r->f.prog); 336 for(z=0; z<BITS; z++) { 337 bit.b[z] = r->set.b[z] | 338 r->refahead.b[z] | r->calahead.b[z] | 339 r->refbehind.b[z] | r->calbehind.b[z] | 340 r->use1.b[z] | r->use2.b[z]; 341 bit.b[z] &= ~addrs.b[z]; 342 } 343 344 if(bany(&bit)) { 345 print("\t"); 346 if(bany(&r->use1)) 347 print(" u1=%Q", r->use1); 348 if(bany(&r->use2)) 349 print(" u2=%Q", r->use2); 350 if(bany(&r->set)) 351 print(" st=%Q", r->set); 352 if(bany(&r->refahead)) 353 print(" ra=%Q", r->refahead); 354 
if(bany(&r->calahead)) 355 print(" ca=%Q", r->calahead); 356 if(bany(&r->refbehind)) 357 print(" rb=%Q", r->refbehind); 358 if(bany(&r->calbehind)) 359 print(" cb=%Q", r->calbehind); 360 } 361 print("\n"); 362 } 363 } 364 365 /* 366 * pass 4.5 367 * move register pseudo-variables into regu. 368 */ 369 for(r = firstr; r != R; r = (Reg*)r->f.link) { 370 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 371 372 r->set.b[0] &= ~REGBITS; 373 r->use1.b[0] &= ~REGBITS; 374 r->use2.b[0] &= ~REGBITS; 375 r->refbehind.b[0] &= ~REGBITS; 376 r->refahead.b[0] &= ~REGBITS; 377 r->calbehind.b[0] &= ~REGBITS; 378 r->calahead.b[0] &= ~REGBITS; 379 r->regdiff.b[0] &= ~REGBITS; 380 r->act.b[0] &= ~REGBITS; 381 } 382 383 if(debug['R'] && debug['v']) 384 dumpit("pass4.5", &firstr->f, 1); 385 386 /* 387 * pass 5 388 * isolate regions 389 * calculate costs (paint1) 390 */ 391 r = firstr; 392 if(r) { 393 for(z=0; z<BITS; z++) 394 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 395 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 396 if(bany(&bit) && !r->f.refset) { 397 // should never happen - all variables are preset 398 if(debug['w']) 399 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 400 r->f.refset = 1; 401 } 402 } 403 404 for(r = firstr; r != R; r = (Reg*)r->f.link) 405 r->act = zbits; 406 rgp = region; 407 nregion = 0; 408 for(r = firstr; r != R; r = (Reg*)r->f.link) { 409 for(z=0; z<BITS; z++) 410 bit.b[z] = r->set.b[z] & 411 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 412 if(bany(&bit) && !r->f.refset) { 413 if(debug['w']) 414 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 415 r->f.refset = 1; 416 excise(&r->f); 417 } 418 for(z=0; z<BITS; z++) 419 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 420 while(bany(&bit)) { 421 i = bnum(bit); 422 rgp->enter = r; 423 rgp->varno = i; 424 change = 0; 425 if(debug['R'] > 1) 426 print("\n"); 427 paint1(r, i); 428 biclr(&bit, i); 429 if(change <= 0) { 430 if(debug['R']) 431 print("%L 
$%d: %Q\n", 432 r->f.prog->lineno, change, blsh(i)); 433 continue; 434 } 435 rgp->cost = change; 436 nregion++; 437 if(nregion >= NRGN) { 438 if(debug['R'] > 1) 439 print("too many regions\n"); 440 goto brk; 441 } 442 rgp++; 443 } 444 } 445 brk: 446 qsort(region, nregion, sizeof(region[0]), rcmp); 447 448 if(debug['R'] && debug['v']) 449 dumpit("pass5", &firstr->f, 1); 450 451 /* 452 * pass 6 453 * determine used registers (paint2) 454 * replace code (paint3) 455 */ 456 rgp = region; 457 if(debug['R'] && debug['v']) 458 print("\nregisterizing\n"); 459 for(i=0; i<nregion; i++) { 460 if(debug['R'] && debug['v']) 461 print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc); 462 bit = blsh(rgp->varno); 463 vreg = paint2(rgp->enter, rgp->varno, 0); 464 vreg = allreg(vreg, rgp); 465 if(debug['R']) { 466 if(rgp->regno >= NREG) 467 print("%L $%d F%d: %Q\n", 468 rgp->enter->f.prog->lineno, 469 rgp->cost, 470 rgp->regno-NREG, 471 bit); 472 else 473 print("%L $%d R%d: %Q\n", 474 rgp->enter->f.prog->lineno, 475 rgp->cost, 476 rgp->regno, 477 bit); 478 } 479 if(rgp->regno != 0) 480 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 481 rgp++; 482 } 483 484 /* 485 * free aux structures. peep allocates new ones. 
486 */ 487 for(i=0; i<nvar; i++) 488 var[i].node->opt = nil; 489 flowend(g); 490 firstr = R; 491 492 if(debug['R'] && debug['v']) { 493 // Rebuild flow graph, since we inserted instructions 494 g = flowstart(firstp, sizeof(Reg)); 495 firstr = (Reg*)g->start; 496 dumpit("pass6", &firstr->f, 1); 497 flowend(g); 498 firstr = R; 499 } 500 501 /* 502 * pass 7 503 * peep-hole on basic block 504 */ 505 if(!debug['R'] || debug['P']) { 506 peep(firstp); 507 } 508 509 if(debug['R'] && debug['v']) 510 dumpit("pass7", &firstr->f, 1); 511 512 /* 513 * last pass 514 * eliminate nops 515 * free aux structures 516 * adjust the stack pointer 517 * MOVW.W R1,-12(R13) <<- start 518 * MOVW R0,R1 519 * MOVW R1,8(R13) 520 * MOVW $0,R1 521 * MOVW R1,4(R13) 522 * BL ,runtime.newproc+0(SB) 523 * MOVW &ft+-32(SP),R7 <<- adjust 524 * MOVW &j+-40(SP),R6 <<- adjust 525 * MOVW autotmp_0003+-24(SP),R5 <<- adjust 526 * MOVW $12(R13),R13 <<- finish 527 */ 528 vreg = 0; 529 for(p = firstp; p != P; p = p->link) { 530 while(p->link != P && p->link->as == ANOP) 531 p->link = p->link->link; 532 if(p->to.type == D_BRANCH) 533 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 534 p->to.u.branch = p->to.u.branch->link; 535 if(p->as == AMOVW && p->to.reg == 13) { 536 if(p->scond & C_WBIT) { 537 vreg = -p->to.offset; // in adjust region 538 // print("%P adjusting %d\n", p, vreg); 539 continue; 540 } 541 if(p->from.type == D_CONST && p->to.type == D_REG) { 542 if(p->from.offset != vreg) 543 print("in and out different\n"); 544 // print("%P finish %d\n", p, vreg); 545 vreg = 0; // done adjust region 546 continue; 547 } 548 549 // print("%P %d %d from type\n", p, p->from.type, D_CONST); 550 // print("%P %d %d to type\n\n", p, p->to.type, D_REG); 551 } 552 553 if(p->as == AMOVW && vreg != 0) { 554 if(p->from.sym != nil) 555 if(p->from.name == D_AUTO || p->from.name == D_PARAM) { 556 p->from.offset += vreg; 557 // print("%P adjusting from %d %d\n", p, vreg, p->from.type); 558 } 559 if(p->to.sym != nil) 
560 if(p->to.name == D_AUTO || p->to.name == D_PARAM) { 561 p->to.offset += vreg; 562 // print("%P adjusting to %d %d\n", p, vreg, p->from.type); 563 } 564 } 565 } 566 } 567 568 static void 569 walkvardef(Node *n, Reg *r, int active) 570 { 571 Reg *r1, *r2; 572 int bn; 573 Var *v; 574 575 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 576 if(r1->f.active == active) 577 break; 578 r1->f.active = active; 579 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) 580 break; 581 for(v=n->opt; v!=nil; v=v->nextinnode) { 582 bn = v - var; 583 biset(&r1->act, bn); 584 } 585 if(r1->f.prog->as == ABL) 586 break; 587 } 588 589 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 590 if(r2->f.s2 != nil) 591 walkvardef(n, (Reg*)r2->f.s2, active); 592 } 593 594 void 595 addsplits(void) 596 { 597 Reg *r, *r1; 598 int z, i; 599 Bits bit; 600 601 for(r = firstr; r != R; r = (Reg*)r->f.link) { 602 if(r->f.loop > 1) 603 continue; 604 if(r->f.prog->as == ABL) 605 continue; 606 if(r->f.prog->as == ADUFFZERO) 607 continue; 608 if(r->f.prog->as == ADUFFCOPY) 609 continue; 610 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) { 611 if(r1->f.loop <= 1) 612 continue; 613 for(z=0; z<BITS; z++) 614 bit.b[z] = r1->calbehind.b[z] & 615 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 616 ~(r->calahead.b[z] & addrs.b[z]); 617 while(bany(&bit)) { 618 i = bnum(bit); 619 biclr(&bit, i); 620 } 621 } 622 } 623 } 624 625 /* 626 * add mov b,rn 627 * just after r 628 */ 629 void 630 addmove(Reg *r, int bn, int rn, int f) 631 { 632 Prog *p, *p1, *p2; 633 Adr *a; 634 Var *v; 635 636 p1 = mal(sizeof(*p1)); 637 *p1 = zprog; 638 p = r->f.prog; 639 640 // If there's a stack fixup coming (after BL newproc or BL deferproc), 641 // delay the load until after the fixup. 
642 p2 = p->link; 643 if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG) 644 p = p2; 645 646 p1->link = p->link; 647 p->link = p1; 648 p1->lineno = p->lineno; 649 650 v = var + bn; 651 652 a = &p1->to; 653 a->name = v->name; 654 a->node = v->node; 655 a->sym = linksym(v->node->sym); 656 a->offset = v->offset; 657 a->etype = v->etype; 658 a->type = D_OREG; 659 if(a->etype == TARRAY || a->sym == nil) 660 a->type = D_CONST; 661 662 if(v->addr) 663 fatal("addmove: shouldn't be doing this %A\n", a); 664 665 switch(v->etype) { 666 default: 667 print("What is this %E\n", v->etype); 668 669 case TINT8: 670 p1->as = AMOVBS; 671 break; 672 case TBOOL: 673 case TUINT8: 674 //print("movbu %E %d %S\n", v->etype, bn, v->sym); 675 p1->as = AMOVBU; 676 break; 677 case TINT16: 678 p1->as = AMOVHS; 679 break; 680 case TUINT16: 681 p1->as = AMOVHU; 682 break; 683 case TINT32: 684 case TUINT32: 685 case TPTR32: 686 p1->as = AMOVW; 687 break; 688 case TFLOAT32: 689 p1->as = AMOVF; 690 break; 691 case TFLOAT64: 692 p1->as = AMOVD; 693 break; 694 } 695 696 p1->from.type = D_REG; 697 p1->from.reg = rn; 698 if(rn >= NREG) { 699 p1->from.type = D_FREG; 700 p1->from.reg = rn-NREG; 701 } 702 if(!f) { 703 p1->from = *a; 704 *a = zprog.from; 705 a->type = D_REG; 706 a->reg = rn; 707 if(rn >= NREG) { 708 a->type = D_FREG; 709 a->reg = rn-NREG; 710 } 711 if(v->etype == TUINT8 || v->etype == TBOOL) 712 p1->as = AMOVBU; 713 if(v->etype == TUINT16) 714 p1->as = AMOVHU; 715 } 716 if(debug['R']) 717 print("%P\t.a%P\n", p, p1); 718 } 719 720 static int 721 overlap(int32 o1, int w1, int32 o2, int w2) 722 { 723 int32 t1, t2; 724 725 t1 = o1+w1; 726 t2 = o2+w2; 727 728 if(!(t1 > o2 && t2 > o1)) 729 return 0; 730 731 return 1; 732 } 733 734 Bits 735 mkvar(Reg *r, Adr *a) 736 { 737 Var *v; 738 int i, t, n, et, z, w, flag; 739 int32 o; 740 Bits bit; 741 Node *node; 742 743 // mark registers used 744 t = a->type; 745 746 flag = 0; 747 
switch(t) { 748 default: 749 print("type %d %d %D\n", t, a->name, a); 750 goto none; 751 752 case D_NONE: 753 case D_FCONST: 754 case D_BRANCH: 755 break; 756 757 758 case D_REGREG: 759 case D_REGREG2: 760 bit = zbits; 761 if(a->offset != NREG) 762 bit.b[0] |= RtoB(a->offset); 763 if(a->reg != NREG) 764 bit.b[0] |= RtoB(a->reg); 765 return bit; 766 767 case D_CONST: 768 case D_REG: 769 case D_SHIFT: 770 if(a->reg != NREG) { 771 bit = zbits; 772 bit.b[0] = RtoB(a->reg); 773 return bit; 774 } 775 break; 776 777 case D_OREG: 778 if(a->reg != NREG) { 779 if(a == &r->f.prog->from) 780 r->use1.b[0] |= RtoB(a->reg); 781 else 782 r->use2.b[0] |= RtoB(a->reg); 783 if(r->f.prog->scond & (C_PBIT|C_WBIT)) 784 r->set.b[0] |= RtoB(a->reg); 785 } 786 break; 787 788 case D_FREG: 789 if(a->reg != NREG) { 790 bit = zbits; 791 bit.b[0] = FtoB(a->reg); 792 return bit; 793 } 794 break; 795 } 796 797 switch(a->name) { 798 default: 799 goto none; 800 801 case D_EXTERN: 802 case D_STATIC: 803 case D_AUTO: 804 case D_PARAM: 805 n = a->name; 806 break; 807 } 808 809 node = a->node; 810 if(node == N || node->op != ONAME || node->orig == N) 811 goto none; 812 node = node->orig; 813 if(node->orig != node) 814 fatal("%D: bad node", a); 815 if(node->sym == S || node->sym->name[0] == '.') 816 goto none; 817 et = a->etype; 818 o = a->offset; 819 w = a->width; 820 if(w < 0) 821 fatal("bad width %d for %D", w, a); 822 823 for(i=0; i<nvar; i++) { 824 v = var+i; 825 if(v->node == node && v->name == n) { 826 if(v->offset == o) 827 if(v->etype == et) 828 if(v->width == w) 829 if(!flag) 830 return blsh(i); 831 832 // if they overlap, disable both 833 if(overlap(v->offset, v->width, o, w)) { 834 v->addr = 1; 835 flag = 1; 836 } 837 } 838 } 839 840 switch(et) { 841 case 0: 842 case TFUNC: 843 goto none; 844 } 845 846 if(nvar >= NVAR) { 847 if(debug['w'] > 1 && node) 848 fatal("variable not optimized: %D", a); 849 850 // If we're not tracking a word in a variable, mark the rest as 851 // having its address 
taken, so that we keep the whole thing 852 // live at all calls. otherwise we might optimize away part of 853 // a variable but not all of it. 854 for(i=0; i<nvar; i++) { 855 v = var+i; 856 if(v->node == node) 857 v->addr = 1; 858 } 859 goto none; 860 } 861 862 i = nvar; 863 nvar++; 864 //print("var %d %E %D %S\n", i, et, a, s); 865 v = var+i; 866 v->offset = o; 867 v->name = n; 868 v->etype = et; 869 v->width = w; 870 v->addr = flag; // funny punning 871 v->node = node; 872 873 // node->opt is the head of a linked list 874 // of Vars within the given Node, so that 875 // we can start at a Var and find all the other 876 // Vars in the same Go variable. 877 v->nextinnode = node->opt; 878 node->opt = v; 879 880 bit = blsh(i); 881 if(n == D_EXTERN || n == D_STATIC) 882 for(z=0; z<BITS; z++) 883 externs.b[z] |= bit.b[z]; 884 if(n == D_PARAM) 885 for(z=0; z<BITS; z++) 886 params.b[z] |= bit.b[z]; 887 888 if(node->class == PPARAM) 889 for(z=0; z<BITS; z++) 890 ivar.b[z] |= bit.b[z]; 891 if(node->class == PPARAMOUT) 892 for(z=0; z<BITS; z++) 893 ovar.b[z] |= bit.b[z]; 894 895 // Treat values with their address taken as live at calls, 896 // because the garbage collector's liveness analysis in ../gc/plive.c does. 897 // These must be consistent or else we will elide stores and the garbage 898 // collector will see uninitialized data. 899 // The typical case where our own analysis is out of sync is when the 900 // node appears to have its address taken but that code doesn't actually 901 // get generated and therefore doesn't show up as an address being 902 // taken when we analyze the instruction stream. 903 // One instance of this case is when a closure uses the same name as 904 // an outer variable for one of its own variables declared with :=. 905 // The parser flags the outer variable as possibly shared, and therefore 906 // sets addrtaken, even though it ends up not being actually shared. 907 // If we were better about _ elision, _ = &x would suffice too. 
908 // The broader := in a closure problem is mentioned in a comment in 909 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 910 if(node->addrtaken) 911 v->addr = 1; 912 913 // Disable registerization for globals, because: 914 // (1) we might panic at any time and we want the recovery code 915 // to see the latest values (issue 1304). 916 // (2) we don't know what pointers might point at them and we want 917 // loads via those pointers to see updated values and vice versa (issue 7995). 918 // 919 // Disable registerization for results if using defer, because the deferred func 920 // might recover and return, causing the current values to be used. 921 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 922 v->addr = 1; 923 924 if(debug['R']) 925 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 926 927 return bit; 928 929 none: 930 return zbits; 931 } 932 933 void 934 prop(Reg *r, Bits ref, Bits cal) 935 { 936 Reg *r1, *r2; 937 int z, i, j; 938 Var *v, *v1; 939 940 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 941 for(z=0; z<BITS; z++) { 942 ref.b[z] |= r1->refahead.b[z]; 943 if(ref.b[z] != r1->refahead.b[z]) { 944 r1->refahead.b[z] = ref.b[z]; 945 change++; 946 } 947 cal.b[z] |= r1->calahead.b[z]; 948 if(cal.b[z] != r1->calahead.b[z]) { 949 r1->calahead.b[z] = cal.b[z]; 950 change++; 951 } 952 } 953 switch(r1->f.prog->as) { 954 case ABL: 955 if(noreturn(r1->f.prog)) 956 break; 957 958 // Mark all input variables (ivar) as used, because that's what the 959 // liveness bitmaps say. The liveness bitmaps say that so that a 960 // panic will not show stale values in the parameter dump. 961 // Mark variables with a recent VARDEF (r1->act) as used, 962 // so that the optimizer flushes initializations to memory, 963 // so that if a garbage collection happens during this CALL, 964 // the collector will see initialized memory. Again this is to 965 // match what the liveness bitmaps say. 
966 for(z=0; z<BITS; z++) { 967 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 968 ref.b[z] = 0; 969 } 970 971 // cal.b is the current approximation of what's live across the call. 972 // Every bit in cal.b is a single stack word. For each such word, 973 // find all the other tracked stack words in the same Go variable 974 // (struct/slice/string/interface) and mark them live too. 975 // This is necessary because the liveness analysis for the garbage 976 // collector works at variable granularity, not at word granularity. 977 // It is fundamental for slice/string/interface: the garbage collector 978 // needs the whole value, not just some of the words, in order to 979 // interpret the other bits correctly. Specifically, slice needs a consistent 980 // ptr and cap, string needs a consistent ptr and len, and interface 981 // needs a consistent type word and data word. 982 for(z=0; z<BITS; z++) { 983 if(cal.b[z] == 0) 984 continue; 985 for(i=0; i<64; i++) { 986 if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) 987 continue; 988 v = var+z*64+i; 989 if(v->node->opt == nil) // v represents fixed register, not Go variable 990 continue; 991 992 // v->node->opt is the head of a linked list of Vars 993 // corresponding to tracked words from the Go variable v->node. 994 // Walk the list and set all the bits. 995 // For a large struct this could end up being quadratic: 996 // after the first setting, the outer loop (for z, i) would see a 1 bit 997 // for all of the remaining words in the struct, and for each such 998 // word would go through and turn on all the bits again. 999 // To avoid the quadratic behavior, we only turn on the bits if 1000 // v is the head of the list or if the head's bit is not yet turned on. 1001 // This will set the bits at most twice, keeping the overall loop linear. 
1002 v1 = v->node->opt; 1003 j = v1 - var; 1004 if(v == v1 || !btest(&cal, j)) { 1005 for(; v1 != nil; v1 = v1->nextinnode) { 1006 j = v1 - var; 1007 biset(&cal, j); 1008 } 1009 } 1010 } 1011 } 1012 break; 1013 1014 case ATEXT: 1015 for(z=0; z<BITS; z++) { 1016 cal.b[z] = 0; 1017 ref.b[z] = 0; 1018 } 1019 break; 1020 1021 case ARET: 1022 for(z=0; z<BITS; z++) { 1023 cal.b[z] = externs.b[z] | ovar.b[z]; 1024 ref.b[z] = 0; 1025 } 1026 break; 1027 } 1028 for(z=0; z<BITS; z++) { 1029 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 1030 r1->use1.b[z] | r1->use2.b[z]; 1031 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 1032 r1->refbehind.b[z] = ref.b[z]; 1033 r1->calbehind.b[z] = cal.b[z]; 1034 } 1035 if(r1->f.active) 1036 break; 1037 r1->f.active = 1; 1038 } 1039 for(; r != r1; r = (Reg*)r->f.p1) 1040 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 1041 prop(r2, r->refbehind, r->calbehind); 1042 } 1043 1044 void 1045 synch(Reg *r, Bits dif) 1046 { 1047 Reg *r1; 1048 int z; 1049 1050 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 1051 for(z=0; z<BITS; z++) { 1052 dif.b[z] = (dif.b[z] & 1053 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 1054 r1->set.b[z] | r1->regdiff.b[z]; 1055 if(dif.b[z] != r1->regdiff.b[z]) { 1056 r1->regdiff.b[z] = dif.b[z]; 1057 change++; 1058 } 1059 } 1060 if(r1->f.active) 1061 break; 1062 r1->f.active = 1; 1063 for(z=0; z<BITS; z++) 1064 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 1065 if(r1->f.s2 != nil) 1066 synch((Reg*)r1->f.s2, dif); 1067 } 1068 } 1069 1070 uint32 1071 allreg(uint32 b, Rgn *r) 1072 { 1073 Var *v; 1074 int i; 1075 1076 v = var + r->varno; 1077 r->regno = 0; 1078 switch(v->etype) { 1079 1080 default: 1081 fatal("unknown etype %d/%E", bitno(b), v->etype); 1082 break; 1083 1084 case TINT8: 1085 case TUINT8: 1086 case TINT16: 1087 case TUINT16: 1088 case TINT32: 1089 case TUINT32: 1090 case TINT: 1091 case TUINT: 1092 case TUINTPTR: 1093 case TBOOL: 1094 case TPTR32: 1095 i = BtoR(~b); 1096 if(i && r->cost 
>= 0) { 1097 r->regno = i; 1098 return RtoB(i); 1099 } 1100 break; 1101 1102 case TFLOAT32: 1103 case TFLOAT64: 1104 i = BtoF(~b); 1105 if(i && r->cost >= 0) { 1106 r->regno = i+NREG; 1107 return FtoB(i); 1108 } 1109 break; 1110 1111 case TINT64: 1112 case TUINT64: 1113 case TPTR64: 1114 case TINTER: 1115 case TSTRUCT: 1116 case TARRAY: 1117 break; 1118 } 1119 return 0; 1120 } 1121 1122 void 1123 paint1(Reg *r, int bn) 1124 { 1125 Reg *r1; 1126 Prog *p; 1127 int z; 1128 uint64 bb; 1129 1130 z = bn/64; 1131 bb = 1LL<<(bn%64); 1132 if(r->act.b[z] & bb) 1133 return; 1134 for(;;) { 1135 if(!(r->refbehind.b[z] & bb)) 1136 break; 1137 r1 = (Reg*)r->f.p1; 1138 if(r1 == R) 1139 break; 1140 if(!(r1->refahead.b[z] & bb)) 1141 break; 1142 if(r1->act.b[z] & bb) 1143 break; 1144 r = r1; 1145 } 1146 1147 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 1148 change -= CLOAD * r->f.loop; 1149 if(debug['R'] > 1) 1150 print("%d%P\td %Q $%d\n", r->f.loop, 1151 r->f.prog, blsh(bn), change); 1152 } 1153 for(;;) { 1154 r->act.b[z] |= bb; 1155 p = r->f.prog; 1156 1157 1158 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 1159 if(r->use1.b[z] & bb) { 1160 change += CREF * r->f.loop; 1161 if(debug['R'] > 1) 1162 print("%d%P\tu1 %Q $%d\n", r->f.loop, 1163 p, blsh(bn), change); 1164 } 1165 if((r->use2.b[z]|r->set.b[z]) & bb) { 1166 change += CREF * r->f.loop; 1167 if(debug['R'] > 1) 1168 print("%d%P\tu2 %Q $%d\n", r->f.loop, 1169 p, blsh(bn), change); 1170 } 1171 } 1172 1173 if(STORE(r) & r->regdiff.b[z] & bb) { 1174 change -= CLOAD * r->f.loop; 1175 if(debug['R'] > 1) 1176 print("%d%P\tst %Q $%d\n", r->f.loop, 1177 p, blsh(bn), change); 1178 } 1179 1180 if(r->refbehind.b[z] & bb) 1181 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1182 if(r1->refahead.b[z] & bb) 1183 paint1(r1, bn); 1184 1185 if(!(r->refahead.b[z] & bb)) 1186 break; 1187 r1 = (Reg*)r->f.s2; 1188 if(r1 != R) 1189 if(r1->refbehind.b[z] & bb) 1190 paint1(r1, bn); 1191 r = (Reg*)r->f.s1; 
1192 if(r == R) 1193 break; 1194 if(r->act.b[z] & bb) 1195 break; 1196 if(!(r->refbehind.b[z] & bb)) 1197 break; 1198 } 1199 } 1200 1201 uint32 1202 paint2(Reg *r, int bn, int depth) 1203 { 1204 Reg *r1; 1205 int z; 1206 uint64 bb, vreg; 1207 1208 z = bn/64; 1209 bb = 1LL << (bn%64); 1210 vreg = regbits; 1211 if(!(r->act.b[z] & bb)) 1212 return vreg; 1213 for(;;) { 1214 if(!(r->refbehind.b[z] & bb)) 1215 break; 1216 r1 = (Reg*)r->f.p1; 1217 if(r1 == R) 1218 break; 1219 if(!(r1->refahead.b[z] & bb)) 1220 break; 1221 if(!(r1->act.b[z] & bb)) 1222 break; 1223 r = r1; 1224 } 1225 for(;;) { 1226 if(debug['R'] && debug['v']) 1227 print(" paint2 %d %P\n", depth, r->f.prog); 1228 1229 r->act.b[z] &= ~bb; 1230 1231 vreg |= r->regu; 1232 1233 if(r->refbehind.b[z] & bb) 1234 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1235 if(r1->refahead.b[z] & bb) 1236 vreg |= paint2(r1, bn, depth+1); 1237 1238 if(!(r->refahead.b[z] & bb)) 1239 break; 1240 r1 = (Reg*)r->f.s2; 1241 if(r1 != R) 1242 if(r1->refbehind.b[z] & bb) 1243 vreg |= paint2(r1, bn, depth+1); 1244 r = (Reg*)r->f.s1; 1245 if(r == R) 1246 break; 1247 if(!(r->act.b[z] & bb)) 1248 break; 1249 if(!(r->refbehind.b[z] & bb)) 1250 break; 1251 } 1252 return vreg; 1253 } 1254 1255 void 1256 paint3(Reg *r, int bn, uint32 rb, int rn) 1257 { 1258 Reg *r1; 1259 Prog *p; 1260 int z; 1261 uint64 bb; 1262 1263 z = bn/64; 1264 bb = 1LL << (bn%64); 1265 if(r->act.b[z] & bb) 1266 return; 1267 for(;;) { 1268 if(!(r->refbehind.b[z] & bb)) 1269 break; 1270 r1 = (Reg*)r->f.p1; 1271 if(r1 == R) 1272 break; 1273 if(!(r1->refahead.b[z] & bb)) 1274 break; 1275 if(r1->act.b[z] & bb) 1276 break; 1277 r = r1; 1278 } 1279 1280 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1281 addmove(r, bn, rn, 0); 1282 1283 for(;;) { 1284 r->act.b[z] |= bb; 1285 p = r->f.prog; 1286 1287 if(r->use1.b[z] & bb) { 1288 if(debug['R']) 1289 print("%P", p); 1290 addreg(&p->from, rn); 1291 if(debug['R']) 1292 print("\t.c%P\n", p); 1293 } 1294 
if((r->use2.b[z]|r->set.b[z]) & bb) { 1295 if(debug['R']) 1296 print("%P", p); 1297 addreg(&p->to, rn); 1298 if(debug['R']) 1299 print("\t.c%P\n", p); 1300 } 1301 1302 if(STORE(r) & r->regdiff.b[z] & bb) 1303 addmove(r, bn, rn, 1); 1304 r->regu |= rb; 1305 1306 if(r->refbehind.b[z] & bb) 1307 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1308 if(r1->refahead.b[z] & bb) 1309 paint3(r1, bn, rb, rn); 1310 1311 if(!(r->refahead.b[z] & bb)) 1312 break; 1313 r1 = (Reg*)r->f.s2; 1314 if(r1 != R) 1315 if(r1->refbehind.b[z] & bb) 1316 paint3(r1, bn, rb, rn); 1317 r = (Reg*)r->f.s1; 1318 if(r == R) 1319 break; 1320 if(r->act.b[z] & bb) 1321 break; 1322 if(!(r->refbehind.b[z] & bb)) 1323 break; 1324 } 1325 } 1326 1327 void 1328 addreg(Adr *a, int rn) 1329 { 1330 a->sym = nil; 1331 a->node = nil; 1332 a->name = D_NONE; 1333 a->type = D_REG; 1334 a->reg = rn; 1335 if(rn >= NREG) { 1336 a->type = D_FREG; 1337 a->reg = rn-NREG; 1338 } 1339 } 1340 1341 /* 1342 * bit reg 1343 * 0 R0 1344 * 1 R1 1345 * ... ... 1346 * 10 R10 1347 * 12 R12 1348 */ 1349 uint32 1350 RtoB(int r) 1351 { 1352 if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12 1353 return 0; 1354 return 1L << r; 1355 } 1356 1357 int 1358 BtoR(uint32 b) 1359 { 1360 // TODO Allow R0 and R1, but be careful with a 0 return 1361 // TODO Allow R9. Only R10 is reserved now (just g, not m). 1362 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 1363 if(b == 0) 1364 return 0; 1365 return bitno(b); 1366 } 1367 1368 /* 1369 * bit reg 1370 * 18 F2 1371 * 19 F3 1372 * ... ... 
1373 * 31 F15 1374 */ 1375 uint32 1376 FtoB(int f) 1377 { 1378 1379 if(f < 2 || f > NFREG-1) 1380 return 0; 1381 return 1L << (f + 16); 1382 } 1383 1384 int 1385 BtoF(uint32 b) 1386 { 1387 1388 b &= 0xfffc0000L; 1389 if(b == 0) 1390 return 0; 1391 return bitno(b) - 16; 1392 } 1393 1394 void 1395 dumpone(Flow *f, int isreg) 1396 { 1397 int z; 1398 Bits bit; 1399 Reg *r; 1400 1401 print("%d:%P", f->loop, f->prog); 1402 if(isreg) { 1403 r = (Reg*)f; 1404 for(z=0; z<BITS; z++) 1405 bit.b[z] = 1406 r->set.b[z] | 1407 r->use1.b[z] | 1408 r->use2.b[z] | 1409 r->refbehind.b[z] | 1410 r->refahead.b[z] | 1411 r->calbehind.b[z] | 1412 r->calahead.b[z] | 1413 r->regdiff.b[z] | 1414 r->act.b[z] | 1415 0; 1416 if(bany(&bit)) { 1417 print("\t"); 1418 if(bany(&r->set)) 1419 print(" s:%Q", r->set); 1420 if(bany(&r->use1)) 1421 print(" u1:%Q", r->use1); 1422 if(bany(&r->use2)) 1423 print(" u2:%Q", r->use2); 1424 if(bany(&r->refbehind)) 1425 print(" rb:%Q ", r->refbehind); 1426 if(bany(&r->refahead)) 1427 print(" ra:%Q ", r->refahead); 1428 if(bany(&r->calbehind)) 1429 print(" cb:%Q ", r->calbehind); 1430 if(bany(&r->calahead)) 1431 print(" ca:%Q ", r->calahead); 1432 if(bany(&r->regdiff)) 1433 print(" d:%Q ", r->regdiff); 1434 if(bany(&r->act)) 1435 print(" a:%Q ", r->act); 1436 } 1437 } 1438 print("\n"); 1439 } 1440 1441 void 1442 dumpit(char *str, Flow *r0, int isreg) 1443 { 1444 Flow *r, *r1; 1445 1446 print("\n%s\n", str); 1447 for(r = r0; r != nil; r = r->link) { 1448 dumpone(r, isreg); 1449 r1 = r->p2; 1450 if(r1 != nil) { 1451 print(" pred:"); 1452 for(; r1 != nil; r1 = r1->p2link) 1453 print(" %.4ud", (int)r1->prog->pc); 1454 if(r->p1 != nil) 1455 print(" (and %.4ud)", (int)r->p1->prog->pc); 1456 else 1457 print(" (only)"); 1458 print("\n"); 1459 } 1460 // Print successors if it's not just the next one 1461 if(r->s1 != r->link || r->s2 != nil) { 1462 print(" succ:"); 1463 if(r->s1 != nil) 1464 print(" %.4ud", (int)r->s1->prog->pc); 1465 if(r->s2 != nil) 1466 print(" %.4ud", 
(int)r->s2->prog->pc); 1467 print("\n"); 1468 } 1469 } 1470 }