// github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/5g/reg.c
//
// Inferno utils/5c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 32 #include <u.h> 33 #include <libc.h> 34 #include "gg.h" 35 #include "opt.h" 36 37 #define NREGVAR 32 38 #define REGBITS ((uint32)0xffffffff) 39 40 void addsplits(void); 41 static Reg* firstr; 42 static int first = 1; 43 44 int 45 rcmp(const void *a1, const void *a2) 46 { 47 Rgn *p1, *p2; 48 int c1, c2; 49 50 p1 = (Rgn*)a1; 51 p2 = (Rgn*)a2; 52 c1 = p2->cost; 53 c2 = p1->cost; 54 if(c1 -= c2) 55 return c1; 56 return p2->varno - p1->varno; 57 } 58 59 void 60 excise(Flow *r) 61 { 62 Prog *p; 63 64 p = r->prog; 65 p->as = ANOP; 66 p->scond = zprog.scond; 67 p->from = zprog.from; 68 p->to = zprog.to; 69 p->reg = zprog.reg; 70 } 71 72 static void 73 setaddrs(Bits bit) 74 { 75 int i, n; 76 Var *v; 77 Node *node; 78 79 while(bany(&bit)) { 80 // convert each bit to a variable 81 i = bnum(bit); 82 node = var[i].node; 83 n = var[i].name; 84 bit.b[i/32] &= ~(1L<<(i%32)); 85 86 // disable all pieces of that variable 87 for(i=0; i<nvar; i++) { 88 v = var+i; 89 if(v->node == node && v->name == n) 90 v->addr = 2; 91 } 92 } 93 } 94 95 static char* regname[] = { 96 ".R0", 97 ".R1", 98 ".R2", 99 ".R3", 100 ".R4", 101 ".R5", 102 ".R6", 103 ".R7", 104 ".R8", 105 ".R9", 106 ".R10", 107 ".R11", 108 ".R12", 109 ".R13", 110 ".R14", 111 ".R15", 112 ".F0", 113 ".F1", 114 ".F2", 115 ".F3", 116 ".F4", 117 ".F5", 118 ".F6", 119 ".F7", 120 ".F8", 121 ".F9", 122 ".F10", 123 ".F11", 124 ".F12", 125 ".F13", 126 ".F14", 127 ".F15", 128 }; 129 130 static Node* regnodes[NREGVAR]; 131 132 static void walkvardef(Node *n, Reg *r, int active); 133 134 void 135 regopt(Prog *firstp) 136 { 137 Reg *r, *r1; 138 Prog *p; 139 Graph *g; 140 int i, z, active; 141 uint32 vreg; 142 Bits bit; 143 ProgInfo info; 144 145 if(first) { 146 fmtinstall('Q', Qconv); 147 first = 0; 148 } 149 150 mergetemp(firstp); 151 152 /* 153 * control flow is more complicated in generated go code 154 * than in generated c code. define pseudo-variables for 155 * registers, so we have complete register usage information. 
156 */ 157 nvar = NREGVAR; 158 memset(var, 0, NREGVAR*sizeof var[0]); 159 for(i=0; i<NREGVAR; i++) { 160 if(regnodes[i] == N) 161 regnodes[i] = newname(lookup(regname[i])); 162 var[i].node = regnodes[i]; 163 } 164 165 regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); 166 for(z=0; z<BITS; z++) { 167 externs.b[z] = 0; 168 params.b[z] = 0; 169 consts.b[z] = 0; 170 addrs.b[z] = 0; 171 ivar.b[z] = 0; 172 ovar.b[z] = 0; 173 } 174 175 /* 176 * pass 1 177 * build aux data structure 178 * allocate pcs 179 * find use and set of variables 180 */ 181 g = flowstart(firstp, sizeof(Reg)); 182 if(g == nil) { 183 for(i=0; i<nvar; i++) 184 var[i].node->opt = nil; 185 return; 186 } 187 188 firstr = (Reg*)g->start; 189 190 for(r = firstr; r != R; r = (Reg*)r->f.link) { 191 p = r->f.prog; 192 if(p->as == AVARDEF || p->as == AVARKILL) 193 continue; 194 proginfo(&info, p); 195 196 // Avoid making variables for direct-called functions. 197 if(p->as == ABL && p->to.type == D_EXTERN) 198 continue; 199 200 bit = mkvar(r, &p->from); 201 if(info.flags & LeftRead) 202 for(z=0; z<BITS; z++) 203 r->use1.b[z] |= bit.b[z]; 204 if(info.flags & LeftAddr) 205 setaddrs(bit); 206 207 if(info.flags & RegRead) { 208 if(p->from.type != D_FREG) 209 r->use1.b[0] |= RtoB(p->reg); 210 else 211 r->use1.b[0] |= FtoB(p->reg); 212 } 213 214 if(info.flags & (RightAddr | RightRead | RightWrite)) { 215 bit = mkvar(r, &p->to); 216 if(info.flags & RightAddr) 217 setaddrs(bit); 218 if(info.flags & RightRead) 219 for(z=0; z<BITS; z++) 220 r->use2.b[z] |= bit.b[z]; 221 if(info.flags & RightWrite) 222 for(z=0; z<BITS; z++) 223 r->set.b[z] |= bit.b[z]; 224 } 225 } 226 if(firstr == R) 227 return; 228 229 for(i=0; i<nvar; i++) { 230 Var *v = var+i; 231 if(v->addr) { 232 bit = blsh(i); 233 for(z=0; z<BITS; z++) 234 addrs.b[z] |= bit.b[z]; 235 } 236 237 if(debug['R'] && debug['v']) 238 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 239 i, v->addr, v->etype, v->width, v->node, v->offset); 240 } 241 242 if(debug['R'] && 
debug['v']) 243 dumpit("pass1", &firstr->f, 1); 244 245 /* 246 * pass 2 247 * find looping structure 248 */ 249 flowrpo(g); 250 251 if(debug['R'] && debug['v']) 252 dumpit("pass2", &firstr->f, 1); 253 254 /* 255 * pass 2.5 256 * iterate propagating fat vardef covering forward 257 * r->act records vars with a VARDEF since the last CALL. 258 * (r->act will be reused in pass 5 for something else, 259 * but we'll be done with it by then.) 260 */ 261 active = 0; 262 for(r = firstr; r != R; r = (Reg*)r->f.link) { 263 r->f.active = 0; 264 r->act = zbits; 265 } 266 for(r = firstr; r != R; r = (Reg*)r->f.link) { 267 p = r->f.prog; 268 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 269 active++; 270 walkvardef(p->to.node, r, active); 271 } 272 } 273 274 /* 275 * pass 3 276 * iterate propagating usage 277 * back until flow graph is complete 278 */ 279 loop1: 280 change = 0; 281 for(r = firstr; r != R; r = (Reg*)r->f.link) 282 r->f.active = 0; 283 for(r = firstr; r != R; r = (Reg*)r->f.link) 284 if(r->f.prog->as == ARET) 285 prop(r, zbits, zbits); 286 loop11: 287 /* pick up unreachable code */ 288 i = 0; 289 for(r = firstr; r != R; r = r1) { 290 r1 = (Reg*)r->f.link; 291 if(r1 && r1->f.active && !r->f.active) { 292 prop(r, zbits, zbits); 293 i = 1; 294 } 295 } 296 if(i) 297 goto loop11; 298 if(change) 299 goto loop1; 300 301 if(debug['R'] && debug['v']) 302 dumpit("pass3", &firstr->f, 1); 303 304 305 /* 306 * pass 4 307 * iterate propagating register/variable synchrony 308 * forward until graph is complete 309 */ 310 loop2: 311 change = 0; 312 for(r = firstr; r != R; r = (Reg*)r->f.link) 313 r->f.active = 0; 314 synch(firstr, zbits); 315 if(change) 316 goto loop2; 317 318 addsplits(); 319 320 if(debug['R'] && debug['v']) 321 dumpit("pass4", &firstr->f, 1); 322 323 if(debug['R'] > 1) { 324 print("\nprop structure:\n"); 325 for(r = firstr; r != R; r = (Reg*)r->f.link) { 326 print("%d:%P", r->f.loop, r->f.prog); 327 for(z=0; z<BITS; z++) { 328 
bit.b[z] = r->set.b[z] | 329 r->refahead.b[z] | r->calahead.b[z] | 330 r->refbehind.b[z] | r->calbehind.b[z] | 331 r->use1.b[z] | r->use2.b[z]; 332 bit.b[z] &= ~addrs.b[z]; 333 } 334 335 if(bany(&bit)) { 336 print("\t"); 337 if(bany(&r->use1)) 338 print(" u1=%Q", r->use1); 339 if(bany(&r->use2)) 340 print(" u2=%Q", r->use2); 341 if(bany(&r->set)) 342 print(" st=%Q", r->set); 343 if(bany(&r->refahead)) 344 print(" ra=%Q", r->refahead); 345 if(bany(&r->calahead)) 346 print(" ca=%Q", r->calahead); 347 if(bany(&r->refbehind)) 348 print(" rb=%Q", r->refbehind); 349 if(bany(&r->calbehind)) 350 print(" cb=%Q", r->calbehind); 351 } 352 print("\n"); 353 } 354 } 355 356 /* 357 * pass 4.5 358 * move register pseudo-variables into regu. 359 */ 360 for(r = firstr; r != R; r = (Reg*)r->f.link) { 361 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 362 363 r->set.b[0] &= ~REGBITS; 364 r->use1.b[0] &= ~REGBITS; 365 r->use2.b[0] &= ~REGBITS; 366 r->refbehind.b[0] &= ~REGBITS; 367 r->refahead.b[0] &= ~REGBITS; 368 r->calbehind.b[0] &= ~REGBITS; 369 r->calahead.b[0] &= ~REGBITS; 370 r->regdiff.b[0] &= ~REGBITS; 371 r->act.b[0] &= ~REGBITS; 372 } 373 374 if(debug['R'] && debug['v']) 375 dumpit("pass4.5", &firstr->f, 1); 376 377 /* 378 * pass 5 379 * isolate regions 380 * calculate costs (paint1) 381 */ 382 r = firstr; 383 if(r) { 384 for(z=0; z<BITS; z++) 385 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 386 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 387 if(bany(&bit) & !r->f.refset) { 388 // should never happen - all variables are preset 389 if(debug['w']) 390 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 391 r->f.refset = 1; 392 } 393 } 394 395 for(r = firstr; r != R; r = (Reg*)r->f.link) 396 r->act = zbits; 397 rgp = region; 398 nregion = 0; 399 for(r = firstr; r != R; r = (Reg*)r->f.link) { 400 for(z=0; z<BITS; z++) 401 bit.b[z] = r->set.b[z] & 402 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 403 if(bany(&bit) && !r->f.refset) { 404 
if(debug['w']) 405 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 406 r->f.refset = 1; 407 excise(&r->f); 408 } 409 for(z=0; z<BITS; z++) 410 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 411 while(bany(&bit)) { 412 i = bnum(bit); 413 rgp->enter = r; 414 rgp->varno = i; 415 change = 0; 416 if(debug['R'] > 1) 417 print("\n"); 418 paint1(r, i); 419 bit.b[i/32] &= ~(1L<<(i%32)); 420 if(change <= 0) { 421 if(debug['R']) 422 print("%L $%d: %Q\n", 423 r->f.prog->lineno, change, blsh(i)); 424 continue; 425 } 426 rgp->cost = change; 427 nregion++; 428 if(nregion >= NRGN) { 429 if(debug['R'] > 1) 430 print("too many regions\n"); 431 goto brk; 432 } 433 rgp++; 434 } 435 } 436 brk: 437 qsort(region, nregion, sizeof(region[0]), rcmp); 438 439 if(debug['R'] && debug['v']) 440 dumpit("pass5", &firstr->f, 1); 441 442 /* 443 * pass 6 444 * determine used registers (paint2) 445 * replace code (paint3) 446 */ 447 rgp = region; 448 for(i=0; i<nregion; i++) { 449 bit = blsh(rgp->varno); 450 vreg = paint2(rgp->enter, rgp->varno); 451 vreg = allreg(vreg, rgp); 452 if(debug['R']) { 453 if(rgp->regno >= NREG) 454 print("%L $%d F%d: %Q\n", 455 rgp->enter->f.prog->lineno, 456 rgp->cost, 457 rgp->regno-NREG, 458 bit); 459 else 460 print("%L $%d R%d: %Q\n", 461 rgp->enter->f.prog->lineno, 462 rgp->cost, 463 rgp->regno, 464 bit); 465 } 466 if(rgp->regno != 0) 467 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 468 rgp++; 469 } 470 471 if(debug['R'] && debug['v']) 472 dumpit("pass6", &firstr->f, 1); 473 474 /* 475 * free aux structures. peep allocates new ones. 
476 */ 477 for(i=0; i<nvar; i++) 478 var[i].node->opt = nil; 479 flowend(g); 480 firstr = R; 481 482 /* 483 * pass 7 484 * peep-hole on basic block 485 */ 486 if(!debug['R'] || debug['P']) { 487 peep(firstp); 488 } 489 490 if(debug['R'] && debug['v']) 491 dumpit("pass7", &firstr->f, 1); 492 493 /* 494 * last pass 495 * eliminate nops 496 * free aux structures 497 * adjust the stack pointer 498 * MOVW.W R1,-12(R13) <<- start 499 * MOVW R0,R1 500 * MOVW R1,8(R13) 501 * MOVW $0,R1 502 * MOVW R1,4(R13) 503 * BL ,runtime.newproc+0(SB) 504 * MOVW &ft+-32(SP),R7 <<- adjust 505 * MOVW &j+-40(SP),R6 <<- adjust 506 * MOVW autotmp_0003+-24(SP),R5 <<- adjust 507 * MOVW $12(R13),R13 <<- finish 508 */ 509 vreg = 0; 510 for(p = firstp; p != P; p = p->link) { 511 while(p->link != P && p->link->as == ANOP) 512 p->link = p->link->link; 513 if(p->to.type == D_BRANCH) 514 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 515 p->to.u.branch = p->to.u.branch->link; 516 if(p->as == AMOVW && p->to.reg == 13) { 517 if(p->scond & C_WBIT) { 518 vreg = -p->to.offset; // in adjust region 519 // print("%P adjusting %d\n", p, vreg); 520 continue; 521 } 522 if(p->from.type == D_CONST && p->to.type == D_REG) { 523 if(p->from.offset != vreg) 524 print("in and out different\n"); 525 // print("%P finish %d\n", p, vreg); 526 vreg = 0; // done adjust region 527 continue; 528 } 529 530 // print("%P %d %d from type\n", p, p->from.type, D_CONST); 531 // print("%P %d %d to type\n\n", p, p->to.type, D_REG); 532 } 533 534 if(p->as == AMOVW && vreg != 0) { 535 if(p->from.sym != nil) 536 if(p->from.name == D_AUTO || p->from.name == D_PARAM) { 537 p->from.offset += vreg; 538 // print("%P adjusting from %d %d\n", p, vreg, p->from.type); 539 } 540 if(p->to.sym != nil) 541 if(p->to.name == D_AUTO || p->to.name == D_PARAM) { 542 p->to.offset += vreg; 543 // print("%P adjusting to %d %d\n", p, vreg, p->from.type); 544 } 545 } 546 } 547 } 548 549 static void 550 walkvardef(Node *n, Reg *r, int active) 551 { 
552 Reg *r1, *r2; 553 int bn; 554 Var *v; 555 556 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 557 if(r1->f.active == active) 558 break; 559 r1->f.active = active; 560 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) 561 break; 562 for(v=n->opt; v!=nil; v=v->nextinnode) { 563 bn = v - var; 564 r1->act.b[bn/32] |= 1L << (bn%32); 565 } 566 if(r1->f.prog->as == ABL) 567 break; 568 } 569 570 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 571 if(r2->f.s2 != nil) 572 walkvardef(n, (Reg*)r2->f.s2, active); 573 } 574 575 void 576 addsplits(void) 577 { 578 Reg *r, *r1; 579 int z, i; 580 Bits bit; 581 582 for(r = firstr; r != R; r = (Reg*)r->f.link) { 583 if(r->f.loop > 1) 584 continue; 585 if(r->f.prog->as == ABL) 586 continue; 587 if(r->f.prog->as == ADUFFZERO) 588 continue; 589 if(r->f.prog->as == ADUFFCOPY) 590 continue; 591 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) { 592 if(r1->f.loop <= 1) 593 continue; 594 for(z=0; z<BITS; z++) 595 bit.b[z] = r1->calbehind.b[z] & 596 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 597 ~(r->calahead.b[z] & addrs.b[z]); 598 while(bany(&bit)) { 599 i = bnum(bit); 600 bit.b[i/32] &= ~(1L << (i%32)); 601 } 602 } 603 } 604 } 605 606 /* 607 * add mov b,rn 608 * just after r 609 */ 610 void 611 addmove(Reg *r, int bn, int rn, int f) 612 { 613 Prog *p, *p1, *p2; 614 Adr *a; 615 Var *v; 616 617 p1 = mal(sizeof(*p1)); 618 *p1 = zprog; 619 p = r->f.prog; 620 621 // If there's a stack fixup coming (after BL newproc or BL deferproc), 622 // delay the load until after the fixup. 
623 p2 = p->link; 624 if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG) 625 p = p2; 626 627 p1->link = p->link; 628 p->link = p1; 629 p1->lineno = p->lineno; 630 631 v = var + bn; 632 633 a = &p1->to; 634 a->name = v->name; 635 a->node = v->node; 636 a->sym = linksym(v->node->sym); 637 a->offset = v->offset; 638 a->etype = v->etype; 639 a->type = D_OREG; 640 if(a->etype == TARRAY || a->sym == nil) 641 a->type = D_CONST; 642 643 if(v->addr) 644 fatal("addmove: shouldn't be doing this %A\n", a); 645 646 switch(v->etype) { 647 default: 648 print("What is this %E\n", v->etype); 649 650 case TINT8: 651 p1->as = AMOVBS; 652 break; 653 case TBOOL: 654 case TUINT8: 655 //print("movbu %E %d %S\n", v->etype, bn, v->sym); 656 p1->as = AMOVBU; 657 break; 658 case TINT16: 659 p1->as = AMOVHS; 660 break; 661 case TUINT16: 662 p1->as = AMOVHU; 663 break; 664 case TINT32: 665 case TUINT32: 666 case TPTR32: 667 p1->as = AMOVW; 668 break; 669 case TFLOAT32: 670 p1->as = AMOVF; 671 break; 672 case TFLOAT64: 673 p1->as = AMOVD; 674 break; 675 } 676 677 p1->from.type = D_REG; 678 p1->from.reg = rn; 679 if(rn >= NREG) { 680 p1->from.type = D_FREG; 681 p1->from.reg = rn-NREG; 682 } 683 if(!f) { 684 p1->from = *a; 685 *a = zprog.from; 686 a->type = D_REG; 687 a->reg = rn; 688 if(rn >= NREG) { 689 a->type = D_FREG; 690 a->reg = rn-NREG; 691 } 692 if(v->etype == TUINT8 || v->etype == TBOOL) 693 p1->as = AMOVBU; 694 if(v->etype == TUINT16) 695 p1->as = AMOVHU; 696 } 697 if(debug['R']) 698 print("%P\t.a%P\n", p, p1); 699 } 700 701 static int 702 overlap(int32 o1, int w1, int32 o2, int w2) 703 { 704 int32 t1, t2; 705 706 t1 = o1+w1; 707 t2 = o2+w2; 708 709 if(!(t1 > o2 && t2 > o1)) 710 return 0; 711 712 return 1; 713 } 714 715 Bits 716 mkvar(Reg *r, Adr *a) 717 { 718 Var *v; 719 int i, t, n, et, z, w, flag; 720 int32 o; 721 Bits bit; 722 Node *node; 723 724 // mark registers used 725 t = a->type; 726 727 flag = 0; 728 
switch(t) { 729 default: 730 print("type %d %d %D\n", t, a->name, a); 731 goto none; 732 733 case D_NONE: 734 case D_FCONST: 735 case D_BRANCH: 736 break; 737 738 739 case D_REGREG: 740 case D_REGREG2: 741 bit = zbits; 742 if(a->offset != NREG) 743 bit.b[0] |= RtoB(a->offset); 744 if(a->reg != NREG) 745 bit.b[0] |= RtoB(a->reg); 746 return bit; 747 748 case D_CONST: 749 case D_REG: 750 case D_SHIFT: 751 if(a->reg != NREG) { 752 bit = zbits; 753 bit.b[0] = RtoB(a->reg); 754 return bit; 755 } 756 break; 757 758 case D_OREG: 759 if(a->reg != NREG) { 760 if(a == &r->f.prog->from) 761 r->use1.b[0] |= RtoB(a->reg); 762 else 763 r->use2.b[0] |= RtoB(a->reg); 764 if(r->f.prog->scond & (C_PBIT|C_WBIT)) 765 r->set.b[0] |= RtoB(a->reg); 766 } 767 break; 768 769 case D_FREG: 770 if(a->reg != NREG) { 771 bit = zbits; 772 bit.b[0] = FtoB(a->reg); 773 return bit; 774 } 775 break; 776 } 777 778 switch(a->name) { 779 default: 780 goto none; 781 782 case D_EXTERN: 783 case D_STATIC: 784 case D_AUTO: 785 case D_PARAM: 786 n = a->name; 787 break; 788 } 789 790 node = a->node; 791 if(node == N || node->op != ONAME || node->orig == N) 792 goto none; 793 node = node->orig; 794 if(node->orig != node) 795 fatal("%D: bad node", a); 796 if(node->sym == S || node->sym->name[0] == '.') 797 goto none; 798 et = a->etype; 799 o = a->offset; 800 w = a->width; 801 if(w < 0) 802 fatal("bad width %d for %D", w, a); 803 804 for(i=0; i<nvar; i++) { 805 v = var+i; 806 if(v->node == node && v->name == n) { 807 if(v->offset == o) 808 if(v->etype == et) 809 if(v->width == w) 810 if(!flag) 811 return blsh(i); 812 813 // if they overlap, disable both 814 if(overlap(v->offset, v->width, o, w)) { 815 v->addr = 1; 816 flag = 1; 817 } 818 } 819 } 820 821 switch(et) { 822 case 0: 823 case TFUNC: 824 goto none; 825 } 826 827 if(nvar >= NVAR) { 828 if(debug['w'] > 1 && node) 829 fatal("variable not optimized: %D", a); 830 831 // If we're not tracking a word in a variable, mark the rest as 832 // having its address 
taken, so that we keep the whole thing 833 // live at all calls. otherwise we might optimize away part of 834 // a variable but not all of it. 835 for(i=0; i<nvar; i++) { 836 v = var+i; 837 if(v->node == node) 838 v->addr = 1; 839 } 840 goto none; 841 } 842 843 i = nvar; 844 nvar++; 845 //print("var %d %E %D %S\n", i, et, a, s); 846 v = var+i; 847 v->offset = o; 848 v->name = n; 849 v->etype = et; 850 v->width = w; 851 v->addr = flag; // funny punning 852 v->node = node; 853 854 // node->opt is the head of a linked list 855 // of Vars within the given Node, so that 856 // we can start at a Var and find all the other 857 // Vars in the same Go variable. 858 v->nextinnode = node->opt; 859 node->opt = v; 860 861 bit = blsh(i); 862 if(n == D_EXTERN || n == D_STATIC) 863 for(z=0; z<BITS; z++) 864 externs.b[z] |= bit.b[z]; 865 if(n == D_PARAM) 866 for(z=0; z<BITS; z++) 867 params.b[z] |= bit.b[z]; 868 869 if(node->class == PPARAM) 870 for(z=0; z<BITS; z++) 871 ivar.b[z] |= bit.b[z]; 872 if(node->class == PPARAMOUT) 873 for(z=0; z<BITS; z++) 874 ovar.b[z] |= bit.b[z]; 875 876 // Treat values with their address taken as live at calls, 877 // because the garbage collector's liveness analysis in ../gc/plive.c does. 878 // These must be consistent or else we will elide stores and the garbage 879 // collector will see uninitialized data. 880 // The typical case where our own analysis is out of sync is when the 881 // node appears to have its address taken but that code doesn't actually 882 // get generated and therefore doesn't show up as an address being 883 // taken when we analyze the instruction stream. 884 // One instance of this case is when a closure uses the same name as 885 // an outer variable for one of its own variables declared with :=. 886 // The parser flags the outer variable as possibly shared, and therefore 887 // sets addrtaken, even though it ends up not being actually shared. 888 // If we were better about _ elision, _ = &x would suffice too. 
889 // The broader := in a closure problem is mentioned in a comment in 890 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 891 if(node->addrtaken) 892 v->addr = 1; 893 894 // Disable registerization for globals, because: 895 // (1) we might panic at any time and we want the recovery code 896 // to see the latest values (issue 1304). 897 // (2) we don't know what pointers might point at them and we want 898 // loads via those pointers to see updated values and vice versa (issue 7995). 899 // 900 // Disable registerization for results if using defer, because the deferred func 901 // might recover and return, causing the current values to be used. 902 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 903 v->addr = 1; 904 905 if(debug['R']) 906 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 907 908 return bit; 909 910 none: 911 return zbits; 912 } 913 914 void 915 prop(Reg *r, Bits ref, Bits cal) 916 { 917 Reg *r1, *r2; 918 int z, i, j; 919 Var *v, *v1; 920 921 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 922 for(z=0; z<BITS; z++) { 923 ref.b[z] |= r1->refahead.b[z]; 924 if(ref.b[z] != r1->refahead.b[z]) { 925 r1->refahead.b[z] = ref.b[z]; 926 change++; 927 } 928 cal.b[z] |= r1->calahead.b[z]; 929 if(cal.b[z] != r1->calahead.b[z]) { 930 r1->calahead.b[z] = cal.b[z]; 931 change++; 932 } 933 } 934 switch(r1->f.prog->as) { 935 case ABL: 936 if(noreturn(r1->f.prog)) 937 break; 938 939 // Mark all input variables (ivar) as used, because that's what the 940 // liveness bitmaps say. The liveness bitmaps say that so that a 941 // panic will not show stale values in the parameter dump. 942 // Mark variables with a recent VARDEF (r1->act) as used, 943 // so that the optimizer flushes initializations to memory, 944 // so that if a garbage collection happens during this CALL, 945 // the collector will see initialized memory. Again this is to 946 // match what the liveness bitmaps say. 
947 for(z=0; z<BITS; z++) { 948 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 949 ref.b[z] = 0; 950 } 951 952 // cal.b is the current approximation of what's live across the call. 953 // Every bit in cal.b is a single stack word. For each such word, 954 // find all the other tracked stack words in the same Go variable 955 // (struct/slice/string/interface) and mark them live too. 956 // This is necessary because the liveness analysis for the garbage 957 // collector works at variable granularity, not at word granularity. 958 // It is fundamental for slice/string/interface: the garbage collector 959 // needs the whole value, not just some of the words, in order to 960 // interpret the other bits correctly. Specifically, slice needs a consistent 961 // ptr and cap, string needs a consistent ptr and len, and interface 962 // needs a consistent type word and data word. 963 for(z=0; z<BITS; z++) { 964 if(cal.b[z] == 0) 965 continue; 966 for(i=0; i<32; i++) { 967 if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) 968 continue; 969 v = var+z*32+i; 970 if(v->node->opt == nil) // v represents fixed register, not Go variable 971 continue; 972 973 // v->node->opt is the head of a linked list of Vars 974 // corresponding to tracked words from the Go variable v->node. 975 // Walk the list and set all the bits. 976 // For a large struct this could end up being quadratic: 977 // after the first setting, the outer loop (for z, i) would see a 1 bit 978 // for all of the remaining words in the struct, and for each such 979 // word would go through and turn on all the bits again. 980 // To avoid the quadratic behavior, we only turn on the bits if 981 // v is the head of the list or if the head's bit is not yet turned on. 982 // This will set the bits at most twice, keeping the overall loop linear. 
983 v1 = v->node->opt; 984 j = v1 - var; 985 if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { 986 for(; v1 != nil; v1 = v1->nextinnode) { 987 j = v1 - var; 988 cal.b[j/32] |= 1<<(j&31); 989 } 990 } 991 } 992 } 993 break; 994 995 case ATEXT: 996 for(z=0; z<BITS; z++) { 997 cal.b[z] = 0; 998 ref.b[z] = 0; 999 } 1000 break; 1001 1002 case ARET: 1003 for(z=0; z<BITS; z++) { 1004 cal.b[z] = externs.b[z] | ovar.b[z]; 1005 ref.b[z] = 0; 1006 } 1007 break; 1008 } 1009 for(z=0; z<BITS; z++) { 1010 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 1011 r1->use1.b[z] | r1->use2.b[z]; 1012 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 1013 r1->refbehind.b[z] = ref.b[z]; 1014 r1->calbehind.b[z] = cal.b[z]; 1015 } 1016 if(r1->f.active) 1017 break; 1018 r1->f.active = 1; 1019 } 1020 for(; r != r1; r = (Reg*)r->f.p1) 1021 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 1022 prop(r2, r->refbehind, r->calbehind); 1023 } 1024 1025 void 1026 synch(Reg *r, Bits dif) 1027 { 1028 Reg *r1; 1029 int z; 1030 1031 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 1032 for(z=0; z<BITS; z++) { 1033 dif.b[z] = (dif.b[z] & 1034 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 1035 r1->set.b[z] | r1->regdiff.b[z]; 1036 if(dif.b[z] != r1->regdiff.b[z]) { 1037 r1->regdiff.b[z] = dif.b[z]; 1038 change++; 1039 } 1040 } 1041 if(r1->f.active) 1042 break; 1043 r1->f.active = 1; 1044 for(z=0; z<BITS; z++) 1045 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 1046 if(r1->f.s2 != nil) 1047 synch((Reg*)r1->f.s2, dif); 1048 } 1049 } 1050 1051 uint32 1052 allreg(uint32 b, Rgn *r) 1053 { 1054 Var *v; 1055 int i; 1056 1057 v = var + r->varno; 1058 r->regno = 0; 1059 switch(v->etype) { 1060 1061 default: 1062 fatal("unknown etype %d/%E", bitno(b), v->etype); 1063 break; 1064 1065 case TINT8: 1066 case TUINT8: 1067 case TINT16: 1068 case TUINT16: 1069 case TINT32: 1070 case TUINT32: 1071 case TINT: 1072 case TUINT: 1073 case TUINTPTR: 1074 case TBOOL: 1075 case TPTR32: 1076 i = BtoR(~b); 1077 if(i && 
r->cost >= 0) { 1078 r->regno = i; 1079 return RtoB(i); 1080 } 1081 break; 1082 1083 case TFLOAT32: 1084 case TFLOAT64: 1085 i = BtoF(~b); 1086 if(i && r->cost >= 0) { 1087 r->regno = i+NREG; 1088 return FtoB(i); 1089 } 1090 break; 1091 1092 case TINT64: 1093 case TUINT64: 1094 case TPTR64: 1095 case TINTER: 1096 case TSTRUCT: 1097 case TARRAY: 1098 break; 1099 } 1100 return 0; 1101 } 1102 1103 void 1104 paint1(Reg *r, int bn) 1105 { 1106 Reg *r1; 1107 Prog *p; 1108 int z; 1109 uint32 bb; 1110 1111 z = bn/32; 1112 bb = 1L<<(bn%32); 1113 if(r->act.b[z] & bb) 1114 return; 1115 for(;;) { 1116 if(!(r->refbehind.b[z] & bb)) 1117 break; 1118 r1 = (Reg*)r->f.p1; 1119 if(r1 == R) 1120 break; 1121 if(!(r1->refahead.b[z] & bb)) 1122 break; 1123 if(r1->act.b[z] & bb) 1124 break; 1125 r = r1; 1126 } 1127 1128 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 1129 change -= CLOAD * r->f.loop; 1130 if(debug['R'] > 1) 1131 print("%d%P\td %Q $%d\n", r->f.loop, 1132 r->f.prog, blsh(bn), change); 1133 } 1134 for(;;) { 1135 r->act.b[z] |= bb; 1136 p = r->f.prog; 1137 1138 1139 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 1140 if(r->use1.b[z] & bb) { 1141 change += CREF * r->f.loop; 1142 if(debug['R'] > 1) 1143 print("%d%P\tu1 %Q $%d\n", r->f.loop, 1144 p, blsh(bn), change); 1145 } 1146 if((r->use2.b[z]|r->set.b[z]) & bb) { 1147 change += CREF * r->f.loop; 1148 if(debug['R'] > 1) 1149 print("%d%P\tu2 %Q $%d\n", r->f.loop, 1150 p, blsh(bn), change); 1151 } 1152 } 1153 1154 if(STORE(r) & r->regdiff.b[z] & bb) { 1155 change -= CLOAD * r->f.loop; 1156 if(debug['R'] > 1) 1157 print("%d%P\tst %Q $%d\n", r->f.loop, 1158 p, blsh(bn), change); 1159 } 1160 1161 if(r->refbehind.b[z] & bb) 1162 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1163 if(r1->refahead.b[z] & bb) 1164 paint1(r1, bn); 1165 1166 if(!(r->refahead.b[z] & bb)) 1167 break; 1168 r1 = (Reg*)r->f.s2; 1169 if(r1 != R) 1170 if(r1->refbehind.b[z] & bb) 1171 paint1(r1, bn); 1172 r = 
(Reg*)r->f.s1; 1173 if(r == R) 1174 break; 1175 if(r->act.b[z] & bb) 1176 break; 1177 if(!(r->refbehind.b[z] & bb)) 1178 break; 1179 } 1180 } 1181 1182 uint32 1183 paint2(Reg *r, int bn) 1184 { 1185 Reg *r1; 1186 int z; 1187 uint32 bb, vreg; 1188 1189 z = bn/32; 1190 bb = 1L << (bn%32); 1191 vreg = regbits; 1192 if(!(r->act.b[z] & bb)) 1193 return vreg; 1194 for(;;) { 1195 if(!(r->refbehind.b[z] & bb)) 1196 break; 1197 r1 = (Reg*)r->f.p1; 1198 if(r1 == R) 1199 break; 1200 if(!(r1->refahead.b[z] & bb)) 1201 break; 1202 if(!(r1->act.b[z] & bb)) 1203 break; 1204 r = r1; 1205 } 1206 for(;;) { 1207 r->act.b[z] &= ~bb; 1208 1209 vreg |= r->regu; 1210 1211 if(r->refbehind.b[z] & bb) 1212 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1213 if(r1->refahead.b[z] & bb) 1214 vreg |= paint2(r1, bn); 1215 1216 if(!(r->refahead.b[z] & bb)) 1217 break; 1218 r1 = (Reg*)r->f.s2; 1219 if(r1 != R) 1220 if(r1->refbehind.b[z] & bb) 1221 vreg |= paint2(r1, bn); 1222 r = (Reg*)r->f.s1; 1223 if(r == R) 1224 break; 1225 if(!(r->act.b[z] & bb)) 1226 break; 1227 if(!(r->refbehind.b[z] & bb)) 1228 break; 1229 } 1230 return vreg; 1231 } 1232 1233 void 1234 paint3(Reg *r, int bn, int32 rb, int rn) 1235 { 1236 Reg *r1; 1237 Prog *p; 1238 int z; 1239 uint32 bb; 1240 1241 z = bn/32; 1242 bb = 1L << (bn%32); 1243 if(r->act.b[z] & bb) 1244 return; 1245 for(;;) { 1246 if(!(r->refbehind.b[z] & bb)) 1247 break; 1248 r1 = (Reg*)r->f.p1; 1249 if(r1 == R) 1250 break; 1251 if(!(r1->refahead.b[z] & bb)) 1252 break; 1253 if(r1->act.b[z] & bb) 1254 break; 1255 r = r1; 1256 } 1257 1258 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1259 addmove(r, bn, rn, 0); 1260 1261 for(;;) { 1262 r->act.b[z] |= bb; 1263 p = r->f.prog; 1264 1265 if(r->use1.b[z] & bb) { 1266 if(debug['R']) 1267 print("%P", p); 1268 addreg(&p->from, rn); 1269 if(debug['R']) 1270 print("\t.c%P\n", p); 1271 } 1272 if((r->use2.b[z]|r->set.b[z]) & bb) { 1273 if(debug['R']) 1274 print("%P", p); 1275 addreg(&p->to, rn); 
1276 if(debug['R']) 1277 print("\t.c%P\n", p); 1278 } 1279 1280 if(STORE(r) & r->regdiff.b[z] & bb) 1281 addmove(r, bn, rn, 1); 1282 r->regu |= rb; 1283 1284 if(r->refbehind.b[z] & bb) 1285 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1286 if(r1->refahead.b[z] & bb) 1287 paint3(r1, bn, rb, rn); 1288 1289 if(!(r->refahead.b[z] & bb)) 1290 break; 1291 r1 = (Reg*)r->f.s2; 1292 if(r1 != R) 1293 if(r1->refbehind.b[z] & bb) 1294 paint3(r1, bn, rb, rn); 1295 r = (Reg*)r->f.s1; 1296 if(r == R) 1297 break; 1298 if(r->act.b[z] & bb) 1299 break; 1300 if(!(r->refbehind.b[z] & bb)) 1301 break; 1302 } 1303 } 1304 1305 void 1306 addreg(Adr *a, int rn) 1307 { 1308 a->sym = nil; 1309 a->name = D_NONE; 1310 a->type = D_REG; 1311 a->reg = rn; 1312 if(rn >= NREG) { 1313 a->type = D_FREG; 1314 a->reg = rn-NREG; 1315 } 1316 } 1317 1318 /* 1319 * bit reg 1320 * 0 R0 1321 * 1 R1 1322 * ... ... 1323 * 10 R10 1324 * 12 R12 1325 */ 1326 int32 1327 RtoB(int r) 1328 { 1329 if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12 1330 return 0; 1331 return 1L << r; 1332 } 1333 1334 int 1335 BtoR(int32 b) 1336 { 1337 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 1338 if(b == 0) 1339 return 0; 1340 return bitno(b); 1341 } 1342 1343 /* 1344 * bit reg 1345 * 18 F2 1346 * 19 F3 1347 * ... ... 
1348 * 31 F15 1349 */ 1350 int32 1351 FtoB(int f) 1352 { 1353 1354 if(f < 2 || f > NFREG-1) 1355 return 0; 1356 return 1L << (f + 16); 1357 } 1358 1359 int 1360 BtoF(int32 b) 1361 { 1362 1363 b &= 0xfffc0000L; 1364 if(b == 0) 1365 return 0; 1366 return bitno(b) - 16; 1367 } 1368 1369 void 1370 dumpone(Flow *f, int isreg) 1371 { 1372 int z; 1373 Bits bit; 1374 Reg *r; 1375 1376 print("%d:%P", f->loop, f->prog); 1377 if(isreg) { 1378 r = (Reg*)f; 1379 for(z=0; z<BITS; z++) 1380 bit.b[z] = 1381 r->set.b[z] | 1382 r->use1.b[z] | 1383 r->use2.b[z] | 1384 r->refbehind.b[z] | 1385 r->refahead.b[z] | 1386 r->calbehind.b[z] | 1387 r->calahead.b[z] | 1388 r->regdiff.b[z] | 1389 r->act.b[z] | 1390 0; 1391 if(bany(&bit)) { 1392 print("\t"); 1393 if(bany(&r->set)) 1394 print(" s:%Q", r->set); 1395 if(bany(&r->use1)) 1396 print(" u1:%Q", r->use1); 1397 if(bany(&r->use2)) 1398 print(" u2:%Q", r->use2); 1399 if(bany(&r->refbehind)) 1400 print(" rb:%Q ", r->refbehind); 1401 if(bany(&r->refahead)) 1402 print(" ra:%Q ", r->refahead); 1403 if(bany(&r->calbehind)) 1404 print(" cb:%Q ", r->calbehind); 1405 if(bany(&r->calahead)) 1406 print(" ca:%Q ", r->calahead); 1407 if(bany(&r->regdiff)) 1408 print(" d:%Q ", r->regdiff); 1409 if(bany(&r->act)) 1410 print(" a:%Q ", r->act); 1411 } 1412 } 1413 print("\n"); 1414 } 1415 1416 void 1417 dumpit(char *str, Flow *r0, int isreg) 1418 { 1419 Flow *r, *r1; 1420 1421 print("\n%s\n", str); 1422 for(r = r0; r != nil; r = r->link) { 1423 dumpone(r, isreg); 1424 r1 = r->p2; 1425 if(r1 != nil) { 1426 print(" pred:"); 1427 for(; r1 != nil; r1 = r1->p2link) 1428 print(" %.4ud", (int)r1->prog->pc); 1429 if(r->p1 != nil) 1430 print(" (and %.4ud)", (int)r->p1->prog->pc); 1431 else 1432 print(" (only)"); 1433 print("\n"); 1434 } 1435 // r1 = r->s1; 1436 // if(r1 != nil) { 1437 // print(" succ:"); 1438 // for(; r1 != R; r1 = r1->s1) 1439 // print(" %.4ud", (int)r1->prog->pc); 1440 // print("\n"); 1441 // } 1442 } 1443 }