// Inferno utils/5c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 32 #include <u.h> 33 #include <libc.h> 34 #include "gg.h" 35 #include "opt.h" 36 37 #define NREGVAR 32 38 #define REGBITS ((uint32)0xffffffff) 39 /*c2go enum { 40 NREGVAR = 32, 41 REGBITS = 0xffffffff, 42 }; 43 */ 44 45 void addsplits(void); 46 static Reg* firstr; 47 static int first = 1; 48 49 int 50 rcmp(const void *a1, const void *a2) 51 { 52 Rgn *p1, *p2; 53 int c1, c2; 54 55 p1 = (Rgn*)a1; 56 p2 = (Rgn*)a2; 57 c1 = p2->cost; 58 c2 = p1->cost; 59 if(c1 -= c2) 60 return c1; 61 return p2->varno - p1->varno; 62 } 63 64 void 65 excise(Flow *r) 66 { 67 Prog *p; 68 69 p = r->prog; 70 p->as = ANOP; 71 p->scond = zprog.scond; 72 p->from = zprog.from; 73 p->to = zprog.to; 74 p->reg = zprog.reg; 75 } 76 77 static void 78 setaddrs(Bits bit) 79 { 80 int i, n; 81 Var *v; 82 Node *node; 83 84 while(bany(&bit)) { 85 // convert each bit to a variable 86 i = bnum(bit); 87 node = var[i].node; 88 n = var[i].name; 89 bit.b[i/32] &= ~(1L<<(i%32)); 90 91 // disable all pieces of that variable 92 for(i=0; i<nvar; i++) { 93 v = var+i; 94 if(v->node == node && v->name == n) 95 v->addr = 2; 96 } 97 } 98 } 99 100 static char* regname[] = { 101 ".R0", 102 ".R1", 103 ".R2", 104 ".R3", 105 ".R4", 106 ".R5", 107 ".R6", 108 ".R7", 109 ".R8", 110 ".R9", 111 ".R10", 112 ".R11", 113 ".R12", 114 ".R13", 115 ".R14", 116 ".R15", 117 ".F0", 118 ".F1", 119 ".F2", 120 ".F3", 121 ".F4", 122 ".F5", 123 ".F6", 124 ".F7", 125 ".F8", 126 ".F9", 127 ".F10", 128 ".F11", 129 ".F12", 130 ".F13", 131 ".F14", 132 ".F15", 133 }; 134 135 static Node* regnodes[NREGVAR]; 136 137 static void walkvardef(Node *n, Reg *r, int active); 138 139 void 140 regopt(Prog *firstp) 141 { 142 Reg *r, *r1; 143 Prog *p; 144 Graph *g; 145 int i, z, active; 146 uint32 vreg; 147 Bits bit; 148 ProgInfo info; 149 150 if(first) { 151 fmtinstall('Q', Qconv); 152 first = 0; 153 } 154 155 mergetemp(firstp); 156 157 /* 158 * control flow is more complicated in generated go code 159 * than in generated c code. 
define pseudo-variables for 160 * registers, so we have complete register usage information. 161 */ 162 nvar = NREGVAR; 163 memset(var, 0, NREGVAR*sizeof var[0]); 164 for(i=0; i<NREGVAR; i++) { 165 if(regnodes[i] == N) 166 regnodes[i] = newname(lookup(regname[i])); 167 var[i].node = regnodes[i]; 168 } 169 170 regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); 171 for(z=0; z<BITS; z++) { 172 externs.b[z] = 0; 173 params.b[z] = 0; 174 consts.b[z] = 0; 175 addrs.b[z] = 0; 176 ivar.b[z] = 0; 177 ovar.b[z] = 0; 178 } 179 180 /* 181 * pass 1 182 * build aux data structure 183 * allocate pcs 184 * find use and set of variables 185 */ 186 g = flowstart(firstp, sizeof(Reg)); 187 if(g == nil) { 188 for(i=0; i<nvar; i++) 189 var[i].node->opt = nil; 190 return; 191 } 192 193 firstr = (Reg*)g->start; 194 195 for(r = firstr; r != R; r = (Reg*)r->f.link) { 196 p = r->f.prog; 197 if(p->as == AVARDEF || p->as == AVARKILL) 198 continue; 199 proginfo(&info, p); 200 201 // Avoid making variables for direct-called functions. 
202 if(p->as == ABL && p->to.type == D_EXTERN) 203 continue; 204 205 bit = mkvar(r, &p->from); 206 if(info.flags & LeftRead) 207 for(z=0; z<BITS; z++) 208 r->use1.b[z] |= bit.b[z]; 209 if(info.flags & LeftAddr) 210 setaddrs(bit); 211 212 if(info.flags & RegRead) { 213 if(p->from.type != D_FREG) 214 r->use1.b[0] |= RtoB(p->reg); 215 else 216 r->use1.b[0] |= FtoB(p->reg); 217 } 218 219 if(info.flags & (RightAddr | RightRead | RightWrite)) { 220 bit = mkvar(r, &p->to); 221 if(info.flags & RightAddr) 222 setaddrs(bit); 223 if(info.flags & RightRead) 224 for(z=0; z<BITS; z++) 225 r->use2.b[z] |= bit.b[z]; 226 if(info.flags & RightWrite) 227 for(z=0; z<BITS; z++) 228 r->set.b[z] |= bit.b[z]; 229 } 230 231 /* the mod/div runtime routines smash R12 */ 232 if(p->as == ADIV || p->as == ADIVU || p->as == AMOD || p->as == AMODU) 233 r->set.b[z] |= RtoB(12); 234 } 235 if(firstr == R) 236 return; 237 238 for(i=0; i<nvar; i++) { 239 Var *v = var+i; 240 if(v->addr) { 241 bit = blsh(i); 242 for(z=0; z<BITS; z++) 243 addrs.b[z] |= bit.b[z]; 244 } 245 246 if(debug['R'] && debug['v']) 247 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 248 i, v->addr, v->etype, v->width, v->node, v->offset); 249 } 250 251 if(debug['R'] && debug['v']) 252 dumpit("pass1", &firstr->f, 1); 253 254 /* 255 * pass 2 256 * find looping structure 257 */ 258 flowrpo(g); 259 260 if(debug['R'] && debug['v']) 261 dumpit("pass2", &firstr->f, 1); 262 263 /* 264 * pass 2.5 265 * iterate propagating fat vardef covering forward 266 * r->act records vars with a VARDEF since the last CALL. 267 * (r->act will be reused in pass 5 for something else, 268 * but we'll be done with it by then.) 
269 */ 270 active = 0; 271 for(r = firstr; r != R; r = (Reg*)r->f.link) { 272 r->f.active = 0; 273 r->act = zbits; 274 } 275 for(r = firstr; r != R; r = (Reg*)r->f.link) { 276 p = r->f.prog; 277 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 278 active++; 279 walkvardef(p->to.node, r, active); 280 } 281 } 282 283 /* 284 * pass 3 285 * iterate propagating usage 286 * back until flow graph is complete 287 */ 288 loop1: 289 change = 0; 290 for(r = firstr; r != R; r = (Reg*)r->f.link) 291 r->f.active = 0; 292 for(r = firstr; r != R; r = (Reg*)r->f.link) 293 if(r->f.prog->as == ARET) 294 prop(r, zbits, zbits); 295 loop11: 296 /* pick up unreachable code */ 297 i = 0; 298 for(r = firstr; r != R; r = r1) { 299 r1 = (Reg*)r->f.link; 300 if(r1 && r1->f.active && !r->f.active) { 301 prop(r, zbits, zbits); 302 i = 1; 303 } 304 } 305 if(i) 306 goto loop11; 307 if(change) 308 goto loop1; 309 310 if(debug['R'] && debug['v']) 311 dumpit("pass3", &firstr->f, 1); 312 313 314 /* 315 * pass 4 316 * iterate propagating register/variable synchrony 317 * forward until graph is complete 318 */ 319 loop2: 320 change = 0; 321 for(r = firstr; r != R; r = (Reg*)r->f.link) 322 r->f.active = 0; 323 synch(firstr, zbits); 324 if(change) 325 goto loop2; 326 327 addsplits(); 328 329 if(debug['R'] && debug['v']) 330 dumpit("pass4", &firstr->f, 1); 331 332 if(debug['R'] > 1) { 333 print("\nprop structure:\n"); 334 for(r = firstr; r != R; r = (Reg*)r->f.link) { 335 print("%d:%P", r->f.loop, r->f.prog); 336 for(z=0; z<BITS; z++) { 337 bit.b[z] = r->set.b[z] | 338 r->refahead.b[z] | r->calahead.b[z] | 339 r->refbehind.b[z] | r->calbehind.b[z] | 340 r->use1.b[z] | r->use2.b[z]; 341 bit.b[z] &= ~addrs.b[z]; 342 } 343 344 if(bany(&bit)) { 345 print("\t"); 346 if(bany(&r->use1)) 347 print(" u1=%Q", r->use1); 348 if(bany(&r->use2)) 349 print(" u2=%Q", r->use2); 350 if(bany(&r->set)) 351 print(" st=%Q", r->set); 352 if(bany(&r->refahead)) 353 print(" ra=%Q", r->refahead); 354 
if(bany(&r->calahead)) 355 print(" ca=%Q", r->calahead); 356 if(bany(&r->refbehind)) 357 print(" rb=%Q", r->refbehind); 358 if(bany(&r->calbehind)) 359 print(" cb=%Q", r->calbehind); 360 } 361 print("\n"); 362 } 363 } 364 365 /* 366 * pass 4.5 367 * move register pseudo-variables into regu. 368 */ 369 for(r = firstr; r != R; r = (Reg*)r->f.link) { 370 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 371 372 r->set.b[0] &= ~REGBITS; 373 r->use1.b[0] &= ~REGBITS; 374 r->use2.b[0] &= ~REGBITS; 375 r->refbehind.b[0] &= ~REGBITS; 376 r->refahead.b[0] &= ~REGBITS; 377 r->calbehind.b[0] &= ~REGBITS; 378 r->calahead.b[0] &= ~REGBITS; 379 r->regdiff.b[0] &= ~REGBITS; 380 r->act.b[0] &= ~REGBITS; 381 } 382 383 if(debug['R'] && debug['v']) 384 dumpit("pass4.5", &firstr->f, 1); 385 386 /* 387 * pass 5 388 * isolate regions 389 * calculate costs (paint1) 390 */ 391 r = firstr; 392 if(r) { 393 for(z=0; z<BITS; z++) 394 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 395 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 396 if(bany(&bit) & !r->f.refset) { 397 // should never happen - all variables are preset 398 if(debug['w']) 399 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 400 r->f.refset = 1; 401 } 402 } 403 404 for(r = firstr; r != R; r = (Reg*)r->f.link) 405 r->act = zbits; 406 rgp = region; 407 nregion = 0; 408 for(r = firstr; r != R; r = (Reg*)r->f.link) { 409 for(z=0; z<BITS; z++) 410 bit.b[z] = r->set.b[z] & 411 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 412 if(bany(&bit) && !r->f.refset) { 413 if(debug['w']) 414 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 415 r->f.refset = 1; 416 excise(&r->f); 417 } 418 for(z=0; z<BITS; z++) 419 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 420 while(bany(&bit)) { 421 i = bnum(bit); 422 rgp->enter = r; 423 rgp->varno = i; 424 change = 0; 425 if(debug['R'] > 1) 426 print("\n"); 427 paint1(r, i); 428 bit.b[i/32] &= ~(1L<<(i%32)); 429 if(change <= 0) { 430 if(debug['R']) 431 
print("%L $%d: %Q\n", 432 r->f.prog->lineno, change, blsh(i)); 433 continue; 434 } 435 rgp->cost = change; 436 nregion++; 437 if(nregion >= NRGN) { 438 if(debug['R'] > 1) 439 print("too many regions\n"); 440 goto brk; 441 } 442 rgp++; 443 } 444 } 445 brk: 446 qsort(region, nregion, sizeof(region[0]), rcmp); 447 448 if(debug['R'] && debug['v']) 449 dumpit("pass5", &firstr->f, 1); 450 451 /* 452 * pass 6 453 * determine used registers (paint2) 454 * replace code (paint3) 455 */ 456 rgp = region; 457 for(i=0; i<nregion; i++) { 458 bit = blsh(rgp->varno); 459 vreg = paint2(rgp->enter, rgp->varno); 460 vreg = allreg(vreg, rgp); 461 if(debug['R']) { 462 if(rgp->regno >= NREG) 463 print("%L $%d F%d: %Q\n", 464 rgp->enter->f.prog->lineno, 465 rgp->cost, 466 rgp->regno-NREG, 467 bit); 468 else 469 print("%L $%d R%d: %Q\n", 470 rgp->enter->f.prog->lineno, 471 rgp->cost, 472 rgp->regno, 473 bit); 474 } 475 if(rgp->regno != 0) 476 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 477 rgp++; 478 } 479 480 if(debug['R'] && debug['v']) 481 dumpit("pass6", &firstr->f, 1); 482 483 /* 484 * free aux structures. peep allocates new ones. 
485 */ 486 for(i=0; i<nvar; i++) 487 var[i].node->opt = nil; 488 flowend(g); 489 firstr = R; 490 491 /* 492 * pass 7 493 * peep-hole on basic block 494 */ 495 if(!debug['R'] || debug['P']) { 496 peep(firstp); 497 } 498 499 if(debug['R'] && debug['v']) 500 dumpit("pass7", &firstr->f, 1); 501 502 /* 503 * last pass 504 * eliminate nops 505 * free aux structures 506 * adjust the stack pointer 507 * MOVW.W R1,-12(R13) <<- start 508 * MOVW R0,R1 509 * MOVW R1,8(R13) 510 * MOVW $0,R1 511 * MOVW R1,4(R13) 512 * BL ,runtime.newproc+0(SB) 513 * MOVW &ft+-32(SP),R7 <<- adjust 514 * MOVW &j+-40(SP),R6 <<- adjust 515 * MOVW autotmp_0003+-24(SP),R5 <<- adjust 516 * MOVW $12(R13),R13 <<- finish 517 */ 518 vreg = 0; 519 for(p = firstp; p != P; p = p->link) { 520 while(p->link != P && p->link->as == ANOP) 521 p->link = p->link->link; 522 if(p->to.type == D_BRANCH) 523 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 524 p->to.u.branch = p->to.u.branch->link; 525 if(p->as == AMOVW && p->to.reg == 13) { 526 if(p->scond & C_WBIT) { 527 vreg = -p->to.offset; // in adjust region 528 // print("%P adjusting %d\n", p, vreg); 529 continue; 530 } 531 if(p->from.type == D_CONST && p->to.type == D_REG) { 532 if(p->from.offset != vreg) 533 print("in and out different\n"); 534 // print("%P finish %d\n", p, vreg); 535 vreg = 0; // done adjust region 536 continue; 537 } 538 539 // print("%P %d %d from type\n", p, p->from.type, D_CONST); 540 // print("%P %d %d to type\n\n", p, p->to.type, D_REG); 541 } 542 543 if(p->as == AMOVW && vreg != 0) { 544 if(p->from.sym != nil) 545 if(p->from.name == D_AUTO || p->from.name == D_PARAM) { 546 p->from.offset += vreg; 547 // print("%P adjusting from %d %d\n", p, vreg, p->from.type); 548 } 549 if(p->to.sym != nil) 550 if(p->to.name == D_AUTO || p->to.name == D_PARAM) { 551 p->to.offset += vreg; 552 // print("%P adjusting to %d %d\n", p, vreg, p->from.type); 553 } 554 } 555 } 556 } 557 558 static void 559 walkvardef(Node *n, Reg *r, int active) 560 { 
561 Reg *r1, *r2; 562 int bn; 563 Var *v; 564 565 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 566 if(r1->f.active == active) 567 break; 568 r1->f.active = active; 569 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) 570 break; 571 for(v=n->opt; v!=nil; v=v->nextinnode) { 572 bn = v - var; 573 r1->act.b[bn/32] |= 1L << (bn%32); 574 } 575 if(r1->f.prog->as == ABL) 576 break; 577 } 578 579 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 580 if(r2->f.s2 != nil) 581 walkvardef(n, (Reg*)r2->f.s2, active); 582 } 583 584 void 585 addsplits(void) 586 { 587 Reg *r, *r1; 588 int z, i; 589 Bits bit; 590 591 for(r = firstr; r != R; r = (Reg*)r->f.link) { 592 if(r->f.loop > 1) 593 continue; 594 if(r->f.prog->as == ABL) 595 continue; 596 if(r->f.prog->as == ADUFFZERO) 597 continue; 598 if(r->f.prog->as == ADUFFCOPY) 599 continue; 600 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) { 601 if(r1->f.loop <= 1) 602 continue; 603 for(z=0; z<BITS; z++) 604 bit.b[z] = r1->calbehind.b[z] & 605 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 606 ~(r->calahead.b[z] & addrs.b[z]); 607 while(bany(&bit)) { 608 i = bnum(bit); 609 bit.b[i/32] &= ~(1L << (i%32)); 610 } 611 } 612 } 613 } 614 615 /* 616 * add mov b,rn 617 * just after r 618 */ 619 void 620 addmove(Reg *r, int bn, int rn, int f) 621 { 622 Prog *p, *p1, *p2; 623 Adr *a; 624 Var *v; 625 626 p1 = mal(sizeof(*p1)); 627 *p1 = zprog; 628 p = r->f.prog; 629 630 // If there's a stack fixup coming (after BL newproc or BL deferproc), 631 // delay the load until after the fixup. 
632 p2 = p->link; 633 if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG) 634 p = p2; 635 636 p1->link = p->link; 637 p->link = p1; 638 p1->lineno = p->lineno; 639 640 v = var + bn; 641 642 a = &p1->to; 643 a->name = v->name; 644 a->node = v->node; 645 a->sym = linksym(v->node->sym); 646 a->offset = v->offset; 647 a->etype = v->etype; 648 a->type = D_OREG; 649 if(a->etype == TARRAY || a->sym == nil) 650 a->type = D_CONST; 651 652 if(v->addr) 653 fatal("addmove: shouldn't be doing this %A\n", a); 654 655 switch(v->etype) { 656 default: 657 print("What is this %E\n", v->etype); 658 659 case TINT8: 660 p1->as = AMOVBS; 661 break; 662 case TBOOL: 663 case TUINT8: 664 //print("movbu %E %d %S\n", v->etype, bn, v->sym); 665 p1->as = AMOVBU; 666 break; 667 case TINT16: 668 p1->as = AMOVHS; 669 break; 670 case TUINT16: 671 p1->as = AMOVHU; 672 break; 673 case TINT32: 674 case TUINT32: 675 case TPTR32: 676 p1->as = AMOVW; 677 break; 678 case TFLOAT32: 679 p1->as = AMOVF; 680 break; 681 case TFLOAT64: 682 p1->as = AMOVD; 683 break; 684 } 685 686 p1->from.type = D_REG; 687 p1->from.reg = rn; 688 if(rn >= NREG) { 689 p1->from.type = D_FREG; 690 p1->from.reg = rn-NREG; 691 } 692 if(!f) { 693 p1->from = *a; 694 *a = zprog.from; 695 a->type = D_REG; 696 a->reg = rn; 697 if(rn >= NREG) { 698 a->type = D_FREG; 699 a->reg = rn-NREG; 700 } 701 if(v->etype == TUINT8 || v->etype == TBOOL) 702 p1->as = AMOVBU; 703 if(v->etype == TUINT16) 704 p1->as = AMOVHU; 705 } 706 if(debug['R']) 707 print("%P\t.a%P\n", p, p1); 708 } 709 710 static int 711 overlap(int32 o1, int w1, int32 o2, int w2) 712 { 713 int32 t1, t2; 714 715 t1 = o1+w1; 716 t2 = o2+w2; 717 718 if(!(t1 > o2 && t2 > o1)) 719 return 0; 720 721 return 1; 722 } 723 724 Bits 725 mkvar(Reg *r, Adr *a) 726 { 727 Var *v; 728 int i, t, n, et, z, w, flag; 729 int32 o; 730 Bits bit; 731 Node *node; 732 733 // mark registers used 734 t = a->type; 735 736 flag = 0; 737 
switch(t) { 738 default: 739 print("type %d %d %D\n", t, a->name, a); 740 goto none; 741 742 case D_NONE: 743 case D_FCONST: 744 case D_BRANCH: 745 break; 746 747 748 case D_REGREG: 749 case D_REGREG2: 750 bit = zbits; 751 if(a->offset != NREG) 752 bit.b[0] |= RtoB(a->offset); 753 if(a->reg != NREG) 754 bit.b[0] |= RtoB(a->reg); 755 return bit; 756 757 case D_CONST: 758 case D_REG: 759 case D_SHIFT: 760 if(a->reg != NREG) { 761 bit = zbits; 762 bit.b[0] = RtoB(a->reg); 763 return bit; 764 } 765 break; 766 767 case D_OREG: 768 if(a->reg != NREG) { 769 if(a == &r->f.prog->from) 770 r->use1.b[0] |= RtoB(a->reg); 771 else 772 r->use2.b[0] |= RtoB(a->reg); 773 if(r->f.prog->scond & (C_PBIT|C_WBIT)) 774 r->set.b[0] |= RtoB(a->reg); 775 } 776 break; 777 778 case D_FREG: 779 if(a->reg != NREG) { 780 bit = zbits; 781 bit.b[0] = FtoB(a->reg); 782 return bit; 783 } 784 break; 785 } 786 787 switch(a->name) { 788 default: 789 goto none; 790 791 case D_EXTERN: 792 case D_STATIC: 793 case D_AUTO: 794 case D_PARAM: 795 n = a->name; 796 break; 797 } 798 799 node = a->node; 800 if(node == N || node->op != ONAME || node->orig == N) 801 goto none; 802 node = node->orig; 803 if(node->orig != node) 804 fatal("%D: bad node", a); 805 if(node->sym == S || node->sym->name[0] == '.') 806 goto none; 807 et = a->etype; 808 o = a->offset; 809 w = a->width; 810 if(w < 0) 811 fatal("bad width %d for %D", w, a); 812 813 for(i=0; i<nvar; i++) { 814 v = var+i; 815 if(v->node == node && v->name == n) { 816 if(v->offset == o) 817 if(v->etype == et) 818 if(v->width == w) 819 if(!flag) 820 return blsh(i); 821 822 // if they overlap, disable both 823 if(overlap(v->offset, v->width, o, w)) { 824 v->addr = 1; 825 flag = 1; 826 } 827 } 828 } 829 830 switch(et) { 831 case 0: 832 case TFUNC: 833 goto none; 834 } 835 836 if(nvar >= NVAR) { 837 if(debug['w'] > 1 && node) 838 fatal("variable not optimized: %D", a); 839 840 // If we're not tracking a word in a variable, mark the rest as 841 // having its address 
taken, so that we keep the whole thing 842 // live at all calls. otherwise we might optimize away part of 843 // a variable but not all of it. 844 for(i=0; i<nvar; i++) { 845 v = var+i; 846 if(v->node == node) 847 v->addr = 1; 848 } 849 goto none; 850 } 851 852 i = nvar; 853 nvar++; 854 //print("var %d %E %D %S\n", i, et, a, s); 855 v = var+i; 856 v->offset = o; 857 v->name = n; 858 v->etype = et; 859 v->width = w; 860 v->addr = flag; // funny punning 861 v->node = node; 862 863 // node->opt is the head of a linked list 864 // of Vars within the given Node, so that 865 // we can start at a Var and find all the other 866 // Vars in the same Go variable. 867 v->nextinnode = node->opt; 868 node->opt = v; 869 870 bit = blsh(i); 871 if(n == D_EXTERN || n == D_STATIC) 872 for(z=0; z<BITS; z++) 873 externs.b[z] |= bit.b[z]; 874 if(n == D_PARAM) 875 for(z=0; z<BITS; z++) 876 params.b[z] |= bit.b[z]; 877 878 if(node->class == PPARAM) 879 for(z=0; z<BITS; z++) 880 ivar.b[z] |= bit.b[z]; 881 if(node->class == PPARAMOUT) 882 for(z=0; z<BITS; z++) 883 ovar.b[z] |= bit.b[z]; 884 885 // Treat values with their address taken as live at calls, 886 // because the garbage collector's liveness analysis in ../gc/plive.c does. 887 // These must be consistent or else we will elide stores and the garbage 888 // collector will see uninitialized data. 889 // The typical case where our own analysis is out of sync is when the 890 // node appears to have its address taken but that code doesn't actually 891 // get generated and therefore doesn't show up as an address being 892 // taken when we analyze the instruction stream. 893 // One instance of this case is when a closure uses the same name as 894 // an outer variable for one of its own variables declared with :=. 895 // The parser flags the outer variable as possibly shared, and therefore 896 // sets addrtaken, even though it ends up not being actually shared. 897 // If we were better about _ elision, _ = &x would suffice too. 
898 // The broader := in a closure problem is mentioned in a comment in 899 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 900 if(node->addrtaken) 901 v->addr = 1; 902 903 // Disable registerization for globals, because: 904 // (1) we might panic at any time and we want the recovery code 905 // to see the latest values (issue 1304). 906 // (2) we don't know what pointers might point at them and we want 907 // loads via those pointers to see updated values and vice versa (issue 7995). 908 // 909 // Disable registerization for results if using defer, because the deferred func 910 // might recover and return, causing the current values to be used. 911 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 912 v->addr = 1; 913 914 if(debug['R']) 915 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 916 917 return bit; 918 919 none: 920 return zbits; 921 } 922 923 void 924 prop(Reg *r, Bits ref, Bits cal) 925 { 926 Reg *r1, *r2; 927 int z, i, j; 928 Var *v, *v1; 929 930 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 931 for(z=0; z<BITS; z++) { 932 ref.b[z] |= r1->refahead.b[z]; 933 if(ref.b[z] != r1->refahead.b[z]) { 934 r1->refahead.b[z] = ref.b[z]; 935 change++; 936 } 937 cal.b[z] |= r1->calahead.b[z]; 938 if(cal.b[z] != r1->calahead.b[z]) { 939 r1->calahead.b[z] = cal.b[z]; 940 change++; 941 } 942 } 943 switch(r1->f.prog->as) { 944 case ABL: 945 if(noreturn(r1->f.prog)) 946 break; 947 948 // Mark all input variables (ivar) as used, because that's what the 949 // liveness bitmaps say. The liveness bitmaps say that so that a 950 // panic will not show stale values in the parameter dump. 951 // Mark variables with a recent VARDEF (r1->act) as used, 952 // so that the optimizer flushes initializations to memory, 953 // so that if a garbage collection happens during this CALL, 954 // the collector will see initialized memory. Again this is to 955 // match what the liveness bitmaps say. 
956 for(z=0; z<BITS; z++) { 957 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 958 ref.b[z] = 0; 959 } 960 961 // cal.b is the current approximation of what's live across the call. 962 // Every bit in cal.b is a single stack word. For each such word, 963 // find all the other tracked stack words in the same Go variable 964 // (struct/slice/string/interface) and mark them live too. 965 // This is necessary because the liveness analysis for the garbage 966 // collector works at variable granularity, not at word granularity. 967 // It is fundamental for slice/string/interface: the garbage collector 968 // needs the whole value, not just some of the words, in order to 969 // interpret the other bits correctly. Specifically, slice needs a consistent 970 // ptr and cap, string needs a consistent ptr and len, and interface 971 // needs a consistent type word and data word. 972 for(z=0; z<BITS; z++) { 973 if(cal.b[z] == 0) 974 continue; 975 for(i=0; i<32; i++) { 976 if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) 977 continue; 978 v = var+z*32+i; 979 if(v->node->opt == nil) // v represents fixed register, not Go variable 980 continue; 981 982 // v->node->opt is the head of a linked list of Vars 983 // corresponding to tracked words from the Go variable v->node. 984 // Walk the list and set all the bits. 985 // For a large struct this could end up being quadratic: 986 // after the first setting, the outer loop (for z, i) would see a 1 bit 987 // for all of the remaining words in the struct, and for each such 988 // word would go through and turn on all the bits again. 989 // To avoid the quadratic behavior, we only turn on the bits if 990 // v is the head of the list or if the head's bit is not yet turned on. 991 // This will set the bits at most twice, keeping the overall loop linear. 
992 v1 = v->node->opt; 993 j = v1 - var; 994 if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { 995 for(; v1 != nil; v1 = v1->nextinnode) { 996 j = v1 - var; 997 cal.b[j/32] |= 1<<(j&31); 998 } 999 } 1000 } 1001 } 1002 break; 1003 1004 case ATEXT: 1005 for(z=0; z<BITS; z++) { 1006 cal.b[z] = 0; 1007 ref.b[z] = 0; 1008 } 1009 break; 1010 1011 case ARET: 1012 for(z=0; z<BITS; z++) { 1013 cal.b[z] = externs.b[z] | ovar.b[z]; 1014 ref.b[z] = 0; 1015 } 1016 break; 1017 } 1018 for(z=0; z<BITS; z++) { 1019 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 1020 r1->use1.b[z] | r1->use2.b[z]; 1021 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 1022 r1->refbehind.b[z] = ref.b[z]; 1023 r1->calbehind.b[z] = cal.b[z]; 1024 } 1025 if(r1->f.active) 1026 break; 1027 r1->f.active = 1; 1028 } 1029 for(; r != r1; r = (Reg*)r->f.p1) 1030 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 1031 prop(r2, r->refbehind, r->calbehind); 1032 } 1033 1034 void 1035 synch(Reg *r, Bits dif) 1036 { 1037 Reg *r1; 1038 int z; 1039 1040 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 1041 for(z=0; z<BITS; z++) { 1042 dif.b[z] = (dif.b[z] & 1043 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 1044 r1->set.b[z] | r1->regdiff.b[z]; 1045 if(dif.b[z] != r1->regdiff.b[z]) { 1046 r1->regdiff.b[z] = dif.b[z]; 1047 change++; 1048 } 1049 } 1050 if(r1->f.active) 1051 break; 1052 r1->f.active = 1; 1053 for(z=0; z<BITS; z++) 1054 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 1055 if(r1->f.s2 != nil) 1056 synch((Reg*)r1->f.s2, dif); 1057 } 1058 } 1059 1060 uint32 1061 allreg(uint32 b, Rgn *r) 1062 { 1063 Var *v; 1064 int i; 1065 1066 v = var + r->varno; 1067 r->regno = 0; 1068 switch(v->etype) { 1069 1070 default: 1071 fatal("unknown etype %d/%E", bitno(b), v->etype); 1072 break; 1073 1074 case TINT8: 1075 case TUINT8: 1076 case TINT16: 1077 case TUINT16: 1078 case TINT32: 1079 case TUINT32: 1080 case TINT: 1081 case TUINT: 1082 case TUINTPTR: 1083 case TBOOL: 1084 case TPTR32: 1085 i = BtoR(~b); 1086 
if(i && r->cost >= 0) { 1087 r->regno = i; 1088 return RtoB(i); 1089 } 1090 break; 1091 1092 case TFLOAT32: 1093 case TFLOAT64: 1094 i = BtoF(~b); 1095 if(i && r->cost >= 0) { 1096 r->regno = i+NREG; 1097 return FtoB(i); 1098 } 1099 break; 1100 1101 case TINT64: 1102 case TUINT64: 1103 case TPTR64: 1104 case TINTER: 1105 case TSTRUCT: 1106 case TARRAY: 1107 break; 1108 } 1109 return 0; 1110 } 1111 1112 void 1113 paint1(Reg *r, int bn) 1114 { 1115 Reg *r1; 1116 Prog *p; 1117 int z; 1118 uint32 bb; 1119 1120 z = bn/32; 1121 bb = 1L<<(bn%32); 1122 if(r->act.b[z] & bb) 1123 return; 1124 for(;;) { 1125 if(!(r->refbehind.b[z] & bb)) 1126 break; 1127 r1 = (Reg*)r->f.p1; 1128 if(r1 == R) 1129 break; 1130 if(!(r1->refahead.b[z] & bb)) 1131 break; 1132 if(r1->act.b[z] & bb) 1133 break; 1134 r = r1; 1135 } 1136 1137 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 1138 change -= CLOAD * r->f.loop; 1139 if(debug['R'] > 1) 1140 print("%d%P\td %Q $%d\n", r->f.loop, 1141 r->f.prog, blsh(bn), change); 1142 } 1143 for(;;) { 1144 r->act.b[z] |= bb; 1145 p = r->f.prog; 1146 1147 1148 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 1149 if(r->use1.b[z] & bb) { 1150 change += CREF * r->f.loop; 1151 if(debug['R'] > 1) 1152 print("%d%P\tu1 %Q $%d\n", r->f.loop, 1153 p, blsh(bn), change); 1154 } 1155 if((r->use2.b[z]|r->set.b[z]) & bb) { 1156 change += CREF * r->f.loop; 1157 if(debug['R'] > 1) 1158 print("%d%P\tu2 %Q $%d\n", r->f.loop, 1159 p, blsh(bn), change); 1160 } 1161 } 1162 1163 if(STORE(r) & r->regdiff.b[z] & bb) { 1164 change -= CLOAD * r->f.loop; 1165 if(debug['R'] > 1) 1166 print("%d%P\tst %Q $%d\n", r->f.loop, 1167 p, blsh(bn), change); 1168 } 1169 1170 if(r->refbehind.b[z] & bb) 1171 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1172 if(r1->refahead.b[z] & bb) 1173 paint1(r1, bn); 1174 1175 if(!(r->refahead.b[z] & bb)) 1176 break; 1177 r1 = (Reg*)r->f.s2; 1178 if(r1 != R) 1179 if(r1->refbehind.b[z] & bb) 1180 paint1(r1, bn); 1181 r = 
(Reg*)r->f.s1; 1182 if(r == R) 1183 break; 1184 if(r->act.b[z] & bb) 1185 break; 1186 if(!(r->refbehind.b[z] & bb)) 1187 break; 1188 } 1189 } 1190 1191 uint32 1192 paint2(Reg *r, int bn) 1193 { 1194 Reg *r1; 1195 int z; 1196 uint32 bb, vreg; 1197 1198 z = bn/32; 1199 bb = 1L << (bn%32); 1200 vreg = regbits; 1201 if(!(r->act.b[z] & bb)) 1202 return vreg; 1203 for(;;) { 1204 if(!(r->refbehind.b[z] & bb)) 1205 break; 1206 r1 = (Reg*)r->f.p1; 1207 if(r1 == R) 1208 break; 1209 if(!(r1->refahead.b[z] & bb)) 1210 break; 1211 if(!(r1->act.b[z] & bb)) 1212 break; 1213 r = r1; 1214 } 1215 for(;;) { 1216 r->act.b[z] &= ~bb; 1217 1218 vreg |= r->regu; 1219 1220 if(r->refbehind.b[z] & bb) 1221 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1222 if(r1->refahead.b[z] & bb) 1223 vreg |= paint2(r1, bn); 1224 1225 if(!(r->refahead.b[z] & bb)) 1226 break; 1227 r1 = (Reg*)r->f.s2; 1228 if(r1 != R) 1229 if(r1->refbehind.b[z] & bb) 1230 vreg |= paint2(r1, bn); 1231 r = (Reg*)r->f.s1; 1232 if(r == R) 1233 break; 1234 if(!(r->act.b[z] & bb)) 1235 break; 1236 if(!(r->refbehind.b[z] & bb)) 1237 break; 1238 } 1239 return vreg; 1240 } 1241 1242 void 1243 paint3(Reg *r, int bn, int32 rb, int rn) 1244 { 1245 Reg *r1; 1246 Prog *p; 1247 int z; 1248 uint32 bb; 1249 1250 z = bn/32; 1251 bb = 1L << (bn%32); 1252 if(r->act.b[z] & bb) 1253 return; 1254 for(;;) { 1255 if(!(r->refbehind.b[z] & bb)) 1256 break; 1257 r1 = (Reg*)r->f.p1; 1258 if(r1 == R) 1259 break; 1260 if(!(r1->refahead.b[z] & bb)) 1261 break; 1262 if(r1->act.b[z] & bb) 1263 break; 1264 r = r1; 1265 } 1266 1267 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1268 addmove(r, bn, rn, 0); 1269 1270 for(;;) { 1271 r->act.b[z] |= bb; 1272 p = r->f.prog; 1273 1274 if(r->use1.b[z] & bb) { 1275 if(debug['R']) 1276 print("%P", p); 1277 addreg(&p->from, rn); 1278 if(debug['R']) 1279 print("\t.c%P\n", p); 1280 } 1281 if((r->use2.b[z]|r->set.b[z]) & bb) { 1282 if(debug['R']) 1283 print("%P", p); 1284 addreg(&p->to, rn); 
1285 if(debug['R']) 1286 print("\t.c%P\n", p); 1287 } 1288 1289 if(STORE(r) & r->regdiff.b[z] & bb) 1290 addmove(r, bn, rn, 1); 1291 r->regu |= rb; 1292 1293 if(r->refbehind.b[z] & bb) 1294 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1295 if(r1->refahead.b[z] & bb) 1296 paint3(r1, bn, rb, rn); 1297 1298 if(!(r->refahead.b[z] & bb)) 1299 break; 1300 r1 = (Reg*)r->f.s2; 1301 if(r1 != R) 1302 if(r1->refbehind.b[z] & bb) 1303 paint3(r1, bn, rb, rn); 1304 r = (Reg*)r->f.s1; 1305 if(r == R) 1306 break; 1307 if(r->act.b[z] & bb) 1308 break; 1309 if(!(r->refbehind.b[z] & bb)) 1310 break; 1311 } 1312 } 1313 1314 void 1315 addreg(Adr *a, int rn) 1316 { 1317 a->sym = nil; 1318 a->node = nil; 1319 a->name = D_NONE; 1320 a->type = D_REG; 1321 a->reg = rn; 1322 if(rn >= NREG) { 1323 a->type = D_FREG; 1324 a->reg = rn-NREG; 1325 } 1326 } 1327 1328 /* 1329 * bit reg 1330 * 0 R0 1331 * 1 R1 1332 * ... ... 1333 * 10 R10 1334 * 12 R12 1335 */ 1336 int32 1337 RtoB(int r) 1338 { 1339 if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12 1340 return 0; 1341 return 1L << r; 1342 } 1343 1344 int 1345 BtoR(int32 b) 1346 { 1347 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 1348 if(b == 0) 1349 return 0; 1350 return bitno(b); 1351 } 1352 1353 /* 1354 * bit reg 1355 * 18 F2 1356 * 19 F3 1357 * ... ... 
1358 * 31 F15 1359 */ 1360 int32 1361 FtoB(int f) 1362 { 1363 1364 if(f < 2 || f > NFREG-1) 1365 return 0; 1366 return 1L << (f + 16); 1367 } 1368 1369 int 1370 BtoF(int32 b) 1371 { 1372 1373 b &= 0xfffc0000L; 1374 if(b == 0) 1375 return 0; 1376 return bitno(b) - 16; 1377 } 1378 1379 void 1380 dumpone(Flow *f, int isreg) 1381 { 1382 int z; 1383 Bits bit; 1384 Reg *r; 1385 1386 print("%d:%P", f->loop, f->prog); 1387 if(isreg) { 1388 r = (Reg*)f; 1389 for(z=0; z<BITS; z++) 1390 bit.b[z] = 1391 r->set.b[z] | 1392 r->use1.b[z] | 1393 r->use2.b[z] | 1394 r->refbehind.b[z] | 1395 r->refahead.b[z] | 1396 r->calbehind.b[z] | 1397 r->calahead.b[z] | 1398 r->regdiff.b[z] | 1399 r->act.b[z] | 1400 0; 1401 if(bany(&bit)) { 1402 print("\t"); 1403 if(bany(&r->set)) 1404 print(" s:%Q", r->set); 1405 if(bany(&r->use1)) 1406 print(" u1:%Q", r->use1); 1407 if(bany(&r->use2)) 1408 print(" u2:%Q", r->use2); 1409 if(bany(&r->refbehind)) 1410 print(" rb:%Q ", r->refbehind); 1411 if(bany(&r->refahead)) 1412 print(" ra:%Q ", r->refahead); 1413 if(bany(&r->calbehind)) 1414 print(" cb:%Q ", r->calbehind); 1415 if(bany(&r->calahead)) 1416 print(" ca:%Q ", r->calahead); 1417 if(bany(&r->regdiff)) 1418 print(" d:%Q ", r->regdiff); 1419 if(bany(&r->act)) 1420 print(" a:%Q ", r->act); 1421 } 1422 } 1423 print("\n"); 1424 } 1425 1426 void 1427 dumpit(char *str, Flow *r0, int isreg) 1428 { 1429 Flow *r, *r1; 1430 1431 print("\n%s\n", str); 1432 for(r = r0; r != nil; r = r->link) { 1433 dumpone(r, isreg); 1434 r1 = r->p2; 1435 if(r1 != nil) { 1436 print(" pred:"); 1437 for(; r1 != nil; r1 = r1->p2link) 1438 print(" %.4ud", (int)r1->prog->pc); 1439 if(r->p1 != nil) 1440 print(" (and %.4ud)", (int)r->p1->prog->pc); 1441 else 1442 print(" (only)"); 1443 print("\n"); 1444 } 1445 // r1 = r->s1; 1446 // if(r1 != nil) { 1447 // print(" succ:"); 1448 // for(; r1 != R; r1 = r1->s1) 1449 // print(" %.4ud", (int)r1->prog->pc); 1450 // print("\n"); 1451 // } 1452 } 1453 }