// Source: github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/6g/reg.c
//
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 32 /* 16 general + 16 floating */ 37 #define REGBITS ((uint32)0xffffffff) 38 39 static Reg* firstr; 40 static int first = 1; 41 42 int 43 rcmp(const void *a1, const void *a2) 44 { 45 Rgn *p1, *p2; 46 int c1, c2; 47 48 p1 = (Rgn*)a1; 49 p2 = (Rgn*)a2; 50 c1 = p2->cost; 51 c2 = p1->cost; 52 if(c1 -= c2) 53 return c1; 54 return p2->varno - p1->varno; 55 } 56 57 static void 58 setaddrs(Bits bit) 59 { 60 int i, n; 61 Var *v; 62 Node *node; 63 64 while(bany(&bit)) { 65 // convert each bit to a variable 66 i = bnum(bit); 67 node = var[i].node; 68 n = var[i].name; 69 bit.b[i/32] &= ~(1L<<(i%32)); 70 71 // disable all pieces of that variable 72 for(i=0; i<nvar; i++) { 73 v = var+i; 74 if(v->node == node && v->name == n) 75 v->addr = 2; 76 } 77 } 78 } 79 80 static char* regname[] = { 81 ".AX", 82 ".CX", 83 ".DX", 84 ".BX", 85 ".SP", 86 ".BP", 87 ".SI", 88 ".DI", 89 ".R8", 90 ".R9", 91 ".R10", 92 ".R11", 93 ".R12", 94 ".R13", 95 ".R14", 96 ".R15", 97 ".X0", 98 ".X1", 99 ".X2", 100 ".X3", 101 ".X4", 102 ".X5", 103 ".X6", 104 ".X7", 105 ".X8", 106 ".X9", 107 ".X10", 108 ".X11", 109 ".X12", 110 ".X13", 111 ".X14", 112 ".X15", 113 }; 114 115 static Node* regnodes[NREGVAR]; 116 117 static void walkvardef(Node *n, Reg *r, int active); 118 119 void 120 regopt(Prog *firstp) 121 { 122 Reg *r, *r1; 123 Prog *p; 124 Graph *g; 125 ProgInfo info; 126 int i, z, active; 127 uint32 vreg; 128 Bits bit; 129 130 if(first) { 131 fmtinstall('Q', Qconv); 132 exregoffset = D_R15; 133 first = 0; 134 } 135 136 mergetemp(firstp); 137 138 /* 139 * control flow is more complicated in generated go code 140 * than in generated c code. define pseudo-variables for 141 * registers, so we have complete register usage information. 
142 */ 143 nvar = NREGVAR; 144 memset(var, 0, NREGVAR*sizeof var[0]); 145 for(i=0; i<NREGVAR; i++) { 146 if(regnodes[i] == N) 147 regnodes[i] = newname(lookup(regname[i])); 148 var[i].node = regnodes[i]; 149 } 150 151 regbits = RtoB(D_SP); 152 for(z=0; z<BITS; z++) { 153 externs.b[z] = 0; 154 params.b[z] = 0; 155 consts.b[z] = 0; 156 addrs.b[z] = 0; 157 ivar.b[z] = 0; 158 ovar.b[z] = 0; 159 } 160 161 /* 162 * pass 1 163 * build aux data structure 164 * allocate pcs 165 * find use and set of variables 166 */ 167 g = flowstart(firstp, sizeof(Reg)); 168 if(g == nil) { 169 for(i=0; i<nvar; i++) 170 var[i].node->opt = nil; 171 return; 172 } 173 174 firstr = (Reg*)g->start; 175 176 for(r = firstr; r != R; r = (Reg*)r->f.link) { 177 p = r->f.prog; 178 if(p->as == AVARDEF || p->as == AVARKILL) 179 continue; 180 proginfo(&info, p); 181 182 // Avoid making variables for direct-called functions. 183 if(p->as == ACALL && p->to.type == D_EXTERN) 184 continue; 185 186 r->use1.b[0] |= info.reguse | info.regindex; 187 r->set.b[0] |= info.regset; 188 189 bit = mkvar(r, &p->from); 190 if(bany(&bit)) { 191 if(info.flags & LeftAddr) 192 setaddrs(bit); 193 if(info.flags & LeftRead) 194 for(z=0; z<BITS; z++) 195 r->use1.b[z] |= bit.b[z]; 196 if(info.flags & LeftWrite) 197 for(z=0; z<BITS; z++) 198 r->set.b[z] |= bit.b[z]; 199 } 200 201 bit = mkvar(r, &p->to); 202 if(bany(&bit)) { 203 if(info.flags & RightAddr) 204 setaddrs(bit); 205 if(info.flags & RightRead) 206 for(z=0; z<BITS; z++) 207 r->use2.b[z] |= bit.b[z]; 208 if(info.flags & RightWrite) 209 for(z=0; z<BITS; z++) 210 r->set.b[z] |= bit.b[z]; 211 } 212 } 213 214 for(i=0; i<nvar; i++) { 215 Var *v = var+i; 216 if(v->addr) { 217 bit = blsh(i); 218 for(z=0; z<BITS; z++) 219 addrs.b[z] |= bit.b[z]; 220 } 221 222 if(debug['R'] && debug['v']) 223 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 224 i, v->addr, v->etype, v->width, v->node, v->offset); 225 } 226 227 if(debug['R'] && debug['v']) 228 dumpit("pass1", &firstr->f, 1); 
229 230 /* 231 * pass 2 232 * find looping structure 233 */ 234 flowrpo(g); 235 236 if(debug['R'] && debug['v']) 237 dumpit("pass2", &firstr->f, 1); 238 239 /* 240 * pass 2.5 241 * iterate propagating fat vardef covering forward 242 * r->act records vars with a VARDEF since the last CALL. 243 * (r->act will be reused in pass 5 for something else, 244 * but we'll be done with it by then.) 245 */ 246 active = 0; 247 for(r = firstr; r != R; r = (Reg*)r->f.link) { 248 r->f.active = 0; 249 r->act = zbits; 250 } 251 for(r = firstr; r != R; r = (Reg*)r->f.link) { 252 p = r->f.prog; 253 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 254 active++; 255 walkvardef(p->to.node, r, active); 256 } 257 } 258 259 /* 260 * pass 3 261 * iterate propagating usage 262 * back until flow graph is complete 263 */ 264 loop1: 265 change = 0; 266 for(r = firstr; r != R; r = (Reg*)r->f.link) 267 r->f.active = 0; 268 for(r = firstr; r != R; r = (Reg*)r->f.link) 269 if(r->f.prog->as == ARET) 270 prop(r, zbits, zbits); 271 loop11: 272 /* pick up unreachable code */ 273 i = 0; 274 for(r = firstr; r != R; r = r1) { 275 r1 = (Reg*)r->f.link; 276 if(r1 && r1->f.active && !r->f.active) { 277 prop(r, zbits, zbits); 278 i = 1; 279 } 280 } 281 if(i) 282 goto loop11; 283 if(change) 284 goto loop1; 285 286 if(debug['R'] && debug['v']) 287 dumpit("pass3", &firstr->f, 1); 288 289 /* 290 * pass 4 291 * iterate propagating register/variable synchrony 292 * forward until graph is complete 293 */ 294 loop2: 295 change = 0; 296 for(r = firstr; r != R; r = (Reg*)r->f.link) 297 r->f.active = 0; 298 synch(firstr, zbits); 299 if(change) 300 goto loop2; 301 302 if(debug['R'] && debug['v']) 303 dumpit("pass4", &firstr->f, 1); 304 305 /* 306 * pass 4.5 307 * move register pseudo-variables into regu. 
308 */ 309 for(r = firstr; r != R; r = (Reg*)r->f.link) { 310 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 311 312 r->set.b[0] &= ~REGBITS; 313 r->use1.b[0] &= ~REGBITS; 314 r->use2.b[0] &= ~REGBITS; 315 r->refbehind.b[0] &= ~REGBITS; 316 r->refahead.b[0] &= ~REGBITS; 317 r->calbehind.b[0] &= ~REGBITS; 318 r->calahead.b[0] &= ~REGBITS; 319 r->regdiff.b[0] &= ~REGBITS; 320 r->act.b[0] &= ~REGBITS; 321 } 322 323 /* 324 * pass 5 325 * isolate regions 326 * calculate costs (paint1) 327 */ 328 r = firstr; 329 if(r) { 330 for(z=0; z<BITS; z++) 331 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 332 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 333 if(bany(&bit) && !r->f.refset) { 334 // should never happen - all variables are preset 335 if(debug['w']) 336 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 337 r->f.refset = 1; 338 } 339 } 340 for(r = firstr; r != R; r = (Reg*)r->f.link) 341 r->act = zbits; 342 rgp = region; 343 nregion = 0; 344 for(r = firstr; r != R; r = (Reg*)r->f.link) { 345 for(z=0; z<BITS; z++) 346 bit.b[z] = r->set.b[z] & 347 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 348 if(bany(&bit) && !r->f.refset) { 349 if(debug['w']) 350 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 351 r->f.refset = 1; 352 excise(&r->f); 353 } 354 for(z=0; z<BITS; z++) 355 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 356 while(bany(&bit)) { 357 i = bnum(bit); 358 rgp->enter = r; 359 rgp->varno = i; 360 change = 0; 361 paint1(r, i); 362 bit.b[i/32] &= ~(1L<<(i%32)); 363 if(change <= 0) 364 continue; 365 rgp->cost = change; 366 nregion++; 367 if(nregion >= NRGN) { 368 if(debug['R'] && debug['v']) 369 print("too many regions\n"); 370 goto brk; 371 } 372 rgp++; 373 } 374 } 375 brk: 376 qsort(region, nregion, sizeof(region[0]), rcmp); 377 378 if(debug['R'] && debug['v']) 379 dumpit("pass5", &firstr->f, 1); 380 381 /* 382 * pass 6 383 * determine used registers (paint2) 384 * replace code (paint3) 385 */ 386 rgp = 
region; 387 for(i=0; i<nregion; i++) { 388 bit = blsh(rgp->varno); 389 vreg = paint2(rgp->enter, rgp->varno); 390 vreg = allreg(vreg, rgp); 391 if(rgp->regno != 0) { 392 if(debug['R'] && debug['v']) { 393 Var *v; 394 395 v = var + rgp->varno; 396 print("registerize %N+%lld (bit=%2d et=%2E) in %R\n", 397 v->node, v->offset, rgp->varno, v->etype, rgp->regno); 398 } 399 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 400 } 401 rgp++; 402 } 403 404 if(debug['R'] && debug['v']) 405 dumpit("pass6", &firstr->f, 1); 406 407 /* 408 * free aux structures. peep allocates new ones. 409 */ 410 for(i=0; i<nvar; i++) 411 var[i].node->opt = nil; 412 flowend(g); 413 firstr = R; 414 415 /* 416 * pass 7 417 * peep-hole on basic block 418 */ 419 if(!debug['R'] || debug['P']) 420 peep(firstp); 421 422 /* 423 * eliminate nops 424 */ 425 for(p=firstp; p!=P; p=p->link) { 426 while(p->link != P && p->link->as == ANOP) 427 p->link = p->link->link; 428 if(p->to.type == D_BRANCH) 429 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 430 p->to.u.branch = p->to.u.branch->link; 431 } 432 433 if(debug['R']) { 434 if(ostats.ncvtreg || 435 ostats.nspill || 436 ostats.nreload || 437 ostats.ndelmov || 438 ostats.nvar || 439 ostats.naddr || 440 0) 441 print("\nstats\n"); 442 443 if(ostats.ncvtreg) 444 print(" %4d cvtreg\n", ostats.ncvtreg); 445 if(ostats.nspill) 446 print(" %4d spill\n", ostats.nspill); 447 if(ostats.nreload) 448 print(" %4d reload\n", ostats.nreload); 449 if(ostats.ndelmov) 450 print(" %4d delmov\n", ostats.ndelmov); 451 if(ostats.nvar) 452 print(" %4d var\n", ostats.nvar); 453 if(ostats.naddr) 454 print(" %4d addr\n", ostats.naddr); 455 456 memset(&ostats, 0, sizeof(ostats)); 457 } 458 } 459 460 static void 461 walkvardef(Node *n, Reg *r, int active) 462 { 463 Reg *r1, *r2; 464 int bn; 465 Var *v; 466 467 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 468 if(r1->f.active == active) 469 break; 470 r1->f.active = active; 471 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) 
472 break; 473 for(v=n->opt; v!=nil; v=v->nextinnode) { 474 bn = v - var; 475 r1->act.b[bn/32] |= 1L << (bn%32); 476 } 477 if(r1->f.prog->as == ACALL) 478 break; 479 } 480 481 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 482 if(r2->f.s2 != nil) 483 walkvardef(n, (Reg*)r2->f.s2, active); 484 } 485 486 /* 487 * add mov b,rn 488 * just after r 489 */ 490 void 491 addmove(Reg *r, int bn, int rn, int f) 492 { 493 Prog *p, *p1; 494 Adr *a; 495 Var *v; 496 497 p1 = mal(sizeof(*p1)); 498 clearp(p1); 499 p1->pc = 9999; 500 501 p = r->f.prog; 502 p1->link = p->link; 503 p->link = p1; 504 p1->lineno = p->lineno; 505 506 v = var + bn; 507 508 a = &p1->to; 509 a->offset = v->offset; 510 a->etype = v->etype; 511 a->type = v->name; 512 a->node = v->node; 513 a->sym = linksym(v->node->sym); 514 515 // need to clean this up with wptr and 516 // some of the defaults 517 p1->as = AMOVL; 518 switch(simtype[(uchar)v->etype]) { 519 default: 520 fatal("unknown type %E", v->etype); 521 case TINT8: 522 case TUINT8: 523 case TBOOL: 524 p1->as = AMOVB; 525 break; 526 case TINT16: 527 case TUINT16: 528 p1->as = AMOVW; 529 break; 530 case TINT64: 531 case TUINT64: 532 case TPTR64: 533 p1->as = AMOVQ; 534 break; 535 case TFLOAT32: 536 p1->as = AMOVSS; 537 break; 538 case TFLOAT64: 539 p1->as = AMOVSD; 540 break; 541 case TINT32: 542 case TUINT32: 543 case TPTR32: 544 break; 545 } 546 547 p1->from.type = rn; 548 if(!f) { 549 p1->from = *a; 550 *a = zprog.from; 551 a->type = rn; 552 if(v->etype == TUINT8) 553 p1->as = AMOVB; 554 if(v->etype == TUINT16) 555 p1->as = AMOVW; 556 } 557 if(debug['R'] && debug['v']) 558 print("%P ===add=== %P\n", p, p1); 559 ostats.nspill++; 560 } 561 562 uint32 563 doregbits(int r) 564 { 565 uint32 b; 566 567 b = 0; 568 if(r >= D_INDIR) 569 r -= D_INDIR; 570 if(r >= D_AX && r <= D_R15) 571 b |= RtoB(r); 572 else 573 if(r >= D_AL && r <= D_R15B) 574 b |= RtoB(r-D_AL+D_AX); 575 else 576 if(r >= D_AH && r <= D_BH) 577 b |= RtoB(r-D_AH+D_AX); 578 else 579 if(r >= D_X0 && r <= 
D_X0+15) 580 b |= FtoB(r); 581 return b; 582 } 583 584 static int 585 overlap(int64 o1, int w1, int64 o2, int w2) 586 { 587 int64 t1, t2; 588 589 t1 = o1+w1; 590 t2 = o2+w2; 591 592 if(!(t1 > o2 && t2 > o1)) 593 return 0; 594 595 return 1; 596 } 597 598 Bits 599 mkvar(Reg *r, Adr *a) 600 { 601 Var *v; 602 int i, t, n, et, z, flag; 603 int64 w; 604 uint32 regu; 605 int64 o; 606 Bits bit; 607 Node *node; 608 609 /* 610 * mark registers used 611 */ 612 t = a->type; 613 if(t == D_NONE) 614 goto none; 615 616 if(r != R) 617 r->use1.b[0] |= doregbits(a->index); 618 619 switch(t) { 620 default: 621 regu = doregbits(t); 622 if(regu == 0) 623 goto none; 624 bit = zbits; 625 bit.b[0] = regu; 626 return bit; 627 628 case D_ADDR: 629 a->type = a->index; 630 bit = mkvar(r, a); 631 setaddrs(bit); 632 a->type = t; 633 ostats.naddr++; 634 goto none; 635 636 case D_EXTERN: 637 case D_STATIC: 638 case D_PARAM: 639 case D_AUTO: 640 n = t; 641 break; 642 } 643 644 node = a->node; 645 if(node == N || node->op != ONAME || node->orig == N) 646 goto none; 647 node = node->orig; 648 if(node->orig != node) 649 fatal("%D: bad node", a); 650 if(node->sym == S || node->sym->name[0] == '.') 651 goto none; 652 et = a->etype; 653 o = a->offset; 654 w = a->width; 655 if(w < 0) 656 fatal("bad width %lld for %D", w, a); 657 658 flag = 0; 659 for(i=0; i<nvar; i++) { 660 v = var+i; 661 if(v->node == node && v->name == n) { 662 if(v->offset == o) 663 if(v->etype == et) 664 if(v->width == w) 665 return blsh(i); 666 667 // if they overlaps, disable both 668 if(overlap(v->offset, v->width, o, w)) { 669 // print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et); 670 v->addr = 1; 671 flag = 1; 672 } 673 } 674 } 675 switch(et) { 676 case 0: 677 case TFUNC: 678 goto none; 679 } 680 681 if(nvar >= NVAR) { 682 if(debug['w'] > 1 && node != N) 683 fatal("variable not optimized: %#N", node); 684 685 // If we're not tracking a word in a variable, mark the rest as 686 // 
having its address taken, so that we keep the whole thing 687 // live at all calls. otherwise we might optimize away part of 688 // a variable but not all of it. 689 for(i=0; i<nvar; i++) { 690 v = var+i; 691 if(v->node == node) 692 v->addr = 1; 693 } 694 goto none; 695 } 696 697 i = nvar; 698 nvar++; 699 v = var+i; 700 v->offset = o; 701 v->name = n; 702 v->etype = et; 703 v->width = w; 704 v->addr = flag; // funny punning 705 v->node = node; 706 707 // node->opt is the head of a linked list 708 // of Vars within the given Node, so that 709 // we can start at a Var and find all the other 710 // Vars in the same Go variable. 711 v->nextinnode = node->opt; 712 node->opt = v; 713 714 bit = blsh(i); 715 if(n == D_EXTERN || n == D_STATIC) 716 for(z=0; z<BITS; z++) 717 externs.b[z] |= bit.b[z]; 718 if(n == D_PARAM) 719 for(z=0; z<BITS; z++) 720 params.b[z] |= bit.b[z]; 721 722 if(node->class == PPARAM) 723 for(z=0; z<BITS; z++) 724 ivar.b[z] |= bit.b[z]; 725 if(node->class == PPARAMOUT) 726 for(z=0; z<BITS; z++) 727 ovar.b[z] |= bit.b[z]; 728 729 // Treat values with their address taken as live at calls, 730 // because the garbage collector's liveness analysis in ../gc/plive.c does. 731 // These must be consistent or else we will elide stores and the garbage 732 // collector will see uninitialized data. 733 // The typical case where our own analysis is out of sync is when the 734 // node appears to have its address taken but that code doesn't actually 735 // get generated and therefore doesn't show up as an address being 736 // taken when we analyze the instruction stream. 737 // One instance of this case is when a closure uses the same name as 738 // an outer variable for one of its own variables declared with :=. 739 // The parser flags the outer variable as possibly shared, and therefore 740 // sets addrtaken, even though it ends up not being actually shared. 741 // If we were better about _ elision, _ = &x would suffice too. 
742 // The broader := in a closure problem is mentioned in a comment in 743 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 744 if(node->addrtaken) 745 v->addr = 1; 746 747 // Disable registerization for globals, because: 748 // (1) we might panic at any time and we want the recovery code 749 // to see the latest values (issue 1304). 750 // (2) we don't know what pointers might point at them and we want 751 // loads via those pointers to see updated values and vice versa (issue 7995). 752 // 753 // Disable registerization for results if using defer, because the deferred func 754 // might recover and return, causing the current values to be used. 755 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 756 v->addr = 1; 757 758 if(debug['R']) 759 print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 760 ostats.nvar++; 761 762 return bit; 763 764 none: 765 return zbits; 766 } 767 768 void 769 prop(Reg *r, Bits ref, Bits cal) 770 { 771 Reg *r1, *r2; 772 int z, i, j; 773 Var *v, *v1; 774 775 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 776 for(z=0; z<BITS; z++) { 777 ref.b[z] |= r1->refahead.b[z]; 778 if(ref.b[z] != r1->refahead.b[z]) { 779 r1->refahead.b[z] = ref.b[z]; 780 change++; 781 } 782 cal.b[z] |= r1->calahead.b[z]; 783 if(cal.b[z] != r1->calahead.b[z]) { 784 r1->calahead.b[z] = cal.b[z]; 785 change++; 786 } 787 } 788 switch(r1->f.prog->as) { 789 case ACALL: 790 if(noreturn(r1->f.prog)) 791 break; 792 793 // Mark all input variables (ivar) as used, because that's what the 794 // liveness bitmaps say. The liveness bitmaps say that so that a 795 // panic will not show stale values in the parameter dump. 796 // Mark variables with a recent VARDEF (r1->act) as used, 797 // so that the optimizer flushes initializations to memory, 798 // so that if a garbage collection happens during this CALL, 799 // the collector will see initialized memory. Again this is to 800 // match what the liveness bitmaps say. 
801 for(z=0; z<BITS; z++) { 802 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 803 ref.b[z] = 0; 804 } 805 806 // cal.b is the current approximation of what's live across the call. 807 // Every bit in cal.b is a single stack word. For each such word, 808 // find all the other tracked stack words in the same Go variable 809 // (struct/slice/string/interface) and mark them live too. 810 // This is necessary because the liveness analysis for the garbage 811 // collector works at variable granularity, not at word granularity. 812 // It is fundamental for slice/string/interface: the garbage collector 813 // needs the whole value, not just some of the words, in order to 814 // interpret the other bits correctly. Specifically, slice needs a consistent 815 // ptr and cap, string needs a consistent ptr and len, and interface 816 // needs a consistent type word and data word. 817 for(z=0; z<BITS; z++) { 818 if(cal.b[z] == 0) 819 continue; 820 for(i=0; i<32; i++) { 821 if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) 822 continue; 823 v = var+z*32+i; 824 if(v->node->opt == nil) // v represents fixed register, not Go variable 825 continue; 826 827 // v->node->opt is the head of a linked list of Vars 828 // corresponding to tracked words from the Go variable v->node. 829 // Walk the list and set all the bits. 830 // For a large struct this could end up being quadratic: 831 // after the first setting, the outer loop (for z, i) would see a 1 bit 832 // for all of the remaining words in the struct, and for each such 833 // word would go through and turn on all the bits again. 834 // To avoid the quadratic behavior, we only turn on the bits if 835 // v is the head of the list or if the head's bit is not yet turned on. 836 // This will set the bits at most twice, keeping the overall loop linear. 
837 v1 = v->node->opt; 838 j = v1 - var; 839 if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { 840 for(; v1 != nil; v1 = v1->nextinnode) { 841 j = v1 - var; 842 cal.b[j/32] |= 1<<(j&31); 843 } 844 } 845 } 846 } 847 break; 848 849 case ATEXT: 850 for(z=0; z<BITS; z++) { 851 cal.b[z] = 0; 852 ref.b[z] = 0; 853 } 854 break; 855 856 case ARET: 857 for(z=0; z<BITS; z++) { 858 cal.b[z] = externs.b[z] | ovar.b[z]; 859 ref.b[z] = 0; 860 } 861 break; 862 } 863 for(z=0; z<BITS; z++) { 864 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 865 r1->use1.b[z] | r1->use2.b[z]; 866 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 867 r1->refbehind.b[z] = ref.b[z]; 868 r1->calbehind.b[z] = cal.b[z]; 869 } 870 if(r1->f.active) 871 break; 872 r1->f.active = 1; 873 } 874 for(; r != r1; r = (Reg*)r->f.p1) 875 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 876 prop(r2, r->refbehind, r->calbehind); 877 } 878 879 void 880 synch(Reg *r, Bits dif) 881 { 882 Reg *r1; 883 int z; 884 885 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 886 for(z=0; z<BITS; z++) { 887 dif.b[z] = (dif.b[z] & 888 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 889 r1->set.b[z] | r1->regdiff.b[z]; 890 if(dif.b[z] != r1->regdiff.b[z]) { 891 r1->regdiff.b[z] = dif.b[z]; 892 change++; 893 } 894 } 895 if(r1->f.active) 896 break; 897 r1->f.active = 1; 898 for(z=0; z<BITS; z++) 899 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 900 if(r1->f.s2 != nil) 901 synch((Reg*)r1->f.s2, dif); 902 } 903 } 904 905 uint32 906 allreg(uint32 b, Rgn *r) 907 { 908 Var *v; 909 int i; 910 911 v = var + r->varno; 912 r->regno = 0; 913 switch(v->etype) { 914 915 default: 916 fatal("unknown etype %d/%E", bitno(b), v->etype); 917 break; 918 919 case TINT8: 920 case TUINT8: 921 case TINT16: 922 case TUINT16: 923 case TINT32: 924 case TUINT32: 925 case TINT64: 926 case TUINT64: 927 case TINT: 928 case TUINT: 929 case TUINTPTR: 930 case TBOOL: 931 case TPTR32: 932 case TPTR64: 933 i = BtoR(~b); 934 if(i && r->cost > 0) { 935 
r->regno = i; 936 return RtoB(i); 937 } 938 break; 939 940 case TFLOAT32: 941 case TFLOAT64: 942 i = BtoF(~b); 943 if(i && r->cost > 0) { 944 r->regno = i; 945 return FtoB(i); 946 } 947 break; 948 } 949 return 0; 950 } 951 952 void 953 paint1(Reg *r, int bn) 954 { 955 Reg *r1; 956 int z; 957 uint32 bb; 958 959 z = bn/32; 960 bb = 1L<<(bn%32); 961 if(r->act.b[z] & bb) 962 return; 963 for(;;) { 964 if(!(r->refbehind.b[z] & bb)) 965 break; 966 r1 = (Reg*)r->f.p1; 967 if(r1 == R) 968 break; 969 if(!(r1->refahead.b[z] & bb)) 970 break; 971 if(r1->act.b[z] & bb) 972 break; 973 r = r1; 974 } 975 976 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 977 change -= CLOAD * r->f.loop; 978 } 979 for(;;) { 980 r->act.b[z] |= bb; 981 982 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 983 if(r->use1.b[z] & bb) 984 change += CREF * r->f.loop; 985 if((r->use2.b[z]|r->set.b[z]) & bb) 986 change += CREF * r->f.loop; 987 } 988 989 if(STORE(r) & r->regdiff.b[z] & bb) { 990 change -= CLOAD * r->f.loop; 991 } 992 993 if(r->refbehind.b[z] & bb) 994 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 995 if(r1->refahead.b[z] & bb) 996 paint1(r1, bn); 997 998 if(!(r->refahead.b[z] & bb)) 999 break; 1000 r1 = (Reg*)r->f.s2; 1001 if(r1 != R) 1002 if(r1->refbehind.b[z] & bb) 1003 paint1(r1, bn); 1004 r = (Reg*)r->f.s1; 1005 if(r == R) 1006 break; 1007 if(r->act.b[z] & bb) 1008 break; 1009 if(!(r->refbehind.b[z] & bb)) 1010 break; 1011 } 1012 } 1013 1014 uint32 1015 regset(Reg *r, uint32 bb) 1016 { 1017 uint32 b, set; 1018 Adr v; 1019 int c; 1020 1021 set = 0; 1022 v = zprog.from; 1023 while(b = bb & ~(bb-1)) { 1024 v.type = b & 0xFFFF? 
BtoR(b): BtoF(b); 1025 if(v.type == 0) 1026 fatal("zero v.type for %#ux", b); 1027 c = copyu(r->f.prog, &v, nil); 1028 if(c == 3) 1029 set |= b; 1030 bb &= ~b; 1031 } 1032 return set; 1033 } 1034 1035 uint32 1036 reguse(Reg *r, uint32 bb) 1037 { 1038 uint32 b, set; 1039 Adr v; 1040 int c; 1041 1042 set = 0; 1043 v = zprog.from; 1044 while(b = bb & ~(bb-1)) { 1045 v.type = b & 0xFFFF? BtoR(b): BtoF(b); 1046 c = copyu(r->f.prog, &v, nil); 1047 if(c == 1 || c == 2 || c == 4) 1048 set |= b; 1049 bb &= ~b; 1050 } 1051 return set; 1052 } 1053 1054 uint32 1055 paint2(Reg *r, int bn) 1056 { 1057 Reg *r1; 1058 int z; 1059 uint32 bb, vreg, x; 1060 1061 z = bn/32; 1062 bb = 1L << (bn%32); 1063 vreg = regbits; 1064 if(!(r->act.b[z] & bb)) 1065 return vreg; 1066 for(;;) { 1067 if(!(r->refbehind.b[z] & bb)) 1068 break; 1069 r1 = (Reg*)r->f.p1; 1070 if(r1 == R) 1071 break; 1072 if(!(r1->refahead.b[z] & bb)) 1073 break; 1074 if(!(r1->act.b[z] & bb)) 1075 break; 1076 r = r1; 1077 } 1078 for(;;) { 1079 r->act.b[z] &= ~bb; 1080 1081 vreg |= r->regu; 1082 1083 if(r->refbehind.b[z] & bb) 1084 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1085 if(r1->refahead.b[z] & bb) 1086 vreg |= paint2(r1, bn); 1087 1088 if(!(r->refahead.b[z] & bb)) 1089 break; 1090 r1 = (Reg*)r->f.s2; 1091 if(r1 != R) 1092 if(r1->refbehind.b[z] & bb) 1093 vreg |= paint2(r1, bn); 1094 r = (Reg*)r->f.s1; 1095 if(r == R) 1096 break; 1097 if(!(r->act.b[z] & bb)) 1098 break; 1099 if(!(r->refbehind.b[z] & bb)) 1100 break; 1101 } 1102 1103 bb = vreg; 1104 for(; r; r=(Reg*)r->f.s1) { 1105 x = r->regu & ~bb; 1106 if(x) { 1107 vreg |= reguse(r, x); 1108 bb |= regset(r, x); 1109 } 1110 } 1111 return vreg; 1112 } 1113 1114 void 1115 paint3(Reg *r, int bn, int32 rb, int rn) 1116 { 1117 Reg *r1; 1118 Prog *p; 1119 int z; 1120 uint32 bb; 1121 1122 z = bn/32; 1123 bb = 1L << (bn%32); 1124 if(r->act.b[z] & bb) 1125 return; 1126 for(;;) { 1127 if(!(r->refbehind.b[z] & bb)) 1128 break; 1129 r1 = (Reg*)r->f.p1; 1130 if(r1 
== R) 1131 break; 1132 if(!(r1->refahead.b[z] & bb)) 1133 break; 1134 if(r1->act.b[z] & bb) 1135 break; 1136 r = r1; 1137 } 1138 1139 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1140 addmove(r, bn, rn, 0); 1141 for(;;) { 1142 r->act.b[z] |= bb; 1143 p = r->f.prog; 1144 1145 if(r->use1.b[z] & bb) { 1146 if(debug['R'] && debug['v']) 1147 print("%P", p); 1148 addreg(&p->from, rn); 1149 if(debug['R'] && debug['v']) 1150 print(" ===change== %P\n", p); 1151 } 1152 if((r->use2.b[z]|r->set.b[z]) & bb) { 1153 if(debug['R'] && debug['v']) 1154 print("%P", p); 1155 addreg(&p->to, rn); 1156 if(debug['R'] && debug['v']) 1157 print(" ===change== %P\n", p); 1158 } 1159 1160 if(STORE(r) & r->regdiff.b[z] & bb) 1161 addmove(r, bn, rn, 1); 1162 r->regu |= rb; 1163 1164 if(r->refbehind.b[z] & bb) 1165 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1166 if(r1->refahead.b[z] & bb) 1167 paint3(r1, bn, rb, rn); 1168 1169 if(!(r->refahead.b[z] & bb)) 1170 break; 1171 r1 = (Reg*)r->f.s2; 1172 if(r1 != R) 1173 if(r1->refbehind.b[z] & bb) 1174 paint3(r1, bn, rb, rn); 1175 r = (Reg*)r->f.s1; 1176 if(r == R) 1177 break; 1178 if(r->act.b[z] & bb) 1179 break; 1180 if(!(r->refbehind.b[z] & bb)) 1181 break; 1182 } 1183 } 1184 1185 void 1186 addreg(Adr *a, int rn) 1187 { 1188 a->sym = nil; 1189 a->offset = 0; 1190 a->type = rn; 1191 1192 ostats.ncvtreg++; 1193 } 1194 1195 int32 1196 RtoB(int r) 1197 { 1198 1199 if(r < D_AX || r > D_R15) 1200 return 0; 1201 return 1L << (r-D_AX); 1202 } 1203 1204 int 1205 BtoR(int32 b) 1206 { 1207 b &= 0xffffL; 1208 if(nacl) 1209 b &= ~((1<<(D_BP-D_AX)) | (1<<(D_R15-D_AX))); 1210 if(b == 0) 1211 return 0; 1212 return bitno(b) + D_AX; 1213 } 1214 1215 /* 1216 * bit reg 1217 * 16 X0 1218 * ... 
1219 * 31 X15 1220 */ 1221 int32 1222 FtoB(int f) 1223 { 1224 if(f < D_X0 || f > D_X15) 1225 return 0; 1226 return 1L << (f - D_X0 + 16); 1227 } 1228 1229 int 1230 BtoF(int32 b) 1231 { 1232 1233 b &= 0xFFFF0000L; 1234 if(b == 0) 1235 return 0; 1236 return bitno(b) - 16 + D_X0; 1237 } 1238 1239 void 1240 dumpone(Flow *f, int isreg) 1241 { 1242 int z; 1243 Bits bit; 1244 Reg *r; 1245 1246 print("%d:%P", f->loop, f->prog); 1247 if(isreg) { 1248 r = (Reg*)f; 1249 for(z=0; z<BITS; z++) 1250 bit.b[z] = 1251 r->set.b[z] | 1252 r->use1.b[z] | 1253 r->use2.b[z] | 1254 r->refbehind.b[z] | 1255 r->refahead.b[z] | 1256 r->calbehind.b[z] | 1257 r->calahead.b[z] | 1258 r->regdiff.b[z] | 1259 r->act.b[z] | 1260 0; 1261 if(bany(&bit)) { 1262 print("\t"); 1263 if(bany(&r->set)) 1264 print(" s:%Q", r->set); 1265 if(bany(&r->use1)) 1266 print(" u1:%Q", r->use1); 1267 if(bany(&r->use2)) 1268 print(" u2:%Q", r->use2); 1269 if(bany(&r->refbehind)) 1270 print(" rb:%Q ", r->refbehind); 1271 if(bany(&r->refahead)) 1272 print(" ra:%Q ", r->refahead); 1273 if(bany(&r->calbehind)) 1274 print(" cb:%Q ", r->calbehind); 1275 if(bany(&r->calahead)) 1276 print(" ca:%Q ", r->calahead); 1277 if(bany(&r->regdiff)) 1278 print(" d:%Q ", r->regdiff); 1279 if(bany(&r->act)) 1280 print(" a:%Q ", r->act); 1281 } 1282 } 1283 print("\n"); 1284 } 1285 1286 void 1287 dumpit(char *str, Flow *r0, int isreg) 1288 { 1289 Flow *r, *r1; 1290 1291 print("\n%s\n", str); 1292 for(r = r0; r != nil; r = r->link) { 1293 dumpone(r, isreg); 1294 r1 = r->p2; 1295 if(r1 != nil) { 1296 print(" pred:"); 1297 for(; r1 != nil; r1 = r1->p2link) 1298 print(" %.4ud", (int)r1->prog->pc); 1299 print("\n"); 1300 } 1301 // r1 = r->s1; 1302 // if(r1 != R) { 1303 // print(" succ:"); 1304 // for(; r1 != R; r1 = r1->s1) 1305 // print(" %.4ud", (int)r1->prog->pc); 1306 // print("\n"); 1307 // } 1308 } 1309 }