github.com/reiver/go@v0.0.0-20150109200633-1d0c7792f172/src/cmd/8g/reg.c (about) 1 // Derived from Inferno utils/6c/reg.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 16 /* 8 integer + 8 floating */ 37 #define REGBITS ((uint64)0xffffull) 38 /*c2go enum { 39 NREGVAR = 16, 40 REGBITS = (1<<NREGVAR) - 1, 41 }; 42 */ 43 44 static Reg* firstr; 45 static int first = 1; 46 47 int 48 rcmp(const void *a1, const void *a2) 49 { 50 Rgn *p1, *p2; 51 int c1, c2; 52 53 p1 = (Rgn*)a1; 54 p2 = (Rgn*)a2; 55 c1 = p2->cost; 56 c2 = p1->cost; 57 if(c1 -= c2) 58 return c1; 59 return p2->varno - p1->varno; 60 } 61 62 static void 63 setaddrs(Bits bit) 64 { 65 int i, n; 66 Var *v; 67 Node *node; 68 69 while(bany(&bit)) { 70 // convert each bit to a variable 71 i = bnum(bit); 72 node = var[i].node; 73 n = var[i].name; 74 biclr(&bit, i); 75 76 // disable all pieces of that variable 77 for(i=0; i<nvar; i++) { 78 v = var+i; 79 if(v->node == node && v->name == n) 80 v->addr = 2; 81 } 82 } 83 } 84 85 static char* regname[] = { 86 ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di", 87 ".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7", 88 }; 89 90 static Node* regnodes[NREGVAR]; 91 92 static void walkvardef(Node *n, Reg *r, int active); 93 94 void 95 regopt(Prog *firstp) 96 { 97 Reg *r, *r1; 98 Prog *p; 99 Graph *g; 100 ProgInfo info; 101 int i, z, active; 102 uint32 vreg; 103 Bits bit; 104 105 if(first) { 106 fmtinstall('Q', Qconv); 107 exregoffset = D_DI; // no externals 108 first = 0; 109 } 110 111 mergetemp(firstp); 112 113 /* 114 * control flow is more complicated in generated go code 115 * than in generated c code. define pseudo-variables for 116 * registers, so we have complete register usage information. 117 */ 118 nvar = NREGVAR; 119 memset(var, 0, NREGVAR*sizeof var[0]); 120 for(i=0; i<NREGVAR; i++) { 121 if(regnodes[i] == N) 122 regnodes[i] = newname(lookup(regname[i])); 123 var[i].node = regnodes[i]; 124 } 125 126 regbits = RtoB(D_SP); 127 for(z=0; z<BITS; z++) { 128 externs.b[z] = 0; 129 params.b[z] = 0; 130 consts.b[z] = 0; 131 addrs.b[z] = 0; 132 ivar.b[z] = 0; 133 ovar.b[z] = 0; 134 } 135 136 /* 137 * pass 1 138 * build aux data structure 139 * allocate pcs 140 * find use and set of variables 141 */ 142 g = flowstart(firstp, sizeof(Reg)); 143 if(g == nil) { 144 for(i=0; i<nvar; i++) 145 var[i].node->opt = nil; 146 return; 147 } 148 149 firstr = (Reg*)g->start; 150 151 for(r = firstr; r != R; r = (Reg*)r->f.link) { 152 p = r->f.prog; 153 if(p->as == AVARDEF || p->as == AVARKILL) 154 continue; 155 proginfo(&info, p); 156 157 // Avoid making variables for direct-called functions. 158 if(p->as == ACALL && p->to.type == D_EXTERN) 159 continue; 160 161 r->use1.b[0] |= info.reguse | info.regindex; 162 r->set.b[0] |= info.regset; 163 164 bit = mkvar(r, &p->from); 165 if(bany(&bit)) { 166 if(info.flags & LeftAddr) 167 setaddrs(bit); 168 if(info.flags & LeftRead) 169 for(z=0; z<BITS; z++) 170 r->use1.b[z] |= bit.b[z]; 171 if(info.flags & LeftWrite) 172 for(z=0; z<BITS; z++) 173 r->set.b[z] |= bit.b[z]; 174 } 175 176 bit = mkvar(r, &p->to); 177 if(bany(&bit)) { 178 if(info.flags & RightAddr) 179 setaddrs(bit); 180 if(info.flags & RightRead) 181 for(z=0; z<BITS; z++) 182 r->use2.b[z] |= bit.b[z]; 183 if(info.flags & RightWrite) 184 for(z=0; z<BITS; z++) 185 r->set.b[z] |= bit.b[z]; 186 } 187 } 188 if(firstr == R) 189 return; 190 191 for(i=0; i<nvar; i++) { 192 Var *v = var+i; 193 if(v->addr) { 194 bit = blsh(i); 195 for(z=0; z<BITS; z++) 196 addrs.b[z] |= bit.b[z]; 197 } 198 199 if(debug['R'] && debug['v']) 200 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 201 i, v->addr, v->etype, v->width, v->node, v->offset); 202 } 203 204 if(debug['R'] && debug['v']) 205 dumpit("pass1", &firstr->f, 1); 206 207 /* 208 * pass 2 209 * find looping structure 210 */ 211 flowrpo(g); 212 213 if(debug['R'] && debug['v']) 214 dumpit("pass2", &firstr->f, 1); 215 216 /* 217 * pass 2.5 218 * iterate propagating fat vardef covering forward 219 * r->act records vars with a VARDEF since the last CALL. 220 * (r->act will be reused in pass 5 for something else, 221 * but we'll be done with it by then.) 222 */ 223 active = 0; 224 for(r = firstr; r != R; r = (Reg*)r->f.link) { 225 r->f.active = 0; 226 r->act = zbits; 227 } 228 for(r = firstr; r != R; r = (Reg*)r->f.link) { 229 p = r->f.prog; 230 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 231 active++; 232 walkvardef(p->to.node, r, active); 233 } 234 } 235 236 /* 237 * pass 3 238 * iterate propagating usage 239 * back until flow graph is complete 240 */ 241 loop1: 242 change = 0; 243 for(r = firstr; r != R; r = (Reg*)r->f.link) 244 r->f.active = 0; 245 for(r = firstr; r != R; r = (Reg*)r->f.link) 246 if(r->f.prog->as == ARET) 247 prop(r, zbits, zbits); 248 loop11: 249 /* pick up unreachable code */ 250 i = 0; 251 for(r = firstr; r != R; r = r1) { 252 r1 = (Reg*)r->f.link; 253 if(r1 && r1->f.active && !r->f.active) { 254 prop(r, zbits, zbits); 255 i = 1; 256 } 257 } 258 if(i) 259 goto loop11; 260 if(change) 261 goto loop1; 262 263 if(debug['R'] && debug['v']) 264 dumpit("pass3", &firstr->f, 1); 265 266 /* 267 * pass 4 268 * iterate propagating register/variable synchrony 269 * forward until graph is complete 270 */ 271 loop2: 272 change = 0; 273 for(r = firstr; r != R; r = (Reg*)r->f.link) 274 r->f.active = 0; 275 synch(firstr, zbits); 276 if(change) 277 goto loop2; 278 279 if(debug['R'] && debug['v']) 280 dumpit("pass4", &firstr->f, 1); 281 282 /* 283 * pass 4.5 284 * move register pseudo-variables into regu. 285 */ 286 for(r = firstr; r != R; r = (Reg*)r->f.link) { 287 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 288 289 r->set.b[0] &= ~REGBITS; 290 r->use1.b[0] &= ~REGBITS; 291 r->use2.b[0] &= ~REGBITS; 292 r->refbehind.b[0] &= ~REGBITS; 293 r->refahead.b[0] &= ~REGBITS; 294 r->calbehind.b[0] &= ~REGBITS; 295 r->calahead.b[0] &= ~REGBITS; 296 r->regdiff.b[0] &= ~REGBITS; 297 r->act.b[0] &= ~REGBITS; 298 } 299 300 /* 301 * pass 5 302 * isolate regions 303 * calculate costs (paint1) 304 */ 305 r = firstr; 306 if(r) { 307 for(z=0; z<BITS; z++) 308 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 309 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 310 if(bany(&bit) && !r->f.refset) { 311 // should never happen - all variables are preset 312 if(debug['w']) 313 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 314 r->f.refset = 1; 315 } 316 } 317 for(r = firstr; r != R; r = (Reg*)r->f.link) 318 r->act = zbits; 319 rgp = region; 320 nregion = 0; 321 for(r = firstr; r != R; r = (Reg*)r->f.link) { 322 for(z=0; z<BITS; z++) 323 bit.b[z] = r->set.b[z] & 324 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 325 if(bany(&bit) && !r->f.refset) { 326 if(debug['w']) 327 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 328 r->f.refset = 1; 329 excise(&r->f); 330 } 331 for(z=0; z<BITS; z++) 332 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 333 while(bany(&bit)) { 334 i = bnum(bit); 335 rgp->enter = r; 336 rgp->varno = i; 337 change = 0; 338 paint1(r, i); 339 biclr(&bit, i); 340 if(change <= 0) 341 continue; 342 rgp->cost = change; 343 nregion++; 344 if(nregion >= NRGN) { 345 if(debug['R'] && debug['v']) 346 print("too many regions\n"); 347 goto brk; 348 } 349 rgp++; 350 } 351 } 352 brk: 353 qsort(region, nregion, sizeof(region[0]), rcmp); 354 355 /* 356 * pass 6 357 * determine used registers (paint2) 358 * replace code (paint3) 359 */ 360 rgp = region; 361 if(debug['R'] && debug['v']) 362 print("\nregisterizing\n"); 363 for(i=0; i<nregion; i++) { 364 if(debug['R'] && debug['v']) 365 print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc); 366 bit = blsh(rgp->varno); 367 vreg = paint2(rgp->enter, rgp->varno, 0); 368 vreg = allreg(vreg, rgp); 369 if(rgp->regno != 0) 370 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 371 rgp++; 372 } 373 374 /* 375 * free aux structures. peep allocates new ones. 376 */ 377 for(i=0; i<nvar; i++) 378 var[i].node->opt = nil; 379 flowend(g); 380 firstr = R; 381 382 if(debug['R'] && debug['v']) { 383 // Rebuild flow graph, since we inserted instructions 384 g = flowstart(firstp, sizeof(Reg)); 385 firstr = (Reg*)g->start; 386 dumpit("pass6", &firstr->f, 1); 387 flowend(g); 388 firstr = R; 389 } 390 391 /* 392 * pass 7 393 * peep-hole on basic block 394 */ 395 if(!debug['R'] || debug['P']) 396 peep(firstp); 397 398 /* 399 * eliminate nops 400 */ 401 for(p=firstp; p!=P; p=p->link) { 402 while(p->link != P && p->link->as == ANOP) 403 p->link = p->link->link; 404 if(p->to.type == D_BRANCH) 405 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 406 p->to.u.branch = p->to.u.branch->link; 407 } 408 409 if(!use_sse) 410 for(p=firstp; p!=P; p=p->link) { 411 if(p->from.type >= D_X0 && p->from.type <= D_X7) 412 fatal("invalid use of %R with GO386=387: %P", p->from.type, p); 413 if(p->to.type >= D_X0 && p->to.type <= D_X7) 414 fatal("invalid use of %R with GO386=387: %P", p->to.type, p); 415 } 416 417 if(debug['R']) { 418 if(ostats.ncvtreg || 419 ostats.nspill || 420 ostats.nreload || 421 ostats.ndelmov || 422 ostats.nvar || 423 ostats.naddr || 424 0) 425 print("\nstats\n"); 426 427 if(ostats.ncvtreg) 428 print(" %4d cvtreg\n", ostats.ncvtreg); 429 if(ostats.nspill) 430 print(" %4d spill\n", ostats.nspill); 431 if(ostats.nreload) 432 print(" %4d reload\n", ostats.nreload); 433 if(ostats.ndelmov) 434 print(" %4d delmov\n", ostats.ndelmov); 435 if(ostats.nvar) 436 print(" %4d var\n", ostats.nvar); 437 if(ostats.naddr) 438 print(" %4d addr\n", ostats.naddr); 439 440 memset(&ostats, 0, sizeof(ostats)); 441 } 442 } 443 444 static void 445 walkvardef(Node *n, Reg *r, int active) 446 { 447 Reg *r1, *r2; 448 int bn; 449 Var *v; 450 451 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 452 if(r1->f.active == active) 453 break; 454 r1->f.active = active; 455 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) 456 break; 457 for(v=n->opt; v!=nil; v=v->nextinnode) { 458 bn = v - var; 459 biset(&r1->act, bn); 460 } 461 if(r1->f.prog->as == ACALL) 462 break; 463 } 464 465 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 466 if(r2->f.s2 != nil) 467 walkvardef(n, (Reg*)r2->f.s2, active); 468 } 469 470 /* 471 * add mov b,rn 472 * just after r 473 */ 474 void 475 addmove(Reg *r, int bn, int rn, int f) 476 { 477 Prog *p, *p1; 478 Adr *a; 479 Var *v; 480 481 p1 = mal(sizeof(*p1)); 482 clearp(p1); 483 p1->pc = 9999; 484 485 p = r->f.prog; 486 p1->link = p->link; 487 p->link = p1; 488 p1->lineno = p->lineno; 489 490 v = var + bn; 491 492 a = &p1->to; 493 a->offset = v->offset; 494 a->etype = v->etype; 495 a->type = v->name; 496 a->node = v->node; 497 a->sym = linksym(v->node->sym); 498 499 // need to clean this up with wptr and 500 // some of the defaults 501 p1->as = AMOVL; 502 switch(v->etype) { 503 default: 504 fatal("unknown type %E", v->etype); 505 case TINT8: 506 case TUINT8: 507 case TBOOL: 508 p1->as = AMOVB; 509 break; 510 case TINT16: 511 case TUINT16: 512 p1->as = AMOVW; 513 break; 514 case TFLOAT32: 515 p1->as = AMOVSS; 516 break; 517 case TFLOAT64: 518 p1->as = AMOVSD; 519 break; 520 case TINT: 521 case TUINT: 522 case TINT32: 523 case TUINT32: 524 case TPTR32: 525 break; 526 } 527 528 p1->from.type = rn; 529 if(!f) { 530 p1->from = *a; 531 *a = zprog.from; 532 a->type = rn; 533 if(v->etype == TUINT8) 534 p1->as = AMOVB; 535 if(v->etype == TUINT16) 536 p1->as = AMOVW; 537 } 538 if(debug['R'] && debug['v']) 539 print("%P ===add=== %P\n", p, p1); 540 ostats.nspill++; 541 } 542 543 uint32 544 doregbits(int r) 545 { 546 uint32 b; 547 548 b = 0; 549 if(r >= D_INDIR) 550 r -= D_INDIR; 551 if(r >= D_AX && r <= D_DI) 552 b |= RtoB(r); 553 else 554 if(r >= D_AL && r <= D_BL) 555 b |= RtoB(r-D_AL+D_AX); 556 else 557 if(r >= D_AH && r <= D_BH) 558 b |= RtoB(r-D_AH+D_AX); 559 else 560 if(r >= D_X0 && r <= D_X0+7) 561 b |= FtoB(r); 562 return b; 563 } 564 565 static int 566 overlap(int32 o1, int w1, int32 o2, int w2) 567 { 568 int32 t1, t2; 569 570 t1 = o1+w1; 571 t2 = o2+w2; 572 573 if(!(t1 > o2 && t2 > o1)) 574 return 0; 575 576 return 1; 577 } 578 579 Bits 580 mkvar(Reg *r, Adr *a) 581 { 582 Var *v; 583 int i, t, n, et, z, w, flag, regu; 584 int32 o; 585 Bits bit; 586 Node *node; 587 588 /* 589 * mark registers used 590 */ 591 t = a->type; 592 if(t == D_NONE) 593 goto none; 594 595 if(r != R) 596 r->use1.b[0] |= doregbits(a->index); 597 598 switch(t) { 599 default: 600 regu = doregbits(t); 601 if(regu == 0) 602 goto none; 603 bit = zbits; 604 bit.b[0] = regu; 605 return bit; 606 607 case D_ADDR: 608 a->type = a->index; 609 bit = mkvar(r, a); 610 setaddrs(bit); 611 a->type = t; 612 ostats.naddr++; 613 goto none; 614 615 case D_EXTERN: 616 case D_STATIC: 617 case D_PARAM: 618 case D_AUTO: 619 n = t; 620 break; 621 } 622 623 node = a->node; 624 if(node == N || node->op != ONAME || node->orig == N) 625 goto none; 626 node = node->orig; 627 if(node->orig != node) 628 fatal("%D: bad node", a); 629 if(node->sym == S || node->sym->name[0] == '.') 630 goto none; 631 et = a->etype; 632 o = a->offset; 633 w = a->width; 634 if(w < 0) 635 fatal("bad width %d for %D", w, a); 636 637 flag = 0; 638 for(i=0; i<nvar; i++) { 639 v = var+i; 640 if(v->node == node && v->name == n) { 641 if(v->offset == o) 642 if(v->etype == et) 643 if(v->width == w) 644 return blsh(i); 645 646 // if they overlap, disable both 647 if(overlap(v->offset, v->width, o, w)) { 648 if(debug['R']) 649 print("disable %s\n", node->sym->name); 650 v->addr = 1; 651 flag = 1; 652 } 653 } 654 } 655 656 switch(et) { 657 case 0: 658 case TFUNC: 659 goto none; 660 } 661 662 if(nvar >= NVAR) { 663 if(debug['w'] > 1 && node != N) 664 fatal("variable not optimized: %D", a); 665 666 // If we're not tracking a word in a variable, mark the rest as 667 // having its address taken, so that we keep the whole thing 668 // live at all calls. otherwise we might optimize away part of 669 // a variable but not all of it. 670 for(i=0; i<nvar; i++) { 671 v = var+i; 672 if(v->node == node) 673 v->addr = 1; 674 } 675 goto none; 676 } 677 678 i = nvar; 679 nvar++; 680 v = var+i; 681 v->offset = o; 682 v->name = n; 683 v->etype = et; 684 v->width = w; 685 v->addr = flag; // funny punning 686 v->node = node; 687 688 // node->opt is the head of a linked list 689 // of Vars within the given Node, so that 690 // we can start at a Var and find all the other 691 // Vars in the same Go variable. 692 v->nextinnode = node->opt; 693 node->opt = v; 694 695 bit = blsh(i); 696 if(n == D_EXTERN || n == D_STATIC) 697 for(z=0; z<BITS; z++) 698 externs.b[z] |= bit.b[z]; 699 if(n == D_PARAM) 700 for(z=0; z<BITS; z++) 701 params.b[z] |= bit.b[z]; 702 703 if(node->class == PPARAM) 704 for(z=0; z<BITS; z++) 705 ivar.b[z] |= bit.b[z]; 706 if(node->class == PPARAMOUT) 707 for(z=0; z<BITS; z++) 708 ovar.b[z] |= bit.b[z]; 709 710 // Treat values with their address taken as live at calls, 711 // because the garbage collector's liveness analysis in ../gc/plive.c does. 712 // These must be consistent or else we will elide stores and the garbage 713 // collector will see uninitialized data. 714 // The typical case where our own analysis is out of sync is when the 715 // node appears to have its address taken but that code doesn't actually 716 // get generated and therefore doesn't show up as an address being 717 // taken when we analyze the instruction stream. 718 // One instance of this case is when a closure uses the same name as 719 // an outer variable for one of its own variables declared with :=. 720 // The parser flags the outer variable as possibly shared, and therefore 721 // sets addrtaken, even though it ends up not being actually shared. 722 // If we were better about _ elision, _ = &x would suffice too. 723 // The broader := in a closure problem is mentioned in a comment in 724 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 725 if(node->addrtaken) 726 v->addr = 1; 727 728 // Disable registerization for globals, because: 729 // (1) we might panic at any time and we want the recovery code 730 // to see the latest values (issue 1304). 731 // (2) we don't know what pointers might point at them and we want 732 // loads via those pointers to see updated values and vice versa (issue 7995). 733 // 734 // Disable registerization for results if using defer, because the deferred func 735 // might recover and return, causing the current values to be used. 736 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 737 v->addr = 1; 738 739 if(debug['R']) 740 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 741 ostats.nvar++; 742 743 return bit; 744 745 none: 746 return zbits; 747 } 748 749 void 750 prop(Reg *r, Bits ref, Bits cal) 751 { 752 Reg *r1, *r2; 753 int z, i, j; 754 Var *v, *v1; 755 756 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 757 for(z=0; z<BITS; z++) { 758 ref.b[z] |= r1->refahead.b[z]; 759 if(ref.b[z] != r1->refahead.b[z]) { 760 r1->refahead.b[z] = ref.b[z]; 761 change++; 762 } 763 cal.b[z] |= r1->calahead.b[z]; 764 if(cal.b[z] != r1->calahead.b[z]) { 765 r1->calahead.b[z] = cal.b[z]; 766 change++; 767 } 768 } 769 switch(r1->f.prog->as) { 770 case ACALL: 771 if(noreturn(r1->f.prog)) 772 break; 773 774 // Mark all input variables (ivar) as used, because that's what the 775 // liveness bitmaps say. The liveness bitmaps say that so that a 776 // panic will not show stale values in the parameter dump. 777 // Mark variables with a recent VARDEF (r1->act) as used, 778 // so that the optimizer flushes initializations to memory, 779 // so that if a garbage collection happens during this CALL, 780 // the collector will see initialized memory. Again this is to 781 // match what the liveness bitmaps say. 782 for(z=0; z<BITS; z++) { 783 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 784 ref.b[z] = 0; 785 } 786 787 // cal.b is the current approximation of what's live across the call. 788 // Every bit in cal.b is a single stack word. For each such word, 789 // find all the other tracked stack words in the same Go variable 790 // (struct/slice/string/interface) and mark them live too. 791 // This is necessary because the liveness analysis for the garbage 792 // collector works at variable granularity, not at word granularity. 793 // It is fundamental for slice/string/interface: the garbage collector 794 // needs the whole value, not just some of the words, in order to 795 // interpret the other bits correctly. Specifically, slice needs a consistent 796 // ptr and cap, string needs a consistent ptr and len, and interface 797 // needs a consistent type word and data word. 798 for(z=0; z<BITS; z++) { 799 if(cal.b[z] == 0) 800 continue; 801 for(i=0; i<64; i++) { 802 if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) 803 continue; 804 v = var+z*64+i; 805 if(v->node->opt == nil) // v represents fixed register, not Go variable 806 continue; 807 808 // v->node->opt is the head of a linked list of Vars 809 // corresponding to tracked words from the Go variable v->node. 810 // Walk the list and set all the bits. 811 // For a large struct this could end up being quadratic: 812 // after the first setting, the outer loop (for z, i) would see a 1 bit 813 // for all of the remaining words in the struct, and for each such 814 // word would go through and turn on all the bits again. 815 // To avoid the quadratic behavior, we only turn on the bits if 816 // v is the head of the list or if the head's bit is not yet turned on. 817 // This will set the bits at most twice, keeping the overall loop linear. 818 v1 = v->node->opt; 819 j = v1 - var; 820 if(v == v1 || !btest(&cal, j)) { 821 for(; v1 != nil; v1 = v1->nextinnode) { 822 j = v1 - var; 823 biset(&cal, j); 824 } 825 } 826 } 827 } 828 break; 829 830 case ATEXT: 831 for(z=0; z<BITS; z++) { 832 cal.b[z] = 0; 833 ref.b[z] = 0; 834 } 835 break; 836 837 case ARET: 838 for(z=0; z<BITS; z++) { 839 cal.b[z] = externs.b[z] | ovar.b[z]; 840 ref.b[z] = 0; 841 } 842 break; 843 } 844 for(z=0; z<BITS; z++) { 845 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 846 r1->use1.b[z] | r1->use2.b[z]; 847 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 848 r1->refbehind.b[z] = ref.b[z]; 849 r1->calbehind.b[z] = cal.b[z]; 850 } 851 if(r1->f.active) 852 break; 853 r1->f.active = 1; 854 } 855 for(; r != r1; r = (Reg*)r->f.p1) 856 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 857 prop(r2, r->refbehind, r->calbehind); 858 } 859 860 void 861 synch(Reg *r, Bits dif) 862 { 863 Reg *r1; 864 int z; 865 866 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 867 for(z=0; z<BITS; z++) { 868 dif.b[z] = (dif.b[z] & 869 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 870 r1->set.b[z] | r1->regdiff.b[z]; 871 if(dif.b[z] != r1->regdiff.b[z]) { 872 r1->regdiff.b[z] = dif.b[z]; 873 change++; 874 } 875 } 876 if(r1->f.active) 877 break; 878 r1->f.active = 1; 879 for(z=0; z<BITS; z++) 880 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 881 if((Reg*)r1->f.s2 != R) 882 synch((Reg*)r1->f.s2, dif); 883 } 884 } 885 886 uint32 887 allreg(uint32 b, Rgn *r) 888 { 889 Var *v; 890 int i; 891 892 v = var + r->varno; 893 r->regno = 0; 894 switch(v->etype) { 895 896 default: 897 fatal("unknown etype %d/%E", bitno(b), v->etype); 898 break; 899 900 case TINT8: 901 case TUINT8: 902 case TINT16: 903 case TUINT16: 904 case TINT32: 905 case TUINT32: 906 case TINT64: 907 case TINT: 908 case TUINT: 909 case TUINTPTR: 910 case TBOOL: 911 case TPTR32: 912 i = BtoR(~b); 913 if(i && r->cost > 0) { 914 r->regno = i; 915 return RtoB(i); 916 } 917 break; 918 919 case TFLOAT32: 920 case TFLOAT64: 921 if(!use_sse) 922 break; 923 i = BtoF(~b); 924 if(i && r->cost > 0) { 925 r->regno = i; 926 return FtoB(i); 927 } 928 break; 929 } 930 return 0; 931 } 932 933 void 934 paint1(Reg *r, int bn) 935 { 936 Reg *r1; 937 Prog *p; 938 int z; 939 uint64 bb, rbz; 940 941 z = bn/64; 942 bb = 1LL<<(bn%64); 943 if(r->act.b[z] & bb) 944 return; 945 for(;;) { 946 if(!(r->refbehind.b[z] & bb)) 947 break; 948 r1 = (Reg*)r->f.p1; 949 if(r1 == R) 950 break; 951 if(!(r1->refahead.b[z] & bb)) 952 break; 953 if(r1->act.b[z] & bb) 954 break; 955 r = r1; 956 } 957 958 rbz = ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])); 959 if(LOAD(r) & rbz & bb) { 960 change -= CLOAD * r->f.loop; 961 } 962 for(;;) { 963 r->act.b[z] |= bb; 964 p = r->f.prog; 965 966 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 967 if(r->use1.b[z] & bb) { 968 change += CREF * r->f.loop; 969 if(p->as == AFMOVL || p->as == AFMOVW) 970 if(BtoR(bb) != D_F0) 971 change = -CINF; 972 } 973 if((r->use2.b[z]|r->set.b[z]) & bb) { 974 change += CREF * r->f.loop; 975 if(p->as == AFMOVL || p->as == AFMOVW) 976 if(BtoR(bb) != D_F0) 977 change = -CINF; 978 } 979 } 980 981 if(STORE(r) & r->regdiff.b[z] & bb) { 982 change -= CLOAD * r->f.loop; 983 if(p->as == AFMOVL || p->as == AFMOVW) 984 if(BtoR(bb) != D_F0) 985 change = -CINF; 986 } 987 988 if(r->refbehind.b[z] & bb) 989 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 990 if(r1->refahead.b[z] & bb) 991 paint1(r1, bn); 992 993 if(!(r->refahead.b[z] & bb)) 994 break; 995 r1 = (Reg*)r->f.s2; 996 if(r1 != R) 997 if(r1->refbehind.b[z] & bb) 998 paint1(r1, bn); 999 r = (Reg*)r->f.s1; 1000 if(r == R) 1001 break; 1002 if(r->act.b[z] & bb) 1003 break; 1004 if(!(r->refbehind.b[z] & bb)) 1005 break; 1006 } 1007 } 1008 1009 uint32 1010 paint2(Reg *r, int bn, int depth) 1011 { 1012 Reg *r1; 1013 int z; 1014 uint64 bb, vreg; 1015 1016 z = bn/64; 1017 bb = 1LL << (bn%64); 1018 vreg = regbits; 1019 if(!(r->act.b[z] & bb)) 1020 return vreg; 1021 for(;;) { 1022 if(!(r->refbehind.b[z] & bb)) 1023 break; 1024 r1 = (Reg*)r->f.p1; 1025 if(r1 == R) 1026 break; 1027 if(!(r1->refahead.b[z] & bb)) 1028 break; 1029 if(!(r1->act.b[z] & bb)) 1030 break; 1031 r = r1; 1032 } 1033 for(;;) { 1034 if(debug['R'] && debug['v']) 1035 print(" paint2 %d %P\n", depth, r->f.prog); 1036 1037 r->act.b[z] &= ~bb; 1038 1039 vreg |= r->regu; 1040 1041 if(r->refbehind.b[z] & bb) 1042 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1043 if(r1->refahead.b[z] & bb) 1044 vreg |= paint2(r1, bn, depth+1); 1045 1046 if(!(r->refahead.b[z] & bb)) 1047 break; 1048 r1 = (Reg*)r->f.s2; 1049 if(r1 != R) 1050 if(r1->refbehind.b[z] & bb) 1051 vreg |= paint2(r1, bn, depth+1); 1052 r = (Reg*)r->f.s1; 1053 if(r == R) 1054 break; 1055 if(!(r->act.b[z] & bb)) 1056 break; 1057 if(!(r->refbehind.b[z] & bb)) 1058 break; 1059 } 1060 1061 return vreg; 1062 } 1063 1064 void 1065 paint3(Reg *r, int bn, uint32 rb, int rn) 1066 { 1067 Reg *r1; 1068 Prog *p; 1069 int z; 1070 uint64 bb, rbz; 1071 1072 z = bn/64; 1073 bb = 1LL << (bn%64); 1074 if(r->act.b[z] & bb) 1075 return; 1076 for(;;) { 1077 if(!(r->refbehind.b[z] & bb)) 1078 break; 1079 r1 = (Reg*)r->f.p1; 1080 if(r1 == R) 1081 break; 1082 if(!(r1->refahead.b[z] & bb)) 1083 break; 1084 if(r1->act.b[z] & bb) 1085 break; 1086 r = r1; 1087 } 1088 1089 rbz = ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])); 1090 if(LOAD(r) & rbz & bb) 1091 addmove(r, bn, rn, 0); 1092 for(;;) { 1093 r->act.b[z] |= bb; 1094 p = r->f.prog; 1095 1096 if(r->use1.b[z] & bb) { 1097 if(debug['R'] && debug['v']) 1098 print("%P", p); 1099 addreg(&p->from, rn); 1100 if(debug['R'] && debug['v']) 1101 print(" ===change== %P\n", p); 1102 } 1103 if((r->use2.b[z]|r->set.b[z]) & bb) { 1104 if(debug['R'] && debug['v']) 1105 print("%P", p); 1106 addreg(&p->to, rn); 1107 if(debug['R'] && debug['v']) 1108 print(" ===change== %P\n", p); 1109 } 1110 1111 if(STORE(r) & r->regdiff.b[z] & bb) 1112 addmove(r, bn, rn, 1); 1113 r->regu |= rb; 1114 1115 if(r->refbehind.b[z] & bb) 1116 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1117 if(r1->refahead.b[z] & bb) 1118 paint3(r1, bn, rb, rn); 1119 1120 if(!(r->refahead.b[z] & bb)) 1121 break; 1122 r1 = (Reg*)r->f.s2; 1123 if(r1 != R) 1124 if(r1->refbehind.b[z] & bb) 1125 paint3(r1, bn, rb, rn); 1126 r = (Reg*)r->f.s1; 1127 if(r == R) 1128 break; 1129 if(r->act.b[z] & bb) 1130 break; 1131 if(!(r->refbehind.b[z] & bb)) 1132 break; 1133 } 1134 } 1135 1136 void 1137 addreg(Adr *a, int rn) 1138 { 1139 a->sym = nil; 1140 a->node = nil; 1141 a->offset = 0; 1142 a->type = rn; 1143 1144 ostats.ncvtreg++; 1145 } 1146 1147 uint32 1148 RtoB(int r) 1149 { 1150 1151 if(r < D_AX || r > D_DI) 1152 return 0; 1153 return 1L << (r-D_AX); 1154 } 1155 1156 int 1157 BtoR(uint32 b) 1158 { 1159 1160 b &= 0xffL; 1161 if(b == 0) 1162 return 0; 1163 return bitno(b) + D_AX; 1164 } 1165 1166 uint32 1167 FtoB(int f) 1168 { 1169 if(f < D_X0 || f > D_X7) 1170 return 0; 1171 return 1L << (f - D_X0 + 8); 1172 } 1173 1174 int 1175 BtoF(uint32 b) 1176 { 1177 b &= 0xFF00L; 1178 if(b == 0) 1179 return 0; 1180 return bitno(b) - 8 + D_X0; 1181 } 1182 1183 void 1184 dumpone(Flow *f, int isreg) 1185 { 1186 int z; 1187 Bits bit; 1188 Reg *r; 1189 1190 print("%d:%P", f->loop, f->prog); 1191 if(isreg) { 1192 r = (Reg*)f; 1193 for(z=0; z<BITS; z++) 1194 bit.b[z] = 1195 r->set.b[z] | 1196 r->use1.b[z] | 1197 r->use2.b[z] | 1198 r->refbehind.b[z] | 1199 r->refahead.b[z] | 1200 r->calbehind.b[z] | 1201 r->calahead.b[z] | 1202 r->regdiff.b[z] | 1203 r->act.b[z] | 1204 0; 1205 if(bany(&bit)) { 1206 print("\t"); 1207 if(bany(&r->set)) 1208 print(" s:%Q", r->set); 1209 if(bany(&r->use1)) 1210 print(" u1:%Q", r->use1); 1211 if(bany(&r->use2)) 1212 print(" u2:%Q", r->use2); 1213 if(bany(&r->refbehind)) 1214 print(" rb:%Q ", r->refbehind); 1215 if(bany(&r->refahead)) 1216 print(" ra:%Q ", r->refahead); 1217 if(bany(&r->calbehind)) 1218 print(" cb:%Q ", r->calbehind); 1219 if(bany(&r->calahead)) 1220 print(" ca:%Q ", r->calahead); 1221 if(bany(&r->regdiff)) 1222 print(" d:%Q ", r->regdiff); 1223 if(bany(&r->act)) 1224 print(" a:%Q ", r->act); 1225 } 1226 } 1227 print("\n"); 1228 } 1229 1230 void 1231 dumpit(char *str, Flow *r0, int isreg) 1232 { 1233 Flow *r, *r1; 1234 1235 print("\n%s\n", str); 1236 for(r = r0; r != nil; r = r->link) { 1237 dumpone(r, isreg); 1238 r1 = r->p2; 1239 if(r1 != nil) { 1240 print(" pred:"); 1241 for(; r1 != nil; r1 = r1->p2link) 1242 print(" %.4ud", (int)r1->prog->pc); 1243 print("\n"); 1244 } 1245 // Print successors if it's not just the next one 1246 if(r->s1 != r->link || r->s2 != nil) { 1247 print(" succ:"); 1248 if(r->s1 != nil) 1249 print(" %.4ud", (int)r->s1->prog->pc); 1250 if(r->s2 != nil) 1251 print(" %.4ud", (int)r->s2->prog->pc); 1252 print("\n"); 1253 } 1254 } 1255 }