// Source: github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/8g/reg.c
//
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 16 /* 8 integer + 8 floating */ 37 #define REGBITS ((uint32)0xffff) 38 39 static Reg* firstr; 40 static int first = 1; 41 42 int 43 rcmp(const void *a1, const void *a2) 44 { 45 Rgn *p1, *p2; 46 int c1, c2; 47 48 p1 = (Rgn*)a1; 49 p2 = (Rgn*)a2; 50 c1 = p2->cost; 51 c2 = p1->cost; 52 if(c1 -= c2) 53 return c1; 54 return p2->varno - p1->varno; 55 } 56 57 static void 58 setaddrs(Bits bit) 59 { 60 int i, n; 61 Var *v; 62 Node *node; 63 64 while(bany(&bit)) { 65 // convert each bit to a variable 66 i = bnum(bit); 67 node = var[i].node; 68 n = var[i].name; 69 bit.b[i/32] &= ~(1L<<(i%32)); 70 71 // disable all pieces of that variable 72 for(i=0; i<nvar; i++) { 73 v = var+i; 74 if(v->node == node && v->name == n) 75 v->addr = 2; 76 } 77 } 78 } 79 80 static char* regname[] = { 81 ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di", 82 ".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7", 83 }; 84 85 static Node* regnodes[NREGVAR]; 86 87 static void walkvardef(Node *n, Reg *r, int active); 88 89 void 90 regopt(Prog *firstp) 91 { 92 Reg *r, *r1; 93 Prog *p; 94 Graph *g; 95 ProgInfo info; 96 int i, z, active; 97 uint32 vreg; 98 Bits bit; 99 100 if(first) { 101 fmtinstall('Q', Qconv); 102 exregoffset = D_DI; // no externals 103 first = 0; 104 } 105 106 mergetemp(firstp); 107 108 /* 109 * control flow is more complicated in generated go code 110 * than in generated c code. define pseudo-variables for 111 * registers, so we have complete register usage information. 
112 */ 113 nvar = NREGVAR; 114 memset(var, 0, NREGVAR*sizeof var[0]); 115 for(i=0; i<NREGVAR; i++) { 116 if(regnodes[i] == N) 117 regnodes[i] = newname(lookup(regname[i])); 118 var[i].node = regnodes[i]; 119 } 120 121 regbits = RtoB(D_SP); 122 for(z=0; z<BITS; z++) { 123 externs.b[z] = 0; 124 params.b[z] = 0; 125 consts.b[z] = 0; 126 addrs.b[z] = 0; 127 ivar.b[z] = 0; 128 ovar.b[z] = 0; 129 } 130 131 /* 132 * pass 1 133 * build aux data structure 134 * allocate pcs 135 * find use and set of variables 136 */ 137 g = flowstart(firstp, sizeof(Reg)); 138 if(g == nil) { 139 for(i=0; i<nvar; i++) 140 var[i].node->opt = nil; 141 return; 142 } 143 144 firstr = (Reg*)g->start; 145 146 for(r = firstr; r != R; r = (Reg*)r->f.link) { 147 p = r->f.prog; 148 if(p->as == AVARDEF || p->as == AVARKILL) 149 continue; 150 proginfo(&info, p); 151 152 // Avoid making variables for direct-called functions. 153 if(p->as == ACALL && p->to.type == D_EXTERN) 154 continue; 155 156 r->use1.b[0] |= info.reguse | info.regindex; 157 r->set.b[0] |= info.regset; 158 159 bit = mkvar(r, &p->from); 160 if(bany(&bit)) { 161 if(info.flags & LeftAddr) 162 setaddrs(bit); 163 if(info.flags & LeftRead) 164 for(z=0; z<BITS; z++) 165 r->use1.b[z] |= bit.b[z]; 166 if(info.flags & LeftWrite) 167 for(z=0; z<BITS; z++) 168 r->set.b[z] |= bit.b[z]; 169 } 170 171 bit = mkvar(r, &p->to); 172 if(bany(&bit)) { 173 if(info.flags & RightAddr) 174 setaddrs(bit); 175 if(info.flags & RightRead) 176 for(z=0; z<BITS; z++) 177 r->use2.b[z] |= bit.b[z]; 178 if(info.flags & RightWrite) 179 for(z=0; z<BITS; z++) 180 r->set.b[z] |= bit.b[z]; 181 } 182 } 183 if(firstr == R) 184 return; 185 186 for(i=0; i<nvar; i++) { 187 Var *v = var+i; 188 if(v->addr) { 189 bit = blsh(i); 190 for(z=0; z<BITS; z++) 191 addrs.b[z] |= bit.b[z]; 192 } 193 194 if(debug['R'] && debug['v']) 195 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 196 i, v->addr, v->etype, v->width, v->node, v->offset); 197 } 198 199 if(debug['R'] && debug['v']) 200 
dumpit("pass1", &firstr->f, 1); 201 202 /* 203 * pass 2 204 * find looping structure 205 */ 206 flowrpo(g); 207 208 if(debug['R'] && debug['v']) 209 dumpit("pass2", &firstr->f, 1); 210 211 /* 212 * pass 2.5 213 * iterate propagating fat vardef covering forward 214 * r->act records vars with a VARDEF since the last CALL. 215 * (r->act will be reused in pass 5 for something else, 216 * but we'll be done with it by then.) 217 */ 218 active = 0; 219 for(r = firstr; r != R; r = (Reg*)r->f.link) { 220 r->f.active = 0; 221 r->act = zbits; 222 } 223 for(r = firstr; r != R; r = (Reg*)r->f.link) { 224 p = r->f.prog; 225 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 226 active++; 227 walkvardef(p->to.node, r, active); 228 } 229 } 230 231 /* 232 * pass 3 233 * iterate propagating usage 234 * back until flow graph is complete 235 */ 236 loop1: 237 change = 0; 238 for(r = firstr; r != R; r = (Reg*)r->f.link) 239 r->f.active = 0; 240 for(r = firstr; r != R; r = (Reg*)r->f.link) 241 if(r->f.prog->as == ARET) 242 prop(r, zbits, zbits); 243 loop11: 244 /* pick up unreachable code */ 245 i = 0; 246 for(r = firstr; r != R; r = r1) { 247 r1 = (Reg*)r->f.link; 248 if(r1 && r1->f.active && !r->f.active) { 249 prop(r, zbits, zbits); 250 i = 1; 251 } 252 } 253 if(i) 254 goto loop11; 255 if(change) 256 goto loop1; 257 258 if(debug['R'] && debug['v']) 259 dumpit("pass3", &firstr->f, 1); 260 261 /* 262 * pass 4 263 * iterate propagating register/variable synchrony 264 * forward until graph is complete 265 */ 266 loop2: 267 change = 0; 268 for(r = firstr; r != R; r = (Reg*)r->f.link) 269 r->f.active = 0; 270 synch(firstr, zbits); 271 if(change) 272 goto loop2; 273 274 if(debug['R'] && debug['v']) 275 dumpit("pass4", &firstr->f, 1); 276 277 /* 278 * pass 4.5 279 * move register pseudo-variables into regu. 
280 */ 281 for(r = firstr; r != R; r = (Reg*)r->f.link) { 282 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 283 284 r->set.b[0] &= ~REGBITS; 285 r->use1.b[0] &= ~REGBITS; 286 r->use2.b[0] &= ~REGBITS; 287 r->refbehind.b[0] &= ~REGBITS; 288 r->refahead.b[0] &= ~REGBITS; 289 r->calbehind.b[0] &= ~REGBITS; 290 r->calahead.b[0] &= ~REGBITS; 291 r->regdiff.b[0] &= ~REGBITS; 292 r->act.b[0] &= ~REGBITS; 293 } 294 295 /* 296 * pass 5 297 * isolate regions 298 * calculate costs (paint1) 299 */ 300 r = firstr; 301 if(r) { 302 for(z=0; z<BITS; z++) 303 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 304 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 305 if(bany(&bit) && !r->f.refset) { 306 // should never happen - all variables are preset 307 if(debug['w']) 308 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 309 r->f.refset = 1; 310 } 311 } 312 for(r = firstr; r != R; r = (Reg*)r->f.link) 313 r->act = zbits; 314 rgp = region; 315 nregion = 0; 316 for(r = firstr; r != R; r = (Reg*)r->f.link) { 317 for(z=0; z<BITS; z++) 318 bit.b[z] = r->set.b[z] & 319 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 320 if(bany(&bit) && !r->f.refset) { 321 if(debug['w']) 322 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 323 r->f.refset = 1; 324 excise(&r->f); 325 } 326 for(z=0; z<BITS; z++) 327 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 328 while(bany(&bit)) { 329 i = bnum(bit); 330 rgp->enter = r; 331 rgp->varno = i; 332 change = 0; 333 paint1(r, i); 334 bit.b[i/32] &= ~(1L<<(i%32)); 335 if(change <= 0) 336 continue; 337 rgp->cost = change; 338 nregion++; 339 if(nregion >= NRGN) { 340 if(debug['R'] && debug['v']) 341 print("too many regions\n"); 342 goto brk; 343 } 344 rgp++; 345 } 346 } 347 brk: 348 qsort(region, nregion, sizeof(region[0]), rcmp); 349 350 /* 351 * pass 6 352 * determine used registers (paint2) 353 * replace code (paint3) 354 */ 355 rgp = region; 356 for(i=0; i<nregion; i++) { 357 bit = blsh(rgp->varno); 358 vreg 
= paint2(rgp->enter, rgp->varno); 359 vreg = allreg(vreg, rgp); 360 if(rgp->regno != 0) 361 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 362 rgp++; 363 } 364 365 if(debug['R'] && debug['v']) 366 dumpit("pass6", &firstr->f, 1); 367 368 /* 369 * free aux structures. peep allocates new ones. 370 */ 371 for(i=0; i<nvar; i++) 372 var[i].node->opt = nil; 373 flowend(g); 374 firstr = R; 375 376 /* 377 * pass 7 378 * peep-hole on basic block 379 */ 380 if(!debug['R'] || debug['P']) 381 peep(firstp); 382 383 /* 384 * eliminate nops 385 */ 386 for(p=firstp; p!=P; p=p->link) { 387 while(p->link != P && p->link->as == ANOP) 388 p->link = p->link->link; 389 if(p->to.type == D_BRANCH) 390 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 391 p->to.u.branch = p->to.u.branch->link; 392 } 393 394 if(!use_sse) 395 for(p=firstp; p!=P; p=p->link) { 396 if(p->from.type >= D_X0 && p->from.type <= D_X7) 397 fatal("invalid use of %R with GO386=387: %P", p->from.type, p); 398 if(p->to.type >= D_X0 && p->to.type <= D_X7) 399 fatal("invalid use of %R with GO386=387: %P", p->to.type, p); 400 } 401 402 if(debug['R']) { 403 if(ostats.ncvtreg || 404 ostats.nspill || 405 ostats.nreload || 406 ostats.ndelmov || 407 ostats.nvar || 408 ostats.naddr || 409 0) 410 print("\nstats\n"); 411 412 if(ostats.ncvtreg) 413 print(" %4d cvtreg\n", ostats.ncvtreg); 414 if(ostats.nspill) 415 print(" %4d spill\n", ostats.nspill); 416 if(ostats.nreload) 417 print(" %4d reload\n", ostats.nreload); 418 if(ostats.ndelmov) 419 print(" %4d delmov\n", ostats.ndelmov); 420 if(ostats.nvar) 421 print(" %4d var\n", ostats.nvar); 422 if(ostats.naddr) 423 print(" %4d addr\n", ostats.naddr); 424 425 memset(&ostats, 0, sizeof(ostats)); 426 } 427 } 428 429 static void 430 walkvardef(Node *n, Reg *r, int active) 431 { 432 Reg *r1, *r2; 433 int bn; 434 Var *v; 435 436 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 437 if(r1->f.active == active) 438 break; 439 r1->f.active = active; 440 if(r1->f.prog->as == AVARKILL && 
r1->f.prog->to.node == n) 441 break; 442 for(v=n->opt; v!=nil; v=v->nextinnode) { 443 bn = v - var; 444 r1->act.b[bn/32] |= 1L << (bn%32); 445 } 446 if(r1->f.prog->as == ACALL) 447 break; 448 } 449 450 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 451 if(r2->f.s2 != nil) 452 walkvardef(n, (Reg*)r2->f.s2, active); 453 } 454 455 /* 456 * add mov b,rn 457 * just after r 458 */ 459 void 460 addmove(Reg *r, int bn, int rn, int f) 461 { 462 Prog *p, *p1; 463 Adr *a; 464 Var *v; 465 466 p1 = mal(sizeof(*p1)); 467 clearp(p1); 468 p1->pc = 9999; 469 470 p = r->f.prog; 471 p1->link = p->link; 472 p->link = p1; 473 p1->lineno = p->lineno; 474 475 v = var + bn; 476 477 a = &p1->to; 478 a->offset = v->offset; 479 a->etype = v->etype; 480 a->type = v->name; 481 a->node = v->node; 482 a->sym = linksym(v->node->sym); 483 484 // need to clean this up with wptr and 485 // some of the defaults 486 p1->as = AMOVL; 487 switch(v->etype) { 488 default: 489 fatal("unknown type %E", v->etype); 490 case TINT8: 491 case TUINT8: 492 case TBOOL: 493 p1->as = AMOVB; 494 break; 495 case TINT16: 496 case TUINT16: 497 p1->as = AMOVW; 498 break; 499 case TFLOAT32: 500 p1->as = AMOVSS; 501 break; 502 case TFLOAT64: 503 p1->as = AMOVSD; 504 break; 505 case TINT: 506 case TUINT: 507 case TINT32: 508 case TUINT32: 509 case TPTR32: 510 break; 511 } 512 513 p1->from.type = rn; 514 if(!f) { 515 p1->from = *a; 516 *a = zprog.from; 517 a->type = rn; 518 if(v->etype == TUINT8) 519 p1->as = AMOVB; 520 if(v->etype == TUINT16) 521 p1->as = AMOVW; 522 } 523 if(debug['R'] && debug['v']) 524 print("%P ===add=== %P\n", p, p1); 525 ostats.nspill++; 526 } 527 528 uint32 529 doregbits(int r) 530 { 531 uint32 b; 532 533 b = 0; 534 if(r >= D_INDIR) 535 r -= D_INDIR; 536 if(r >= D_AX && r <= D_DI) 537 b |= RtoB(r); 538 else 539 if(r >= D_AL && r <= D_BL) 540 b |= RtoB(r-D_AL+D_AX); 541 else 542 if(r >= D_AH && r <= D_BH) 543 b |= RtoB(r-D_AH+D_AX); 544 else 545 if(r >= D_X0 && r <= D_X0+7) 546 b |= FtoB(r); 547 return b; 548 } 
549 550 static int 551 overlap(int32 o1, int w1, int32 o2, int w2) 552 { 553 int32 t1, t2; 554 555 t1 = o1+w1; 556 t2 = o2+w2; 557 558 if(!(t1 > o2 && t2 > o1)) 559 return 0; 560 561 return 1; 562 } 563 564 Bits 565 mkvar(Reg *r, Adr *a) 566 { 567 Var *v; 568 int i, t, n, et, z, w, flag, regu; 569 int32 o; 570 Bits bit; 571 Node *node; 572 573 /* 574 * mark registers used 575 */ 576 t = a->type; 577 if(t == D_NONE) 578 goto none; 579 580 if(r != R) 581 r->use1.b[0] |= doregbits(a->index); 582 583 switch(t) { 584 default: 585 regu = doregbits(t); 586 if(regu == 0) 587 goto none; 588 bit = zbits; 589 bit.b[0] = regu; 590 return bit; 591 592 case D_ADDR: 593 a->type = a->index; 594 bit = mkvar(r, a); 595 setaddrs(bit); 596 a->type = t; 597 ostats.naddr++; 598 goto none; 599 600 case D_EXTERN: 601 case D_STATIC: 602 case D_PARAM: 603 case D_AUTO: 604 n = t; 605 break; 606 } 607 608 node = a->node; 609 if(node == N || node->op != ONAME || node->orig == N) 610 goto none; 611 node = node->orig; 612 if(node->orig != node) 613 fatal("%D: bad node", a); 614 if(node->sym == S || node->sym->name[0] == '.') 615 goto none; 616 et = a->etype; 617 o = a->offset; 618 w = a->width; 619 if(w < 0) 620 fatal("bad width %d for %D", w, a); 621 622 flag = 0; 623 for(i=0; i<nvar; i++) { 624 v = var+i; 625 if(v->node == node && v->name == n) { 626 if(v->offset == o) 627 if(v->etype == et) 628 if(v->width == w) 629 return blsh(i); 630 631 // if they overlap, disable both 632 if(overlap(v->offset, v->width, o, w)) { 633 if(debug['R']) 634 print("disable %s\n", node->sym->name); 635 v->addr = 1; 636 flag = 1; 637 } 638 } 639 } 640 641 switch(et) { 642 case 0: 643 case TFUNC: 644 goto none; 645 } 646 647 if(nvar >= NVAR) { 648 if(debug['w'] > 1 && node != N) 649 fatal("variable not optimized: %D", a); 650 651 // If we're not tracking a word in a variable, mark the rest as 652 // having its address taken, so that we keep the whole thing 653 // live at all calls. 
otherwise we might optimize away part of 654 // a variable but not all of it. 655 for(i=0; i<nvar; i++) { 656 v = var+i; 657 if(v->node == node) 658 v->addr = 1; 659 } 660 goto none; 661 } 662 663 i = nvar; 664 nvar++; 665 v = var+i; 666 v->offset = o; 667 v->name = n; 668 v->etype = et; 669 v->width = w; 670 v->addr = flag; // funny punning 671 v->node = node; 672 673 // node->opt is the head of a linked list 674 // of Vars within the given Node, so that 675 // we can start at a Var and find all the other 676 // Vars in the same Go variable. 677 v->nextinnode = node->opt; 678 node->opt = v; 679 680 bit = blsh(i); 681 if(n == D_EXTERN || n == D_STATIC) 682 for(z=0; z<BITS; z++) 683 externs.b[z] |= bit.b[z]; 684 if(n == D_PARAM) 685 for(z=0; z<BITS; z++) 686 params.b[z] |= bit.b[z]; 687 688 if(node->class == PPARAM) 689 for(z=0; z<BITS; z++) 690 ivar.b[z] |= bit.b[z]; 691 if(node->class == PPARAMOUT) 692 for(z=0; z<BITS; z++) 693 ovar.b[z] |= bit.b[z]; 694 695 // Treat values with their address taken as live at calls, 696 // because the garbage collector's liveness analysis in ../gc/plive.c does. 697 // These must be consistent or else we will elide stores and the garbage 698 // collector will see uninitialized data. 699 // The typical case where our own analysis is out of sync is when the 700 // node appears to have its address taken but that code doesn't actually 701 // get generated and therefore doesn't show up as an address being 702 // taken when we analyze the instruction stream. 703 // One instance of this case is when a closure uses the same name as 704 // an outer variable for one of its own variables declared with :=. 705 // The parser flags the outer variable as possibly shared, and therefore 706 // sets addrtaken, even though it ends up not being actually shared. 707 // If we were better about _ elision, _ = &x would suffice too. 
708 // The broader := in a closure problem is mentioned in a comment in 709 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 710 if(node->addrtaken) 711 v->addr = 1; 712 713 // Disable registerization for globals, because: 714 // (1) we might panic at any time and we want the recovery code 715 // to see the latest values (issue 1304). 716 // (2) we don't know what pointers might point at them and we want 717 // loads via those pointers to see updated values and vice versa (issue 7995). 718 // 719 // Disable registerization for results if using defer, because the deferred func 720 // might recover and return, causing the current values to be used. 721 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 722 v->addr = 1; 723 724 if(debug['R']) 725 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 726 ostats.nvar++; 727 728 return bit; 729 730 none: 731 return zbits; 732 } 733 734 void 735 prop(Reg *r, Bits ref, Bits cal) 736 { 737 Reg *r1, *r2; 738 int z, i, j; 739 Var *v, *v1; 740 741 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 742 for(z=0; z<BITS; z++) { 743 ref.b[z] |= r1->refahead.b[z]; 744 if(ref.b[z] != r1->refahead.b[z]) { 745 r1->refahead.b[z] = ref.b[z]; 746 change++; 747 } 748 cal.b[z] |= r1->calahead.b[z]; 749 if(cal.b[z] != r1->calahead.b[z]) { 750 r1->calahead.b[z] = cal.b[z]; 751 change++; 752 } 753 } 754 switch(r1->f.prog->as) { 755 case ACALL: 756 if(noreturn(r1->f.prog)) 757 break; 758 759 // Mark all input variables (ivar) as used, because that's what the 760 // liveness bitmaps say. The liveness bitmaps say that so that a 761 // panic will not show stale values in the parameter dump. 762 // Mark variables with a recent VARDEF (r1->act) as used, 763 // so that the optimizer flushes initializations to memory, 764 // so that if a garbage collection happens during this CALL, 765 // the collector will see initialized memory. Again this is to 766 // match what the liveness bitmaps say. 
767 for(z=0; z<BITS; z++) { 768 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 769 ref.b[z] = 0; 770 } 771 772 // cal.b is the current approximation of what's live across the call. 773 // Every bit in cal.b is a single stack word. For each such word, 774 // find all the other tracked stack words in the same Go variable 775 // (struct/slice/string/interface) and mark them live too. 776 // This is necessary because the liveness analysis for the garbage 777 // collector works at variable granularity, not at word granularity. 778 // It is fundamental for slice/string/interface: the garbage collector 779 // needs the whole value, not just some of the words, in order to 780 // interpret the other bits correctly. Specifically, slice needs a consistent 781 // ptr and cap, string needs a consistent ptr and len, and interface 782 // needs a consistent type word and data word. 783 for(z=0; z<BITS; z++) { 784 if(cal.b[z] == 0) 785 continue; 786 for(i=0; i<32; i++) { 787 if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) 788 continue; 789 v = var+z*32+i; 790 if(v->node->opt == nil) // v represents fixed register, not Go variable 791 continue; 792 793 // v->node->opt is the head of a linked list of Vars 794 // corresponding to tracked words from the Go variable v->node. 795 // Walk the list and set all the bits. 796 // For a large struct this could end up being quadratic: 797 // after the first setting, the outer loop (for z, i) would see a 1 bit 798 // for all of the remaining words in the struct, and for each such 799 // word would go through and turn on all the bits again. 800 // To avoid the quadratic behavior, we only turn on the bits if 801 // v is the head of the list or if the head's bit is not yet turned on. 802 // This will set the bits at most twice, keeping the overall loop linear. 
803 v1 = v->node->opt; 804 j = v1 - var; 805 if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { 806 for(; v1 != nil; v1 = v1->nextinnode) { 807 j = v1 - var; 808 cal.b[j/32] |= 1<<(j&31); 809 } 810 } 811 } 812 } 813 break; 814 815 case ATEXT: 816 for(z=0; z<BITS; z++) { 817 cal.b[z] = 0; 818 ref.b[z] = 0; 819 } 820 break; 821 822 case ARET: 823 for(z=0; z<BITS; z++) { 824 cal.b[z] = externs.b[z] | ovar.b[z]; 825 ref.b[z] = 0; 826 } 827 break; 828 } 829 for(z=0; z<BITS; z++) { 830 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 831 r1->use1.b[z] | r1->use2.b[z]; 832 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 833 r1->refbehind.b[z] = ref.b[z]; 834 r1->calbehind.b[z] = cal.b[z]; 835 } 836 if(r1->f.active) 837 break; 838 r1->f.active = 1; 839 } 840 for(; r != r1; r = (Reg*)r->f.p1) 841 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 842 prop(r2, r->refbehind, r->calbehind); 843 } 844 845 void 846 synch(Reg *r, Bits dif) 847 { 848 Reg *r1; 849 int z; 850 851 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 852 for(z=0; z<BITS; z++) { 853 dif.b[z] = (dif.b[z] & 854 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 855 r1->set.b[z] | r1->regdiff.b[z]; 856 if(dif.b[z] != r1->regdiff.b[z]) { 857 r1->regdiff.b[z] = dif.b[z]; 858 change++; 859 } 860 } 861 if(r1->f.active) 862 break; 863 r1->f.active = 1; 864 for(z=0; z<BITS; z++) 865 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 866 if((Reg*)r1->f.s2 != R) 867 synch((Reg*)r1->f.s2, dif); 868 } 869 } 870 871 uint32 872 allreg(uint32 b, Rgn *r) 873 { 874 Var *v; 875 int i; 876 877 v = var + r->varno; 878 r->regno = 0; 879 switch(v->etype) { 880 881 default: 882 fatal("unknown etype %d/%E", bitno(b), v->etype); 883 break; 884 885 case TINT8: 886 case TUINT8: 887 case TINT16: 888 case TUINT16: 889 case TINT32: 890 case TUINT32: 891 case TINT64: 892 case TINT: 893 case TUINT: 894 case TUINTPTR: 895 case TBOOL: 896 case TPTR32: 897 i = BtoR(~b); 898 if(i && r->cost > 0) { 899 r->regno = i; 900 return RtoB(i); 901 
} 902 break; 903 904 case TFLOAT32: 905 case TFLOAT64: 906 if(!use_sse) 907 break; 908 i = BtoF(~b); 909 if(i && r->cost > 0) { 910 r->regno = i; 911 return FtoB(i); 912 } 913 break; 914 } 915 return 0; 916 } 917 918 void 919 paint1(Reg *r, int bn) 920 { 921 Reg *r1; 922 Prog *p; 923 int z; 924 uint32 bb; 925 926 z = bn/32; 927 bb = 1L<<(bn%32); 928 if(r->act.b[z] & bb) 929 return; 930 for(;;) { 931 if(!(r->refbehind.b[z] & bb)) 932 break; 933 r1 = (Reg*)r->f.p1; 934 if(r1 == R) 935 break; 936 if(!(r1->refahead.b[z] & bb)) 937 break; 938 if(r1->act.b[z] & bb) 939 break; 940 r = r1; 941 } 942 943 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 944 change -= CLOAD * r->f.loop; 945 } 946 for(;;) { 947 r->act.b[z] |= bb; 948 p = r->f.prog; 949 950 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 951 if(r->use1.b[z] & bb) { 952 change += CREF * r->f.loop; 953 if(p->as == AFMOVL || p->as == AFMOVW) 954 if(BtoR(bb) != D_F0) 955 change = -CINF; 956 } 957 if((r->use2.b[z]|r->set.b[z]) & bb) { 958 change += CREF * r->f.loop; 959 if(p->as == AFMOVL || p->as == AFMOVW) 960 if(BtoR(bb) != D_F0) 961 change = -CINF; 962 } 963 } 964 965 if(STORE(r) & r->regdiff.b[z] & bb) { 966 change -= CLOAD * r->f.loop; 967 if(p->as == AFMOVL || p->as == AFMOVW) 968 if(BtoR(bb) != D_F0) 969 change = -CINF; 970 } 971 972 if(r->refbehind.b[z] & bb) 973 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 974 if(r1->refahead.b[z] & bb) 975 paint1(r1, bn); 976 977 if(!(r->refahead.b[z] & bb)) 978 break; 979 r1 = (Reg*)r->f.s2; 980 if(r1 != R) 981 if(r1->refbehind.b[z] & bb) 982 paint1(r1, bn); 983 r = (Reg*)r->f.s1; 984 if(r == R) 985 break; 986 if(r->act.b[z] & bb) 987 break; 988 if(!(r->refbehind.b[z] & bb)) 989 break; 990 } 991 } 992 993 uint32 994 regset(Reg *r, uint32 bb) 995 { 996 uint32 b, set; 997 Adr v; 998 int c; 999 1000 set = 0; 1001 v = zprog.from; 1002 while(b = bb & ~(bb-1)) { 1003 v.type = b & 0xFF ? 
BtoR(b): BtoF(b); 1004 c = copyu(r->f.prog, &v, nil); 1005 if(c == 3) 1006 set |= b; 1007 bb &= ~b; 1008 } 1009 return set; 1010 } 1011 1012 uint32 1013 reguse(Reg *r, uint32 bb) 1014 { 1015 uint32 b, set; 1016 Adr v; 1017 int c; 1018 1019 set = 0; 1020 v = zprog.from; 1021 while(b = bb & ~(bb-1)) { 1022 v.type = b & 0xFF ? BtoR(b): BtoF(b); 1023 c = copyu(r->f.prog, &v, nil); 1024 if(c == 1 || c == 2 || c == 4) 1025 set |= b; 1026 bb &= ~b; 1027 } 1028 return set; 1029 } 1030 1031 uint32 1032 paint2(Reg *r, int bn) 1033 { 1034 Reg *r1; 1035 int z; 1036 uint32 bb, vreg, x; 1037 1038 z = bn/32; 1039 bb = 1L << (bn%32); 1040 vreg = regbits; 1041 if(!(r->act.b[z] & bb)) 1042 return vreg; 1043 for(;;) { 1044 if(!(r->refbehind.b[z] & bb)) 1045 break; 1046 r1 = (Reg*)r->f.p1; 1047 if(r1 == R) 1048 break; 1049 if(!(r1->refahead.b[z] & bb)) 1050 break; 1051 if(!(r1->act.b[z] & bb)) 1052 break; 1053 r = r1; 1054 } 1055 for(;;) { 1056 r->act.b[z] &= ~bb; 1057 1058 vreg |= r->regu; 1059 1060 if(r->refbehind.b[z] & bb) 1061 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1062 if(r1->refahead.b[z] & bb) 1063 vreg |= paint2(r1, bn); 1064 1065 if(!(r->refahead.b[z] & bb)) 1066 break; 1067 r1 = (Reg*)r->f.s2; 1068 if(r1 != R) 1069 if(r1->refbehind.b[z] & bb) 1070 vreg |= paint2(r1, bn); 1071 r = (Reg*)r->f.s1; 1072 if(r == R) 1073 break; 1074 if(!(r->act.b[z] & bb)) 1075 break; 1076 if(!(r->refbehind.b[z] & bb)) 1077 break; 1078 } 1079 1080 bb = vreg; 1081 for(; r; r=(Reg*)r->f.s1) { 1082 x = r->regu & ~bb; 1083 if(x) { 1084 vreg |= reguse(r, x); 1085 bb |= regset(r, x); 1086 } 1087 } 1088 return vreg; 1089 } 1090 1091 void 1092 paint3(Reg *r, int bn, int32 rb, int rn) 1093 { 1094 Reg *r1; 1095 Prog *p; 1096 int z; 1097 uint32 bb; 1098 1099 z = bn/32; 1100 bb = 1L << (bn%32); 1101 if(r->act.b[z] & bb) 1102 return; 1103 for(;;) { 1104 if(!(r->refbehind.b[z] & bb)) 1105 break; 1106 r1 = (Reg*)r->f.p1; 1107 if(r1 == R) 1108 break; 1109 if(!(r1->refahead.b[z] & bb)) 1110 
break; 1111 if(r1->act.b[z] & bb) 1112 break; 1113 r = r1; 1114 } 1115 1116 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1117 addmove(r, bn, rn, 0); 1118 for(;;) { 1119 r->act.b[z] |= bb; 1120 p = r->f.prog; 1121 1122 if(r->use1.b[z] & bb) { 1123 if(debug['R'] && debug['v']) 1124 print("%P", p); 1125 addreg(&p->from, rn); 1126 if(debug['R'] && debug['v']) 1127 print(" ===change== %P\n", p); 1128 } 1129 if((r->use2.b[z]|r->set.b[z]) & bb) { 1130 if(debug['R'] && debug['v']) 1131 print("%P", p); 1132 addreg(&p->to, rn); 1133 if(debug['R'] && debug['v']) 1134 print(" ===change== %P\n", p); 1135 } 1136 1137 if(STORE(r) & r->regdiff.b[z] & bb) 1138 addmove(r, bn, rn, 1); 1139 r->regu |= rb; 1140 1141 if(r->refbehind.b[z] & bb) 1142 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1143 if(r1->refahead.b[z] & bb) 1144 paint3(r1, bn, rb, rn); 1145 1146 if(!(r->refahead.b[z] & bb)) 1147 break; 1148 r1 = (Reg*)r->f.s2; 1149 if(r1 != R) 1150 if(r1->refbehind.b[z] & bb) 1151 paint3(r1, bn, rb, rn); 1152 r = (Reg*)r->f.s1; 1153 if(r == R) 1154 break; 1155 if(r->act.b[z] & bb) 1156 break; 1157 if(!(r->refbehind.b[z] & bb)) 1158 break; 1159 } 1160 } 1161 1162 void 1163 addreg(Adr *a, int rn) 1164 { 1165 a->sym = nil; 1166 a->offset = 0; 1167 a->type = rn; 1168 1169 ostats.ncvtreg++; 1170 } 1171 1172 int32 1173 RtoB(int r) 1174 { 1175 1176 if(r < D_AX || r > D_DI) 1177 return 0; 1178 return 1L << (r-D_AX); 1179 } 1180 1181 int 1182 BtoR(int32 b) 1183 { 1184 1185 b &= 0xffL; 1186 if(b == 0) 1187 return 0; 1188 return bitno(b) + D_AX; 1189 } 1190 1191 int32 1192 FtoB(int f) 1193 { 1194 if(f < D_X0 || f > D_X7) 1195 return 0; 1196 return 1L << (f - D_X0 + 8); 1197 } 1198 1199 int 1200 BtoF(int32 b) 1201 { 1202 b &= 0xFF00L; 1203 if(b == 0) 1204 return 0; 1205 return bitno(b) - 8 + D_X0; 1206 } 1207 1208 void 1209 dumpone(Flow *f, int isreg) 1210 { 1211 int z; 1212 Bits bit; 1213 Reg *r; 1214 1215 print("%d:%P", f->loop, f->prog); 1216 if(isreg) { 1217 r = 
(Reg*)f; 1218 for(z=0; z<BITS; z++) 1219 bit.b[z] = 1220 r->set.b[z] | 1221 r->use1.b[z] | 1222 r->use2.b[z] | 1223 r->refbehind.b[z] | 1224 r->refahead.b[z] | 1225 r->calbehind.b[z] | 1226 r->calahead.b[z] | 1227 r->regdiff.b[z] | 1228 r->act.b[z] | 1229 0; 1230 if(bany(&bit)) { 1231 print("\t"); 1232 if(bany(&r->set)) 1233 print(" s:%Q", r->set); 1234 if(bany(&r->use1)) 1235 print(" u1:%Q", r->use1); 1236 if(bany(&r->use2)) 1237 print(" u2:%Q", r->use2); 1238 if(bany(&r->refbehind)) 1239 print(" rb:%Q ", r->refbehind); 1240 if(bany(&r->refahead)) 1241 print(" ra:%Q ", r->refahead); 1242 if(bany(&r->calbehind)) 1243 print(" cb:%Q ", r->calbehind); 1244 if(bany(&r->calahead)) 1245 print(" ca:%Q ", r->calahead); 1246 if(bany(&r->regdiff)) 1247 print(" d:%Q ", r->regdiff); 1248 if(bany(&r->act)) 1249 print(" a:%Q ", r->act); 1250 } 1251 } 1252 print("\n"); 1253 } 1254 1255 void 1256 dumpit(char *str, Flow *r0, int isreg) 1257 { 1258 Flow *r, *r1; 1259 1260 print("\n%s\n", str); 1261 for(r = r0; r != nil; r = r->link) { 1262 dumpone(r, isreg); 1263 r1 = r->p2; 1264 if(r1 != nil) { 1265 print(" pred:"); 1266 for(; r1 != nil; r1 = r1->p2link) 1267 print(" %.4ud", (int)r1->prog->pc); 1268 print("\n"); 1269 } 1270 // r1 = r->s1; 1271 // if(r1 != nil) { 1272 // print(" succ:"); 1273 // for(; r1 != R; r1 = r1->s1) 1274 // print(" %.4ud", (int)r1->prog->pc); 1275 // print("\n"); 1276 // } 1277 } 1278 }