// Source: github.com/golang-haiku/go-1.4.3@v0.0.0-20190609233734-1f5ae41cc308/src/cmd/6g/reg.c
//
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 32 /* 16 general + 16 floating */ 37 #define REGBITS ((uint32)0xffffffff) 38 /*c2go enum { 39 NREGVAR = 32, 40 REGBITS = 0xffffffff, 41 }; 42 */ 43 44 static Reg* firstr; 45 static int first = 1; 46 47 int 48 rcmp(const void *a1, const void *a2) 49 { 50 Rgn *p1, *p2; 51 int c1, c2; 52 53 p1 = (Rgn*)a1; 54 p2 = (Rgn*)a2; 55 c1 = p2->cost; 56 c2 = p1->cost; 57 if(c1 -= c2) 58 return c1; 59 return p2->varno - p1->varno; 60 } 61 62 static void 63 setaddrs(Bits bit) 64 { 65 int i, n; 66 Var *v; 67 Node *node; 68 69 while(bany(&bit)) { 70 // convert each bit to a variable 71 i = bnum(bit); 72 node = var[i].node; 73 n = var[i].name; 74 bit.b[i/32] &= ~(1L<<(i%32)); 75 76 // disable all pieces of that variable 77 for(i=0; i<nvar; i++) { 78 v = var+i; 79 if(v->node == node && v->name == n) 80 v->addr = 2; 81 } 82 } 83 } 84 85 static char* regname[] = { 86 ".AX", 87 ".CX", 88 ".DX", 89 ".BX", 90 ".SP", 91 ".BP", 92 ".SI", 93 ".DI", 94 ".R8", 95 ".R9", 96 ".R10", 97 ".R11", 98 ".R12", 99 ".R13", 100 ".R14", 101 ".R15", 102 ".X0", 103 ".X1", 104 ".X2", 105 ".X3", 106 ".X4", 107 ".X5", 108 ".X6", 109 ".X7", 110 ".X8", 111 ".X9", 112 ".X10", 113 ".X11", 114 ".X12", 115 ".X13", 116 ".X14", 117 ".X15", 118 }; 119 120 static Node* regnodes[NREGVAR]; 121 122 static void walkvardef(Node *n, Reg *r, int active); 123 124 void 125 regopt(Prog *firstp) 126 { 127 Reg *r, *r1; 128 Prog *p; 129 Graph *g; 130 ProgInfo info; 131 int i, z, active; 132 uint32 vreg; 133 Bits bit; 134 135 if(first) { 136 fmtinstall('Q', Qconv); 137 exregoffset = D_R15; 138 first = 0; 139 } 140 141 mergetemp(firstp); 142 143 /* 144 * control flow is more complicated in generated go code 145 * than in generated c code. define pseudo-variables for 146 * registers, so we have complete register usage information. 
147 */ 148 nvar = NREGVAR; 149 memset(var, 0, NREGVAR*sizeof var[0]); 150 for(i=0; i<NREGVAR; i++) { 151 if(regnodes[i] == N) 152 regnodes[i] = newname(lookup(regname[i])); 153 var[i].node = regnodes[i]; 154 } 155 156 regbits = RtoB(D_SP); 157 for(z=0; z<BITS; z++) { 158 externs.b[z] = 0; 159 params.b[z] = 0; 160 consts.b[z] = 0; 161 addrs.b[z] = 0; 162 ivar.b[z] = 0; 163 ovar.b[z] = 0; 164 } 165 166 /* 167 * pass 1 168 * build aux data structure 169 * allocate pcs 170 * find use and set of variables 171 */ 172 g = flowstart(firstp, sizeof(Reg)); 173 if(g == nil) { 174 for(i=0; i<nvar; i++) 175 var[i].node->opt = nil; 176 return; 177 } 178 179 firstr = (Reg*)g->start; 180 181 for(r = firstr; r != R; r = (Reg*)r->f.link) { 182 p = r->f.prog; 183 if(p->as == AVARDEF || p->as == AVARKILL) 184 continue; 185 proginfo(&info, p); 186 187 // Avoid making variables for direct-called functions. 188 if(p->as == ACALL && p->to.type == D_EXTERN) 189 continue; 190 191 r->use1.b[0] |= info.reguse | info.regindex; 192 r->set.b[0] |= info.regset; 193 194 bit = mkvar(r, &p->from); 195 if(bany(&bit)) { 196 if(info.flags & LeftAddr) 197 setaddrs(bit); 198 if(info.flags & LeftRead) 199 for(z=0; z<BITS; z++) 200 r->use1.b[z] |= bit.b[z]; 201 if(info.flags & LeftWrite) 202 for(z=0; z<BITS; z++) 203 r->set.b[z] |= bit.b[z]; 204 } 205 206 bit = mkvar(r, &p->to); 207 if(bany(&bit)) { 208 if(info.flags & RightAddr) 209 setaddrs(bit); 210 if(info.flags & RightRead) 211 for(z=0; z<BITS; z++) 212 r->use2.b[z] |= bit.b[z]; 213 if(info.flags & RightWrite) 214 for(z=0; z<BITS; z++) 215 r->set.b[z] |= bit.b[z]; 216 } 217 } 218 219 for(i=0; i<nvar; i++) { 220 Var *v = var+i; 221 if(v->addr) { 222 bit = blsh(i); 223 for(z=0; z<BITS; z++) 224 addrs.b[z] |= bit.b[z]; 225 } 226 227 if(debug['R'] && debug['v']) 228 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 229 i, v->addr, v->etype, v->width, v->node, v->offset); 230 } 231 232 if(debug['R'] && debug['v']) 233 dumpit("pass1", &firstr->f, 1); 
234 235 /* 236 * pass 2 237 * find looping structure 238 */ 239 flowrpo(g); 240 241 if(debug['R'] && debug['v']) 242 dumpit("pass2", &firstr->f, 1); 243 244 /* 245 * pass 2.5 246 * iterate propagating fat vardef covering forward 247 * r->act records vars with a VARDEF since the last CALL. 248 * (r->act will be reused in pass 5 for something else, 249 * but we'll be done with it by then.) 250 */ 251 active = 0; 252 for(r = firstr; r != R; r = (Reg*)r->f.link) { 253 r->f.active = 0; 254 r->act = zbits; 255 } 256 for(r = firstr; r != R; r = (Reg*)r->f.link) { 257 p = r->f.prog; 258 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 259 active++; 260 walkvardef(p->to.node, r, active); 261 } 262 } 263 264 /* 265 * pass 3 266 * iterate propagating usage 267 * back until flow graph is complete 268 */ 269 loop1: 270 change = 0; 271 for(r = firstr; r != R; r = (Reg*)r->f.link) 272 r->f.active = 0; 273 for(r = firstr; r != R; r = (Reg*)r->f.link) 274 if(r->f.prog->as == ARET) 275 prop(r, zbits, zbits); 276 loop11: 277 /* pick up unreachable code */ 278 i = 0; 279 for(r = firstr; r != R; r = r1) { 280 r1 = (Reg*)r->f.link; 281 if(r1 && r1->f.active && !r->f.active) { 282 prop(r, zbits, zbits); 283 i = 1; 284 } 285 } 286 if(i) 287 goto loop11; 288 if(change) 289 goto loop1; 290 291 if(debug['R'] && debug['v']) 292 dumpit("pass3", &firstr->f, 1); 293 294 /* 295 * pass 4 296 * iterate propagating register/variable synchrony 297 * forward until graph is complete 298 */ 299 loop2: 300 change = 0; 301 for(r = firstr; r != R; r = (Reg*)r->f.link) 302 r->f.active = 0; 303 synch(firstr, zbits); 304 if(change) 305 goto loop2; 306 307 if(debug['R'] && debug['v']) 308 dumpit("pass4", &firstr->f, 1); 309 310 /* 311 * pass 4.5 312 * move register pseudo-variables into regu. 
313 */ 314 for(r = firstr; r != R; r = (Reg*)r->f.link) { 315 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 316 317 r->set.b[0] &= ~REGBITS; 318 r->use1.b[0] &= ~REGBITS; 319 r->use2.b[0] &= ~REGBITS; 320 r->refbehind.b[0] &= ~REGBITS; 321 r->refahead.b[0] &= ~REGBITS; 322 r->calbehind.b[0] &= ~REGBITS; 323 r->calahead.b[0] &= ~REGBITS; 324 r->regdiff.b[0] &= ~REGBITS; 325 r->act.b[0] &= ~REGBITS; 326 } 327 328 /* 329 * pass 5 330 * isolate regions 331 * calculate costs (paint1) 332 */ 333 r = firstr; 334 if(r) { 335 for(z=0; z<BITS; z++) 336 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 337 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 338 if(bany(&bit) && !r->f.refset) { 339 // should never happen - all variables are preset 340 if(debug['w']) 341 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 342 r->f.refset = 1; 343 } 344 } 345 for(r = firstr; r != R; r = (Reg*)r->f.link) 346 r->act = zbits; 347 rgp = region; 348 nregion = 0; 349 for(r = firstr; r != R; r = (Reg*)r->f.link) { 350 for(z=0; z<BITS; z++) 351 bit.b[z] = r->set.b[z] & 352 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 353 if(bany(&bit) && !r->f.refset) { 354 if(debug['w']) 355 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 356 r->f.refset = 1; 357 excise(&r->f); 358 } 359 for(z=0; z<BITS; z++) 360 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 361 while(bany(&bit)) { 362 i = bnum(bit); 363 rgp->enter = r; 364 rgp->varno = i; 365 change = 0; 366 paint1(r, i); 367 bit.b[i/32] &= ~(1L<<(i%32)); 368 if(change <= 0) 369 continue; 370 rgp->cost = change; 371 nregion++; 372 if(nregion >= NRGN) { 373 if(debug['R'] && debug['v']) 374 print("too many regions\n"); 375 goto brk; 376 } 377 rgp++; 378 } 379 } 380 brk: 381 qsort(region, nregion, sizeof(region[0]), rcmp); 382 383 if(debug['R'] && debug['v']) 384 dumpit("pass5", &firstr->f, 1); 385 386 /* 387 * pass 6 388 * determine used registers (paint2) 389 * replace code (paint3) 390 */ 391 rgp = 
region; 392 for(i=0; i<nregion; i++) { 393 bit = blsh(rgp->varno); 394 vreg = paint2(rgp->enter, rgp->varno); 395 vreg = allreg(vreg, rgp); 396 if(rgp->regno != 0) { 397 if(debug['R'] && debug['v']) { 398 Var *v; 399 400 v = var + rgp->varno; 401 print("registerize %N+%lld (bit=%2d et=%2E) in %R\n", 402 v->node, v->offset, rgp->varno, v->etype, rgp->regno); 403 } 404 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 405 } 406 rgp++; 407 } 408 409 if(debug['R'] && debug['v']) 410 dumpit("pass6", &firstr->f, 1); 411 412 /* 413 * free aux structures. peep allocates new ones. 414 */ 415 for(i=0; i<nvar; i++) 416 var[i].node->opt = nil; 417 flowend(g); 418 firstr = R; 419 420 /* 421 * pass 7 422 * peep-hole on basic block 423 */ 424 if(!debug['R'] || debug['P']) 425 peep(firstp); 426 427 /* 428 * eliminate nops 429 */ 430 for(p=firstp; p!=P; p=p->link) { 431 while(p->link != P && p->link->as == ANOP) 432 p->link = p->link->link; 433 if(p->to.type == D_BRANCH) 434 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 435 p->to.u.branch = p->to.u.branch->link; 436 } 437 438 if(debug['R']) { 439 if(ostats.ncvtreg || 440 ostats.nspill || 441 ostats.nreload || 442 ostats.ndelmov || 443 ostats.nvar || 444 ostats.naddr || 445 0) 446 print("\nstats\n"); 447 448 if(ostats.ncvtreg) 449 print(" %4d cvtreg\n", ostats.ncvtreg); 450 if(ostats.nspill) 451 print(" %4d spill\n", ostats.nspill); 452 if(ostats.nreload) 453 print(" %4d reload\n", ostats.nreload); 454 if(ostats.ndelmov) 455 print(" %4d delmov\n", ostats.ndelmov); 456 if(ostats.nvar) 457 print(" %4d var\n", ostats.nvar); 458 if(ostats.naddr) 459 print(" %4d addr\n", ostats.naddr); 460 461 memset(&ostats, 0, sizeof(ostats)); 462 } 463 } 464 465 static void 466 walkvardef(Node *n, Reg *r, int active) 467 { 468 Reg *r1, *r2; 469 int bn; 470 Var *v; 471 472 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 473 if(r1->f.active == active) 474 break; 475 r1->f.active = active; 476 if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) 
477 break; 478 for(v=n->opt; v!=nil; v=v->nextinnode) { 479 bn = v - var; 480 r1->act.b[bn/32] |= 1L << (bn%32); 481 } 482 if(r1->f.prog->as == ACALL) 483 break; 484 } 485 486 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 487 if(r2->f.s2 != nil) 488 walkvardef(n, (Reg*)r2->f.s2, active); 489 } 490 491 /* 492 * add mov b,rn 493 * just after r 494 */ 495 void 496 addmove(Reg *r, int bn, int rn, int f) 497 { 498 Prog *p, *p1; 499 Adr *a; 500 Var *v; 501 502 p1 = mal(sizeof(*p1)); 503 clearp(p1); 504 p1->pc = 9999; 505 506 p = r->f.prog; 507 p1->link = p->link; 508 p->link = p1; 509 p1->lineno = p->lineno; 510 511 v = var + bn; 512 513 a = &p1->to; 514 a->offset = v->offset; 515 a->etype = v->etype; 516 a->type = v->name; 517 a->node = v->node; 518 a->sym = linksym(v->node->sym); 519 520 // need to clean this up with wptr and 521 // some of the defaults 522 p1->as = AMOVL; 523 switch(simtype[(uchar)v->etype]) { 524 default: 525 fatal("unknown type %E", v->etype); 526 case TINT8: 527 case TUINT8: 528 case TBOOL: 529 p1->as = AMOVB; 530 break; 531 case TINT16: 532 case TUINT16: 533 p1->as = AMOVW; 534 break; 535 case TINT64: 536 case TUINT64: 537 case TPTR64: 538 p1->as = AMOVQ; 539 break; 540 case TFLOAT32: 541 p1->as = AMOVSS; 542 break; 543 case TFLOAT64: 544 p1->as = AMOVSD; 545 break; 546 case TINT32: 547 case TUINT32: 548 case TPTR32: 549 break; 550 } 551 552 p1->from.type = rn; 553 if(!f) { 554 p1->from = *a; 555 *a = zprog.from; 556 a->type = rn; 557 if(v->etype == TUINT8) 558 p1->as = AMOVB; 559 if(v->etype == TUINT16) 560 p1->as = AMOVW; 561 } 562 if(debug['R'] && debug['v']) 563 print("%P ===add=== %P\n", p, p1); 564 ostats.nspill++; 565 } 566 567 uint32 568 doregbits(int r) 569 { 570 uint32 b; 571 572 b = 0; 573 if(r >= D_INDIR) 574 r -= D_INDIR; 575 if(r >= D_AX && r <= D_R15) 576 b |= RtoB(r); 577 else 578 if(r >= D_AL && r <= D_R15B) 579 b |= RtoB(r-D_AL+D_AX); 580 else 581 if(r >= D_AH && r <= D_BH) 582 b |= RtoB(r-D_AH+D_AX); 583 else 584 if(r >= D_X0 && r <= 
D_X0+15) 585 b |= FtoB(r); 586 return b; 587 } 588 589 static int 590 overlap(int64 o1, int w1, int64 o2, int w2) 591 { 592 int64 t1, t2; 593 594 t1 = o1+w1; 595 t2 = o2+w2; 596 597 if(!(t1 > o2 && t2 > o1)) 598 return 0; 599 600 return 1; 601 } 602 603 Bits 604 mkvar(Reg *r, Adr *a) 605 { 606 Var *v; 607 int i, t, n, et, z, flag; 608 int64 w; 609 uint32 regu; 610 int64 o; 611 Bits bit; 612 Node *node; 613 614 /* 615 * mark registers used 616 */ 617 t = a->type; 618 if(t == D_NONE) 619 goto none; 620 621 if(r != R) 622 r->use1.b[0] |= doregbits(a->index); 623 624 switch(t) { 625 default: 626 regu = doregbits(t); 627 if(regu == 0) 628 goto none; 629 bit = zbits; 630 bit.b[0] = regu; 631 return bit; 632 633 case D_ADDR: 634 a->type = a->index; 635 bit = mkvar(r, a); 636 setaddrs(bit); 637 a->type = t; 638 ostats.naddr++; 639 goto none; 640 641 case D_EXTERN: 642 case D_STATIC: 643 case D_PARAM: 644 case D_AUTO: 645 n = t; 646 break; 647 } 648 649 node = a->node; 650 if(node == N || node->op != ONAME || node->orig == N) 651 goto none; 652 node = node->orig; 653 if(node->orig != node) 654 fatal("%D: bad node", a); 655 if(node->sym == S || node->sym->name[0] == '.') 656 goto none; 657 et = a->etype; 658 o = a->offset; 659 w = a->width; 660 if(w < 0) 661 fatal("bad width %lld for %D", w, a); 662 663 flag = 0; 664 for(i=0; i<nvar; i++) { 665 v = var+i; 666 if(v->node == node && v->name == n) { 667 if(v->offset == o) 668 if(v->etype == et) 669 if(v->width == w) 670 return blsh(i); 671 672 // if they overlaps, disable both 673 if(overlap(v->offset, v->width, o, w)) { 674 // print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et); 675 v->addr = 1; 676 flag = 1; 677 } 678 } 679 } 680 switch(et) { 681 case 0: 682 case TFUNC: 683 goto none; 684 } 685 686 if(nvar >= NVAR) { 687 if(debug['w'] > 1 && node != N) 688 fatal("variable not optimized: %#N", node); 689 690 // If we're not tracking a word in a variable, mark the rest as 691 // 
having its address taken, so that we keep the whole thing 692 // live at all calls. otherwise we might optimize away part of 693 // a variable but not all of it. 694 for(i=0; i<nvar; i++) { 695 v = var+i; 696 if(v->node == node) 697 v->addr = 1; 698 } 699 goto none; 700 } 701 702 i = nvar; 703 nvar++; 704 v = var+i; 705 v->offset = o; 706 v->name = n; 707 v->etype = et; 708 v->width = w; 709 v->addr = flag; // funny punning 710 v->node = node; 711 712 // node->opt is the head of a linked list 713 // of Vars within the given Node, so that 714 // we can start at a Var and find all the other 715 // Vars in the same Go variable. 716 v->nextinnode = node->opt; 717 node->opt = v; 718 719 bit = blsh(i); 720 if(n == D_EXTERN || n == D_STATIC) 721 for(z=0; z<BITS; z++) 722 externs.b[z] |= bit.b[z]; 723 if(n == D_PARAM) 724 for(z=0; z<BITS; z++) 725 params.b[z] |= bit.b[z]; 726 727 if(node->class == PPARAM) 728 for(z=0; z<BITS; z++) 729 ivar.b[z] |= bit.b[z]; 730 if(node->class == PPARAMOUT) 731 for(z=0; z<BITS; z++) 732 ovar.b[z] |= bit.b[z]; 733 734 // Treat values with their address taken as live at calls, 735 // because the garbage collector's liveness analysis in ../gc/plive.c does. 736 // These must be consistent or else we will elide stores and the garbage 737 // collector will see uninitialized data. 738 // The typical case where our own analysis is out of sync is when the 739 // node appears to have its address taken but that code doesn't actually 740 // get generated and therefore doesn't show up as an address being 741 // taken when we analyze the instruction stream. 742 // One instance of this case is when a closure uses the same name as 743 // an outer variable for one of its own variables declared with :=. 744 // The parser flags the outer variable as possibly shared, and therefore 745 // sets addrtaken, even though it ends up not being actually shared. 746 // If we were better about _ elision, _ = &x would suffice too. 
747 // The broader := in a closure problem is mentioned in a comment in 748 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 749 if(node->addrtaken) 750 v->addr = 1; 751 752 // Disable registerization for globals, because: 753 // (1) we might panic at any time and we want the recovery code 754 // to see the latest values (issue 1304). 755 // (2) we don't know what pointers might point at them and we want 756 // loads via those pointers to see updated values and vice versa (issue 7995). 757 // 758 // Disable registerization for results if using defer, because the deferred func 759 // might recover and return, causing the current values to be used. 760 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 761 v->addr = 1; 762 763 if(debug['R']) 764 print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 765 ostats.nvar++; 766 767 return bit; 768 769 none: 770 return zbits; 771 } 772 773 void 774 prop(Reg *r, Bits ref, Bits cal) 775 { 776 Reg *r1, *r2; 777 int z, i, j; 778 Var *v, *v1; 779 780 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 781 for(z=0; z<BITS; z++) { 782 ref.b[z] |= r1->refahead.b[z]; 783 if(ref.b[z] != r1->refahead.b[z]) { 784 r1->refahead.b[z] = ref.b[z]; 785 change++; 786 } 787 cal.b[z] |= r1->calahead.b[z]; 788 if(cal.b[z] != r1->calahead.b[z]) { 789 r1->calahead.b[z] = cal.b[z]; 790 change++; 791 } 792 } 793 switch(r1->f.prog->as) { 794 case ACALL: 795 if(noreturn(r1->f.prog)) 796 break; 797 798 // Mark all input variables (ivar) as used, because that's what the 799 // liveness bitmaps say. The liveness bitmaps say that so that a 800 // panic will not show stale values in the parameter dump. 801 // Mark variables with a recent VARDEF (r1->act) as used, 802 // so that the optimizer flushes initializations to memory, 803 // so that if a garbage collection happens during this CALL, 804 // the collector will see initialized memory. Again this is to 805 // match what the liveness bitmaps say. 
806 for(z=0; z<BITS; z++) { 807 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 808 ref.b[z] = 0; 809 } 810 811 // cal.b is the current approximation of what's live across the call. 812 // Every bit in cal.b is a single stack word. For each such word, 813 // find all the other tracked stack words in the same Go variable 814 // (struct/slice/string/interface) and mark them live too. 815 // This is necessary because the liveness analysis for the garbage 816 // collector works at variable granularity, not at word granularity. 817 // It is fundamental for slice/string/interface: the garbage collector 818 // needs the whole value, not just some of the words, in order to 819 // interpret the other bits correctly. Specifically, slice needs a consistent 820 // ptr and cap, string needs a consistent ptr and len, and interface 821 // needs a consistent type word and data word. 822 for(z=0; z<BITS; z++) { 823 if(cal.b[z] == 0) 824 continue; 825 for(i=0; i<32; i++) { 826 if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) 827 continue; 828 v = var+z*32+i; 829 if(v->node->opt == nil) // v represents fixed register, not Go variable 830 continue; 831 832 // v->node->opt is the head of a linked list of Vars 833 // corresponding to tracked words from the Go variable v->node. 834 // Walk the list and set all the bits. 835 // For a large struct this could end up being quadratic: 836 // after the first setting, the outer loop (for z, i) would see a 1 bit 837 // for all of the remaining words in the struct, and for each such 838 // word would go through and turn on all the bits again. 839 // To avoid the quadratic behavior, we only turn on the bits if 840 // v is the head of the list or if the head's bit is not yet turned on. 841 // This will set the bits at most twice, keeping the overall loop linear. 
842 v1 = v->node->opt; 843 j = v1 - var; 844 if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { 845 for(; v1 != nil; v1 = v1->nextinnode) { 846 j = v1 - var; 847 cal.b[j/32] |= 1UL<<(j&31); 848 } 849 } 850 } 851 } 852 break; 853 854 case ATEXT: 855 for(z=0; z<BITS; z++) { 856 cal.b[z] = 0; 857 ref.b[z] = 0; 858 } 859 break; 860 861 case ARET: 862 for(z=0; z<BITS; z++) { 863 cal.b[z] = externs.b[z] | ovar.b[z]; 864 ref.b[z] = 0; 865 } 866 break; 867 } 868 for(z=0; z<BITS; z++) { 869 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 870 r1->use1.b[z] | r1->use2.b[z]; 871 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 872 r1->refbehind.b[z] = ref.b[z]; 873 r1->calbehind.b[z] = cal.b[z]; 874 } 875 if(r1->f.active) 876 break; 877 r1->f.active = 1; 878 } 879 for(; r != r1; r = (Reg*)r->f.p1) 880 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 881 prop(r2, r->refbehind, r->calbehind); 882 } 883 884 void 885 synch(Reg *r, Bits dif) 886 { 887 Reg *r1; 888 int z; 889 890 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 891 for(z=0; z<BITS; z++) { 892 dif.b[z] = (dif.b[z] & 893 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 894 r1->set.b[z] | r1->regdiff.b[z]; 895 if(dif.b[z] != r1->regdiff.b[z]) { 896 r1->regdiff.b[z] = dif.b[z]; 897 change++; 898 } 899 } 900 if(r1->f.active) 901 break; 902 r1->f.active = 1; 903 for(z=0; z<BITS; z++) 904 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 905 if(r1->f.s2 != nil) 906 synch((Reg*)r1->f.s2, dif); 907 } 908 } 909 910 uint32 911 allreg(uint32 b, Rgn *r) 912 { 913 Var *v; 914 int i; 915 916 v = var + r->varno; 917 r->regno = 0; 918 switch(v->etype) { 919 920 default: 921 fatal("unknown etype %d/%E", bitno(b), v->etype); 922 break; 923 924 case TINT8: 925 case TUINT8: 926 case TINT16: 927 case TUINT16: 928 case TINT32: 929 case TUINT32: 930 case TINT64: 931 case TUINT64: 932 case TINT: 933 case TUINT: 934 case TUINTPTR: 935 case TBOOL: 936 case TPTR32: 937 case TPTR64: 938 i = BtoR(~b); 939 if(i && r->cost > 0) { 940 
r->regno = i; 941 return RtoB(i); 942 } 943 break; 944 945 case TFLOAT32: 946 case TFLOAT64: 947 i = BtoF(~b); 948 if(i && r->cost > 0) { 949 r->regno = i; 950 return FtoB(i); 951 } 952 break; 953 } 954 return 0; 955 } 956 957 void 958 paint1(Reg *r, int bn) 959 { 960 Reg *r1; 961 int z; 962 uint32 bb; 963 964 z = bn/32; 965 bb = 1L<<(bn%32); 966 if(r->act.b[z] & bb) 967 return; 968 for(;;) { 969 if(!(r->refbehind.b[z] & bb)) 970 break; 971 r1 = (Reg*)r->f.p1; 972 if(r1 == R) 973 break; 974 if(!(r1->refahead.b[z] & bb)) 975 break; 976 if(r1->act.b[z] & bb) 977 break; 978 r = r1; 979 } 980 981 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 982 change -= CLOAD * r->f.loop; 983 } 984 for(;;) { 985 r->act.b[z] |= bb; 986 987 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 988 if(r->use1.b[z] & bb) 989 change += CREF * r->f.loop; 990 if((r->use2.b[z]|r->set.b[z]) & bb) 991 change += CREF * r->f.loop; 992 } 993 994 if(STORE(r) & r->regdiff.b[z] & bb) { 995 change -= CLOAD * r->f.loop; 996 } 997 998 if(r->refbehind.b[z] & bb) 999 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1000 if(r1->refahead.b[z] & bb) 1001 paint1(r1, bn); 1002 1003 if(!(r->refahead.b[z] & bb)) 1004 break; 1005 r1 = (Reg*)r->f.s2; 1006 if(r1 != R) 1007 if(r1->refbehind.b[z] & bb) 1008 paint1(r1, bn); 1009 r = (Reg*)r->f.s1; 1010 if(r == R) 1011 break; 1012 if(r->act.b[z] & bb) 1013 break; 1014 if(!(r->refbehind.b[z] & bb)) 1015 break; 1016 } 1017 } 1018 1019 uint32 1020 regset(Reg *r, uint32 bb) 1021 { 1022 uint32 b, set; 1023 Adr v; 1024 int c; 1025 1026 set = 0; 1027 v = zprog.from; 1028 while(b = bb & ~(bb-1)) { 1029 v.type = b & 0xFFFF? 
BtoR(b): BtoF(b); 1030 if(v.type == 0) 1031 fatal("zero v.type for %#ux", b); 1032 c = copyu(r->f.prog, &v, nil); 1033 if(c == 3) 1034 set |= b; 1035 bb &= ~b; 1036 } 1037 return set; 1038 } 1039 1040 uint32 1041 reguse(Reg *r, uint32 bb) 1042 { 1043 uint32 b, set; 1044 Adr v; 1045 int c; 1046 1047 set = 0; 1048 v = zprog.from; 1049 while(b = bb & ~(bb-1)) { 1050 v.type = b & 0xFFFF? BtoR(b): BtoF(b); 1051 c = copyu(r->f.prog, &v, nil); 1052 if(c == 1 || c == 2 || c == 4) 1053 set |= b; 1054 bb &= ~b; 1055 } 1056 return set; 1057 } 1058 1059 uint32 1060 paint2(Reg *r, int bn) 1061 { 1062 Reg *r1; 1063 int z; 1064 uint32 bb, vreg, x; 1065 1066 z = bn/32; 1067 bb = 1L << (bn%32); 1068 vreg = regbits; 1069 if(!(r->act.b[z] & bb)) 1070 return vreg; 1071 for(;;) { 1072 if(!(r->refbehind.b[z] & bb)) 1073 break; 1074 r1 = (Reg*)r->f.p1; 1075 if(r1 == R) 1076 break; 1077 if(!(r1->refahead.b[z] & bb)) 1078 break; 1079 if(!(r1->act.b[z] & bb)) 1080 break; 1081 r = r1; 1082 } 1083 for(;;) { 1084 r->act.b[z] &= ~bb; 1085 1086 vreg |= r->regu; 1087 1088 if(r->refbehind.b[z] & bb) 1089 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1090 if(r1->refahead.b[z] & bb) 1091 vreg |= paint2(r1, bn); 1092 1093 if(!(r->refahead.b[z] & bb)) 1094 break; 1095 r1 = (Reg*)r->f.s2; 1096 if(r1 != R) 1097 if(r1->refbehind.b[z] & bb) 1098 vreg |= paint2(r1, bn); 1099 r = (Reg*)r->f.s1; 1100 if(r == R) 1101 break; 1102 if(!(r->act.b[z] & bb)) 1103 break; 1104 if(!(r->refbehind.b[z] & bb)) 1105 break; 1106 } 1107 1108 bb = vreg; 1109 for(; r; r=(Reg*)r->f.s1) { 1110 x = r->regu & ~bb; 1111 if(x) { 1112 vreg |= reguse(r, x); 1113 bb |= regset(r, x); 1114 } 1115 } 1116 return vreg; 1117 } 1118 1119 void 1120 paint3(Reg *r, int bn, int32 rb, int rn) 1121 { 1122 Reg *r1; 1123 Prog *p; 1124 int z; 1125 uint32 bb; 1126 1127 z = bn/32; 1128 bb = 1L << (bn%32); 1129 if(r->act.b[z] & bb) 1130 return; 1131 for(;;) { 1132 if(!(r->refbehind.b[z] & bb)) 1133 break; 1134 r1 = (Reg*)r->f.p1; 1135 if(r1 
== R) 1136 break; 1137 if(!(r1->refahead.b[z] & bb)) 1138 break; 1139 if(r1->act.b[z] & bb) 1140 break; 1141 r = r1; 1142 } 1143 1144 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1145 addmove(r, bn, rn, 0); 1146 for(;;) { 1147 r->act.b[z] |= bb; 1148 p = r->f.prog; 1149 1150 if(r->use1.b[z] & bb) { 1151 if(debug['R'] && debug['v']) 1152 print("%P", p); 1153 addreg(&p->from, rn); 1154 if(debug['R'] && debug['v']) 1155 print(" ===change== %P\n", p); 1156 } 1157 if((r->use2.b[z]|r->set.b[z]) & bb) { 1158 if(debug['R'] && debug['v']) 1159 print("%P", p); 1160 addreg(&p->to, rn); 1161 if(debug['R'] && debug['v']) 1162 print(" ===change== %P\n", p); 1163 } 1164 1165 if(STORE(r) & r->regdiff.b[z] & bb) 1166 addmove(r, bn, rn, 1); 1167 r->regu |= rb; 1168 1169 if(r->refbehind.b[z] & bb) 1170 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1171 if(r1->refahead.b[z] & bb) 1172 paint3(r1, bn, rb, rn); 1173 1174 if(!(r->refahead.b[z] & bb)) 1175 break; 1176 r1 = (Reg*)r->f.s2; 1177 if(r1 != R) 1178 if(r1->refbehind.b[z] & bb) 1179 paint3(r1, bn, rb, rn); 1180 r = (Reg*)r->f.s1; 1181 if(r == R) 1182 break; 1183 if(r->act.b[z] & bb) 1184 break; 1185 if(!(r->refbehind.b[z] & bb)) 1186 break; 1187 } 1188 } 1189 1190 void 1191 addreg(Adr *a, int rn) 1192 { 1193 a->sym = nil; 1194 a->node = nil; 1195 a->offset = 0; 1196 a->type = rn; 1197 1198 ostats.ncvtreg++; 1199 } 1200 1201 int32 1202 RtoB(int r) 1203 { 1204 1205 if(r < D_AX || r > D_R15) 1206 return 0; 1207 return 1L << (r-D_AX); 1208 } 1209 1210 int 1211 BtoR(int32 b) 1212 { 1213 b &= 0xffffL; 1214 if(nacl) 1215 b &= ~((1<<(D_BP-D_AX)) | (1<<(D_R15-D_AX))); 1216 if(b == 0) 1217 return 0; 1218 return bitno(b) + D_AX; 1219 } 1220 1221 /* 1222 * bit reg 1223 * 16 X0 1224 * ... 
1225 * 31 X15 1226 */ 1227 int32 1228 FtoB(int f) 1229 { 1230 if(f < D_X0 || f > D_X15) 1231 return 0; 1232 return 1L << (f - D_X0 + 16); 1233 } 1234 1235 int 1236 BtoF(int32 b) 1237 { 1238 1239 b &= 0xFFFF0000L; 1240 if(b == 0) 1241 return 0; 1242 return bitno(b) - 16 + D_X0; 1243 } 1244 1245 void 1246 dumpone(Flow *f, int isreg) 1247 { 1248 int z; 1249 Bits bit; 1250 Reg *r; 1251 1252 print("%d:%P", f->loop, f->prog); 1253 if(isreg) { 1254 r = (Reg*)f; 1255 for(z=0; z<BITS; z++) 1256 bit.b[z] = 1257 r->set.b[z] | 1258 r->use1.b[z] | 1259 r->use2.b[z] | 1260 r->refbehind.b[z] | 1261 r->refahead.b[z] | 1262 r->calbehind.b[z] | 1263 r->calahead.b[z] | 1264 r->regdiff.b[z] | 1265 r->act.b[z] | 1266 0; 1267 if(bany(&bit)) { 1268 print("\t"); 1269 if(bany(&r->set)) 1270 print(" s:%Q", r->set); 1271 if(bany(&r->use1)) 1272 print(" u1:%Q", r->use1); 1273 if(bany(&r->use2)) 1274 print(" u2:%Q", r->use2); 1275 if(bany(&r->refbehind)) 1276 print(" rb:%Q ", r->refbehind); 1277 if(bany(&r->refahead)) 1278 print(" ra:%Q ", r->refahead); 1279 if(bany(&r->calbehind)) 1280 print(" cb:%Q ", r->calbehind); 1281 if(bany(&r->calahead)) 1282 print(" ca:%Q ", r->calahead); 1283 if(bany(&r->regdiff)) 1284 print(" d:%Q ", r->regdiff); 1285 if(bany(&r->act)) 1286 print(" a:%Q ", r->act); 1287 } 1288 } 1289 print("\n"); 1290 } 1291 1292 void 1293 dumpit(char *str, Flow *r0, int isreg) 1294 { 1295 Flow *r, *r1; 1296 1297 print("\n%s\n", str); 1298 for(r = r0; r != nil; r = r->link) { 1299 dumpone(r, isreg); 1300 r1 = r->p2; 1301 if(r1 != nil) { 1302 print(" pred:"); 1303 for(; r1 != nil; r1 = r1->p2link) 1304 print(" %.4ud", (int)r1->prog->pc); 1305 print("\n"); 1306 } 1307 // r1 = r->s1; 1308 // if(r1 != R) { 1309 // print(" succ:"); 1310 // for(; r1 != R; r1 = r1->s1) 1311 // print(" %.4ud", (int)r1->prog->pc); 1312 // print("\n"); 1313 // } 1314 } 1315 }