// Source: github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/cmd/8g/reg.c
//
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 #include <u.h> 32 #include <libc.h> 33 #include "gg.h" 34 #include "opt.h" 35 36 #define NREGVAR 16 /* 8 integer + 8 floating */ 37 #define REGBITS ((uint32)0xffff) 38 /*c2go enum { 39 NREGVAR = 16, 40 REGBITS = (1<<NREGVAR) - 1, 41 }; 42 */ 43 44 static Reg* firstr; 45 static int first = 1; 46 47 int 48 rcmp(const void *a1, const void *a2) 49 { 50 Rgn *p1, *p2; 51 int c1, c2; 52 53 p1 = (Rgn*)a1; 54 p2 = (Rgn*)a2; 55 c1 = p2->cost; 56 c2 = p1->cost; 57 if(c1 -= c2) 58 return c1; 59 return p2->varno - p1->varno; 60 } 61 62 static void 63 setaddrs(Bits bit) 64 { 65 int i, n; 66 Var *v; 67 Node *node; 68 69 while(bany(&bit)) { 70 // convert each bit to a variable 71 i = bnum(bit); 72 node = var[i].node; 73 n = var[i].name; 74 bit.b[i/32] &= ~(1L<<(i%32)); 75 76 // disable all pieces of that variable 77 for(i=0; i<nvar; i++) { 78 v = var+i; 79 if(v->node == node && v->name == n) 80 v->addr = 2; 81 } 82 } 83 } 84 85 static char* regname[] = { 86 ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di", 87 ".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7", 88 }; 89 90 static Node* regnodes[NREGVAR]; 91 92 static void walkvardef(Node *n, Reg *r, int active); 93 94 void 95 regopt(Prog *firstp) 96 { 97 Reg *r, *r1; 98 Prog *p; 99 Graph *g; 100 ProgInfo info; 101 int i, z, active; 102 uint32 vreg; 103 Bits bit; 104 105 if(first) { 106 fmtinstall('Q', Qconv); 107 exregoffset = D_DI; // no externals 108 first = 0; 109 } 110 111 mergetemp(firstp); 112 113 /* 114 * control flow is more complicated in generated go code 115 * than in generated c code. define pseudo-variables for 116 * registers, so we have complete register usage information. 
117 */ 118 nvar = NREGVAR; 119 memset(var, 0, NREGVAR*sizeof var[0]); 120 for(i=0; i<NREGVAR; i++) { 121 if(regnodes[i] == N) 122 regnodes[i] = newname(lookup(regname[i])); 123 var[i].node = regnodes[i]; 124 } 125 126 regbits = RtoB(D_SP); 127 for(z=0; z<BITS; z++) { 128 externs.b[z] = 0; 129 params.b[z] = 0; 130 consts.b[z] = 0; 131 addrs.b[z] = 0; 132 ivar.b[z] = 0; 133 ovar.b[z] = 0; 134 } 135 136 /* 137 * pass 1 138 * build aux data structure 139 * allocate pcs 140 * find use and set of variables 141 */ 142 g = flowstart(firstp, sizeof(Reg)); 143 if(g == nil) { 144 for(i=0; i<nvar; i++) 145 var[i].node->opt = nil; 146 return; 147 } 148 149 firstr = (Reg*)g->start; 150 151 for(r = firstr; r != R; r = (Reg*)r->f.link) { 152 p = r->f.prog; 153 if(p->as == AVARDEF || p->as == AVARKILL) 154 continue; 155 proginfo(&info, p); 156 157 // Avoid making variables for direct-called functions. 158 if(p->as == ACALL && p->to.type == D_EXTERN) 159 continue; 160 161 r->use1.b[0] |= info.reguse | info.regindex; 162 r->set.b[0] |= info.regset; 163 164 bit = mkvar(r, &p->from); 165 if(bany(&bit)) { 166 if(info.flags & LeftAddr) 167 setaddrs(bit); 168 if(info.flags & LeftRead) 169 for(z=0; z<BITS; z++) 170 r->use1.b[z] |= bit.b[z]; 171 if(info.flags & LeftWrite) 172 for(z=0; z<BITS; z++) 173 r->set.b[z] |= bit.b[z]; 174 } 175 176 bit = mkvar(r, &p->to); 177 if(bany(&bit)) { 178 if(info.flags & RightAddr) 179 setaddrs(bit); 180 if(info.flags & RightRead) 181 for(z=0; z<BITS; z++) 182 r->use2.b[z] |= bit.b[z]; 183 if(info.flags & RightWrite) 184 for(z=0; z<BITS; z++) 185 r->set.b[z] |= bit.b[z]; 186 } 187 } 188 if(firstr == R) 189 return; 190 191 for(i=0; i<nvar; i++) { 192 Var *v = var+i; 193 if(v->addr) { 194 bit = blsh(i); 195 for(z=0; z<BITS; z++) 196 addrs.b[z] |= bit.b[z]; 197 } 198 199 if(debug['R'] && debug['v']) 200 print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", 201 i, v->addr, v->etype, v->width, v->node, v->offset); 202 } 203 204 if(debug['R'] && debug['v']) 205 
dumpit("pass1", &firstr->f, 1); 206 207 /* 208 * pass 2 209 * find looping structure 210 */ 211 flowrpo(g); 212 213 if(debug['R'] && debug['v']) 214 dumpit("pass2", &firstr->f, 1); 215 216 /* 217 * pass 2.5 218 * iterate propagating fat vardef covering forward 219 * r->act records vars with a VARDEF since the last CALL. 220 * (r->act will be reused in pass 5 for something else, 221 * but we'll be done with it by then.) 222 */ 223 active = 0; 224 for(r = firstr; r != R; r = (Reg*)r->f.link) { 225 r->f.active = 0; 226 r->act = zbits; 227 } 228 for(r = firstr; r != R; r = (Reg*)r->f.link) { 229 p = r->f.prog; 230 if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { 231 active++; 232 walkvardef(p->to.node, r, active); 233 } 234 } 235 236 /* 237 * pass 3 238 * iterate propagating usage 239 * back until flow graph is complete 240 */ 241 loop1: 242 change = 0; 243 for(r = firstr; r != R; r = (Reg*)r->f.link) 244 r->f.active = 0; 245 for(r = firstr; r != R; r = (Reg*)r->f.link) 246 if(r->f.prog->as == ARET) 247 prop(r, zbits, zbits); 248 loop11: 249 /* pick up unreachable code */ 250 i = 0; 251 for(r = firstr; r != R; r = r1) { 252 r1 = (Reg*)r->f.link; 253 if(r1 && r1->f.active && !r->f.active) { 254 prop(r, zbits, zbits); 255 i = 1; 256 } 257 } 258 if(i) 259 goto loop11; 260 if(change) 261 goto loop1; 262 263 if(debug['R'] && debug['v']) 264 dumpit("pass3", &firstr->f, 1); 265 266 /* 267 * pass 4 268 * iterate propagating register/variable synchrony 269 * forward until graph is complete 270 */ 271 loop2: 272 change = 0; 273 for(r = firstr; r != R; r = (Reg*)r->f.link) 274 r->f.active = 0; 275 synch(firstr, zbits); 276 if(change) 277 goto loop2; 278 279 if(debug['R'] && debug['v']) 280 dumpit("pass4", &firstr->f, 1); 281 282 /* 283 * pass 4.5 284 * move register pseudo-variables into regu. 
285 */ 286 for(r = firstr; r != R; r = (Reg*)r->f.link) { 287 r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; 288 289 r->set.b[0] &= ~REGBITS; 290 r->use1.b[0] &= ~REGBITS; 291 r->use2.b[0] &= ~REGBITS; 292 r->refbehind.b[0] &= ~REGBITS; 293 r->refahead.b[0] &= ~REGBITS; 294 r->calbehind.b[0] &= ~REGBITS; 295 r->calahead.b[0] &= ~REGBITS; 296 r->regdiff.b[0] &= ~REGBITS; 297 r->act.b[0] &= ~REGBITS; 298 } 299 300 /* 301 * pass 5 302 * isolate regions 303 * calculate costs (paint1) 304 */ 305 r = firstr; 306 if(r) { 307 for(z=0; z<BITS; z++) 308 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 309 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 310 if(bany(&bit) && !r->f.refset) { 311 // should never happen - all variables are preset 312 if(debug['w']) 313 print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); 314 r->f.refset = 1; 315 } 316 } 317 for(r = firstr; r != R; r = (Reg*)r->f.link) 318 r->act = zbits; 319 rgp = region; 320 nregion = 0; 321 for(r = firstr; r != R; r = (Reg*)r->f.link) { 322 for(z=0; z<BITS; z++) 323 bit.b[z] = r->set.b[z] & 324 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 325 if(bany(&bit) && !r->f.refset) { 326 if(debug['w']) 327 print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); 328 r->f.refset = 1; 329 excise(&r->f); 330 } 331 for(z=0; z<BITS; z++) 332 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 333 while(bany(&bit)) { 334 i = bnum(bit); 335 rgp->enter = r; 336 rgp->varno = i; 337 change = 0; 338 paint1(r, i); 339 bit.b[i/32] &= ~(1L<<(i%32)); 340 if(change <= 0) 341 continue; 342 rgp->cost = change; 343 nregion++; 344 if(nregion >= NRGN) { 345 if(debug['R'] && debug['v']) 346 print("too many regions\n"); 347 goto brk; 348 } 349 rgp++; 350 } 351 } 352 brk: 353 qsort(region, nregion, sizeof(region[0]), rcmp); 354 355 /* 356 * pass 6 357 * determine used registers (paint2) 358 * replace code (paint3) 359 */ 360 rgp = region; 361 for(i=0; i<nregion; i++) { 362 bit = blsh(rgp->varno); 363 vreg 
= paint2(rgp->enter, rgp->varno); 364 vreg = allreg(vreg, rgp); 365 if(rgp->regno != 0) 366 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 367 rgp++; 368 } 369 370 if(debug['R'] && debug['v']) 371 dumpit("pass6", &firstr->f, 1); 372 373 /* 374 * free aux structures. peep allocates new ones. 375 */ 376 for(i=0; i<nvar; i++) 377 var[i].node->opt = nil; 378 flowend(g); 379 firstr = R; 380 381 /* 382 * pass 7 383 * peep-hole on basic block 384 */ 385 if(!debug['R'] || debug['P']) 386 peep(firstp); 387 388 /* 389 * eliminate nops 390 */ 391 for(p=firstp; p!=P; p=p->link) { 392 while(p->link != P && p->link->as == ANOP) 393 p->link = p->link->link; 394 if(p->to.type == D_BRANCH) 395 while(p->to.u.branch != P && p->to.u.branch->as == ANOP) 396 p->to.u.branch = p->to.u.branch->link; 397 } 398 399 if(!use_sse) 400 for(p=firstp; p!=P; p=p->link) { 401 if(p->from.type >= D_X0 && p->from.type <= D_X7) 402 fatal("invalid use of %R with GO386=387: %P", p->from.type, p); 403 if(p->to.type >= D_X0 && p->to.type <= D_X7) 404 fatal("invalid use of %R with GO386=387: %P", p->to.type, p); 405 } 406 407 if(debug['R']) { 408 if(ostats.ncvtreg || 409 ostats.nspill || 410 ostats.nreload || 411 ostats.ndelmov || 412 ostats.nvar || 413 ostats.naddr || 414 0) 415 print("\nstats\n"); 416 417 if(ostats.ncvtreg) 418 print(" %4d cvtreg\n", ostats.ncvtreg); 419 if(ostats.nspill) 420 print(" %4d spill\n", ostats.nspill); 421 if(ostats.nreload) 422 print(" %4d reload\n", ostats.nreload); 423 if(ostats.ndelmov) 424 print(" %4d delmov\n", ostats.ndelmov); 425 if(ostats.nvar) 426 print(" %4d var\n", ostats.nvar); 427 if(ostats.naddr) 428 print(" %4d addr\n", ostats.naddr); 429 430 memset(&ostats, 0, sizeof(ostats)); 431 } 432 } 433 434 static void 435 walkvardef(Node *n, Reg *r, int active) 436 { 437 Reg *r1, *r2; 438 int bn; 439 Var *v; 440 441 for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { 442 if(r1->f.active == active) 443 break; 444 r1->f.active = active; 445 if(r1->f.prog->as == AVARKILL && 
r1->f.prog->to.node == n) 446 break; 447 for(v=n->opt; v!=nil; v=v->nextinnode) { 448 bn = v - var; 449 r1->act.b[bn/32] |= 1L << (bn%32); 450 } 451 if(r1->f.prog->as == ACALL) 452 break; 453 } 454 455 for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) 456 if(r2->f.s2 != nil) 457 walkvardef(n, (Reg*)r2->f.s2, active); 458 } 459 460 /* 461 * add mov b,rn 462 * just after r 463 */ 464 void 465 addmove(Reg *r, int bn, int rn, int f) 466 { 467 Prog *p, *p1; 468 Adr *a; 469 Var *v; 470 471 p1 = mal(sizeof(*p1)); 472 clearp(p1); 473 p1->pc = 9999; 474 475 p = r->f.prog; 476 p1->link = p->link; 477 p->link = p1; 478 p1->lineno = p->lineno; 479 480 v = var + bn; 481 482 a = &p1->to; 483 a->offset = v->offset; 484 a->etype = v->etype; 485 a->type = v->name; 486 a->node = v->node; 487 a->sym = linksym(v->node->sym); 488 489 // need to clean this up with wptr and 490 // some of the defaults 491 p1->as = AMOVL; 492 switch(v->etype) { 493 default: 494 fatal("unknown type %E", v->etype); 495 case TINT8: 496 case TUINT8: 497 case TBOOL: 498 p1->as = AMOVB; 499 break; 500 case TINT16: 501 case TUINT16: 502 p1->as = AMOVW; 503 break; 504 case TFLOAT32: 505 p1->as = AMOVSS; 506 break; 507 case TFLOAT64: 508 p1->as = AMOVSD; 509 break; 510 case TINT: 511 case TUINT: 512 case TINT32: 513 case TUINT32: 514 case TPTR32: 515 break; 516 } 517 518 p1->from.type = rn; 519 if(!f) { 520 p1->from = *a; 521 *a = zprog.from; 522 a->type = rn; 523 if(v->etype == TUINT8) 524 p1->as = AMOVB; 525 if(v->etype == TUINT16) 526 p1->as = AMOVW; 527 } 528 if(debug['R'] && debug['v']) 529 print("%P ===add=== %P\n", p, p1); 530 ostats.nspill++; 531 } 532 533 uint32 534 doregbits(int r) 535 { 536 uint32 b; 537 538 b = 0; 539 if(r >= D_INDIR) 540 r -= D_INDIR; 541 if(r >= D_AX && r <= D_DI) 542 b |= RtoB(r); 543 else 544 if(r >= D_AL && r <= D_BL) 545 b |= RtoB(r-D_AL+D_AX); 546 else 547 if(r >= D_AH && r <= D_BH) 548 b |= RtoB(r-D_AH+D_AX); 549 else 550 if(r >= D_X0 && r <= D_X0+7) 551 b |= FtoB(r); 552 return b; 553 } 
554 555 static int 556 overlap(int32 o1, int w1, int32 o2, int w2) 557 { 558 int32 t1, t2; 559 560 t1 = o1+w1; 561 t2 = o2+w2; 562 563 if(!(t1 > o2 && t2 > o1)) 564 return 0; 565 566 return 1; 567 } 568 569 Bits 570 mkvar(Reg *r, Adr *a) 571 { 572 Var *v; 573 int i, t, n, et, z, w, flag, regu; 574 int32 o; 575 Bits bit; 576 Node *node; 577 578 /* 579 * mark registers used 580 */ 581 t = a->type; 582 if(t == D_NONE) 583 goto none; 584 585 if(r != R) 586 r->use1.b[0] |= doregbits(a->index); 587 588 switch(t) { 589 default: 590 regu = doregbits(t); 591 if(regu == 0) 592 goto none; 593 bit = zbits; 594 bit.b[0] = regu; 595 return bit; 596 597 case D_ADDR: 598 a->type = a->index; 599 bit = mkvar(r, a); 600 setaddrs(bit); 601 a->type = t; 602 ostats.naddr++; 603 goto none; 604 605 case D_EXTERN: 606 case D_STATIC: 607 case D_PARAM: 608 case D_AUTO: 609 n = t; 610 break; 611 } 612 613 node = a->node; 614 if(node == N || node->op != ONAME || node->orig == N) 615 goto none; 616 node = node->orig; 617 if(node->orig != node) 618 fatal("%D: bad node", a); 619 if(node->sym == S || node->sym->name[0] == '.') 620 goto none; 621 et = a->etype; 622 o = a->offset; 623 w = a->width; 624 if(w < 0) 625 fatal("bad width %d for %D", w, a); 626 627 flag = 0; 628 for(i=0; i<nvar; i++) { 629 v = var+i; 630 if(v->node == node && v->name == n) { 631 if(v->offset == o) 632 if(v->etype == et) 633 if(v->width == w) 634 return blsh(i); 635 636 // if they overlap, disable both 637 if(overlap(v->offset, v->width, o, w)) { 638 if(debug['R']) 639 print("disable %s\n", node->sym->name); 640 v->addr = 1; 641 flag = 1; 642 } 643 } 644 } 645 646 switch(et) { 647 case 0: 648 case TFUNC: 649 goto none; 650 } 651 652 if(nvar >= NVAR) { 653 if(debug['w'] > 1 && node != N) 654 fatal("variable not optimized: %D", a); 655 656 // If we're not tracking a word in a variable, mark the rest as 657 // having its address taken, so that we keep the whole thing 658 // live at all calls. 
otherwise we might optimize away part of 659 // a variable but not all of it. 660 for(i=0; i<nvar; i++) { 661 v = var+i; 662 if(v->node == node) 663 v->addr = 1; 664 } 665 goto none; 666 } 667 668 i = nvar; 669 nvar++; 670 v = var+i; 671 v->offset = o; 672 v->name = n; 673 v->etype = et; 674 v->width = w; 675 v->addr = flag; // funny punning 676 v->node = node; 677 678 // node->opt is the head of a linked list 679 // of Vars within the given Node, so that 680 // we can start at a Var and find all the other 681 // Vars in the same Go variable. 682 v->nextinnode = node->opt; 683 node->opt = v; 684 685 bit = blsh(i); 686 if(n == D_EXTERN || n == D_STATIC) 687 for(z=0; z<BITS; z++) 688 externs.b[z] |= bit.b[z]; 689 if(n == D_PARAM) 690 for(z=0; z<BITS; z++) 691 params.b[z] |= bit.b[z]; 692 693 if(node->class == PPARAM) 694 for(z=0; z<BITS; z++) 695 ivar.b[z] |= bit.b[z]; 696 if(node->class == PPARAMOUT) 697 for(z=0; z<BITS; z++) 698 ovar.b[z] |= bit.b[z]; 699 700 // Treat values with their address taken as live at calls, 701 // because the garbage collector's liveness analysis in ../gc/plive.c does. 702 // These must be consistent or else we will elide stores and the garbage 703 // collector will see uninitialized data. 704 // The typical case where our own analysis is out of sync is when the 705 // node appears to have its address taken but that code doesn't actually 706 // get generated and therefore doesn't show up as an address being 707 // taken when we analyze the instruction stream. 708 // One instance of this case is when a closure uses the same name as 709 // an outer variable for one of its own variables declared with :=. 710 // The parser flags the outer variable as possibly shared, and therefore 711 // sets addrtaken, even though it ends up not being actually shared. 712 // If we were better about _ elision, _ = &x would suffice too. 
713 // The broader := in a closure problem is mentioned in a comment in 714 // closure.c:/^typecheckclosure and dcl.c:/^oldname. 715 if(node->addrtaken) 716 v->addr = 1; 717 718 // Disable registerization for globals, because: 719 // (1) we might panic at any time and we want the recovery code 720 // to see the latest values (issue 1304). 721 // (2) we don't know what pointers might point at them and we want 722 // loads via those pointers to see updated values and vice versa (issue 7995). 723 // 724 // Disable registerization for results if using defer, because the deferred func 725 // might recover and return, causing the current values to be used. 726 if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) 727 v->addr = 1; 728 729 if(debug['R']) 730 print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); 731 ostats.nvar++; 732 733 return bit; 734 735 none: 736 return zbits; 737 } 738 739 void 740 prop(Reg *r, Bits ref, Bits cal) 741 { 742 Reg *r1, *r2; 743 int z, i, j; 744 Var *v, *v1; 745 746 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { 747 for(z=0; z<BITS; z++) { 748 ref.b[z] |= r1->refahead.b[z]; 749 if(ref.b[z] != r1->refahead.b[z]) { 750 r1->refahead.b[z] = ref.b[z]; 751 change++; 752 } 753 cal.b[z] |= r1->calahead.b[z]; 754 if(cal.b[z] != r1->calahead.b[z]) { 755 r1->calahead.b[z] = cal.b[z]; 756 change++; 757 } 758 } 759 switch(r1->f.prog->as) { 760 case ACALL: 761 if(noreturn(r1->f.prog)) 762 break; 763 764 // Mark all input variables (ivar) as used, because that's what the 765 // liveness bitmaps say. The liveness bitmaps say that so that a 766 // panic will not show stale values in the parameter dump. 767 // Mark variables with a recent VARDEF (r1->act) as used, 768 // so that the optimizer flushes initializations to memory, 769 // so that if a garbage collection happens during this CALL, 770 // the collector will see initialized memory. Again this is to 771 // match what the liveness bitmaps say. 
772 for(z=0; z<BITS; z++) { 773 cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; 774 ref.b[z] = 0; 775 } 776 777 // cal.b is the current approximation of what's live across the call. 778 // Every bit in cal.b is a single stack word. For each such word, 779 // find all the other tracked stack words in the same Go variable 780 // (struct/slice/string/interface) and mark them live too. 781 // This is necessary because the liveness analysis for the garbage 782 // collector works at variable granularity, not at word granularity. 783 // It is fundamental for slice/string/interface: the garbage collector 784 // needs the whole value, not just some of the words, in order to 785 // interpret the other bits correctly. Specifically, slice needs a consistent 786 // ptr and cap, string needs a consistent ptr and len, and interface 787 // needs a consistent type word and data word. 788 for(z=0; z<BITS; z++) { 789 if(cal.b[z] == 0) 790 continue; 791 for(i=0; i<32; i++) { 792 if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) 793 continue; 794 v = var+z*32+i; 795 if(v->node->opt == nil) // v represents fixed register, not Go variable 796 continue; 797 798 // v->node->opt is the head of a linked list of Vars 799 // corresponding to tracked words from the Go variable v->node. 800 // Walk the list and set all the bits. 801 // For a large struct this could end up being quadratic: 802 // after the first setting, the outer loop (for z, i) would see a 1 bit 803 // for all of the remaining words in the struct, and for each such 804 // word would go through and turn on all the bits again. 805 // To avoid the quadratic behavior, we only turn on the bits if 806 // v is the head of the list or if the head's bit is not yet turned on. 807 // This will set the bits at most twice, keeping the overall loop linear. 
808 v1 = v->node->opt; 809 j = v1 - var; 810 if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { 811 for(; v1 != nil; v1 = v1->nextinnode) { 812 j = v1 - var; 813 cal.b[j/32] |= 1<<(j&31); 814 } 815 } 816 } 817 } 818 break; 819 820 case ATEXT: 821 for(z=0; z<BITS; z++) { 822 cal.b[z] = 0; 823 ref.b[z] = 0; 824 } 825 break; 826 827 case ARET: 828 for(z=0; z<BITS; z++) { 829 cal.b[z] = externs.b[z] | ovar.b[z]; 830 ref.b[z] = 0; 831 } 832 break; 833 } 834 for(z=0; z<BITS; z++) { 835 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 836 r1->use1.b[z] | r1->use2.b[z]; 837 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 838 r1->refbehind.b[z] = ref.b[z]; 839 r1->calbehind.b[z] = cal.b[z]; 840 } 841 if(r1->f.active) 842 break; 843 r1->f.active = 1; 844 } 845 for(; r != r1; r = (Reg*)r->f.p1) 846 for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) 847 prop(r2, r->refbehind, r->calbehind); 848 } 849 850 void 851 synch(Reg *r, Bits dif) 852 { 853 Reg *r1; 854 int z; 855 856 for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { 857 for(z=0; z<BITS; z++) { 858 dif.b[z] = (dif.b[z] & 859 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 860 r1->set.b[z] | r1->regdiff.b[z]; 861 if(dif.b[z] != r1->regdiff.b[z]) { 862 r1->regdiff.b[z] = dif.b[z]; 863 change++; 864 } 865 } 866 if(r1->f.active) 867 break; 868 r1->f.active = 1; 869 for(z=0; z<BITS; z++) 870 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 871 if((Reg*)r1->f.s2 != R) 872 synch((Reg*)r1->f.s2, dif); 873 } 874 } 875 876 uint32 877 allreg(uint32 b, Rgn *r) 878 { 879 Var *v; 880 int i; 881 882 v = var + r->varno; 883 r->regno = 0; 884 switch(v->etype) { 885 886 default: 887 fatal("unknown etype %d/%E", bitno(b), v->etype); 888 break; 889 890 case TINT8: 891 case TUINT8: 892 case TINT16: 893 case TUINT16: 894 case TINT32: 895 case TUINT32: 896 case TINT64: 897 case TINT: 898 case TUINT: 899 case TUINTPTR: 900 case TBOOL: 901 case TPTR32: 902 i = BtoR(~b); 903 if(i && r->cost > 0) { 904 r->regno = i; 905 return RtoB(i); 906 
} 907 break; 908 909 case TFLOAT32: 910 case TFLOAT64: 911 if(!use_sse) 912 break; 913 i = BtoF(~b); 914 if(i && r->cost > 0) { 915 r->regno = i; 916 return FtoB(i); 917 } 918 break; 919 } 920 return 0; 921 } 922 923 void 924 paint1(Reg *r, int bn) 925 { 926 Reg *r1; 927 Prog *p; 928 int z; 929 uint32 bb; 930 931 z = bn/32; 932 bb = 1L<<(bn%32); 933 if(r->act.b[z] & bb) 934 return; 935 for(;;) { 936 if(!(r->refbehind.b[z] & bb)) 937 break; 938 r1 = (Reg*)r->f.p1; 939 if(r1 == R) 940 break; 941 if(!(r1->refahead.b[z] & bb)) 942 break; 943 if(r1->act.b[z] & bb) 944 break; 945 r = r1; 946 } 947 948 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 949 change -= CLOAD * r->f.loop; 950 } 951 for(;;) { 952 r->act.b[z] |= bb; 953 p = r->f.prog; 954 955 if(r->f.prog->as != ANOP) { // don't give credit for NOPs 956 if(r->use1.b[z] & bb) { 957 change += CREF * r->f.loop; 958 if(p->as == AFMOVL || p->as == AFMOVW) 959 if(BtoR(bb) != D_F0) 960 change = -CINF; 961 } 962 if((r->use2.b[z]|r->set.b[z]) & bb) { 963 change += CREF * r->f.loop; 964 if(p->as == AFMOVL || p->as == AFMOVW) 965 if(BtoR(bb) != D_F0) 966 change = -CINF; 967 } 968 } 969 970 if(STORE(r) & r->regdiff.b[z] & bb) { 971 change -= CLOAD * r->f.loop; 972 if(p->as == AFMOVL || p->as == AFMOVW) 973 if(BtoR(bb) != D_F0) 974 change = -CINF; 975 } 976 977 if(r->refbehind.b[z] & bb) 978 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 979 if(r1->refahead.b[z] & bb) 980 paint1(r1, bn); 981 982 if(!(r->refahead.b[z] & bb)) 983 break; 984 r1 = (Reg*)r->f.s2; 985 if(r1 != R) 986 if(r1->refbehind.b[z] & bb) 987 paint1(r1, bn); 988 r = (Reg*)r->f.s1; 989 if(r == R) 990 break; 991 if(r->act.b[z] & bb) 992 break; 993 if(!(r->refbehind.b[z] & bb)) 994 break; 995 } 996 } 997 998 uint32 999 regset(Reg *r, uint32 bb) 1000 { 1001 uint32 b, set; 1002 Adr v; 1003 int c; 1004 1005 set = 0; 1006 v = zprog.from; 1007 while(b = bb & ~(bb-1)) { 1008 v.type = b & 0xFF ? 
BtoR(b): BtoF(b); 1009 c = copyu(r->f.prog, &v, nil); 1010 if(c == 3) 1011 set |= b; 1012 bb &= ~b; 1013 } 1014 return set; 1015 } 1016 1017 uint32 1018 reguse(Reg *r, uint32 bb) 1019 { 1020 uint32 b, set; 1021 Adr v; 1022 int c; 1023 1024 set = 0; 1025 v = zprog.from; 1026 while(b = bb & ~(bb-1)) { 1027 v.type = b & 0xFF ? BtoR(b): BtoF(b); 1028 c = copyu(r->f.prog, &v, nil); 1029 if(c == 1 || c == 2 || c == 4) 1030 set |= b; 1031 bb &= ~b; 1032 } 1033 return set; 1034 } 1035 1036 uint32 1037 paint2(Reg *r, int bn) 1038 { 1039 Reg *r1; 1040 int z; 1041 uint32 bb, vreg, x; 1042 1043 z = bn/32; 1044 bb = 1L << (bn%32); 1045 vreg = regbits; 1046 if(!(r->act.b[z] & bb)) 1047 return vreg; 1048 for(;;) { 1049 if(!(r->refbehind.b[z] & bb)) 1050 break; 1051 r1 = (Reg*)r->f.p1; 1052 if(r1 == R) 1053 break; 1054 if(!(r1->refahead.b[z] & bb)) 1055 break; 1056 if(!(r1->act.b[z] & bb)) 1057 break; 1058 r = r1; 1059 } 1060 for(;;) { 1061 r->act.b[z] &= ~bb; 1062 1063 vreg |= r->regu; 1064 1065 if(r->refbehind.b[z] & bb) 1066 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1067 if(r1->refahead.b[z] & bb) 1068 vreg |= paint2(r1, bn); 1069 1070 if(!(r->refahead.b[z] & bb)) 1071 break; 1072 r1 = (Reg*)r->f.s2; 1073 if(r1 != R) 1074 if(r1->refbehind.b[z] & bb) 1075 vreg |= paint2(r1, bn); 1076 r = (Reg*)r->f.s1; 1077 if(r == R) 1078 break; 1079 if(!(r->act.b[z] & bb)) 1080 break; 1081 if(!(r->refbehind.b[z] & bb)) 1082 break; 1083 } 1084 1085 bb = vreg; 1086 for(; r; r=(Reg*)r->f.s1) { 1087 x = r->regu & ~bb; 1088 if(x) { 1089 vreg |= reguse(r, x); 1090 bb |= regset(r, x); 1091 } 1092 } 1093 return vreg; 1094 } 1095 1096 void 1097 paint3(Reg *r, int bn, int32 rb, int rn) 1098 { 1099 Reg *r1; 1100 Prog *p; 1101 int z; 1102 uint32 bb; 1103 1104 z = bn/32; 1105 bb = 1L << (bn%32); 1106 if(r->act.b[z] & bb) 1107 return; 1108 for(;;) { 1109 if(!(r->refbehind.b[z] & bb)) 1110 break; 1111 r1 = (Reg*)r->f.p1; 1112 if(r1 == R) 1113 break; 1114 if(!(r1->refahead.b[z] & bb)) 1115 
break; 1116 if(r1->act.b[z] & bb) 1117 break; 1118 r = r1; 1119 } 1120 1121 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1122 addmove(r, bn, rn, 0); 1123 for(;;) { 1124 r->act.b[z] |= bb; 1125 p = r->f.prog; 1126 1127 if(r->use1.b[z] & bb) { 1128 if(debug['R'] && debug['v']) 1129 print("%P", p); 1130 addreg(&p->from, rn); 1131 if(debug['R'] && debug['v']) 1132 print(" ===change== %P\n", p); 1133 } 1134 if((r->use2.b[z]|r->set.b[z]) & bb) { 1135 if(debug['R'] && debug['v']) 1136 print("%P", p); 1137 addreg(&p->to, rn); 1138 if(debug['R'] && debug['v']) 1139 print(" ===change== %P\n", p); 1140 } 1141 1142 if(STORE(r) & r->regdiff.b[z] & bb) 1143 addmove(r, bn, rn, 1); 1144 r->regu |= rb; 1145 1146 if(r->refbehind.b[z] & bb) 1147 for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) 1148 if(r1->refahead.b[z] & bb) 1149 paint3(r1, bn, rb, rn); 1150 1151 if(!(r->refahead.b[z] & bb)) 1152 break; 1153 r1 = (Reg*)r->f.s2; 1154 if(r1 != R) 1155 if(r1->refbehind.b[z] & bb) 1156 paint3(r1, bn, rb, rn); 1157 r = (Reg*)r->f.s1; 1158 if(r == R) 1159 break; 1160 if(r->act.b[z] & bb) 1161 break; 1162 if(!(r->refbehind.b[z] & bb)) 1163 break; 1164 } 1165 } 1166 1167 void 1168 addreg(Adr *a, int rn) 1169 { 1170 a->sym = nil; 1171 a->node = nil; 1172 a->offset = 0; 1173 a->type = rn; 1174 1175 ostats.ncvtreg++; 1176 } 1177 1178 int32 1179 RtoB(int r) 1180 { 1181 1182 if(r < D_AX || r > D_DI) 1183 return 0; 1184 return 1L << (r-D_AX); 1185 } 1186 1187 int 1188 BtoR(int32 b) 1189 { 1190 1191 b &= 0xffL; 1192 if(b == 0) 1193 return 0; 1194 return bitno(b) + D_AX; 1195 } 1196 1197 int32 1198 FtoB(int f) 1199 { 1200 if(f < D_X0 || f > D_X7) 1201 return 0; 1202 return 1L << (f - D_X0 + 8); 1203 } 1204 1205 int 1206 BtoF(int32 b) 1207 { 1208 b &= 0xFF00L; 1209 if(b == 0) 1210 return 0; 1211 return bitno(b) - 8 + D_X0; 1212 } 1213 1214 void 1215 dumpone(Flow *f, int isreg) 1216 { 1217 int z; 1218 Bits bit; 1219 Reg *r; 1220 1221 print("%d:%P", f->loop, f->prog); 1222 
if(isreg) { 1223 r = (Reg*)f; 1224 for(z=0; z<BITS; z++) 1225 bit.b[z] = 1226 r->set.b[z] | 1227 r->use1.b[z] | 1228 r->use2.b[z] | 1229 r->refbehind.b[z] | 1230 r->refahead.b[z] | 1231 r->calbehind.b[z] | 1232 r->calahead.b[z] | 1233 r->regdiff.b[z] | 1234 r->act.b[z] | 1235 0; 1236 if(bany(&bit)) { 1237 print("\t"); 1238 if(bany(&r->set)) 1239 print(" s:%Q", r->set); 1240 if(bany(&r->use1)) 1241 print(" u1:%Q", r->use1); 1242 if(bany(&r->use2)) 1243 print(" u2:%Q", r->use2); 1244 if(bany(&r->refbehind)) 1245 print(" rb:%Q ", r->refbehind); 1246 if(bany(&r->refahead)) 1247 print(" ra:%Q ", r->refahead); 1248 if(bany(&r->calbehind)) 1249 print(" cb:%Q ", r->calbehind); 1250 if(bany(&r->calahead)) 1251 print(" ca:%Q ", r->calahead); 1252 if(bany(&r->regdiff)) 1253 print(" d:%Q ", r->regdiff); 1254 if(bany(&r->act)) 1255 print(" a:%Q ", r->act); 1256 } 1257 } 1258 print("\n"); 1259 } 1260 1261 void 1262 dumpit(char *str, Flow *r0, int isreg) 1263 { 1264 Flow *r, *r1; 1265 1266 print("\n%s\n", str); 1267 for(r = r0; r != nil; r = r->link) { 1268 dumpone(r, isreg); 1269 r1 = r->p2; 1270 if(r1 != nil) { 1271 print(" pred:"); 1272 for(; r1 != nil; r1 = r1->p2link) 1273 print(" %.4ud", (int)r1->prog->pc); 1274 print("\n"); 1275 } 1276 // r1 = r->s1; 1277 // if(r1 != nil) { 1278 // print(" succ:"); 1279 // for(; r1 != R; r1 = r1->s1) 1280 // print(" %.4ud", (int)r1->prog->pc); 1281 // print("\n"); 1282 // } 1283 } 1284 }