github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/gc/popt.c (about) 1 // Derived from Inferno utils/6c/reg.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 30 31 // "Portable" optimizations. 32 // Compiled separately for 5g, 6g, and 8g, so allowed to use gg.h, opt.h. 
// Must code to the intersection of the three back ends.

#include <u.h>
#include <libc.h>
#include "gg.h"
#include "opt.h"

// p is a call instruction. Does the call fail to return?
// Used by the back ends to treat calls to panicking runtime
// entry points as block terminators.
int
noreturn(Prog *p)
{
	Sym *s;
	int i;
	static Sym* symlist[10];

	// Lazily build the table of runtime functions that never return.
	// Only 8 of the 10 slots are used; the remaining entries stay S (nil)
	// and terminate the scan loop below.
	if(symlist[0] == S) {
		symlist[0] = pkglookup("panicindex", runtimepkg);
		symlist[1] = pkglookup("panicslice", runtimepkg);
		symlist[2] = pkglookup("throwinit", runtimepkg);
		symlist[3] = pkglookup("panic", runtimepkg);
		symlist[4] = pkglookup("panicwrap", runtimepkg);
		symlist[5] = pkglookup("throwreturn", runtimepkg);
		symlist[6] = pkglookup("selectgo", runtimepkg);
		symlist[7] = pkglookup("block", runtimepkg);
	}

	// Indirect or unnamed call targets cannot be matched; assume they return.
	if(p->to.node == nil)
		return 0;
	s = p->to.node->sym;
	if(s == S)
		return 0;
	for(i=0; symlist[i]!=S; i++)
		if(s == symlist[i])
			return 1;
	return 0;
}

// JMP chasing and removal.
//
// The code generator depends on being able to write out jump
// instructions that it can jump to now but fill in later.
// the linker will resolve them nicely, but they make the code
// longer and more difficult to follow during debugging.
// Remove them.

/* what instruction does a JMP to p eventually land on? */
static Prog*
chasejmp(Prog *p, int *jmploop)
{
	int n;

	// Follow at most 10 consecutive JMPs; a longer chain is assumed
	// to be a jump loop, which is reported via *jmploop so that
	// fixjmp skips its unsafe JMP-elision pass.
	n = 0;
	while(p != P && p->as == AJMP && p->to.type == D_BRANCH) {
		if(++n > 10) {
			*jmploop = 1;
			break;
		}
		p = p->to.u.branch;
	}
	return p;
}
/*
 * reuse reg pointer for mark/sweep state.
 * leave reg==nil at end because alive==nil.
 */
#define alive ((void*)0)
#define dead ((void*)1)

/* mark all code reachable from firstp as alive */
static void
mark(Prog *firstp)
{
	Prog *p;

	for(p=firstp; p; p=p->link) {
		// Already visited (opt was flipped to alive): stop this walk.
		if(p->opt != dead)
			break;
		p->opt = alive;
		// Recurse into branch targets; CALL targets are separate
		// functions, not part of this flow graph.
		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch)
			mark(p->to.u.branch);
		// Unconditional control transfer ends fallthrough.
		if(p->as == AJMP || p->as == ARET || p->as == AUNDEF)
			break;
	}
}

// fixjmp collapses chains of JMPs, deletes unreachable code,
// and elides JMPs to the immediately following instruction.
void
fixjmp(Prog *firstp)
{
	int jmploop;
	Prog *p, *last;

	if(debug['R'] && debug['v'])
		print("\nfixjmp\n");

	// pass 1: resolve jump to jump, mark all code as dead.
	jmploop = 0;
	for(p=firstp; p; p=p->link) {
		if(debug['R'] && debug['v'])
			print("%P\n", p);
		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) {
			p->to.u.branch = chasejmp(p->to.u.branch, &jmploop);
			if(debug['R'] && debug['v'])
				print("->%P\n", p);
		}
		p->opt = dead;
	}
	if(debug['R'] && debug['v'])
		print("\n");

	// pass 2: mark all reachable code alive
	mark(firstp);

	// pass 3: delete dead code (mostly JMPs).
	last = nil;
	for(p=firstp; p; p=p->link) {
		if(p->opt == dead) {
			if(p->link == P && p->as == ARET && last && last->as != ARET) {
				// This is the final ARET, and the code so far doesn't have one.
				// Let it stay. The register allocator assumes that all live code in
				// the function can be traversed by starting at all the RET instructions
				// and following predecessor links. If we remove the final RET,
				// this assumption will not hold in the case of an infinite loop
				// at the end of a function.
				// Keep the RET but mark it dead for the liveness analysis.
				p->mode = 1;
			} else {
				if(debug['R'] && debug['v'])
					print("del %P\n", p);
				continue;
			}
		}
		if(last)
			last->link = p;
		last = p;
	}
	last->link = P;

	// pass 4: elide JMP to next instruction.
	// only safe if there are no jumps to JMPs anymore.
	if(!jmploop) {
		last = nil;
		for(p=firstp; p; p=p->link) {
			if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) {
				if(debug['R'] && debug['v'])
					print("del %P\n", p);
				continue;
			}
			if(last)
				last->link = p;
			last = p;
		}
		last->link = P;
	}

	if(debug['R'] && debug['v']) {
		print("\n");
		for(p=firstp; p; p=p->link)
			print("%P\n", p);
		print("\n");
	}
}

#undef alive
#undef dead

// Control flow analysis. The Flow structures hold predecessor and successor
// information as well as basic loop analysis.
//
//	graph = flowstart(firstp, sizeof(Flow));
//	... use flow graph ...
//	flowend(graph); // free graph
//
// Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
//
//	for(f = graph->start; f != nil; f = f->link)
//
// or, given an instruction f, to iterate over all the predecessors, which is
// f->p1 and this list:
//
//	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
//
// Often the Flow struct is embedded as the first field inside a larger struct S.
// In that case casts are needed to convert Flow* to S* in many places but the
// idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart.

Graph*
flowstart(Prog *firstp, int size)
{
	int nf;
	Flow *f, *f1, *start, *last;
	Graph *graph;
	Prog *p;
	ProgInfo info;

	// Count and mark instructions to annotate.
	nf = 0;
	for(p = firstp; p != P; p = p->link) {
		p->opt = nil; // should be already, but just in case
		proginfo(&info, p);
		if(info.flags & Skip)
			continue;
		p->opt = (void*)1;
		nf++;
	}

	if(nf == 0)
		return nil;

	if(nf >= 20000) {
		// fatal("%S is too big (%d instructions)", curfn->nname->sym, nf);
		return nil;
	}

	// Allocate annotations and assign to instructions.
	// One calloc holds the Graph header plus nf size-byte Flow records;
	// the records start immediately after the header.
	graph = calloc(sizeof *graph + size*nf, 1);
	if(graph == nil)
		fatal("out of memory");
	start = (Flow*)(graph+1);
	last = nil;
	f = start;
	for(p = firstp; p != P; p = p->link) {
		if(p->opt == nil)
			continue;
		p->opt = f;	// p->opt now points at p's Flow record
		f->prog = p;
		if(last)
			last->link = f;
		last = f;
		// Advance by the caller-specified record size (may embed Flow).
		f = (Flow*)((uchar*)f + size);
	}

	// Fill in pred/succ information.
	for(f = start; f != nil; f = f->link) {
		p = f->prog;
		proginfo(&info, p);
		if(!(info.flags & Break)) {
			// Fallthrough edge: s1/p1.
			f1 = f->link;
			f->s1 = f1;
			f1->p1 = f;
		}
		if(p->to.type == D_BRANCH) {
			if(p->to.u.branch == P)
				fatal("pnil %P", p);
			f1 = p->to.u.branch->opt;
			if(f1 == nil)
				fatal("fnil %P / %P", p, p->to.u.branch);
			if(f1 == f) {
				//fatal("self loop %P", p);
				continue;
			}
			// Branch edge: s2, and chain onto target's p2 list.
			f->s2 = f1;
			f->p2link = f1->p2;
			f1->p2 = f;
		}
	}

	graph->start = start;
	graph->num = nf;
	return graph;
}

// flowend releases a graph built by flowstart and clears the
// Prog.opt annotations it installed.
void
flowend(Graph *graph)
{
	Flow *f;

	for(f = graph->start; f != nil; f = f->link)
		f->prog->opt = nil;
	free(graph);
}
/*
 * find looping structure
 *
 * 1) find reverse postordering
 * 2) find approximate dominators,
 *	the actual dominators if the flow graph is reducible
 *	otherwise, dominators plus some other non-dominators.
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
 *	Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
 *	Oct. 1-3, 1973, pp. 207-217.
 * 3) find all nodes with a predecessor dominated by the current node.
 *	such a node is a loop head.
 *	recursively, all preds with a greater rpo number are in the loop
 */

// postorder records reachable nodes into rpo2r in postorder,
// using r->rpo != 0 as the visited mark. Returns the updated count n.
static int32
postorder(Flow *r, Flow **rpo2r, int32 n)
{
	Flow *r1;

	r->rpo = 1;
	r1 = r->s1;
	if(r1 && !r1->rpo)
		n = postorder(r1, rpo2r, n);
	r1 = r->s2;
	if(r1 && !r1->rpo)
		n = postorder(r1, rpo2r, n);
	rpo2r[n] = r;
	n++;
	return n;
}

// rpolca returns the common ancestor of rpo1 and rpo2 in the
// (approximate) dominator tree, walking idom chains upward.
// rpo1 == -1 means "no ancestor yet" and yields rpo2.
static int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
	int32 t;

	if(rpo1 == -1)
		return rpo2;
	while(rpo1 != rpo2){
		if(rpo1 > rpo2){
			t = rpo2;
			rpo2 = rpo1;
			rpo1 = t;
		}
		while(rpo1 < rpo2){
			t = idom[rpo2];
			if(t >= rpo2)
				fatal("bad idom");
			rpo2 = t;
		}
	}
	return rpo1;
}

// doms reports whether node r dominates node s (by rpo number),
// walking s up its idom chain.
static int
doms(int32 *idom, int32 r, int32 s)
{
	while(s > r)
		s = idom[s];
	return s == r;
}

// loophead reports whether r is a loop head: some predecessor
// of r is dominated by r (a back edge into r).
static int
loophead(int32 *idom, Flow *r)
{
	int32 src;

	src = r->rpo;
	if(r->p1 != nil && doms(idom, src, r->p1->rpo))
		return 1;
	for(r = r->p2; r != nil; r = r->p2link)
		if(doms(idom, src, r->rpo))
			return 1;
	return 0;
}

// loopmark floods backward from r, adding LOOP to the loop weight of
// every node in the loop headed at rpo number head. r->active records
// the head to avoid revisiting within a single flood.
static void
loopmark(Flow **rpo2r, int32 head, Flow *r)
{
	if(r->rpo < head || r->active == head)
		return;
	r->active = head;
	r->loop += LOOP;
	if(r->p1 != nil)
		loopmark(rpo2r, head, r->p1);
	for(r = r->p2; r != nil; r = r->p2link)
		loopmark(rpo2r, head, r);
}

// flowrpo computes reverse postorder numbering, approximate dominators,
// and loop weights (Flow.loop) for the graph g.
void
flowrpo(Graph *g)
{
	Flow *r1;
	int32 i, d, me, nr, *idom;
	Flow **rpo2r;

	free(g->rpo);
	g->rpo = calloc(g->num*sizeof g->rpo[0], 1);
	idom = calloc(g->num*sizeof idom[0], 1);
	if(g->rpo == nil || idom == nil)
		fatal("out of memory");

	for(r1 = g->start; r1 != nil; r1 = r1->link)
		r1->active = 0;

	rpo2r = g->rpo;
	d = postorder(g->start, rpo2r, 0);
	nr = g->num;
	if(d > nr)
		fatal("too many reg nodes %d %d", d, nr);
	nr = d;
	// Reverse the postorder in place to get reverse postorder.
	for(i = 0; i < nr / 2; i++) {
		r1 = rpo2r[i];
		rpo2r[i] = rpo2r[nr - 1 - i];
		rpo2r[nr - 1 - i] = r1;
	}
	for(i = 0; i < nr; i++)
		rpo2r[i]->rpo = i;

	idom[0] = 0;
	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		me = r1->rpo;
		d = -1;
		// rpo2r[r->rpo] == r protects against considering dead code,
		// which has r->rpo == 0.
		if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
			d = r1->p1->rpo;
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
			if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
				d = rpolca(idom, d, r1->rpo);
		idom[i] = d;
	}

	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		r1->loop++;
		if(r1->p2 != nil && loophead(idom, r1))
			loopmark(rpo2r, i, r1);
	}
	free(idom);

	for(r1 = g->start; r1 != nil; r1 = r1->link)
		r1->active = 0;
}

// uniqp returns the unique predecessor of r, or nil if r has
// zero or more than one predecessor.
Flow*
uniqp(Flow *r)
{
	Flow *r1;

	r1 = r->p1;
	if(r1 == nil) {
		r1 = r->p2;
		if(r1 == nil || r1->p2link != nil)
			return nil;
	} else
		if(r->p2 != nil)
			return nil;
	return r1;
}

// uniqs returns the unique successor of r, or nil if r has
// zero or more than one successor.
Flow*
uniqs(Flow *r)
{
	Flow *r1;

	r1 = r->s1;
	if(r1 == nil) {
		r1 = r->s2;
		if(r1 == nil)
			return nil;
	} else
		if(r->s2 != nil)
			return nil;
	return r1;
}

// The compilers assume they can generate temporary variables
// as needed to preserve the right semantics or simplify code
// generation and the back end will still generate good code.
// This results in a large number of ephemeral temporary variables.
// Merge temps with non-overlapping lifetimes and equal types using the
// greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation",
// ACM TOPLAS 1999.
typedef struct TempVar TempVar;
typedef struct TempFlow TempFlow;

struct TempVar
{
	Node *node;
	TempFlow *def; // definition of temp var
	TempFlow *use; // use list, chained through TempFlow.uselink
	TempVar *freelink; // next free temp in Type.opt list
	TempVar *merge; // merge var with this one
	vlong start; // smallest Prog.pc in live range
	vlong end; // largest Prog.pc in live range
	uchar addr; // address taken - no accurate end
	uchar removed; // removed from program
};

struct TempFlow
{
	Flow f;
	TempFlow *uselink;
};

// startcmp orders TempVar* values by live-range start, for qsort.
static int
startcmp(const void *va, const void *vb)
{
	TempVar *a, *b;

	a = *(TempVar**)va;
	b = *(TempVar**)vb;

	if(a->start < b->start)
		return -1;
	if(a->start > b->start)
		return +1;
	return 0;
}

// Is n available for merging?
static int
canmerge(Node *n)
{
	return n->class == PAUTO && strncmp(n->sym->name, "autotmp", 7) == 0;
}

static void mergewalk(TempVar*, TempFlow*, uint32);
static void varkillwalk(TempVar*, TempFlow*, uint32);

// mergetemp merges compiler-generated temporaries ("autotmp" PAUTOs)
// with non-overlapping live ranges and equal types, and removes
// write-only and single-use temporaries outright.
void
mergetemp(Prog *firstp)
{
	int i, j, nvar, ninuse, nfree, nkill;
	TempVar *var, *v, *v1, **bystart, **inuse;
	TempFlow *r;
	NodeList *l, **lp;
	Node *n;
	Prog *p, *p1;
	Type *t;
	ProgInfo info, info1;
	int32 gen;
	Graph *g;

	enum { Debug = 0 };

	g = flowstart(firstp, sizeof(TempFlow));
	if(g == nil)
		return;

	// Build list of all mergeable variables.
	nvar = 0;
	for(l = curfn->dcl; l != nil; l = l->next)
		if(canmerge(l->n))
			nvar++;

	var = calloc(nvar*sizeof var[0], 1);
	nvar = 0;
	for(l = curfn->dcl; l != nil; l = l->next) {
		n = l->n;
		if(canmerge(n)) {
			v = &var[nvar++];
			n->opt = v;	// Node.opt points back at its TempVar
			v->node = n;
		}
	}

	// Build list of uses.
	// We assume that the earliest reference to a temporary is its definition.
	// This is not true of variables in general but our temporaries are all
	// single-use (that's why we have so many!).
	for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
		p = r->f.prog;
		proginfo(&info, p);

		if(p->from.node != N && p->from.node->opt && p->to.node != N && p->to.node->opt)
			fatal("double node %P", p);
		if((n = p->from.node) != N && (v = n->opt) != nil ||
		   (n = p->to.node) != N && (v = n->opt) != nil) {
			if(v->def == nil)
				v->def = r;
			r->uselink = v->use;
			v->use = r;
			if(n == p->from.node && (info.flags & LeftAddr))
				v->addr = 1;
		}
	}

	if(Debug > 1)
		dumpit("before", g->start, 0);

	nkill = 0;

	// Special case.
	for(v = var; v < var+nvar; v++) {
		if(v->addr)
			continue;
		// Used in only one instruction, which had better be a write.
		if((r = v->use) != nil && r->uselink == nil) {
			p = r->f.prog;
			proginfo(&info, p);
			if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) {
				p->as = ANOP;	// rewrite the dead store to a no-op
				p->to = zprog.to;
				v->removed = 1;
				if(Debug)
					print("drop write-only %S\n", v->node->sym);
			} else
				fatal("temp used and not set: %P", p);
			nkill++;
			continue;
		}

		// Written in one instruction, read in the next, otherwise unused,
		// no jumps to the next instruction. Happens mainly in 386 compiler.
		if((r = v->use) != nil && r->f.link == &r->uselink->f && r->uselink->uselink == nil && uniqp(r->f.link) == &r->f) {
			p = r->f.prog;
			proginfo(&info, p);
			p1 = r->f.link->prog;
			proginfo(&info1, p1);
			enum {
				SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD,
			};
			// Forward the source of the move straight into the
			// consumer, provided neither instruction takes an
			// address and the operand sizes agree.
			if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) &&
			   !((info.flags|info1.flags) & (LeftAddr|RightAddr)) &&
			   (info.flags & SizeAny) == (info1.flags & SizeAny)) {
				p1->from = p->from;
				excise(&r->f);
				v->removed = 1;
				if(Debug)
					print("drop immediate-use %S\n", v->node->sym);
			}
			nkill++;
			continue;
		}
	}

	// Traverse live range of each variable to set start, end.
	// Each flood uses a new value of gen so that we don't have
	// to clear all the r->f.active words after each variable.
	gen = 0;
	for(v = var; v < var+nvar; v++) {
		gen++;
		for(r = v->use; r != nil; r = r->uselink)
			mergewalk(v, r, gen);
		if(v->addr) {
			// Address-taken temps have no accurate end; extend the
			// range forward to RET or an explicit VARKILL.
			gen++;
			for(r = v->use; r != nil; r = r->uselink)
				varkillwalk(v, r, gen);
		}
	}

	// Sort variables by start.
	bystart = malloc(nvar*sizeof bystart[0]);
	for(i=0; i<nvar; i++)
		bystart[i] = &var[i];
	qsort(bystart, nvar, sizeof bystart[0], startcmp);

	// List of in-use variables, sorted by end, so that the ones that
	// will last the longest are the earliest ones in the array.
	// The tail inuse[nfree:] holds no-longer-used variables.
	// In theory we should use a sorted tree so that insertions are
	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
	// In practice, it doesn't really matter.
	inuse = malloc(nvar*sizeof inuse[0]);
	ninuse = 0;
	nfree = nvar;
	for(i=0; i<nvar; i++) {
		v = bystart[i];
		if(v->removed)
			continue;

		// Expire no longer in use.
		while(ninuse > 0 && inuse[ninuse-1]->end < v->start) {
			v1 = inuse[--ninuse];
			inuse[--nfree] = v1;
		}

		// Find old temp to reuse if possible.
		t = v->node->type;
		for(j=nfree; j<nvar; j++) {
			v1 = inuse[j];
			// Require the types to match but also require the addrtaken bits to match.
			// If a variable's address is taken, that disables registerization for the individual
			// words of the variable (for example, the base,len,cap of a slice).
			// We don't want to merge a non-addressed var with an addressed one and
			// inhibit registerization of the former.
			if(eqtype(t, v1->node->type) && v->node->addrtaken == v1->node->addrtaken) {
				inuse[j] = inuse[nfree++];
				if(v1->merge)
					v->merge = v1->merge;	// collapse chains to the root
				else
					v->merge = v1;
				nkill++;
				break;
			}
		}

		// Sort v into inuse.
		j = ninuse++;
		while(j > 0 && inuse[j-1]->end < v->end) {
			inuse[j] = inuse[j-1];
			j--;
		}
		inuse[j] = v;
	}

	if(Debug) {
		print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill);
		for(v=var; v<var+nvar; v++) {
			print("var %#N %T %lld-%lld", v->node, v->node->type, v->start, v->end);
			if(v->addr)
				print(" addr=1");
			if(v->removed)
				print(" dead=1");
			if(v->merge)
				print(" merge %#N", v->merge->node);
			if(v->start == v->end)
				print(" %P", v->def->f.prog);
			print("\n");
		}

		if(Debug > 1)
			dumpit("after", g->start, 0);
	}

	// Update node references to use merged temporaries.
	for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
		p = r->f.prog;
		if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil)
			p->from.node = v->merge->node;
		if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil)
			p->to.node = v->merge->node;
	}

	// Delete merged nodes from declaration list.
	for(lp = &curfn->dcl; (l = *lp); ) {
		curfn->dcl->end = l;
		n = l->n;
		v = n->opt;
		if(v && (v->merge || v->removed)) {
			*lp = l->next;
			continue;
		}
		lp = &l->next;
	}

	// Clear aux structures.
	for(v=var; v<var+nvar; v++)
		v->node->opt = nil;
	free(var);
	free(bystart);
	free(inuse);
	flowend(g);
}

// mergewalk floods backward from use r0 toward v's definition,
// extending v's live range (start/end pc) along the way.
// gen marks visited nodes so floods need not clear state.
static void
mergewalk(TempVar *v, TempFlow *r0, uint32 gen)
{
	Prog *p;
	TempFlow *r1, *r, *r2;

	for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.p1) {
		if(r1->f.active == gen)
			break;
		r1->f.active = gen;
		p = r1->f.prog;
		if(v->end < p->pc)
			v->end = p->pc;
		if(r1 == v->def) {
			v->start = p->pc;
			break;
		}
	}

	// Recurse into the alternate predecessors of every node on the
	// p1 chain walked above.
	for(r = r0; r != r1; r = (TempFlow*)r->f.p1)
		for(r2 = (TempFlow*)r->f.p2; r2 != nil; r2 = (TempFlow*)r2->f.p2link)
			mergewalk(v, r2, gen);
}

// varkillwalk floods forward from r0, extending v's live range until
// RET or a VARKILL of v. Used for address-taken temps, whose last
// use cannot be seen directly in the instruction stream.
static void
varkillwalk(TempVar *v, TempFlow *r0, uint32 gen)
{
	Prog *p;
	TempFlow *r1, *r;

	for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.s1) {
		if(r1->f.active == gen)
			break;
		r1->f.active = gen;
		p = r1->f.prog;
		if(v->end < p->pc)
			v->end = p->pc;
		if(v->start > p->pc)
			v->start = p->pc;
		if(p->as == ARET || (p->as == AVARKILL && p->to.node == v->node))
			break;
	}

	// Recurse into the branch successors of every node on the
	// s1 chain walked above.
	for(r = r0; r != r1; r = (TempFlow*)r->f.s1)
		varkillwalk(v, (TempFlow*)r->f.s2, gen);
}
// Eliminate redundant nil pointer checks.
//
// The code generation pass emits a CHECKNIL for every possibly nil pointer.
// This pass removes a CHECKNIL if every predecessor path has already
// checked this value for nil.
//
// Simple backwards flood from check to definition.
// Run prog loop backward from end of program to beginning to avoid quadratic
// behavior removing a run of checks.
//
// Assume that stack variables with address not taken can be loaded multiple times
// from memory without being rechecked. Other variables need to be checked on
// each load.

typedef struct NilVar NilVar;
typedef struct NilFlow NilFlow;

struct NilFlow {
	Flow f;
	int kill;	// set when this CHECKNIL is provably redundant
};

static void nilwalkback(NilFlow *rcheck);
static void nilwalkfwd(NilFlow *rcheck);

// nilopt removes redundant CHECKNIL instructions from the function
// starting at firstp.
void
nilopt(Prog *firstp)
{
	NilFlow *r;
	Prog *p;
	Graph *g;
	int ncheck, nkill;

	g = flowstart(firstp, sizeof(NilFlow));
	if(g == nil)
		return;

	if(debug_checknil > 1 /* || strcmp(curfn->nname->sym->name, "f1") == 0 */)
		dumpit("nilopt", g->start, 0);

	ncheck = 0;
	nkill = 0;
	for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) {
		p = r->f.prog;
		if(p->as != ACHECKNIL || !regtyp(&p->from))
			continue;
		ncheck++;
		if(stackaddr(&p->from)) {
			// Addresses into the stack frame are never nil.
			if(debug_checknil && p->lineno > 1)
				warnl(p->lineno, "removed nil check of SP address");
			r->kill = 1;
			continue;
		}
		nilwalkfwd(r);
		if(r->kill) {
			if(debug_checknil && p->lineno > 1)
				warnl(p->lineno, "removed nil check before indirect");
			continue;
		}
		nilwalkback(r);
		if(r->kill) {
			if(debug_checknil && p->lineno > 1)
				warnl(p->lineno, "removed repeated nil check");
			continue;
		}
	}

	// Excise in a separate pass so the flood walks above see the
	// original instruction stream.
	for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) {
		if(r->kill) {
			nkill++;
			excise(&r->f);
		}
	}

	flowend(g);

	if(debug_checknil > 1)
		print("%S: removed %d of %d nil checks\n", curfn->nname->sym, nkill, ncheck);
}

// nilwalkback marks rcheck for removal if, walking backward along the
// unique-predecessor chain, an identical CHECKNIL is found before any
// write to the checked value.
static void
nilwalkback(NilFlow *rcheck)
{
	Prog *p;
	ProgInfo info;
	NilFlow *r;

	for(r = rcheck; r != nil; r = (NilFlow*)uniqp(&r->f)) {
		p = r->f.prog;
		proginfo(&info, p);
		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) {
			// Found initialization of value we're checking for nil.
			// without first finding the check, so this one is unchecked.
			return;
		}
		if(r != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from)) {
			rcheck->kill = 1;
			return;
		}
	}

	// Here is a more complex version that scans backward across branches.
	// It assumes rcheck->kill = 1 has been set on entry, and its job is to find a reason
	// to keep the check (setting rcheck->kill = 0).
	// It doesn't handle copying of aggregates as well as I would like,
	// nor variables with their address taken,
	// and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3.
	/*
	for(r1 = r0; r1 != nil; r1 = (NilFlow*)r1->f.p1) {
		if(r1->f.active == gen)
			break;
		r1->f.active = gen;
		p = r1->f.prog;

		// If same check, stop this loop but still check
		// alternate predecessors up to this point.
		if(r1 != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from))
			break;

		proginfo(&info, p);
		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) {
			// Found initialization of value we're checking for nil.
			// without first finding the check, so this one is unchecked.
			rcheck->kill = 0;
			return;
		}

		if(r1->f.p1 == nil && r1->f.p2 == nil) {
			print("lost pred for %P\n", rcheck->f.prog);
			for(r1=r0; r1!=nil; r1=(NilFlow*)r1->f.p1) {
				proginfo(&info, r1->f.prog);
				print("\t%P %d %d %D %D\n", r1->f.prog, info.flags&RightWrite, sameaddr(&r1->f.prog->to, &rcheck->f.prog->from), &r1->f.prog->to, &rcheck->f.prog->from);
			}
			fatal("lost pred trail");
		}
	}

	for(r = r0; r != r1; r = (NilFlow*)r->f.p1)
		for(r2 = (NilFlow*)r->f.p2; r2 != nil; r2 = (NilFlow*)r2->f.p2link)
			nilwalkback(rcheck, r2, gen);
	*/
}

// nilwalkfwd marks rcheck for removal if, within the following basic
// block, the checked address is dereferenced (an implicit nil check)
// before any event that could invalidate that reasoning.
static void
nilwalkfwd(NilFlow *rcheck)
{
	NilFlow *r, *last;
	Prog *p;
	ProgInfo info;

	// If the path down from rcheck dereferences the address
	// (possibly with a small offset) before writing to memory
	// and before any subsequent checks, it's okay to wait for
	// that implicit check. Only consider this basic block to
	// avoid problems like:
	//	_ = *x // should panic
	//	for {} // no writes but infinite loop may be considered visible
	last = nil;
	for(r = (NilFlow*)uniqs(&rcheck->f); r != nil; r = (NilFlow*)uniqs(&r->f)) {
		p = r->f.prog;
		proginfo(&info, p);

		if((info.flags & LeftRead) && smallindir(&p->from, &rcheck->f.prog->from)) {
			rcheck->kill = 1;
			return;
		}
		if((info.flags & (RightRead|RightWrite)) && smallindir(&p->to, &rcheck->f.prog->from)) {
			rcheck->kill = 1;
			return;
		}

		// Stop if another nil check happens.
		if(p->as == ACHECKNIL)
			return;
		// Stop if value is lost.
		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from))
			return;
		// Stop if memory write.
		if((info.flags & RightWrite) && !regtyp(&p->to))
			return;
		// Stop if we jump backward.
		// This test is valid because all the NilFlow* are pointers into
		// a single contiguous array. We will need to add an explicit
		// numbering when the code is converted to Go.
		if(last != nil && r <= last)
			return;
		last = r;
	}
}