github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/cmd/gc/popt.c (about) 1 // Derived from Inferno utils/6c/reg.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 30 31 // "Portable" optimizations. 32 // Compiled separately for 5g, 6g, and 8g, so allowed to use gg.h, opt.h. 
33 // Must code to the intersection of the three back ends. 34 35 #include <u.h> 36 #include <libc.h> 37 #include "gg.h" 38 #include "opt.h" 39 40 // p is a call instruction. Does the call fail to return? 41 int 42 noreturn(Prog *p) 43 { 44 Sym *s; 45 int i; 46 static Sym* symlist[10]; 47 48 if(symlist[0] == S) { 49 symlist[0] = pkglookup("panicindex", runtimepkg); 50 symlist[1] = pkglookup("panicslice", runtimepkg); 51 symlist[2] = pkglookup("throwinit", runtimepkg); 52 symlist[3] = pkglookup("gopanic", runtimepkg); 53 symlist[4] = pkglookup("panicwrap", runtimepkg); 54 symlist[5] = pkglookup("throwreturn", runtimepkg); 55 symlist[6] = pkglookup("selectgo", runtimepkg); 56 symlist[7] = pkglookup("block", runtimepkg); 57 } 58 59 if(p->to.node == nil) 60 return 0; 61 s = p->to.node->sym; 62 if(s == S) 63 return 0; 64 for(i=0; symlist[i]!=S; i++) 65 if(s == symlist[i]) 66 return 1; 67 return 0; 68 } 69 70 // JMP chasing and removal. 71 // 72 // The code generator depends on being able to write out jump 73 // instructions that it can jump to now but fill in later. 74 // the linker will resolve them nicely, but they make the code 75 // longer and more difficult to follow during debugging. 76 // Remove them. 77 78 /* what instruction does a JMP to p eventually land on? */ 79 static Prog* 80 chasejmp(Prog *p, int *jmploop) 81 { 82 int n; 83 84 n = 0; 85 while(p != P && p->as == AJMP && p->to.type == D_BRANCH) { 86 if(++n > 10) { 87 *jmploop = 1; 88 break; 89 } 90 p = p->to.u.branch; 91 } 92 return p; 93 } 94 95 /* 96 * reuse reg pointer for mark/sweep state. 97 * leave reg==nil at end because alive==nil. 
 */
#define alive ((void*)0)
#define dead ((void*)1)
/*c2go
extern void *alive;
extern void *dead;
*/

/* mark all code reachable from firstp as alive */
// Floods forward along the link chain starting at firstp, setting
// p->opt = alive on each instruction.  Stops at the first instruction
// not currently marked dead (already visited), recurses into branch
// targets (except for calls), and stops after unconditional transfers
// (JMP, RET, UNDEF), since the instruction after those is not
// reachable by fallthrough.
static void
mark(Prog *firstp)
{
	Prog *p;

	for(p=firstp; p; p=p->link) {
		if(p->opt != dead)
			break;
		p->opt = alive;
		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch)
			mark(p->to.u.branch);
		if(p->as == AJMP || p->as == ARET || p->as == AUNDEF)
			break;
	}
}

// fixjmp rewrites branch targets to skip over chains of JMPs, deletes
// instructions that are unreachable afterwards, and finally elides
// JMPs to the immediately following instruction.  The Prog.opt field
// is borrowed as the mark/sweep bit for the duration of the pass.
void
fixjmp(Prog *firstp)
{
	int jmploop;
	Prog *p, *last;

	if(debug['R'] && debug['v'])
		print("\nfixjmp\n");

	// pass 1: resolve jump to jump, mark all code as dead.
	jmploop = 0;
	for(p=firstp; p; p=p->link) {
		if(debug['R'] && debug['v'])
			print("%P\n", p);
		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) {
			p->to.u.branch = chasejmp(p->to.u.branch, &jmploop);
			if(debug['R'] && debug['v'])
				print("->%P\n", p);
		}
		p->opt = dead;
	}
	if(debug['R'] && debug['v'])
		print("\n");

	// pass 2: mark all reachable code alive
	mark(firstp);

	// pass 3: delete dead code (mostly JMPs).
	// Dead instructions are unlinked from the chain; last tracks the
	// most recent surviving instruction.
	last = nil;
	for(p=firstp; p; p=p->link) {
		if(p->opt == dead) {
			if(p->link == P && p->as == ARET && last && last->as != ARET) {
				// This is the final ARET, and the code so far doesn't have one.
				// Let it stay. The register allocator assumes that all live code in
				// the function can be traversed by starting at all the RET instructions
				// and following predecessor links. If we remove the final RET,
				// this assumption will not hold in the case of an infinite loop
				// at the end of a function.
				// Keep the RET but mark it dead for the liveness analysis.
				p->mode = 1;
			} else {
				if(debug['R'] && debug['v'])
					print("del %P\n", p);
				continue;
			}
		}
		if(last)
			last->link = p;
		last = p;
	}
	// firstp is always marked alive by pass 2, so last is non-nil here.
	last->link = P;

	// pass 4: elide JMP to next instruction.
	// only safe if there are no jumps to JMPs anymore.
	if(!jmploop) {
		last = nil;
		for(p=firstp; p; p=p->link) {
			if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) {
				if(debug['R'] && debug['v'])
					print("del %P\n", p);
				continue;
			}
			if(last)
				last->link = p;
			last = p;
		}
		last->link = P;
	}

	if(debug['R'] && debug['v']) {
		print("\n");
		for(p=firstp; p; p=p->link)
			print("%P\n", p);
		print("\n");
	}
}

#undef alive
#undef dead

// Control flow analysis. The Flow structures hold predecessor and successor
// information as well as basic loop analysis.
//
//	graph = flowstart(firstp, sizeof(Flow));
//	... use flow graph ...
//	flowend(graph); // free graph
//
// Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
//
//	for(f = graph->start; f != nil; f = f->link)
//
// or, given an instruction f, to iterate over all the predecessors, which is
// f->p1 and this list:
//
//	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
//
// Often the Flow struct is embedded as the first field inside a larger struct S.
// In that case casts are needed to convert Flow* to S* in many places but the
// idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart.

// flowstart builds the flow graph for the instruction list starting
// at firstp.  One Flow node (of the caller-chosen size, so Flow may
// be embedded as the first field of a larger struct) is allocated per
// non-Skip instruction; each instruction's Prog.opt points at its
// Flow node for the lifetime of the graph.  Returns nil if there is
// nothing to annotate or the function is too large to analyze.
Graph*
flowstart(Prog *firstp, int size)
{
	int nf;
	Flow *f, *f1, *start, *last;
	Graph *graph;
	Prog *p;
	ProgInfo info;

	// Count and mark instructions to annotate.
	nf = 0;
	for(p = firstp; p != P; p = p->link) {
		p->opt = nil; // should be already, but just in case
		proginfo(&info, p);
		if(info.flags & Skip)
			continue;
		p->opt = (void*)1;
		nf++;
	}

	if(nf == 0)
		return nil;

	if(nf >= 20000) {
		// fatal("%S is too big (%d instructions)", curfn->nname->sym, nf);
		return nil;
	}

	// Allocate annotations and assign to instructions.
	// One calloc covers the Graph header plus all nf Flow nodes,
	// which start immediately after the header.
	graph = calloc(sizeof *graph + size*nf, 1);
	if(graph == nil)
		fatal("out of memory");
	start = (Flow*)(graph+1);
	last = nil;
	f = start;
	for(p = firstp; p != P; p = p->link) {
		if(p->opt == nil)
			continue;
		p->opt = f;
		f->prog = p;
		if(last)
			last->link = f;
		last = f;

		// Nodes are size bytes apart, not sizeof(Flow), so that a
		// larger embedding struct can be stepped through correctly.
		f = (Flow*)((uchar*)f + size);
	}

	// Fill in pred/succ information.
	// s1 is the fallthrough successor (absent after Break-flagged
	// instructions); s2 is the branch successor, with its
	// predecessors chained through p2/p2link.
	for(f = start; f != nil; f = f->link) {
		p = f->prog;
		proginfo(&info, p);
		if(!(info.flags & Break)) {
			f1 = f->link;
			f->s1 = f1;
			f1->p1 = f;
		}
		if(p->to.type == D_BRANCH) {
			if(p->to.u.branch == P)
				fatal("pnil %P", p);
			f1 = p->to.u.branch->opt;
			if(f1 == nil)
				fatal("fnil %P / %P", p, p->to.u.branch);
			if(f1 == f) {
				//fatal("self loop %P", p);
				continue;
			}
			f->s2 = f1;
			f->p2link = f1->p2;
			f1->p2 = f;
		}
	}

	graph->start = start;
	graph->num = nf;
	return graph;
}

// flowend tears down a graph built by flowstart: clears each
// instruction's borrowed Prog.opt pointer and frees the single
// allocation holding the Graph and all its Flow nodes.
void
flowend(Graph *graph)
{
	Flow *f;

	for(f = graph->start; f != nil; f = f->link)
		f->prog->opt = nil;
	free(graph);
}

/*
 * find looping structure
 *
 * 1) find reverse postordering
 * 2) find approximate dominators,
 *	the actual dominators if the flow graph is reducible
 *	otherwise, dominators plus some other non-dominators.
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
 *	Conf. Record of ACM Symp. on Principles of Prog.
 * Langs, Boston, Massachusetts,
 * Oct. 1-3, 1973, pp. 207-217.
 * 3) find all nodes with a predecessor dominated by the current node.
 *	such a node is a loop head.
 *	recursively, all preds with a greater rpo number are in the loop
 */

// postorder performs a DFS over the s1/s2 successor edges, appending
// each node to rpo2r in postorder.  The rpo field doubles as the
// visited flag (set to 1 here; overwritten with real numbers later).
// Returns the updated count of recorded nodes.
static int32
postorder(Flow *r, Flow **rpo2r, int32 n)
{
	Flow *r1;

	r->rpo = 1;
	r1 = r->s1;
	if(r1 && !r1->rpo)
		n = postorder(r1, rpo2r, n);
	r1 = r->s2;
	if(r1 && !r1->rpo)
		n = postorder(r1, rpo2r, n);
	rpo2r[n] = r;
	n++;
	return n;
}

// rpolca returns the least common ancestor of rpo1 and rpo2 in the
// (approximate) dominator tree encoded by idom.  rpo1 == -1 means
// "no candidate yet" and yields rpo2 unchanged.  Relies on the
// invariant idom[i] < i to guarantee termination.
static int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
	int32 t;

	if(rpo1 == -1)
		return rpo2;
	while(rpo1 != rpo2){
		// Keep rpo1 <= rpo2, then walk the deeper node (rpo2)
		// up its idom chain until the two meet.
		if(rpo1 > rpo2){
			t = rpo2;
			rpo2 = rpo1;
			rpo1 = t;
		}
		while(rpo1 < rpo2){
			t = idom[rpo2];
			if(t >= rpo2)
				fatal("bad idom");
			rpo2 = t;
		}
	}
	return rpo1;
}

// doms reports whether node r dominates node s (both given as rpo
// numbers), by walking s up the idom chain until it reaches or
// passes r.
static int
doms(int32 *idom, int32 r, int32 s)
{
	while(s > r)
		s = idom[s];
	return s == r;
}

// loophead reports whether r is a loop head: some predecessor of r
// (p1 or any node on the p2 chain) is dominated by r itself, i.e.
// there is a back edge into r.
static int
loophead(int32 *idom, Flow *r)
{
	int32 src;

	src = r->rpo;
	if(r->p1 != nil && doms(idom, src, r->p1->rpo))
		return 1;
	for(r = r->p2; r != nil; r = r->p2link)
		if(doms(idom, src, r->rpo))
			return 1;
	return 0;
}

// loopmark floods backwards from r over predecessor edges, adding
// LOOP to the loop weight of every node in the loop headed at rpo
// number head.  Nodes with rpo < head are outside the loop; active
// serves as the per-head visited mark.
static void
loopmark(Flow **rpo2r, int32 head, Flow *r)
{
	if(r->rpo < head || r->active == head)
		return;
	r->active = head;
	r->loop += LOOP;
	if(r->p1 != nil)
		loopmark(rpo2r, head, r->p1);
	for(r = r->p2; r != nil; r = r->p2link)
		loopmark(rpo2r, head, r);
}

// flowrpo computes, for graph g: the reverse postorder numbering
// (stored in each node's rpo field and in g->rpo), approximate
// dominators, and loop weights (each node's loop field is bumped by
// LOOP per enclosing loop detected).  Dead code never visited by
// postorder keeps rpo == 0 and is excluded via the rpo2r[r->rpo] == r
// test below.
void
flowrpo(Graph *g)
{
	Flow *r1;
	int32 i, d, me, nr, *idom;
	Flow **rpo2r;

	free(g->rpo);
	g->rpo = calloc(g->num*sizeof g->rpo[0], 1);
	idom = calloc(g->num*sizeof idom[0], 1);
	if(g->rpo == nil || idom == nil)
		fatal("out of memory");

	for(r1 = g->start; r1 != nil; r1 = r1->link)
		r1->active = 0;

	rpo2r = g->rpo;
	d = postorder(g->start, rpo2r, 0);
	nr = g->num;
	if(d > nr)
		fatal("too many reg nodes %d %d", d, nr);
	nr = d;
	// Reverse the postorder array in place to obtain reverse
	// postorder, then assign each node its rpo index.
	for(i = 0; i < nr / 2; i++) {
		r1 = rpo2r[i];
		rpo2r[i] = rpo2r[nr - 1 - i];
		rpo2r[nr - 1 - i] = r1;
	}
	for(i = 0; i < nr; i++)
		rpo2r[i]->rpo = i;

	// One pass in rpo order suffices for the approximate idom:
	// fold every already-numbered predecessor via rpolca.
	idom[0] = 0;
	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		me = r1->rpo;
		d = -1;
		// rpo2r[r->rpo] == r protects against considering dead code,
		// which has r->rpo == 0.
		if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
			d = r1->p1->rpo;
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
			if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
				d = rpolca(idom, d, r1->rpo);
		idom[i] = d;
	}

	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		r1->loop++;
		if(r1->p2 != nil && loophead(idom, r1))
			loopmark(rpo2r, i, r1);
	}
	free(idom);

	for(r1 = g->start; r1 != nil; r1 = r1->link)
		r1->active = 0;
}

// uniqp returns r's unique predecessor, or nil if r has zero or more
// than one predecessor.
Flow*
uniqp(Flow *r)
{
	Flow *r1;

	r1 = r->p1;
	if(r1 == nil) {
		r1 = r->p2;
		if(r1 == nil || r1->p2link != nil)
			return nil;
	} else
		if(r->p2 != nil)
			return nil;
	return r1;
}

// uniqs returns r's unique successor, or nil if r has zero or two
// successors.
Flow*
uniqs(Flow *r)
{
	Flow *r1;

	r1 = r->s1;
	if(r1 == nil) {
		r1 = r->s2;
		if(r1 == nil)
			return nil;
	} else
		if(r->s2 != nil)
			return nil;
	return r1;
}

// The compilers assume they can generate temporary variables
// as needed to preserve the right semantics or simplify code
// generation and the back end will still generate good code.
// This results in a large number of ephemeral temporary variables.
// Merge temps with non-overlapping lifetimes and equal types using the
// greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation",
// ACM TOPLAS 1999.
typedef struct TempVar TempVar;
typedef struct TempFlow TempFlow;

// TempVar describes one candidate temporary variable being considered
// for merging, with its live range and merge decision.
struct TempVar
{
	Node *node;
	TempFlow *def; // definition of temp var
	TempFlow *use; // use list, chained through TempFlow.uselink
	TempVar *freelink; // next free temp in Type.opt list
	TempVar *merge; // merge var with this one
	vlong start; // smallest Prog.pc in live range
	vlong end; // largest Prog.pc in live range
	uchar addr; // address taken - no accurate end
	uchar removed; // removed from program
};

// TempFlow embeds Flow (so it can be allocated by flowstart) and adds
// a per-instruction link for chaining the uses of a single TempVar.
struct TempFlow
{
	Flow f;
	TempFlow *uselink;
};

// qsort comparator ordering TempVar* entries by ascending live-range
// start.
static int
startcmp(const void *va, const void *vb)
{
	TempVar *a, *b;

	a = *(TempVar**)va;
	b = *(TempVar**)vb;

	if(a->start < b->start)
		return -1;
	if(a->start > b->start)
		return +1;
	return 0;
}

// Is n available for merging?
// Only compiler-generated stack temporaries (PAUTO named autotmp*)
// are candidates.
static int
canmerge(Node *n)
{
	return n->class == PAUTO && strncmp(n->sym->name, "autotmp", 7) == 0;
}

static void mergewalk(TempVar*, TempFlow*, uint32);
static void varkillwalk(TempVar*, TempFlow*, uint32);

// mergetemp merges compiler-generated temporaries with
// non-overlapping live ranges and equal types, following the greedy
// linear-scan approach cited above.  It also removes write-only
// temporaries and collapses write-then-immediately-read pairs.
// Borrows Node.opt to point at each node's TempVar for the duration
// of the pass.
void
mergetemp(Prog *firstp)
{
	int i, j, nvar, ninuse, nfree, nkill;
	TempVar *var, *v, *v1, **bystart, **inuse;
	TempFlow *r;
	NodeList *l, **lp;
	Node *n;
	Prog *p, *p1;
	Type *t;
	ProgInfo info, info1;
	int32 gen;
	Graph *g;

	enum { Debug = 0 };

	g = flowstart(firstp, sizeof(TempFlow));
	if(g == nil)
		return;

	// Build list of all mergeable variables.
	nvar = 0;
	for(l = curfn->dcl; l != nil; l = l->next)
		if(canmerge(l->n))
			nvar++;

	var = calloc(nvar*sizeof var[0], 1);
	nvar = 0;
	for(l = curfn->dcl; l != nil; l = l->next) {
		n = l->n;
		if(canmerge(n)) {
			v = &var[nvar++];
			n->opt = v;
			v->node = n;
		}
	}

	// Build list of uses.
	// We assume that the earliest reference to a temporary is its definition.
	// This is not true of variables in general but our temporaries are all
	// single-use (that's why we have so many!).
	for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
		p = r->f.prog;
		proginfo(&info, p);

		if(p->from.node != N && p->from.node->opt && p->to.node != N && p->to.node->opt)
			fatal("double node %P", p);
		if((n = p->from.node) != N && (v = n->opt) != nil ||
		   (n = p->to.node) != N && (v = n->opt) != nil) {
			if(v->def == nil)
				v->def = r;
			r->uselink = v->use;
			v->use = r;
			if(n == p->from.node && (info.flags & LeftAddr))
				v->addr = 1;
		}
	}

	if(Debug > 1)
		dumpit("before", g->start, 0);

	nkill = 0;

	// Special case.
	for(v = var; v < var+nvar; v++) {
		if(v->addr)
			continue;
		// Used in only one instruction, which had better be a write.
		if((r = v->use) != nil && r->uselink == nil) {
			p = r->f.prog;
			proginfo(&info, p);
			if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) {
				// Dead store: turn the instruction into a NOP.
				p->as = ANOP;
				p->to = zprog.to;
				v->removed = 1;
				if(Debug)
					print("drop write-only %S\n", v->node->sym);
			} else
				fatal("temp used and not set: %P", p);
			nkill++;
			continue;
		}

		// Written in one instruction, read in the next, otherwise unused,
		// no jumps to the next instruction. Happens mainly in 386 compiler.
		if((r = v->use) != nil && r->f.link == &r->uselink->f && r->uselink->uselink == nil && uniqp(r->f.link) == &r->f) {
			p = r->f.prog;
			proginfo(&info, p);
			p1 = r->f.link->prog;
			proginfo(&info1, p1);
			enum {
				SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD,
			};
			// Forward the source of the MOVE directly into the
			// consumer and delete the MOVE, provided neither
			// instruction takes an address and the operand sizes
			// agree.
			if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) &&
			   !((info.flags|info1.flags) & (LeftAddr|RightAddr)) &&
			   (info.flags & SizeAny) == (info1.flags & SizeAny)) {
				p1->from = p->from;
				excise(&r->f);
				v->removed = 1;
				if(Debug)
					print("drop immediate-use %S\n", v->node->sym);
			}
			nkill++;
			continue;
		}
	}

	// Traverse live range of each variable to set start, end.
	// Each flood uses a new value of gen so that we don't have
	// to clear all the r->f.active words after each variable.
	gen = 0;
	for(v = var; v < var+nvar; v++) {
		gen++;
		for(r = v->use; r != nil; r = r->uselink)
			mergewalk(v, r, gen);
		if(v->addr) {
			// Address-taken: the backward walk is not accurate, so
			// also flood forward to RET/VARKILL to extend the range.
			gen++;
			for(r = v->use; r != nil; r = r->uselink)
				varkillwalk(v, r, gen);
		}
	}

	// Sort variables by start.
	bystart = malloc(nvar*sizeof bystart[0]);
	for(i=0; i<nvar; i++)
		bystart[i] = &var[i];
	qsort(bystart, nvar, sizeof bystart[0], startcmp);

	// List of in-use variables, sorted by end, so that the ones that
	// will last the longest are the earliest ones in the array.
	// The tail inuse[nfree:] holds no-longer-used variables.
	// In theory we should use a sorted tree so that insertions are
	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
	// In practice, it doesn't really matter.
	inuse = malloc(nvar*sizeof inuse[0]);
	ninuse = 0;
	nfree = nvar;
	for(i=0; i<nvar; i++) {
		v = bystart[i];
		if(v->removed)
			continue;

		// Expire no longer in use.
		while(ninuse > 0 && inuse[ninuse-1]->end < v->start) {
			v1 = inuse[--ninuse];
			inuse[--nfree] = v1;
		}

		// Find old temp to reuse if possible.
		t = v->node->type;
		for(j=nfree; j<nvar; j++) {
			v1 = inuse[j];
			// Require the types to match but also require the addrtaken bits to match.
			// If a variable's address is taken, that disables registerization for the individual
			// words of the variable (for example, the base,len,cap of a slice).
			// We don't want to merge a non-addressed var with an addressed one and
			// inhibit registerization of the former.
			if(eqtype(t, v1->node->type) && v->node->addrtaken == v1->node->addrtaken) {
				inuse[j] = inuse[nfree++];
				// Point at the root of a merge chain so all merged
				// temps share one representative node.
				if(v1->merge)
					v->merge = v1->merge;
				else
					v->merge = v1;
				nkill++;
				break;
			}
		}

		// Sort v into inuse.
		j = ninuse++;
		while(j > 0 && inuse[j-1]->end < v->end) {
			inuse[j] = inuse[j-1];
			j--;
		}
		inuse[j] = v;
	}

	if(Debug) {
		print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill);
		for(v=var; v<var+nvar; v++) {
			print("var %#N %T %lld-%lld", v->node, v->node->type, v->start, v->end);
			if(v->addr)
				print(" addr=1");
			if(v->removed)
				print(" dead=1");
			if(v->merge)
				print(" merge %#N", v->merge->node);
			if(v->start == v->end)
				print(" %P", v->def->f.prog);
			print("\n");
		}

		if(Debug > 1)
			dumpit("after", g->start, 0);
	}

	// Update node references to use merged temporaries.
	for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
		p = r->f.prog;
		if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil)
			p->from.node = v->merge->node;
		if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil)
			p->to.node = v->merge->node;
	}

	// Delete merged nodes from declaration list.
	for(lp = &curfn->dcl; (l = *lp); ) {
		curfn->dcl->end = l;
		n = l->n;
		v = n->opt;
		if(v && (v->merge || v->removed)) {
			*lp = l->next;
			continue;
		}
		lp = &l->next;
	}

	// Clear aux structures.
	for(v=var; v<var+nvar; v++)
		v->node->opt = nil;
	free(var);
	free(bystart);
	free(inuse);
	flowend(g);
}

// mergewalk floods backwards along predecessor edges from the use r0
// toward v's definition, growing v->end to the largest pc seen and
// setting v->start when the definition is reached.  gen is the
// visited mark for this flood; alternate predecessors (p2 chains) of
// visited nodes are flooded recursively.
static void
mergewalk(TempVar *v, TempFlow *r0, uint32 gen)
{
	Prog *p;
	TempFlow *r1, *r, *r2;

	for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.p1) {
		if(r1->f.active == gen)
			break;
		r1->f.active = gen;
		p = r1->f.prog;
		if(v->end < p->pc)
			v->end = p->pc;
		if(r1 == v->def) {
			v->start = p->pc;
			break;
		}
	}

	for(r = r0; r != r1; r = (TempFlow*)r->f.p1)
		for(r2 = (TempFlow*)r->f.p2; r2 != nil; r2 = (TempFlow*)r2->f.p2link)
			mergewalk(v, r2, gen);
}

// varkillwalk floods forwards along successor edges from r0,
// extending v's live range in both directions, stopping at RET or at
// a VARKILL of v.  Used for address-taken temporaries whose backward
// walk cannot bound the range accurately.
static void
varkillwalk(TempVar *v, TempFlow *r0, uint32 gen)
{
	Prog *p;
	TempFlow *r1, *r;

	for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.s1) {
		if(r1->f.active == gen)
			break;
		r1->f.active = gen;
		p = r1->f.prog;
		if(v->end < p->pc)
			v->end = p->pc;
		if(v->start > p->pc)
			v->start = p->pc;
		if(p->as == ARET || (p->as == AVARKILL && p->to.node == v->node))
			break;
	}

	for(r = r0; r != r1; r = (TempFlow*)r->f.s1)
		varkillwalk(v, (TempFlow*)r->f.s2, gen);
}

// Eliminate redundant nil pointer checks.
//
// The code generation pass emits a CHECKNIL for every possibly nil pointer.
// This pass removes a CHECKNIL if every predecessor path has already
// checked this value for nil.
//
// Simple backwards flood from check to definition.
// Run prog loop backward from end of program to beginning to avoid quadratic
// behavior removing a run of checks.
//
// Assume that stack variables with address not taken can be loaded multiple times
// from memory without being rechecked. Other variables need to be checked on
// each load.

typedef struct NilVar NilVar;
typedef struct NilFlow NilFlow;

// NilFlow embeds Flow and adds a kill flag: set when the CHECKNIL at
// this node has been proven redundant and should be excised.
struct NilFlow {
	Flow f;
	int kill;
};

static void nilwalkback(NilFlow *rcheck);
static void nilwalkfwd(NilFlow *rcheck);

// nilopt removes redundant CHECKNIL instructions from the function
// starting at firstp: checks of stack addresses, checks immediately
// followed by a dereference of the same value, and checks dominated
// by an earlier identical check on a straight-line predecessor path.
// NOTE(review): the header comment above mentions running the prog
// loop backward, but the loop below iterates forward over the flow
// list — confirm whether the comment is stale.
void
nilopt(Prog *firstp)
{
	NilFlow *r;
	Prog *p;
	Graph *g;
	int ncheck, nkill;

	g = flowstart(firstp, sizeof(NilFlow));
	if(g == nil)
		return;

	if(debug_checknil > 1 /* || strcmp(curfn->nname->sym->name, "f1") == 0 */)
		dumpit("nilopt", g->start, 0);

	ncheck = 0;
	nkill = 0;
	for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) {
		p = r->f.prog;
		if(p->as != ACHECKNIL || !regtyp(&p->from))
			continue;
		ncheck++;
		if(stackaddr(&p->from)) {
			// A stack address can never be nil.
			if(debug_checknil && p->lineno > 1)
				warnl(p->lineno, "removed nil check of SP address");
			r->kill = 1;
			continue;
		}
		nilwalkfwd(r);
		if(r->kill) {
			if(debug_checknil && p->lineno > 1)
				warnl(p->lineno, "removed nil check before indirect");
			continue;
		}
		nilwalkback(r);
		if(r->kill) {
			if(debug_checknil && p->lineno > 1)
				warnl(p->lineno, "removed repeated nil check");
			continue;
		}
	}

	// Excise in a separate pass so the walks above see an unmodified
	// graph.
	for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) {
		if(r->kill) {
			nkill++;
			excise(&r->f);
		}
	}

	flowend(g);

	if(debug_checknil > 1)
		print("%S: removed %d of %d nil checks\n", curfn->nname->sym, nkill, ncheck);
}

// nilwalkback scans backwards from rcheck along the unique-predecessor
// chain only (branches stop the walk via uniqp returning nil).  If it
// finds an earlier CHECKNIL of the same address before any write to
// that address, the check at rcheck is redundant and is marked killed.
static void
nilwalkback(NilFlow *rcheck)
{
	Prog *p;
	ProgInfo info;
	NilFlow *r;

	for(r = rcheck; r != nil; r = (NilFlow*)uniqp(&r->f)) {
		p = r->f.prog;
		proginfo(&info, p);
		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) {
			// Found initialization of value we're checking for nil.
			// without first finding the check, so this one is unchecked.
			return;
		}
		if(r != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from)) {
			rcheck->kill = 1;
			return;
		}
	}

	// Here is a more complex version that scans backward across branches.
	// It assumes rcheck->kill = 1 has been set on entry, and its job is to find a reason
	// to keep the check (setting rcheck->kill = 0).
	// It doesn't handle copying of aggregates as well as I would like,
	// nor variables with their address taken,
	// and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3. Perhaps not.
	/*
	for(r1 = r0; r1 != nil; r1 = (NilFlow*)r1->f.p1) {
		if(r1->f.active == gen)
			break;
		r1->f.active = gen;
		p = r1->f.prog;

		// If same check, stop this loop but still check
		// alternate predecessors up to this point.
		if(r1 != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from))
			break;

		proginfo(&info, p);
		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) {
			// Found initialization of value we're checking for nil.
			// without first finding the check, so this one is unchecked.
			rcheck->kill = 0;
			return;
		}

		if(r1->f.p1 == nil && r1->f.p2 == nil) {
			print("lost pred for %P\n", rcheck->f.prog);
			for(r1=r0; r1!=nil; r1=(NilFlow*)r1->f.p1) {
				proginfo(&info, r1->f.prog);
				print("\t%P %d %d %D %D\n", r1->f.prog, info.flags&RightWrite, sameaddr(&r1->f.prog->to, &rcheck->f.prog->from), &r1->f.prog->to, &rcheck->f.prog->from);
			}
			fatal("lost pred trail");
		}
	}

	for(r = r0; r != r1; r = (NilFlow*)r->f.p1)
		for(r2 = (NilFlow*)r->f.p2; r2 != nil; r2 = (NilFlow*)r2->f.p2link)
			nilwalkback(rcheck, r2, gen);
	*/
}

// nilwalkfwd scans forwards from rcheck along the unique-successor
// chain.  If the checked value is dereferenced (with at most a small
// offset) before any other check, loss of the value, or memory write,
// the hardware fault from that dereference subsumes the explicit
// check, so rcheck is marked killed.
static void
nilwalkfwd(NilFlow *rcheck)
{
	NilFlow *r, *last;
	Prog *p;
	ProgInfo info;

	// If the path down from rcheck dereferences the address
	// (possibly with a small offset) before writing to memory
	// and before any subsequent checks, it's okay to wait for
	// that implicit check. Only consider this basic block to
	// avoid problems like:
	//	_ = *x // should panic
	//	for {} // no writes but infinite loop may be considered visible
	last = nil;
	for(r = (NilFlow*)uniqs(&rcheck->f); r != nil; r = (NilFlow*)uniqs(&r->f)) {
		p = r->f.prog;
		proginfo(&info, p);

		if((info.flags & LeftRead) && smallindir(&p->from, &rcheck->f.prog->from)) {
			rcheck->kill = 1;
			return;
		}
		if((info.flags & (RightRead|RightWrite)) && smallindir(&p->to, &rcheck->f.prog->from)) {
			rcheck->kill = 1;
			return;
		}

		// Stop if another nil check happens.
		if(p->as == ACHECKNIL)
			return;
		// Stop if value is lost.
		if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from))
			return;
		// Stop if memory write.
		if((info.flags & RightWrite) && !regtyp(&p->to))
			return;
		// Stop if we jump backward.
		// This test is valid because all the NilFlow* are pointers into
		// a single contiguous array. We will need to add an explicit
		// numbering when the code is converted to Go.
		if(last != nil && r <= last)
			return;
		last = r;
	}
}