// github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/liblink/sched9.c
//
// cmd/9l/sched.c from Vita Nuova.
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
29 30 // +build ignore 31 32 #include "l.h" 33 34 enum 35 { 36 E_ICC = 1<<0, 37 E_FCC = 1<<1, 38 E_MEM = 1<<2, 39 E_MEMSP = 1<<3, /* uses offset and size */ 40 E_MEMSB = 1<<4, /* uses offset and size */ 41 E_LR = 1<<5, 42 E_CR = 1<<6, 43 E_CTR = 1<<7, 44 E_XER = 1<<8, 45 46 E_CR0 = 0xF<<0, 47 E_CR1 = 0xF<<4, 48 49 ANYMEM = E_MEM|E_MEMSP|E_MEMSB, 50 ALL = ~0, 51 }; 52 53 typedef struct Sch Sch; 54 typedef struct Dep Dep; 55 56 struct Dep 57 { 58 ulong ireg; 59 ulong freg; 60 ulong cc; 61 ulong cr; 62 }; 63 struct Sch 64 { 65 Prog p; 66 Dep set; 67 Dep used; 68 long soffset; 69 char size; 70 char comp; 71 }; 72 73 void regused(Sch*, Prog*); 74 int depend(Sch*, Sch*); 75 int conflict(Sch*, Sch*); 76 int offoverlap(Sch*, Sch*); 77 void dumpbits(Sch*, Dep*); 78 79 void 80 sched(Prog *p0, Prog *pe) 81 { 82 Prog *p, *q; 83 Sch sch[NSCHED], *s, *t, *u, *se, stmp; 84 85 if(!debug['Q']) 86 return; 87 /* 88 * build side structure 89 */ 90 s = sch; 91 for(p=p0;; p=p->link) { 92 memset(s, 0, sizeof(*s)); 93 s->p = *p; 94 regused(s, p); 95 if(debug['X']) { 96 Bprint(&bso, "%P\tset", &s->p); 97 dumpbits(s, &s->set); 98 Bprint(&bso, "; used"); 99 dumpbits(s, &s->used); 100 if(s->comp) 101 Bprint(&bso, "; compound"); 102 if(s->p.mark & LOAD) 103 Bprint(&bso, "; load"); 104 if(s->p.mark & BRANCH) 105 Bprint(&bso, "; branch"); 106 if(s->p.mark & FCMP) 107 Bprint(&bso, "; fcmp"); 108 Bprint(&bso, "\n"); 109 } 110 s++; 111 if(p == pe) 112 break; 113 } 114 se = s; 115 116 for(s=se-1; s>=sch; s--) { 117 118 /* 119 * load delay. interlocked. 
120 */ 121 if(s->p.mark & LOAD) { 122 if(s >= se-1) 123 continue; 124 if(!conflict(s, (s+1))) 125 continue; 126 /* 127 * s is load, s+1 is immediate use of result 128 * t is the trial instruction to insert between s and s+1 129 */ 130 for(t=s-1; t>=sch; t--) { 131 if(t->p.mark & BRANCH) 132 goto no2; 133 if(t->p.mark & FCMP) 134 if((s+1)->p.mark & BRANCH) 135 goto no2; 136 if(t->p.mark & LOAD) 137 if(conflict(t, (s+1))) 138 goto no2; 139 for(u=t+1; u<=s; u++) 140 if(depend(u, t)) 141 goto no2; 142 goto out2; 143 no2:; 144 } 145 if(debug['X']) 146 Bprint(&bso, "?l%P\n", &s->p); 147 continue; 148 out2: 149 if(debug['X']) { 150 Bprint(&bso, "!l%P\n", &t->p); 151 Bprint(&bso, "%P\n", &s->p); 152 } 153 stmp = *t; 154 memmove(t, t+1, (uchar*)s - (uchar*)t); 155 *s = stmp; 156 s--; 157 continue; 158 } 159 160 /* 161 * fop2 delay. 162 */ 163 if(s->p.mark & FCMP) { 164 if(s >= se-1) 165 continue; 166 if(!((s+1)->p.mark & BRANCH)) 167 continue; 168 /* t is the trial instruction to use */ 169 for(t=s-1; t>=sch; t--) { 170 for(u=t+1; u<=s; u++) 171 if(depend(u, t)) 172 goto no3; 173 goto out3; 174 no3:; 175 } 176 if(debug['X']) 177 Bprint(&bso, "?f%P\n", &s->p); 178 continue; 179 out3: 180 if(debug['X']) { 181 Bprint(&bso, "!f%P\n", &t->p); 182 Bprint(&bso, "%P\n", &s->p); 183 } 184 stmp = *t; 185 memmove(t, t+1, (uchar*)s - (uchar*)t); 186 *s = stmp; 187 s--; 188 continue; 189 } 190 } 191 192 /* 193 * put it all back 194 */ 195 for(s=sch, p=p0; s<se; s++, p=q) { 196 q = p->link; 197 if(q != s->p.link) { 198 *p = s->p; 199 p->link = q; 200 } 201 } 202 if(debug['X']) 203 Bprint(&bso, "\n"); 204 } 205 206 void 207 regused(Sch *s, Prog *realp) 208 { 209 int c, ar, ad, ld, sz, nr, upd; 210 ulong m; 211 Prog *p; 212 213 p = &s->p; 214 s->comp = compound(p); 215 if(s->comp) { 216 s->set.ireg |= 1<<REGTMP; 217 s->used.ireg |= 1<<REGTMP; 218 } 219 ar = 0; /* dest is really reference */ 220 ad = 0; /* source/dest is really address */ 221 ld = 0; /* opcode is load instruction */ 222 sz 
= 32*4; /* size of load/store for overlap computation */ 223 nr = 0; /* source/dest is not really reg */ 224 upd = 0; /* move with update; changes reg */ 225 226 /* 227 * flags based on opcode 228 */ 229 switch(p->as) { 230 case ATEXT: 231 curtext = realp; 232 autosize = p->to.offset + 8; 233 ad = 1; 234 break; 235 case ABL: 236 s->set.cc |= E_LR; 237 ar = 1; 238 ad = 1; 239 break; 240 case ABR: 241 ar = 1; 242 ad = 1; 243 break; 244 case ACMP: 245 case ACMPU: 246 case ACMPW: 247 case ACMPWU: 248 s->set.cc |= E_ICC; 249 if(p->reg == 0) 250 s->set.cr |= E_CR0; 251 else 252 s->set.cr |= (0xF<<((p->reg&7)*4)); 253 ar = 1; 254 break; 255 case AFCMPO: 256 case AFCMPU: 257 s->set.cc |= E_FCC; 258 if(p->reg == 0) 259 s->set.cr |= E_CR0; 260 else 261 s->set.cr |= (0xF<<((p->reg&7)*4)); 262 ar = 1; 263 break; 264 case ACRAND: 265 case ACRANDN: 266 case ACREQV: 267 case ACRNAND: 268 case ACRNOR: 269 case ACROR: 270 case ACRORN: 271 case ACRXOR: 272 s->used.cr |= 1<<p->from.reg; 273 s->set.cr |= 1<<p->to.reg; 274 nr = 1; 275 break; 276 case ABCL: /* tricky */ 277 s->used.cc |= E_FCC|E_ICC; 278 s->used.cr = ALL; 279 s->set.cc |= E_LR; 280 ar = 1; 281 break; 282 case ABC: /* tricky */ 283 s->used.cc |= E_FCC|E_ICC; 284 s->used.cr = ALL; 285 ar = 1; 286 break; 287 case ABEQ: 288 case ABGE: 289 case ABGT: 290 case ABLE: 291 case ABLT: 292 case ABNE: 293 case ABVC: 294 case ABVS: 295 s->used.cc |= E_ICC; 296 s->used.cr |= E_CR0; 297 ar = 1; 298 break; 299 case ALSW: 300 case AMOVMW: 301 /* could do better */ 302 sz = 32*4; 303 ld = 1; 304 break; 305 case AMOVBU: 306 case AMOVBZU: 307 upd = 1; 308 sz = 1; 309 ld = 1; 310 break; 311 case AMOVB: 312 case AMOVBZ: 313 sz = 1; 314 ld = 1; 315 break; 316 case AMOVHU: 317 case AMOVHZU: 318 upd = 1; 319 sz = 2; 320 ld = 1; 321 break; 322 case AMOVH: 323 case AMOVHBR: 324 case AMOVHZ: 325 sz = 2; 326 ld = 1; 327 break; 328 case AFMOVSU: 329 case AMOVWU: 330 case AMOVWZU: 331 upd = 1; 332 sz = 4; 333 ld = 1; 334 break; 335 case AFMOVS: 336 
case AMOVW: 337 case AMOVWZ: 338 case AMOVWBR: 339 case ALWAR: 340 sz = 4; 341 ld = 1; 342 break; 343 case AFMOVDU: 344 upd = 1; 345 sz = 8; 346 ld = 1; 347 break; 348 case AFMOVD: 349 sz = 8; 350 ld = 1; 351 break; 352 case AFMOVDCC: 353 sz = 8; 354 ld = 1; 355 s->set.cc |= E_FCC; 356 s->set.cr |= E_CR1; 357 break; 358 case AMOVFL: 359 case AMOVCRFS: 360 case AMTFSB0: 361 case AMTFSB0CC: 362 case AMTFSB1: 363 case AMTFSB1CC: 364 s->set.ireg = ALL; 365 s->set.freg = ALL; 366 s->set.cc = ALL; 367 s->set.cr = ALL; 368 break; 369 case AADDCC: 370 case AADDVCC: 371 case AADDCCC: 372 case AADDCVCC: 373 case AADDMECC: 374 case AADDMEVCC: 375 case AADDECC: 376 case AADDEVCC: 377 case AADDZECC: 378 case AADDZEVCC: 379 case AANDCC: 380 case AANDNCC: 381 case ACNTLZWCC: 382 case ADIVWCC: 383 case ADIVWVCC: 384 case ADIVWUCC: 385 case ADIVWUVCC: 386 case AEQVCC: 387 case AEXTSBCC: 388 case AEXTSHCC: 389 case AMULHWCC: 390 case AMULHWUCC: 391 case AMULLWCC: 392 case AMULLWVCC: 393 case ANANDCC: 394 case ANEGCC: 395 case ANEGVCC: 396 case ANORCC: 397 case AORCC: 398 case AORNCC: 399 case AREMCC: 400 case AREMVCC: 401 case AREMUCC: 402 case AREMUVCC: 403 case ARLWMICC: 404 case ARLWNMCC: 405 case ASLWCC: 406 case ASRAWCC: 407 case ASRWCC: 408 case ASTWCCC: 409 case ASUBCC: 410 case ASUBVCC: 411 case ASUBCCC: 412 case ASUBCVCC: 413 case ASUBMECC: 414 case ASUBMEVCC: 415 case ASUBECC: 416 case ASUBEVCC: 417 case ASUBZECC: 418 case ASUBZEVCC: 419 case AXORCC: 420 s->set.cc |= E_ICC; 421 s->set.cr |= E_CR0; 422 break; 423 case AFABSCC: 424 case AFADDCC: 425 case AFADDSCC: 426 case AFCTIWCC: 427 case AFCTIWZCC: 428 case AFDIVCC: 429 case AFDIVSCC: 430 case AFMADDCC: 431 case AFMADDSCC: 432 case AFMSUBCC: 433 case AFMSUBSCC: 434 case AFMULCC: 435 case AFMULSCC: 436 case AFNABSCC: 437 case AFNEGCC: 438 case AFNMADDCC: 439 case AFNMADDSCC: 440 case AFNMSUBCC: 441 case AFNMSUBSCC: 442 case AFRSPCC: 443 case AFSUBCC: 444 case AFSUBSCC: 445 s->set.cc |= E_FCC; 446 s->set.cr |= E_CR1; 447 
break; 448 } 449 450 /* 451 * flags based on 'to' field 452 */ 453 c = p->to.class; 454 if(c == 0) { 455 c = aclass(&p->to) + 1; 456 p->to.class = c; 457 } 458 c--; 459 switch(c) { 460 default: 461 print("unknown class %d %D\n", c, &p->to); 462 463 case C_NONE: 464 case C_ZCON: 465 case C_SCON: 466 case C_UCON: 467 case C_LCON: 468 case C_ADDCON: 469 case C_ANDCON: 470 case C_SBRA: 471 case C_LBRA: 472 break; 473 case C_CREG: 474 c = p->to.reg; 475 if(c == NREG) 476 s->set.cr = ALL; 477 else 478 s->set.cr |= (0xF << ((p->from.reg&7)*4)); 479 s->set.cc = ALL; 480 break; 481 case C_SPR: 482 case C_FPSCR: 483 case C_MSR: 484 case C_XER: 485 s->set.ireg = ALL; 486 s->set.freg = ALL; 487 s->set.cc = ALL; 488 s->set.cr = ALL; 489 break; 490 case C_LR: 491 s->set.cc |= E_LR; 492 break; 493 case C_CTR: 494 s->set.cc |= E_CTR; 495 break; 496 case C_ZOREG: 497 case C_SOREG: 498 case C_LOREG: 499 c = p->to.reg; 500 s->used.ireg |= 1<<c; 501 if(upd) 502 s->set.ireg |= 1<<c; 503 if(ad) 504 break; 505 s->size = sz; 506 s->soffset = regoff(&p->to); 507 508 m = ANYMEM; 509 if(c == REGSB) 510 m = E_MEMSB; 511 if(c == REGSP) 512 m = E_MEMSP; 513 514 if(ar) 515 s->used.cc |= m; 516 else 517 s->set.cc |= m; 518 break; 519 case C_SACON: 520 case C_LACON: 521 s->used.ireg |= 1<<REGSP; 522 if(upd) 523 s->set.ireg |= 1<<c; 524 break; 525 case C_SECON: 526 case C_LECON: 527 s->used.ireg |= 1<<REGSB; 528 if(upd) 529 s->set.ireg |= 1<<c; 530 break; 531 case C_REG: 532 if(nr) 533 break; 534 if(ar) 535 s->used.ireg |= 1<<p->to.reg; 536 else 537 s->set.ireg |= 1<<p->to.reg; 538 break; 539 case C_FREG: 540 if(ar) 541 s->used.freg |= 1<<p->to.reg; 542 else 543 s->set.freg |= 1<<p->to.reg; 544 break; 545 case C_SAUTO: 546 case C_LAUTO: 547 s->used.ireg |= 1<<REGSP; 548 if(upd) 549 s->set.ireg |= 1<<c; 550 if(ad) 551 break; 552 s->size = sz; 553 s->soffset = regoff(&p->to); 554 555 if(ar) 556 s->used.cc |= E_MEMSP; 557 else 558 s->set.cc |= E_MEMSP; 559 break; 560 case C_SEXT: 561 case C_LEXT: 562 
s->used.ireg |= 1<<REGSB; 563 if(upd) 564 s->set.ireg |= 1<<c; 565 if(ad) 566 break; 567 s->size = sz; 568 s->soffset = regoff(&p->to); 569 570 if(ar) 571 s->used.cc |= E_MEMSB; 572 else 573 s->set.cc |= E_MEMSB; 574 break; 575 } 576 577 /* 578 * flags based on 'from' field 579 */ 580 c = p->from.class; 581 if(c == 0) { 582 c = aclass(&p->from) + 1; 583 p->from.class = c; 584 } 585 c--; 586 switch(c) { 587 default: 588 print("unknown class %d %D\n", c, &p->from); 589 590 case C_NONE: 591 case C_ZCON: 592 case C_SCON: 593 case C_UCON: 594 case C_LCON: 595 case C_ADDCON: 596 case C_ANDCON: 597 case C_SBRA: 598 case C_LBRA: 599 c = p->from.reg; 600 if(c != NREG) 601 s->used.ireg |= 1<<c; 602 break; 603 case C_CREG: 604 c = p->from.reg; 605 if(c == NREG) 606 s->used.cr = ALL; 607 else 608 s->used.cr |= (0xF << ((p->from.reg&7)*4)); 609 s->used.cc = ALL; 610 break; 611 case C_SPR: 612 case C_FPSCR: 613 case C_MSR: 614 case C_XER: 615 s->set.ireg = ALL; 616 s->set.freg = ALL; 617 s->set.cc = ALL; 618 s->set.cr = ALL; 619 break; 620 case C_LR: 621 s->used.cc |= E_LR; 622 break; 623 case C_CTR: 624 s->used.cc |= E_CTR; 625 break; 626 case C_ZOREG: 627 case C_SOREG: 628 case C_LOREG: 629 c = p->from.reg; 630 s->used.ireg |= 1<<c; 631 if(ld) 632 p->mark |= LOAD; 633 if(ad) 634 break; 635 s->size = sz; 636 s->soffset = regoff(&p->from); 637 638 m = ANYMEM; 639 if(c == REGSB) 640 m = E_MEMSB; 641 if(c == REGSP) 642 m = E_MEMSP; 643 644 s->used.cc |= m; 645 break; 646 case C_SACON: 647 case C_LACON: 648 s->used.ireg |= 1<<REGSP; 649 break; 650 case C_SECON: 651 case C_LECON: 652 s->used.ireg |= 1<<REGSB; 653 break; 654 case C_REG: 655 if(nr) 656 break; 657 s->used.ireg |= 1<<p->from.reg; 658 break; 659 case C_FREG: 660 s->used.freg |= 1<<p->from.reg; 661 break; 662 case C_SAUTO: 663 case C_LAUTO: 664 s->used.ireg |= 1<<REGSP; 665 if(ld) 666 p->mark |= LOAD; 667 if(ad) 668 break; 669 s->size = sz; 670 s->soffset = regoff(&p->from); 671 672 s->used.cc |= E_MEMSP; 673 break; 674 
case C_SEXT: 675 case C_LEXT: 676 s->used.ireg |= 1<<REGSB; 677 if(ld) 678 p->mark |= LOAD; 679 if(ad) 680 break; 681 s->size = sz; 682 s->soffset = regoff(&p->from); 683 684 s->used.cc |= E_MEMSB; 685 break; 686 } 687 688 c = p->reg; 689 if(c != NREG) { 690 if(p->from.type == D_FREG || p->to.type == D_FREG) 691 s->used.freg |= 1<<c; 692 else 693 s->used.ireg |= 1<<c; 694 } 695 } 696 697 /* 698 * test to see if 2 instrictions can be 699 * interchanged without changing semantics 700 */ 701 int 702 depend(Sch *sa, Sch *sb) 703 { 704 ulong x; 705 706 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg)) 707 return 1; 708 if(sb->set.ireg & sa->used.ireg) 709 return 1; 710 711 if(sa->set.freg & (sb->set.freg|sb->used.freg)) 712 return 1; 713 if(sb->set.freg & sa->used.freg) 714 return 1; 715 716 if(sa->set.cr & (sb->set.cr|sb->used.cr)) 717 return 1; 718 if(sb->set.cr & sa->used.cr) 719 return 1; 720 721 722 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) | 723 (sb->set.cc & sa->used.cc); 724 if(x) { 725 /* 726 * allow SB and SP to pass each other. 727 * allow SB to pass SB iff doffsets are ok 728 * anything else conflicts 729 */ 730 if(x != E_MEMSP && x != E_MEMSB) 731 return 1; 732 x = sa->set.cc | sb->set.cc | 733 sa->used.cc | sb->used.cc; 734 if(x & E_MEM) 735 return 1; 736 if(offoverlap(sa, sb)) 737 return 1; 738 } 739 740 return 0; 741 } 742 743 int 744 offoverlap(Sch *sa, Sch *sb) 745 { 746 747 if(sa->soffset < sb->soffset) { 748 if(sa->soffset+sa->size > sb->soffset) 749 return 1; 750 return 0; 751 } 752 if(sb->soffset+sb->size > sa->soffset) 753 return 1; 754 return 0; 755 } 756 757 /* 758 * test 2 adjacent instructions 759 * and find out if inserted instructions 760 * are desired to prevent stalls. 761 * first instruction is a load instruction. 
762 */ 763 int 764 conflict(Sch *sa, Sch *sb) 765 { 766 767 if(sa->set.ireg & sb->used.ireg) 768 return 1; 769 if(sa->set.freg & sb->used.freg) 770 return 1; 771 if(sa->set.cr & sb->used.cr) 772 return 1; 773 return 0; 774 } 775 776 int 777 compound(Prog *p) 778 { 779 Optab *o; 780 781 o = oplook(p); 782 if(o->size != 4) 783 return 1; 784 if(p->to.type == D_REG && p->to.reg == REGSB) 785 return 1; 786 return 0; 787 } 788 789 void 790 dumpbits(Sch *s, Dep *d) 791 { 792 int i; 793 794 for(i=0; i<32; i++) 795 if(d->ireg & (1<<i)) 796 Bprint(&bso, " R%d", i); 797 for(i=0; i<32; i++) 798 if(d->freg & (1<<i)) 799 Bprint(&bso, " F%d", i); 800 for(i=0; i<32; i++) 801 if(d->cr & (1<<i)) 802 Bprint(&bso, " C%d", i); 803 for(i=0; i<32; i++) 804 switch(d->cc & (1<<i)) { 805 default: 806 break; 807 case E_ICC: 808 Bprint(&bso, " ICC"); 809 break; 810 case E_FCC: 811 Bprint(&bso, " FCC"); 812 break; 813 case E_LR: 814 Bprint(&bso, " LR"); 815 break; 816 case E_CR: 817 Bprint(&bso, " CR"); 818 break; 819 case E_CTR: 820 Bprint(&bso, " CTR"); 821 break; 822 case E_XER: 823 Bprint(&bso, " XER"); 824 break; 825 case E_MEM: 826 Bprint(&bso, " MEM%d", s->size); 827 break; 828 case E_MEMSB: 829 Bprint(&bso, " SB%d", s->size); 830 break; 831 case E_MEMSP: 832 Bprint(&bso, " SP%d", s->size); 833 break; 834 } 835 }