github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/liblink/obj6.c (about) 1 // Inferno utils/6l/pass.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/6l/pass.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
30 31 #include <u.h> 32 #include <libc.h> 33 #include <bio.h> 34 #include <link.h> 35 #include "../cmd/6l/6.out.h" 36 #include "../runtime/stack.h" 37 38 static Prog zprg = { 39 .back = 2, 40 .as = AGOK, 41 .from = { 42 .type = D_NONE, 43 .index = D_NONE, 44 }, 45 .to = { 46 .type = D_NONE, 47 .index = D_NONE, 48 }, 49 }; 50 51 static void 52 nopout(Prog *p) 53 { 54 p->as = ANOP; 55 p->from.type = D_NONE; 56 p->to.type = D_NONE; 57 } 58 59 static int 60 symtype(Addr *a) 61 { 62 int t; 63 64 t = a->type; 65 if(t == D_ADDR) 66 t = a->index; 67 return t; 68 } 69 70 static int 71 isdata(Prog *p) 72 { 73 return p->as == ADATA || p->as == AGLOBL; 74 } 75 76 static int 77 iscall(Prog *p) 78 { 79 return p->as == ACALL; 80 } 81 82 static int 83 datasize(Prog *p) 84 { 85 return p->from.scale; 86 } 87 88 static int 89 textflag(Prog *p) 90 { 91 return p->from.scale; 92 } 93 94 static void 95 settextflag(Prog *p, int f) 96 { 97 p->from.scale = f; 98 } 99 100 static void nacladdr(Link*, Prog*, Addr*); 101 102 static int 103 canuselocaltls(Link *ctxt) 104 { 105 switch(ctxt->headtype) { 106 case Hplan9: 107 case Hwindows: 108 return 0; 109 } 110 return 1; 111 } 112 113 static void 114 progedit(Link *ctxt, Prog *p) 115 { 116 char literal[64]; 117 LSym *s; 118 Prog *q; 119 120 // Thread-local storage references use the TLS pseudo-register. 121 // As a register, TLS refers to the thread-local storage base, and it 122 // can only be loaded into another register: 123 // 124 // MOVQ TLS, AX 125 // 126 // An offset from the thread-local storage base is written off(reg)(TLS*1). 127 // Semantically it is off(reg), but the (TLS*1) annotation marks this as 128 // indexing from the loaded TLS base. This emits a relocation so that 129 // if the linker needs to adjust the offset, it can. 
For example: 130 // 131 // MOVQ TLS, AX 132 // MOVQ 8(AX)(TLS*1), CX // load m into CX 133 // 134 // On systems that support direct access to the TLS memory, this 135 // pair of instructions can be reduced to a direct TLS memory reference: 136 // 137 // MOVQ 8(TLS), CX // load m into CX 138 // 139 // The 2-instruction and 1-instruction forms correspond roughly to 140 // ELF TLS initial exec mode and ELF TLS local exec mode, respectively. 141 // 142 // We applies this rewrite on systems that support the 1-instruction form. 143 // The decision is made using only the operating system (and probably 144 // the -shared flag, eventually), not the link mode. If some link modes 145 // on a particular operating system require the 2-instruction form, 146 // then all builds for that operating system will use the 2-instruction 147 // form, so that the link mode decision can be delayed to link time. 148 // 149 // In this way, all supported systems use identical instructions to 150 // access TLS, and they are rewritten appropriately first here in 151 // liblink and then finally using relocations in the linker. 152 153 if(canuselocaltls(ctxt)) { 154 // Reduce TLS initial exec model to TLS local exec model. 155 // Sequences like 156 // MOVQ TLS, BX 157 // ... off(BX)(TLS*1) ... 158 // become 159 // NOP 160 // ... off(TLS) ... 161 // 162 // TODO(rsc): Remove the Hsolaris special case. It exists only to 163 // guarantee we are producing byte-identical binaries as before this code. 164 // But it should be unnecessary. 
165 if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris) 166 nopout(p); 167 if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) { 168 p->from.type = D_INDIR+D_TLS; 169 p->from.scale = 0; 170 p->from.index = D_NONE; 171 } 172 if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) { 173 p->to.type = D_INDIR+D_TLS; 174 p->to.scale = 0; 175 p->to.index = D_NONE; 176 } 177 } else { 178 // As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load. 179 // The instruction 180 // MOVQ off(TLS), BX 181 // becomes the sequence 182 // MOVQ TLS, BX 183 // MOVQ off(BX)(TLS*1), BX 184 // This allows the C compilers to emit references to m and g using the direct off(TLS) form. 185 if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) { 186 q = appendp(ctxt, p); 187 q->as = p->as; 188 q->from = p->from; 189 q->from.type = D_INDIR + p->to.type; 190 q->from.index = D_TLS; 191 q->from.scale = 2; // TODO: use 1 192 q->to = p->to; 193 p->from.type = D_TLS; 194 p->from.index = D_NONE; 195 p->from.offset = 0; 196 } 197 } 198 199 // TODO: Remove. 200 if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) { 201 if(p->from.scale == 1 && p->from.index == D_TLS) 202 p->from.scale = 2; 203 if(p->to.scale == 1 && p->to.index == D_TLS) 204 p->to.scale = 2; 205 } 206 207 if(ctxt->headtype == Hnacl) { 208 nacladdr(ctxt, p, &p->from); 209 nacladdr(ctxt, p, &p->to); 210 } 211 212 // Maintain information about code generation mode. 
213 if(ctxt->mode == 0) 214 ctxt->mode = 64; 215 p->mode = ctxt->mode; 216 217 switch(p->as) { 218 case AMODE: 219 if(p->from.type == D_CONST || p->from.type == D_INDIR+D_NONE) { 220 switch((int)p->from.offset) { 221 case 16: 222 case 32: 223 case 64: 224 ctxt->mode = p->from.offset; 225 break; 226 } 227 } 228 nopout(p); 229 break; 230 } 231 232 // Rewrite CALL/JMP/RET to symbol as D_BRANCH. 233 switch(p->as) { 234 case ACALL: 235 case AJMP: 236 case ARET: 237 if((p->to.type == D_EXTERN || p->to.type == D_STATIC) && p->to.sym != nil) 238 p->to.type = D_BRANCH; 239 break; 240 } 241 242 // Rewrite float constants to values stored in memory. 243 switch(p->as) { 244 case AMOVSS: 245 // Convert AMOVSS $(0), Xx to AXORPS Xx, Xx 246 if(p->from.type == D_FCONST) 247 if(p->from.u.dval == 0) 248 if(p->to.type >= D_X0) 249 if(p->to.type <= D_X15) { 250 p->as = AXORPS; 251 p->from.type = p->to.type; 252 p->from.index = p->to.index; 253 break; 254 } 255 // fallthrough 256 257 case AFMOVF: 258 case AFADDF: 259 case AFSUBF: 260 case AFSUBRF: 261 case AFMULF: 262 case AFDIVF: 263 case AFDIVRF: 264 case AFCOMF: 265 case AFCOMFP: 266 case AADDSS: 267 case ASUBSS: 268 case AMULSS: 269 case ADIVSS: 270 case ACOMISS: 271 case AUCOMISS: 272 if(p->from.type == D_FCONST) { 273 int32 i32; 274 float32 f32; 275 f32 = p->from.u.dval; 276 memmove(&i32, &f32, 4); 277 sprint(literal, "$f32.%08ux", (uint32)i32); 278 s = linklookup(ctxt, literal, 0); 279 if(s->type == 0) { 280 s->type = SRODATA; 281 adduint32(ctxt, s, i32); 282 s->reachable = 0; 283 } 284 p->from.type = D_EXTERN; 285 p->from.sym = s; 286 p->from.offset = 0; 287 } 288 break; 289 290 case AMOVSD: 291 // Convert AMOVSD $(0), Xx to AXORPS Xx, Xx 292 if(p->from.type == D_FCONST) 293 if(p->from.u.dval == 0) 294 if(p->to.type >= D_X0) 295 if(p->to.type <= D_X15) { 296 p->as = AXORPS; 297 p->from.type = p->to.type; 298 p->from.index = p->to.index; 299 break; 300 } 301 // fallthrough 302 303 case AFMOVD: 304 case AFADDD: 305 case AFSUBD: 
306 case AFSUBRD: 307 case AFMULD: 308 case AFDIVD: 309 case AFDIVRD: 310 case AFCOMD: 311 case AFCOMDP: 312 case AADDSD: 313 case ASUBSD: 314 case AMULSD: 315 case ADIVSD: 316 case ACOMISD: 317 case AUCOMISD: 318 if(p->from.type == D_FCONST) { 319 int64 i64; 320 memmove(&i64, &p->from.u.dval, 8); 321 sprint(literal, "$f64.%016llux", (uvlong)i64); 322 s = linklookup(ctxt, literal, 0); 323 if(s->type == 0) { 324 s->type = SRODATA; 325 adduint64(ctxt, s, i64); 326 s->reachable = 0; 327 } 328 p->from.type = D_EXTERN; 329 p->from.sym = s; 330 p->from.offset = 0; 331 } 332 break; 333 } 334 } 335 336 static void 337 nacladdr(Link *ctxt, Prog *p, Addr *a) 338 { 339 if(p->as == ALEAL || p->as == ALEAQ) 340 return; 341 342 if(a->type == D_BP || a->type == D_INDIR+D_BP) { 343 ctxt->diag("invalid address: %P", p); 344 return; 345 } 346 if(a->type == D_INDIR+D_TLS) 347 a->type = D_INDIR+D_BP; 348 else if(a->type == D_TLS) 349 a->type = D_BP; 350 if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) { 351 switch(a->type) { 352 case D_INDIR+D_BP: 353 case D_INDIR+D_SP: 354 case D_INDIR+D_R15: 355 // all ok 356 break; 357 default: 358 if(a->index != D_NONE) 359 ctxt->diag("invalid address %P", p); 360 a->index = a->type - D_INDIR; 361 if(a->index != D_NONE) 362 a->scale = 1; 363 a->type = D_INDIR+D_R15; 364 break; 365 } 366 } 367 } 368 369 static Prog* load_g_cx(Link*, Prog*); 370 static Prog* stacksplit(Link*, Prog*, int32, int32, int, Prog**); 371 static void indir_cx(Link*, Addr*); 372 373 static void 374 parsetextconst(vlong arg, vlong *textstksiz, vlong *textarg) 375 { 376 *textstksiz = arg & 0xffffffffLL; 377 if(*textstksiz & 0x80000000LL) 378 *textstksiz = -(-*textstksiz & 0xffffffffLL); 379 380 *textarg = (arg >> 32) & 0xffffffffLL; 381 if(*textarg & 0x80000000LL) 382 *textarg = 0; 383 *textarg = (*textarg+7) & ~7LL; 384 } 385 386 static void 387 addstacksplit(Link *ctxt, LSym *cursym) 388 { 389 Prog *p, *q, *p1, *p2; 390 int32 autoffset, deltasp; 391 int a, pcsize; 392 
vlong textstksiz, textarg; 393 394 if(ctxt->tlsg == nil) 395 ctxt->tlsg = linklookup(ctxt, "runtime.tlsg", 0); 396 if(ctxt->symmorestack[0] == nil) { 397 ctxt->symmorestack[0] = linklookup(ctxt, "runtime.morestack", 0); 398 ctxt->symmorestack[1] = linklookup(ctxt, "runtime.morestack_noctxt", 0); 399 } 400 401 if(ctxt->headtype == Hplan9 && ctxt->plan9privates == nil) 402 ctxt->plan9privates = linklookup(ctxt, "_privates", 0); 403 404 ctxt->cursym = cursym; 405 406 if(cursym->text == nil || cursym->text->link == nil) 407 return; 408 409 p = cursym->text; 410 parsetextconst(p->to.offset, &textstksiz, &textarg); 411 autoffset = textstksiz; 412 if(autoffset < 0) 413 autoffset = 0; 414 415 cursym->args = p->to.offset>>32; 416 cursym->locals = textstksiz; 417 418 if(autoffset < StackSmall && !(p->from.scale & NOSPLIT)) { 419 for(q = p; q != nil; q = q->link) { 420 if(q->as == ACALL) 421 goto noleaf; 422 if((q->as == ADUFFCOPY || q->as == ADUFFZERO) && autoffset >= StackSmall - 8) 423 goto noleaf; 424 } 425 p->from.scale |= NOSPLIT; 426 noleaf:; 427 } 428 429 q = nil; 430 if(!(p->from.scale & NOSPLIT) || (p->from.scale & WRAPPER)) { 431 p = appendp(ctxt, p); 432 p = load_g_cx(ctxt, p); // load g into CX 433 } 434 if(!(cursym->text->from.scale & NOSPLIT)) 435 p = stacksplit(ctxt, p, autoffset, textarg, !(cursym->text->from.scale&NEEDCTXT), &q); // emit split check 436 437 if(autoffset) { 438 if(autoffset%ctxt->arch->regsize != 0) 439 ctxt->diag("unaligned stack size %d", autoffset); 440 p = appendp(ctxt, p); 441 p->as = AADJSP; 442 p->from.type = D_CONST; 443 p->from.offset = autoffset; 444 p->spadj = autoffset; 445 } else { 446 // zero-byte stack adjustment. 447 // Insert a fake non-zero adjustment so that stkcheck can 448 // recognize the end of the stack-splitting prolog. 
449 p = appendp(ctxt, p); 450 p->as = ANOP; 451 p->spadj = -ctxt->arch->ptrsize; 452 p = appendp(ctxt, p); 453 p->as = ANOP; 454 p->spadj = ctxt->arch->ptrsize; 455 } 456 if(q != nil) 457 q->pcond = p; 458 deltasp = autoffset; 459 460 if(cursym->text->from.scale & WRAPPER) { 461 // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame 462 // 463 // MOVQ g_panic(CX), BX 464 // TESTQ BX, BX 465 // JEQ end 466 // LEAQ (autoffset+8)(SP), DI 467 // CMPQ panic_argp(BX), DI 468 // JNE end 469 // MOVQ SP, panic_argp(BX) 470 // end: 471 // NOP 472 // 473 // The NOP is needed to give the jumps somewhere to land. 474 // It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes. 475 476 p = appendp(ctxt, p); 477 p->as = AMOVQ; 478 p->from.type = D_INDIR+D_CX; 479 p->from.offset = 4*ctxt->arch->ptrsize; // G.panic 480 p->to.type = D_BX; 481 if(ctxt->headtype == Hnacl) { 482 p->as = AMOVL; 483 p->from.type = D_INDIR+D_R15; 484 p->from.scale = 1; 485 p->from.index = D_CX; 486 } 487 488 p = appendp(ctxt, p); 489 p->as = ATESTQ; 490 p->from.type = D_BX; 491 p->to.type = D_BX; 492 if(ctxt->headtype == Hnacl) 493 p->as = ATESTL; 494 495 p = appendp(ctxt, p); 496 p->as = AJEQ; 497 p->to.type = D_BRANCH; 498 p1 = p; 499 500 p = appendp(ctxt, p); 501 p->as = ALEAQ; 502 p->from.type = D_INDIR+D_SP; 503 p->from.offset = autoffset+8; 504 p->to.type = D_DI; 505 if(ctxt->headtype == Hnacl) 506 p->as = ALEAL; 507 508 p = appendp(ctxt, p); 509 p->as = ACMPQ; 510 p->from.type = D_INDIR+D_BX; 511 p->from.offset = 0; // Panic.argp 512 p->to.type = D_DI; 513 if(ctxt->headtype == Hnacl) { 514 p->as = ACMPL; 515 p->from.type = D_INDIR+D_R15; 516 p->from.scale = 1; 517 p->from.index = D_BX; 518 } 519 520 p = appendp(ctxt, p); 521 p->as = AJNE; 522 p->to.type = D_BRANCH; 523 p2 = p; 524 525 p = appendp(ctxt, p); 526 p->as = AMOVQ; 527 p->from.type = D_SP; 528 p->to.type = D_INDIR+D_BX; 529 p->to.offset = 0; // Panic.argp 530 if(ctxt->headtype == Hnacl) { 531 p->as = 
AMOVL; 532 p->to.type = D_INDIR+D_R15; 533 p->to.scale = 1; 534 p->to.index = D_BX; 535 } 536 537 p = appendp(ctxt, p); 538 p->as = ANOP; 539 p1->pcond = p; 540 p2->pcond = p; 541 } 542 543 if(ctxt->debugzerostack && autoffset && !(cursym->text->from.scale&NOSPLIT)) { 544 // 6l -Z means zero the stack frame on entry. 545 // This slows down function calls but can help avoid 546 // false positives in garbage collection. 547 p = appendp(ctxt, p); 548 p->as = AMOVQ; 549 p->from.type = D_SP; 550 p->to.type = D_DI; 551 552 p = appendp(ctxt, p); 553 p->as = AMOVQ; 554 p->from.type = D_CONST; 555 p->from.offset = autoffset/8; 556 p->to.type = D_CX; 557 558 p = appendp(ctxt, p); 559 p->as = AMOVQ; 560 p->from.type = D_CONST; 561 p->from.offset = 0; 562 p->to.type = D_AX; 563 564 p = appendp(ctxt, p); 565 p->as = AREP; 566 567 p = appendp(ctxt, p); 568 p->as = ASTOSQ; 569 } 570 571 for(; p != nil; p = p->link) { 572 pcsize = p->mode/8; 573 a = p->from.type; 574 if(a == D_AUTO) 575 p->from.offset += deltasp; 576 if(a == D_PARAM) 577 p->from.offset += deltasp + pcsize; 578 a = p->to.type; 579 if(a == D_AUTO) 580 p->to.offset += deltasp; 581 if(a == D_PARAM) 582 p->to.offset += deltasp + pcsize; 583 584 switch(p->as) { 585 default: 586 continue; 587 case APUSHL: 588 case APUSHFL: 589 deltasp += 4; 590 p->spadj = 4; 591 continue; 592 case APUSHQ: 593 case APUSHFQ: 594 deltasp += 8; 595 p->spadj = 8; 596 continue; 597 case APUSHW: 598 case APUSHFW: 599 deltasp += 2; 600 p->spadj = 2; 601 continue; 602 case APOPL: 603 case APOPFL: 604 deltasp -= 4; 605 p->spadj = -4; 606 continue; 607 case APOPQ: 608 case APOPFQ: 609 deltasp -= 8; 610 p->spadj = -8; 611 continue; 612 case APOPW: 613 case APOPFW: 614 deltasp -= 2; 615 p->spadj = -2; 616 continue; 617 case ARET: 618 break; 619 } 620 621 if(autoffset != deltasp) 622 ctxt->diag("unbalanced PUSH/POP"); 623 624 if(autoffset) { 625 p->as = AADJSP; 626 p->from.type = D_CONST; 627 p->from.offset = -autoffset; 628 p->spadj = -autoffset; 629 
p = appendp(ctxt, p); 630 p->as = ARET; 631 // If there are instructions following 632 // this ARET, they come from a branch 633 // with the same stackframe, so undo 634 // the cleanup. 635 p->spadj = +autoffset; 636 } 637 if(p->to.sym) // retjmp 638 p->as = AJMP; 639 } 640 } 641 642 static void 643 indir_cx(Link *ctxt, Addr *a) 644 { 645 if(ctxt->headtype == Hnacl) { 646 a->type = D_INDIR + D_R15; 647 a->index = D_CX; 648 a->scale = 1; 649 return; 650 } 651 652 a->type = D_INDIR+D_CX; 653 } 654 655 // Append code to p to load g into cx. 656 // Overwrites p with the first instruction (no first appendp). 657 // Overwriting p is unusual but it lets use this in both the 658 // prologue (caller must call appendp first) and in the epilogue. 659 // Returns last new instruction. 660 static Prog* 661 load_g_cx(Link *ctxt, Prog *p) 662 { 663 Prog *next; 664 665 p->as = AMOVQ; 666 if(ctxt->arch->ptrsize == 4) 667 p->as = AMOVL; 668 p->from.type = D_INDIR+D_TLS; 669 p->from.offset = 0; 670 p->to.type = D_CX; 671 672 next = p->link; 673 progedit(ctxt, p); 674 while(p->link != next) 675 p = p->link; 676 677 if(p->from.index == D_TLS) 678 p->from.scale = 2; 679 680 return p; 681 } 682 683 // Append code to p to check for stack split. 684 // Appends to (does not overwrite) p. 685 // Assumes g is in CX. 686 // Returns last new instruction. 687 // On return, *jmpok is the instruction that should jump 688 // to the stack frame allocation if no split is needed. 
static Prog*
stacksplit(Link *ctxt, Prog *p, int32 framesize, int32 textarg, int noctxt, Prog **jmpok)
{
	Prog *q, *q1;
	int cmp, lea, mov, sub;

	USED(textarg);
	cmp = ACMPQ;
	lea = ALEAQ;
	mov = AMOVQ;
	sub = ASUBQ;

	// NaCl uses the 32-bit forms of the same instructions.
	if(ctxt->headtype == Hnacl) {
		cmp = ACMPL;
		lea = ALEAL;
		mov = AMOVL;
		sub = ASUBL;
	}

	// q1, when set, is the JEQ of the huge-frame case that must
	// branch straight to the morestack call.
	q1 = nil;
	if(framesize <= StackSmall) {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = appendp(ctxt, p);
		p->as = cmp;
		p->from.type = D_SP;
		indir_cx(ctxt, &p->to);
		p->to.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
		if(ctxt->cursym->cfunc)
			p->to.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
	} else if(framesize <= StackBig) {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), AX
		//	CMPQ AX, stackguard
		p = appendp(ctxt, p);
		p->as = lea;
		p->from.type = D_INDIR+D_SP;
		p->from.offset = -(framesize-StackSmall);
		p->to.type = D_AX;

		p = appendp(ctxt, p);
		p->as = cmp;
		p->from.type = D_AX;
		indir_cx(ctxt, &p->to);
		p->to.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
		if(ctxt->cursym->cfunc)
			p->to.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
	} else {
		// Such a large stack we need to protect against wraparound.
		// If SP is close to zero:
		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//
		// Preemption sets stackguard to StackPreempt, a very large value.
		// That breaks the math above, so we have to check for that explicitly.
		// The stack guard is loaded into SI (not CX, which still holds g):
		//	MOVQ	stackguard, SI
		//	CMPQ	SI, $StackPreempt
		//	JEQ	label-of-call-to-morestack
		//	LEAQ	StackGuard(SP), AX
		//	SUBQ	SI, AX
		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))

		p = appendp(ctxt, p);
		p->as = mov;
		indir_cx(ctxt, &p->from);
		p->from.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
		if(ctxt->cursym->cfunc)
			p->from.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
		p->to.type = D_SI;

		p = appendp(ctxt, p);
		p->as = cmp;
		p->from.type = D_SI;
		p->to.type = D_CONST;
		p->to.offset = StackPreempt;

		p = appendp(ctxt, p);
		p->as = AJEQ;
		p->to.type = D_BRANCH;
		q1 = p;

		p = appendp(ctxt, p);
		p->as = lea;
		p->from.type = D_INDIR+D_SP;
		p->from.offset = StackGuard;
		p->to.type = D_AX;

		p = appendp(ctxt, p);
		p->as = sub;
		p->from.type = D_SI;
		p->to.type = D_AX;

		p = appendp(ctxt, p);
		p->as = cmp;
		p->from.type = D_AX;
		p->to.type = D_CONST;
		p->to.offset = framesize+(StackGuard-StackSmall);
	}

	// common
	// JHI skips the morestack call when the comparison above
	// shows there is enough stack.
	p = appendp(ctxt, p);
	p->as = AJHI;
	p->to.type = D_BRANCH;
	q = p;

	// Call runtime.morestackc for C functions, otherwise
	// symmorestack[noctxt] (morestack or morestack_noctxt).
	p = appendp(ctxt, p);
	p->as = ACALL;
	p->to.type = D_BRANCH;
	if(ctxt->cursym->cfunc)
		p->to.sym = linklookup(ctxt, "runtime.morestackc", 0);
	else
		p->to.sym = ctxt->symmorestack[noctxt];

	// After the call, jump back to the first instruction
	// following the TEXT to redo the check.
	p = appendp(ctxt, p);
	p->as = AJMP;
	p->to.type = D_BRANCH;
	p->pcond = ctxt->cursym->text->link;

	// Provisional target for the JHI: the instruction after the JMP.
	if(q != nil)
		q->pcond = p->link;
	// The preemption JEQ goes to the instruction after the JHI,
	// i.e. the CALL.
	if(q1 != nil)
		q1->pcond = q->link;

	*jmpok = q;
	return p;
}

static void xfol(Link*, Prog*, Prog**);

// follow re-lays out the instruction list of s using xfol.
// A dummy head Prog collects the new list; on return s->text
// is the first real instruction of the rebuilt, nil-terminated list.
static void
follow(Link *ctxt, LSym *s)
{
	Prog *firstp, *lastp;

	ctxt->cursym = s;

	firstp = ctxt->arch->prg();
	lastp = firstp;
	xfol(ctxt, s->text, &lastp);
	lastp->link = nil;
	s->text = firstp->link;
}

// nofollow reports whether opcode a never falls through to the
// next instruction (unconditional jump, the return family, UNDEF).
static int
nofollow(int a)
{
	switch(a) {
	case AJMP:
	case ARET:
	case AIRETL:
	case AIRETQ:
	case AIRETW:
	case ARETFL:
	case ARETFQ:
	case ARETFW:
	case AUNDEF:
		return 1;
	}
	return 0;
}

// pushpop reports whether opcode a is one of the PUSH/POP
// (including PUSHF/POPF) instructions.
static int
pushpop(int a)
{
	switch(a) {
	case APUSHL:
	case APUSHFL:
	case APUSHQ:
	case APUSHFQ:
	case APUSHW:
	case APUSHFW:
	case APOPL:
	case APOPFL:
	case APOPQ:
	case APOPFQ:
	case APOPW:
	case APOPFW:
		return 1;
	}
	return 0;
}

// relinv returns the conditional jump with the opposite condition
// to a. Any opcode not listed is a fatal error.
static int
relinv(int a)
{
	switch(a) {
	case AJEQ:	return AJNE;
	case AJNE:	return AJEQ;
	case AJLE:	return AJGT;
	case AJLS:	return AJHI;
	case AJLT:	return AJGE;
	case AJMI:	return AJPL;
	case AJGE:	return AJLT;
	case AJPL:	return AJMI;
	case AJGT:	return AJLE;
	case AJHI:	return AJLS;
	case AJCS:	return AJCC;
	case AJCC:	return AJCS;
	case AJPS:	return AJPC;
	case AJPC:	return AJPS;
	case AJOS:	return AJOC;
	case AJOC:	return AJOS;
	}
	sysfatal("unknown relation: %s", anames6[a]);
	return 0;
}

// xfol appends the code reachable from p to the list ending at *last,
// following jumps so that jump targets are laid out after their
// source where possible. Each emitted Prog gets mark = 1; *last is
// updated to the final Prog appended. Conditional branches are
// handled by recursing on one successor and looping on the other.
static void
xfol(Link *ctxt, Prog *p, Prog **last)
{
	Prog *q;
	int i;
	int a;

loop:
	if(p == nil)
		return;
	if(p->as == AJMP)
	if((q = p->pcond) != nil && q->as != ATEXT) {
		/* mark instruction as done and continue layout at target of jump */
		p->mark = 1;
		p = q;
		if(p->mark == 0)
			goto loop;
	}
	if(p->mark) {
		/*
		 * p goes here, but already used it elsewhere.
		 * copy up to 4 instructions or else branch to other copy.
		 */
		for(i=0,q=p; i<4; i++,q=q->link) {
			if(q == nil)
				break;
			if(q == *last)
				break;
			a = q->as;
			if(a == ANOP) {
				/* NOPs do not count toward the limit of 4 */
				i--;
				continue;
			}
			if(nofollow(a) || pushpop(a))
				break;	// NOTE(rsc): arm does goto copy
			if(q->pcond == nil || q->pcond->mark)
				continue;
			if(a == ACALL || a == ALOOP)
				continue;
			/*
			 * q is a branch (opcode a) to a not yet emitted target:
			 * copy the instructions from p up to and including
			 * such a branch, then invert the copied branch.
			 */
			for(;;) {
				if(p->as == ANOP) {
					p = p->link;
					continue;
				}
				q = copyp(ctxt, p);
				p = p->link;
				q->mark = 1;
				(*last)->link = q;
				*last = q;
				if(q->as != a || q->pcond == nil || q->pcond->mark)
					continue;

				/* swap the copy's branch target and fall-through */
				q->as = relinv(q->as);
				p = q->pcond;
				q->pcond = q->link;
				q->link = p;
				xfol(ctxt, q->link, last);
				p = q->link;
				if(p->mark)
					return;
				goto loop;
			}
		} /* */
		/* no copy was made: emit a jump to the existing copy of p */
		q = ctxt->arch->prg();
		q->as = AJMP;
		q->lineno = p->lineno;
		q->to.type = D_BRANCH;
		q->to.offset = p->pc;
		q->pcond = p;
		p = q;
	}

	/* emit p */
	p->mark = 1;
	(*last)->link = p;
	*last = p;
	a = p->as;

	/* continue loop with what comes after p */
	if(nofollow(a))
		return;
	if(p->pcond != nil && a != ACALL) {
		/*
		 * some kind of conditional branch.
		 * recurse to follow one path.
		 * continue loop on the other.
		 */
		if((q = brchain(ctxt, p->pcond)) != nil)
			p->pcond = q;
		if((q = brchain(ctxt, p->link)) != nil)
			p->link = q;
		if(p->from.type == D_CONST) {
			if(p->from.offset == 1) {
				/*
				 * expect conditional jump to be taken.
				 * rewrite so that's the fall-through case.
				 */
				p->as = relinv(a);
				q = p->link;
				p->link = p->pcond;
				p->pcond = q;
			}
		} else {
			/*
			 * fall-through already emitted: invert the branch so
			 * the old target becomes the fall-through (not for LOOP).
			 */
			q = p->link;
			if(q->mark)
			if(a != ALOOP) {
				p->as = relinv(a);
				p->link = p->pcond;
				p->pcond = q;
			}
		}
		xfol(ctxt, p->link, last);
		if(p->pcond->mark)
			return;
		p = p->pcond;
		goto loop;
	}
	p = p->link;
	goto loop;
}

// prg allocates a new Prog initialized from zprg.
static Prog*
prg(void)
{
	Prog *p;

	p = emallocz(sizeof(*p));
	*p = zprg;
	return p;
}

// linkamd64 is the LinkArch description for amd64:
// 8-byte pointers and 8-byte registers.
LinkArch linkamd64 = {
	.name = "amd64",
	.thechar = '6',
	.endian = LittleEndian,

	.addstacksplit = addstacksplit,
	.assemble = span6,
	.datasize = datasize,
	.follow = follow,
	.iscall = iscall,
	.isdata = isdata,
	.prg = prg,
	.progedit = progedit,
	.settextflag = settextflag,
	.symtype = symtype,
	.textflag = textflag,

	.minlc = 1,
	.ptrsize = 8,
	.regsize = 8,

	.D_ADDR = D_ADDR,
	.D_AUTO = D_AUTO,
	.D_BRANCH = D_BRANCH,
	.D_CONST = D_CONST,
	.D_EXTERN = D_EXTERN,
	.D_FCONST = D_FCONST,
	.D_NONE = D_NONE,
	.D_PARAM = D_PARAM,
	.D_SCONST = D_SCONST,
	.D_STATIC = D_STATIC,

	.ACALL = ACALL,
	.ADATA = ADATA,
	.AEND = AEND,
	.AFUNCDATA = AFUNCDATA,
	.AGLOBL = AGLOBL,
	.AJMP = AJMP,
	.ANOP = ANOP,
	.APCDATA = APCDATA,
	.ARET = ARET,
	.ATEXT = ATEXT,
	.ATYPE = ATYPE,
	.AUSEFIELD = AUSEFIELD,
};

// linkamd64p32 is the LinkArch description for amd64p32. It is
// identical to linkamd64 except for its name and 4-byte pointers;
// registers are still 8 bytes.
LinkArch linkamd64p32 = {
	.name = "amd64p32",
	.thechar = '6',
	.endian = LittleEndian,

	.addstacksplit = addstacksplit,
	.assemble = span6,
	.datasize = datasize,
	.follow = follow,
	.iscall = iscall,
	.isdata = isdata,
	.prg = prg,
	.progedit = progedit,
	.settextflag = settextflag,
	.symtype = symtype,
	.textflag = textflag,

	.minlc = 1,
	.ptrsize = 4,
	.regsize = 8,

	.D_ADDR = D_ADDR,
	.D_AUTO = D_AUTO,
	.D_BRANCH = D_BRANCH,
	.D_CONST = D_CONST,
	.D_EXTERN = D_EXTERN,
	.D_FCONST = D_FCONST,
	.D_NONE = D_NONE,
	.D_PARAM = D_PARAM,
	.D_SCONST = D_SCONST,
	.D_STATIC = D_STATIC,

	.ACALL = ACALL,
	.ADATA = ADATA,
	.AEND = AEND,
	.AFUNCDATA = AFUNCDATA,
	.AGLOBL = AGLOBL,
	.AJMP = AJMP,
	.ANOP = ANOP,
	.APCDATA = APCDATA,
	.ARET = ARET,
	.ATEXT = ATEXT,
	.ATYPE = ATYPE,
	.AUSEFIELD = AUSEFIELD,
};