// Source: github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/cmd/internal/obj/x86/obj6.go

// Inferno utils/6l/pass.c
// https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/pass.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
30 31 package x86 32 33 import ( 34 "cmd/internal/obj" 35 "cmd/internal/objabi" 36 "cmd/internal/sys" 37 "math" 38 "strings" 39 ) 40 41 func CanUse1InsnTLS(ctxt *obj.Link) bool { 42 if isAndroid { 43 // For android, we use a disgusting hack that assumes 44 // the thread-local storage slot for g is allocated 45 // using pthread_key_create with a fixed offset 46 // (see src/runtime/cgo/gcc_android_amd64.c). 47 // This makes access to the TLS storage (for g) doable 48 // with 1 instruction. 49 return true 50 } 51 52 if ctxt.Arch.Family == sys.I386 { 53 switch ctxt.Headtype { 54 case objabi.Hlinux, 55 objabi.Hnacl, 56 objabi.Hplan9, 57 objabi.Hwindows: 58 return false 59 } 60 61 return true 62 } 63 64 switch ctxt.Headtype { 65 case objabi.Hplan9, objabi.Hwindows: 66 return false 67 case objabi.Hlinux: 68 return !ctxt.Flag_shared 69 } 70 71 return true 72 } 73 74 func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { 75 // Thread-local storage references use the TLS pseudo-register. 76 // As a register, TLS refers to the thread-local storage base, and it 77 // can only be loaded into another register: 78 // 79 // MOVQ TLS, AX 80 // 81 // An offset from the thread-local storage base is written off(reg)(TLS*1). 82 // Semantically it is off(reg), but the (TLS*1) annotation marks this as 83 // indexing from the loaded TLS base. This emits a relocation so that 84 // if the linker needs to adjust the offset, it can. For example: 85 // 86 // MOVQ TLS, AX 87 // MOVQ 0(AX)(TLS*1), CX // load g into CX 88 // 89 // On systems that support direct access to the TLS memory, this 90 // pair of instructions can be reduced to a direct TLS memory reference: 91 // 92 // MOVQ 0(TLS), CX // load g into CX 93 // 94 // The 2-instruction and 1-instruction forms correspond to the two code 95 // sequences for loading a TLS variable in the local exec model given in "ELF 96 // Handling For Thread-Local Storage". 
97 // 98 // We apply this rewrite on systems that support the 1-instruction form. 99 // The decision is made using only the operating system and the -shared flag, 100 // not the link mode. If some link modes on a particular operating system 101 // require the 2-instruction form, then all builds for that operating system 102 // will use the 2-instruction form, so that the link mode decision can be 103 // delayed to link time. 104 // 105 // In this way, all supported systems use identical instructions to 106 // access TLS, and they are rewritten appropriately first here in 107 // liblink and then finally using relocations in the linker. 108 // 109 // When -shared is passed, we leave the code in the 2-instruction form but 110 // assemble (and relocate) them in different ways to generate the initial 111 // exec code sequence. It's a bit of a fluke that this is possible without 112 // rewriting the instructions more comprehensively, and it only does because 113 // we only support a single TLS variable (g). 114 115 if CanUse1InsnTLS(ctxt) { 116 // Reduce 2-instruction sequence to 1-instruction sequence. 117 // Sequences like 118 // MOVQ TLS, BX 119 // ... off(BX)(TLS*1) ... 120 // become 121 // NOP 122 // ... off(TLS) ... 123 // 124 // TODO(rsc): Remove the Hsolaris special case. It exists only to 125 // guarantee we are producing byte-identical binaries as before this code. 126 // But it should be unnecessary. 
127 if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris { 128 obj.Nopout(p) 129 } 130 if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 { 131 p.From.Reg = REG_TLS 132 p.From.Scale = 0 133 p.From.Index = REG_NONE 134 } 135 136 if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { 137 p.To.Reg = REG_TLS 138 p.To.Scale = 0 139 p.To.Index = REG_NONE 140 } 141 } else { 142 // load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it 143 // as the 2-instruction sequence if necessary. 144 // MOVQ 0(TLS), BX 145 // becomes 146 // MOVQ TLS, BX 147 // MOVQ 0(BX)(TLS*1), BX 148 if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { 149 q := obj.Appendp(p, newprog) 150 q.As = p.As 151 q.From = p.From 152 q.From.Type = obj.TYPE_MEM 153 q.From.Reg = p.To.Reg 154 q.From.Index = REG_TLS 155 q.From.Scale = 2 // TODO: use 1 156 q.To = p.To 157 p.From.Type = obj.TYPE_REG 158 p.From.Reg = REG_TLS 159 p.From.Index = REG_NONE 160 p.From.Offset = 0 161 } 162 } 163 164 // TODO: Remove. 165 if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 { 166 if p.From.Scale == 1 && p.From.Index == REG_TLS { 167 p.From.Scale = 2 168 } 169 if p.To.Scale == 1 && p.To.Index == REG_TLS { 170 p.To.Scale = 2 171 } 172 } 173 174 // Rewrite 0 to $0 in 3rd argument to CMPPS etc. 175 // That's what the tables expect. 
176 switch p.As { 177 case ACMPPD, ACMPPS, ACMPSD, ACMPSS: 178 if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil { 179 p.To.Type = obj.TYPE_CONST 180 } 181 } 182 183 // Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH. 184 switch p.As { 185 case obj.ACALL, obj.AJMP, obj.ARET: 186 if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil { 187 p.To.Type = obj.TYPE_BRANCH 188 } 189 } 190 191 // Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ. 192 if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) { 193 switch p.As { 194 case AMOVL: 195 p.As = ALEAL 196 p.From.Type = obj.TYPE_MEM 197 case AMOVQ: 198 p.As = ALEAQ 199 p.From.Type = obj.TYPE_MEM 200 } 201 } 202 203 if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 { 204 if p.From3 != nil { 205 nacladdr(ctxt, p, p.From3) 206 } 207 nacladdr(ctxt, p, &p.From) 208 nacladdr(ctxt, p, &p.To) 209 } 210 211 // Rewrite float constants to values stored in memory. 
212 switch p.As { 213 // Convert AMOVSS $(0), Xx to AXORPS Xx, Xx 214 case AMOVSS: 215 if p.From.Type == obj.TYPE_FCONST { 216 // f == 0 can't be used here due to -0, so use Float64bits 217 if f := p.From.Val.(float64); math.Float64bits(f) == 0 { 218 if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { 219 p.As = AXORPS 220 p.From = p.To 221 break 222 } 223 } 224 } 225 fallthrough 226 227 case AFMOVF, 228 AFADDF, 229 AFSUBF, 230 AFSUBRF, 231 AFMULF, 232 AFDIVF, 233 AFDIVRF, 234 AFCOMF, 235 AFCOMFP, 236 AADDSS, 237 ASUBSS, 238 AMULSS, 239 ADIVSS, 240 ACOMISS, 241 AUCOMISS: 242 if p.From.Type == obj.TYPE_FCONST { 243 f32 := float32(p.From.Val.(float64)) 244 p.From.Type = obj.TYPE_MEM 245 p.From.Name = obj.NAME_EXTERN 246 p.From.Sym = ctxt.Float32Sym(f32) 247 p.From.Offset = 0 248 } 249 250 case AMOVSD: 251 // Convert AMOVSD $(0), Xx to AXORPS Xx, Xx 252 if p.From.Type == obj.TYPE_FCONST { 253 // f == 0 can't be used here due to -0, so use Float64bits 254 if f := p.From.Val.(float64); math.Float64bits(f) == 0 { 255 if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { 256 p.As = AXORPS 257 p.From = p.To 258 break 259 } 260 } 261 } 262 fallthrough 263 264 case AFMOVD, 265 AFADDD, 266 AFSUBD, 267 AFSUBRD, 268 AFMULD, 269 AFDIVD, 270 AFDIVRD, 271 AFCOMD, 272 AFCOMDP, 273 AADDSD, 274 ASUBSD, 275 AMULSD, 276 ADIVSD, 277 ACOMISD, 278 AUCOMISD: 279 if p.From.Type == obj.TYPE_FCONST { 280 f64 := p.From.Val.(float64) 281 p.From.Type = obj.TYPE_MEM 282 p.From.Name = obj.NAME_EXTERN 283 p.From.Sym = ctxt.Float64Sym(f64) 284 p.From.Offset = 0 285 } 286 } 287 288 if ctxt.Flag_dynlink { 289 rewriteToUseGot(ctxt, p, newprog) 290 } 291 292 if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 { 293 rewriteToPcrel(ctxt, p, newprog) 294 } 295 } 296 297 // Rewrite p, if necessary, to access global data via the global offset table. 
298 func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { 299 var add, lea, mov obj.As 300 var reg int16 301 if ctxt.Arch.Family == sys.AMD64 { 302 add = AADDQ 303 lea = ALEAQ 304 mov = AMOVQ 305 reg = REG_R15 306 } else { 307 add = AADDL 308 lea = ALEAL 309 mov = AMOVL 310 reg = REG_CX 311 if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index { 312 // Special case: clobber the destination register with 313 // the PC so we don't have to clobber CX. 314 // The SSA backend depends on CX not being clobbered across LEAL. 315 // See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared). 316 reg = p.To.Reg 317 } 318 } 319 320 if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { 321 // ADUFFxxx $offset 322 // becomes 323 // $MOV runtime.duffxxx@GOT, $reg 324 // $ADD $offset, $reg 325 // CALL $reg 326 var sym *obj.LSym 327 if p.As == obj.ADUFFZERO { 328 sym = ctxt.Lookup("runtime.duffzero") 329 } else { 330 sym = ctxt.Lookup("runtime.duffcopy") 331 } 332 offset := p.To.Offset 333 p.As = mov 334 p.From.Type = obj.TYPE_MEM 335 p.From.Name = obj.NAME_GOTREF 336 p.From.Sym = sym 337 p.To.Type = obj.TYPE_REG 338 p.To.Reg = reg 339 p.To.Offset = 0 340 p.To.Sym = nil 341 p1 := obj.Appendp(p, newprog) 342 p1.As = add 343 p1.From.Type = obj.TYPE_CONST 344 p1.From.Offset = offset 345 p1.To.Type = obj.TYPE_REG 346 p1.To.Reg = reg 347 p2 := obj.Appendp(p1, newprog) 348 p2.As = obj.ACALL 349 p2.To.Type = obj.TYPE_REG 350 p2.To.Reg = reg 351 } 352 353 // We only care about global data: NAME_EXTERN means a global 354 // symbol in the Go sense, and p.Sym.Local is true for a few 355 // internally defined symbols. 
356 if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { 357 // $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below 358 p.As = mov 359 p.From.Type = obj.TYPE_ADDR 360 } 361 if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { 362 // $MOV $sym, Rx becomes $MOV sym@GOT, Rx 363 // $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx 364 // On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX 365 cmplxdest := false 366 pAs := p.As 367 var dest obj.Addr 368 if p.To.Type != obj.TYPE_REG || pAs != mov { 369 if ctxt.Arch.Family == sys.AMD64 { 370 ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p) 371 } 372 cmplxdest = true 373 dest = p.To 374 p.As = mov 375 p.To.Type = obj.TYPE_REG 376 p.To.Reg = reg 377 p.To.Sym = nil 378 p.To.Name = obj.NAME_NONE 379 } 380 p.From.Type = obj.TYPE_MEM 381 p.From.Name = obj.NAME_GOTREF 382 q := p 383 if p.From.Offset != 0 { 384 q = obj.Appendp(p, newprog) 385 q.As = lea 386 q.From.Type = obj.TYPE_MEM 387 q.From.Reg = p.To.Reg 388 q.From.Offset = p.From.Offset 389 q.To = p.To 390 p.From.Offset = 0 391 } 392 if cmplxdest { 393 q = obj.Appendp(q, newprog) 394 q.As = pAs 395 q.To = dest 396 q.From.Type = obj.TYPE_REG 397 q.From.Reg = reg 398 } 399 } 400 if p.From3 != nil && p.From3.Name == obj.NAME_EXTERN { 401 ctxt.Diag("don't know how to handle %v with -dynlink", p) 402 } 403 var source *obj.Addr 404 // MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry 405 // MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15) 406 // An addition may be inserted between the two MOVs if there is an offset. 
407 if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { 408 if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { 409 ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) 410 } 411 source = &p.From 412 } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { 413 source = &p.To 414 } else { 415 return 416 } 417 if p.As == obj.ACALL { 418 // When dynlinking on 386, almost any call might end up being a call 419 // to a PLT, so make sure the GOT pointer is loaded into BX. 420 // RegTo2 is set on the replacement call insn to stop it being 421 // processed when it is in turn passed to progedit. 422 if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 { 423 return 424 } 425 p1 := obj.Appendp(p, newprog) 426 p2 := obj.Appendp(p1, newprog) 427 428 p1.As = ALEAL 429 p1.From.Type = obj.TYPE_MEM 430 p1.From.Name = obj.NAME_STATIC 431 p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_") 432 p1.To.Type = obj.TYPE_REG 433 p1.To.Reg = REG_BX 434 435 p2.As = p.As 436 p2.Scond = p.Scond 437 p2.From = p.From 438 p2.From3 = p.From3 439 p2.Reg = p.Reg 440 p2.To = p.To 441 // p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr 442 // in ../pass.go complain, so set it back to TYPE_MEM here, until p2 443 // itself gets passed to progedit. 
444 p2.To.Type = obj.TYPE_MEM 445 p2.RegTo2 = 1 446 447 obj.Nopout(p) 448 return 449 450 } 451 if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP { 452 return 453 } 454 if source.Type != obj.TYPE_MEM { 455 ctxt.Diag("don't know how to handle %v with -dynlink", p) 456 } 457 p1 := obj.Appendp(p, newprog) 458 p2 := obj.Appendp(p1, newprog) 459 460 p1.As = mov 461 p1.From.Type = obj.TYPE_MEM 462 p1.From.Sym = source.Sym 463 p1.From.Name = obj.NAME_GOTREF 464 p1.To.Type = obj.TYPE_REG 465 p1.To.Reg = reg 466 467 p2.As = p.As 468 p2.From = p.From 469 p2.To = p.To 470 if p.From.Name == obj.NAME_EXTERN { 471 p2.From.Reg = reg 472 p2.From.Name = obj.NAME_NONE 473 p2.From.Sym = nil 474 } else if p.To.Name == obj.NAME_EXTERN { 475 p2.To.Reg = reg 476 p2.To.Name = obj.NAME_NONE 477 p2.To.Sym = nil 478 } else { 479 return 480 } 481 obj.Nopout(p) 482 } 483 484 func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { 485 // RegTo2 is set on the instructions we insert here so they don't get 486 // processed twice. 487 if p.RegTo2 != 0 { 488 return 489 } 490 if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { 491 return 492 } 493 // Any Prog (aside from the above special cases) with an Addr with Name == 494 // NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX 495 // inserted before it. 496 isName := func(a *obj.Addr) bool { 497 if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 { 498 return false 499 } 500 if a.Sym.Type == objabi.STLSBSS { 501 return false 502 } 503 return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF 504 } 505 506 if isName(&p.From) && p.From.Type == obj.TYPE_ADDR { 507 // Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting 508 // to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX" 509 // respectively. 
510 if p.To.Type != obj.TYPE_REG { 511 q := obj.Appendp(p, newprog) 512 q.As = p.As 513 q.From.Type = obj.TYPE_REG 514 q.From.Reg = REG_CX 515 q.To = p.To 516 p.As = AMOVL 517 p.To.Type = obj.TYPE_REG 518 p.To.Reg = REG_CX 519 p.To.Sym = nil 520 p.To.Name = obj.NAME_NONE 521 } 522 } 523 524 if !isName(&p.From) && !isName(&p.To) && (p.From3 == nil || !isName(p.From3)) { 525 return 526 } 527 var dst int16 = REG_CX 528 if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index { 529 dst = p.To.Reg 530 // Why? See the comment near the top of rewriteToUseGot above. 531 // AMOVLs might be introduced by the GOT rewrites. 532 } 533 q := obj.Appendp(p, newprog) 534 q.RegTo2 = 1 535 r := obj.Appendp(q, newprog) 536 r.RegTo2 = 1 537 q.As = obj.ACALL 538 thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))) 539 q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) }) 540 q.To.Type = obj.TYPE_MEM 541 q.To.Name = obj.NAME_EXTERN 542 r.As = p.As 543 r.Scond = p.Scond 544 r.From = p.From 545 r.From3 = p.From3 546 r.Reg = p.Reg 547 r.To = p.To 548 if isName(&p.From) { 549 r.From.Reg = dst 550 } 551 if isName(&p.To) { 552 r.To.Reg = dst 553 } 554 if p.From3 != nil && isName(p.From3) { 555 r.From3.Reg = dst 556 } 557 obj.Nopout(p) 558 } 559 560 func nacladdr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) { 561 if p.As == ALEAL || p.As == ALEAQ { 562 return 563 } 564 565 if a.Reg == REG_BP { 566 ctxt.Diag("invalid address: %v", p) 567 return 568 } 569 570 if a.Reg == REG_TLS { 571 a.Reg = REG_BP 572 } 573 if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE { 574 switch a.Reg { 575 // all ok 576 case REG_BP, REG_SP, REG_R15: 577 break 578 579 default: 580 if a.Index != REG_NONE { 581 ctxt.Diag("invalid address %v", p) 582 } 583 a.Index = a.Reg 584 if a.Index != REG_NONE { 585 a.Scale = 1 586 } 587 a.Reg = REG_R15 588 } 589 } 590 } 591 592 func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { 
593 if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { 594 return 595 } 596 597 p := cursym.Func.Text 598 autoffset := int32(p.To.Offset) 599 if autoffset < 0 { 600 autoffset = 0 601 } 602 603 hasCall := false 604 for q := p; q != nil; q = q.Link { 605 if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO { 606 hasCall = true 607 break 608 } 609 } 610 611 var bpsize int 612 if ctxt.Arch.Family == sys.AMD64 && ctxt.Framepointer_enabled && 613 !p.From.Sym.NoFrame() && // (1) below 614 !(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below 615 !(autoffset == 0 && !hasCall) { // (3) below 616 // Make room to save a base pointer. 617 // There are 2 cases we must avoid: 618 // 1) If noframe is set (which we do for functions which tail call). 619 // 2) Scary runtime internals which would be all messed up by frame pointers. 620 // We detect these using a heuristic: frameless nosplit functions. 621 // TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic. 622 // For performance, we also want to avoid: 623 // 3) Frameless leaf functions 624 bpsize = ctxt.Arch.PtrSize 625 autoffset += int32(bpsize) 626 p.To.Offset += int64(bpsize) 627 } else { 628 bpsize = 0 629 } 630 631 textarg := int64(p.To.Val.(int32)) 632 cursym.Func.Args = int32(textarg) 633 cursym.Func.Locals = int32(p.To.Offset) 634 635 // TODO(rsc): Remove. 636 if ctxt.Arch.Family == sys.I386 && cursym.Func.Locals < 0 { 637 cursym.Func.Locals = 0 638 } 639 640 // TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'. 641 if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() { 642 leaf := true 643 LeafSearch: 644 for q := p; q != nil; q = q.Link { 645 switch q.As { 646 case obj.ACALL: 647 // Treat common runtime calls that take no arguments 648 // the same as duffcopy and duffzero. 
649 if !isZeroArgRuntimeCall(q.To.Sym) { 650 leaf = false 651 break LeafSearch 652 } 653 fallthrough 654 case obj.ADUFFCOPY, obj.ADUFFZERO: 655 if autoffset >= objabi.StackSmall-8 { 656 leaf = false 657 break LeafSearch 658 } 659 } 660 } 661 662 if leaf { 663 p.From.Sym.Set(obj.AttrNoSplit, true) 664 } 665 } 666 667 if !p.From.Sym.NoSplit() || p.From.Sym.Wrapper() { 668 p = obj.Appendp(p, newprog) 669 p = load_g_cx(ctxt, p, newprog) // load g into CX 670 } 671 672 if !cursym.Func.Text.From.Sym.NoSplit() { 673 p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check 674 } 675 676 if autoffset != 0 { 677 if autoffset%int32(ctxt.Arch.RegSize) != 0 { 678 ctxt.Diag("unaligned stack size %d", autoffset) 679 } 680 p = obj.Appendp(p, newprog) 681 p.As = AADJSP 682 p.From.Type = obj.TYPE_CONST 683 p.From.Offset = int64(autoffset) 684 p.Spadj = autoffset 685 } 686 687 deltasp := autoffset 688 689 if bpsize > 0 { 690 // Save caller's BP 691 p = obj.Appendp(p, newprog) 692 693 p.As = AMOVQ 694 p.From.Type = obj.TYPE_REG 695 p.From.Reg = REG_BP 696 p.To.Type = obj.TYPE_MEM 697 p.To.Reg = REG_SP 698 p.To.Scale = 1 699 p.To.Offset = int64(autoffset) - int64(bpsize) 700 701 // Move current frame to BP 702 p = obj.Appendp(p, newprog) 703 704 p.As = ALEAQ 705 p.From.Type = obj.TYPE_MEM 706 p.From.Reg = REG_SP 707 p.From.Scale = 1 708 p.From.Offset = int64(autoffset) - int64(bpsize) 709 p.To.Type = obj.TYPE_REG 710 p.To.Reg = REG_BP 711 } 712 713 if cursym.Func.Text.From.Sym.Wrapper() { 714 // if g._panic != nil && g._panic.argp == FP { 715 // g._panic.argp = bottom-of-frame 716 // } 717 // 718 // MOVQ g_panic(CX), BX 719 // TESTQ BX, BX 720 // JNE checkargp 721 // end: 722 // NOP 723 // ... rest of function ... 724 // checkargp: 725 // LEAQ (autoffset+8)(SP), DI 726 // CMPQ panic_argp(BX), DI 727 // JNE end 728 // MOVQ SP, panic_argp(BX) 729 // JMP end 730 // 731 // The NOP is needed to give the jumps somewhere to land. 
732 // It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes. 733 // 734 // The layout is chosen to help static branch prediction: 735 // Both conditional jumps are unlikely, so they are arranged to be forward jumps. 736 737 // MOVQ g_panic(CX), BX 738 p = obj.Appendp(p, newprog) 739 p.As = AMOVQ 740 p.From.Type = obj.TYPE_MEM 741 p.From.Reg = REG_CX 742 p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic 743 p.To.Type = obj.TYPE_REG 744 p.To.Reg = REG_BX 745 if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 { 746 p.As = AMOVL 747 p.From.Type = obj.TYPE_MEM 748 p.From.Reg = REG_R15 749 p.From.Scale = 1 750 p.From.Index = REG_CX 751 } 752 if ctxt.Arch.Family == sys.I386 { 753 p.As = AMOVL 754 } 755 756 // TESTQ BX, BX 757 p = obj.Appendp(p, newprog) 758 p.As = ATESTQ 759 p.From.Type = obj.TYPE_REG 760 p.From.Reg = REG_BX 761 p.To.Type = obj.TYPE_REG 762 p.To.Reg = REG_BX 763 if ctxt.Headtype == objabi.Hnacl || ctxt.Arch.Family == sys.I386 { 764 p.As = ATESTL 765 } 766 767 // JNE checkargp (checkargp to be resolved later) 768 jne := obj.Appendp(p, newprog) 769 jne.As = AJNE 770 jne.To.Type = obj.TYPE_BRANCH 771 772 // end: 773 // NOP 774 end := obj.Appendp(jne, newprog) 775 end.As = obj.ANOP 776 777 // Fast forward to end of function. 778 var last *obj.Prog 779 for last = end; last.Link != nil; last = last.Link { 780 } 781 782 // LEAQ (autoffset+8)(SP), DI 783 p = obj.Appendp(last, newprog) 784 p.As = ALEAQ 785 p.From.Type = obj.TYPE_MEM 786 p.From.Reg = REG_SP 787 p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize) 788 p.To.Type = obj.TYPE_REG 789 p.To.Reg = REG_DI 790 if ctxt.Headtype == objabi.Hnacl || ctxt.Arch.Family == sys.I386 { 791 p.As = ALEAL 792 } 793 794 // Set jne branch target. 
795 jne.Pcond = p 796 797 // CMPQ panic_argp(BX), DI 798 p = obj.Appendp(p, newprog) 799 p.As = ACMPQ 800 p.From.Type = obj.TYPE_MEM 801 p.From.Reg = REG_BX 802 p.From.Offset = 0 // Panic.argp 803 p.To.Type = obj.TYPE_REG 804 p.To.Reg = REG_DI 805 if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 { 806 p.As = ACMPL 807 p.From.Type = obj.TYPE_MEM 808 p.From.Reg = REG_R15 809 p.From.Scale = 1 810 p.From.Index = REG_BX 811 } 812 if ctxt.Arch.Family == sys.I386 { 813 p.As = ACMPL 814 } 815 816 // JNE end 817 p = obj.Appendp(p, newprog) 818 p.As = AJNE 819 p.To.Type = obj.TYPE_BRANCH 820 p.Pcond = end 821 822 // MOVQ SP, panic_argp(BX) 823 p = obj.Appendp(p, newprog) 824 p.As = AMOVQ 825 p.From.Type = obj.TYPE_REG 826 p.From.Reg = REG_SP 827 p.To.Type = obj.TYPE_MEM 828 p.To.Reg = REG_BX 829 p.To.Offset = 0 // Panic.argp 830 if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 { 831 p.As = AMOVL 832 p.To.Type = obj.TYPE_MEM 833 p.To.Reg = REG_R15 834 p.To.Scale = 1 835 p.To.Index = REG_BX 836 } 837 if ctxt.Arch.Family == sys.I386 { 838 p.As = AMOVL 839 } 840 841 // JMP end 842 p = obj.Appendp(p, newprog) 843 p.As = obj.AJMP 844 p.To.Type = obj.TYPE_BRANCH 845 p.Pcond = end 846 847 // Reset p for following code. 
848 p = end 849 } 850 851 for ; p != nil; p = p.Link { 852 pcsize := ctxt.Arch.RegSize 853 switch p.From.Name { 854 case obj.NAME_AUTO: 855 p.From.Offset += int64(deltasp) - int64(bpsize) 856 case obj.NAME_PARAM: 857 p.From.Offset += int64(deltasp) + int64(pcsize) 858 } 859 if p.From3 != nil { 860 switch p.From3.Name { 861 case obj.NAME_AUTO: 862 p.From3.Offset += int64(deltasp) - int64(bpsize) 863 case obj.NAME_PARAM: 864 p.From3.Offset += int64(deltasp) + int64(pcsize) 865 } 866 } 867 switch p.To.Name { 868 case obj.NAME_AUTO: 869 p.To.Offset += int64(deltasp) - int64(bpsize) 870 case obj.NAME_PARAM: 871 p.To.Offset += int64(deltasp) + int64(pcsize) 872 } 873 874 switch p.As { 875 default: 876 continue 877 878 case APUSHL, APUSHFL: 879 deltasp += 4 880 p.Spadj = 4 881 continue 882 883 case APUSHQ, APUSHFQ: 884 deltasp += 8 885 p.Spadj = 8 886 continue 887 888 case APUSHW, APUSHFW: 889 deltasp += 2 890 p.Spadj = 2 891 continue 892 893 case APOPL, APOPFL: 894 deltasp -= 4 895 p.Spadj = -4 896 continue 897 898 case APOPQ, APOPFQ: 899 deltasp -= 8 900 p.Spadj = -8 901 continue 902 903 case APOPW, APOPFW: 904 deltasp -= 2 905 p.Spadj = -2 906 continue 907 908 case obj.ARET: 909 // do nothing 910 } 911 912 if autoffset != deltasp { 913 ctxt.Diag("unbalanced PUSH/POP") 914 } 915 916 if autoffset != 0 { 917 if bpsize > 0 { 918 // Restore caller's BP 919 p.As = AMOVQ 920 921 p.From.Type = obj.TYPE_MEM 922 p.From.Reg = REG_SP 923 p.From.Scale = 1 924 p.From.Offset = int64(autoffset) - int64(bpsize) 925 p.To.Type = obj.TYPE_REG 926 p.To.Reg = REG_BP 927 p = obj.Appendp(p, newprog) 928 } 929 930 p.As = AADJSP 931 p.From.Type = obj.TYPE_CONST 932 p.From.Offset = int64(-autoffset) 933 p.Spadj = -autoffset 934 p = obj.Appendp(p, newprog) 935 p.As = obj.ARET 936 937 // If there are instructions following 938 // this ARET, they come from a branch 939 // with the same stackframe, so undo 940 // the cleanup. 
941 p.Spadj = +autoffset 942 } 943 944 if p.To.Sym != nil { // retjmp 945 p.As = obj.AJMP 946 } 947 } 948 } 949 950 func isZeroArgRuntimeCall(s *obj.LSym) bool { 951 if s == nil { 952 return false 953 } 954 switch s.Name { 955 case "runtime.panicindex", "runtime.panicslice", "runtime.panicdivide", "runtime.panicwrap": 956 return true 957 } 958 return false 959 } 960 961 func indir_cx(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) { 962 if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 { 963 a.Type = obj.TYPE_MEM 964 a.Reg = REG_R15 965 a.Index = REG_CX 966 a.Scale = 1 967 return 968 } 969 970 a.Type = obj.TYPE_MEM 971 a.Reg = REG_CX 972 } 973 974 // Append code to p to load g into cx. 975 // Overwrites p with the first instruction (no first appendp). 976 // Overwriting p is unusual but it lets use this in both the 977 // prologue (caller must call appendp first) and in the epilogue. 978 // Returns last new instruction. 979 func load_g_cx(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) *obj.Prog { 980 p.As = AMOVQ 981 if ctxt.Arch.PtrSize == 4 { 982 p.As = AMOVL 983 } 984 p.From.Type = obj.TYPE_MEM 985 p.From.Reg = REG_TLS 986 p.From.Offset = 0 987 p.To.Type = obj.TYPE_REG 988 p.To.Reg = REG_CX 989 990 next := p.Link 991 progedit(ctxt, p, newprog) 992 for p.Link != next { 993 p = p.Link 994 } 995 996 if p.From.Index == REG_TLS { 997 p.From.Scale = 2 998 } 999 1000 return p 1001 } 1002 1003 // Append code to p to check for stack split. 1004 // Appends to (does not overwrite) p. 1005 // Assumes g is in CX. 1006 // Returns last new instruction. 
1007 func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) *obj.Prog { 1008 cmp := ACMPQ 1009 lea := ALEAQ 1010 mov := AMOVQ 1011 sub := ASUBQ 1012 1013 if ctxt.Headtype == objabi.Hnacl || ctxt.Arch.Family == sys.I386 { 1014 cmp = ACMPL 1015 lea = ALEAL 1016 mov = AMOVL 1017 sub = ASUBL 1018 } 1019 1020 var q1 *obj.Prog 1021 if framesize <= objabi.StackSmall { 1022 // small stack: SP <= stackguard 1023 // CMPQ SP, stackguard 1024 p = obj.Appendp(p, newprog) 1025 1026 p.As = cmp 1027 p.From.Type = obj.TYPE_REG 1028 p.From.Reg = REG_SP 1029 indir_cx(ctxt, p, &p.To) 1030 p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 1031 if cursym.CFunc() { 1032 p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 1033 } 1034 } else if framesize <= objabi.StackBig { 1035 // large stack: SP-framesize <= stackguard-StackSmall 1036 // LEAQ -xxx(SP), AX 1037 // CMPQ AX, stackguard 1038 p = obj.Appendp(p, newprog) 1039 1040 p.As = lea 1041 p.From.Type = obj.TYPE_MEM 1042 p.From.Reg = REG_SP 1043 p.From.Offset = -(int64(framesize) - objabi.StackSmall) 1044 p.To.Type = obj.TYPE_REG 1045 p.To.Reg = REG_AX 1046 1047 p = obj.Appendp(p, newprog) 1048 p.As = cmp 1049 p.From.Type = obj.TYPE_REG 1050 p.From.Reg = REG_AX 1051 indir_cx(ctxt, p, &p.To) 1052 p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 1053 if cursym.CFunc() { 1054 p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 1055 } 1056 } else { 1057 // Such a large stack we need to protect against wraparound. 1058 // If SP is close to zero: 1059 // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) 1060 // The +StackGuard on both sides is required to keep the left side positive: 1061 // SP is allowed to be slightly below stackguard. See stack.h. 1062 // 1063 // Preemption sets stackguard to StackPreempt, a very large value. 1064 // That breaks the math above, so we have to check for that explicitly. 
1065 // MOVQ stackguard, CX 1066 // CMPQ CX, $StackPreempt 1067 // JEQ label-of-call-to-morestack 1068 // LEAQ StackGuard(SP), AX 1069 // SUBQ CX, AX 1070 // CMPQ AX, $(framesize+(StackGuard-StackSmall)) 1071 1072 p = obj.Appendp(p, newprog) 1073 1074 p.As = mov 1075 indir_cx(ctxt, p, &p.From) 1076 p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 1077 if cursym.CFunc() { 1078 p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 1079 } 1080 p.To.Type = obj.TYPE_REG 1081 p.To.Reg = REG_SI 1082 1083 p = obj.Appendp(p, newprog) 1084 p.As = cmp 1085 p.From.Type = obj.TYPE_REG 1086 p.From.Reg = REG_SI 1087 p.To.Type = obj.TYPE_CONST 1088 p.To.Offset = objabi.StackPreempt 1089 if ctxt.Arch.Family == sys.I386 { 1090 p.To.Offset = int64(uint32(objabi.StackPreempt & (1<<32 - 1))) 1091 } 1092 1093 p = obj.Appendp(p, newprog) 1094 p.As = AJEQ 1095 p.To.Type = obj.TYPE_BRANCH 1096 q1 = p 1097 1098 p = obj.Appendp(p, newprog) 1099 p.As = lea 1100 p.From.Type = obj.TYPE_MEM 1101 p.From.Reg = REG_SP 1102 p.From.Offset = objabi.StackGuard 1103 p.To.Type = obj.TYPE_REG 1104 p.To.Reg = REG_AX 1105 1106 p = obj.Appendp(p, newprog) 1107 p.As = sub 1108 p.From.Type = obj.TYPE_REG 1109 p.From.Reg = REG_SI 1110 p.To.Type = obj.TYPE_REG 1111 p.To.Reg = REG_AX 1112 1113 p = obj.Appendp(p, newprog) 1114 p.As = cmp 1115 p.From.Type = obj.TYPE_REG 1116 p.From.Reg = REG_AX 1117 p.To.Type = obj.TYPE_CONST 1118 p.To.Offset = int64(framesize) + (objabi.StackGuard - objabi.StackSmall) 1119 } 1120 1121 // common 1122 jls := obj.Appendp(p, newprog) 1123 jls.As = AJLS 1124 jls.To.Type = obj.TYPE_BRANCH 1125 1126 var last *obj.Prog 1127 for last = cursym.Func.Text; last.Link != nil; last = last.Link { 1128 } 1129 1130 // Now we are at the end of the function, but logically 1131 // we are still in function prologue. We need to fix the 1132 // SP data and PCDATA. 
1133 spfix := obj.Appendp(last, newprog) 1134 spfix.As = obj.ANOP 1135 spfix.Spadj = -framesize 1136 1137 pcdata := obj.Appendp(spfix, newprog) 1138 pcdata.Pos = cursym.Func.Text.Pos 1139 pcdata.As = obj.APCDATA 1140 pcdata.From.Type = obj.TYPE_CONST 1141 pcdata.From.Offset = objabi.PCDATA_StackMapIndex 1142 pcdata.To.Type = obj.TYPE_CONST 1143 pcdata.To.Offset = -1 // pcdata starts at -1 at function entry 1144 1145 call := obj.Appendp(pcdata, newprog) 1146 call.Pos = cursym.Func.Text.Pos 1147 call.As = obj.ACALL 1148 call.To.Type = obj.TYPE_BRANCH 1149 call.To.Name = obj.NAME_EXTERN 1150 morestack := "runtime.morestack" 1151 switch { 1152 case cursym.CFunc(): 1153 morestack = "runtime.morestackc" 1154 case !cursym.Func.Text.From.Sym.NeedCtxt(): 1155 morestack = "runtime.morestack_noctxt" 1156 } 1157 call.To.Sym = ctxt.Lookup(morestack) 1158 // When compiling 386 code for dynamic linking, the call needs to be adjusted 1159 // to follow PIC rules. This in turn can insert more instructions, so we need 1160 // to keep track of the start of the call (where the jump will be to) and the 1161 // end (which following instructions are appended to). 
1162 callend := call 1163 progedit(ctxt, callend, newprog) 1164 for ; callend.Link != nil; callend = callend.Link { 1165 progedit(ctxt, callend.Link, newprog) 1166 } 1167 1168 jmp := obj.Appendp(callend, newprog) 1169 jmp.As = obj.AJMP 1170 jmp.To.Type = obj.TYPE_BRANCH 1171 jmp.Pcond = cursym.Func.Text.Link 1172 jmp.Spadj = +framesize 1173 1174 jls.Pcond = call 1175 if q1 != nil { 1176 q1.Pcond = call 1177 } 1178 1179 return jls 1180 } 1181 1182 var unaryDst = map[obj.As]bool{ 1183 ABSWAPL: true, 1184 ABSWAPQ: true, 1185 ACMPXCHG8B: true, 1186 ADECB: true, 1187 ADECL: true, 1188 ADECQ: true, 1189 ADECW: true, 1190 AINCB: true, 1191 AINCL: true, 1192 AINCQ: true, 1193 AINCW: true, 1194 ANEGB: true, 1195 ANEGL: true, 1196 ANEGQ: true, 1197 ANEGW: true, 1198 ANOTB: true, 1199 ANOTL: true, 1200 ANOTQ: true, 1201 ANOTW: true, 1202 APOPL: true, 1203 APOPQ: true, 1204 APOPW: true, 1205 ASETCC: true, 1206 ASETCS: true, 1207 ASETEQ: true, 1208 ASETGE: true, 1209 ASETGT: true, 1210 ASETHI: true, 1211 ASETLE: true, 1212 ASETLS: true, 1213 ASETLT: true, 1214 ASETMI: true, 1215 ASETNE: true, 1216 ASETOC: true, 1217 ASETOS: true, 1218 ASETPC: true, 1219 ASETPL: true, 1220 ASETPS: true, 1221 AFFREE: true, 1222 AFLDENV: true, 1223 AFSAVE: true, 1224 AFSTCW: true, 1225 AFSTENV: true, 1226 AFSTSW: true, 1227 AFXSAVE: true, 1228 AFXSAVE64: true, 1229 ASTMXCSR: true, 1230 } 1231 1232 var Linkamd64 = obj.LinkArch{ 1233 Arch: sys.ArchAMD64, 1234 Init: instinit, 1235 Preprocess: preprocess, 1236 Assemble: span6, 1237 Progedit: progedit, 1238 UnaryDst: unaryDst, 1239 } 1240 1241 var Linkamd64p32 = obj.LinkArch{ 1242 Arch: sys.ArchAMD64P32, 1243 Init: instinit, 1244 Preprocess: preprocess, 1245 Assemble: span6, 1246 Progedit: progedit, 1247 UnaryDst: unaryDst, 1248 } 1249 1250 var Link386 = obj.LinkArch{ 1251 Arch: sys.Arch386, 1252 Init: instinit, 1253 Preprocess: preprocess, 1254 Assemble: span6, 1255 Progedit: progedit, 1256 UnaryDst: unaryDst, 1257 }