github.com/tidwall/go@v0.0.0-20170415222209-6694a6888b7d/src/cmd/internal/obj/x86/obj6.go (about) 1 // Inferno utils/6l/pass.c 2 // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/pass.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
package x86

import (
	"cmd/internal/obj"
	"cmd/internal/sys"
	"math"
	"strings"
)

// CanUse1InsnTLS reports whether a TLS reference on the target
// OS/architecture can be assembled as a single instruction, or whether
// the two-instruction sequence (load TLS base, then index off it) is
// required.
func CanUse1InsnTLS(ctxt *obj.Link) bool {
	if isAndroid {
		// For android, we use a disgusting hack that assumes
		// the thread-local storage slot for g is allocated
		// using pthread_key_create with a fixed offset
		// (see src/runtime/cgo/gcc_android_amd64.c).
		// This makes access to the TLS storage (for g) doable
		// with 1 instruction.
		return true
	}

	if ctxt.Arch.Family == sys.I386 {
		switch ctxt.Headtype {
		case obj.Hlinux,
			obj.Hnacl,
			obj.Hplan9,
			obj.Hwindows:
			return false
		}

		return true
	}

	switch ctxt.Headtype {
	case obj.Hplan9, obj.Hwindows:
		return false
	case obj.Hlinux:
		return !ctxt.Flag_shared
	}

	return true
}

// progedit rewrites a single Prog in place: it canonicalizes TLS
// references to the 1- or 2-instruction form, turns CALL/JMP/RET to a
// symbol into TYPE_BRANCH, rewrites MOV $addr into LEA, applies NaCl
// address sandboxing, moves float constants into memory symbols, and
// applies the -dynlink (GOT) and -shared (PC-relative) rewrites.
func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only does because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != obj.Hsolaris {
			obj.Nopout(p)
		}
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// TODO: Remove.
	if ctxt.Headtype == obj.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == obj.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	if ctxt.Headtype == obj.Hnacl && ctxt.Arch.Family == sys.AMD64 {
		if p.From3 != nil {
			nacladdr(ctxt, p, p.From3)
		}
		nacladdr(ctxt, p, &p.From)
		nacladdr(ctxt, p, &p.To)
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		if p.From.Type == obj.TYPE_FCONST {
			f64 := p.From.Val.(float64)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float64Sym(f64)
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p, newprog)
	}

	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
		rewriteToPcrel(ctxt, p, newprog)
	}
}

// Rewrite p, if necessary, to access global data via the global offset table.
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Pick the pointer-width opcodes and the scratch register for the
	// GOT load: R15 on amd64, CX (usually) on 386.
	var add, lea, mov obj.As
	var reg int16
	if ctxt.Arch.Family == sys.AMD64 {
		add = AADDQ
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		add = AADDL
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
			// Special case: clobber the destination register with
			// the PC so we don't have to clobber CX.
			// The SSA backend depends on CX not being clobbered across LEAL.
			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
			reg = p.To.Reg
		}
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//	ADUFFxxx $offset
		// becomes
		//	$MOV runtime.duffxxx@GOT, $reg
		//	$ADD $offset, $reg
		//	CALL $reg
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = ctxt.Lookup("runtime.duffzero", 0)
		} else {
			sym = ctxt.Lookup("runtime.duffcopy", 0)
		}
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(p, newprog)
		p1.As = add
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = offset
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(p1, newprog)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if ctxt.Arch.Family == sys.AMD64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = reg
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		q := p
		if p.From.Offset != 0 {
			// Non-zero offset: add a trailing LEA to apply it after the
			// GOT load (the GOT entry itself has offset 0).
			q = obj.Appendp(p, newprog)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			q = obj.Appendp(q, newprog)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = reg
		}
	}
	if p.From3 != nil && p.From3.Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
		source = &p.To
	} else {
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
			return
		}
		p1 := obj.Appendp(p, newprog)
		p2 := obj.Appendp(p1, newprog)

		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_", 0)
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		p2.From3 = p.From3
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		obj.Nopout(p)
		return

	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	p1 := obj.Appendp(p, newprog)
	p2 := obj.Appendp(p1, newprog)

	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}
	obj.Nopout(p)
}

// rewriteToPcrel rewrites p for 386 position-independent code (-shared):
// any Prog referring to a NAME_EXTERN/NAME_STATIC/NAME_GOTREF symbol gets
// a CALL to a __x86.get_pc_thunk.* helper inserted before it so the
// reference can be made PC-relative through the chosen register.
func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// RegTo2 is set on the instructions we insert here so they don't get
	// processed twice.
	if p.RegTo2 != 0 {
		return
	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	// Any Prog (aside from the above special cases) with an Addr with Name ==
	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
	// inserted before it.
	isName := func(a *obj.Addr) bool {
		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
			return false
		}
		if a.Sym.Type == obj.STLSBSS {
			return false
		}
		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
	}

	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
		// respectively.
		if p.To.Type != obj.TYPE_REG {
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_CX
			q.To = p.To
			p.As = AMOVL
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_CX
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
	}

	if !isName(&p.From) && !isName(&p.To) && (p.From3 == nil || !isName(p.From3)) {
		return
	}
	var dst int16 = REG_CX
	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
		dst = p.To.Reg
		// Why? See the comment near the top of rewriteToUseGot above.
		// AMOVLs might be introduced by the GOT rewrites.
	}
	q := obj.Appendp(p, newprog)
	q.RegTo2 = 1
	r := obj.Appendp(q, newprog)
	r.RegTo2 = 1
	q.As = obj.ACALL
	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
	q.To.Sym = ctxt.LookupInit(thunkname, 0, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
	q.To.Type = obj.TYPE_MEM
	q.To.Name = obj.NAME_EXTERN
	r.As = p.As
	r.Scond = p.Scond
	r.From = p.From
	r.From3 = p.From3
	r.Reg = p.Reg
	r.To = p.To
	if isName(&p.From) {
		r.From.Reg = dst
	}
	if isName(&p.To) {
		r.To.Reg = dst
	}
	if p.From3 != nil && isName(p.From3) {
		r.From3.Reg = dst
	}
	obj.Nopout(p)
}

// nacladdr rewrites the address a for the NaCl sandbox: BP is reserved
// (it stands in for TLS), and general memory references are forced to
// index off R15. LEA instructions are left alone since they do not
// access memory.
func nacladdr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
	if p.As == ALEAL || p.As == ALEAQ {
		return
	}

	if a.Reg == REG_BP {
		ctxt.Diag("invalid address: %v", p)
		return
	}

	if a.Reg == REG_TLS {
		a.Reg = REG_BP
	}
	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
		switch a.Reg {
		// all ok
		case REG_BP, REG_SP, REG_R15:
			break

		default:
			if a.Index != REG_NONE {
				ctxt.Diag("invalid address %v", p)
			}
			a.Index = a.Reg
			if a.Index != REG_NONE {
				a.Scale = 1
			}
			a.Reg = REG_R15
		}
	}
}

// preprocess expands the function body of cursym: it decides whether to
// reserve frame-pointer space, marks small leaf functions nosplit,
// inserts the g load, the stack-split check, frame allocation (ADJSP)
// and BP save/restore, inserts the panic.argp adjustment for wrapper
// functions, rewrites NAME_AUTO/NAME_PARAM offsets to SP-relative form,
// tracks Spadj across PUSH/POP, and expands RET into the epilogue.
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if cursym.Text == nil || cursym.Text.Link == nil {
		return
	}

	p := cursym.Text
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}

	hasCall := false
	for q := p; q != nil; q = q.Link {
		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
			hasCall = true
			break
		}
	}

	var bpsize int
	if ctxt.Arch.Family == sys.AMD64 && ctxt.Framepointer_enabled &&
		!p.From.Sym.NoFrame() && // (1) below
		!(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below
		!(autoffset == 0 && !hasCall) { // (3) below
		// Make room to save a base pointer.
		// There are 2 cases we must avoid:
		// 1) If noframe is set (which we do for functions which tail call).
		// 2) Scary runtime internals which would be all messed up by frame pointers.
		//    We detect these using a heuristic: frameless nosplit functions.
		//    TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic.
		// For performance, we also want to avoid:
		// 3) Frameless leaf functions
		bpsize = ctxt.Arch.PtrSize
		autoffset += int32(bpsize)
		p.To.Offset += int64(bpsize)
	} else {
		bpsize = 0
	}

	textarg := int64(p.To.Val.(int32))
	cursym.Args = int32(textarg)
	cursym.Locals = int32(p.To.Offset)

	// TODO(rsc): Remove.
	if ctxt.Arch.Family == sys.I386 && cursym.Locals < 0 {
		cursym.Locals = 0
	}

	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
	if ctxt.Arch.Family == sys.AMD64 && autoffset < obj.StackSmall && !p.From.Sym.NoSplit() {
		leaf := true
	LeafSearch:
		for q := p; q != nil; q = q.Link {
			switch q.As {
			case obj.ACALL:
				// Treat common runtime calls that take no arguments
				// the same as duffcopy and duffzero.
				if !isZeroArgRuntimeCall(q.To.Sym) {
					leaf = false
					break LeafSearch
				}
				fallthrough
			case obj.ADUFFCOPY, obj.ADUFFZERO:
				if autoffset >= obj.StackSmall-8 {
					leaf = false
					break LeafSearch
				}
			}
		}

		if leaf {
			p.From.Sym.Set(obj.AttrNoSplit, true)
		}
	}

	if !p.From.Sym.NoSplit() || p.From.Sym.Wrapper() {
		p = obj.Appendp(p, newprog)
		p = load_g_cx(ctxt, p, newprog) // load g into CX
	}

	if !cursym.Text.From.Sym.NoSplit() {
		p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check
	}

	if autoffset != 0 {
		if autoffset%int32(ctxt.Arch.RegSize) != 0 {
			ctxt.Diag("unaligned stack size %d", autoffset)
		}
		p = obj.Appendp(p, newprog)
		p.As = AADJSP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(autoffset)
		p.Spadj = autoffset
	}

	deltasp := autoffset

	if bpsize > 0 {
		// Save caller's BP
		p = obj.Appendp(p, newprog)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_SP
		p.To.Scale = 1
		p.To.Offset = int64(autoffset) - int64(bpsize)

		// Move current frame to BP
		p = obj.Appendp(p, newprog)

		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Scale = 1
		p.From.Offset = int64(autoffset) - int64(bpsize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BP
	}

	if cursym.Text.From.Sym.Wrapper() {
		// if g._panic != nil && g._panic.argp == FP {
		//   g._panic.argp = bottom-of-frame
		// }
		//
		//	MOVQ g_panic(CX), BX
		//	TESTQ BX, BX
		//	JNE checkargp
		// end:
		//	NOP
		//	... rest of function ...
		// checkargp:
		//	LEAQ (autoffset+8)(SP), DI
		//	CMPQ panic_argp(BX), DI
		//	JNE end
		//	MOVQ SP, panic_argp(BX)
		//	JMP end
		//
		// The NOP is needed to give the jumps somewhere to land.
		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
		//
		// The layout is chosen to help static branch prediction:
		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.

		// MOVQ g_panic(CX), BX
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_CX
		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == obj.Hnacl && ctxt.Arch.Family == sys.AMD64 {
			p.As = AMOVL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_CX
		}
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// TESTQ BX, BX
		p = obj.Appendp(p, newprog)
		p.As = ATESTQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == obj.Hnacl || ctxt.Arch.Family == sys.I386 {
			p.As = ATESTL
		}

		// JNE checkargp (checkargp to be resolved later)
		jne := obj.Appendp(p, newprog)
		jne.As = AJNE
		jne.To.Type = obj.TYPE_BRANCH

		// end:
		// NOP
		end := obj.Appendp(jne, newprog)
		end.As = obj.ANOP

		// Fast forward to end of function.
		var last *obj.Prog
		for last = end; last.Link != nil; last = last.Link {
		}

		// LEAQ (autoffset+8)(SP), DI
		p = obj.Appendp(last, newprog)
		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == obj.Hnacl || ctxt.Arch.Family == sys.I386 {
			p.As = ALEAL
		}

		// Set jne branch target.
		jne.Pcond = p

		// CMPQ panic_argp(BX), DI
		p = obj.Appendp(p, newprog)
		p.As = ACMPQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_BX
		p.From.Offset = 0 // Panic.argp
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == obj.Hnacl && ctxt.Arch.Family == sys.AMD64 {
			p.As = ACMPL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_BX
		}
		if ctxt.Arch.Family == sys.I386 {
			p.As = ACMPL
		}

		// JNE end
		p = obj.Appendp(p, newprog)
		p.As = AJNE
		p.To.Type = obj.TYPE_BRANCH
		p.Pcond = end

		// MOVQ SP, panic_argp(BX)
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_BX
		p.To.Offset = 0 // Panic.argp
		if ctxt.Headtype == obj.Hnacl && ctxt.Arch.Family == sys.AMD64 {
			p.As = AMOVL
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = REG_R15
			p.To.Scale = 1
			p.To.Index = REG_BX
		}
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// JMP end
		p = obj.Appendp(p, newprog)
		p.As = obj.AJMP
		p.To.Type = obj.TYPE_BRANCH
		p.Pcond = end

		// Reset p for following code.
		p = end
	}

	for ; p != nil; p = p.Link {
		pcsize := ctxt.Arch.RegSize
		switch p.From.Name {
		case obj.NAME_AUTO:
			p.From.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.From.Offset += int64(deltasp) + int64(pcsize)
		}
		if p.From3 != nil {
			switch p.From3.Name {
			case obj.NAME_AUTO:
				p.From3.Offset += int64(deltasp) - int64(bpsize)
			case obj.NAME_PARAM:
				p.From3.Offset += int64(deltasp) + int64(pcsize)
			}
		}
		switch p.To.Name {
		case obj.NAME_AUTO:
			p.To.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.To.Offset += int64(deltasp) + int64(pcsize)
		}

		switch p.As {
		default:
			continue

		case APUSHL, APUSHFL:
			deltasp += 4
			p.Spadj = 4
			continue

		case APUSHQ, APUSHFQ:
			deltasp += 8
			p.Spadj = 8
			continue

		case APUSHW, APUSHFW:
			deltasp += 2
			p.Spadj = 2
			continue

		case APOPL, APOPFL:
			deltasp -= 4
			p.Spadj = -4
			continue

		case APOPQ, APOPFQ:
			deltasp -= 8
			p.Spadj = -8
			continue

		case APOPW, APOPFW:
			deltasp -= 2
			p.Spadj = -2
			continue

		case obj.ARET:
			// do nothing
		}

		if autoffset != deltasp {
			ctxt.Diag("unbalanced PUSH/POP")
		}

		if autoffset != 0 {
			if bpsize > 0 {
				// Restore caller's BP
				p.As = AMOVQ

				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REG_SP
				p.From.Scale = 1
				p.From.Offset = int64(autoffset) - int64(bpsize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_BP
				p = obj.Appendp(p, newprog)
			}

			p.As = AADJSP
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = int64(-autoffset)
			p.Spadj = -autoffset
			p = obj.Appendp(p, newprog)
			p.As = obj.ARET

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p.Spadj = +autoffset
		}

		if p.To.Sym != nil { // retjmp
			p.As = obj.AJMP
		}
	}
}

// isZeroArgRuntimeCall reports whether s is one of the runtime calls
// known to take no arguments, so callers of only these may still be
// treated as leaf functions for the nosplit heuristic in preprocess.
func isZeroArgRuntimeCall(s *obj.LSym) bool {
	if s == nil {
		return false
	}
	switch s.Name {
	case "runtime.panicindex", "runtime.panicslice", "runtime.panicdivide", "runtime.panicwrap":
		return true
	}
	return false
}

// indir_cx rewrites a to be an indirect reference through CX
// (or (R15)(CX*1) under the NaCl/amd64 sandbox).
func indir_cx(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
	if ctxt.Headtype == obj.Hnacl && ctxt.Arch.Family == sys.AMD64 {
		a.Type = obj.TYPE_MEM
		a.Reg = REG_R15
		a.Index = REG_CX
		a.Scale = 1
		return
	}

	a.Type = obj.TYPE_MEM
	a.Reg = REG_CX
}

// Append code to p to load g into cx.
// Overwrites p with the first instruction (no first appendp).
// Overwriting p is unusual but it lets use this in both the
// prologue (caller must call appendp first) and in the epilogue.
// Returns last new instruction.
func load_g_cx(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) *obj.Prog {
	p.As = AMOVQ
	if ctxt.Arch.PtrSize == 4 {
		p.As = AMOVL
	}
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REG_TLS
	p.From.Offset = 0
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_CX

	// progedit may expand the 1-instruction TLS load into the
	// 2-instruction form; skip over anything it inserted.
	next := p.Link
	progedit(ctxt, p, newprog)
	for p.Link != next {
		p = p.Link
	}

	if p.From.Index == REG_TLS {
		p.From.Scale = 2
	}

	return p
}

// Append code to p to check for stack split.
// Appends to (does not overwrite) p.
// Assumes g is in CX.
// Returns last new instruction.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) *obj.Prog {
	// Pointer-width opcodes: 64-bit forms by default, 32-bit forms on
	// 386 and NaCl.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ

	if ctxt.Headtype == obj.Hnacl || ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
	}

	var q1 *obj.Prog
	if framesize <= obj.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else if framesize <= obj.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), AX
		//	CMPQ AX, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - obj.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else {
		// Such a large stack we need to protect against wraparound.
		// If SP is close to zero:
		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//
		// Preemption sets stackguard to StackPreempt, a very large value.
		// That breaks the math above, so we have to check for that explicitly.
		//	MOVQ	stackguard, CX
		//	CMPQ	CX, $StackPreempt
		//	JEQ	label-of-call-to-morestack
		//	LEAQ	StackGuard(SP), AX
		//	SUBQ	CX, AX
		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))

		p = obj.Appendp(p, newprog)

		p.As = mov
		indir_cx(ctxt, p, &p.From)
		p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_SI

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = obj.StackPreempt
		if ctxt.Arch.Family == sys.I386 {
			p.To.Offset = int64(uint32(obj.StackPreempt & (1<<32 - 1)))
		}

		p = obj.Appendp(p, newprog)
		p.As = AJEQ
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = obj.StackGuard
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = int64(framesize) + (obj.StackGuard - obj.StackSmall)
	}

	// common
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	var last *obj.Prog
	for last = cursym.Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := obj.Appendp(spfix, newprog)
	pcdata.Pos = cursym.Text.Pos
	pcdata.As = obj.APCDATA
	pcdata.From.Type = obj.TYPE_CONST
	pcdata.From.Offset = obj.PCDATA_StackMapIndex
	pcdata.To.Type = obj.TYPE_CONST
	pcdata.To.Offset = -1 // pcdata starts at -1 at function entry

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack, 0)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	jmp := obj.Appendp(callend, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.Pcond = cursym.Text.Link
	jmp.Spadj = +framesize

	jls.Pcond = call
	if q1 != nil {
		q1.Pcond = call
	}

	return jls
}

// unaryDst marks the opcodes whose single operand is a destination
// (written rather than read).
var unaryDst = map[obj.As]bool{
	ABSWAPL:    true,
	ABSWAPQ:    true,
	ACMPXCHG8B: true,
	ADECB:      true,
	ADECL:      true,
	ADECQ:      true,
	ADECW:      true,
	AINCB:      true,
	AINCL:      true,
	AINCQ:      true,
	AINCW:      true,
	ANEGB:      true,
	ANEGL:      true,
	ANEGQ:      true,
	ANEGW:      true,
	ANOTB:      true,
	ANOTL:      true,
	ANOTQ:      true,
	ANOTW:      true,
	APOPL:      true,
	APOPQ:      true,
	APOPW:      true,
	ASETCC:     true,
	ASETCS:     true,
	ASETEQ:     true,
	ASETGE:     true,
	ASETGT:     true,
	ASETHI:     true,
	ASETLE:     true,
	ASETLS:     true,
	ASETLT:     true,
	ASETMI:     true,
	ASETNE:     true,
	ASETOC:     true,
	ASETOS:     true,
	ASETPC:     true,
	ASETPL:     true,
	ASETPS:     true,
	AFFREE:     true,
	AFLDENV:    true,
	AFSAVE:     true,
	AFSTCW:     true,
	AFSTENV:    true,
	AFSTSW:     true,
	AFXSAVE:    true,
	AFXSAVE64:  true,
	ASTMXCSR:   true,
}

// Linkamd64 is the obj.LinkArch implementation for amd64.
var Linkamd64 = obj.LinkArch{
	Arch:       sys.ArchAMD64,
	Init:       instinit,
	Preprocess: preprocess,
	Assemble:   span6,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}

// Linkamd64p32 is the obj.LinkArch implementation for amd64p32 (NaCl).
var Linkamd64p32 = obj.LinkArch{
	Arch:       sys.ArchAMD64P32,
	Init:       instinit,
	Preprocess: preprocess,
	Assemble:   span6,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}

// Link386 is the obj.LinkArch implementation for 386.
var Link386 = obj.LinkArch{
	Arch:       sys.Arch386,
	Init:       instinit,
	Preprocess: preprocess,
	Assemble:   span6,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}