github.com/zebozhuang/go@v0.0.0-20200207033046-f8a98f6f5c5d/src/cmd/internal/obj/x86/obj6.go (about) 1 // Inferno utils/6l/pass.c 2 // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/pass.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
package x86

import (
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/sys"
	"math"
	"strings"
)

// CanUse1InsnTLS reports whether thread-local storage can be accessed with
// the 1-instruction form described in progedit for the target in ctxt.
//
// The answer depends only on isAndroid, ctxt.Arch.Family, ctxt.Headtype and
// ctxt.Flag_shared:
//   - Android: always true.
//   - 386: false on Linux, NaCl, Plan 9 and Windows; true otherwise.
//   - other families: false on Plan 9 and Windows; on Linux true only when
//     Flag_shared is unset; true otherwise.
func CanUse1InsnTLS(ctxt *obj.Link) bool {
	if isAndroid {
		// For android, we use a disgusting hack that assumes
		// the thread-local storage slot for g is allocated
		// using pthread_key_create with a fixed offset
		// (see src/runtime/cgo/gcc_android_amd64.c).
		// This makes access to the TLS storage (for g) doable
		// with 1 instruction.
		return true
	}

	if ctxt.Arch.Family == sys.I386 {
		switch ctxt.Headtype {
		case objabi.Hlinux,
			objabi.Hnacl,
			objabi.Hplan9,
			objabi.Hwindows:
			return false
		}

		return true
	}

	switch ctxt.Headtype {
	case objabi.Hplan9, objabi.Hwindows:
		return false
	case objabi.Hlinux:
		return !ctxt.Flag_shared
	}

	return true
}

// progedit rewrites the single instruction p in place and may append
// follow-up instructions after it (allocated through newprog). It is
// installed as the Progedit hook of the x86 LinkArch values in this file and
// is also called directly by load_g_cx and stacksplit.
//
// The rewrites are applied in this order:
//  1. TLS access: collapse to, or expand from, the 1-instruction form.
//  2. Force scale 2 on TLS-indexed operands for windows/amd64 and plan9.
//  3. CMPPS-family: turn a bare memory "0" destination into a constant.
//  4. CALL/JMP/RET to a symbol: mark the target as TYPE_BRANCH.
//  5. MOVL/MOVQ of an address: convert to LEAL/LEAQ.
//  6. NaCl amd64 address adjustment (nacladdr).
//  7. Float constants: replace by XORPS (for +0 into an X register) or by a
//     reference to a constant symbol in memory.
//  8. GOT rewriting when ctxt.Flag_dynlink is set.
//  9. PC-relative rewriting for 386 when ctxt.Flag_shared is set.
func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//	MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//	MOVQ TLS, AX
	//	MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//	MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only does because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
			obj.Nopout(p)
		}
		// Source operand of the form off(reg)(TLS*1): drop the index and
		// address directly off TLS.
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		// Same rewrite for the destination operand.
		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			// q becomes the second instruction (the indexed load); p is
			// then turned into the "MOVx TLS, reg" that precedes it.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// TODO: Remove.
	// Note: && binds tighter than ||, so this is
	// (windows && amd64) || plan9.
	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	// On AMD64 this applies to every TYPE_ADDR source; on other families
	// addresses of NAME_EXTERN/NAME_STATIC symbols are left alone.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
		if p.From3 != nil {
			nacladdr(ctxt, p, p.From3)
		}
		nacladdr(ctxt, p, &p.From)
		nacladdr(ctxt, p, &p.To)
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		// Not a +0 load into an X register: handle like the other
		// single-precision instructions below.
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		if p.From.Type == obj.TYPE_FCONST {
			// Narrow to float32 and reference the constant's symbol.
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		// Not a +0 load into an X register: handle like the other
		// double-precision instructions below.
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		if p.From.Type == obj.TYPE_FCONST {
			f64 := p.From.Val.(float64)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float64Sym(f64)
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p, newprog)
	}

	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
		rewriteToPcrel(ctxt, p, newprog)
	}
}

// Rewrite p, if necessary, to access global data via the global offset table.
// Extra instructions are appended after p using newprog. The scratch
// register is R15 on AMD64 and CX on 386, except that a 386 LEAL whose
// destination differs from its base and index registers uses its own
// destination register instead.
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	var lea, mov obj.As
	var reg int16
	if ctxt.Arch.Family == sys.AMD64 {
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
			// Special case: clobber the destination register with
			// the PC so we don't have to clobber CX.
			// The SSA backend depends on CX not being clobbered across LEAL.
			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
			reg = p.To.Reg
		}
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//     ADUFFxxx $offset
		// becomes
		//     $MOV runtime.duffxxx@GOT, $reg
		//     $LEA $offset($reg), $reg
		//     CALL $reg
		// (we use LEAx rather than ADDx because ADDx clobbers
		// flags and duffzero on 386 does not otherwise do so)
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = ctxt.Lookup("runtime.duffzero")
		} else {
			sym = ctxt.Lookup("runtime.duffcopy")
		}
		// Save the offset before p.To is overwritten below.
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(p, newprog)
		p1.As = lea
		p1.From.Type = obj.TYPE_MEM
		p1.From.Offset = offset
		p1.From.Reg = reg
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(p1, newprog)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if ctxt.Arch.Family == sys.AMD64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			// Load the address into the scratch register first; the
			// original opcode and destination are replayed afterwards.
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = reg
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		q := p
		if p.From.Offset != 0 {
			// Apply the symbol offset with a separate LEA after the GOT load.
			q = obj.Appendp(p, newprog)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			q = obj.Appendp(q, newprog)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = reg
		}
	}
	if p.From3 != nil && p.From3.Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
		source = &p.To
	} else {
		// Neither operand names a non-local global: nothing more to do.
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
			return
		}
		p1 := obj.Appendp(p, newprog)
		p2 := obj.Appendp(p1, newprog)

		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		p2.From3 = p.From3
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		// The original call is replaced by the LEAL + CALL pair.
		obj.Nopout(p)
		return

	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	// p1 loads the symbol's address from the GOT into reg; p2 repeats the
	// original instruction with the symbol operand replaced by (reg).
	p1 := obj.Appendp(p, newprog)
	p2 := obj.Appendp(p1, newprog)

	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}
	obj.Nopout(p)
}

// rewriteToPcrel rewrites p, if it references a symbol by name, so that the
// reference is made relative to a register loaded by a call to a
// __x86.get_pc_thunk.XX helper. progedit calls it only for 386 builds with
// ctxt.Flag_shared set. The original p is turned into a no-op and replaced by
// two appended instructions (the thunk call and a copy of p).
func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// RegTo2 is set on the instructions we insert here so they don't get
	// processed twice.
	if p.RegTo2 != 0 {
		return
	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	// Any Prog (aside from the above special cases) with an Addr with Name ==
	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
	// inserted before it.
	// isName reports whether a is a register-less memory or address operand
	// naming a non-TLS symbol with one of those three name kinds.
	isName := func(a *obj.Addr) bool {
		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
			return false
		}
		if a.Sym.Type == objabi.STLSBSS {
			return false
		}
		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
	}

	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
		// respectively.
		if p.To.Type != obj.TYPE_REG {
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_CX
			q.To = p.To
			p.As = AMOVL
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_CX
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
	}

	if !isName(&p.From) && !isName(&p.To) && (p.From3 == nil || !isName(p.From3)) {
		return
	}
	var dst int16 = REG_CX
	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
		dst = p.To.Reg
		// Why? See the comment near the top of rewriteToUseGot above.
		// AMOVLs might be introduced by the GOT rewrites.
	}
	// q is the thunk call that loads the PC into dst; r is the copy of p
	// that addresses the symbol relative to dst.
	q := obj.Appendp(p, newprog)
	q.RegTo2 = 1
	r := obj.Appendp(q, newprog)
	r.RegTo2 = 1
	q.As = obj.ACALL
	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
	q.To.Type = obj.TYPE_MEM
	q.To.Name = obj.NAME_EXTERN
	r.As = p.As
	r.Scond = p.Scond
	r.From = p.From
	r.From3 = p.From3
	r.Reg = p.Reg
	r.To = p.To
	if isName(&p.From) {
		r.From.Reg = dst
	}
	if isName(&p.To) {
		r.To.Reg = dst
	}
	if p.From3 != nil && isName(p.From3) {
		r.From3.Reg = dst
	}
	obj.Nopout(p)
}

// nacladdr adjusts the address operand a of instruction p. progedit calls it
// only for NaCl on AMD64. LEAL/LEAQ are left untouched. An operand already
// based on BP is reported as invalid. A TLS base is replaced by BP, and an
// unnamed memory operand whose base is not BP, SP or R15 is rebased onto R15
// with the former base as index.
func nacladdr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
	if p.As == ALEAL || p.As == ALEAQ {
		return
	}

	if a.Reg == REG_BP {
		ctxt.Diag("invalid address: %v", p)
		return
	}

	if a.Reg == REG_TLS {
		a.Reg = REG_BP
	}
	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
		switch a.Reg {
		// all ok
		case REG_BP, REG_SP, REG_R15:
			break

		default:
			// The index slot is needed for the old base register, so an
			// operand that already has an index cannot be rewritten.
			if a.Index != REG_NONE {
				ctxt.Diag("invalid address %v", p)
			}
			a.Index = a.Reg
			if a.Index != REG_NONE {
				a.Scale = 1
			}
			a.Reg = REG_R15
		}
	}
}

// preprocess expands the function cursym: it computes the frame size
// (optionally reserving a slot for the saved base pointer), records
// Func.Args and Func.Locals, inserts the prologue (load of g, stack-split
// check, SP adjustment, BP save, wrapper panic-argp fixup), rewrites
// NAME_AUTO/NAME_PARAM offsets to be SP-relative, tracks PUSH/POP stack
// deltas, and expands each RET into the epilogue. It is installed as the
// Preprocess hook of the x86 LinkArch values in this file.
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if cursym.Func.Text == nil || cursym.Func.Text.Link == nil {
		return
	}

	p := cursym.Func.Text
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}

	// hasCall records whether the function contains any CALL or DUFFxxx.
	hasCall := false
	for q := p; q != nil; q = q.Link {
		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
			hasCall = true
			break
		}
	}

	var bpsize int
	if ctxt.Arch.Family == sys.AMD64 && ctxt.Framepointer_enabled &&
		!p.From.Sym.NoFrame() && // (1) below
		!(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below
		!(autoffset == 0 && !hasCall) { // (3) below
		// Make room to save a base pointer.
		// There are 2 cases we must avoid:
		// 1) If noframe is set (which we do for functions which tail call).
		// 2) Scary runtime internals which would be all messed up by frame pointers.
		//    We detect these using a heuristic: frameless nosplit functions.
		//    TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic.
		// For performance, we also want to avoid:
		// 3) Frameless leaf functions
		bpsize = ctxt.Arch.PtrSize
		autoffset += int32(bpsize)
		p.To.Offset += int64(bpsize)
	} else {
		bpsize = 0
	}

	textarg := int64(p.To.Val.(int32))
	cursym.Func.Args = int32(textarg)
	cursym.Func.Locals = int32(p.To.Offset)

	// TODO(rsc): Remove.
	if ctxt.Arch.Family == sys.I386 && cursym.Func.Locals < 0 {
		cursym.Func.Locals = 0
	}

	// Mark small-frame functions that make no calls (other than the
	// tolerated ones below) as NOSPLIT.
	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
	if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() {
		leaf := true
	LeafSearch:
		for q := p; q != nil; q = q.Link {
			switch q.As {
			case obj.ACALL:
				// Treat common runtime calls that take no arguments
				// the same as duffcopy and duffzero.
				if !isZeroArgRuntimeCall(q.To.Sym) {
					leaf = false
					break LeafSearch
				}
				fallthrough
			case obj.ADUFFCOPY, obj.ADUFFZERO:
				if autoffset >= objabi.StackSmall-8 {
					leaf = false
					break LeafSearch
				}
			}
		}

		if leaf {
			p.From.Sym.Set(obj.AttrNoSplit, true)
		}
	}

	if !p.From.Sym.NoSplit() || p.From.Sym.Wrapper() {
		p = obj.Appendp(p, newprog)
		p = load_g_cx(ctxt, p, newprog) // load g into CX
	}

	if !cursym.Func.Text.From.Sym.NoSplit() {
		p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check
	}

	if autoffset != 0 {
		if autoffset%int32(ctxt.Arch.RegSize) != 0 {
			ctxt.Diag("unaligned stack size %d", autoffset)
		}
		p = obj.Appendp(p, newprog)
		p.As = AADJSP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(autoffset)
		p.Spadj = autoffset
	}

	// deltasp is the current distance of SP below its value at function
	// entry; it is updated by the PUSH/POP cases in the loop below.
	deltasp := autoffset

	if bpsize > 0 {
		// Save caller's BP
		p = obj.Appendp(p, newprog)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_SP
		p.To.Scale = 1
		p.To.Offset = int64(autoffset) - int64(bpsize)

		// Move current frame to BP
		p = obj.Appendp(p, newprog)

		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Scale = 1
		p.From.Offset = int64(autoffset) - int64(bpsize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BP
	}

	if cursym.Func.Text.From.Sym.Wrapper() {
		// if g._panic != nil && g._panic.argp == FP {
		//   g._panic.argp = bottom-of-frame
		// }
		//
		//	MOVQ g_panic(CX), BX
		//	TESTQ BX, BX
		//	JNE checkargp
		// end:
		//	NOP
		//  ... rest of function ...
		// checkargp:
		//	LEAQ (autoffset+8)(SP), DI
		//	CMPQ panic_argp(BX), DI
		//	JNE end
		//  MOVQ SP, panic_argp(BX)
		//  JMP end
		//
		// The NOP is needed to give the jumps somewhere to land.
		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
		//
		// The layout is chosen to help static branch prediction:
		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.

		// MOVQ g_panic(CX), BX
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_CX
		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
			p.As = AMOVL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_CX
		}
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// TESTQ BX, BX
		p = obj.Appendp(p, newprog)
		p.As = ATESTQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == objabi.Hnacl || ctxt.Arch.Family == sys.I386 {
			p.As = ATESTL
		}

		// JNE checkargp (checkargp to be resolved later)
		jne := obj.Appendp(p, newprog)
		jne.As = AJNE
		jne.To.Type = obj.TYPE_BRANCH

		// end:
		//  NOP
		end := obj.Appendp(jne, newprog)
		end.As = obj.ANOP

		// Fast forward to end of function.
		var last *obj.Prog
		for last = end; last.Link != nil; last = last.Link {
		}

		// LEAQ (autoffset+8)(SP), DI
		// (the code uses autoffset+RegSize rather than a literal 8)
		p = obj.Appendp(last, newprog)
		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == objabi.Hnacl || ctxt.Arch.Family == sys.I386 {
			p.As = ALEAL
		}

		// Set jne branch target.
		jne.Pcond = p

		// CMPQ panic_argp(BX), DI
		p = obj.Appendp(p, newprog)
		p.As = ACMPQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_BX
		p.From.Offset = 0 // Panic.argp
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
			p.As = ACMPL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_BX
		}
		if ctxt.Arch.Family == sys.I386 {
			p.As = ACMPL
		}

		// JNE end
		p = obj.Appendp(p, newprog)
		p.As = AJNE
		p.To.Type = obj.TYPE_BRANCH
		p.Pcond = end

		// MOVQ SP, panic_argp(BX)
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_BX
		p.To.Offset = 0 // Panic.argp
		if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
			p.As = AMOVL
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = REG_R15
			p.To.Scale = 1
			p.To.Index = REG_BX
		}
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// JMP end
		p = obj.Appendp(p, newprog)
		p.As = obj.AJMP
		p.To.Type = obj.TYPE_BRANCH
		p.Pcond = end

		// Reset p for following code.
		p = end
	}

	// Walk the rest of the function: fix up frame-relative offsets, track
	// the SP delta across PUSH/POP, and expand each RET into the epilogue.
	for ; p != nil; p = p.Link {
		pcsize := ctxt.Arch.RegSize
		switch p.From.Name {
		case obj.NAME_AUTO:
			p.From.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.From.Offset += int64(deltasp) + int64(pcsize)
		}
		if p.From3 != nil {
			switch p.From3.Name {
			case obj.NAME_AUTO:
				p.From3.Offset += int64(deltasp) - int64(bpsize)
			case obj.NAME_PARAM:
				p.From3.Offset += int64(deltasp) + int64(pcsize)
			}
		}
		switch p.To.Name {
		case obj.NAME_AUTO:
			p.To.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.To.Offset += int64(deltasp) + int64(pcsize)
		}

		switch p.As {
		default:
			continue

		case APUSHL, APUSHFL:
			deltasp += 4
			p.Spadj = 4
			continue

		case APUSHQ, APUSHFQ:
			deltasp += 8
			p.Spadj = 8
			continue

		case APUSHW, APUSHFW:
			deltasp += 2
			p.Spadj = 2
			continue

		case APOPL, APOPFL:
			deltasp -= 4
			p.Spadj = -4
			continue

		case APOPQ, APOPFQ:
			deltasp -= 8
			p.Spadj = -8
			continue

		case APOPW, APOPFW:
			deltasp -= 2
			p.Spadj = -2
			continue

		case obj.ARET:
			// do nothing
		}

		// Only RET reaches this point.
		if autoffset != deltasp {
			ctxt.Diag("unbalanced PUSH/POP")
		}

		if autoffset != 0 {
			if bpsize > 0 {
				// Restore caller's BP
				// (the RET Prog itself is reused for this load and a
				// new Prog is appended to carry on the epilogue).
				p.As = AMOVQ

				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REG_SP
				p.From.Scale = 1
				p.From.Offset = int64(autoffset) - int64(bpsize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_BP
				p = obj.Appendp(p, newprog)
			}

			p.As = AADJSP
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = int64(-autoffset)
			p.Spadj = -autoffset
			p = obj.Appendp(p, newprog)
			p.As = obj.ARET

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p.Spadj = +autoffset
		}

		if p.To.Sym != nil { // retjmp
			p.As = obj.AJMP
		}
	}
}

// isZeroArgRuntimeCall reports whether s is one of the runtime panic helpers
// named below. preprocess treats calls to these like DUFFCOPY/DUFFZERO when
// deciding whether a function can be marked NOSPLIT. A nil s yields false.
func isZeroArgRuntimeCall(s *obj.LSym) bool {
	if s == nil {
		return false
	}
	switch s.Name {
	case "runtime.panicindex", "runtime.panicslice", "runtime.panicdivide", "runtime.panicwrap":
		return true
	}
	return false
}

// indir_cx sets a to a memory operand addressed through CX. For NaCl on
// AMD64 the operand is R15 as base with CX as index at scale 1; otherwise it
// is CX as base. Only Type, Reg, Index and Scale are written; p is not
// referenced in the body.
func indir_cx(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
		a.Type = obj.TYPE_MEM
		a.Reg = REG_R15
		a.Index = REG_CX
		a.Scale = 1
		return
	}

	a.Type = obj.TYPE_MEM
	a.Reg = REG_CX
}

// Append code to p to load g into cx.
// Overwrites p with the first instruction (no first appendp).
// Overwriting p is unusual but it lets us use this in both the
// prologue (caller must call appendp first) and in the epilogue.
// Returns last new instruction.
func load_g_cx(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) *obj.Prog {
	p.As = AMOVQ
	if ctxt.Arch.PtrSize == 4 {
		p.As = AMOVL
	}
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REG_TLS
	p.From.Offset = 0
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_CX

	// progedit may expand p into several instructions; remember what
	// followed p so we can advance to the last instruction it inserted.
	next := p.Link
	progedit(ctxt, p, newprog)
	for p.Link != next {
		p = p.Link
	}

	if p.From.Index == REG_TLS {
		p.From.Scale = 2
	}

	return p
}

// Append code to p to check for stack split.
// Appends to (does not overwrite) p.
// Assumes g is in CX.
// Returns last new instruction.
// The check has three shapes depending on framesize relative to
// objabi.StackSmall and objabi.StackBig. The out-of-line part (a call to the
// appropriate runtime.morestack variant followed by a jump back to the first
// instruction after TEXT) is appended at the end of the function. textarg is
// not referenced in this body.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) *obj.Prog {
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ

	// Use the 32-bit opcodes on NaCl and on 386.
	if ctxt.Headtype == objabi.Hnacl || ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
	}

	// q1, when set, is the extra JEQ emitted by the huge-frame case; it is
	// pointed at the morestack call below.
	var q1 *obj.Prog
	if framesize <= objabi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else if framesize <= objabi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), AX
		//	CMPQ AX, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - objabi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else {
		// Such a large stack we need to protect against wraparound.
		// If SP is close to zero:
		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//
		// Preemption sets stackguard to StackPreempt, a very large value.
		// That breaks the math above, so we have to check for that explicitly.
		//	MOVQ	stackguard, SI
		//	CMPQ	SI, $StackPreempt
		//	JEQ	label-of-call-to-morestack
		//	LEAQ	StackGuard(SP), AX
		//	SUBQ	SI, AX
		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))

		p = obj.Appendp(p, newprog)

		p.As = mov
		indir_cx(ctxt, p, &p.From)
		p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_SI

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = objabi.StackPreempt
		if ctxt.Arch.Family == sys.I386 {
			// Keep only the low 32 bits of the constant on 386.
			p.To.Offset = int64(uint32(objabi.StackPreempt & (1<<32 - 1)))
		}

		p = obj.Appendp(p, newprog)
		p.As = AJEQ
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = objabi.StackGuard
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = int64(framesize) + (objabi.StackGuard - objabi.StackSmall)
	}

	// common
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	var last *obj.Prog
	for last = cursym.Func.Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := obj.Appendp(spfix, newprog)
	pcdata.Pos = cursym.Func.Text.Pos
	pcdata.As = obj.APCDATA
	pcdata.From.Type = obj.TYPE_CONST
	pcdata.From.Offset = objabi.PCDATA_StackMapIndex
	pcdata.To.Type = obj.TYPE_CONST
	pcdata.To.Offset = -1 // pcdata starts at -1 at function entry

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func.Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func.Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	// After morestack returns, jump back to the instruction following TEXT.
	jmp := obj.Appendp(callend, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.Pcond = cursym.Func.Text.Link
	jmp.Spadj = +framesize

	jls.Pcond = call
	if q1 != nil {
		q1.Pcond = call
	}

	return jls
}

// unaryDst is the opcode set supplied as UnaryDst in the LinkArch values
// below.
// NOTE(review): presumably these are the one-operand instructions whose
// operand is a destination; confirm against obj.LinkArch.UnaryDst.
var unaryDst = map[obj.As]bool{
	ABSWAPL:    true,
	ABSWAPQ:    true,
	ACMPXCHG8B: true,
	ADECB:      true,
	ADECL:      true,
	ADECQ:      true,
	ADECW:      true,
	AINCB:      true,
	AINCL:      true,
	AINCQ:      true,
	AINCW:      true,
	ANEGB:      true,
	ANEGL:      true,
	ANEGQ:      true,
	ANEGW:      true,
	ANOTB:      true,
	ANOTL:      true,
	ANOTQ:      true,
	ANOTW:      true,
	APOPL:      true,
	APOPQ:      true,
	APOPW:      true,
	ASETCC:     true,
	ASETCS:     true,
	ASETEQ:     true,
	ASETGE:     true,
	ASETGT:     true,
	ASETHI:     true,
	ASETLE:     true,
	ASETLS:     true,
	ASETLT:     true,
	ASETMI:     true,
	ASETNE:     true,
	ASETOC:     true,
	ASETOS:     true,
	ASETPC:     true,
	ASETPL:     true,
	ASETPS:     true,
	AFFREE:     true,
	AFLDENV:    true,
	AFSAVE:     true,
	AFSTCW:     true,
	AFSTENV:    true,
	AFSTSW:     true,
	AFXSAVE:    true,
	AFXSAVE64:  true,
	ASTMXCSR:   true,
}

// Linkamd64 is the LinkArch for sys.ArchAMD64; it wires in the hooks
// defined in this package.
var Linkamd64 = obj.LinkArch{
	Arch:       sys.ArchAMD64,
	Init:       instinit,
	Preprocess: preprocess,
	Assemble:   span6,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}

// Linkamd64p32 is the LinkArch for sys.ArchAMD64P32; it uses the same hooks
// as Linkamd64.
var Linkamd64p32 = obj.LinkArch{
	Arch:       sys.ArchAMD64P32,
	Init:       instinit,
	Preprocess: preprocess,
	Assemble:   span6,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}

// Link386 is the LinkArch for sys.Arch386; it uses the same hooks as
// Linkamd64.
var Link386 = obj.LinkArch{
	Arch:       sys.Arch386,
	Init:       instinit,
	Preprocess: preprocess,
	Assemble:   span6,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}