github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/obj/x86/obj6.go (about)

     1  // Inferno utils/6l/pass.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"log"
    35  	"math"
    36  	"path"
    37  	"strings"
    38  
    39  	"github.com/go-asm/go/abi"
    40  	"github.com/go-asm/go/cmd/obj"
    41  	"github.com/go-asm/go/cmd/objabi"
    42  	"github.com/go-asm/go/cmd/src"
    43  	"github.com/go-asm/go/cmd/sys"
    44  )
    45  
    46  func CanUse1InsnTLS(ctxt *obj.Link) bool {
    47  	if isAndroid {
    48  		// Android uses a global variable for the tls offset.
    49  		return false
    50  	}
    51  
    52  	if ctxt.Arch.Family == sys.I386 {
    53  		switch ctxt.Headtype {
    54  		case objabi.Hlinux,
    55  			objabi.Hplan9,
    56  			objabi.Hwindows:
    57  			return false
    58  		}
    59  
    60  		return true
    61  	}
    62  
    63  	switch ctxt.Headtype {
    64  	case objabi.Hplan9, objabi.Hwindows:
    65  		return false
    66  	case objabi.Hlinux, objabi.Hfreebsd:
    67  		return !ctxt.Flag_shared
    68  	}
    69  
    70  	return true
    71  }
    72  
    // progedit canonicalizes the single instruction p in place. Where one
    // instruction has to become several, the extra Progs are allocated with
    // newprog and linked in directly after p via obj.Appendp.
    //
    // In order, it:
    //   - converts TLS accesses to the 1- or 2-instruction form selected by
    //     CanUse1InsnTLS, and on Android/Windows loads the TLS offset from
    //     runtime.tls_g;
    //   - retypes a register-less, symbol-less memory operand in p.To of
    //     CMPPD/CMPPS/CMPSD/CMPSS as a constant (0 -> $0);
    //   - marks CALL/JMP/RET to an extern or static symbol as TYPE_BRANCH;
    //   - rewrites MOVL/MOVQ of an address into LEAL/LEAQ;
    //   - replaces floating-point constant operands with references to
    //     constant symbols (or uses XORPS for a +0 move into an X register);
    //   - applies rewriteToUseGot when ctxt.Flag_dynlink is set, and
    //     rewriteToPcrel when ctxt.Flag_shared is set on 386.
    73  func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
    74  	// Thread-local storage references use the TLS pseudo-register.
    75  	// As a register, TLS refers to the thread-local storage base, and it
    76  	// can only be loaded into another register:
    77  	//
    78  	//         MOVQ TLS, AX
    79  	//
    80  	// An offset from the thread-local storage base is written off(reg)(TLS*1).
    81  	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
    82  	// indexing from the loaded TLS base. This emits a relocation so that
    83  	// if the linker needs to adjust the offset, it can. For example:
    84  	//
    85  	//         MOVQ TLS, AX
    86  	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
    87  	//
    88  	// On systems that support direct access to the TLS memory, this
    89  	// pair of instructions can be reduced to a direct TLS memory reference:
    90  	//
    91  	//         MOVQ 0(TLS), CX // load g into CX
    92  	//
    93  	// The 2-instruction and 1-instruction forms correspond to the two code
    94  	// sequences for loading a TLS variable in the local exec model given in "ELF
    95  	// Handling For Thread-Local Storage".
    96  	//
    97  	// We apply this rewrite on systems that support the 1-instruction form.
    98  	// The decision is made using only the operating system and the -shared flag,
    99  	// not the link mode. If some link modes on a particular operating system
   100  	// require the 2-instruction form, then all builds for that operating system
   101  	// will use the 2-instruction form, so that the link mode decision can be
   102  	// delayed to link time.
   103  	//
   104  	// In this way, all supported systems use identical instructions to
   105  	// access TLS, and they are rewritten appropriately first here in
   106  	// liblink and then finally using relocations in the linker.
   107  	//
   108  	// When -shared is passed, we leave the code in the 2-instruction form but
   109  	// assemble (and relocate) them in different ways to generate the initial
   110  	// exec code sequence. It's a bit of a fluke that this is possible without
   111  	// rewriting the instructions more comprehensively, and it only does because
   112  	// we only support a single TLS variable (g).
   113  
   114  	if CanUse1InsnTLS(ctxt) {
   115  		// Reduce 2-instruction sequence to 1-instruction sequence.
   116  		// Sequences like
   117  		//	MOVQ TLS, BX
   118  		//	... off(BX)(TLS*1) ...
   119  		// become
   120  		//	NOP
   121  		//	... off(TLS) ...
   122  		//
   123  		// TODO(rsc): Remove the Hsolaris special case. It exists only to
   124  		// guarantee we are producing byte-identical binaries as before this code.
   125  		// But it should be unnecessary.
   126  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
        			// The load of the TLS base into a general register is dropped.
   127  			obj.Nopout(p)
   128  		}
        		// A source operand of the form off(reg)(TLS*...) becomes off(TLS).
   129  		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
   130  			p.From.Reg = REG_TLS
   131  			p.From.Scale = 0
   132  			p.From.Index = REG_NONE
   133  		}
   134  
        		// Same rewrite for a destination operand.
   135  		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   136  			p.To.Reg = REG_TLS
   137  			p.To.Scale = 0
   138  			p.To.Index = REG_NONE
   139  		}
   140  	} else {
   141  		// load_g, below, always inserts the 1-instruction sequence. Rewrite it
   142  		// as the 2-instruction sequence if necessary.
   143  		//	MOVQ 0(TLS), BX
   144  		// becomes
   145  		//	MOVQ TLS, BX
   146  		//	MOVQ 0(BX)(TLS*1), BX
   147  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
        			// q is the second instruction: the indexed load through the
        			// destination register. It starts as a copy of p's source so
        			// the original offset is carried over.
   148  			q := obj.Appendp(p, newprog)
   149  			q.As = p.As
   150  			q.From = p.From
   151  			q.From.Type = obj.TYPE_MEM
   152  			q.From.Reg = p.To.Reg
   153  			q.From.Index = REG_TLS
   154  			q.From.Scale = 2 // TODO: use 1
   155  			q.To = p.To
        			// p itself becomes the first instruction: load the TLS base
        			// into the destination register.
   156  			p.From.Type = obj.TYPE_REG
   157  			p.From.Reg = REG_TLS
   158  			p.From.Index = REG_NONE
   159  			p.From.Offset = 0
   160  		}
   161  	}
   162  
   163  	// Android and Windows use a tls offset determined at runtime. Rewrite
   164  	//	MOVQ TLS, BX
   165  	// to
   166  	//	MOVQ runtime.tls_g(SB), BX
   167  	if (isAndroid || ctxt.Headtype == objabi.Hwindows) &&
   168  		(p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   169  		p.From.Type = obj.TYPE_MEM
   170  		p.From.Name = obj.NAME_EXTERN
   171  		p.From.Reg = REG_NONE
   172  		p.From.Sym = ctxt.Lookup("runtime.tls_g")
   173  		p.From.Index = REG_NONE
   174  		if ctxt.Headtype == objabi.Hwindows {
   175  			// Windows requires an additional indirection
   176  			// to retrieve the TLS pointer,
   177  			// as runtime.tls_g contains the TLS offset from GS or FS.
   178  			// on AMD64 add
   179  			//	MOVQ 0(BX)(GS*1), BX
   180  			// on 386 add
   181  			//	MOVQ 0(BX)(FS*1), BX
        			// (the added instruction reuses p's opcode and destination).
   182  			q := obj.Appendp(p, newprog)
   183  			q.As = p.As
   184  			q.From = obj.Addr{}
   185  			q.From.Type = obj.TYPE_MEM
   186  			q.From.Reg = p.To.Reg
   187  			if ctxt.Arch.Family == sys.AMD64 {
   188  				q.From.Index = REG_GS
   189  			} else {
   190  				q.From.Index = REG_FS
   191  			}
   192  			q.From.Scale = 1
   193  			q.From.Offset = 0
   194  			q.To = p.To
   195  		}
   196  	}
   197  
   198  	// TODO: Remove.
        	// On windows/amd64 and on plan9 (any family), a TLS index with scale 1
        	// is bumped to scale 2 in either operand.
   199  	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
   200  		if p.From.Scale == 1 && p.From.Index == REG_TLS {
   201  			p.From.Scale = 2
   202  		}
   203  		if p.To.Scale == 1 && p.To.Index == REG_TLS {
   204  			p.To.Scale = 2
   205  		}
   206  	}
   207  
   208  	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
   209  	// That's what the tables expect.
   210  	switch p.As {
   211  	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
   212  		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
   213  			p.To.Type = obj.TYPE_CONST
   214  		}
   215  	}
   216  
   217  	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
   218  	switch p.As {
   219  	case obj.ACALL, obj.AJMP, obj.ARET:
   220  		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
   221  			p.To.Type = obj.TYPE_BRANCH
   222  		}
   223  	}
   224  
   225  	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
        	// On AMD64 this applies to every TYPE_ADDR source; on other families
        	// addresses of extern/static symbols are left as MOV.
   226  	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
   227  		switch p.As {
   228  		case AMOVL:
   229  			p.As = ALEAL
   230  			p.From.Type = obj.TYPE_MEM
   231  		case AMOVQ:
   232  			p.As = ALEAQ
   233  			p.From.Type = obj.TYPE_MEM
   234  		}
   235  	}
   236  
   237  	// Rewrite float constants to values stored in memory.
   238  	switch p.As {
   239  	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
   240  	case AMOVSS:
   241  		if p.From.Type == obj.TYPE_FCONST {
   242  			//  f == 0 can't be used here due to -0, so use Float64bits
   243  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   244  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   245  					p.As = AXORPS
   246  					p.From = p.To
        					// Leave the switch: the constant no longer needs a memory symbol.
   247  					break
   248  				}
   249  			}
   250  		}
   251  		fallthrough
   252  
        	// Single-precision operations: the constant is narrowed to float32
        	// and replaced by a reference to the symbol from ctxt.Float32Sym.
   253  	case AFMOVF,
   254  		AFADDF,
   255  		AFSUBF,
   256  		AFSUBRF,
   257  		AFMULF,
   258  		AFDIVF,
   259  		AFDIVRF,
   260  		AFCOMF,
   261  		AFCOMFP,
   262  		AADDSS,
   263  		ASUBSS,
   264  		AMULSS,
   265  		ADIVSS,
   266  		ACOMISS,
   267  		AUCOMISS:
   268  		if p.From.Type == obj.TYPE_FCONST {
   269  			f32 := float32(p.From.Val.(float64))
   270  			p.From.Type = obj.TYPE_MEM
   271  			p.From.Name = obj.NAME_EXTERN
   272  			p.From.Sym = ctxt.Float32Sym(f32)
   273  			p.From.Offset = 0
   274  		}
   275  
   276  	case AMOVSD:
   277  		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
   278  		if p.From.Type == obj.TYPE_FCONST {
   279  			//  f == 0 can't be used here due to -0, so use Float64bits
   280  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   281  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   282  					p.As = AXORPS
   283  					p.From = p.To
        					// Leave the switch: the constant no longer needs a memory symbol.
   284  					break
   285  				}
   286  			}
   287  		}
   288  		fallthrough
   289  
        	// Double-precision operations: the constant is replaced by a
        	// reference to the symbol from ctxt.Float64Sym.
   290  	case AFMOVD,
   291  		AFADDD,
   292  		AFSUBD,
   293  		AFSUBRD,
   294  		AFMULD,
   295  		AFDIVD,
   296  		AFDIVRD,
   297  		AFCOMD,
   298  		AFCOMDP,
   299  		AADDSD,
   300  		ASUBSD,
   301  		AMULSD,
   302  		ADIVSD,
   303  		ACOMISD,
   304  		AUCOMISD:
   305  		if p.From.Type == obj.TYPE_FCONST {
   306  			f64 := p.From.Val.(float64)
   307  			p.From.Type = obj.TYPE_MEM
   308  			p.From.Name = obj.NAME_EXTERN
   309  			p.From.Sym = ctxt.Float64Sym(f64)
   310  			p.From.Offset = 0
   311  		}
   312  	}
   313  
        	// The GOT rewrite runs first; rewriteToPcrel also recognizes the
        	// NAME_GOTREF operands it produces.
   314  	if ctxt.Flag_dynlink {
   315  		rewriteToUseGot(ctxt, p, newprog)
   316  	}
   317  
   318  	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
   319  		rewriteToPcrel(ctxt, p, newprog)
   320  	}
   321  }
   322  
   323  // Rewrite p, if necessary, to access global data via the global offset table.
        // Extra instructions are allocated with newprog and linked in after p.
        // When p has to be replaced by a sequence, the replacement copies are
        // appended and p itself is turned into a NOP with obj.Nopout.
        //
        // The scratch register holding the GOT entry is R15 on AMD64 and CX
        // otherwise, except for an LEAL whose destination register is not also
        // used by its source operand, where the destination register is used.
   324  func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   325  	var lea, mov obj.As
   326  	var reg int16
   327  	if ctxt.Arch.Family == sys.AMD64 {
   328  		lea = ALEAQ
   329  		mov = AMOVQ
   330  		reg = REG_R15
   331  	} else {
   332  		lea = ALEAL
   333  		mov = AMOVL
   334  		reg = REG_CX
   335  		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   336  			// Special case: clobber the destination register with
   337  			// the PC so we don't have to clobber CX.
   338  			// The SSA backend depends on CX not being clobbered across LEAL.
   339  			// See github.com/go-asm/go/cmd/compile/ssa/gen/386.rules (search for Flag_shared).
   340  			reg = p.To.Reg
   341  		}
   342  	}
   343  
   344  	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
   345  		//     ADUFFxxx $offset
   346  		// becomes
   347  		//     $MOV runtime.duffxxx@GOT, $reg
   348  		//     $LEA $offset($reg), $reg
   349  		//     CALL $reg
   350  		// (we use LEAx rather than ADDx because ADDx clobbers
   351  		// flags and duffzero on 386 does not otherwise do so).
   352  		var sym *obj.LSym
   353  		if p.As == obj.ADUFFZERO {
   354  			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
   355  		} else {
   356  			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
   357  		}
        		// Save the offset before p.To is overwritten.
   358  		offset := p.To.Offset
        		// p: $MOV runtime.duffxxx@GOT, $reg
   359  		p.As = mov
   360  		p.From.Type = obj.TYPE_MEM
   361  		p.From.Name = obj.NAME_GOTREF
   362  		p.From.Sym = sym
   363  		p.To.Type = obj.TYPE_REG
   364  		p.To.Reg = reg
   365  		p.To.Offset = 0
   366  		p.To.Sym = nil
        		// p1: $LEA $offset($reg), $reg
   367  		p1 := obj.Appendp(p, newprog)
   368  		p1.As = lea
   369  		p1.From.Type = obj.TYPE_MEM
   370  		p1.From.Offset = offset
   371  		p1.From.Reg = reg
   372  		p1.To.Type = obj.TYPE_REG
   373  		p1.To.Reg = reg
        		// p2: CALL $reg
   374  		p2 := obj.Appendp(p1, newprog)
   375  		p2.As = obj.ACALL
   376  		p2.To.Type = obj.TYPE_REG
   377  		p2.To.Reg = reg
   378  	}
   379  
   380  	// We only care about global data: NAME_EXTERN means a global
   381  	// symbol in the Go sense, and p.Sym.Local is true for a few
   382  	// internally defined symbols.
   383  	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   384  		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
   385  		p.As = mov
   386  		p.From.Type = obj.TYPE_ADDR
   387  	}
   388  	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   389  		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
   390  		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
   391  		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
   392  		cmplxdest := false
   393  		pAs := p.As
   394  		var dest obj.Addr
   395  		if p.To.Type != obj.TYPE_REG || pAs != mov {
        			// Diag only records the problem; the rewrite below still runs.
   396  			if ctxt.Arch.Family == sys.AMD64 {
   397  				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
   398  			}
        			// Remember the original opcode (pAs) and destination, and
        			// retarget p to load the address into reg first. The original
        			// operation is replayed from reg at the end of this block.
   399  			cmplxdest = true
   400  			dest = p.To
   401  			p.As = mov
   402  			p.To.Type = obj.TYPE_REG
   403  			p.To.Reg = reg
   404  			p.To.Sym = nil
   405  			p.To.Name = obj.NAME_NONE
   406  		}
   407  		p.From.Type = obj.TYPE_MEM
   408  		p.From.Name = obj.NAME_GOTREF
        		// q tracks the last instruction of the sequence built so far.
   409  		q := p
   410  		if p.From.Offset != 0 {
        			// Move the offset out of the GOT load into a following $LEA.
   411  			q = obj.Appendp(p, newprog)
   412  			q.As = lea
   413  			q.From.Type = obj.TYPE_MEM
   414  			q.From.Reg = p.To.Reg
   415  			q.From.Offset = p.From.Offset
   416  			q.To = p.To
   417  			p.From.Offset = 0
   418  		}
   419  		if cmplxdest {
        			// Replay the original opcode with reg as its source.
   420  			q = obj.Appendp(q, newprog)
   421  			q.As = pAs
   422  			q.To = dest
   423  			q.From.Type = obj.TYPE_REG
   424  			q.From.Reg = reg
   425  		}
   426  	}
   427  	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
   428  		ctxt.Diag("don't know how to handle %v with -dynlink", p)
   429  	}
        	// source is the operand of p that names a non-local extern symbol.
   430  	var source *obj.Addr
   431  	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
   432  	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
   433  	// An addition may be inserted between the two MOVs if there is an offset.
   434  	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   435  		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   436  			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
   437  		}
   438  		source = &p.From
   439  	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   440  		source = &p.To
   441  	} else {
        		// Neither operand refers to a non-local extern symbol: nothing to do.
   442  		return
   443  	}
   444  	if p.As == obj.ACALL {
   445  		// When dynlinking on 386, almost any call might end up being a call
   446  		// to a PLT, so make sure the GOT pointer is loaded into BX.
   447  		// RegTo2 is set on the replacement call insn to stop it being
   448  		// processed when it is in turn passed to progedit.
   449  		//
   450  		// We disable open-coded defers in buildssa() on 386 ONLY with shared
   451  		// libraries because of this extra code added before deferreturn calls.
   452  		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
   453  			return
   454  		}
   455  		p1 := obj.Appendp(p, newprog)
   456  		p2 := obj.Appendp(p1, newprog)
   457  
        		// p1: LEAL _GLOBAL_OFFSET_TABLE_, BX
   458  		p1.As = ALEAL
   459  		p1.From.Type = obj.TYPE_MEM
   460  		p1.From.Name = obj.NAME_STATIC
   461  		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
   462  		p1.To.Type = obj.TYPE_REG
   463  		p1.To.Reg = REG_BX
   464  
        		// p2: a copy of the original call, including a copy of RestArgs.
   465  		p2.As = p.As
   466  		p2.Scond = p.Scond
   467  		p2.From = p.From
   468  		if p.RestArgs != nil {
   469  			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
   470  		}
   471  		p2.Reg = p.Reg
   472  		p2.To = p.To
   473  		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
   474  		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
   475  		// itself gets passed to progedit.
   476  		p2.To.Type = obj.TYPE_MEM
   477  		p2.RegTo2 = 1
   478  
        		// The original call is replaced by the p1/p2 pair.
   479  		obj.Nopout(p)
   480  		return
   481  
   482  	}
        	// These instructions keep their symbol operand unchanged.
   483  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
   484  		return
   485  	}
        	// Diag only records the problem; the rewrite below still runs.
   486  	if source.Type != obj.TYPE_MEM {
   487  		ctxt.Diag("don't know how to handle %v with -dynlink", p)
   488  	}
   489  	p1 := obj.Appendp(p, newprog)
   490  	p2 := obj.Appendp(p1, newprog)
   491  
        	// p1: $MOV sym@GOT, reg
   492  	p1.As = mov
   493  	p1.From.Type = obj.TYPE_MEM
   494  	p1.From.Sym = source.Sym
   495  	p1.From.Name = obj.NAME_GOTREF
   496  	p1.To.Type = obj.TYPE_REG
   497  	p1.To.Reg = reg
   498  
        	// p2: the original instruction with its symbol operand replaced by a
        	// reg-based memory operand. Other fields of that operand, such as
        	// Offset, are carried over from p.
   499  	p2.As = p.As
   500  	p2.From = p.From
   501  	p2.To = p.To
   502  	if from3 := p.GetFrom3(); from3 != nil {
   503  		p2.AddRestSource(*from3)
   504  	}
   505  	if p.From.Name == obj.NAME_EXTERN {
   506  		p2.From.Reg = reg
   507  		p2.From.Name = obj.NAME_NONE
   508  		p2.From.Sym = nil
   509  	} else if p.To.Name == obj.NAME_EXTERN {
   510  		p2.To.Reg = reg
   511  		p2.To.Name = obj.NAME_NONE
   512  		p2.To.Sym = nil
   513  	} else {
        		// Not expected to be reached: source was taken above from an
        		// operand whose Name is NAME_EXTERN.
   514  		return
   515  	}
   516  	obj.Nopout(p)
   517  }
   518  
   // rewriteToPcrel rewrites p so that an operand naming an extern, static or
   // GOT symbol gets a base register. It appends a CALL to
   // __x86.get_pc_thunk.<reg> followed by a copy of p whose symbol operands
   // use <reg> as their base, and turns p itself into a NOP. Instructions
   // without such an operand are left alone. New Progs are allocated with
   // newprog. progedit calls this when ctxt.Flag_shared is set on 386.
   // NOTE(review): the thunk is defined outside this chunk; by its name it is
   // presumed to load the PC into <reg> - confirm.
   519  func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   520  	// RegTo2 is set on the instructions we insert here so they don't get
   521  	// processed twice.
   522  	if p.RegTo2 != 0 {
   523  		return
   524  	}
   525  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
   526  		return
   527  	}
   528  	// Any Prog (aside from the above special cases) with an Addr with Name ==
   529  	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
   530  	// inserted before it.
        	// isName reports whether a is such an operand: a TYPE_MEM or TYPE_ADDR
        	// operand with a symbol and no base register, whose symbol is not of
        	// type STLSBSS.
   531  	isName := func(a *obj.Addr) bool {
   532  		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
   533  			return false
   534  		}
   535  		if a.Sym.Type == objabi.STLSBSS {
   536  			return false
   537  		}
   538  		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
   539  	}
   540  
   541  	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
   542  		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
   543  		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
   544  		// respectively.
   545  		if p.To.Type != obj.TYPE_REG {
        			// q replays the original opcode from CX to the original
        			// destination; p becomes the MOVL of the address into CX.
   546  			q := obj.Appendp(p, newprog)
   547  			q.As = p.As
   548  			q.From.Type = obj.TYPE_REG
   549  			q.From.Reg = REG_CX
   550  			q.To = p.To
   551  			p.As = AMOVL
   552  			p.To.Type = obj.TYPE_REG
   553  			p.To.Reg = REG_CX
   554  			p.To.Sym = nil
   555  			p.To.Name = obj.NAME_NONE
   556  		}
   557  	}
   558  
        	// Nothing to do unless From, To or the third operand names a symbol.
   559  	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
   560  		return
   561  	}
        	// dst is the register the thunk is asked to fill; CX by default.
   562  	var dst int16 = REG_CX
   563  	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   564  		dst = p.To.Reg
   565  		// Why? See the comment near the top of rewriteToUseGot above.
   566  		// AMOVLs might be introduced by the GOT rewrites.
   567  	}
        	// q is the thunk call and r the rewritten copy of p, linked as
        	// p -> q -> r. Both are flagged through RegTo2.
   568  	q := obj.Appendp(p, newprog)
   569  	q.RegTo2 = 1
   570  	r := obj.Appendp(q, newprog)
   571  	r.RegTo2 = 1
   572  	q.As = obj.ACALL
        	// The thunk symbol is named after the lower-cased rconv form of dst
        	// and is marked local when first created.
   573  	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
   574  	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
   575  	q.To.Type = obj.TYPE_MEM
   576  	q.To.Name = obj.NAME_EXTERN
   577  	r.As = p.As
   578  	r.Scond = p.Scond
   579  	r.From = p.From
        	// NOTE(review): r shares p's RestArgs slice here rather than copying it
        	// (rewriteToUseGot copies with append); presumably harmless because p
        	// is turned into a NOP below - confirm.
   580  	r.RestArgs = p.RestArgs
   581  	r.Reg = p.Reg
   582  	r.To = p.To
        	// Give every symbol operand of r the register dst as its base.
   583  	if isName(&p.From) {
   584  		r.From.Reg = dst
   585  	}
   586  	if isName(&p.To) {
   587  		r.To.Reg = dst
   588  	}
   589  	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
   590  		r.GetFrom3().Reg = dst
   591  	}
   592  	obj.Nopout(p)
   593  }
   594  
   // Flag bit for Prog.mark.
   //
   // markBit is used in errorCheck to avoid duplicate work.
   const markBit = 1 << 0
   599  
   600  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
   601  	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
   602  		return
   603  	}
   604  
   605  	p := cursym.Func().Text
   606  	autoffset := int32(p.To.Offset)
   607  	if autoffset < 0 {
   608  		autoffset = 0
   609  	}
   610  
   611  	hasCall := false
   612  	for q := p; q != nil; q = q.Link {
   613  		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
   614  			hasCall = true
   615  			break
   616  		}
   617  	}
   618  
   619  	var bpsize int
   620  	if ctxt.Arch.Family == sys.AMD64 &&
   621  		!p.From.Sym.NoFrame() && // (1) below
   622  		!(autoffset == 0 && !hasCall) { // (2) below
   623  		// Make room to save a base pointer.
   624  		// There are 2 cases we must avoid:
   625  		// 1) If noframe is set (which we do for functions which tail call).
   626  		// For performance, we also want to avoid:
   627  		// 2) Frameless leaf functions
   628  		bpsize = ctxt.Arch.PtrSize
   629  		autoffset += int32(bpsize)
   630  		p.To.Offset += int64(bpsize)
   631  	} else {
   632  		bpsize = 0
   633  		p.From.Sym.Set(obj.AttrNoFrame, true)
   634  	}
   635  
   636  	textarg := int64(p.To.Val.(int32))
   637  	cursym.Func().Args = int32(textarg)
   638  	cursym.Func().Locals = int32(p.To.Offset)
   639  
   640  	// TODO(rsc): Remove.
   641  	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
   642  		cursym.Func().Locals = 0
   643  	}
   644  
   645  	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
   646  	if ctxt.Arch.Family == sys.AMD64 && autoffset < abi.StackSmall && !p.From.Sym.NoSplit() {
   647  		leaf := true
   648  	LeafSearch:
   649  		for q := p; q != nil; q = q.Link {
   650  			switch q.As {
   651  			case obj.ACALL:
   652  				// Treat common runtime calls that take no arguments
   653  				// the same as duffcopy and duffzero.
   654  				if !isZeroArgRuntimeCall(q.To.Sym) {
   655  					leaf = false
   656  					break LeafSearch
   657  				}
   658  				fallthrough
   659  			case obj.ADUFFCOPY, obj.ADUFFZERO:
   660  				if autoffset >= abi.StackSmall-8 {
   661  					leaf = false
   662  					break LeafSearch
   663  				}
   664  			}
   665  		}
   666  
   667  		if leaf {
   668  			p.From.Sym.Set(obj.AttrNoSplit, true)
   669  		}
   670  	}
   671  
   672  	var regEntryTmp0, regEntryTmp1 int16
   673  	if ctxt.Arch.Family == sys.AMD64 {
   674  		regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1
   675  	} else {
   676  		regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI
   677  	}
   678  
   679  	var regg int16
   680  	if !p.From.Sym.NoSplit() {
   681  		// Emit split check and load G register
   682  		p, regg = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
   683  	} else if p.From.Sym.Wrapper() {
   684  		// Load G register for the wrapper code
   685  		p, regg = loadG(ctxt, cursym, p, newprog)
   686  	}
   687  
   688  	if bpsize > 0 {
   689  		// Save caller's BP
   690  		p = obj.Appendp(p, newprog)
   691  
   692  		p.As = APUSHQ
   693  		p.From.Type = obj.TYPE_REG
   694  		p.From.Reg = REG_BP
   695  
   696  		// Move current frame to BP
   697  		p = obj.Appendp(p, newprog)
   698  
   699  		p.As = AMOVQ
   700  		p.From.Type = obj.TYPE_REG
   701  		p.From.Reg = REG_SP
   702  		p.To.Type = obj.TYPE_REG
   703  		p.To.Reg = REG_BP
   704  	}
   705  
   706  	if autoffset%int32(ctxt.Arch.RegSize) != 0 {
   707  		ctxt.Diag("unaligned stack size %d", autoffset)
   708  	}
   709  
   710  	// localoffset is autoffset discounting the frame pointer,
   711  	// which has already been allocated in the stack.
   712  	localoffset := autoffset - int32(bpsize)
   713  	if localoffset != 0 {
   714  		p = obj.Appendp(p, newprog)
   715  		p.As = AADJSP
   716  		p.From.Type = obj.TYPE_CONST
   717  		p.From.Offset = int64(localoffset)
   718  		p.Spadj = localoffset
   719  	}
   720  
   721  	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
   722  	// TODO: are there other cases (e.g., wrapper functions) that need marking?
   723  	if autoffset != 0 {
   724  		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
   725  	}
   726  
   727  	if cursym.Func().Text.From.Sym.Wrapper() {
   728  		// if g._panic != nil && g._panic.argp == FP {
   729  		//   g._panic.argp = bottom-of-frame
   730  		// }
   731  		//
   732  		//	MOVQ g_panic(g), regEntryTmp0
   733  		//	TESTQ regEntryTmp0, regEntryTmp0
   734  		//	JNE checkargp
   735  		// end:
   736  		//	NOP
   737  		//  ... rest of function ...
   738  		// checkargp:
   739  		//	LEAQ (autoffset+8)(SP), regEntryTmp1
   740  		//	CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   741  		//	JNE end
   742  		//  MOVQ SP, panic_argp(regEntryTmp0)
   743  		//  JMP end
   744  		//
   745  		// The NOP is needed to give the jumps somewhere to land.
   746  		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
   747  		//
   748  		// The layout is chosen to help static branch prediction:
   749  		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.
   750  
   751  		// MOVQ g_panic(g), regEntryTmp0
   752  		p = obj.Appendp(p, newprog)
   753  		p.As = AMOVQ
   754  		p.From.Type = obj.TYPE_MEM
   755  		p.From.Reg = regg
   756  		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
   757  		p.To.Type = obj.TYPE_REG
   758  		p.To.Reg = regEntryTmp0
   759  		if ctxt.Arch.Family == sys.I386 {
   760  			p.As = AMOVL
   761  		}
   762  
   763  		// TESTQ regEntryTmp0, regEntryTmp0
   764  		p = obj.Appendp(p, newprog)
   765  		p.As = ATESTQ
   766  		p.From.Type = obj.TYPE_REG
   767  		p.From.Reg = regEntryTmp0
   768  		p.To.Type = obj.TYPE_REG
   769  		p.To.Reg = regEntryTmp0
   770  		if ctxt.Arch.Family == sys.I386 {
   771  			p.As = ATESTL
   772  		}
   773  
   774  		// JNE checkargp (checkargp to be resolved later)
   775  		jne := obj.Appendp(p, newprog)
   776  		jne.As = AJNE
   777  		jne.To.Type = obj.TYPE_BRANCH
   778  
   779  		// end:
   780  		//  NOP
   781  		end := obj.Appendp(jne, newprog)
   782  		end.As = obj.ANOP
   783  
   784  		// Fast forward to end of function.
   785  		var last *obj.Prog
   786  		for last = end; last.Link != nil; last = last.Link {
   787  		}
   788  
   789  		// LEAQ (autoffset+8)(SP), regEntryTmp1
   790  		p = obj.Appendp(last, newprog)
   791  		p.As = ALEAQ
   792  		p.From.Type = obj.TYPE_MEM
   793  		p.From.Reg = REG_SP
   794  		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
   795  		p.To.Type = obj.TYPE_REG
   796  		p.To.Reg = regEntryTmp1
   797  		if ctxt.Arch.Family == sys.I386 {
   798  			p.As = ALEAL
   799  		}
   800  
   801  		// Set jne branch target.
   802  		jne.To.SetTarget(p)
   803  
   804  		// CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   805  		p = obj.Appendp(p, newprog)
   806  		p.As = ACMPQ
   807  		p.From.Type = obj.TYPE_MEM
   808  		p.From.Reg = regEntryTmp0
   809  		p.From.Offset = 0 // Panic.argp
   810  		p.To.Type = obj.TYPE_REG
   811  		p.To.Reg = regEntryTmp1
   812  		if ctxt.Arch.Family == sys.I386 {
   813  			p.As = ACMPL
   814  		}
   815  
   816  		// JNE end
   817  		p = obj.Appendp(p, newprog)
   818  		p.As = AJNE
   819  		p.To.Type = obj.TYPE_BRANCH
   820  		p.To.SetTarget(end)
   821  
   822  		// MOVQ SP, panic_argp(regEntryTmp0)
   823  		p = obj.Appendp(p, newprog)
   824  		p.As = AMOVQ
   825  		p.From.Type = obj.TYPE_REG
   826  		p.From.Reg = REG_SP
   827  		p.To.Type = obj.TYPE_MEM
   828  		p.To.Reg = regEntryTmp0
   829  		p.To.Offset = 0 // Panic.argp
   830  		if ctxt.Arch.Family == sys.I386 {
   831  			p.As = AMOVL
   832  		}
   833  
   834  		// JMP end
   835  		p = obj.Appendp(p, newprog)
   836  		p.As = obj.AJMP
   837  		p.To.Type = obj.TYPE_BRANCH
   838  		p.To.SetTarget(end)
   839  
   840  		// Reset p for following code.
   841  		p = end
   842  	}
   843  
   844  	var deltasp int32
   845  	for p = cursym.Func().Text; p != nil; p = p.Link {
   846  		pcsize := ctxt.Arch.RegSize
   847  		switch p.From.Name {
   848  		case obj.NAME_AUTO:
   849  			p.From.Offset += int64(deltasp) - int64(bpsize)
   850  		case obj.NAME_PARAM:
   851  			p.From.Offset += int64(deltasp) + int64(pcsize)
   852  		}
   853  		if p.GetFrom3() != nil {
   854  			switch p.GetFrom3().Name {
   855  			case obj.NAME_AUTO:
   856  				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
   857  			case obj.NAME_PARAM:
   858  				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
   859  			}
   860  		}
   861  		switch p.To.Name {
   862  		case obj.NAME_AUTO:
   863  			p.To.Offset += int64(deltasp) - int64(bpsize)
   864  		case obj.NAME_PARAM:
   865  			p.To.Offset += int64(deltasp) + int64(pcsize)
   866  		}
   867  
   868  		switch p.As {
   869  		default:
   870  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
   871  				f := cursym.Func()
   872  				if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
   873  					f.FuncFlag |= abi.FuncFlagSPWrite
   874  					if ctxt.Debugvlog || !ctxt.IsAsm {
   875  						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
   876  						if !ctxt.IsAsm {
   877  							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
   878  							ctxt.DiagFlush()
   879  							log.Fatalf("bad SPWRITE")
   880  						}
   881  					}
   882  				}
   883  			}
   884  			continue
   885  
   886  		case APUSHL, APUSHFL:
   887  			deltasp += 4
   888  			p.Spadj = 4
   889  			continue
   890  
   891  		case APUSHQ, APUSHFQ:
   892  			deltasp += 8
   893  			p.Spadj = 8
   894  			continue
   895  
   896  		case APUSHW, APUSHFW:
   897  			deltasp += 2
   898  			p.Spadj = 2
   899  			continue
   900  
   901  		case APOPL, APOPFL:
   902  			deltasp -= 4
   903  			p.Spadj = -4
   904  			continue
   905  
   906  		case APOPQ, APOPFQ:
   907  			deltasp -= 8
   908  			p.Spadj = -8
   909  			continue
   910  
   911  		case APOPW, APOPFW:
   912  			deltasp -= 2
   913  			p.Spadj = -2
   914  			continue
   915  
   916  		case AADJSP:
   917  			p.Spadj = int32(p.From.Offset)
   918  			deltasp += int32(p.From.Offset)
   919  			continue
   920  
   921  		case obj.ARET:
   922  			// do nothing
   923  		}
   924  
   925  		if autoffset != deltasp {
   926  			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
   927  		}
   928  
   929  		if autoffset != 0 {
   930  			to := p.To // Keep To attached to RET for retjmp below
   931  			p.To = obj.Addr{}
   932  			if localoffset != 0 {
   933  				p.As = AADJSP
   934  				p.From.Type = obj.TYPE_CONST
   935  				p.From.Offset = int64(-localoffset)
   936  				p.Spadj = -localoffset
   937  				p = obj.Appendp(p, newprog)
   938  			}
   939  
   940  			if bpsize > 0 {
   941  				// Restore caller's BP
   942  				p.As = APOPQ
   943  				p.To.Type = obj.TYPE_REG
   944  				p.To.Reg = REG_BP
   945  				p.Spadj = -int32(bpsize)
   946  				p = obj.Appendp(p, newprog)
   947  			}
   948  
   949  			p.As = obj.ARET
   950  			p.To = to
   951  
   952  			// If there are instructions following
   953  			// this ARET, they come from a branch
   954  			// with the same stackframe, so undo
   955  			// the cleanup.
   956  			p.Spadj = +autoffset
   957  		}
   958  
   959  		if p.To.Sym != nil { // retjmp
   960  			p.As = obj.AJMP
   961  		}
   962  	}
   963  }
   964  
   965  func isZeroArgRuntimeCall(s *obj.LSym) bool {
   966  	if s == nil {
   967  		return false
   968  	}
   969  	switch s.Name {
   970  	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
   971  		return true
   972  	}
   973  	if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
   974  		// These functions do take arguments (in registers),
   975  		// but use no stack before they do a stack check. We
   976  		// should include them. See issue 31219.
   977  		return true
   978  	}
   979  	return false
   980  }
   981  
   982  func indir_cx(ctxt *obj.Link, a *obj.Addr) {
   983  	a.Type = obj.TYPE_MEM
   984  	a.Reg = REG_CX
   985  }
   986  
   987  // loadG ensures the G is loaded into a register (either CX or REGG),
   988  // appending instructions to p if necessary. It returns the new last
   989  // instruction and the G register.
   990  func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
   991  	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
   992  		// Use the G register directly in ABIInternal
   993  		return p, REGG
   994  	}
   995  
   996  	var regg int16 = REG_CX
   997  	if ctxt.Arch.Family == sys.AMD64 {
   998  		regg = REGG // == REG_R14
   999  	}
  1000  
  1001  	p = obj.Appendp(p, newprog)
  1002  	p.As = AMOVQ
  1003  	if ctxt.Arch.PtrSize == 4 {
  1004  		p.As = AMOVL
  1005  	}
  1006  	p.From.Type = obj.TYPE_MEM
  1007  	p.From.Reg = REG_TLS
  1008  	p.From.Offset = 0
  1009  	p.To.Type = obj.TYPE_REG
  1010  	p.To.Reg = regg
  1011  
  1012  	// Rewrite TLS instruction if necessary.
  1013  	next := p.Link
  1014  	progedit(ctxt, p, newprog)
  1015  	for p.Link != next {
  1016  		p = p.Link
  1017  		progedit(ctxt, p, newprog)
  1018  	}
  1019  
  1020  	if p.From.Index == REG_TLS {
  1021  		p.From.Scale = 2
  1022  	}
  1023  
  1024  	return p, regg
  1025  }
  1026  
// stacksplit appends the stack-split check of a function prologue after p.
// It appends to (does not overwrite) p.
//
// The G register is obtained inside this function through loadG; callers do
// not need to have loaded it. When ctxt.Flag_maymorestack is non-empty, a
// call to that function is emitted before the check, bracketed by
// spill/unspill of the register arguments and, if the function needs a
// closure context, a push/pop of REGCTXT.
//
// framesize selects one of three comparison sequences (at most
// abi.StackSmall, at most abi.StackBig, or larger). The call to the
// morestack routine is not placed inline: it is appended after the current
// last instruction of the function, and the check branches to it.
// textarg is not referenced by the body.
//
// Returns the last new instruction of the inline sequence and the G register.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) (*obj.Prog, int16) {
	// Start from the 64-bit opcodes and switch to the 32-bit forms on 386.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	// Scratch register for the adjusted SP in the larger-frame cases.
	tmp := int16(REG_AX) // use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	if ctxt.Flag_maymorestack != "" {
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		// Preserve the closure context register across the call.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// (The jump emitted at the end targets startPred.Link, i.e. the first
	// instruction appended after this point.)
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1, when set, is the carry branch of the huge-frame case; it is
	// pointed at the morestack call sequence below.
	var q1 *obj.Prog
	if framesize <= abi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= abi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - abi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - abi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		// The branch target is filled in once the morestack call
		// sequence exists (see q1 below).
		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	}

	// common
	// Conditional branch following the comparison; its target (the
	// morestack call sequence) is set at the bottom of this function.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	// end is the last instruction of the inline sequence and is what the
	// caller gets back.
	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Walk to the current last instruction of the function; the
	// morestack call sequence is appended after it.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Choose the morestack variant: morestackc for C functions,
	// morestack_noctxt when no closure context is needed, and plain
	// morestack otherwise.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	// The instructions which unspill regs should be preemptible.
	pcdata = ctxt.EndUnsafePoint(callend, newprog, -1)
	unspill := cursym.Func().UnspillRegisterArgs(pcdata, newprog)

	// After morestack returns, go back to the first instruction emitted
	// after startPred. The +framesize Spadj cancels the -framesize
	// recorded on spfix.
	jmp := obj.Appendp(unspill, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize

	// Point the check's branches at the start of the call sequence.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end, rg
}
  1247  
  1248  func isR15(r int16) bool {
  1249  	return r == REG_R15 || r == REG_R15B
  1250  }
  1251  func addrMentionsR15(a *obj.Addr) bool {
  1252  	if a == nil {
  1253  		return false
  1254  	}
  1255  	return isR15(a.Reg) || isR15(a.Index)
  1256  }
  1257  func progMentionsR15(p *obj.Prog) bool {
  1258  	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
  1259  }
  1260  
  1261  func addrUsesGlobal(a *obj.Addr) bool {
  1262  	if a == nil {
  1263  		return false
  1264  	}
  1265  	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
  1266  }
  1267  func progUsesGlobal(p *obj.Prog) bool {
  1268  	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
  1269  		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
  1270  		// or R15 would be dead at them anyway.
  1271  		return false
  1272  	}
  1273  	if p.As == ALEAQ {
  1274  		// The GOT entry is placed directly in the destination register; R15 is not used.
  1275  		return false
  1276  	}
  1277  	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
  1278  }
  1279  
  1280  type rwMask int
  1281  
  1282  const (
  1283  	readFrom rwMask = 1 << iota
  1284  	readTo
  1285  	readReg
  1286  	readFrom3
  1287  	writeFrom
  1288  	writeTo
  1289  	writeReg
  1290  	writeFrom3
  1291  )
  1292  
  1293  // progRW returns a mask describing the effects of the instruction p.
  1294  // Note: this isn't exhaustively accurate. It is only currently used for detecting
  1295  // reads/writes to R15, so SSE register behavior isn't fully correct, and
  1296  // other weird cases (e.g. writes to DX by CLD) also aren't captured.
  1297  func progRW(p *obj.Prog) rwMask {
  1298  	var m rwMask
  1299  	// Default for most instructions
  1300  	if p.From.Type != obj.TYPE_NONE {
  1301  		m |= readFrom
  1302  	}
  1303  	if p.To.Type != obj.TYPE_NONE {
  1304  		// Most x86 instructions update the To value
  1305  		m |= readTo | writeTo
  1306  	}
  1307  	if p.Reg != 0 {
  1308  		m |= readReg
  1309  	}
  1310  	if p.GetFrom3() != nil {
  1311  		m |= readFrom3
  1312  	}
  1313  
  1314  	// Lots of exceptions to the above defaults.
  1315  	name := p.As.String()
  1316  	if strings.HasPrefix(name, "MOV") || strings.HasPrefix(name, "PMOV") {
  1317  		// MOV instructions don't read To.
  1318  		m &^= readTo
  1319  	}
  1320  	switch p.As {
  1321  	case APOPW, APOPL, APOPQ,
  1322  		ALEAL, ALEAQ,
  1323  		AIMUL3W, AIMUL3L, AIMUL3Q,
  1324  		APEXTRB, APEXTRW, APEXTRD, APEXTRQ, AVPEXTRB, AVPEXTRW, AVPEXTRD, AVPEXTRQ, AEXTRACTPS,
  1325  		ABSFW, ABSFL, ABSFQ, ABSRW, ABSRL, ABSRQ, APOPCNTW, APOPCNTL, APOPCNTQ, ALZCNTW, ALZCNTL, ALZCNTQ,
  1326  		ASHLXL, ASHLXQ, ASHRXL, ASHRXQ, ASARXL, ASARXQ:
  1327  		// These instructions are pure writes to To. They don't use its old value.
  1328  		m &^= readTo
  1329  	case AXORL, AXORQ:
  1330  		// Register-clearing idiom doesn't read previous value.
  1331  		if p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG && p.From.Reg == p.To.Reg {
  1332  			m &^= readFrom | readTo
  1333  		}
  1334  	case AMULXL, AMULXQ:
  1335  		// These are write-only to both To and From3.
  1336  		m &^= readTo | readFrom3
  1337  		m |= writeFrom3
  1338  	}
  1339  	return m
  1340  }
  1341  
  1342  // progReadsR15 reports whether p reads the register R15.
  1343  func progReadsR15(p *obj.Prog) bool {
  1344  	m := progRW(p)
  1345  	if m&readFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1346  		return true
  1347  	}
  1348  	if m&readTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1349  		return true
  1350  	}
  1351  	if m&readReg != 0 && isR15(p.Reg) {
  1352  		return true
  1353  	}
  1354  	if m&readFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1355  		return true
  1356  	}
  1357  	// reads of the index registers
  1358  	if p.From.Type == obj.TYPE_MEM && (isR15(p.From.Reg) || isR15(p.From.Index)) {
  1359  		return true
  1360  	}
  1361  	if p.To.Type == obj.TYPE_MEM && (isR15(p.To.Reg) || isR15(p.To.Index)) {
  1362  		return true
  1363  	}
  1364  	if f3 := p.GetFrom3(); f3 != nil && f3.Type == obj.TYPE_MEM && (isR15(f3.Reg) || isR15(f3.Index)) {
  1365  		return true
  1366  	}
  1367  	return false
  1368  }
  1369  
  1370  // progWritesR15 reports whether p writes the register R15.
  1371  func progWritesR15(p *obj.Prog) bool {
  1372  	m := progRW(p)
  1373  	if m&writeFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1374  		return true
  1375  	}
  1376  	if m&writeTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1377  		return true
  1378  	}
  1379  	if m&writeReg != 0 && isR15(p.Reg) {
  1380  		return true
  1381  	}
  1382  	if m&writeFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1383  		return true
  1384  	}
  1385  	return false
  1386  }
  1387  
  1388  func errorCheck(ctxt *obj.Link, s *obj.LSym) {
  1389  	// When dynamic linking, R15 is used to access globals. Reject code that
  1390  	// uses R15 after a global variable access.
  1391  	if !ctxt.Flag_dynlink {
  1392  		return
  1393  	}
  1394  
  1395  	// Flood fill all the instructions where R15's value is junk.
  1396  	// If there are any uses of R15 in that set, report an error.
  1397  	var work []*obj.Prog
  1398  	var mentionsR15 bool
  1399  	for p := s.Func().Text; p != nil; p = p.Link {
  1400  		if progUsesGlobal(p) {
  1401  			work = append(work, p)
  1402  			p.Mark |= markBit
  1403  		}
  1404  		if progMentionsR15(p) {
  1405  			mentionsR15 = true
  1406  		}
  1407  	}
  1408  	if mentionsR15 {
  1409  		for len(work) > 0 {
  1410  			p := work[len(work)-1]
  1411  			work = work[:len(work)-1]
  1412  			if progReadsR15(p) {
  1413  				pos := ctxt.PosTable.Pos(p.Pos)
  1414  				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
  1415  				break // only report one error
  1416  			}
  1417  			if progWritesR15(p) {
  1418  				// R15 is overwritten by this instruction. Its value is not junk any more.
  1419  				continue
  1420  			}
  1421  			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
  1422  				q.Mark |= markBit
  1423  				work = append(work, q)
  1424  			}
  1425  			if p.As == obj.AJMP || p.As == obj.ARET {
  1426  				continue // no fallthrough
  1427  			}
  1428  			if q := p.Link; q != nil && q.Mark&markBit == 0 {
  1429  				q.Mark |= markBit
  1430  				work = append(work, q)
  1431  			}
  1432  		}
  1433  	}
  1434  
  1435  	// Clean up.
  1436  	for p := s.Func().Text; p != nil; p = p.Link {
  1437  		p.Mark &^= markBit
  1438  	}
  1439  }
  1440  
// unaryDst is a set of opcodes, keyed by obj.As. Every listed opcode maps
// to true, so a lookup of an unlisted opcode yields false. The same map is
// installed as the UnaryDst field of both Linkamd64 and Link386.
// NOTE(review): presumably these are the single-operand instructions whose
// operand is a destination — confirm against obj.LinkArch.UnaryDst.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDPID:      true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
  1526  
// Linkamd64 is the obj.LinkArch for sys.ArchAMD64. It wires this
// package's instinit, errorCheck, preprocess, span6, progedit and
// populateSeh into the corresponding hooks, and supplies the unaryDst
// opcode set and the AMD64 DWARF register table.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	ErrorCheck:     errorCheck,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	SEH:            populateSeh,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
  1538  
// Link386 is the obj.LinkArch for sys.Arch386. It shares instinit,
// preprocess, span6, progedit and the unaryDst opcode set with
// Linkamd64, uses the X86 DWARF register table, and leaves the
// ErrorCheck and SEH hooks unset.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}