github.com/bir3/gocompiler@v0.3.205/src/cmd/internal/obj/x86/obj6.go (about)

     1  // Inferno utils/6l/pass.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"github.com/bir3/gocompiler/src/cmd/internal/obj"
    35  	"github.com/bir3/gocompiler/src/cmd/internal/objabi"
    36  	"github.com/bir3/gocompiler/src/cmd/internal/src"
    37  	"github.com/bir3/gocompiler/src/cmd/internal/sys"
    38  	"log"
    39  	"math"
    40  	"path"
    41  	"strings"
    42  )
    43  
    44  func CanUse1InsnTLS(ctxt *obj.Link) bool {
    45  	if isAndroid {
    46  		// Android uses a global variable for the tls offset.
    47  		return false
    48  	}
    49  
    50  	if ctxt.Arch.Family == sys.I386 {
    51  		switch ctxt.Headtype {
    52  		case objabi.Hlinux,
    53  			objabi.Hplan9,
    54  			objabi.Hwindows:
    55  			return false
    56  		}
    57  
    58  		return true
    59  	}
    60  
    61  	switch ctxt.Headtype {
    62  	case objabi.Hplan9, objabi.Hwindows:
    63  		return false
    64  	case objabi.Hlinux, objabi.Hfreebsd:
    65  		return !ctxt.Flag_shared
    66  	}
    67  
    68  	return true
    69  }
    70  
    71  func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
    72  	// Thread-local storage references use the TLS pseudo-register.
    73  	// As a register, TLS refers to the thread-local storage base, and it
    74  	// can only be loaded into another register:
    75  	//
    76  	//         MOVQ TLS, AX
    77  	//
    78  	// An offset from the thread-local storage base is written off(reg)(TLS*1).
    79  	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
    80  	// indexing from the loaded TLS base. This emits a relocation so that
    81  	// if the linker needs to adjust the offset, it can. For example:
    82  	//
    83  	//         MOVQ TLS, AX
    84  	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
    85  	//
    86  	// On systems that support direct access to the TLS memory, this
    87  	// pair of instructions can be reduced to a direct TLS memory reference:
    88  	//
    89  	//         MOVQ 0(TLS), CX // load g into CX
    90  	//
    91  	// The 2-instruction and 1-instruction forms correspond to the two code
    92  	// sequences for loading a TLS variable in the local exec model given in "ELF
    93  	// Handling For Thread-Local Storage".
    94  	//
    95  	// We apply this rewrite on systems that support the 1-instruction form.
    96  	// The decision is made using only the operating system and the -shared flag,
    97  	// not the link mode. If some link modes on a particular operating system
    98  	// require the 2-instruction form, then all builds for that operating system
    99  	// will use the 2-instruction form, so that the link mode decision can be
   100  	// delayed to link time.
   101  	//
   102  	// In this way, all supported systems use identical instructions to
   103  	// access TLS, and they are rewritten appropriately first here in
   104  	// liblink and then finally using relocations in the linker.
   105  	//
   106  	// When -shared is passed, we leave the code in the 2-instruction form but
   107  	// assemble (and relocate) them in different ways to generate the initial
   108  	// exec code sequence. It's a bit of a fluke that this is possible without
   109  	// rewriting the instructions more comprehensively, and it only does because
   110  	// we only support a single TLS variable (g).
   111  
   112  	if CanUse1InsnTLS(ctxt) {
   113  		// Reduce 2-instruction sequence to 1-instruction sequence.
   114  		// Sequences like
   115  		//	MOVQ TLS, BX
   116  		//	... off(BX)(TLS*1) ...
   117  		// become
   118  		//	NOP
   119  		//	... off(TLS) ...
   120  		//
   121  		// TODO(rsc): Remove the Hsolaris special case. It exists only to
   122  		// guarantee we are producing byte-identical binaries as before this code.
   123  		// But it should be unnecessary.
   124  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
   125  			obj.Nopout(p)
   126  		}
   127  		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
   128  			p.From.Reg = REG_TLS
   129  			p.From.Scale = 0
   130  			p.From.Index = REG_NONE
   131  		}
   132  
   133  		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   134  			p.To.Reg = REG_TLS
   135  			p.To.Scale = 0
   136  			p.To.Index = REG_NONE
   137  		}
   138  	} else {
   139  		// load_g, below, always inserts the 1-instruction sequence. Rewrite it
   140  		// as the 2-instruction sequence if necessary.
   141  		//	MOVQ 0(TLS), BX
   142  		// becomes
   143  		//	MOVQ TLS, BX
   144  		//	MOVQ 0(BX)(TLS*1), BX
   145  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   146  			q := obj.Appendp(p, newprog)
   147  			q.As = p.As
   148  			q.From = p.From
   149  			q.From.Type = obj.TYPE_MEM
   150  			q.From.Reg = p.To.Reg
   151  			q.From.Index = REG_TLS
   152  			q.From.Scale = 2 // TODO: use 1
   153  			q.To = p.To
   154  			p.From.Type = obj.TYPE_REG
   155  			p.From.Reg = REG_TLS
   156  			p.From.Index = REG_NONE
   157  			p.From.Offset = 0
   158  		}
   159  	}
   160  
   161  	// Android and Win64 use a tls offset determined at runtime. Rewrite
   162  	//	MOVQ TLS, BX
   163  	// to
   164  	//	MOVQ runtime.tls_g(SB), BX
   165  	if (isAndroid || (ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64)) &&
   166  		(p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   167  		p.From.Type = obj.TYPE_MEM
   168  		p.From.Name = obj.NAME_EXTERN
   169  		p.From.Reg = REG_NONE
   170  		p.From.Sym = ctxt.Lookup("runtime.tls_g")
   171  		p.From.Index = REG_NONE
   172  		if ctxt.Headtype == objabi.Hwindows {
   173  			// Win64 requires an additional indirection
   174  			// to retrieve the TLS pointer,
   175  			// as runtime.tls_g contains the TLS offset from GS.
   176  			// add
   177  			//	MOVQ 0(BX)(GS*1), BX
   178  			q := obj.Appendp(p, newprog)
   179  			q.As = p.As
   180  			q.From = obj.Addr{}
   181  			q.From.Type = obj.TYPE_MEM
   182  			q.From.Reg = p.To.Reg
   183  			q.From.Index = REG_GS
   184  			q.From.Scale = 1
   185  			q.From.Offset = 0
   186  			q.To = p.To
   187  		}
   188  	}
   189  
   190  	// TODO: Remove.
   191  	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
   192  		if p.From.Scale == 1 && p.From.Index == REG_TLS {
   193  			p.From.Scale = 2
   194  		}
   195  		if p.To.Scale == 1 && p.To.Index == REG_TLS {
   196  			p.To.Scale = 2
   197  		}
   198  	}
   199  
   200  	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
   201  	// That's what the tables expect.
   202  	switch p.As {
   203  	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
   204  		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
   205  			p.To.Type = obj.TYPE_CONST
   206  		}
   207  	}
   208  
   209  	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
   210  	switch p.As {
   211  	case obj.ACALL, obj.AJMP, obj.ARET:
   212  		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
   213  			p.To.Type = obj.TYPE_BRANCH
   214  		}
   215  	}
   216  
   217  	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
   218  	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
   219  		switch p.As {
   220  		case AMOVL:
   221  			p.As = ALEAL
   222  			p.From.Type = obj.TYPE_MEM
   223  		case AMOVQ:
   224  			p.As = ALEAQ
   225  			p.From.Type = obj.TYPE_MEM
   226  		}
   227  	}
   228  
   229  	// Rewrite float constants to values stored in memory.
   230  	switch p.As {
   231  	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
   232  	case AMOVSS:
   233  		if p.From.Type == obj.TYPE_FCONST {
   234  			//  f == 0 can't be used here due to -0, so use Float64bits
   235  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   236  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   237  					p.As = AXORPS
   238  					p.From = p.To
   239  					break
   240  				}
   241  			}
   242  		}
   243  		fallthrough
   244  
   245  	case AFMOVF,
   246  		AFADDF,
   247  		AFSUBF,
   248  		AFSUBRF,
   249  		AFMULF,
   250  		AFDIVF,
   251  		AFDIVRF,
   252  		AFCOMF,
   253  		AFCOMFP,
   254  		AADDSS,
   255  		ASUBSS,
   256  		AMULSS,
   257  		ADIVSS,
   258  		ACOMISS,
   259  		AUCOMISS:
   260  		if p.From.Type == obj.TYPE_FCONST {
   261  			f32 := float32(p.From.Val.(float64))
   262  			p.From.Type = obj.TYPE_MEM
   263  			p.From.Name = obj.NAME_EXTERN
   264  			p.From.Sym = ctxt.Float32Sym(f32)
   265  			p.From.Offset = 0
   266  		}
   267  
   268  	case AMOVSD:
   269  		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
   270  		if p.From.Type == obj.TYPE_FCONST {
   271  			//  f == 0 can't be used here due to -0, so use Float64bits
   272  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   273  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   274  					p.As = AXORPS
   275  					p.From = p.To
   276  					break
   277  				}
   278  			}
   279  		}
   280  		fallthrough
   281  
   282  	case AFMOVD,
   283  		AFADDD,
   284  		AFSUBD,
   285  		AFSUBRD,
   286  		AFMULD,
   287  		AFDIVD,
   288  		AFDIVRD,
   289  		AFCOMD,
   290  		AFCOMDP,
   291  		AADDSD,
   292  		ASUBSD,
   293  		AMULSD,
   294  		ADIVSD,
   295  		ACOMISD,
   296  		AUCOMISD:
   297  		if p.From.Type == obj.TYPE_FCONST {
   298  			f64 := p.From.Val.(float64)
   299  			p.From.Type = obj.TYPE_MEM
   300  			p.From.Name = obj.NAME_EXTERN
   301  			p.From.Sym = ctxt.Float64Sym(f64)
   302  			p.From.Offset = 0
   303  		}
   304  	}
   305  
   306  	if ctxt.Flag_dynlink {
   307  		rewriteToUseGot(ctxt, p, newprog)
   308  	}
   309  
   310  	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
   311  		rewriteToPcrel(ctxt, p, newprog)
   312  	}
   313  }
   314  
   315  // Rewrite p, if necessary, to access global data via the global offset table.
   316  func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   317  	var lea, mov obj.As
   318  	var reg int16
   319  	if ctxt.Arch.Family == sys.AMD64 {
   320  		lea = ALEAQ
   321  		mov = AMOVQ
   322  		reg = REG_R15
   323  	} else {
   324  		lea = ALEAL
   325  		mov = AMOVL
   326  		reg = REG_CX
   327  		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   328  			// Special case: clobber the destination register with
   329  			// the PC so we don't have to clobber CX.
   330  			// The SSA backend depends on CX not being clobbered across LEAL.
   331  			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
   332  			reg = p.To.Reg
   333  		}
   334  	}
   335  
   336  	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
   337  		//     ADUFFxxx $offset
   338  		// becomes
   339  		//     $MOV runtime.duffxxx@GOT, $reg
   340  		//     $LEA $offset($reg), $reg
   341  		//     CALL $reg
   342  		// (we use LEAx rather than ADDx because ADDx clobbers
   343  		// flags and duffzero on 386 does not otherwise do so).
   344  		var sym *obj.LSym
   345  		if p.As == obj.ADUFFZERO {
   346  			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
   347  		} else {
   348  			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
   349  		}
   350  		offset := p.To.Offset
   351  		p.As = mov
   352  		p.From.Type = obj.TYPE_MEM
   353  		p.From.Name = obj.NAME_GOTREF
   354  		p.From.Sym = sym
   355  		p.To.Type = obj.TYPE_REG
   356  		p.To.Reg = reg
   357  		p.To.Offset = 0
   358  		p.To.Sym = nil
   359  		p1 := obj.Appendp(p, newprog)
   360  		p1.As = lea
   361  		p1.From.Type = obj.TYPE_MEM
   362  		p1.From.Offset = offset
   363  		p1.From.Reg = reg
   364  		p1.To.Type = obj.TYPE_REG
   365  		p1.To.Reg = reg
   366  		p2 := obj.Appendp(p1, newprog)
   367  		p2.As = obj.ACALL
   368  		p2.To.Type = obj.TYPE_REG
   369  		p2.To.Reg = reg
   370  	}
   371  
   372  	// We only care about global data: NAME_EXTERN means a global
   373  	// symbol in the Go sense, and p.Sym.Local is true for a few
   374  	// internally defined symbols.
   375  	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   376  		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
   377  		p.As = mov
   378  		p.From.Type = obj.TYPE_ADDR
   379  	}
   380  	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   381  		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
   382  		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
   383  		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
   384  		cmplxdest := false
   385  		pAs := p.As
   386  		var dest obj.Addr
   387  		if p.To.Type != obj.TYPE_REG || pAs != mov {
   388  			if ctxt.Arch.Family == sys.AMD64 {
   389  				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
   390  			}
   391  			cmplxdest = true
   392  			dest = p.To
   393  			p.As = mov
   394  			p.To.Type = obj.TYPE_REG
   395  			p.To.Reg = reg
   396  			p.To.Sym = nil
   397  			p.To.Name = obj.NAME_NONE
   398  		}
   399  		p.From.Type = obj.TYPE_MEM
   400  		p.From.Name = obj.NAME_GOTREF
   401  		q := p
   402  		if p.From.Offset != 0 {
   403  			q = obj.Appendp(p, newprog)
   404  			q.As = lea
   405  			q.From.Type = obj.TYPE_MEM
   406  			q.From.Reg = p.To.Reg
   407  			q.From.Offset = p.From.Offset
   408  			q.To = p.To
   409  			p.From.Offset = 0
   410  		}
   411  		if cmplxdest {
   412  			q = obj.Appendp(q, newprog)
   413  			q.As = pAs
   414  			q.To = dest
   415  			q.From.Type = obj.TYPE_REG
   416  			q.From.Reg = reg
   417  		}
   418  	}
   419  	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
   420  		ctxt.Diag("don't know how to handle %v with -dynlink", p)
   421  	}
   422  	var source *obj.Addr
   423  	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
   424  	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
   425  	// An addition may be inserted between the two MOVs if there is an offset.
   426  	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   427  		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   428  			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
   429  		}
   430  		source = &p.From
   431  	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   432  		source = &p.To
   433  	} else {
   434  		return
   435  	}
   436  	if p.As == obj.ACALL {
   437  		// When dynlinking on 386, almost any call might end up being a call
   438  		// to a PLT, so make sure the GOT pointer is loaded into BX.
   439  		// RegTo2 is set on the replacement call insn to stop it being
   440  		// processed when it is in turn passed to progedit.
   441  		//
   442  		// We disable open-coded defers in buildssa() on 386 ONLY with shared
   443  		// libraries because of this extra code added before deferreturn calls.
   444  		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
   445  			return
   446  		}
   447  		p1 := obj.Appendp(p, newprog)
   448  		p2 := obj.Appendp(p1, newprog)
   449  
   450  		p1.As = ALEAL
   451  		p1.From.Type = obj.TYPE_MEM
   452  		p1.From.Name = obj.NAME_STATIC
   453  		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
   454  		p1.To.Type = obj.TYPE_REG
   455  		p1.To.Reg = REG_BX
   456  
   457  		p2.As = p.As
   458  		p2.Scond = p.Scond
   459  		p2.From = p.From
   460  		if p.RestArgs != nil {
   461  			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
   462  		}
   463  		p2.Reg = p.Reg
   464  		p2.To = p.To
   465  		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
   466  		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
   467  		// itself gets passed to progedit.
   468  		p2.To.Type = obj.TYPE_MEM
   469  		p2.RegTo2 = 1
   470  
   471  		obj.Nopout(p)
   472  		return
   473  
   474  	}
   475  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
   476  		return
   477  	}
   478  	if source.Type != obj.TYPE_MEM {
   479  		ctxt.Diag("don't know how to handle %v with -dynlink", p)
   480  	}
   481  	p1 := obj.Appendp(p, newprog)
   482  	p2 := obj.Appendp(p1, newprog)
   483  
   484  	p1.As = mov
   485  	p1.From.Type = obj.TYPE_MEM
   486  	p1.From.Sym = source.Sym
   487  	p1.From.Name = obj.NAME_GOTREF
   488  	p1.To.Type = obj.TYPE_REG
   489  	p1.To.Reg = reg
   490  
   491  	p2.As = p.As
   492  	p2.From = p.From
   493  	p2.To = p.To
   494  	if p.From.Name == obj.NAME_EXTERN {
   495  		p2.From.Reg = reg
   496  		p2.From.Name = obj.NAME_NONE
   497  		p2.From.Sym = nil
   498  	} else if p.To.Name == obj.NAME_EXTERN {
   499  		p2.To.Reg = reg
   500  		p2.To.Name = obj.NAME_NONE
   501  		p2.To.Sym = nil
   502  	} else {
   503  		return
   504  	}
   505  	obj.Nopout(p)
   506  }
   507  
   508  func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   509  	// RegTo2 is set on the instructions we insert here so they don't get
   510  	// processed twice.
   511  	if p.RegTo2 != 0 {
   512  		return
   513  	}
   514  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
   515  		return
   516  	}
   517  	// Any Prog (aside from the above special cases) with an Addr with Name ==
   518  	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
   519  	// inserted before it.
   520  	isName := func(a *obj.Addr) bool {
   521  		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
   522  			return false
   523  		}
   524  		if a.Sym.Type == objabi.STLSBSS {
   525  			return false
   526  		}
   527  		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
   528  	}
   529  
   530  	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
   531  		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
   532  		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
   533  		// respectively.
   534  		if p.To.Type != obj.TYPE_REG {
   535  			q := obj.Appendp(p, newprog)
   536  			q.As = p.As
   537  			q.From.Type = obj.TYPE_REG
   538  			q.From.Reg = REG_CX
   539  			q.To = p.To
   540  			p.As = AMOVL
   541  			p.To.Type = obj.TYPE_REG
   542  			p.To.Reg = REG_CX
   543  			p.To.Sym = nil
   544  			p.To.Name = obj.NAME_NONE
   545  		}
   546  	}
   547  
   548  	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
   549  		return
   550  	}
   551  	var dst int16 = REG_CX
   552  	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   553  		dst = p.To.Reg
   554  		// Why? See the comment near the top of rewriteToUseGot above.
   555  		// AMOVLs might be introduced by the GOT rewrites.
   556  	}
   557  	q := obj.Appendp(p, newprog)
   558  	q.RegTo2 = 1
   559  	r := obj.Appendp(q, newprog)
   560  	r.RegTo2 = 1
   561  	q.As = obj.ACALL
   562  	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
   563  	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
   564  	q.To.Type = obj.TYPE_MEM
   565  	q.To.Name = obj.NAME_EXTERN
   566  	r.As = p.As
   567  	r.Scond = p.Scond
   568  	r.From = p.From
   569  	r.RestArgs = p.RestArgs
   570  	r.Reg = p.Reg
   571  	r.To = p.To
   572  	if isName(&p.From) {
   573  		r.From.Reg = dst
   574  	}
   575  	if isName(&p.To) {
   576  		r.To.Reg = dst
   577  	}
   578  	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
   579  		r.GetFrom3().Reg = dst
   580  	}
   581  	obj.Nopout(p)
   582  }
   583  
   584  // Prog.mark
   585  const (
   586  	markBit = 1 << 0 // used in errorCheck to avoid duplicate work
   587  )
   588  
   589  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
   590  	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
   591  		return
   592  	}
   593  
   594  	p := cursym.Func().Text
   595  	autoffset := int32(p.To.Offset)
   596  	if autoffset < 0 {
   597  		autoffset = 0
   598  	}
   599  
   600  	hasCall := false
   601  	for q := p; q != nil; q = q.Link {
   602  		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
   603  			hasCall = true
   604  			break
   605  		}
   606  	}
   607  
   608  	var bpsize int
   609  	if ctxt.Arch.Family == sys.AMD64 &&
   610  		!p.From.Sym.NoFrame() && // (1) below
   611  		!(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below
   612  		!(autoffset == 0 && !hasCall) { // (3) below
   613  		// Make room to save a base pointer.
   614  		// There are 2 cases we must avoid:
   615  		// 1) If noframe is set (which we do for functions which tail call).
   616  		// 2) Scary runtime internals which would be all messed up by frame pointers.
   617  		//    We detect these using a heuristic: frameless nosplit functions.
   618  		//    TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic.
   619  		// For performance, we also want to avoid:
   620  		// 3) Frameless leaf functions
   621  		bpsize = ctxt.Arch.PtrSize
   622  		autoffset += int32(bpsize)
   623  		p.To.Offset += int64(bpsize)
   624  	} else {
   625  		bpsize = 0
   626  	}
   627  
   628  	textarg := int64(p.To.Val.(int32))
   629  	cursym.Func().Args = int32(textarg)
   630  	cursym.Func().Locals = int32(p.To.Offset)
   631  
   632  	// TODO(rsc): Remove.
   633  	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
   634  		cursym.Func().Locals = 0
   635  	}
   636  
   637  	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
   638  	if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() {
   639  		leaf := true
   640  	LeafSearch:
   641  		for q := p; q != nil; q = q.Link {
   642  			switch q.As {
   643  			case obj.ACALL:
   644  				// Treat common runtime calls that take no arguments
   645  				// the same as duffcopy and duffzero.
   646  				if !isZeroArgRuntimeCall(q.To.Sym) {
   647  					leaf = false
   648  					break LeafSearch
   649  				}
   650  				fallthrough
   651  			case obj.ADUFFCOPY, obj.ADUFFZERO:
   652  				if autoffset >= objabi.StackSmall-8 {
   653  					leaf = false
   654  					break LeafSearch
   655  				}
   656  			}
   657  		}
   658  
   659  		if leaf {
   660  			p.From.Sym.Set(obj.AttrNoSplit, true)
   661  		}
   662  	}
   663  
   664  	var regEntryTmp0, regEntryTmp1 int16
   665  	if ctxt.Arch.Family == sys.AMD64 {
   666  		regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1
   667  	} else {
   668  		regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI
   669  	}
   670  
   671  	var regg int16
   672  	if !p.From.Sym.NoSplit() {
   673  		// Emit split check and load G register
   674  		p, regg = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
   675  	} else if p.From.Sym.Wrapper() {
   676  		// Load G register for the wrapper code
   677  		p, regg = loadG(ctxt, cursym, p, newprog)
   678  	}
   679  
   680  	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
   681  	// TODO: are there other cases (e.g., wrapper functions) that need marking?
   682  	markedPrologue := false
   683  
   684  	if autoffset != 0 {
   685  		if autoffset%int32(ctxt.Arch.RegSize) != 0 {
   686  			ctxt.Diag("unaligned stack size %d", autoffset)
   687  		}
   688  		p = obj.Appendp(p, newprog)
   689  		p.As = AADJSP
   690  		p.From.Type = obj.TYPE_CONST
   691  		p.From.Offset = int64(autoffset)
   692  		p.Spadj = autoffset
   693  		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
   694  		markedPrologue = true
   695  	}
   696  
   697  	if bpsize > 0 {
   698  		// Save caller's BP
   699  		p = obj.Appendp(p, newprog)
   700  
   701  		p.As = AMOVQ
   702  		p.From.Type = obj.TYPE_REG
   703  		p.From.Reg = REG_BP
   704  		p.To.Type = obj.TYPE_MEM
   705  		p.To.Reg = REG_SP
   706  		p.To.Scale = 1
   707  		p.To.Offset = int64(autoffset) - int64(bpsize)
   708  		if !markedPrologue {
   709  			p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
   710  		}
   711  
   712  		// Move current frame to BP
   713  		p = obj.Appendp(p, newprog)
   714  
   715  		p.As = ALEAQ
   716  		p.From.Type = obj.TYPE_MEM
   717  		p.From.Reg = REG_SP
   718  		p.From.Scale = 1
   719  		p.From.Offset = int64(autoffset) - int64(bpsize)
   720  		p.To.Type = obj.TYPE_REG
   721  		p.To.Reg = REG_BP
   722  	}
   723  
   724  	if cursym.Func().Text.From.Sym.Wrapper() {
   725  		// if g._panic != nil && g._panic.argp == FP {
   726  		//   g._panic.argp = bottom-of-frame
   727  		// }
   728  		//
   729  		//	MOVQ g_panic(g), regEntryTmp0
   730  		//	TESTQ regEntryTmp0, regEntryTmp0
   731  		//	JNE checkargp
   732  		// end:
   733  		//	NOP
   734  		//  ... rest of function ...
   735  		// checkargp:
   736  		//	LEAQ (autoffset+8)(SP), regEntryTmp1
   737  		//	CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   738  		//	JNE end
   739  		//  MOVQ SP, panic_argp(regEntryTmp0)
   740  		//  JMP end
   741  		//
   742  		// The NOP is needed to give the jumps somewhere to land.
   743  		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
   744  		//
   745  		// The layout is chosen to help static branch prediction:
   746  		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.
   747  
   748  		// MOVQ g_panic(g), regEntryTmp0
   749  		p = obj.Appendp(p, newprog)
   750  		p.As = AMOVQ
   751  		p.From.Type = obj.TYPE_MEM
   752  		p.From.Reg = regg
   753  		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
   754  		p.To.Type = obj.TYPE_REG
   755  		p.To.Reg = regEntryTmp0
   756  		if ctxt.Arch.Family == sys.I386 {
   757  			p.As = AMOVL
   758  		}
   759  
   760  		// TESTQ regEntryTmp0, regEntryTmp0
   761  		p = obj.Appendp(p, newprog)
   762  		p.As = ATESTQ
   763  		p.From.Type = obj.TYPE_REG
   764  		p.From.Reg = regEntryTmp0
   765  		p.To.Type = obj.TYPE_REG
   766  		p.To.Reg = regEntryTmp0
   767  		if ctxt.Arch.Family == sys.I386 {
   768  			p.As = ATESTL
   769  		}
   770  
   771  		// JNE checkargp (checkargp to be resolved later)
   772  		jne := obj.Appendp(p, newprog)
   773  		jne.As = AJNE
   774  		jne.To.Type = obj.TYPE_BRANCH
   775  
   776  		// end:
   777  		//  NOP
   778  		end := obj.Appendp(jne, newprog)
   779  		end.As = obj.ANOP
   780  
   781  		// Fast forward to end of function.
   782  		var last *obj.Prog
   783  		for last = end; last.Link != nil; last = last.Link {
   784  		}
   785  
   786  		// LEAQ (autoffset+8)(SP), regEntryTmp1
   787  		p = obj.Appendp(last, newprog)
   788  		p.As = ALEAQ
   789  		p.From.Type = obj.TYPE_MEM
   790  		p.From.Reg = REG_SP
   791  		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
   792  		p.To.Type = obj.TYPE_REG
   793  		p.To.Reg = regEntryTmp1
   794  		if ctxt.Arch.Family == sys.I386 {
   795  			p.As = ALEAL
   796  		}
   797  
   798  		// Set jne branch target.
   799  		jne.To.SetTarget(p)
   800  
   801  		// CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   802  		p = obj.Appendp(p, newprog)
   803  		p.As = ACMPQ
   804  		p.From.Type = obj.TYPE_MEM
   805  		p.From.Reg = regEntryTmp0
   806  		p.From.Offset = 0 // Panic.argp
   807  		p.To.Type = obj.TYPE_REG
   808  		p.To.Reg = regEntryTmp1
   809  		if ctxt.Arch.Family == sys.I386 {
   810  			p.As = ACMPL
   811  		}
   812  
   813  		// JNE end
   814  		p = obj.Appendp(p, newprog)
   815  		p.As = AJNE
   816  		p.To.Type = obj.TYPE_BRANCH
   817  		p.To.SetTarget(end)
   818  
   819  		// MOVQ SP, panic_argp(regEntryTmp0)
   820  		p = obj.Appendp(p, newprog)
   821  		p.As = AMOVQ
   822  		p.From.Type = obj.TYPE_REG
   823  		p.From.Reg = REG_SP
   824  		p.To.Type = obj.TYPE_MEM
   825  		p.To.Reg = regEntryTmp0
   826  		p.To.Offset = 0 // Panic.argp
   827  		if ctxt.Arch.Family == sys.I386 {
   828  			p.As = AMOVL
   829  		}
   830  
   831  		// JMP end
   832  		p = obj.Appendp(p, newprog)
   833  		p.As = obj.AJMP
   834  		p.To.Type = obj.TYPE_BRANCH
   835  		p.To.SetTarget(end)
   836  
   837  		// Reset p for following code.
   838  		p = end
   839  	}
   840  
   841  	var deltasp int32
   842  	for p = cursym.Func().Text; p != nil; p = p.Link {
   843  		pcsize := ctxt.Arch.RegSize
   844  		switch p.From.Name {
   845  		case obj.NAME_AUTO:
   846  			p.From.Offset += int64(deltasp) - int64(bpsize)
   847  		case obj.NAME_PARAM:
   848  			p.From.Offset += int64(deltasp) + int64(pcsize)
   849  		}
   850  		if p.GetFrom3() != nil {
   851  			switch p.GetFrom3().Name {
   852  			case obj.NAME_AUTO:
   853  				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
   854  			case obj.NAME_PARAM:
   855  				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
   856  			}
   857  		}
   858  		switch p.To.Name {
   859  		case obj.NAME_AUTO:
   860  			p.To.Offset += int64(deltasp) - int64(bpsize)
   861  		case obj.NAME_PARAM:
   862  			p.To.Offset += int64(deltasp) + int64(pcsize)
   863  		}
   864  
   865  		switch p.As {
   866  		default:
   867  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
   868  				f := cursym.Func()
   869  				if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 {
   870  					f.FuncFlag |= objabi.FuncFlag_SPWRITE
   871  					if ctxt.Debugvlog || !ctxt.IsAsm {
   872  						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
   873  						if !ctxt.IsAsm {
   874  							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
   875  							ctxt.DiagFlush()
   876  							log.Fatalf("bad SPWRITE")
   877  						}
   878  					}
   879  				}
   880  			}
   881  			continue
   882  
   883  		case APUSHL, APUSHFL:
   884  			deltasp += 4
   885  			p.Spadj = 4
   886  			continue
   887  
   888  		case APUSHQ, APUSHFQ:
   889  			deltasp += 8
   890  			p.Spadj = 8
   891  			continue
   892  
   893  		case APUSHW, APUSHFW:
   894  			deltasp += 2
   895  			p.Spadj = 2
   896  			continue
   897  
   898  		case APOPL, APOPFL:
   899  			deltasp -= 4
   900  			p.Spadj = -4
   901  			continue
   902  
   903  		case APOPQ, APOPFQ:
   904  			deltasp -= 8
   905  			p.Spadj = -8
   906  			continue
   907  
   908  		case APOPW, APOPFW:
   909  			deltasp -= 2
   910  			p.Spadj = -2
   911  			continue
   912  
   913  		case AADJSP:
   914  			p.Spadj = int32(p.From.Offset)
   915  			deltasp += int32(p.From.Offset)
   916  			continue
   917  
   918  		case obj.ARET:
   919  			// do nothing
   920  		}
   921  
   922  		if autoffset != deltasp {
   923  			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
   924  		}
   925  
   926  		if autoffset != 0 {
   927  			to := p.To // Keep To attached to RET for retjmp below
   928  			p.To = obj.Addr{}
   929  			if bpsize > 0 {
   930  				// Restore caller's BP
   931  				p.As = AMOVQ
   932  
   933  				p.From.Type = obj.TYPE_MEM
   934  				p.From.Reg = REG_SP
   935  				p.From.Scale = 1
   936  				p.From.Offset = int64(autoffset) - int64(bpsize)
   937  				p.To.Type = obj.TYPE_REG
   938  				p.To.Reg = REG_BP
   939  				p = obj.Appendp(p, newprog)
   940  			}
   941  
   942  			p.As = AADJSP
   943  			p.From.Type = obj.TYPE_CONST
   944  			p.From.Offset = int64(-autoffset)
   945  			p.Spadj = -autoffset
   946  			p = obj.Appendp(p, newprog)
   947  			p.As = obj.ARET
   948  			p.To = to
   949  
   950  			// If there are instructions following
   951  			// this ARET, they come from a branch
   952  			// with the same stackframe, so undo
   953  			// the cleanup.
   954  			p.Spadj = +autoffset
   955  		}
   956  
   957  		if p.To.Sym != nil { // retjmp
   958  			p.As = obj.AJMP
   959  		}
   960  	}
   961  }
   962  
   963  func isZeroArgRuntimeCall(s *obj.LSym) bool {
   964  	if s == nil {
   965  		return false
   966  	}
   967  	switch s.Name {
   968  	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
   969  		return true
   970  	}
   971  	if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
   972  		// These functions do take arguments (in registers),
   973  		// but use no stack before they do a stack check. We
   974  		// should include them. See issue 31219.
   975  		return true
   976  	}
   977  	return false
   978  }
   979  
   980  func indir_cx(ctxt *obj.Link, a *obj.Addr) {
   981  	a.Type = obj.TYPE_MEM
   982  	a.Reg = REG_CX
   983  }
   984  
   985  // loadG ensures the G is loaded into a register (either CX or REGG),
   986  // appending instructions to p if necessary. It returns the new last
   987  // instruction and the G register.
   988  func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
   989  	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
   990  		// Use the G register directly in ABIInternal
   991  		return p, REGG
   992  	}
   993  
   994  	var regg int16 = REG_CX
   995  	if ctxt.Arch.Family == sys.AMD64 {
   996  		regg = REGG // == REG_R14
   997  	}
   998  
   999  	p = obj.Appendp(p, newprog)
  1000  	p.As = AMOVQ
  1001  	if ctxt.Arch.PtrSize == 4 {
  1002  		p.As = AMOVL
  1003  	}
  1004  	p.From.Type = obj.TYPE_MEM
  1005  	p.From.Reg = REG_TLS
  1006  	p.From.Offset = 0
  1007  	p.To.Type = obj.TYPE_REG
  1008  	p.To.Reg = regg
  1009  
  1010  	// Rewrite TLS instruction if necessary.
  1011  	next := p.Link
  1012  	progedit(ctxt, p, newprog)
  1013  	for p.Link != next {
  1014  		p = p.Link
  1015  		progedit(ctxt, p, newprog)
  1016  	}
  1017  
  1018  	if p.From.Index == REG_TLS {
  1019  		p.From.Scale = 2
  1020  	}
  1021  
  1022  	return p, regg
  1023  }
  1024  
// stacksplit appends the stack-split check for cursym after p.
// It appends to (does not overwrite) p.
//
// When ctxt.Flag_maymorestack is set, a call to that function is emitted
// first, bracketed by a spill/unspill of the register arguments and, if the
// function needs a context register, a push/pop of REGCTXT.
//
// The check itself loads G via loadG (the caller does not need to have G in
// a register) and compares SP against a stack guard field of G: the field at
// 3*PtrSize for CFunc symbols, the one at 2*PtrSize otherwise. One of three
// instruction sequences is used depending on framesize. The call to
// morestack is placed at the end of the function and is followed by a jump
// back to the first instruction of the check.
//
// It returns the instruction produced by EndUnsafePoint after the
// conditional jump, and the register holding G. textarg is not used.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) (*obj.Prog, int16) {
	// Default to the 64-bit opcodes; switched to the 32-bit forms below
	// when targeting 386.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	tmp := int16(REG_AX) // use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	if ctxt.Flag_maymorestack != "" {
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		// Save the context register across the call if this function uses it.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		// Restore the context register saved above.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// (The jump targets startPred.Link, i.e. the first instruction
	// appended after this point.)
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1 is the underflow branch (JCS) emitted only in the huge-frame case;
	// it stays nil otherwise.
	var q1 *obj.Prog
	if framesize <= objabi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= objabi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - objabi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > objabi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - objabi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		// The branch target is filled in at the end, once the morestack
		// call sequence exists.
		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	}

	// common
	// Conditional branch to the morestack call; its target is set below.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Walk to the last instruction of the function; the morestack call
	// sequence is appended there.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Choose the morestack variant: morestackc for CFunc symbols,
	// morestack_noctxt when no context register is needed, and plain
	// morestack otherwise.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	pcdata = cursym.Func().UnspillRegisterArgs(callend, newprog)
	pcdata = ctxt.EndUnsafePoint(pcdata, newprog, -1)

	// After morestack returns, jump back and redo the check from the
	// first instruction appended after startPred. The Spadj here undoes
	// the adjustment recorded on spfix.
	jmp := obj.Appendp(pcdata, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize

	// Point the conditional branches of the inline check at the start of
	// the spill/call sequence.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end, rg
}
  1244  
  1245  func isR15(r int16) bool {
  1246  	return r == REG_R15 || r == REG_R15B
  1247  }
  1248  func addrMentionsR15(a *obj.Addr) bool {
  1249  	if a == nil {
  1250  		return false
  1251  	}
  1252  	return isR15(a.Reg) || isR15(a.Index)
  1253  }
  1254  func progMentionsR15(p *obj.Prog) bool {
  1255  	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
  1256  }
  1257  
  1258  // progOverwritesR15 reports whether p writes to R15 and does not depend on
  1259  // the previous value of R15.
  1260  func progOverwritesR15(p *obj.Prog) bool {
  1261  	if !(p.To.Type == obj.TYPE_REG && isR15(p.To.Reg)) {
  1262  		// Not writing to R15.
  1263  		return false
  1264  	}
  1265  	if (p.As == AXORL || p.As == AXORQ) && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1266  		// These look like uses of R15, but aren't, so we must detect these
  1267  		// before the use check below.
  1268  		return true
  1269  	}
  1270  	if addrMentionsR15(&p.From) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3()) {
  1271  		// use before overwrite
  1272  		return false
  1273  	}
  1274  	if p.As == AMOVL || p.As == AMOVQ || p.As == APOPQ {
  1275  		return true
  1276  		// TODO: MOVB might be ok if we only ever use R15B.
  1277  	}
  1278  	return false
  1279  }
  1280  
  1281  func addrUsesGlobal(a *obj.Addr) bool {
  1282  	if a == nil {
  1283  		return false
  1284  	}
  1285  	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
  1286  }
  1287  func progUsesGlobal(p *obj.Prog) bool {
  1288  	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
  1289  		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
  1290  		// or R15 would be dead at them anyway.
  1291  		return false
  1292  	}
  1293  	if p.As == ALEAQ {
  1294  		// The GOT entry is placed directly in the destination register; R15 is not used.
  1295  		return false
  1296  	}
  1297  	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
  1298  }
  1299  
  1300  func errorCheck(ctxt *obj.Link, s *obj.LSym) {
  1301  	// When dynamic linking, R15 is used to access globals. Reject code that
  1302  	// uses R15 after a global variable access.
  1303  	if !ctxt.Flag_dynlink {
  1304  		return
  1305  	}
  1306  
  1307  	// Flood fill all the instructions where R15's value is junk.
  1308  	// If there are any uses of R15 in that set, report an error.
  1309  	var work []*obj.Prog
  1310  	var mentionsR15 bool
  1311  	for p := s.Func().Text; p != nil; p = p.Link {
  1312  		if progUsesGlobal(p) {
  1313  			work = append(work, p)
  1314  			p.Mark |= markBit
  1315  		}
  1316  		if progMentionsR15(p) {
  1317  			mentionsR15 = true
  1318  		}
  1319  	}
  1320  	if mentionsR15 {
  1321  		for len(work) > 0 {
  1322  			p := work[len(work)-1]
  1323  			work = work[:len(work)-1]
  1324  			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
  1325  				q.Mark |= markBit
  1326  				work = append(work, q)
  1327  			}
  1328  			if p.As == obj.AJMP || p.As == obj.ARET {
  1329  				continue // no fallthrough
  1330  			}
  1331  			if progMentionsR15(p) {
  1332  				if progOverwritesR15(p) {
  1333  					// R15 is overwritten by this instruction. Its value is not junk any more.
  1334  					continue
  1335  				}
  1336  				pos := ctxt.PosTable.Pos(p.Pos)
  1337  				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
  1338  				break // only report one error
  1339  			}
  1340  			if q := p.Link; q != nil && q.Mark&markBit == 0 {
  1341  				q.Mark |= markBit
  1342  				work = append(work, q)
  1343  			}
  1344  		}
  1345  	}
  1346  
  1347  	// Clean up.
  1348  	for p := s.Func().Text; p != nil; p = p.Link {
  1349  		p.Mark &^= markBit
  1350  	}
  1351  }
  1352  
// unaryDst is the opcode set installed as the UnaryDst field of both
// Linkamd64 and Link386.
// NOTE(review): presumably these are the instructions whose single operand
// is a destination rather than a source — confirm against obj.LinkArch.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
  1437  
// Linkamd64 is the obj.LinkArch for sys.ArchAMD64. It wires up this
// package's instinit, errorCheck, preprocess, span6 and progedit hooks,
// the unaryDst opcode table, and the AMD64 DWARF register mapping.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	ErrorCheck:     errorCheck,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
  1448  
// Link386 is the obj.LinkArch for sys.Arch386. It wires up this package's
// instinit, preprocess, span6 and progedit hooks, the unaryDst opcode
// table, and the X86 DWARF register mapping. No ErrorCheck hook is set.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}