github.com/bir3/gocompiler@v0.9.2202/src/cmd/internal/obj/x86/obj6.go (about)

     1  // Inferno utils/6l/pass.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"github.com/bir3/gocompiler/src/cmd/internal/obj"
    35  	"github.com/bir3/gocompiler/src/cmd/internal/objabi"
    36  	"github.com/bir3/gocompiler/src/cmd/internal/src"
    37  	"github.com/bir3/gocompiler/src/cmd/internal/sys"
    38  	"github.com/bir3/gocompiler/src/internal/abi"
    39  	"log"
    40  	"math"
    41  	"path"
    42  	"strings"
    43  )
    44  
    45  func CanUse1InsnTLS(ctxt *obj.Link) bool {
    46  	if isAndroid {
    47  		// Android uses a global variable for the tls offset.
    48  		return false
    49  	}
    50  
    51  	if ctxt.Arch.Family == sys.I386 {
    52  		switch ctxt.Headtype {
    53  		case objabi.Hlinux,
    54  			objabi.Hplan9,
    55  			objabi.Hwindows:
    56  			return false
    57  		}
    58  
    59  		return true
    60  	}
    61  
    62  	switch ctxt.Headtype {
    63  	case objabi.Hplan9, objabi.Hwindows:
    64  		return false
    65  	case objabi.Hlinux, objabi.Hfreebsd:
    66  		return !ctxt.Flag_shared
    67  	}
    68  
    69  	return true
    70  }
    71  
// progedit rewrites the single instruction p in place into the form the rest
// of the x86 back end expects. In order, it:
//
//   - normalizes TLS accesses to the 1- or 2-instruction form selected by
//     CanUse1InsnTLS,
//   - on Android and Windows, replaces loads of the TLS pseudo-register with
//     loads of runtime.tls_g,
//   - turns a bare 0 in the last operand of CMPPS-family instructions into $0,
//   - marks CALL/JMP/RET to a symbol as TYPE_BRANCH,
//   - turns MOVL/MOVQ of an address into LEAL/LEAQ,
//   - replaces floating-point constant operands with references to constant
//     symbols (or rewrites a +0 load into an X register as XORPS), and
//   - applies the GOT rewrite when ctxt.Flag_dynlink is set and the
//     PC-relative rewrite when ctxt.Flag_shared is set on 386.
//
// Any extra instructions are allocated with newprog and appended after p
// via obj.Appendp. The steps run in sequence on the same p, so each step
// sees the result of the ones before it.
func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only works because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
			obj.Nopout(p)
		}
		// A source operand of the form off(reg)(TLS*1) becomes off(TLS).
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		// The same rewrite for a destination operand.
		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			// q is the second instruction: it starts as a copy of p's
			// operands (keeping the offset) and is then re-based on the
			// destination register with TLS as the index.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2	// TODO: use 1
			q.To = p.To
			// p itself becomes the plain register load of TLS.
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// Android and Windows use a tls offset determined at runtime. Rewrite
	//	MOVQ TLS, BX
	// to
	//	MOVQ runtime.tls_g(SB), BX
	if (isAndroid || ctxt.Headtype == objabi.Hwindows) &&
		(p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		p.From.Reg = REG_NONE
		p.From.Sym = ctxt.Lookup("runtime.tls_g")
		p.From.Index = REG_NONE
		if ctxt.Headtype == objabi.Hwindows {
			// Windows requires an additional indirection
			// to retrieve the TLS pointer,
			// as runtime.tls_g contains the TLS offset from GS or FS.
			// on AMD64 add
			//	MOVQ 0(BX)(GS*1), BX
			// on 386 add
			//	MOVQ 0(BX)(FS*1), BX
			// (the added instruction reuses p's opcode, so it is the
			// MOVL form whenever p is a MOVL).
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = obj.Addr{}
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			if ctxt.Arch.Family == sys.AMD64 {
				q.From.Index = REG_GS
			} else {
				q.From.Index = REG_FS
			}
			q.From.Scale = 1
			q.From.Offset = 0
			q.To = p.To
		}
	}

	// On windows/amd64 and on Plan 9, a TLS index with scale 1 is rewritten
	// to scale 2 on both operands.
	// TODO: Remove.
	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		// Only a memory operand with no name, base, index or symbol
		// (i.e. a bare offset) is treated as a constant.
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	// On AMD64 this applies to every address operand; elsewhere addresses
	// of NAME_EXTERN and NAME_STATIC symbols are left as MOVs.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		// Any other MOVSS is handled like the remaining 32-bit float
		// instructions below.
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		// Replace the constant by a reference to the float32 constant
		// symbol returned by ctxt.Float32Sym.
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		// Any other MOVSD is handled like the remaining 64-bit float
		// instructions below.
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		// Replace the constant by a reference to the float64 constant
		// symbol returned by ctxt.Float64Sym.
		if p.From.Type == obj.TYPE_FCONST {
			f64 := p.From.Val.(float64)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float64Sym(f64)
			p.From.Offset = 0
		}
	}

	// With -dynlink, global data is accessed through the GOT.
	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p, newprog)
	}

	// With -shared on 386, symbol references are made PC-relative.
	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
		rewriteToPcrel(ctxt, p, newprog)
	}
}
   321  
// rewriteToUseGot rewrites p, if necessary, to access global data via the
// global offset table.
//
// It handles, in order: DUFFCOPY/DUFFZERO pseudo-instructions, instructions
// that take the address of a non-local NAME_EXTERN symbol, CALLs (on 386
// only), and instructions with a non-local NAME_EXTERN memory operand in
// From or To. Replacement instructions are allocated with newprog and
// appended after p; where p is fully replaced it is turned into a NOP with
// obj.Nopout. Unsupported shapes are reported through ctxt.Diag.
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// lea and mov are the pointer-sized LEA/MOV opcodes for the target;
	// reg is the scratch register that receives the GOT entry.
	var lea, mov obj.As
	var reg int16
	if ctxt.Arch.Family == sys.AMD64 {
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
			// Special case: clobber the destination register with
			// the PC so we don't have to clobber CX.
			// The SSA backend depends on CX not being clobbered across LEAL.
			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
			reg = p.To.Reg
		}
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//     ADUFFxxx $offset
		// becomes
		//     $MOV runtime.duffxxx@GOT, $reg
		//     $LEA $offset($reg), $reg
		//     CALL $reg
		// (we use LEAx rather than ADDx because ADDx clobbers
		// flags and duffzero on 386 does not otherwise do so).
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
		} else {
			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
		}
		// Save the offset before p.To is overwritten below.
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(p, newprog)
		p1.As = lea
		p1.From.Type = obj.TYPE_MEM
		p1.From.Offset = offset
		p1.From.Reg = reg
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(p1, newprog)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		// pAs and dest remember the original opcode and destination so
		// they can be re-emitted after the address has been loaded into reg.
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if ctxt.Arch.Family == sys.AMD64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = reg
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		// q tracks the last instruction emitted so far, so that further
		// instructions are appended in order.
		q := p
		if p.From.Offset != 0 {
			// The GOT entry holds the symbol address only; add the
			// offset with a separate LEA on the loaded register.
			q = obj.Appendp(p, newprog)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			// Re-issue the original operation with reg as its source.
			q = obj.Appendp(q, newprog)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = reg
		}
	}
	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	// source is the operand of p (From or To) that names a non-local
	// NAME_EXTERN symbol; if there is none, nothing more needs rewriting.
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
		source = &p.To
	} else {
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		//
		// We disable open-coded defers in buildssa() on 386 ONLY with shared
		// libraries because of this extra code added before deferreturn calls.
		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
			return
		}
		// p1 loads the GOT address into BX; p2 is a copy of the call.
		p1 := obj.Appendp(p, newprog)
		p2 := obj.Appendp(p1, newprog)

		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		if p.RestArgs != nil {
			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
		}
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		// The original call is replaced by the p1/p2 pair.
		obj.Nopout(p)
		return

	}
	// These instructions may name a symbol but are not data accesses.
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	// p1 loads the symbol's GOT entry into reg; p2 repeats p with the
	// symbolic operand replaced by a reference through reg.
	p1 := obj.Appendp(p, newprog)
	p2 := obj.Appendp(p1, newprog)

	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if from3 := p.GetFrom3(); from3 != nil {
		p2.AddRestSource(*from3)
	}
	// NOTE(review): these tests look only at Name, while source above was
	// chosen with the additional !Sym.Local() check. If From named a local
	// extern symbol and To a non-local one, the From branch would be taken
	// here although source is &p.To — confirm that shape cannot occur.
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}
	// The original instruction is replaced by the p1/p2 pair.
	obj.Nopout(p)
}
   517  
   518  func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   519  	// RegTo2 is set on the instructions we insert here so they don't get
   520  	// processed twice.
   521  	if p.RegTo2 != 0 {
   522  		return
   523  	}
   524  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
   525  		return
   526  	}
   527  	// Any Prog (aside from the above special cases) with an Addr with Name ==
   528  	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
   529  	// inserted before it.
   530  	isName := func(a *obj.Addr) bool {
   531  		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
   532  			return false
   533  		}
   534  		if a.Sym.Type == objabi.STLSBSS {
   535  			return false
   536  		}
   537  		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
   538  	}
   539  
   540  	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
   541  		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
   542  		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
   543  		// respectively.
   544  		if p.To.Type != obj.TYPE_REG {
   545  			q := obj.Appendp(p, newprog)
   546  			q.As = p.As
   547  			q.From.Type = obj.TYPE_REG
   548  			q.From.Reg = REG_CX
   549  			q.To = p.To
   550  			p.As = AMOVL
   551  			p.To.Type = obj.TYPE_REG
   552  			p.To.Reg = REG_CX
   553  			p.To.Sym = nil
   554  			p.To.Name = obj.NAME_NONE
   555  		}
   556  	}
   557  
   558  	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
   559  		return
   560  	}
   561  	var dst int16 = REG_CX
   562  	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   563  		dst = p.To.Reg
   564  		// Why? See the comment near the top of rewriteToUseGot above.
   565  		// AMOVLs might be introduced by the GOT rewrites.
   566  	}
   567  	q := obj.Appendp(p, newprog)
   568  	q.RegTo2 = 1
   569  	r := obj.Appendp(q, newprog)
   570  	r.RegTo2 = 1
   571  	q.As = obj.ACALL
   572  	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
   573  	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
   574  	q.To.Type = obj.TYPE_MEM
   575  	q.To.Name = obj.NAME_EXTERN
   576  	r.As = p.As
   577  	r.Scond = p.Scond
   578  	r.From = p.From
   579  	r.RestArgs = p.RestArgs
   580  	r.Reg = p.Reg
   581  	r.To = p.To
   582  	if isName(&p.From) {
   583  		r.From.Reg = dst
   584  	}
   585  	if isName(&p.To) {
   586  		r.To.Reg = dst
   587  	}
   588  	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
   589  		r.GetFrom3().Reg = dst
   590  	}
   591  	obj.Nopout(p)
   592  }
   593  
   594  // Prog.mark
   595  const (
   596  	markBit = 1 << 0	// used in errorCheck to avoid duplicate work
   597  )
   598  
   599  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
   600  	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
   601  		return
   602  	}
   603  
   604  	p := cursym.Func().Text
   605  	autoffset := int32(p.To.Offset)
   606  	if autoffset < 0 {
   607  		autoffset = 0
   608  	}
   609  
   610  	hasCall := false
   611  	for q := p; q != nil; q = q.Link {
   612  		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
   613  			hasCall = true
   614  			break
   615  		}
   616  	}
   617  
   618  	var bpsize int
   619  	if ctxt.Arch.Family == sys.AMD64 &&
   620  		!p.From.Sym.NoFrame() &&	// (1) below
   621  		!(autoffset == 0 && !hasCall) {	// (2) below
   622  		// Make room to save a base pointer.
   623  		// There are 2 cases we must avoid:
   624  		// 1) If noframe is set (which we do for functions which tail call).
   625  		// For performance, we also want to avoid:
   626  		// 2) Frameless leaf functions
   627  		bpsize = ctxt.Arch.PtrSize
   628  		autoffset += int32(bpsize)
   629  		p.To.Offset += int64(bpsize)
   630  	} else {
   631  		bpsize = 0
   632  		p.From.Sym.Set(obj.AttrNoFrame, true)
   633  	}
   634  
   635  	textarg := int64(p.To.Val.(int32))
   636  	cursym.Func().Args = int32(textarg)
   637  	cursym.Func().Locals = int32(p.To.Offset)
   638  
   639  	// TODO(rsc): Remove.
   640  	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
   641  		cursym.Func().Locals = 0
   642  	}
   643  
   644  	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
   645  	if ctxt.Arch.Family == sys.AMD64 && autoffset < abi.StackSmall && !p.From.Sym.NoSplit() {
   646  		leaf := true
   647  	LeafSearch:
   648  		for q := p; q != nil; q = q.Link {
   649  			switch q.As {
   650  			case obj.ACALL:
   651  				// Treat common runtime calls that take no arguments
   652  				// the same as duffcopy and duffzero.
   653  				if !isZeroArgRuntimeCall(q.To.Sym) {
   654  					leaf = false
   655  					break LeafSearch
   656  				}
   657  				fallthrough
   658  			case obj.ADUFFCOPY, obj.ADUFFZERO:
   659  				if autoffset >= abi.StackSmall-8 {
   660  					leaf = false
   661  					break LeafSearch
   662  				}
   663  			}
   664  		}
   665  
   666  		if leaf {
   667  			p.From.Sym.Set(obj.AttrNoSplit, true)
   668  		}
   669  	}
   670  
   671  	var regEntryTmp0, regEntryTmp1 int16
   672  	if ctxt.Arch.Family == sys.AMD64 {
   673  		regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1
   674  	} else {
   675  		regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI
   676  	}
   677  
   678  	var regg int16
   679  	if !p.From.Sym.NoSplit() {
   680  		// Emit split check and load G register
   681  		p, regg = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
   682  	} else if p.From.Sym.Wrapper() {
   683  		// Load G register for the wrapper code
   684  		p, regg = loadG(ctxt, cursym, p, newprog)
   685  	}
   686  
   687  	if bpsize > 0 {
   688  		// Save caller's BP
   689  		p = obj.Appendp(p, newprog)
   690  
   691  		p.As = APUSHQ
   692  		p.From.Type = obj.TYPE_REG
   693  		p.From.Reg = REG_BP
   694  
   695  		// Move current frame to BP
   696  		p = obj.Appendp(p, newprog)
   697  
   698  		p.As = AMOVQ
   699  		p.From.Type = obj.TYPE_REG
   700  		p.From.Reg = REG_SP
   701  		p.To.Type = obj.TYPE_REG
   702  		p.To.Reg = REG_BP
   703  	}
   704  
   705  	if autoffset%int32(ctxt.Arch.RegSize) != 0 {
   706  		ctxt.Diag("unaligned stack size %d", autoffset)
   707  	}
   708  
   709  	// localoffset is autoffset discounting the frame pointer,
   710  	// which has already been allocated in the stack.
   711  	localoffset := autoffset - int32(bpsize)
   712  	if localoffset != 0 {
   713  		p = obj.Appendp(p, newprog)
   714  		p.As = AADJSP
   715  		p.From.Type = obj.TYPE_CONST
   716  		p.From.Offset = int64(localoffset)
   717  		p.Spadj = localoffset
   718  	}
   719  
   720  	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
   721  	// TODO: are there other cases (e.g., wrapper functions) that need marking?
   722  	if autoffset != 0 {
   723  		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
   724  	}
   725  
   726  	if cursym.Func().Text.From.Sym.Wrapper() {
   727  		// if g._panic != nil && g._panic.argp == FP {
   728  		//   g._panic.argp = bottom-of-frame
   729  		// }
   730  		//
   731  		//	MOVQ g_panic(g), regEntryTmp0
   732  		//	TESTQ regEntryTmp0, regEntryTmp0
   733  		//	JNE checkargp
   734  		// end:
   735  		//	NOP
   736  		//  ... rest of function ...
   737  		// checkargp:
   738  		//	LEAQ (autoffset+8)(SP), regEntryTmp1
   739  		//	CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   740  		//	JNE end
   741  		//  MOVQ SP, panic_argp(regEntryTmp0)
   742  		//  JMP end
   743  		//
   744  		// The NOP is needed to give the jumps somewhere to land.
   745  		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
   746  		//
   747  		// The layout is chosen to help static branch prediction:
   748  		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.
   749  
   750  		// MOVQ g_panic(g), regEntryTmp0
   751  		p = obj.Appendp(p, newprog)
   752  		p.As = AMOVQ
   753  		p.From.Type = obj.TYPE_MEM
   754  		p.From.Reg = regg
   755  		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize)	// g_panic
   756  		p.To.Type = obj.TYPE_REG
   757  		p.To.Reg = regEntryTmp0
   758  		if ctxt.Arch.Family == sys.I386 {
   759  			p.As = AMOVL
   760  		}
   761  
   762  		// TESTQ regEntryTmp0, regEntryTmp0
   763  		p = obj.Appendp(p, newprog)
   764  		p.As = ATESTQ
   765  		p.From.Type = obj.TYPE_REG
   766  		p.From.Reg = regEntryTmp0
   767  		p.To.Type = obj.TYPE_REG
   768  		p.To.Reg = regEntryTmp0
   769  		if ctxt.Arch.Family == sys.I386 {
   770  			p.As = ATESTL
   771  		}
   772  
   773  		// JNE checkargp (checkargp to be resolved later)
   774  		jne := obj.Appendp(p, newprog)
   775  		jne.As = AJNE
   776  		jne.To.Type = obj.TYPE_BRANCH
   777  
   778  		// end:
   779  		//  NOP
   780  		end := obj.Appendp(jne, newprog)
   781  		end.As = obj.ANOP
   782  
   783  		// Fast forward to end of function.
   784  		var last *obj.Prog
   785  		for last = end; last.Link != nil; last = last.Link {
   786  		}
   787  
   788  		// LEAQ (autoffset+8)(SP), regEntryTmp1
   789  		p = obj.Appendp(last, newprog)
   790  		p.As = ALEAQ
   791  		p.From.Type = obj.TYPE_MEM
   792  		p.From.Reg = REG_SP
   793  		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
   794  		p.To.Type = obj.TYPE_REG
   795  		p.To.Reg = regEntryTmp1
   796  		if ctxt.Arch.Family == sys.I386 {
   797  			p.As = ALEAL
   798  		}
   799  
   800  		// Set jne branch target.
   801  		jne.To.SetTarget(p)
   802  
   803  		// CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   804  		p = obj.Appendp(p, newprog)
   805  		p.As = ACMPQ
   806  		p.From.Type = obj.TYPE_MEM
   807  		p.From.Reg = regEntryTmp0
   808  		p.From.Offset = 0	// Panic.argp
   809  		p.To.Type = obj.TYPE_REG
   810  		p.To.Reg = regEntryTmp1
   811  		if ctxt.Arch.Family == sys.I386 {
   812  			p.As = ACMPL
   813  		}
   814  
   815  		// JNE end
   816  		p = obj.Appendp(p, newprog)
   817  		p.As = AJNE
   818  		p.To.Type = obj.TYPE_BRANCH
   819  		p.To.SetTarget(end)
   820  
   821  		// MOVQ SP, panic_argp(regEntryTmp0)
   822  		p = obj.Appendp(p, newprog)
   823  		p.As = AMOVQ
   824  		p.From.Type = obj.TYPE_REG
   825  		p.From.Reg = REG_SP
   826  		p.To.Type = obj.TYPE_MEM
   827  		p.To.Reg = regEntryTmp0
   828  		p.To.Offset = 0	// Panic.argp
   829  		if ctxt.Arch.Family == sys.I386 {
   830  			p.As = AMOVL
   831  		}
   832  
   833  		// JMP end
   834  		p = obj.Appendp(p, newprog)
   835  		p.As = obj.AJMP
   836  		p.To.Type = obj.TYPE_BRANCH
   837  		p.To.SetTarget(end)
   838  
   839  		// Reset p for following code.
   840  		p = end
   841  	}
   842  
   843  	var deltasp int32
   844  	for p = cursym.Func().Text; p != nil; p = p.Link {
   845  		pcsize := ctxt.Arch.RegSize
   846  		switch p.From.Name {
   847  		case obj.NAME_AUTO:
   848  			p.From.Offset += int64(deltasp) - int64(bpsize)
   849  		case obj.NAME_PARAM:
   850  			p.From.Offset += int64(deltasp) + int64(pcsize)
   851  		}
   852  		if p.GetFrom3() != nil {
   853  			switch p.GetFrom3().Name {
   854  			case obj.NAME_AUTO:
   855  				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
   856  			case obj.NAME_PARAM:
   857  				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
   858  			}
   859  		}
   860  		switch p.To.Name {
   861  		case obj.NAME_AUTO:
   862  			p.To.Offset += int64(deltasp) - int64(bpsize)
   863  		case obj.NAME_PARAM:
   864  			p.To.Offset += int64(deltasp) + int64(pcsize)
   865  		}
   866  
   867  		switch p.As {
   868  		default:
   869  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
   870  				f := cursym.Func()
   871  				if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
   872  					f.FuncFlag |= abi.FuncFlagSPWrite
   873  					if ctxt.Debugvlog || !ctxt.IsAsm {
   874  						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
   875  						if !ctxt.IsAsm {
   876  							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
   877  							ctxt.DiagFlush()
   878  							log.Fatalf("bad SPWRITE")
   879  						}
   880  					}
   881  				}
   882  			}
   883  			continue
   884  
   885  		case APUSHL, APUSHFL:
   886  			deltasp += 4
   887  			p.Spadj = 4
   888  			continue
   889  
   890  		case APUSHQ, APUSHFQ:
   891  			deltasp += 8
   892  			p.Spadj = 8
   893  			continue
   894  
   895  		case APUSHW, APUSHFW:
   896  			deltasp += 2
   897  			p.Spadj = 2
   898  			continue
   899  
   900  		case APOPL, APOPFL:
   901  			deltasp -= 4
   902  			p.Spadj = -4
   903  			continue
   904  
   905  		case APOPQ, APOPFQ:
   906  			deltasp -= 8
   907  			p.Spadj = -8
   908  			continue
   909  
   910  		case APOPW, APOPFW:
   911  			deltasp -= 2
   912  			p.Spadj = -2
   913  			continue
   914  
   915  		case AADJSP:
   916  			p.Spadj = int32(p.From.Offset)
   917  			deltasp += int32(p.From.Offset)
   918  			continue
   919  
   920  		case obj.ARET:
   921  			// do nothing
   922  		}
   923  
   924  		if autoffset != deltasp {
   925  			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
   926  		}
   927  
   928  		if autoffset != 0 {
   929  			to := p.To	// Keep To attached to RET for retjmp below
   930  			p.To = obj.Addr{}
   931  			if localoffset != 0 {
   932  				p.As = AADJSP
   933  				p.From.Type = obj.TYPE_CONST
   934  				p.From.Offset = int64(-localoffset)
   935  				p.Spadj = -localoffset
   936  				p = obj.Appendp(p, newprog)
   937  			}
   938  
   939  			if bpsize > 0 {
   940  				// Restore caller's BP
   941  				p.As = APOPQ
   942  				p.To.Type = obj.TYPE_REG
   943  				p.To.Reg = REG_BP
   944  				p.Spadj = -int32(bpsize)
   945  				p = obj.Appendp(p, newprog)
   946  			}
   947  
   948  			p.As = obj.ARET
   949  			p.To = to
   950  
   951  			// If there are instructions following
   952  			// this ARET, they come from a branch
   953  			// with the same stackframe, so undo
   954  			// the cleanup.
   955  			p.Spadj = +autoffset
   956  		}
   957  
   958  		if p.To.Sym != nil {	// retjmp
   959  			p.As = obj.AJMP
   960  		}
   961  	}
   962  }
   963  
   964  func isZeroArgRuntimeCall(s *obj.LSym) bool {
   965  	if s == nil {
   966  		return false
   967  	}
   968  	switch s.Name {
   969  	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
   970  		return true
   971  	}
   972  	if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
   973  		// These functions do take arguments (in registers),
   974  		// but use no stack before they do a stack check. We
   975  		// should include them. See issue 31219.
   976  		return true
   977  	}
   978  	return false
   979  }
   980  
   981  func indir_cx(ctxt *obj.Link, a *obj.Addr) {
   982  	a.Type = obj.TYPE_MEM
   983  	a.Reg = REG_CX
   984  }
   985  
   986  // loadG ensures the G is loaded into a register (either CX or REGG),
   987  // appending instructions to p if necessary. It returns the new last
   988  // instruction and the G register.
   989  func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
   990  	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
   991  		// Use the G register directly in ABIInternal
   992  		return p, REGG
   993  	}
   994  
   995  	var regg int16 = REG_CX
   996  	if ctxt.Arch.Family == sys.AMD64 {
   997  		regg = REGG	// == REG_R14
   998  	}
   999  
  1000  	p = obj.Appendp(p, newprog)
  1001  	p.As = AMOVQ
  1002  	if ctxt.Arch.PtrSize == 4 {
  1003  		p.As = AMOVL
  1004  	}
  1005  	p.From.Type = obj.TYPE_MEM
  1006  	p.From.Reg = REG_TLS
  1007  	p.From.Offset = 0
  1008  	p.To.Type = obj.TYPE_REG
  1009  	p.To.Reg = regg
  1010  
  1011  	// Rewrite TLS instruction if necessary.
  1012  	next := p.Link
  1013  	progedit(ctxt, p, newprog)
  1014  	for p.Link != next {
  1015  		p = p.Link
  1016  		progedit(ctxt, p, newprog)
  1017  	}
  1018  
  1019  	if p.From.Index == REG_TLS {
  1020  		p.From.Scale = 2
  1021  	}
  1022  
  1023  	return p, regg
  1024  }
  1025  
// stacksplit appends to p (it does not overwrite p) the code that checks
// whether the stack must be split before the function body runs.
//
// The G register is obtained here through loadG; callers do not need to
// have loaded it. framesize selects one of three comparison sequences
// against the stack guard (small, large, or huge frame with explicit
// underflow protection). The out-of-line call to runtime.morestack (or
// one of its variants) is appended after the last instruction of the
// function and jumps back to the instruction following the incoming p.
//
// textarg is not referenced by this function.
//
// It returns the last instruction of the inline sequence (the result of
// EndUnsafePoint after the conditional branch) and the G register.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) (*obj.Prog, int16) {
	// Default to the 64-bit opcode forms; switched to 32-bit forms on 386.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	tmp := int16(REG_AX)	// use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	// When a maymorestack hook is configured, call it first. Register
	// arguments are spilled around the call, and REGCTXT is saved with
	// PUSH/POP when the function needs its context register.
	if ctxt.Flag_maymorestack != "" {
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// (The jump targets startPred.Link, i.e. whatever is appended next.)
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1 is the extra JCS branch emitted only for huge frames; it stays
	// nil in the other two cases.
	var q1 *obj.Prog
	if framesize <= abi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize)	// G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize)	// G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= abi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - abi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize)	// G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize)	// G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog)	// see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > abi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog)	// see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - abi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		// The JCS target is filled in at the end, once the out-of-line
		// morestack sequence exists.
		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize)	// G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize)	// G.stackguard1
		}
	}

	// common
	// Conditional branch to the out-of-line sequence; like q1, its target
	// is set at the end of this function.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	// end is the last inline instruction and is what the caller gets back.
	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Walk to the last instruction currently in the function so the
	// out-of-line sequence can be appended after it.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	// The NOP carries Spadj = -framesize; the JMP back below carries the
	// matching +framesize.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	// The call is attributed to the position of the function's TEXT
	// instruction.
	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Pick the morestack variant: morestackc for C functions,
	// morestack_noctxt when the function does not need its context
	// register, plain morestack otherwise.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	// progedit is applied to call and to every instruction linked after
	// it; callend finishes on the last instruction in the list.
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	// The instructions which unspill regs should be preemptible.
	pcdata = ctxt.EndUnsafePoint(callend, newprog, -1)
	unspill := cursym.Func().UnspillRegisterArgs(pcdata, newprog)

	// After morestack returns, jump back to the first instruction that
	// was appended after startPred.
	jmp := obj.Appendp(unspill, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize

	// Resolve the pending branches: both JLS and (for huge frames) JCS
	// enter the out-of-line sequence at spill.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end, rg
}
  1246  
  1247  func isR15(r int16) bool {
  1248  	return r == REG_R15 || r == REG_R15B
  1249  }
  1250  func addrMentionsR15(a *obj.Addr) bool {
  1251  	if a == nil {
  1252  		return false
  1253  	}
  1254  	return isR15(a.Reg) || isR15(a.Index)
  1255  }
  1256  func progMentionsR15(p *obj.Prog) bool {
  1257  	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
  1258  }
  1259  
  1260  func addrUsesGlobal(a *obj.Addr) bool {
  1261  	if a == nil {
  1262  		return false
  1263  	}
  1264  	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
  1265  }
  1266  func progUsesGlobal(p *obj.Prog) bool {
  1267  	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
  1268  		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
  1269  		// or R15 would be dead at them anyway.
  1270  		return false
  1271  	}
  1272  	if p.As == ALEAQ {
  1273  		// The GOT entry is placed directly in the destination register; R15 is not used.
  1274  		return false
  1275  	}
  1276  	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
  1277  }
  1278  
  1279  type rwMask int
  1280  
  1281  const (
  1282  	readFrom	rwMask	= 1 << iota
  1283  	readTo
  1284  	readReg
  1285  	readFrom3
  1286  	writeFrom
  1287  	writeTo
  1288  	writeReg
  1289  	writeFrom3
  1290  )
  1291  
  1292  // progRW returns a mask describing the effects of the instruction p.
  1293  // Note: this isn't exhaustively accurate. It is only currently used for detecting
  1294  // reads/writes to R15, so SSE register behavior isn't fully correct, and
  1295  // other weird cases (e.g. writes to DX by CLD) also aren't captured.
  1296  func progRW(p *obj.Prog) rwMask {
  1297  	var m rwMask
  1298  	// Default for most instructions
  1299  	if p.From.Type != obj.TYPE_NONE {
  1300  		m |= readFrom
  1301  	}
  1302  	if p.To.Type != obj.TYPE_NONE {
  1303  		// Most x86 instructions update the To value
  1304  		m |= readTo | writeTo
  1305  	}
  1306  	if p.Reg != 0 {
  1307  		m |= readReg
  1308  	}
  1309  	if p.GetFrom3() != nil {
  1310  		m |= readFrom3
  1311  	}
  1312  
  1313  	// Lots of exceptions to the above defaults.
  1314  	name := p.As.String()
  1315  	if strings.HasPrefix(name, "MOV") || strings.HasPrefix(name, "PMOV") {
  1316  		// MOV instructions don't read To.
  1317  		m &^= readTo
  1318  	}
  1319  	switch p.As {
  1320  	case APOPW, APOPL, APOPQ,
  1321  		ALEAL, ALEAQ,
  1322  		AIMUL3W, AIMUL3L, AIMUL3Q,
  1323  		APEXTRB, APEXTRW, APEXTRD, APEXTRQ, AVPEXTRB, AVPEXTRW, AVPEXTRD, AVPEXTRQ, AEXTRACTPS,
  1324  		ABSFW, ABSFL, ABSFQ, ABSRW, ABSRL, ABSRQ, APOPCNTW, APOPCNTL, APOPCNTQ, ALZCNTW, ALZCNTL, ALZCNTQ,
  1325  		ASHLXL, ASHLXQ, ASHRXL, ASHRXQ, ASARXL, ASARXQ:
  1326  		// These instructions are pure writes to To. They don't use its old value.
  1327  		m &^= readTo
  1328  	case AXORL, AXORQ:
  1329  		// Register-clearing idiom doesn't read previous value.
  1330  		if p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG && p.From.Reg == p.To.Reg {
  1331  			m &^= readFrom | readTo
  1332  		}
  1333  	case AMULXL, AMULXQ:
  1334  		// These are write-only to both To and From3.
  1335  		m &^= readTo | readFrom3
  1336  		m |= writeFrom3
  1337  	}
  1338  	return m
  1339  }
  1340  
  1341  // progReadsR15 reports whether p reads the register R15.
  1342  func progReadsR15(p *obj.Prog) bool {
  1343  	m := progRW(p)
  1344  	if m&readFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1345  		return true
  1346  	}
  1347  	if m&readTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1348  		return true
  1349  	}
  1350  	if m&readReg != 0 && isR15(p.Reg) {
  1351  		return true
  1352  	}
  1353  	if m&readFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1354  		return true
  1355  	}
  1356  	// reads of the index registers
  1357  	if p.From.Type == obj.TYPE_MEM && (isR15(p.From.Reg) || isR15(p.From.Index)) {
  1358  		return true
  1359  	}
  1360  	if p.To.Type == obj.TYPE_MEM && (isR15(p.To.Reg) || isR15(p.To.Index)) {
  1361  		return true
  1362  	}
  1363  	if f3 := p.GetFrom3(); f3 != nil && f3.Type == obj.TYPE_MEM && (isR15(f3.Reg) || isR15(f3.Index)) {
  1364  		return true
  1365  	}
  1366  	return false
  1367  }
  1368  
  1369  // progWritesR15 reports whether p writes the register R15.
  1370  func progWritesR15(p *obj.Prog) bool {
  1371  	m := progRW(p)
  1372  	if m&writeFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1373  		return true
  1374  	}
  1375  	if m&writeTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1376  		return true
  1377  	}
  1378  	if m&writeReg != 0 && isR15(p.Reg) {
  1379  		return true
  1380  	}
  1381  	if m&writeFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1382  		return true
  1383  	}
  1384  	return false
  1385  }
  1386  
  1387  func errorCheck(ctxt *obj.Link, s *obj.LSym) {
  1388  	// When dynamic linking, R15 is used to access globals. Reject code that
  1389  	// uses R15 after a global variable access.
  1390  	if !ctxt.Flag_dynlink {
  1391  		return
  1392  	}
  1393  
  1394  	// Flood fill all the instructions where R15's value is junk.
  1395  	// If there are any uses of R15 in that set, report an error.
  1396  	var work []*obj.Prog
  1397  	var mentionsR15 bool
  1398  	for p := s.Func().Text; p != nil; p = p.Link {
  1399  		if progUsesGlobal(p) {
  1400  			work = append(work, p)
  1401  			p.Mark |= markBit
  1402  		}
  1403  		if progMentionsR15(p) {
  1404  			mentionsR15 = true
  1405  		}
  1406  	}
  1407  	if mentionsR15 {
  1408  		for len(work) > 0 {
  1409  			p := work[len(work)-1]
  1410  			work = work[:len(work)-1]
  1411  			if progReadsR15(p) {
  1412  				pos := ctxt.PosTable.Pos(p.Pos)
  1413  				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
  1414  				break	// only report one error
  1415  			}
  1416  			if progWritesR15(p) {
  1417  				// R15 is overwritten by this instruction. Its value is not junk any more.
  1418  				continue
  1419  			}
  1420  			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
  1421  				q.Mark |= markBit
  1422  				work = append(work, q)
  1423  			}
  1424  			if p.As == obj.AJMP || p.As == obj.ARET {
  1425  				continue	// no fallthrough
  1426  			}
  1427  			if q := p.Link; q != nil && q.Mark&markBit == 0 {
  1428  				q.Mark |= markBit
  1429  				work = append(work, q)
  1430  			}
  1431  		}
  1432  	}
  1433  
  1434  	// Clean up.
  1435  	for p := s.Func().Text; p != nil; p = p.Link {
  1436  		p.Mark &^= markBit
  1437  	}
  1438  }
  1439  
// unaryDst is the opcode set installed as the UnaryDst table of both
// Linkamd64 and Link386. Every listed opcode maps to true; opcodes that
// are absent read as false.
//
// NOTE(review): judging by the name and the entries (INC/DEC/NEG/NOT,
// POP, SETcc, the state-saving instructions, ...), these appear to be
// instructions whose single operand is a destination — confirm against
// the documentation of obj.LinkArch.UnaryDst.
//
// The entries are kept in alphabetical order.
var unaryDst = map[obj.As]bool{
	ABSWAPL:	true,
	ABSWAPQ:	true,
	ACLDEMOTE:	true,
	ACLFLUSH:	true,
	ACLFLUSHOPT:	true,
	ACLWB:		true,
	ACMPXCHG16B:	true,
	ACMPXCHG8B:	true,
	ADECB:		true,
	ADECL:		true,
	ADECQ:		true,
	ADECW:		true,
	AFBSTP:		true,
	AFFREE:		true,
	AFLDENV:	true,
	AFSAVE:		true,
	AFSTCW:		true,
	AFSTENV:	true,
	AFSTSW:		true,
	AFXSAVE64:	true,
	AFXSAVE:	true,
	AINCB:		true,
	AINCL:		true,
	AINCQ:		true,
	AINCW:		true,
	ANEGB:		true,
	ANEGL:		true,
	ANEGQ:		true,
	ANEGW:		true,
	ANOTB:		true,
	ANOTL:		true,
	ANOTQ:		true,
	ANOTW:		true,
	APOPL:		true,
	APOPQ:		true,
	APOPW:		true,
	ARDFSBASEL:	true,
	ARDFSBASEQ:	true,
	ARDGSBASEL:	true,
	ARDGSBASEQ:	true,
	ARDPID:		true,
	ARDRANDL:	true,
	ARDRANDQ:	true,
	ARDRANDW:	true,
	ARDSEEDL:	true,
	ARDSEEDQ:	true,
	ARDSEEDW:	true,
	ASETCC:		true,
	ASETCS:		true,
	ASETEQ:		true,
	ASETGE:		true,
	ASETGT:		true,
	ASETHI:		true,
	ASETLE:		true,
	ASETLS:		true,
	ASETLT:		true,
	ASETMI:		true,
	ASETNE:		true,
	ASETOC:		true,
	ASETOS:		true,
	ASETPC:		true,
	ASETPL:		true,
	ASETPS:		true,
	ASGDT:		true,
	ASIDT:		true,
	ASLDTL:		true,
	ASLDTQ:		true,
	ASLDTW:		true,
	ASMSWL:		true,
	ASMSWQ:		true,
	ASMSWW:		true,
	ASTMXCSR:	true,
	ASTRL:		true,
	ASTRQ:		true,
	ASTRW:		true,
	AXSAVE64:	true,
	AXSAVE:		true,
	AXSAVEC64:	true,
	AXSAVEC:	true,
	AXSAVEOPT64:	true,
	AXSAVEOPT:	true,
	AXSAVES64:	true,
	AXSAVES:	true,
}
  1525  
  1526  var Linkamd64 = obj.LinkArch{
  1527  	Arch:		sys.ArchAMD64,
  1528  	Init:		instinit,
  1529  	ErrorCheck:	errorCheck,
  1530  	Preprocess:	preprocess,
  1531  	Assemble:	span6,
  1532  	Progedit:	progedit,
  1533  	SEH:		populateSeh,
  1534  	UnaryDst:	unaryDst,
  1535  	DWARFRegisters:	AMD64DWARFRegisters,
  1536  }
  1537  
  1538  var Link386 = obj.LinkArch{
  1539  	Arch:		sys.Arch386,
  1540  	Init:		instinit,
  1541  	Preprocess:	preprocess,
  1542  	Assemble:	span6,
  1543  	Progedit:	progedit,
  1544  	UnaryDst:	unaryDst,
  1545  	DWARFRegisters:	X86DWARFRegisters,
  1546  }