github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/liblink/obj6.c (about)

     1  // Inferno utils/6l/pass.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include <bio.h>
    34  #include <link.h>
    35  #include "../cmd/6l/6.out.h"
    36  #include "../runtime/stack.h"
    37  
    38  static Prog zprg = {
    39  	.back = 2,
    40  	.as = AGOK,
    41  	.from = {
    42  		.type = D_NONE,
    43  		.index = D_NONE,
    44  	},
    45  	.to = {
    46  		.type = D_NONE,
    47  		.index = D_NONE,
    48  	},
    49  };
    50  
    51  static void
    52  nopout(Prog *p)
    53  {
    54  	p->as = ANOP;
    55  	p->from.type = D_NONE;
    56  	p->to.type = D_NONE;
    57  }
    58  
    59  static int
    60  symtype(Addr *a)
    61  {
    62  	int t;
    63  
    64  	t = a->type;
    65  	if(t == D_ADDR)
    66  		t = a->index;
    67  	return t;
    68  }
    69  
    70  static int
    71  isdata(Prog *p)
    72  {
    73  	return p->as == ADATA || p->as == AGLOBL;
    74  }
    75  
    76  static int
    77  iscall(Prog *p)
    78  {
    79  	return p->as == ACALL;
    80  }
    81  
    82  static int
    83  datasize(Prog *p)
    84  {
    85  	return p->from.scale;
    86  }
    87  
    88  static int
    89  textflag(Prog *p)
    90  {
    91  	return p->from.scale;
    92  }
    93  
    94  static void
    95  settextflag(Prog *p, int f)
    96  {
    97  	p->from.scale = f;
    98  }
    99  
   100  static void nacladdr(Link*, Prog*, Addr*);
   101  
   102  static int
   103  canuselocaltls(Link *ctxt)
   104  {
   105  	switch(ctxt->headtype) {
   106  	case Hplan9:
   107  	case Hwindows:
   108  		return 0;
   109  	}
   110  	return 1;
   111  }
   112  
   113  static void
   114  progedit(Link *ctxt, Prog *p)
   115  {
   116  	char literal[64];
   117  	LSym *s;
   118  	Prog *q;
   119  
   120  	// Thread-local storage references use the TLS pseudo-register.
   121  	// As a register, TLS refers to the thread-local storage base, and it
   122  	// can only be loaded into another register:
   123  	//
   124  	//         MOVQ TLS, AX
   125  	//
   126  	// An offset from the thread-local storage base is written off(reg)(TLS*1).
   127  	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
   128  	// indexing from the loaded TLS base. This emits a relocation so that
   129  	// if the linker needs to adjust the offset, it can. For example:
   130  	//
   131  	//         MOVQ TLS, AX
   132  	//         MOVQ 8(AX)(TLS*1), CX // load m into CX
   133  	// 
   134  	// On systems that support direct access to the TLS memory, this
   135  	// pair of instructions can be reduced to a direct TLS memory reference:
   136  	// 
   137  	//         MOVQ 8(TLS), CX // load m into CX
   138  	//
   139  	// The 2-instruction and 1-instruction forms correspond roughly to
   140  	// ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
   141  	// 
   142  	// We applies this rewrite on systems that support the 1-instruction form.
   143  	// The decision is made using only the operating system (and probably
   144  	// the -shared flag, eventually), not the link mode. If some link modes
   145  	// on a particular operating system require the 2-instruction form,
   146  	// then all builds for that operating system will use the 2-instruction
   147  	// form, so that the link mode decision can be delayed to link time.
   148  	//
   149  	// In this way, all supported systems use identical instructions to
   150  	// access TLS, and they are rewritten appropriately first here in
   151  	// liblink and then finally using relocations in the linker.
   152  
   153  	if(canuselocaltls(ctxt)) {
   154  		// Reduce TLS initial exec model to TLS local exec model.
   155  		// Sequences like
   156  		//	MOVQ TLS, BX
   157  		//	... off(BX)(TLS*1) ...
   158  		// become
   159  		//	NOP
   160  		//	... off(TLS) ...
   161  		//
   162  		// TODO(rsc): Remove the Hsolaris special case. It exists only to
   163  		// guarantee we are producing byte-identical binaries as before this code.
   164  		// But it should be unnecessary.
   165  		if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris)
   166  			nopout(p);
   167  		if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) {
   168  			p->from.type = D_INDIR+D_TLS;
   169  			p->from.scale = 0;
   170  			p->from.index = D_NONE;
   171  		}
   172  		if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) {
   173  			p->to.type = D_INDIR+D_TLS;
   174  			p->to.scale = 0;
   175  			p->to.index = D_NONE;
   176  		}
   177  	} else {
   178  		// As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
   179  		// The instruction
   180  		//	MOVQ off(TLS), BX
   181  		// becomes the sequence
   182  		//	MOVQ TLS, BX
   183  		//	MOVQ off(BX)(TLS*1), BX
   184  		// This allows the C compilers to emit references to m and g using the direct off(TLS) form.
   185  		if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) {
   186  			q = appendp(ctxt, p);
   187  			q->as = p->as;
   188  			q->from = p->from;
   189  			q->from.type = D_INDIR + p->to.type;
   190  			q->from.index = D_TLS;
   191  			q->from.scale = 2; // TODO: use 1
   192  			q->to = p->to;
   193  			p->from.type = D_TLS;
   194  			p->from.index = D_NONE;
   195  			p->from.offset = 0;
   196  		}
   197  	}
   198  
   199  	// TODO: Remove.
   200  	if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) {
   201  		if(p->from.scale == 1 && p->from.index == D_TLS)
   202  			p->from.scale = 2;
   203  		if(p->to.scale == 1 && p->to.index == D_TLS)
   204  			p->to.scale = 2;
   205  	}
   206  
   207  	if(ctxt->headtype == Hnacl) {
   208  		nacladdr(ctxt, p, &p->from);
   209  		nacladdr(ctxt, p, &p->to);
   210  	}
   211  
   212  	// Maintain information about code generation mode.
   213  	if(ctxt->mode == 0)
   214  		ctxt->mode = 64;
   215  	p->mode = ctxt->mode;
   216  	
   217  	switch(p->as) {
   218  	case AMODE:
   219  		if(p->from.type == D_CONST || p->from.type == D_INDIR+D_NONE) {
   220  			switch((int)p->from.offset) {
   221  			case 16:
   222  			case 32:
   223  			case 64:
   224  				ctxt->mode = p->from.offset;
   225  				break;
   226  			}
   227  		}
   228  		nopout(p);
   229  		break;
   230  	}
   231  	
   232  	// Rewrite CALL/JMP/RET to symbol as D_BRANCH.
   233  	switch(p->as) {
   234  	case ACALL:
   235  	case AJMP:
   236  	case ARET:
   237  		if((p->to.type == D_EXTERN || p->to.type == D_STATIC) && p->to.sym != nil)
   238  			p->to.type = D_BRANCH;
   239  		break;
   240  	}
   241  
   242  	// Rewrite float constants to values stored in memory.
   243  	switch(p->as) {
   244  	case AMOVSS:
   245  		// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
   246  		if(p->from.type == D_FCONST)
   247  		if(p->from.u.dval == 0)
   248  		if(p->to.type >= D_X0)
   249  		if(p->to.type <= D_X15) {
   250  			p->as = AXORPS;
   251  			p->from.type = p->to.type;
   252  			p->from.index = p->to.index;
   253  			break;
   254  		}
   255  		// fallthrough
   256  
   257  	case AFMOVF:
   258  	case AFADDF:
   259  	case AFSUBF:
   260  	case AFSUBRF:
   261  	case AFMULF:
   262  	case AFDIVF:
   263  	case AFDIVRF:
   264  	case AFCOMF:
   265  	case AFCOMFP:
   266  	case AADDSS:
   267  	case ASUBSS:
   268  	case AMULSS:
   269  	case ADIVSS:
   270  	case ACOMISS:
   271  	case AUCOMISS:
   272  		if(p->from.type == D_FCONST) {
   273  			int32 i32;
   274  			float32 f32;
   275  			f32 = p->from.u.dval;
   276  			memmove(&i32, &f32, 4);
   277  			sprint(literal, "$f32.%08ux", (uint32)i32);
   278  			s = linklookup(ctxt, literal, 0);
   279  			if(s->type == 0) {
   280  				s->type = SRODATA;
   281  				adduint32(ctxt, s, i32);
   282  				s->reachable = 0;
   283  			}
   284  			p->from.type = D_EXTERN;
   285  			p->from.sym = s;
   286  			p->from.offset = 0;
   287  		}
   288  		break;
   289  
   290  	case AMOVSD:
   291  		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
   292  		if(p->from.type == D_FCONST)
   293  		if(p->from.u.dval == 0)
   294  		if(p->to.type >= D_X0)
   295  		if(p->to.type <= D_X15) {
   296  			p->as = AXORPS;
   297  			p->from.type = p->to.type;
   298  			p->from.index = p->to.index;
   299  			break;
   300  		}
   301  		// fallthrough
   302  	
   303  	case AFMOVD:
   304  	case AFADDD:
   305  	case AFSUBD:
   306  	case AFSUBRD:
   307  	case AFMULD:
   308  	case AFDIVD:
   309  	case AFDIVRD:
   310  	case AFCOMD:
   311  	case AFCOMDP:
   312  	case AADDSD:
   313  	case ASUBSD:
   314  	case AMULSD:
   315  	case ADIVSD:
   316  	case ACOMISD:
   317  	case AUCOMISD:
   318  		if(p->from.type == D_FCONST) {
   319  			int64 i64;
   320  			memmove(&i64, &p->from.u.dval, 8);
   321  			sprint(literal, "$f64.%016llux", (uvlong)i64);
   322  			s = linklookup(ctxt, literal, 0);
   323  			if(s->type == 0) {
   324  				s->type = SRODATA;
   325  				adduint64(ctxt, s, i64);
   326  				s->reachable = 0;
   327  			}
   328  			p->from.type = D_EXTERN;
   329  			p->from.sym = s;
   330  			p->from.offset = 0;
   331  		}
   332  		break;
   333  	}
   334  }
   335  
   336  static void
   337  nacladdr(Link *ctxt, Prog *p, Addr *a)
   338  {
   339  	if(p->as == ALEAL || p->as == ALEAQ)
   340  		return;
   341  	
   342  	if(a->type == D_BP || a->type == D_INDIR+D_BP) {
   343  		ctxt->diag("invalid address: %P", p);
   344  		return;
   345  	}
   346  	if(a->type == D_INDIR+D_TLS)
   347  		a->type = D_INDIR+D_BP;
   348  	else if(a->type == D_TLS)
   349  		a->type = D_BP;
   350  	if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) {
   351  		switch(a->type) {
   352  		case D_INDIR+D_BP:
   353  		case D_INDIR+D_SP:
   354  		case D_INDIR+D_R15:
   355  			// all ok
   356  			break;
   357  		default:
   358  			if(a->index != D_NONE)
   359  				ctxt->diag("invalid address %P", p);
   360  			a->index = a->type - D_INDIR;
   361  			if(a->index != D_NONE)
   362  				a->scale = 1;
   363  			a->type = D_INDIR+D_R15;
   364  			break;
   365  		}
   366  	}
   367  }
   368  
   369  static Prog*	load_g_cx(Link*, Prog*);
   370  static Prog*	stacksplit(Link*, Prog*, int32, int32, int, Prog**);
   371  static void	indir_cx(Link*, Addr*);
   372  
   373  static void
   374  parsetextconst(vlong arg, vlong *textstksiz, vlong *textarg)
   375  {
   376  	*textstksiz = arg & 0xffffffffLL;
   377  	if(*textstksiz & 0x80000000LL)
   378  		*textstksiz = -(-*textstksiz & 0xffffffffLL);
   379  
   380  	*textarg = (arg >> 32) & 0xffffffffLL;
   381  	if(*textarg & 0x80000000LL)
   382  		*textarg = 0;
   383  	*textarg = (*textarg+7) & ~7LL;
   384  }
   385  
   386  static void
   387  addstacksplit(Link *ctxt, LSym *cursym)
   388  {
   389  	Prog *p, *q, *p1, *p2;
   390  	int32 autoffset, deltasp;
   391  	int a, pcsize;
   392  	vlong textstksiz, textarg;
   393  
   394  	if(ctxt->tlsg == nil)
   395  		ctxt->tlsg = linklookup(ctxt, "runtime.tlsg", 0);
   396  	if(ctxt->symmorestack[0] == nil) {
   397  		ctxt->symmorestack[0] = linklookup(ctxt, "runtime.morestack", 0);
   398  		ctxt->symmorestack[1] = linklookup(ctxt, "runtime.morestack_noctxt", 0);
   399  	}
   400  
   401  	if(ctxt->headtype == Hplan9 && ctxt->plan9privates == nil)
   402  		ctxt->plan9privates = linklookup(ctxt, "_privates", 0);
   403  
   404  	ctxt->cursym = cursym;
   405  
   406  	if(cursym->text == nil || cursym->text->link == nil)
   407  		return;				
   408  
   409  	p = cursym->text;
   410  	parsetextconst(p->to.offset, &textstksiz, &textarg);
   411  	autoffset = textstksiz;
   412  	if(autoffset < 0)
   413  		autoffset = 0;
   414  	
   415  	cursym->args = p->to.offset>>32;
   416  	cursym->locals = textstksiz;
   417  
   418  	if(autoffset < StackSmall && !(p->from.scale & NOSPLIT)) {
   419  		for(q = p; q != nil; q = q->link) {
   420  			if(q->as == ACALL)
   421  				goto noleaf;
   422  			if((q->as == ADUFFCOPY || q->as == ADUFFZERO) && autoffset >= StackSmall - 8)
   423  				goto noleaf;
   424  		}
   425  		p->from.scale |= NOSPLIT;
   426  	noleaf:;
   427  	}
   428  
   429  	q = nil;
   430  	if(!(p->from.scale & NOSPLIT) || (p->from.scale & WRAPPER)) {
   431  		p = appendp(ctxt, p);
   432  		p = load_g_cx(ctxt, p); // load g into CX
   433  	}
   434  	if(!(cursym->text->from.scale & NOSPLIT))
   435  		p = stacksplit(ctxt, p, autoffset, textarg, !(cursym->text->from.scale&NEEDCTXT), &q); // emit split check
   436  
   437  	if(autoffset) {
   438  		if(autoffset%ctxt->arch->regsize != 0)
   439  			ctxt->diag("unaligned stack size %d", autoffset);
   440  		p = appendp(ctxt, p);
   441  		p->as = AADJSP;
   442  		p->from.type = D_CONST;
   443  		p->from.offset = autoffset;
   444  		p->spadj = autoffset;
   445  	} else {
   446  		// zero-byte stack adjustment.
   447  		// Insert a fake non-zero adjustment so that stkcheck can
   448  		// recognize the end of the stack-splitting prolog.
   449  		p = appendp(ctxt, p);
   450  		p->as = ANOP;
   451  		p->spadj = -ctxt->arch->ptrsize;
   452  		p = appendp(ctxt, p);
   453  		p->as = ANOP;
   454  		p->spadj = ctxt->arch->ptrsize;
   455  	}
   456  	if(q != nil)
   457  		q->pcond = p;
   458  	deltasp = autoffset;
   459  	
   460  	if(cursym->text->from.scale & WRAPPER) {
   461  		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
   462  		//
   463  		//	MOVQ g_panic(CX), BX
   464  		//	TESTQ BX, BX
   465  		//	JEQ end
   466  		//	LEAQ (autoffset+8)(SP), DI
   467  		//	CMPQ panic_argp(BX), DI
   468  		//	JNE end
   469  		//	MOVQ SP, panic_argp(BX)
   470  		// end:
   471  		//	NOP
   472  		//
   473  		// The NOP is needed to give the jumps somewhere to land.
   474  		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
   475  
   476  		p = appendp(ctxt, p);
   477  		p->as = AMOVQ;
   478  		p->from.type = D_INDIR+D_CX;
   479  		p->from.offset = 4*ctxt->arch->ptrsize; // G.panic
   480  		p->to.type = D_BX;
   481  		if(ctxt->headtype == Hnacl) {
   482  			p->as = AMOVL;
   483  			p->from.type = D_INDIR+D_R15;
   484  			p->from.scale = 1;
   485  			p->from.index = D_CX;
   486  		}
   487  
   488  		p = appendp(ctxt, p);
   489  		p->as = ATESTQ;
   490  		p->from.type = D_BX;
   491  		p->to.type = D_BX;
   492  		if(ctxt->headtype == Hnacl)
   493  			p->as = ATESTL;
   494  
   495  		p = appendp(ctxt, p);
   496  		p->as = AJEQ;
   497  		p->to.type = D_BRANCH;
   498  		p1 = p;
   499  
   500  		p = appendp(ctxt, p);
   501  		p->as = ALEAQ;
   502  		p->from.type = D_INDIR+D_SP;
   503  		p->from.offset = autoffset+8;
   504  		p->to.type = D_DI;
   505  		if(ctxt->headtype == Hnacl)
   506  			p->as = ALEAL;
   507  
   508  		p = appendp(ctxt, p);
   509  		p->as = ACMPQ;
   510  		p->from.type = D_INDIR+D_BX;
   511  		p->from.offset = 0; // Panic.argp
   512  		p->to.type = D_DI;
   513  		if(ctxt->headtype == Hnacl) {
   514  			p->as = ACMPL;
   515  			p->from.type = D_INDIR+D_R15;
   516  			p->from.scale = 1;
   517  			p->from.index = D_BX;
   518  		}
   519  
   520  		p = appendp(ctxt, p);
   521  		p->as = AJNE;
   522  		p->to.type = D_BRANCH;
   523  		p2 = p;
   524  
   525  		p = appendp(ctxt, p);
   526  		p->as = AMOVQ;
   527  		p->from.type = D_SP;
   528  		p->to.type = D_INDIR+D_BX;
   529  		p->to.offset = 0; // Panic.argp
   530  		if(ctxt->headtype == Hnacl) {
   531  			p->as = AMOVL;
   532  			p->to.type = D_INDIR+D_R15;
   533  			p->to.scale = 1;
   534  			p->to.index = D_BX;
   535  		}
   536  
   537  		p = appendp(ctxt, p);
   538  		p->as = ANOP;
   539  		p1->pcond = p;
   540  		p2->pcond = p;
   541  	}
   542  
   543  	if(ctxt->debugzerostack && autoffset && !(cursym->text->from.scale&NOSPLIT)) {
   544  		// 6l -Z means zero the stack frame on entry.
   545  		// This slows down function calls but can help avoid
   546  		// false positives in garbage collection.
   547  		p = appendp(ctxt, p);
   548  		p->as = AMOVQ;
   549  		p->from.type = D_SP;
   550  		p->to.type = D_DI;
   551  		
   552  		p = appendp(ctxt, p);
   553  		p->as = AMOVQ;
   554  		p->from.type = D_CONST;
   555  		p->from.offset = autoffset/8;
   556  		p->to.type = D_CX;
   557  		
   558  		p = appendp(ctxt, p);
   559  		p->as = AMOVQ;
   560  		p->from.type = D_CONST;
   561  		p->from.offset = 0;
   562  		p->to.type = D_AX;
   563  		
   564  		p = appendp(ctxt, p);
   565  		p->as = AREP;
   566  		
   567  		p = appendp(ctxt, p);
   568  		p->as = ASTOSQ;
   569  	}
   570  	
   571  	for(; p != nil; p = p->link) {
   572  		pcsize = p->mode/8;
   573  		a = p->from.type;
   574  		if(a == D_AUTO)
   575  			p->from.offset += deltasp;
   576  		if(a == D_PARAM)
   577  			p->from.offset += deltasp + pcsize;
   578  		a = p->to.type;
   579  		if(a == D_AUTO)
   580  			p->to.offset += deltasp;
   581  		if(a == D_PARAM)
   582  			p->to.offset += deltasp + pcsize;
   583  
   584  		switch(p->as) {
   585  		default:
   586  			continue;
   587  		case APUSHL:
   588  		case APUSHFL:
   589  			deltasp += 4;
   590  			p->spadj = 4;
   591  			continue;
   592  		case APUSHQ:
   593  		case APUSHFQ:
   594  			deltasp += 8;
   595  			p->spadj = 8;
   596  			continue;
   597  		case APUSHW:
   598  		case APUSHFW:
   599  			deltasp += 2;
   600  			p->spadj = 2;
   601  			continue;
   602  		case APOPL:
   603  		case APOPFL:
   604  			deltasp -= 4;
   605  			p->spadj = -4;
   606  			continue;
   607  		case APOPQ:
   608  		case APOPFQ:
   609  			deltasp -= 8;
   610  			p->spadj = -8;
   611  			continue;
   612  		case APOPW:
   613  		case APOPFW:
   614  			deltasp -= 2;
   615  			p->spadj = -2;
   616  			continue;
   617  		case ARET:
   618  			break;
   619  		}
   620  
   621  		if(autoffset != deltasp)
   622  			ctxt->diag("unbalanced PUSH/POP");
   623  
   624  		if(autoffset) {
   625  			p->as = AADJSP;
   626  			p->from.type = D_CONST;
   627  			p->from.offset = -autoffset;
   628  			p->spadj = -autoffset;
   629  			p = appendp(ctxt, p);
   630  			p->as = ARET;
   631  			// If there are instructions following
   632  			// this ARET, they come from a branch
   633  			// with the same stackframe, so undo
   634  			// the cleanup.
   635  			p->spadj = +autoffset;
   636  		}
   637  		if(p->to.sym) // retjmp
   638  			p->as = AJMP;
   639  	}
   640  }
   641  
   642  static void
   643  indir_cx(Link *ctxt, Addr *a)
   644  {
   645  	if(ctxt->headtype == Hnacl) {
   646  		a->type = D_INDIR + D_R15;
   647  		a->index = D_CX;
   648  		a->scale = 1;
   649  		return;
   650  	}
   651  
   652  	a->type = D_INDIR+D_CX;
   653  }
   654  
   655  // Append code to p to load g into cx.
   656  // Overwrites p with the first instruction (no first appendp).
   657  // Overwriting p is unusual but it lets use this in both the
   658  // prologue (caller must call appendp first) and in the epilogue.
   659  // Returns last new instruction.
   660  static Prog*
   661  load_g_cx(Link *ctxt, Prog *p)
   662  {	
   663  	Prog *next;
   664  
   665  	p->as = AMOVQ;
   666  	if(ctxt->arch->ptrsize == 4)
   667  		p->as = AMOVL;
   668  	p->from.type = D_INDIR+D_TLS;
   669  	p->from.offset = 0;
   670  	p->to.type = D_CX;
   671  	
   672  	next = p->link;
   673  	progedit(ctxt, p);
   674  	while(p->link != next)
   675  		p = p->link;
   676  	
   677  	if(p->from.index == D_TLS)
   678  		p->from.scale = 2;
   679  
   680  	return p;
   681  }
   682  
   683  // Append code to p to check for stack split.
   684  // Appends to (does not overwrite) p.
   685  // Assumes g is in CX.
   686  // Returns last new instruction.
   687  // On return, *jmpok is the instruction that should jump
   688  // to the stack frame allocation if no split is needed.
   689  static Prog*
   690  stacksplit(Link *ctxt, Prog *p, int32 framesize, int32 textarg, int noctxt, Prog **jmpok)
   691  {
   692  	Prog *q, *q1;
   693  	int cmp, lea, mov, sub;
   694  
   695  	USED(textarg);
   696  	cmp = ACMPQ;
   697  	lea = ALEAQ;
   698  	mov = AMOVQ;
   699  	sub = ASUBQ;
   700  
   701  	if(ctxt->headtype == Hnacl) {
   702  		cmp = ACMPL;
   703  		lea = ALEAL;
   704  		mov = AMOVL;
   705  		sub = ASUBL;
   706  	}
   707  
   708  	q1 = nil;
   709  	if(framesize <= StackSmall) {
   710  		// small stack: SP <= stackguard
   711  		//	CMPQ SP, stackguard
   712  		p = appendp(ctxt, p);
   713  		p->as = cmp;
   714  		p->from.type = D_SP;
   715  		indir_cx(ctxt, &p->to);
   716  		p->to.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
   717  		if(ctxt->cursym->cfunc)
   718  			p->to.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
   719  	} else if(framesize <= StackBig) {
   720  		// large stack: SP-framesize <= stackguard-StackSmall
   721  		//	LEAQ -xxx(SP), AX
   722  		//	CMPQ AX, stackguard
   723  		p = appendp(ctxt, p);
   724  		p->as = lea;
   725  		p->from.type = D_INDIR+D_SP;
   726  		p->from.offset = -(framesize-StackSmall);
   727  		p->to.type = D_AX;
   728  
   729  		p = appendp(ctxt, p);
   730  		p->as = cmp;
   731  		p->from.type = D_AX;
   732  		indir_cx(ctxt, &p->to);
   733  		p->to.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
   734  		if(ctxt->cursym->cfunc)
   735  			p->to.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
   736  	} else {
   737  		// Such a large stack we need to protect against wraparound.
   738  		// If SP is close to zero:
   739  		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
   740  		// The +StackGuard on both sides is required to keep the left side positive:
   741  		// SP is allowed to be slightly below stackguard. See stack.h.
   742  		//
   743  		// Preemption sets stackguard to StackPreempt, a very large value.
   744  		// That breaks the math above, so we have to check for that explicitly.
   745  		//	MOVQ	stackguard, CX
   746  		//	CMPQ	CX, $StackPreempt
   747  		//	JEQ	label-of-call-to-morestack
   748  		//	LEAQ	StackGuard(SP), AX
   749  		//	SUBQ	CX, AX
   750  		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))
   751  
   752  		p = appendp(ctxt, p);
   753  		p->as = mov;
   754  		indir_cx(ctxt, &p->from);
   755  		p->from.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
   756  		if(ctxt->cursym->cfunc)
   757  			p->from.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
   758  		p->to.type = D_SI;
   759  
   760  		p = appendp(ctxt, p);
   761  		p->as = cmp;
   762  		p->from.type = D_SI;
   763  		p->to.type = D_CONST;
   764  		p->to.offset = StackPreempt;
   765  
   766  		p = appendp(ctxt, p);
   767  		p->as = AJEQ;
   768  		p->to.type = D_BRANCH;
   769  		q1 = p;
   770  
   771  		p = appendp(ctxt, p);
   772  		p->as = lea;
   773  		p->from.type = D_INDIR+D_SP;
   774  		p->from.offset = StackGuard;
   775  		p->to.type = D_AX;
   776  		
   777  		p = appendp(ctxt, p);
   778  		p->as = sub;
   779  		p->from.type = D_SI;
   780  		p->to.type = D_AX;
   781  		
   782  		p = appendp(ctxt, p);
   783  		p->as = cmp;
   784  		p->from.type = D_AX;
   785  		p->to.type = D_CONST;
   786  		p->to.offset = framesize+(StackGuard-StackSmall);
   787  	}					
   788  
   789  	// common
   790  	p = appendp(ctxt, p);
   791  	p->as = AJHI;
   792  	p->to.type = D_BRANCH;
   793  	q = p;
   794  
   795  	p = appendp(ctxt, p);
   796  	p->as = ACALL;
   797  	p->to.type = D_BRANCH;
   798  	if(ctxt->cursym->cfunc)
   799  		p->to.sym = linklookup(ctxt, "runtime.morestackc", 0);
   800  	else
   801  		p->to.sym = ctxt->symmorestack[noctxt];
   802  	
   803  	p = appendp(ctxt, p);
   804  	p->as = AJMP;
   805  	p->to.type = D_BRANCH;
   806  	p->pcond = ctxt->cursym->text->link;
   807  	
   808  	if(q != nil)
   809  		q->pcond = p->link;
   810  	if(q1 != nil)
   811  		q1->pcond = q->link;
   812  
   813  	*jmpok = q;
   814  	return p;
   815  }
   816  
   817  static void xfol(Link*, Prog*, Prog**);
   818  
   819  static void
   820  follow(Link *ctxt, LSym *s)
   821  {
   822  	Prog *firstp, *lastp;
   823  
   824  	ctxt->cursym = s;
   825  
   826  	firstp = ctxt->arch->prg();
   827  	lastp = firstp;
   828  	xfol(ctxt, s->text, &lastp);
   829  	lastp->link = nil;
   830  	s->text = firstp->link;
   831  }
   832  
   833  static int
   834  nofollow(int a)
   835  {
   836  	switch(a) {
   837  	case AJMP:
   838  	case ARET:
   839  	case AIRETL:
   840  	case AIRETQ:
   841  	case AIRETW:
   842  	case ARETFL:
   843  	case ARETFQ:
   844  	case ARETFW:
   845  	case AUNDEF:
   846  		return 1;
   847  	}
   848  	return 0;
   849  }
   850  
   851  static int
   852  pushpop(int a)
   853  {
   854  	switch(a) {
   855  	case APUSHL:
   856  	case APUSHFL:
   857  	case APUSHQ:
   858  	case APUSHFQ:
   859  	case APUSHW:
   860  	case APUSHFW:
   861  	case APOPL:
   862  	case APOPFL:
   863  	case APOPQ:
   864  	case APOPFQ:
   865  	case APOPW:
   866  	case APOPFW:
   867  		return 1;
   868  	}
   869  	return 0;
   870  }
   871  
   872  static int
   873  relinv(int a)
   874  {
   875  	switch(a) {
   876  	case AJEQ:	return AJNE;
   877  	case AJNE:	return AJEQ;
   878  	case AJLE:	return AJGT;
   879  	case AJLS:	return AJHI;
   880  	case AJLT:	return AJGE;
   881  	case AJMI:	return AJPL;
   882  	case AJGE:	return AJLT;
   883  	case AJPL:	return AJMI;
   884  	case AJGT:	return AJLE;
   885  	case AJHI:	return AJLS;
   886  	case AJCS:	return AJCC;
   887  	case AJCC:	return AJCS;
   888  	case AJPS:	return AJPC;
   889  	case AJPC:	return AJPS;
   890  	case AJOS:	return AJOC;
   891  	case AJOC:	return AJOS;
   892  	}
   893  	sysfatal("unknown relation: %s", anames6[a]);
   894  	return 0;
   895  }
   896  
   897  static void
   898  xfol(Link *ctxt, Prog *p, Prog **last)
   899  {
   900  	Prog *q;
   901  	int i;
   902  	int a;
   903  
   904  loop:
   905  	if(p == nil)
   906  		return;
   907  	if(p->as == AJMP)
   908  	if((q = p->pcond) != nil && q->as != ATEXT) {
   909  		/* mark instruction as done and continue layout at target of jump */
   910  		p->mark = 1;
   911  		p = q;
   912  		if(p->mark == 0)
   913  			goto loop;
   914  	}
   915  	if(p->mark) {
   916  		/* 
   917  		 * p goes here, but already used it elsewhere.
   918  		 * copy up to 4 instructions or else branch to other copy.
   919  		 */
   920  		for(i=0,q=p; i<4; i++,q=q->link) {
   921  			if(q == nil)
   922  				break;
   923  			if(q == *last)
   924  				break;
   925  			a = q->as;
   926  			if(a == ANOP) {
   927  				i--;
   928  				continue;
   929  			}
   930  			if(nofollow(a) || pushpop(a))	
   931  				break;	// NOTE(rsc): arm does goto copy
   932  			if(q->pcond == nil || q->pcond->mark)
   933  				continue;
   934  			if(a == ACALL || a == ALOOP)
   935  				continue;
   936  			for(;;) {
   937  				if(p->as == ANOP) {
   938  					p = p->link;
   939  					continue;
   940  				}
   941  				q = copyp(ctxt, p);
   942  				p = p->link;
   943  				q->mark = 1;
   944  				(*last)->link = q;
   945  				*last = q;
   946  				if(q->as != a || q->pcond == nil || q->pcond->mark)
   947  					continue;
   948  
   949  				q->as = relinv(q->as);
   950  				p = q->pcond;
   951  				q->pcond = q->link;
   952  				q->link = p;
   953  				xfol(ctxt, q->link, last);
   954  				p = q->link;
   955  				if(p->mark)
   956  					return;
   957  				goto loop;
   958  			}
   959  		} /* */
   960  		q = ctxt->arch->prg();
   961  		q->as = AJMP;
   962  		q->lineno = p->lineno;
   963  		q->to.type = D_BRANCH;
   964  		q->to.offset = p->pc;
   965  		q->pcond = p;
   966  		p = q;
   967  	}
   968  	
   969  	/* emit p */
   970  	p->mark = 1;
   971  	(*last)->link = p;
   972  	*last = p;
   973  	a = p->as;
   974  
   975  	/* continue loop with what comes after p */
   976  	if(nofollow(a))
   977  		return;
   978  	if(p->pcond != nil && a != ACALL) {
   979  		/*
   980  		 * some kind of conditional branch.
   981  		 * recurse to follow one path.
   982  		 * continue loop on the other.
   983  		 */
   984  		if((q = brchain(ctxt, p->pcond)) != nil)
   985  			p->pcond = q;
   986  		if((q = brchain(ctxt, p->link)) != nil)
   987  			p->link = q;
   988  		if(p->from.type == D_CONST) {
   989  			if(p->from.offset == 1) {
   990  				/*
   991  				 * expect conditional jump to be taken.
   992  				 * rewrite so that's the fall-through case.
   993  				 */
   994  				p->as = relinv(a);
   995  				q = p->link;
   996  				p->link = p->pcond;
   997  				p->pcond = q;
   998  			}
   999  		} else {			
  1000  			q = p->link;
  1001  			if(q->mark)
  1002  			if(a != ALOOP) {
  1003  				p->as = relinv(a);
  1004  				p->link = p->pcond;
  1005  				p->pcond = q;
  1006  			}
  1007  		}
  1008  		xfol(ctxt, p->link, last);
  1009  		if(p->pcond->mark)
  1010  			return;
  1011  		p = p->pcond;
  1012  		goto loop;
  1013  	}
  1014  	p = p->link;
  1015  	goto loop;
  1016  }
  1017  
  1018  static Prog*
  1019  prg(void)
  1020  {
  1021  	Prog *p;
  1022  
  1023  	p = emallocz(sizeof(*p));
  1024  	*p = zprg;
  1025  	return p;
  1026  }
  1027  
  1028  LinkArch linkamd64 = {
  1029  	.name = "amd64",
  1030  	.thechar = '6',
  1031  	.endian = LittleEndian,
  1032  
  1033  	.addstacksplit = addstacksplit,
  1034  	.assemble = span6,
  1035  	.datasize = datasize,
  1036  	.follow = follow,
  1037  	.iscall = iscall,
  1038  	.isdata = isdata,
  1039  	.prg = prg,
  1040  	.progedit = progedit,
  1041  	.settextflag = settextflag,
  1042  	.symtype = symtype,
  1043  	.textflag = textflag,
  1044  
  1045  	.minlc = 1,
  1046  	.ptrsize = 8,
  1047  	.regsize = 8,
  1048  
  1049  	.D_ADDR = D_ADDR,
  1050  	.D_AUTO = D_AUTO,
  1051  	.D_BRANCH = D_BRANCH,
  1052  	.D_CONST = D_CONST,
  1053  	.D_EXTERN = D_EXTERN,
  1054  	.D_FCONST = D_FCONST,
  1055  	.D_NONE = D_NONE,
  1056  	.D_PARAM = D_PARAM,
  1057  	.D_SCONST = D_SCONST,
  1058  	.D_STATIC = D_STATIC,
  1059  
  1060  	.ACALL = ACALL,
  1061  	.ADATA = ADATA,
  1062  	.AEND = AEND,
  1063  	.AFUNCDATA = AFUNCDATA,
  1064  	.AGLOBL = AGLOBL,
  1065  	.AJMP = AJMP,
  1066  	.ANOP = ANOP,
  1067  	.APCDATA = APCDATA,
  1068  	.ARET = ARET,
  1069  	.ATEXT = ATEXT,
  1070  	.ATYPE = ATYPE,
  1071  	.AUSEFIELD = AUSEFIELD,
  1072  };
  1073  
  1074  LinkArch linkamd64p32 = {
  1075  	.name = "amd64p32",
  1076  	.thechar = '6',
  1077  	.endian = LittleEndian,
  1078  
  1079  	.addstacksplit = addstacksplit,
  1080  	.assemble = span6,
  1081  	.datasize = datasize,
  1082  	.follow = follow,
  1083  	.iscall = iscall,
  1084  	.isdata = isdata,
  1085  	.prg = prg,
  1086  	.progedit = progedit,
  1087  	.settextflag = settextflag,
  1088  	.symtype = symtype,
  1089  	.textflag = textflag,
  1090  
  1091  	.minlc = 1,
  1092  	.ptrsize = 4,
  1093  	.regsize = 8,
  1094  
  1095  	.D_ADDR = D_ADDR,
  1096  	.D_AUTO = D_AUTO,
  1097  	.D_BRANCH = D_BRANCH,
  1098  	.D_CONST = D_CONST,
  1099  	.D_EXTERN = D_EXTERN,
  1100  	.D_FCONST = D_FCONST,
  1101  	.D_NONE = D_NONE,
  1102  	.D_PARAM = D_PARAM,
  1103  	.D_SCONST = D_SCONST,
  1104  	.D_STATIC = D_STATIC,
  1105  
  1106  	.ACALL = ACALL,
  1107  	.ADATA = ADATA,
  1108  	.AEND = AEND,
  1109  	.AFUNCDATA = AFUNCDATA,
  1110  	.AGLOBL = AGLOBL,
  1111  	.AJMP = AJMP,
  1112  	.ANOP = ANOP,
  1113  	.APCDATA = APCDATA,
  1114  	.ARET = ARET,
  1115  	.ATEXT = ATEXT,
  1116  	.ATYPE = ATYPE,
  1117  	.AUSEFIELD = AUSEFIELD,
  1118  };