github.com/hongwozai/go-src-1.4.3@v0.0.0-20191127132709-dc3fce3dbccb/src/liblink/obj6.c (about)

     1  // Inferno utils/6l/pass.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include <bio.h>
    34  #include <link.h>
    35  #include "../cmd/6l/6.out.h"
    36  #include "../runtime/stack.h"
    37  
    // zprg is the template instruction. prg() copies it into every Prog it
    // allocates, so a fresh instruction starts out with opcode AGOK and with
    // both operands' type and index set to D_NONE.
    38  static Prog zprg = {
        	// NOTE(review): the meaning of back = 2 is not visible in this file;
        	// presumably it is consumed by the assembler (span6) - confirm.
    39  	.back = 2,
    40  	.as = AGOK,
    41  	.from = {
    42  		.type = D_NONE,
    43  		.index = D_NONE,
    44  	},
    45  	.to = {
    46  		.type = D_NONE,
    47  		.index = D_NONE,
    48  	},
    49  };
    50  
    51  static void
    52  nopout(Prog *p)
    53  {
    54  	p->as = ANOP;
    55  	p->from.type = D_NONE;
    56  	p->to.type = D_NONE;
    57  }
    58  
    59  static int
    60  symtype(Addr *a)
    61  {
    62  	int t;
    63  
    64  	t = a->type;
    65  	if(t == D_ADDR)
    66  		t = a->index;
    67  	return t;
    68  }
    69  
    70  static int
    71  isdata(Prog *p)
    72  {
    73  	return p->as == ADATA || p->as == AGLOBL;
    74  }
    75  
    76  static int
    77  iscall(Prog *p)
    78  {
    79  	return p->as == ACALL;
    80  }
    81  
    82  static int
    83  datasize(Prog *p)
    84  {
    85  	return p->from.scale;
    86  }
    87  
    88  static int
    89  textflag(Prog *p)
    90  {
    91  	return p->from.scale;
    92  }
    93  
    94  static void
    95  settextflag(Prog *p, int f)
    96  {
    97  	p->from.scale = f;
    98  }
    99  
   100  static void nacladdr(Link*, Prog*, Addr*);
   101  
   102  static int
   103  canuselocaltls(Link *ctxt)
   104  {
   105  	switch(ctxt->headtype) {
   106  	case Hplan9:
   107  	case Hwindows:
   108  		return 0;
   109  	}
   110  	return 1;
   111  }
   112  
   113  static void
   114  progedit(Link *ctxt, Prog *p)
   115  {
   116  	char literal[64];
   117  	LSym *s;
   118  	Prog *q;
   119  
   120  	// Thread-local storage references use the TLS pseudo-register.
   121  	// As a register, TLS refers to the thread-local storage base, and it
   122  	// can only be loaded into another register:
   123  	//
   124  	//         MOVQ TLS, AX
   125  	//
   126  	// An offset from the thread-local storage base is written off(reg)(TLS*1).
   127  	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
   128  	// indexing from the loaded TLS base. This emits a relocation so that
   129  	// if the linker needs to adjust the offset, it can. For example:
   130  	//
   131  	//         MOVQ TLS, AX
   132  	//         MOVQ 8(AX)(TLS*1), CX // load m into CX
   133  	// 
   134  	// On systems that support direct access to the TLS memory, this
   135  	// pair of instructions can be reduced to a direct TLS memory reference:
   136  	// 
   137  	//         MOVQ 8(TLS), CX // load m into CX
   138  	//
   139  	// The 2-instruction and 1-instruction forms correspond roughly to
   140  	// ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
   141  	// 
   142  	// We applies this rewrite on systems that support the 1-instruction form.
   143  	// The decision is made using only the operating system (and probably
   144  	// the -shared flag, eventually), not the link mode. If some link modes
   145  	// on a particular operating system require the 2-instruction form,
   146  	// then all builds for that operating system will use the 2-instruction
   147  	// form, so that the link mode decision can be delayed to link time.
   148  	//
   149  	// In this way, all supported systems use identical instructions to
   150  	// access TLS, and they are rewritten appropriately first here in
   151  	// liblink and then finally using relocations in the linker.
   152  
   153  	if(canuselocaltls(ctxt)) {
   154  		// Reduce TLS initial exec model to TLS local exec model.
   155  		// Sequences like
   156  		//	MOVQ TLS, BX
   157  		//	... off(BX)(TLS*1) ...
   158  		// become
   159  		//	NOP
   160  		//	... off(TLS) ...
   161  		//
   162  		// TODO(rsc): Remove the Hsolaris special case. It exists only to
   163  		// guarantee we are producing byte-identical binaries as before this code.
   164  		// But it should be unnecessary.
   165  		if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris)
   166  			nopout(p);
   167  		if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) {
   168  			p->from.type = D_INDIR+D_TLS;
   169  			p->from.scale = 0;
   170  			p->from.index = D_NONE;
   171  		}
   172  		if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) {
   173  			p->to.type = D_INDIR+D_TLS;
   174  			p->to.scale = 0;
   175  			p->to.index = D_NONE;
   176  		}
   177  	} else {
   178  		// As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
   179  		// The instruction
   180  		//	MOVQ off(TLS), BX
   181  		// becomes the sequence
   182  		//	MOVQ TLS, BX
   183  		//	MOVQ off(BX)(TLS*1), BX
   184  		// This allows the C compilers to emit references to m and g using the direct off(TLS) form.
   185  		if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) {
   186  			q = appendp(ctxt, p);
   187  			q->as = p->as;
   188  			q->from = p->from;
   189  			q->from.type = D_INDIR + p->to.type;
   190  			q->from.index = D_TLS;
   191  			q->from.scale = 2; // TODO: use 1
   192  			q->to = p->to;
   193  			p->from.type = D_TLS;
   194  			p->from.index = D_NONE;
   195  			p->from.offset = 0;
   196  		}
   197  	}
   198  
   199  	// TODO: Remove.
   200  	if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) {
   201  		if(p->from.scale == 1 && p->from.index == D_TLS)
   202  			p->from.scale = 2;
   203  		if(p->to.scale == 1 && p->to.index == D_TLS)
   204  			p->to.scale = 2;
   205  	}
   206  
   207  	if(ctxt->headtype == Hnacl) {
   208  		nacladdr(ctxt, p, &p->from);
   209  		nacladdr(ctxt, p, &p->to);
   210  	}
   211  
   212  	// Maintain information about code generation mode.
   213  	if(ctxt->mode == 0)
   214  		ctxt->mode = 64;
   215  	p->mode = ctxt->mode;
   216  	
   217  	switch(p->as) {
   218  	case AMODE:
   219  		if(p->from.type == D_CONST || p->from.type == D_INDIR+D_NONE) {
   220  			switch((int)p->from.offset) {
   221  			case 16:
   222  			case 32:
   223  			case 64:
   224  				ctxt->mode = p->from.offset;
   225  				break;
   226  			}
   227  		}
   228  		nopout(p);
   229  		break;
   230  	}
   231  	
   232  	// Rewrite CALL/JMP/RET to symbol as D_BRANCH.
   233  	switch(p->as) {
   234  	case ACALL:
   235  	case AJMP:
   236  	case ARET:
   237  		if((p->to.type == D_EXTERN || p->to.type == D_STATIC) && p->to.sym != nil)
   238  			p->to.type = D_BRANCH;
   239  		break;
   240  	}
   241  
   242  	// Rewrite float constants to values stored in memory.
   243  	switch(p->as) {
   244  	case AFMOVF:
   245  	case AFADDF:
   246  	case AFSUBF:
   247  	case AFSUBRF:
   248  	case AFMULF:
   249  	case AFDIVF:
   250  	case AFDIVRF:
   251  	case AFCOMF:
   252  	case AFCOMFP:
   253  	case AMOVSS:
   254  	case AADDSS:
   255  	case ASUBSS:
   256  	case AMULSS:
   257  	case ADIVSS:
   258  	case ACOMISS:
   259  	case AUCOMISS:
   260  		if(p->from.type == D_FCONST) {
   261  			int32 i32;
   262  			float32 f32;
   263  			f32 = p->from.u.dval;
   264  			memmove(&i32, &f32, 4);
   265  			sprint(literal, "$f32.%08ux", (uint32)i32);
   266  			s = linklookup(ctxt, literal, 0);
   267  			if(s->type == 0) {
   268  				s->type = SRODATA;
   269  				adduint32(ctxt, s, i32);
   270  				s->reachable = 0;
   271  			}
   272  			p->from.type = D_EXTERN;
   273  			p->from.sym = s;
   274  			p->from.offset = 0;
   275  		}
   276  		break;
   277  	
   278  	case AFMOVD:
   279  	case AFADDD:
   280  	case AFSUBD:
   281  	case AFSUBRD:
   282  	case AFMULD:
   283  	case AFDIVD:
   284  	case AFDIVRD:
   285  	case AFCOMD:
   286  	case AFCOMDP:
   287  	case AMOVSD:
   288  	case AADDSD:
   289  	case ASUBSD:
   290  	case AMULSD:
   291  	case ADIVSD:
   292  	case ACOMISD:
   293  	case AUCOMISD:
   294  		if(p->from.type == D_FCONST) {
   295  			int64 i64;
   296  			memmove(&i64, &p->from.u.dval, 8);
   297  			sprint(literal, "$f64.%016llux", (uvlong)i64);
   298  			s = linklookup(ctxt, literal, 0);
   299  			if(s->type == 0) {
   300  				s->type = SRODATA;
   301  				adduint64(ctxt, s, i64);
   302  				s->reachable = 0;
   303  			}
   304  			p->from.type = D_EXTERN;
   305  			p->from.sym = s;
   306  			p->from.offset = 0;
   307  		}
   308  		break;
   309  	}
   310  }
   311  
   312  static void
   313  nacladdr(Link *ctxt, Prog *p, Addr *a)
   314  {
   315  	if(p->as == ALEAL || p->as == ALEAQ)
   316  		return;
   317  	
   318  	if(a->type == D_BP || a->type == D_INDIR+D_BP) {
   319  		ctxt->diag("invalid address: %P", p);
   320  		return;
   321  	}
   322  	if(a->type == D_INDIR+D_TLS)
   323  		a->type = D_INDIR+D_BP;
   324  	else if(a->type == D_TLS)
   325  		a->type = D_BP;
   326  	if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) {
   327  		switch(a->type) {
   328  		case D_INDIR+D_BP:
   329  		case D_INDIR+D_SP:
   330  		case D_INDIR+D_R15:
   331  			// all ok
   332  			break;
   333  		default:
   334  			if(a->index != D_NONE)
   335  				ctxt->diag("invalid address %P", p);
   336  			a->index = a->type - D_INDIR;
   337  			if(a->index != D_NONE)
   338  				a->scale = 1;
   339  			a->type = D_INDIR+D_R15;
   340  			break;
   341  		}
   342  	}
   343  }
   344  
   345  static Prog*	load_g_cx(Link*, Prog*);
   346  static Prog*	stacksplit(Link*, Prog*, int32, int32, int, Prog**);
   347  static void	indir_cx(Link*, Addr*);
   348  
   349  static void
   350  parsetextconst(vlong arg, vlong *textstksiz, vlong *textarg)
   351  {
   352  	*textstksiz = arg & 0xffffffffLL;
   353  	if(*textstksiz & 0x80000000LL)
   354  		*textstksiz = -(-*textstksiz & 0xffffffffLL);
   355  
   356  	*textarg = (arg >> 32) & 0xffffffffLL;
   357  	if(*textarg & 0x80000000LL)
   358  		*textarg = 0;
   359  	*textarg = (*textarg+7) & ~7LL;
   360  }
   361  
   // addstacksplit rewrites the instruction list of cursym in place.
   // After the TEXT instruction it inserts, as needed: a load of g into CX,
   // the stack split check (stacksplit), and the frame allocation (AADJSP,
   // or a pair of fake NOP adjustments when the frame is empty). For WRAPPER
   // functions it adds the g->panic->argp fixup, and with debugzerostack it
   // adds code that zeroes the frame. It then walks the remaining
   // instructions, rebasing D_AUTO/D_PARAM offsets, recording spadj for
   // PUSH/POP, and expanding every RET into the frame release plus RET
   // (or JMP when the RET names a symbol).
   362  static void
   363  addstacksplit(Link *ctxt, LSym *cursym)
   364  {
   365  	Prog *p, *q, *p1, *p2;
   366  	int32 autoffset, deltasp;
   367  	int a, pcsize;
   368  	vlong textstksiz, textarg;
   369  
        	// Look up the runtime symbols once and cache them in ctxt.
   370  	if(ctxt->tlsg == nil)
   371  		ctxt->tlsg = linklookup(ctxt, "runtime.tlsg", 0);
   372  	if(ctxt->symmorestack[0] == nil) {
   373  		ctxt->symmorestack[0] = linklookup(ctxt, "runtime.morestack", 0);
   374  		ctxt->symmorestack[1] = linklookup(ctxt, "runtime.morestack_noctxt", 0);
   375  	}
   376  
   377  	if(ctxt->headtype == Hplan9 && ctxt->plan9privates == nil)
   378  		ctxt->plan9privates = linklookup(ctxt, "_privates", 0);
   379  
   380  	ctxt->cursym = cursym;
   381  
        	// Nothing to do for a symbol with no TEXT or with nothing after it.
   382  	if(cursym->text == nil || cursym->text->link == nil)
   383  		return;				
   384  
   385  	p = cursym->text;
   386  	parsetextconst(p->to.offset, &textstksiz, &textarg);
        	// autoffset is the frame size actually allocated; a negative
        	// declared size is treated as 0 here but still recorded in locals.
   387  	autoffset = textstksiz;
   388  	if(autoffset < 0)
   389  		autoffset = 0;
   390  	
   391  	cursym->args = p->to.offset>>32;
   392  	cursym->locals = textstksiz;
   393  
        	// A function with a small frame that contains no CALL (and, when
        	// the frame is within 8 bytes of StackSmall, no DUFFCOPY/DUFFZERO)
        	// is marked NOSPLIT automatically.
   394  	if(autoffset < StackSmall && !(p->from.scale & NOSPLIT)) {
   395  		for(q = p; q != nil; q = q->link) {
   396  			if(q->as == ACALL)
   397  				goto noleaf;
   398  			if((q->as == ADUFFCOPY || q->as == ADUFFZERO) && autoffset >= StackSmall - 8)
   399  				goto noleaf;
   400  		}
   401  		p->from.scale |= NOSPLIT;
   402  	noleaf:;
   403  	}
   404  
        	// g is needed in CX by the split check and by the WRAPPER fixup.
   405  	q = nil;
   406  	if(!(p->from.scale & NOSPLIT) || (p->from.scale & WRAPPER)) {
   407  		p = appendp(ctxt, p);
   408  		p = load_g_cx(ctxt, p); // load g into CX
   409  	}
   410  	if(!(cursym->text->from.scale & NOSPLIT))
   411  		p = stacksplit(ctxt, p, autoffset, textarg, !(cursym->text->from.scale&NEEDCTXT), &q); // emit split check
   412  
   413  	if(autoffset) {
   414  		if(autoffset%ctxt->arch->regsize != 0)
   415  			ctxt->diag("unaligned stack size %d", autoffset);
   416  		p = appendp(ctxt, p);
   417  		p->as = AADJSP;
   418  		p->from.type = D_CONST;
   419  		p->from.offset = autoffset;
   420  		p->spadj = autoffset;
   421  	} else {
   422  		// zero-byte stack adjustment.
   423  		// Insert a fake non-zero adjustment so that stkcheck can
   424  		// recognize the end of the stack-splitting prolog.
   425  		p = appendp(ctxt, p);
   426  		p->as = ANOP;
   427  		p->spadj = -ctxt->arch->ptrsize;
   428  		p = appendp(ctxt, p);
   429  		p->as = ANOP;
   430  		p->spadj = ctxt->arch->ptrsize;
   431  	}
        	// q, when set by stacksplit, is the jump taken when no split is
        	// needed; aim it at the frame allocation emitted just above.
   432  	if(q != nil)
   433  		q->pcond = p;
   434  	deltasp = autoffset;
   435  	
   436  	if(cursym->text->from.scale & WRAPPER) {
   437  		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
   438  		//
   439  		//	MOVQ g_panic(CX), BX
   440  		//	TESTQ BX, BX
   441  		//	JEQ end
   442  		//	LEAQ (autoffset+8)(SP), DI
   443  		//	CMPQ panic_argp(BX), DI
   444  		//	JNE end
   445  		//	MOVQ SP, panic_argp(BX)
   446  		// end:
   447  		//	NOP
   448  		//
   449  		// The NOP is needed to give the jumps somewhere to land.
   450  		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
   451  
        		// On Hnacl each instruction below uses its 32-bit (L) form and
        		// memory operands are addressed as (R15)(reg*1).
   452  		p = appendp(ctxt, p);
   453  		p->as = AMOVQ;
   454  		p->from.type = D_INDIR+D_CX;
   455  		p->from.offset = 4*ctxt->arch->ptrsize; // G.panic
   456  		p->to.type = D_BX;
   457  		if(ctxt->headtype == Hnacl) {
   458  			p->as = AMOVL;
   459  			p->from.type = D_INDIR+D_R15;
   460  			p->from.scale = 1;
   461  			p->from.index = D_CX;
   462  		}
   463  
   464  		p = appendp(ctxt, p);
   465  		p->as = ATESTQ;
   466  		p->from.type = D_BX;
   467  		p->to.type = D_BX;
   468  		if(ctxt->headtype == Hnacl)
   469  			p->as = ATESTL;
   470  
        		// p1 and p2 remember the two forward jumps; both are pointed
        		// at the trailing NOP once it has been emitted.
   471  		p = appendp(ctxt, p);
   472  		p->as = AJEQ;
   473  		p->to.type = D_BRANCH;
   474  		p1 = p;
   475  
   476  		p = appendp(ctxt, p);
   477  		p->as = ALEAQ;
   478  		p->from.type = D_INDIR+D_SP;
   479  		p->from.offset = autoffset+8;
   480  		p->to.type = D_DI;
   481  		if(ctxt->headtype == Hnacl)
   482  			p->as = ALEAL;
   483  
   484  		p = appendp(ctxt, p);
   485  		p->as = ACMPQ;
   486  		p->from.type = D_INDIR+D_BX;
   487  		p->from.offset = 0; // Panic.argp
   488  		p->to.type = D_DI;
   489  		if(ctxt->headtype == Hnacl) {
   490  			p->as = ACMPL;
   491  			p->from.type = D_INDIR+D_R15;
   492  			p->from.scale = 1;
   493  			p->from.index = D_BX;
   494  		}
   495  
   496  		p = appendp(ctxt, p);
   497  		p->as = AJNE;
   498  		p->to.type = D_BRANCH;
   499  		p2 = p;
   500  
   501  		p = appendp(ctxt, p);
   502  		p->as = AMOVQ;
   503  		p->from.type = D_SP;
   504  		p->to.type = D_INDIR+D_BX;
   505  		p->to.offset = 0; // Panic.argp
   506  		if(ctxt->headtype == Hnacl) {
   507  			p->as = AMOVL;
   508  			p->to.type = D_INDIR+D_R15;
   509  			p->to.scale = 1;
   510  			p->to.index = D_BX;
   511  		}
   512  
   513  		p = appendp(ctxt, p);
   514  		p->as = ANOP;
   515  		p1->pcond = p;
   516  		p2->pcond = p;
   517  	}
   518  
   519  	if(ctxt->debugzerostack && autoffset && !(cursym->text->from.scale&NOSPLIT)) {
   520  		// 6l -Z means zero the stack frame on entry.
   521  		// This slows down function calls but can help avoid
   522  		// false positives in garbage collection.
        		// Emitted sequence:
        		//	MOVQ SP, DI
        		//	MOVQ $(autoffset/8), CX
        		//	MOVQ $0, AX
        		//	REP; STOSQ
   523  		p = appendp(ctxt, p);
   524  		p->as = AMOVQ;
   525  		p->from.type = D_SP;
   526  		p->to.type = D_DI;
   527  		
   528  		p = appendp(ctxt, p);
   529  		p->as = AMOVQ;
   530  		p->from.type = D_CONST;
   531  		p->from.offset = autoffset/8;
   532  		p->to.type = D_CX;
   533  		
   534  		p = appendp(ctxt, p);
   535  		p->as = AMOVQ;
   536  		p->from.type = D_CONST;
   537  		p->from.offset = 0;
   538  		p->to.type = D_AX;
   539  		
   540  		p = appendp(ctxt, p);
   541  		p->as = AREP;
   542  		
   543  		p = appendp(ctxt, p);
   544  		p->as = ASTOSQ;
   545  	}
   546  	
        	// Walk the rest of the function. deltasp tracks how far SP is
        	// currently below its value on entry: D_AUTO offsets are rebased by
        	// deltasp and D_PARAM offsets by deltasp plus pcsize (p->mode/8).
   547  	for(; p != nil; p = p->link) {
   548  		pcsize = p->mode/8;
   549  		a = p->from.type;
   550  		if(a == D_AUTO)
   551  			p->from.offset += deltasp;
   552  		if(a == D_PARAM)
   553  			p->from.offset += deltasp + pcsize;
   554  		a = p->to.type;
   555  		if(a == D_AUTO)
   556  			p->to.offset += deltasp;
   557  		if(a == D_PARAM)
   558  			p->to.offset += deltasp + pcsize;
   559  
        		// PUSH/POP update deltasp and record spadj; only ARET falls
        		// out of the switch to the epilogue code below.
   560  		switch(p->as) {
   561  		default:
   562  			continue;
   563  		case APUSHL:
   564  		case APUSHFL:
   565  			deltasp += 4;
   566  			p->spadj = 4;
   567  			continue;
   568  		case APUSHQ:
   569  		case APUSHFQ:
   570  			deltasp += 8;
   571  			p->spadj = 8;
   572  			continue;
   573  		case APUSHW:
   574  		case APUSHFW:
   575  			deltasp += 2;
   576  			p->spadj = 2;
   577  			continue;
   578  		case APOPL:
   579  		case APOPFL:
   580  			deltasp -= 4;
   581  			p->spadj = -4;
   582  			continue;
   583  		case APOPQ:
   584  		case APOPFQ:
   585  			deltasp -= 8;
   586  			p->spadj = -8;
   587  			continue;
   588  		case APOPW:
   589  		case APOPFW:
   590  			deltasp -= 2;
   591  			p->spadj = -2;
   592  			continue;
   593  		case ARET:
   594  			break;
   595  		}
   596  
   597  		if(autoffset != deltasp)
   598  			ctxt->diag("unbalanced PUSH/POP");
   599  
        		// The original RET is turned into the frame release and a new
        		// RET is appended after it.
   600  		if(autoffset) {
   601  			p->as = AADJSP;
   602  			p->from.type = D_CONST;
   603  			p->from.offset = -autoffset;
   604  			p->spadj = -autoffset;
   605  			p = appendp(ctxt, p);
   606  			p->as = ARET;
   607  			// If there are instructions following
   608  			// this ARET, they come from a branch
   609  			// with the same stackframe, so undo
   610  			// the cleanup.
   611  			p->spadj = +autoffset;
   612  		}
   613  		if(p->to.sym) // retjmp
   614  			p->as = AJMP;
   615  	}
   616  }
   617  
   618  static void
   619  indir_cx(Link *ctxt, Addr *a)
   620  {
   621  	if(ctxt->headtype == Hnacl) {
   622  		a->type = D_INDIR + D_R15;
   623  		a->index = D_CX;
   624  		a->scale = 1;
   625  		return;
   626  	}
   627  
   628  	a->type = D_INDIR+D_CX;
   629  }
   630  
   631  // Append code to p to load g into cx.
   632  // Overwrites p with the first instruction (no first appendp).
   633  // Overwriting p is unusual but it lets use this in both the
   634  // prologue (caller must call appendp first) and in the epilogue.
   635  // Returns last new instruction.
   636  static Prog*
   637  load_g_cx(Link *ctxt, Prog *p)
   638  {	
   639  	Prog *next;
   640  
   641  	p->as = AMOVQ;
   642  	if(ctxt->arch->ptrsize == 4)
   643  		p->as = AMOVL;
   644  	p->from.type = D_INDIR+D_TLS;
   645  	p->from.offset = 0;
   646  	p->to.type = D_CX;
   647  	
   648  	next = p->link;
   649  	progedit(ctxt, p);
   650  	while(p->link != next)
   651  		p = p->link;
   652  	
   653  	if(p->from.index == D_TLS)
   654  		p->from.scale = 2;
   655  
   656  	return p;
   657  }
   658  
   659  // Append code to p to check for stack split.
   660  // Appends to (does not overwrite) p.
   661  // Assumes g is in CX.
   662  // Returns last new instruction.
   663  // On return, *jmpok is the instruction that should jump
   664  // to the stack frame allocation if no split is needed.
   665  static Prog*
   666  stacksplit(Link *ctxt, Prog *p, int32 framesize, int32 textarg, int noctxt, Prog **jmpok)
   667  {
   668  	Prog *q, *q1;
   669  	int cmp, lea, mov, sub;
   670  
   671  	USED(textarg);
   672  	cmp = ACMPQ;
   673  	lea = ALEAQ;
   674  	mov = AMOVQ;
   675  	sub = ASUBQ;
   676  
   677  	if(ctxt->headtype == Hnacl) {
   678  		cmp = ACMPL;
   679  		lea = ALEAL;
   680  		mov = AMOVL;
   681  		sub = ASUBL;
   682  	}
   683  
   684  	q1 = nil;
   685  	if(framesize <= StackSmall) {
   686  		// small stack: SP <= stackguard
   687  		//	CMPQ SP, stackguard
   688  		p = appendp(ctxt, p);
   689  		p->as = cmp;
   690  		p->from.type = D_SP;
   691  		indir_cx(ctxt, &p->to);
   692  		p->to.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
   693  		if(ctxt->cursym->cfunc)
   694  			p->to.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
   695  	} else if(framesize <= StackBig) {
   696  		// large stack: SP-framesize <= stackguard-StackSmall
   697  		//	LEAQ -xxx(SP), AX
   698  		//	CMPQ AX, stackguard
   699  		p = appendp(ctxt, p);
   700  		p->as = lea;
   701  		p->from.type = D_INDIR+D_SP;
   702  		p->from.offset = -(framesize-StackSmall);
   703  		p->to.type = D_AX;
   704  
   705  		p = appendp(ctxt, p);
   706  		p->as = cmp;
   707  		p->from.type = D_AX;
   708  		indir_cx(ctxt, &p->to);
   709  		p->to.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
   710  		if(ctxt->cursym->cfunc)
   711  			p->to.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
   712  	} else {
   713  		// Such a large stack we need to protect against wraparound.
   714  		// If SP is close to zero:
   715  		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
   716  		// The +StackGuard on both sides is required to keep the left side positive:
   717  		// SP is allowed to be slightly below stackguard. See stack.h.
   718  		//
   719  		// Preemption sets stackguard to StackPreempt, a very large value.
   720  		// That breaks the math above, so we have to check for that explicitly.
   721  		//	MOVQ	stackguard, CX
   722  		//	CMPQ	CX, $StackPreempt
   723  		//	JEQ	label-of-call-to-morestack
   724  		//	LEAQ	StackGuard(SP), AX
   725  		//	SUBQ	CX, AX
   726  		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))
   727  
   728  		p = appendp(ctxt, p);
   729  		p->as = mov;
   730  		indir_cx(ctxt, &p->from);
   731  		p->from.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
   732  		if(ctxt->cursym->cfunc)
   733  			p->from.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
   734  		p->to.type = D_SI;
   735  
   736  		p = appendp(ctxt, p);
   737  		p->as = cmp;
   738  		p->from.type = D_SI;
   739  		p->to.type = D_CONST;
   740  		p->to.offset = StackPreempt;
   741  
   742  		p = appendp(ctxt, p);
   743  		p->as = AJEQ;
   744  		p->to.type = D_BRANCH;
   745  		q1 = p;
   746  
   747  		p = appendp(ctxt, p);
   748  		p->as = lea;
   749  		p->from.type = D_INDIR+D_SP;
   750  		p->from.offset = StackGuard;
   751  		p->to.type = D_AX;
   752  		
   753  		p = appendp(ctxt, p);
   754  		p->as = sub;
   755  		p->from.type = D_SI;
   756  		p->to.type = D_AX;
   757  		
   758  		p = appendp(ctxt, p);
   759  		p->as = cmp;
   760  		p->from.type = D_AX;
   761  		p->to.type = D_CONST;
   762  		p->to.offset = framesize+(StackGuard-StackSmall);
   763  	}					
   764  
   765  	// common
   766  	p = appendp(ctxt, p);
   767  	p->as = AJHI;
   768  	p->to.type = D_BRANCH;
   769  	q = p;
   770  
   771  	p = appendp(ctxt, p);
   772  	p->as = ACALL;
   773  	p->to.type = D_BRANCH;
   774  	if(ctxt->cursym->cfunc)
   775  		p->to.sym = linklookup(ctxt, "runtime.morestackc", 0);
   776  	else
   777  		p->to.sym = ctxt->symmorestack[noctxt];
   778  	
   779  	p = appendp(ctxt, p);
   780  	p->as = AJMP;
   781  	p->to.type = D_BRANCH;
   782  	p->pcond = ctxt->cursym->text->link;
   783  	
   784  	if(q != nil)
   785  		q->pcond = p->link;
   786  	if(q1 != nil)
   787  		q1->pcond = q->link;
   788  
   789  	*jmpok = q;
   790  	return p;
   791  }
   792  
   793  static void xfol(Link*, Prog*, Prog**);
   794  
   795  static void
   796  follow(Link *ctxt, LSym *s)
   797  {
   798  	Prog *firstp, *lastp;
   799  
   800  	ctxt->cursym = s;
   801  
   802  	firstp = ctxt->arch->prg();
   803  	lastp = firstp;
   804  	xfol(ctxt, s->text, &lastp);
   805  	lastp->link = nil;
   806  	s->text = firstp->link;
   807  }
   808  
   809  static int
   810  nofollow(int a)
   811  {
   812  	switch(a) {
   813  	case AJMP:
   814  	case ARET:
   815  	case AIRETL:
   816  	case AIRETQ:
   817  	case AIRETW:
   818  	case ARETFL:
   819  	case ARETFQ:
   820  	case ARETFW:
   821  	case AUNDEF:
   822  		return 1;
   823  	}
   824  	return 0;
   825  }
   826  
   827  static int
   828  pushpop(int a)
   829  {
   830  	switch(a) {
   831  	case APUSHL:
   832  	case APUSHFL:
   833  	case APUSHQ:
   834  	case APUSHFQ:
   835  	case APUSHW:
   836  	case APUSHFW:
   837  	case APOPL:
   838  	case APOPFL:
   839  	case APOPQ:
   840  	case APOPFQ:
   841  	case APOPW:
   842  	case APOPFW:
   843  		return 1;
   844  	}
   845  	return 0;
   846  }
   847  
   848  static int
   849  relinv(int a)
   850  {
   851  	switch(a) {
   852  	case AJEQ:	return AJNE;
   853  	case AJNE:	return AJEQ;
   854  	case AJLE:	return AJGT;
   855  	case AJLS:	return AJHI;
   856  	case AJLT:	return AJGE;
   857  	case AJMI:	return AJPL;
   858  	case AJGE:	return AJLT;
   859  	case AJPL:	return AJMI;
   860  	case AJGT:	return AJLE;
   861  	case AJHI:	return AJLS;
   862  	case AJCS:	return AJCC;
   863  	case AJCC:	return AJCS;
   864  	case AJPS:	return AJPC;
   865  	case AJPC:	return AJPS;
   866  	case AJOS:	return AJOC;
   867  	case AJOC:	return AJOS;
   868  	}
   869  	sysfatal("unknown relation: %s", anames6[a]);
   870  	return 0;
   871  }
   872  
   // xfol lays out the instructions reachable from p by appending them to
   // the list whose tail is *last, setting mark on every instruction it
   // emits. It walks straight-line code in a loop; for a conditional branch
   // it recurses on p->link and then continues the loop at p->pcond.
   873  static void
   874  xfol(Link *ctxt, Prog *p, Prog **last)
   875  {
   876  	Prog *q;
   877  	int i;
   878  	int a;
   879  
   880  loop:
   881  	if(p == nil)
   882  		return;
   883  	if(p->as == AJMP)
   884  	if((q = p->pcond) != nil && q->as != ATEXT) {
   885  		/* mark instruction as done and continue layout at target of jump */
   886  		p->mark = 1;
   887  		p = q;
   888  		if(p->mark == 0)
   889  			goto loop;
   890  	}
   891  	if(p->mark) {
   892  		/* 
   893  		 * p goes here, but already used it elsewhere.
   894  		 * copy up to 4 instructions or else branch to other copy.
   895  		 */
        		// Scan ahead from p. NOPs do not count toward the limit of 4.
        		// The scan gives up at the end of the list, at *last, or at a
        		// nofollow/pushpop instruction.
   896  		for(i=0,q=p; i<4; i++,q=q->link) {
   897  			if(q == nil)
   898  				break;
   899  			if(q == *last)
   900  				break;
   901  			a = q->as;
   902  			if(a == ANOP) {
   903  				i--;
   904  				continue;
   905  			}
   906  			if(nofollow(a) || pushpop(a))	
   907  				break;	// NOTE(rsc): arm does goto copy
   908  			if(q->pcond == nil || q->pcond->mark)
   909  				continue;
   910  			if(a == ACALL || a == ALOOP)
   911  				continue;
        			// Found a branch with opcode a whose target is not yet
        			// laid out: copy instructions starting at p (skipping
        			// NOPs) onto the output list until such a branch has been
        			// copied.
   912  			for(;;) {
   913  				if(p->as == ANOP) {
   914  					p = p->link;
   915  					continue;
   916  				}
   917  				q = copyp(ctxt, p);
   918  				p = p->link;
   919  				q->mark = 1;
   920  				(*last)->link = q;
   921  				*last = q;
   922  				if(q->as != a || q->pcond == nil || q->pcond->mark)
   923  					continue;
   924  
        				// Invert the copied branch so that its former target
        				// is laid out next and its former fall-through becomes
        				// the branch target.
   925  				q->as = relinv(q->as);
   926  				p = q->pcond;
   927  				q->pcond = q->link;
   928  				q->link = p;
   929  				xfol(ctxt, q->link, last);
   930  				p = q->link;
   931  				if(p->mark)
   932  					return;
   933  				goto loop;
   934  			}
   935  		} /* */
        		// No copy was made: emit a new JMP to the existing copy of p.
   936  		q = ctxt->arch->prg();
   937  		q->as = AJMP;
   938  		q->lineno = p->lineno;
   939  		q->to.type = D_BRANCH;
   940  		q->to.offset = p->pc;
   941  		q->pcond = p;
   942  		p = q;
   943  	}
   944  	
   945  	/* emit p */
   946  	p->mark = 1;
   947  	(*last)->link = p;
   948  	*last = p;
   949  	a = p->as;
   950  
   951  	/* continue loop with what comes after p */
   952  	if(nofollow(a))
   953  		return;
   954  	if(p->pcond != nil && a != ACALL) {
   955  		/*
   956  		 * some kind of conditional branch.
   957  		 * recurse to follow one path.
   958  		 * continue loop on the other.
   959  		 */
        		// brchain is defined outside this file; a non-nil result
        		// replaces the branch target and the fall-through.
   960  		if((q = brchain(ctxt, p->pcond)) != nil)
   961  			p->pcond = q;
   962  		if((q = brchain(ctxt, p->link)) != nil)
   963  			p->link = q;
   964  		if(p->from.type == D_CONST) {
   965  			if(p->from.offset == 1) {
   966  				/*
   967  				 * expect conditional jump to be taken.
   968  				 * rewrite so that's the fall-through case.
   969  				 */
   970  				p->as = relinv(a);
   971  				q = p->link;
   972  				p->link = p->pcond;
   973  				p->pcond = q;
   974  			}
   975  		} else {			
        			// If the fall-through is already laid out, invert the
        			// branch (except ALOOP) so the other arm falls through.
   976  			q = p->link;
   977  			if(q->mark)
   978  			if(a != ALOOP) {
   979  				p->as = relinv(a);
   980  				p->link = p->pcond;
   981  				p->pcond = q;
   982  			}
   983  		}
   984  		xfol(ctxt, p->link, last);
   985  		if(p->pcond->mark)
   986  			return;
   987  		p = p->pcond;
   988  		goto loop;
   989  	}
   990  	p = p->link;
   991  	goto loop;
   992  }
   993  
   994  static Prog*
   995  prg(void)
   996  {
   997  	Prog *p;
   998  
   999  	p = emallocz(sizeof(*p));
  1000  	*p = zprg;
  1001  	return p;
  1002  }
  1003  
  // linkamd64 is the architecture descriptor for "amd64". It plugs the
  // static functions of this file (plus span6, defined elsewhere) into the
  // LinkArch hooks and republishes the D_* operand kinds and A* opcodes
  // under architecture-neutral field names.
  1004  LinkArch linkamd64 = {
  1005  	.name = "amd64",
  1006  	.thechar = '6',
  1007  	.endian = LittleEndian,
  1008  
        	// Per-architecture hooks.
  1009  	.addstacksplit = addstacksplit,
  1010  	.assemble = span6,
  1011  	.datasize = datasize,
  1012  	.follow = follow,
  1013  	.iscall = iscall,
  1014  	.isdata = isdata,
  1015  	.prg = prg,
  1016  	.progedit = progedit,
  1017  	.settextflag = settextflag,
  1018  	.symtype = symtype,
  1019  	.textflag = textflag,
  1020  
        	// Pointers and registers are both 8 bytes here.
  1021  	.minlc = 1,
  1022  	.ptrsize = 8,
  1023  	.regsize = 8,
  1024  
        	// Operand kinds.
  1025  	.D_ADDR = D_ADDR,
  1026  	.D_AUTO = D_AUTO,
  1027  	.D_BRANCH = D_BRANCH,
  1028  	.D_CONST = D_CONST,
  1029  	.D_EXTERN = D_EXTERN,
  1030  	.D_FCONST = D_FCONST,
  1031  	.D_NONE = D_NONE,
  1032  	.D_PARAM = D_PARAM,
  1033  	.D_SCONST = D_SCONST,
  1034  	.D_STATIC = D_STATIC,
  1035  
        	// Opcodes.
  1036  	.ACALL = ACALL,
  1037  	.ADATA = ADATA,
  1038  	.AEND = AEND,
  1039  	.AFUNCDATA = AFUNCDATA,
  1040  	.AGLOBL = AGLOBL,
  1041  	.AJMP = AJMP,
  1042  	.ANOP = ANOP,
  1043  	.APCDATA = APCDATA,
  1044  	.ARET = ARET,
  1045  	.ATEXT = ATEXT,
  1046  	.ATYPE = ATYPE,
  1047  	.AUSEFIELD = AUSEFIELD,
  1048  };
  1049  
  // linkamd64p32 is the architecture descriptor for "amd64p32". It uses the
  // same hooks and constants as linkamd64; the visible differences are the
  // name and a 4-byte ptrsize alongside the 8-byte regsize.
  1050  LinkArch linkamd64p32 = {
  1051  	.name = "amd64p32",
  1052  	.thechar = '6',
  1053  	.endian = LittleEndian,
  1054  
        	// Per-architecture hooks.
  1055  	.addstacksplit = addstacksplit,
  1056  	.assemble = span6,
  1057  	.datasize = datasize,
  1058  	.follow = follow,
  1059  	.iscall = iscall,
  1060  	.isdata = isdata,
  1061  	.prg = prg,
  1062  	.progedit = progedit,
  1063  	.settextflag = settextflag,
  1064  	.symtype = symtype,
  1065  	.textflag = textflag,
  1066  
        	// 4-byte pointers with 8-byte registers.
  1067  	.minlc = 1,
  1068  	.ptrsize = 4,
  1069  	.regsize = 8,
  1070  
        	// Operand kinds.
  1071  	.D_ADDR = D_ADDR,
  1072  	.D_AUTO = D_AUTO,
  1073  	.D_BRANCH = D_BRANCH,
  1074  	.D_CONST = D_CONST,
  1075  	.D_EXTERN = D_EXTERN,
  1076  	.D_FCONST = D_FCONST,
  1077  	.D_NONE = D_NONE,
  1078  	.D_PARAM = D_PARAM,
  1079  	.D_SCONST = D_SCONST,
  1080  	.D_STATIC = D_STATIC,
  1081  
        	// Opcodes.
  1082  	.ACALL = ACALL,
  1083  	.ADATA = ADATA,
  1084  	.AEND = AEND,
  1085  	.AFUNCDATA = AFUNCDATA,
  1086  	.AGLOBL = AGLOBL,
  1087  	.AJMP = AJMP,
  1088  	.ANOP = ANOP,
  1089  	.APCDATA = APCDATA,
  1090  	.ARET = ARET,
  1091  	.ATEXT = ATEXT,
  1092  	.ATYPE = ATYPE,
  1093  	.AUSEFIELD = AUSEFIELD,
  1094  };