github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/liblink/obj6.c (about)

     1  // Inferno utils/6l/pass.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include <bio.h>
    34  #include <link.h>
    35  #include "../cmd/6l/6.out.h"
    36  #include "../pkg/runtime/stack.h"
    37  
    38  static Prog zprg = {
    39  	.back = 2,
    40  	.as = AGOK,
    41  	.from = {
    42  		.type = D_NONE,
    43  		.index = D_NONE,
    44  	},
    45  	.to = {
    46  		.type = D_NONE,
    47  		.index = D_NONE,
    48  	},
    49  };
    50  
    51  static void
    52  nopout(Prog *p)
    53  {
    54  	p->as = ANOP;
    55  	p->from.type = D_NONE;
    56  	p->to.type = D_NONE;
    57  }
    58  
    59  static int
    60  symtype(Addr *a)
    61  {
    62  	int t;
    63  
    64  	t = a->type;
    65  	if(t == D_ADDR)
    66  		t = a->index;
    67  	return t;
    68  }
    69  
    70  static int
    71  isdata(Prog *p)
    72  {
    73  	return p->as == ADATA || p->as == AGLOBL;
    74  }
    75  
    76  static int
    77  iscall(Prog *p)
    78  {
    79  	return p->as == ACALL;
    80  }
    81  
    82  static int
    83  datasize(Prog *p)
    84  {
    85  	return p->from.scale;
    86  }
    87  
    88  static int
    89  textflag(Prog *p)
    90  {
    91  	return p->from.scale;
    92  }
    93  
    94  static void
    95  settextflag(Prog *p, int f)
    96  {
    97  	p->from.scale = f;
    98  }
    99  
        // Forward declaration: progedit calls nacladdr, which is defined after it.
   100  static void nacladdr(Link*, Prog*, Addr*);
   101  
   102  static int
   103  canuselocaltls(Link *ctxt)
   104  {
   105  	switch(ctxt->headtype) {
   106  //	case Hlinux:
   107  	case Hwindows:
   108  		return 0;
   109  	}
   110  	return 1;
   111  }
   112  
        // progedit rewrites the single instruction p in place (the
        // LinkArch progedit hook).  In order, it:
        //   - converts between the one- and two-instruction TLS access forms,
        //     depending on canuselocaltls(ctxt);
        //   - forces scale 2 on TLS-indexed operands for Hwindows/Hplan9;
        //   - rewrites addresses through nacladdr for Hnacl;
        //   - records the current code generation mode in p->mode and
        //     consumes AMODE directives;
        //   - retargets CALL/JMP/RET to a symbol as D_BRANCH;
        //   - replaces floating-point constant operands with references to
        //     read-only data symbols holding the constant.
        // It may append one instruction after p (the TLS initial exec case).
   113  static void
   114  progedit(Link *ctxt, Prog *p)
   115  {
   116  	char literal[64];
   117  	LSym *s;
   118  	Prog *q;
   119  
   120  	// Thread-local storage references use the TLS pseudo-register.
   121  	// As a register, TLS refers to the thread-local storage base, and it
   122  	// can only be loaded into another register:
   123  	//
   124  	//         MOVQ TLS, AX
   125  	//
   126  	// An offset from the thread-local storage base is written off(reg)(TLS*1).
   127  	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
   128  	// indexing from the loaded TLS base. This emits a relocation so that
   129  	// if the linker needs to adjust the offset, it can. For example:
   130  	//
   131  	//         MOVQ TLS, AX
   132  	//         MOVQ 8(AX)(TLS*1), CX // load m into CX
   133  	//
   134  	// On systems that support direct access to the TLS memory, this
   135  	// pair of instructions can be reduced to a direct TLS memory reference:
   136  	//
   137  	//         MOVQ 8(TLS), CX // load m into CX
   138  	//
   139  	// The 2-instruction and 1-instruction forms correspond roughly to
   140  	// ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
   141  	//
   142  	// We apply this rewrite on systems that support the 1-instruction form.
   143  	// The decision is made using only the operating system (and probably
   144  	// the -shared flag, eventually), not the link mode. If some link modes
   145  	// on a particular operating system require the 2-instruction form,
   146  	// then all builds for that operating system will use the 2-instruction
   147  	// form, so that the link mode decision can be delayed to link time.
   148  	//
   149  	// In this way, all supported systems use identical instructions to
   150  	// access TLS, and they are rewritten appropriately first here in
   151  	// liblink and then finally using relocations in the linker.
   152  
   153  	if(canuselocaltls(ctxt)) {
   154  		// Reduce TLS initial exec model to TLS local exec model.
   155  		// Sequences like
   156  		//	MOVQ TLS, BX
   157  		//	... off(BX)(TLS*1) ...
   158  		// become
   159  		//	NOP
   160  		//	... off(TLS) ...
   161  		//
   162  		// TODO(rsc): Remove the Hsolaris special case. It exists only to
   163  		// guarantee we are producing byte-identical binaries as before this code.
   164  		// But it should be unnecessary.
   165  		if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris)
   166  			nopout(p);
        		// off(reg)(TLS*1) as source operand becomes off(TLS).
   167  		if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) {
   168  			p->from.type = D_INDIR+D_TLS;
   169  			p->from.scale = 0;
   170  			p->from.index = D_NONE;
   171  		}
        		// Same rewrite for the destination operand.
   172  		if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) {
   173  			p->to.type = D_INDIR+D_TLS;
   174  			p->to.scale = 0;
   175  			p->to.index = D_NONE;
   176  		}
   177  	} else {
   178  		// As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
   179  		// The instruction
   180  		//	MOVQ off(TLS), BX
   181  		// becomes the sequence
   182  		//	MOVQ TLS, BX
   183  		//	MOVQ off(BX)(TLS*1), BX
   184  		// This allows the C compilers to emit references to m and g using the direct off(TLS) form.
   185  		if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) {
        			// q is the second instruction of the pair; it is built
        			// from p's operands before p itself is rewritten below.
   186  			q = appendp(ctxt, p);
   187  			q->as = p->as;
   188  			q->from = p->from;
   189  			q->from.type = D_INDIR + p->to.type;
   190  			q->from.index = D_TLS;
   191  			q->from.scale = 2; // TODO: use 1
   192  			q->to = p->to;
   193  			p->from.type = D_TLS;
   194  			p->from.index = D_NONE;
   195  			p->from.offset = 0;
   196  		}
   197  	}
   198  
   199  	// TODO: Remove.
   200  	if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) {
   201  		if(p->from.scale == 1 && p->from.index == D_TLS)
   202  			p->from.scale = 2;
   203  		if(p->to.scale == 1 && p->to.index == D_TLS)
   204  			p->to.scale = 2;
   205  	}
   206  
   207  	if(ctxt->headtype == Hnacl) {
   208  		nacladdr(ctxt, p, &p->from);
   209  		nacladdr(ctxt, p, &p->to);
   210  	}
   211  
   212  	// Maintain information about code generation mode.
        	// An unset mode (0) defaults to 64.
   213  	if(ctxt->mode == 0)
   214  		ctxt->mode = 64;
   215  	p->mode = ctxt->mode;
   216  	
   217  	switch(p->as) {
   218  	case AMODE:
        		// Only 16, 32 and 64 change the mode; any other operand is
        		// ignored.  The directive itself always becomes a NOP.
   219  		if(p->from.type == D_CONST || p->from.type == D_INDIR+D_NONE) {
   220  			switch((int)p->from.offset) {
   221  			case 16:
   222  			case 32:
   223  			case 64:
   224  				ctxt->mode = p->from.offset;
   225  				break;
   226  			}
   227  		}
   228  		nopout(p);
   229  		break;
   230  	}
   231  	
   232  	// Rewrite CALL/JMP/RET to symbol as D_BRANCH.
   233  	switch(p->as) {
   234  	case ACALL:
   235  	case AJMP:
   236  	case ARET:
   237  		if((p->to.type == D_EXTERN || p->to.type == D_STATIC) && p->to.sym != nil)
   238  			p->to.type = D_BRANCH;
   239  		break;
   240  	}
   241  
   242  	// Rewrite float constants to values stored in memory.
   243  	switch(p->as) {
   244  	case AFMOVF:
   245  	case AFADDF:
   246  	case AFSUBF:
   247  	case AFSUBRF:
   248  	case AFMULF:
   249  	case AFDIVF:
   250  	case AFDIVRF:
   251  	case AFCOMF:
   252  	case AFCOMFP:
   253  	case AMOVSS:
   254  	case AADDSS:
   255  	case ASUBSS:
   256  	case AMULSS:
   257  	case ADIVSS:
   258  	case ACOMISS:
   259  	case AUCOMISS:
        		// Single-precision operations: the constant is narrowed to
        		// float32 and stored in a symbol named "$f32.<hex bits>".
        		// The symbol is populated only the first time it is seen
        		// (s->type == 0).
   260  		if(p->from.type == D_FCONST) {
   261  			int32 i32;
   262  			float32 f32;
   263  			f32 = p->from.u.dval;
   264  			memmove(&i32, &f32, 4);
   265  			sprint(literal, "$f32.%08ux", (uint32)i32);
   266  			s = linklookup(ctxt, literal, 0);
   267  			if(s->type == 0) {
   268  				s->type = SRODATA;
   269  				adduint32(ctxt, s, i32);
   270  				s->reachable = 0;
   271  			}
   272  			p->from.type = D_EXTERN;
   273  			p->from.sym = s;
   274  			p->from.offset = 0;
   275  		}
   276  		break;
   277  	
   278  	case AFMOVD:
   279  	case AFADDD:
   280  	case AFSUBD:
   281  	case AFSUBRD:
   282  	case AFMULD:
   283  	case AFDIVD:
   284  	case AFDIVRD:
   285  	case AFCOMD:
   286  	case AFCOMDP:
   287  	case AMOVSD:
   288  	case AADDSD:
   289  	case ASUBSD:
   290  	case AMULSD:
   291  	case ADIVSD:
   292  	case ACOMISD:
   293  	case AUCOMISD:
        		// Double-precision operations: same scheme with the full
        		// 64-bit pattern and a "$f64.<hex bits>" symbol.
   294  		if(p->from.type == D_FCONST) {
   295  			int64 i64;
   296  			memmove(&i64, &p->from.u.dval, 8);
   297  			sprint(literal, "$f64.%016llux", (uvlong)i64);
   298  			s = linklookup(ctxt, literal, 0);
   299  			if(s->type == 0) {
   300  				s->type = SRODATA;
   301  				adduint64(ctxt, s, i64);
   302  				s->reachable = 0;
   303  			}
   304  			p->from.type = D_EXTERN;
   305  			p->from.sym = s;
   306  			p->from.offset = 0;
   307  		}
   308  		break;
   309  	}
   310  }
   311  
   312  static void
   313  nacladdr(Link *ctxt, Prog *p, Addr *a)
   314  {
   315  	if(p->as == ALEAL || p->as == ALEAQ)
   316  		return;
   317  	
   318  	if(a->type == D_BP || a->type == D_INDIR+D_BP) {
   319  		ctxt->diag("invalid address: %P", p);
   320  		return;
   321  	}
   322  	if(a->type == D_INDIR+D_TLS)
   323  		a->type = D_INDIR+D_BP;
   324  	else if(a->type == D_TLS)
   325  		a->type = D_BP;
   326  	if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) {
   327  		switch(a->type) {
   328  		case D_INDIR+D_BP:
   329  		case D_INDIR+D_SP:
   330  		case D_INDIR+D_R15:
   331  			// all ok
   332  			break;
   333  		default:
   334  			if(a->index != D_NONE)
   335  				ctxt->diag("invalid address %P", p);
   336  			a->index = a->type - D_INDIR;
   337  			if(a->index != D_NONE)
   338  				a->scale = 1;
   339  			a->type = D_INDIR+D_R15;
   340  			break;
   341  		}
   342  	}
   343  }
   344  
        // Names of the runtime morestack entry points.  addstacksplit looks
        // each one up into ctxt->symmorestack at the same index, and
        // stacksplit selects an entry as symmorestack[k*2 + noctxt], so the
        // order matters: every variant is immediately followed by its
        // _noctxt twin.  k = 0..3 are the four general variants; k = 4..9
        // are the fixed argument-size variants for 8..48 bytes.
   345  static char*
   346  morename[] =
   347  {
   348  	"runtime.morestack00",
   349  	"runtime.morestack00_noctxt",
   350  	"runtime.morestack10",
   351  	"runtime.morestack10_noctxt",
   352  	"runtime.morestack01",
   353  	"runtime.morestack01_noctxt",
   354  	"runtime.morestack11",
   355  	"runtime.morestack11_noctxt",
   356  
   357  	"runtime.morestack8",
   358  	"runtime.morestack8_noctxt",
   359  	"runtime.morestack16",
   360  	"runtime.morestack16_noctxt",
   361  	"runtime.morestack24",
   362  	"runtime.morestack24_noctxt",
   363  	"runtime.morestack32",
   364  	"runtime.morestack32_noctxt",
   365  	"runtime.morestack40",
   366  	"runtime.morestack40_noctxt",
   367  	"runtime.morestack48",
   368  	"runtime.morestack48_noctxt",
   369  };
   370  
        // Forward declarations of the prologue helpers that addstacksplit
        // uses before their definitions.
   371  static Prog*	load_g_cx(Link*, Prog*);
   372  static Prog*	stacksplit(Link*, Prog*, int32, int32, int, Prog**);
   373  static void	indir_cx(Link*, Addr*);
   374  
   375  static void
   376  parsetextconst(vlong arg, vlong *textstksiz, vlong *textarg)
   377  {
   378  	*textstksiz = arg & 0xffffffffLL;
   379  	if(*textstksiz & 0x80000000LL)
   380  		*textstksiz = -(-*textstksiz & 0xffffffffLL);
   381  
   382  	*textarg = (arg >> 32) & 0xffffffffLL;
   383  	if(*textarg & 0x80000000LL)
   384  		*textarg = 0;
   385  	*textarg = (*textarg+7) & ~7LL;
   386  }
   387  
   388  static void
   389  addstacksplit(Link *ctxt, LSym *cursym)
   390  {
   391  	Prog *p, *q, *q1;
   392  	int32 autoffset, deltasp;
   393  	int a, pcsize;
   394  	uint32 i;
   395  	vlong textstksiz, textarg;
   396  
   397  	if(ctxt->gmsym == nil)
   398  		ctxt->gmsym = linklookup(ctxt, "runtime.tlsgm", 0);
   399  	if(ctxt->symmorestack[0] == nil) {
   400  		if(nelem(morename) > nelem(ctxt->symmorestack))
   401  			sysfatal("Link.symmorestack needs at least %d elements", nelem(morename));
   402  		for(i=0; i<nelem(morename); i++)
   403  			ctxt->symmorestack[i] = linklookup(ctxt, morename[i], 0);
   404  	}
   405  	ctxt->cursym = cursym;
   406  
   407  	if(cursym->text == nil || cursym->text->link == nil)
   408  		return;				
   409  
   410  	p = cursym->text;
   411  	parsetextconst(p->to.offset, &textstksiz, &textarg);
   412  	autoffset = textstksiz;
   413  	if(autoffset < 0)
   414  		autoffset = 0;
   415  	
   416  	cursym->args = p->to.offset>>32;
   417  	cursym->locals = textstksiz;
   418  
   419  	if(autoffset < StackSmall && !(p->from.scale & NOSPLIT)) {
   420  		for(q = p; q != nil; q = q->link) {
   421  			if(q->as == ACALL)
   422  				goto noleaf;
   423  			if((q->as == ADUFFCOPY || q->as == ADUFFZERO) && autoffset >= StackSmall - 8)
   424  				goto noleaf;
   425  		}
   426  		p->from.scale |= NOSPLIT;
   427  	noleaf:;
   428  	}
   429  
   430  	q = nil;
   431  	if(!(p->from.scale & NOSPLIT) || (p->from.scale & WRAPPER)) {
   432  		p = appendp(ctxt, p);
   433  		p = load_g_cx(ctxt, p); // load g into CX
   434  	}
   435  	if(!(cursym->text->from.scale & NOSPLIT))
   436  		p = stacksplit(ctxt, p, autoffset, textarg, !(cursym->text->from.scale&NEEDCTXT), &q); // emit split check
   437  
   438  	if(autoffset) {
   439  		if(autoffset%ctxt->arch->regsize != 0)
   440  			ctxt->diag("unaligned stack size %d", autoffset);
   441  		p = appendp(ctxt, p);
   442  		p->as = AADJSP;
   443  		p->from.type = D_CONST;
   444  		p->from.offset = autoffset;
   445  		p->spadj = autoffset;
   446  	} else {
   447  		// zero-byte stack adjustment.
   448  		// Insert a fake non-zero adjustment so that stkcheck can
   449  		// recognize the end of the stack-splitting prolog.
   450  		p = appendp(ctxt, p);
   451  		p->as = ANOP;
   452  		p->spadj = -ctxt->arch->ptrsize;
   453  		p = appendp(ctxt, p);
   454  		p->as = ANOP;
   455  		p->spadj = ctxt->arch->ptrsize;
   456  	}
   457  	if(q != nil)
   458  		q->pcond = p;
   459  	deltasp = autoffset;
   460  	
   461  	if(cursym->text->from.scale & WRAPPER) {
   462  		// g->panicwrap += autoffset + ctxt->arch->regsize;
   463  		p = appendp(ctxt, p);
   464  		p->as = AADDL;
   465  		p->from.type = D_CONST;
   466  		p->from.offset = autoffset + ctxt->arch->regsize;
   467  		indir_cx(ctxt, &p->to);
   468  		p->to.offset = 2*ctxt->arch->ptrsize;
   469  	}
   470  
   471  	if(ctxt->debugstack > 1 && autoffset) {
   472  		// 6l -K -K means double-check for stack overflow
   473  		// even after calling morestack and even if the
   474  		// function is marked as nosplit.
   475  		p = appendp(ctxt, p);
   476  		p->as = AMOVQ;
   477  		indir_cx(ctxt, &p->from);
   478  		p->from.offset = 0;
   479  		p->to.type = D_BX;
   480  
   481  		p = appendp(ctxt, p);
   482  		p->as = ASUBQ;
   483  		p->from.type = D_CONST;
   484  		p->from.offset = StackSmall+32;
   485  		p->to.type = D_BX;
   486  
   487  		p = appendp(ctxt, p);
   488  		p->as = ACMPQ;
   489  		p->from.type = D_SP;
   490  		p->to.type = D_BX;
   491  
   492  		p = appendp(ctxt, p);
   493  		p->as = AJHI;
   494  		p->to.type = D_BRANCH;
   495  		q1 = p;
   496  
   497  		p = appendp(ctxt, p);
   498  		p->as = AINT;
   499  		p->from.type = D_CONST;
   500  		p->from.offset = 3;
   501  
   502  		p = appendp(ctxt, p);
   503  		p->as = ANOP;
   504  		q1->pcond = p;
   505  	}
   506  	
   507  	if(ctxt->debugzerostack && autoffset && !(cursym->text->from.scale&NOSPLIT)) {
   508  		// 6l -Z means zero the stack frame on entry.
   509  		// This slows down function calls but can help avoid
   510  		// false positives in garbage collection.
   511  		p = appendp(ctxt, p);
   512  		p->as = AMOVQ;
   513  		p->from.type = D_SP;
   514  		p->to.type = D_DI;
   515  		
   516  		p = appendp(ctxt, p);
   517  		p->as = AMOVQ;
   518  		p->from.type = D_CONST;
   519  		p->from.offset = autoffset/8;
   520  		p->to.type = D_CX;
   521  		
   522  		p = appendp(ctxt, p);
   523  		p->as = AMOVQ;
   524  		p->from.type = D_CONST;
   525  		p->from.offset = 0;
   526  		p->to.type = D_AX;
   527  		
   528  		p = appendp(ctxt, p);
   529  		p->as = AREP;
   530  		
   531  		p = appendp(ctxt, p);
   532  		p->as = ASTOSQ;
   533  	}
   534  	
   535  	for(; p != nil; p = p->link) {
   536  		pcsize = p->mode/8;
   537  		a = p->from.type;
   538  		if(a == D_AUTO)
   539  			p->from.offset += deltasp;
   540  		if(a == D_PARAM)
   541  			p->from.offset += deltasp + pcsize;
   542  		a = p->to.type;
   543  		if(a == D_AUTO)
   544  			p->to.offset += deltasp;
   545  		if(a == D_PARAM)
   546  			p->to.offset += deltasp + pcsize;
   547  
   548  		switch(p->as) {
   549  		default:
   550  			continue;
   551  		case APUSHL:
   552  		case APUSHFL:
   553  			deltasp += 4;
   554  			p->spadj = 4;
   555  			continue;
   556  		case APUSHQ:
   557  		case APUSHFQ:
   558  			deltasp += 8;
   559  			p->spadj = 8;
   560  			continue;
   561  		case APUSHW:
   562  		case APUSHFW:
   563  			deltasp += 2;
   564  			p->spadj = 2;
   565  			continue;
   566  		case APOPL:
   567  		case APOPFL:
   568  			deltasp -= 4;
   569  			p->spadj = -4;
   570  			continue;
   571  		case APOPQ:
   572  		case APOPFQ:
   573  			deltasp -= 8;
   574  			p->spadj = -8;
   575  			continue;
   576  		case APOPW:
   577  		case APOPFW:
   578  			deltasp -= 2;
   579  			p->spadj = -2;
   580  			continue;
   581  		case ARET:
   582  			break;
   583  		}
   584  
   585  		if(autoffset != deltasp)
   586  			ctxt->diag("unbalanced PUSH/POP");
   587  
   588  		if(cursym->text->from.scale & WRAPPER) {
   589  			p = load_g_cx(ctxt, p);
   590  			p = appendp(ctxt, p);
   591  			// g->panicwrap -= autoffset + ctxt->arch->regsize;
   592  			p->as = ASUBL;
   593  			p->from.type = D_CONST;
   594  			p->from.offset = autoffset + ctxt->arch->regsize;
   595  			indir_cx(ctxt, &p->to);
   596  			p->to.offset = 2*ctxt->arch->ptrsize;
   597  			p = appendp(ctxt, p);
   598  			p->as = ARET;
   599  		}
   600  
   601  		if(autoffset) {
   602  			p->as = AADJSP;
   603  			p->from.type = D_CONST;
   604  			p->from.offset = -autoffset;
   605  			p->spadj = -autoffset;
   606  			p = appendp(ctxt, p);
   607  			p->as = ARET;
   608  			// If there are instructions following
   609  			// this ARET, they come from a branch
   610  			// with the same stackframe, so undo
   611  			// the cleanup.
   612  			p->spadj = +autoffset;
   613  		}
   614  		if(p->to.sym) // retjmp
   615  			p->as = AJMP;
   616  	}
   617  }
   618  
   619  static void
   620  indir_cx(Link *ctxt, Addr *a)
   621  {
   622  	if(ctxt->headtype == Hnacl) {
   623  		a->type = D_INDIR + D_R15;
   624  		a->index = D_CX;
   625  		a->scale = 1;
   626  		return;
   627  	}
   628  
   629  	a->type = D_INDIR+D_CX;
   630  }
   631  
   632  // Append code to p to load g into cx.
   633  // Overwrites p with the first instruction (no first appendp).
   634  // Overwriting p is unusual but it lets use this in both the
   635  // prologue (caller must call appendp first) and in the epilogue.
   636  // Returns last new instruction.
   637  static Prog*
   638  load_g_cx(Link *ctxt, Prog *p)
   639  {	
   640  	Prog *next;
   641  
   642  	p->as = AMOVQ;
   643  	if(ctxt->arch->ptrsize == 4)
   644  		p->as = AMOVL;
   645  	p->from.type = D_INDIR+D_TLS;
   646  	p->from.offset = 0;
   647  	p->to.type = D_CX;
   648  	
   649  	next = p->link;
   650  	progedit(ctxt, p);
   651  	while(p->link != next)
   652  		p = p->link;
   653  	
   654  	if(p->from.index == D_TLS)
   655  		p->from.scale = 2;
   656  
   657  	return p;
   658  }
   659  
   660  // Append code to p to check for stack split.
   661  // Appends to (does not overwrite) p.
   662  // Assumes g is in CX.
   663  // Returns last new instruction.
   664  // On return, *jmpok is the instruction that should jump
   665  // to the stack frame allocation if no split is needed.
        //
        // framesize is the function's frame size and textarg its argument
        // size; noctxt (0 or 1) selects the _noctxt variant of the
        // morestack routine.  For Hnacl the 32-bit forms of the compare,
        // lea, move and subtract instructions are used.
   666  static Prog*
   667  stacksplit(Link *ctxt, Prog *p, int32 framesize, int32 textarg, int noctxt, Prog **jmpok)
   668  {
   669  	Prog *q, *q1;
   670  	uint32 moreconst1, moreconst2, i;
   671  	int cmp, lea, mov, sub;
   672  
   673  	cmp = ACMPQ;
   674  	lea = ALEAQ;
   675  	mov = AMOVQ;
   676  	sub = ASUBQ;
   677  
   678  	if(ctxt->headtype == Hnacl) {
   679  		cmp = ACMPL;
   680  		lea = ALEAL;
   681  		mov = AMOVL;
   682  		sub = ASUBL;
   683  	}
   684  
   685  	if(ctxt->debugstack) {
   686  		// 6l -K means check not only for stack
   687  		// overflow but stack underflow.
   688  		// On underflow, INT 3 (breakpoint).
   689  		// Underflow itself is rare but this also
   690  		// catches out-of-sync stack guard info
   691  
   692  		p = appendp(ctxt, p);
   693  		p->as = cmp;
   694  		indir_cx(ctxt, &p->from);
   695  		p->from.offset = 8;
   696  		p->to.type = D_SP;
   697  
   698  		p = appendp(ctxt, p);
   699  		p->as = AJHI;
   700  		p->to.type = D_BRANCH;
   701  		p->to.offset = 4;
   702  		q1 = p;
   703  
   704  		p = appendp(ctxt, p);
   705  		p->as = AINT;
   706  		p->from.type = D_CONST;
   707  		p->from.offset = 3;
   708  
        		// Landing pad for the JHI above, skipping the INT 3.
   709  		p = appendp(ctxt, p);
   710  		p->as = ANOP;
   711  		q1->pcond = p;
   712  	}
   713  
        	// From here on q1 is the StackPreempt jump; it is only set in
        	// the huge-frame case below.
   714  	q1 = nil;
   715  	if(framesize <= StackSmall) {
   716  		// small stack: SP <= stackguard
   717  		//	CMPQ SP, stackguard
   718  		p = appendp(ctxt, p);
   719  		p->as = cmp;
   720  		p->from.type = D_SP;
   721  		indir_cx(ctxt, &p->to);
   722  	} else if(framesize <= StackBig) {
   723  		// large stack: SP-framesize <= stackguard-StackSmall
   724  		//	LEAQ -xxx(SP), AX
   725  		//	CMPQ AX, stackguard
   726  		p = appendp(ctxt, p);
   727  		p->as = lea;
   728  		p->from.type = D_INDIR+D_SP;
   729  		p->from.offset = -(framesize-StackSmall);
   730  		p->to.type = D_AX;
   731  
   732  		p = appendp(ctxt, p);
   733  		p->as = cmp;
   734  		p->from.type = D_AX;
   735  		indir_cx(ctxt, &p->to);
   736  	} else {
   737  		// Such a large stack we need to protect against wraparound.
   738  		// If SP is close to zero:
   739  		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
   740  		// The +StackGuard on both sides is required to keep the left side positive:
   741  		// SP is allowed to be slightly below stackguard. See stack.h.
   742  		//
   743  		// Preemption sets stackguard to StackPreempt, a very large value.
   744  		// That breaks the math above, so we have to check for that explicitly.
   745  		//	MOVQ	stackguard, SI
   746  		//	CMPQ	SI, $StackPreempt
   747  		//	JEQ	label-of-call-to-morestack
   748  		//	LEAQ	StackGuard(SP), AX
   749  		//	SUBQ	SI, AX
   750  		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))
   751  
   752  		p = appendp(ctxt, p);
   753  		p->as = mov;
   754  		indir_cx(ctxt, &p->from);
   755  		p->from.offset = 0;
   756  		p->to.type = D_SI;
   757  
   758  		p = appendp(ctxt, p);
   759  		p->as = cmp;
   760  		p->from.type = D_SI;
   761  		p->to.type = D_CONST;
   762  		p->to.offset = StackPreempt;
   763  
   764  		p = appendp(ctxt, p);
   765  		p->as = AJEQ;
   766  		p->to.type = D_BRANCH;
   767  		q1 = p;
   768  
   769  		p = appendp(ctxt, p);
   770  		p->as = lea;
   771  		p->from.type = D_INDIR+D_SP;
   772  		p->from.offset = StackGuard;
   773  		p->to.type = D_AX;
   774  		
   775  		p = appendp(ctxt, p);
   776  		p->as = sub;
   777  		p->from.type = D_SI;
   778  		p->to.type = D_AX;
   779  		
   780  		p = appendp(ctxt, p);
   781  		p->as = cmp;
   782  		p->from.type = D_AX;
   783  		p->to.type = D_CONST;
   784  		p->to.offset = framesize+(StackGuard-StackSmall);
   785  	}					
   786  
   787  	// common
        	// q is the "enough stack" branch; its target is filled in below
        	// and then replaced by the caller through *jmpok.
   788  	p = appendp(ctxt, p);
   789  	p->as = AJHI;
   790  	p->to.type = D_BRANCH;
   791  	q = p;
   792  
   793  	// If we ask for more stack, we'll get a minimum of StackMin bytes.
   794  	// We need a stack frame large enough to hold the top-of-stack data,
   795  	// the function arguments+results, our caller's PC, our frame,
   796  	// a word for the return PC of the next call, and then the StackLimit bytes
   797  	// that must be available on entry to any function called from a function
   798  	// that did a stack check.  If StackMin is enough, don't ask for a specific
   799  	// amount: then we can use the custom functions and save a few
   800  	// instructions.
   801  	moreconst1 = 0;
   802  	if(StackTop + textarg + ctxt->arch->ptrsize + framesize + ctxt->arch->ptrsize + StackLimit >= StackMin)
   803  		moreconst1 = framesize;
   804  	moreconst2 = textarg;
   805  	if(moreconst2 == 1) // special marker
   806  		moreconst2 = 0;
   807  	if((moreconst2&7) != 0)
   808  		ctxt->diag("misaligned argument size in stack split");
   809  	// 4 varieties (const1==0 cross const2==0)
   810  	// and 6 subvarieties of (const1==0 and const2!=0)
        	// The symmorestack index is k*2+noctxt, matching the order of
        	// the morename table.
   811  	p = appendp(ctxt, p);
   812  	if(moreconst1 == 0 && moreconst2 == 0) {
   813  		p->as = ACALL;
   814  		p->to.type = D_BRANCH;
   815  		p->to.sym = ctxt->symmorestack[0*2+noctxt];
   816  	} else
   817  	if(moreconst1 != 0 && moreconst2 == 0) {
        		// Frame size only, passed in AX.
   818  		p->as = AMOVL;
   819  		p->from.type = D_CONST;
   820  		p->from.offset = moreconst1;
   821  		p->to.type = D_AX;
   822  
   823  		p = appendp(ctxt, p);
   824  		p->as = ACALL;
   825  		p->to.type = D_BRANCH;
   826  		p->to.sym = ctxt->symmorestack[1*2+noctxt];
   827  	} else
   828  	if(moreconst1 == 0 && moreconst2 <= 48 && moreconst2%8 == 0) {
        		// Small argument size: use the fixed-size entry point
        		// (k = 4..9 for 8..48 bytes), no register setup needed.
   829  		i = moreconst2/8 + 3;
   830  		p->as = ACALL;
   831  		p->to.type = D_BRANCH;
   832  		p->to.sym = ctxt->symmorestack[i*2+noctxt];
   833  	} else
   834  	if(moreconst1 == 0 && moreconst2 != 0) {
        		// Argument size only, passed in AX.
   835  		p->as = AMOVL;
   836  		p->from.type = D_CONST;
   837  		p->from.offset = moreconst2;
   838  		p->to.type = D_AX;
   839  
   840  		p = appendp(ctxt, p);
   841  		p->as = ACALL;
   842  		p->to.type = D_BRANCH;
   843  		p->to.sym = ctxt->symmorestack[2*2+noctxt];
   844  	} else {
   845  		// Pass framesize and argsize.
        		// argsize goes in the high 32 bits of AX, framesize in the low.
   846  		p->as = AMOVQ;
   847  		p->from.type = D_CONST;
   848  		p->from.offset = (uint64)moreconst2 << 32;
   849  		p->from.offset |= moreconst1;
   850  		p->to.type = D_AX;
   851  
   852  		p = appendp(ctxt, p);
   853  		p->as = ACALL;
   854  		p->to.type = D_BRANCH;
   855  		p->to.sym = ctxt->symmorestack[3*2+noctxt];
   856  	}
   857  	
        	// After morestack returns, jump back to the instruction that
        	// follows TEXT so the check is run again.
   858  	p = appendp(ctxt, p);
   859  	p->as = AJMP;
   860  	p->to.type = D_BRANCH;
   861  	p->pcond = ctxt->cursym->text->link;
   862  	
        	// The "enough stack" branch skips the call sequence; the
        	// StackPreempt branch goes straight to the call sequence, which
        	// starts right after q.
   863  	if(q != nil)
   864  		q->pcond = p->link;
   865  	if(q1 != nil)
   866  		q1->pcond = q->link;
   867  
   868  	*jmpok = q;
   869  	return p;
   870  }
   871  
        // Forward declaration: follow calls xfol, which is defined further down.
   872  static void xfol(Link*, Prog*, Prog**);
   873  
   874  static void
   875  follow(Link *ctxt, LSym *s)
   876  {
   877  	Prog *firstp, *lastp;
   878  
   879  	ctxt->cursym = s;
   880  
   881  	firstp = ctxt->arch->prg();
   882  	lastp = firstp;
   883  	xfol(ctxt, s->text, &lastp);
   884  	lastp->link = nil;
   885  	s->text = firstp->link;
   886  }
   887  
   888  static int
   889  nofollow(int a)
   890  {
   891  	switch(a) {
   892  	case AJMP:
   893  	case ARET:
   894  	case AIRETL:
   895  	case AIRETQ:
   896  	case AIRETW:
   897  	case ARETFL:
   898  	case ARETFQ:
   899  	case ARETFW:
   900  	case AUNDEF:
   901  		return 1;
   902  	}
   903  	return 0;
   904  }
   905  
   906  static int
   907  pushpop(int a)
   908  {
   909  	switch(a) {
   910  	case APUSHL:
   911  	case APUSHFL:
   912  	case APUSHQ:
   913  	case APUSHFQ:
   914  	case APUSHW:
   915  	case APUSHFW:
   916  	case APOPL:
   917  	case APOPFL:
   918  	case APOPQ:
   919  	case APOPFQ:
   920  	case APOPW:
   921  	case APOPFW:
   922  		return 1;
   923  	}
   924  	return 0;
   925  }
   926  
   927  static int
   928  relinv(int a)
   929  {
   930  	switch(a) {
   931  	case AJEQ:	return AJNE;
   932  	case AJNE:	return AJEQ;
   933  	case AJLE:	return AJGT;
   934  	case AJLS:	return AJHI;
   935  	case AJLT:	return AJGE;
   936  	case AJMI:	return AJPL;
   937  	case AJGE:	return AJLT;
   938  	case AJPL:	return AJMI;
   939  	case AJGT:	return AJLE;
   940  	case AJHI:	return AJLS;
   941  	case AJCS:	return AJCC;
   942  	case AJCC:	return AJCS;
   943  	case AJPS:	return AJPC;
   944  	case AJPC:	return AJPS;
   945  	case AJOS:	return AJOC;
   946  	case AJOC:	return AJOS;
   947  	}
   948  	sysfatal("unknown relation: %s", anames6[a]);
   949  	return 0;
   950  }
   951  
        // xfol appends the code reachable from p to the list ending at
        // *last, in a new layout order, and advances *last as it goes.
        // Every emitted instruction gets mark set, so mark doubles as the
        // "already placed" flag.
        //
        //   - An unconditional JMP whose target is not yet placed is not
        //     emitted; layout simply continues at the target.
        //   - On reaching an instruction that was already placed, either a
        //     short run of instructions is duplicated (see the loop below)
        //     or a new JMP to the placed copy is emitted.
        //   - At a conditional branch one path is laid out by recursion and
        //     the other by continuing the loop; the branch may be inverted
        //     with relinv so that the preferred path falls through.
   952  static void
   953  xfol(Link *ctxt, Prog *p, Prog **last)
   954  {
   955  	Prog *q;
   956  	int i;
   957  	enum as a;
   958  
   959  loop:
   960  	if(p == nil)
   961  		return;
   962  	if(p->as == AJMP)
   963  	if((q = p->pcond) != nil && q->as != ATEXT) {
   964  		/* mark instruction as done and continue layout at target of jump */
   965  		p->mark = 1;
   966  		p = q;
   967  		if(p->mark == 0)
   968  			goto loop;
   969  	}
   970  	if(p->mark) {
   971  		/* 
   972  		 * p goes here, but already used it elsewhere.
   973  		 * copy up to 4 instructions or else branch to other copy.
   974  		 */
        		/*
        		 * Scan ahead (NOPs do not count towards the 4) for a
        		 * branch with an unplaced target.  The scan gives up at
        		 * the end of the list, at *last, and at any instruction
        		 * that ends the flow or is a PUSH/POP.
        		 */
   975  		for(i=0,q=p; i<4; i++,q=q->link) {
   976  			if(q == nil)
   977  				break;
   978  			if(q == *last)
   979  				break;
   980  			a = q->as;
   981  			if(a == ANOP) {
   982  				i--;
   983  				continue;
   984  			}
   985  			if(nofollow(a) || pushpop(a))	
   986  				break;	// NOTE(rsc): arm does goto copy
   987  			if(q->pcond == nil || q->pcond->mark)
   988  				continue;
   989  			if(a == ACALL || a == ALOOP)
   990  				continue;
        			/*
        			 * Found one: copy instructions from p onward
        			 * (dropping NOPs) until the copy of that branch
        			 * has been emitted.
        			 */
   991  			for(;;) {
   992  				if(p->as == ANOP) {
   993  					p = p->link;
   994  					continue;
   995  				}
   996  				q = copyp(ctxt, p);
   997  				p = p->link;
   998  				q->mark = 1;
   999  				(*last)->link = q;
  1000  				*last = q;
  1001  				if(q->as != a || q->pcond == nil || q->pcond->mark)
  1002  					continue;
  1003  
        				/*
        				 * Invert the copied branch so its old
        				 * target becomes the fall-through path,
        				 * lay that path out by recursion, then
        				 * resume with whatever now follows q.
        				 */
  1004  				q->as = relinv(q->as);
  1005  				p = q->pcond;
  1006  				q->pcond = q->link;
  1007  				q->link = p;
  1008  				xfol(ctxt, q->link, last);
  1009  				p = q->link;
  1010  				if(p->mark)
  1011  					return;
  1012  				goto loop;
  1013  			}
  1014  		} /* */
        		/* no copy was made: emit a jump to the placed instruction */
  1015  		q = ctxt->arch->prg();
  1016  		q->as = AJMP;
  1017  		q->lineno = p->lineno;
  1018  		q->to.type = D_BRANCH;
  1019  		q->to.offset = p->pc;
  1020  		q->pcond = p;
  1021  		p = q;
  1022  	}
  1023  	
  1024  	/* emit p */
  1025  	p->mark = 1;
  1026  	(*last)->link = p;
  1027  	*last = p;
  1028  	a = p->as;
  1029  
  1030  	/* continue loop with what comes after p */
  1031  	if(nofollow(a))
  1032  		return;
  1033  	if(p->pcond != nil && a != ACALL) {
  1034  		/*
  1035  		 * some kind of conditional branch.
  1036  		 * recurse to follow one path.
  1037  		 * continue loop on the other.
  1038  		 */
        		/* replace both successors when brchain returns a non-nil Prog */
  1039  		if((q = brchain(ctxt, p->pcond)) != nil)
  1040  			p->pcond = q;
  1041  		if((q = brchain(ctxt, p->link)) != nil)
  1042  			p->link = q;
  1043  		if(p->from.type == D_CONST) {
  1044  			if(p->from.offset == 1) {
  1045  				/*
  1046  				 * expect conditional jump to be taken.
  1047  				 * rewrite so that's the fall-through case.
  1048  				 */
  1049  				p->as = relinv(a);
  1050  				q = p->link;
  1051  				p->link = p->pcond;
  1052  				p->pcond = q;
  1053  			}
  1054  		} else {			
        			/*
        			 * the fall-through successor is already placed:
        			 * invert the branch so the target falls through
        			 * instead (ALOOP is never inverted).
        			 */
  1055  			q = p->link;
  1056  			if(q->mark)
  1057  			if(a != ALOOP) {
  1058  				p->as = relinv(a);
  1059  				p->link = p->pcond;
  1060  				p->pcond = q;
  1061  			}
  1062  		}
  1063  		xfol(ctxt, p->link, last);
  1064  		if(p->pcond->mark)
  1065  			return;
  1066  		p = p->pcond;
  1067  		goto loop;
  1068  	}
  1069  	p = p->link;
  1070  	goto loop;
  1071  }
  1072  
  1073  static Prog*
  1074  prg(void)
  1075  {
  1076  	Prog *p;
  1077  
  1078  	p = emallocz(sizeof(*p));
  1079  	*p = zprg;
  1080  	return p;
  1081  }
  1082  
        // Architecture descriptor for amd64: 8-byte pointers and registers.
        // It wires the static functions of this file into the LinkArch hook
        // slots and republishes the D_* operand kinds and A* opcodes of this
        // instruction set under the field names of LinkArch.
  1083  LinkArch linkamd64 = {
  1084  	.name = "amd64",
  1085  	.thechar = '6',
  1086  
        	// per-architecture hooks
  1087  	.addstacksplit = addstacksplit,
  1088  	.assemble = span6,
  1089  	.datasize = datasize,
  1090  	.follow = follow,
  1091  	.iscall = iscall,
  1092  	.isdata = isdata,
  1093  	.prg = prg,
  1094  	.progedit = progedit,
  1095  	.settextflag = settextflag,
  1096  	.symtype = symtype,
  1097  	.textflag = textflag,
  1098  
  1099  	.minlc = 1,
  1100  	.ptrsize = 8,
  1101  	.regsize = 8,
  1102  
        	// operand kinds
  1103  	.D_ADDR = D_ADDR,
  1104  	.D_AUTO = D_AUTO,
  1105  	.D_BRANCH = D_BRANCH,
  1106  	.D_CONST = D_CONST,
  1107  	.D_EXTERN = D_EXTERN,
  1108  	.D_FCONST = D_FCONST,
  1109  	.D_NONE = D_NONE,
  1110  	.D_PARAM = D_PARAM,
  1111  	.D_SCONST = D_SCONST,
  1112  	.D_STATIC = D_STATIC,
  1113  
        	// opcodes
  1114  	.ACALL = ACALL,
  1115  	.ADATA = ADATA,
  1116  	.AEND = AEND,
  1117  	.AFUNCDATA = AFUNCDATA,
  1118  	.AGLOBL = AGLOBL,
  1119  	.AJMP = AJMP,
  1120  	.ANOP = ANOP,
  1121  	.APCDATA = APCDATA,
  1122  	.ARET = ARET,
  1123  	.ATEXT = ATEXT,
  1124  	.ATYPE = ATYPE,
  1125  	.AUSEFIELD = AUSEFIELD,
  1126  };
  1127  
        // Architecture descriptor for amd64p32.  It uses the same hooks,
        // operand kinds and opcodes as the amd64 descriptor; the visible
        // differences are the name and the 4-byte pointer size (registers
        // stay 8 bytes wide).
  1128  LinkArch linkamd64p32 = {
  1129  	.name = "amd64p32",
  1130  	.thechar = '6',
  1131  
        	// per-architecture hooks
  1132  	.addstacksplit = addstacksplit,
  1133  	.assemble = span6,
  1134  	.datasize = datasize,
  1135  	.follow = follow,
  1136  	.iscall = iscall,
  1137  	.isdata = isdata,
  1138  	.prg = prg,
  1139  	.progedit = progedit,
  1140  	.settextflag = settextflag,
  1141  	.symtype = symtype,
  1142  	.textflag = textflag,
  1143  
  1144  	.minlc = 1,
  1145  	.ptrsize = 4,
  1146  	.regsize = 8,
  1147  
        	// operand kinds
  1148  	.D_ADDR = D_ADDR,
  1149  	.D_AUTO = D_AUTO,
  1150  	.D_BRANCH = D_BRANCH,
  1151  	.D_CONST = D_CONST,
  1152  	.D_EXTERN = D_EXTERN,
  1153  	.D_FCONST = D_FCONST,
  1154  	.D_NONE = D_NONE,
  1155  	.D_PARAM = D_PARAM,
  1156  	.D_SCONST = D_SCONST,
  1157  	.D_STATIC = D_STATIC,
  1158  
        	// opcodes
  1159  	.ACALL = ACALL,
  1160  	.ADATA = ADATA,
  1161  	.AEND = AEND,
  1162  	.AFUNCDATA = AFUNCDATA,
  1163  	.AGLOBL = AGLOBL,
  1164  	.AJMP = AJMP,
  1165  	.ANOP = ANOP,
  1166  	.APCDATA = APCDATA,
  1167  	.ARET = ARET,
  1168  	.ATEXT = ATEXT,
  1169  	.ATYPE = ATYPE,
  1170  	.AUSEFIELD = AUSEFIELD,
  1171  };