github.com/golang-haiku/go-1.4.3@v0.0.0-20190609233734-1f5ae41cc308/src/liblink/obj6.c (about)

     1  // Inferno utils/6l/pass.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include <bio.h>
    34  #include <link.h>
    35  #include "../cmd/6l/6.out.h"
    36  #include "../runtime/stack.h"
    37  
    38  static Prog zprg = {
    39  	.back = 2,
    40  	.as = AGOK,
    41  	.from = {
    42  		.type = D_NONE,
    43  		.index = D_NONE,
    44  	},
    45  	.to = {
    46  		.type = D_NONE,
    47  		.index = D_NONE,
    48  	},
    49  };
    50  
    51  static void
    52  nopout(Prog *p)
    53  {
    54  	p->as = ANOP;
    55  	p->from.type = D_NONE;
    56  	p->to.type = D_NONE;
    57  }
    58  
    59  static int
    60  symtype(Addr *a)
    61  {
    62  	int t;
    63  
    64  	t = a->type;
    65  	if(t == D_ADDR)
    66  		t = a->index;
    67  	return t;
    68  }
    69  
    70  static int
    71  isdata(Prog *p)
    72  {
    73  	return p->as == ADATA || p->as == AGLOBL;
    74  }
    75  
    76  static int
    77  iscall(Prog *p)
    78  {
    79  	return p->as == ACALL;
    80  }
    81  
    82  static int
    83  datasize(Prog *p)
    84  {
    85  	return p->from.scale;
    86  }
    87  
    88  static int
    89  textflag(Prog *p)
    90  {
    91  	return p->from.scale;
    92  }
    93  
    94  static void
    95  settextflag(Prog *p, int f)
    96  {
    97  	p->from.scale = f;
    98  }
    99  
   100  static void nacladdr(Link*, Prog*, Addr*);
   101  
   102  static int
   103  canuselocaltls(Link *ctxt)
   104  {
   105  	switch(ctxt->headtype) {
   106  	case Hhaiku:
   107  	case Hplan9:
   108  	case Hwindows:
   109  		return 0;
   110  	}
   111  	return 1;
   112  }
   113  
// progedit rewrites the single instruction p in place; in one case it also
// appends a new instruction after p. The steps run in order and later
// steps see the result of earlier ones:
//   1. convert between the one- and two-instruction TLS access forms,
//      depending on canuselocaltls(ctxt);
//   2. on Windows and Plan 9, change a TLS index scale of 1 to 2;
//   3. on NaCl, pass both operands through nacladdr;
//   4. record the current code generation mode in p->mode and consume AMODE;
//   5. turn CALL/JMP/RET to a named extern/static symbol into D_BRANCH;
//   6. replace floating-point constant operands by references to
//      SRODATA symbols named after the constant's bit pattern.
static void
progedit(Link *ctxt, Prog *p)
{
	char literal[64];
	LSym *s;
	Prog *q;

	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 8(AX)(TLS*1), CX // load m into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 8(TLS), CX // load m into CX
	//
	// The 2-instruction and 1-instruction forms correspond roughly to
	// ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system (and probably
	// the -shared flag, eventually), not the link mode. If some link modes
	// on a particular operating system require the 2-instruction form,
	// then all builds for that operating system will use the 2-instruction
	// form, so that the link mode decision can be delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.

	if(canuselocaltls(ctxt)) {
		// Reduce TLS initial exec model to TLS local exec model.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris)
			nopout(p);
		// Source operand of the form off(reg)(TLS*n): drop the index and
		// address directly off TLS.
		if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) {
			p->from.type = D_INDIR+D_TLS;
			p->from.scale = 0;
			p->from.index = D_NONE;
		}
		// Same rewrite for the destination operand.
		if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) {
			p->to.type = D_INDIR+D_TLS;
			p->to.scale = 0;
			p->to.index = D_NONE;
		}
	} else {
		// As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
		// The instruction
		//	MOVQ off(TLS), BX
		// becomes the sequence
		//	MOVQ TLS, BX
		//	MOVQ off(BX)(TLS*1), BX
		// This allows the C compilers to emit references to m and g using the direct off(TLS) form.
		if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) {
			// q is the second instruction of the pair; it keeps the
			// original offset and destination. p becomes the load of
			// the TLS base into the destination register.
			q = appendp(ctxt, p);
			q->as = p->as;
			q->from = p->from;
			q->from.type = D_INDIR + p->to.type;
			q->from.index = D_TLS;
			q->from.scale = 2; // TODO: use 1
			q->to = p->to;
			p->from.type = D_TLS;
			p->from.index = D_NONE;
			p->from.offset = 0;
		}
	}

	// TODO: Remove.
	// On Windows and Plan 9 a TLS index written with scale 1 is changed to scale 2.
	if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) {
		if(p->from.scale == 1 && p->from.index == D_TLS)
			p->from.scale = 2;
		if(p->to.scale == 1 && p->to.index == D_TLS)
			p->to.scale = 2;
	}

	// NaCl: rewrite both operands into the addressing forms nacladdr accepts.
	if(ctxt->headtype == Hnacl) {
		nacladdr(ctxt, p, &p->from);
		nacladdr(ctxt, p, &p->to);
	}

	// Maintain information about code generation mode.
	// The mode defaults to 64 the first time through.
	if(ctxt->mode == 0)
		ctxt->mode = 64;
	p->mode = ctxt->mode;

	// AMODE changes ctxt->mode when its operand is 16, 32 or 64 (other
	// values are ignored) and is then replaced by a NOP. Note that p->mode
	// above was already set from the previous mode.
	switch(p->as) {
	case AMODE:
		if(p->from.type == D_CONST || p->from.type == D_INDIR+D_NONE) {
			switch((int)p->from.offset) {
			case 16:
			case 32:
			case 64:
				ctxt->mode = p->from.offset;
				break;
			}
		}
		nopout(p);
		break;
	}

	// Rewrite CALL/JMP/RET to symbol as D_BRANCH.
	switch(p->as) {
	case ACALL:
	case AJMP:
	case ARET:
		if((p->to.type == D_EXTERN || p->to.type == D_STATIC) && p->to.sym != nil)
			p->to.type = D_BRANCH;
		break;
	}

	// Rewrite float constants to values stored in memory.
	switch(p->as) {
	// Single-precision instructions.
	case AFMOVF:
	case AFADDF:
	case AFSUBF:
	case AFSUBRF:
	case AFMULF:
	case AFDIVF:
	case AFDIVRF:
	case AFCOMF:
	case AFCOMFP:
	case AMOVSS:
	case AADDSS:
	case ASUBSS:
	case AMULSS:
	case ADIVSS:
	case ACOMISS:
	case AUCOMISS:
		if(p->from.type == D_FCONST) {
			int32 i32;
			float32 f32;
			// Narrow the constant to float32 and take its raw bits; the
			// bits both name the symbol and become its value.
			f32 = p->from.u.dval;
			memmove(&i32, &f32, 4);
			sprint(literal, "$f32.%08ux", (uint32)i32);
			s = linklookup(ctxt, literal, 0);
			// type == 0: symbol not set up yet, so initialize it here.
			if(s->type == 0) {
				s->type = SRODATA;
				adduint32(ctxt, s, i32);
				s->reachable = 0;
			}
			p->from.type = D_EXTERN;
			p->from.sym = s;
			p->from.offset = 0;
		}
		break;

	// Double-precision instructions.
	case AFMOVD:
	case AFADDD:
	case AFSUBD:
	case AFSUBRD:
	case AFMULD:
	case AFDIVD:
	case AFDIVRD:
	case AFCOMD:
	case AFCOMDP:
	case AMOVSD:
	case AADDSD:
	case ASUBSD:
	case AMULSD:
	case ADIVSD:
	case ACOMISD:
	case AUCOMISD:
		if(p->from.type == D_FCONST) {
			int64 i64;
			// Use the raw 64 bits of the double for the name and value.
			memmove(&i64, &p->from.u.dval, 8);
			sprint(literal, "$f64.%016llux", (uvlong)i64);
			s = linklookup(ctxt, literal, 0);
			// type == 0: symbol not set up yet, so initialize it here.
			if(s->type == 0) {
				s->type = SRODATA;
				adduint64(ctxt, s, i64);
				s->reachable = 0;
			}
			p->from.type = D_EXTERN;
			p->from.sym = s;
			p->from.offset = 0;
		}
		break;
	}
}
   312  
   313  static void
   314  nacladdr(Link *ctxt, Prog *p, Addr *a)
   315  {
   316  	if(p->as == ALEAL || p->as == ALEAQ)
   317  		return;
   318  	
   319  	if(a->type == D_BP || a->type == D_INDIR+D_BP) {
   320  		ctxt->diag("invalid address: %P", p);
   321  		return;
   322  	}
   323  	if(a->type == D_INDIR+D_TLS)
   324  		a->type = D_INDIR+D_BP;
   325  	else if(a->type == D_TLS)
   326  		a->type = D_BP;
   327  	if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) {
   328  		switch(a->type) {
   329  		case D_INDIR+D_BP:
   330  		case D_INDIR+D_SP:
   331  		case D_INDIR+D_R15:
   332  			// all ok
   333  			break;
   334  		default:
   335  			if(a->index != D_NONE)
   336  				ctxt->diag("invalid address %P", p);
   337  			a->index = a->type - D_INDIR;
   338  			if(a->index != D_NONE)
   339  				a->scale = 1;
   340  			a->type = D_INDIR+D_R15;
   341  			break;
   342  		}
   343  	}
   344  }
   345  
   346  static Prog*	load_g_cx(Link*, Prog*);
   347  static Prog*	stacksplit(Link*, Prog*, int32, int32, int, Prog**);
   348  static void	indir_cx(Link*, Addr*);
   349  
   350  static void
   351  parsetextconst(vlong arg, vlong *textstksiz, vlong *textarg)
   352  {
   353  	*textstksiz = arg & 0xffffffffLL;
   354  	if(*textstksiz & 0x80000000LL)
   355  		*textstksiz = -(-*textstksiz & 0xffffffffLL);
   356  
   357  	*textarg = (arg >> 32) & 0xffffffffLL;
   358  	if(*textarg & 0x80000000LL)
   359  		*textarg = 0;
   360  	*textarg = (*textarg+7) & ~7LL;
   361  }
   362  
// addstacksplit rewrites the instruction list of cursym, inserting the
// function prologue and expanding each RET into the epilogue. In order:
//   - look up (once per ctxt) the runtime symbols used by this pass;
//   - decode the frame and argument sizes from the TEXT instruction;
//   - mark functions with a small frame and no calls as NOSPLIT;
//   - emit the load of g into CX and the stack-split check when needed;
//   - emit the frame allocation (AADJSP), or two NOPs with cancelling
//     spadj values when the frame is empty;
//   - for WRAPPER functions, emit the g->panic->argp adjustment;
//   - when ctxt->debugzerostack is set, emit code to zero the frame;
//   - walk the remaining instructions, adjusting D_AUTO/D_PARAM offsets,
//     tracking PUSH/POP, and rewriting each RET.
// It returns early, without touching the list, when the symbol has no
// text or consists of the TEXT instruction alone.
static void
addstacksplit(Link *ctxt, LSym *cursym)
{
	Prog *p, *q, *p1, *p2;
	int32 autoffset, deltasp;
	int a, pcsize;
	vlong textstksiz, textarg;

	// Lazily resolve the symbols this pass refers to.
	if(ctxt->tlsg == nil)
		ctxt->tlsg = linklookup(ctxt, "runtime.tlsg", 0);
	if(ctxt->symmorestack[0] == nil) {
		ctxt->symmorestack[0] = linklookup(ctxt, "runtime.morestack", 0);
		ctxt->symmorestack[1] = linklookup(ctxt, "runtime.morestack_noctxt", 0);
	}

	if(ctxt->headtype == Hplan9 && ctxt->plan9privates == nil)
		ctxt->plan9privates = linklookup(ctxt, "_privates", 0);

	ctxt->cursym = cursym;

	if(cursym->text == nil || cursym->text->link == nil)
		return;

	// p is the TEXT instruction; its to.offset packs frame size (low
	// half) and argument size (high half).
	p = cursym->text;
	parsetextconst(p->to.offset, &textstksiz, &textarg);
	autoffset = textstksiz;
	if(autoffset < 0)
		autoffset = 0;

	// args is the raw high half (not rounded, unlike textarg);
	// locals keeps the possibly negative frame size.
	cursym->args = p->to.offset>>32;
	cursym->locals = textstksiz;

	// A function with a frame smaller than StackSmall that contains no
	// CALL (and no DUFFCOPY/DUFFZERO when the frame is within 8 bytes of
	// StackSmall) is marked NOSPLIT automatically.
	if(autoffset < StackSmall && !(p->from.scale & NOSPLIT)) {
		for(q = p; q != nil; q = q->link) {
			if(q->as == ACALL)
				goto noleaf;
			if((q->as == ADUFFCOPY || q->as == ADUFFZERO) && autoffset >= StackSmall - 8)
				goto noleaf;
		}
		p->from.scale |= NOSPLIT;
	noleaf:;
	}

	// q receives, from stacksplit, the jump taken when no split is needed.
	q = nil;
	if(!(p->from.scale & NOSPLIT) || (p->from.scale & WRAPPER)) {
		p = appendp(ctxt, p);
		p = load_g_cx(ctxt, p); // load g into CX
	}
	if(!(cursym->text->from.scale & NOSPLIT))
		p = stacksplit(ctxt, p, autoffset, textarg, !(cursym->text->from.scale&NEEDCTXT), &q); // emit split check

	if(autoffset) {
		if(autoffset%ctxt->arch->regsize != 0)
			ctxt->diag("unaligned stack size %d", autoffset);
		p = appendp(ctxt, p);
		p->as = AADJSP;
		p->from.type = D_CONST;
		p->from.offset = autoffset;
		p->spadj = autoffset;
	} else {
		// zero-byte stack adjustment.
		// Insert a fake non-zero adjustment so that stkcheck can
		// recognize the end of the stack-splitting prolog.
		p = appendp(ctxt, p);
		p->as = ANOP;
		p->spadj = -ctxt->arch->ptrsize;
		p = appendp(ctxt, p);
		p->as = ANOP;
		p->spadj = ctxt->arch->ptrsize;
	}
	// Aim the "no split needed" jump at the instruction just emitted
	// (the AADJSP, or the second NOP).
	if(q != nil)
		q->pcond = p;
	deltasp = autoffset;

	if(cursym->text->from.scale & WRAPPER) {
		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
		//
		//	MOVQ g_panic(CX), BX
		//	TESTQ BX, BX
		//	JEQ end
		//	LEAQ (autoffset+8)(SP), DI
		//	CMPQ panic_argp(BX), DI
		//	JNE end
		//	MOVQ SP, panic_argp(BX)
		// end:
		//	NOP
		//
		// The NOP is needed to give the jumps somewhere to land.
		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
		//
		// On NaCl each instruction below uses its 32-bit (L) opcode and
		// memory operands are addressed as (R15)(reg*1).

		p = appendp(ctxt, p);
		p->as = AMOVQ;
		p->from.type = D_INDIR+D_CX;
		p->from.offset = 4*ctxt->arch->ptrsize; // G.panic
		p->to.type = D_BX;
		if(ctxt->headtype == Hnacl) {
			p->as = AMOVL;
			p->from.type = D_INDIR+D_R15;
			p->from.scale = 1;
			p->from.index = D_CX;
		}

		p = appendp(ctxt, p);
		p->as = ATESTQ;
		p->from.type = D_BX;
		p->to.type = D_BX;
		if(ctxt->headtype == Hnacl)
			p->as = ATESTL;

		p = appendp(ctxt, p);
		p->as = AJEQ;
		p->to.type = D_BRANCH;
		p1 = p;	// JEQ end; target filled in below

		p = appendp(ctxt, p);
		p->as = ALEAQ;
		p->from.type = D_INDIR+D_SP;
		p->from.offset = autoffset+8;
		p->to.type = D_DI;
		if(ctxt->headtype == Hnacl)
			p->as = ALEAL;

		p = appendp(ctxt, p);
		p->as = ACMPQ;
		p->from.type = D_INDIR+D_BX;
		p->from.offset = 0; // Panic.argp
		p->to.type = D_DI;
		if(ctxt->headtype == Hnacl) {
			p->as = ACMPL;
			p->from.type = D_INDIR+D_R15;
			p->from.scale = 1;
			p->from.index = D_BX;
		}

		p = appendp(ctxt, p);
		p->as = AJNE;
		p->to.type = D_BRANCH;
		p2 = p;	// JNE end; target filled in below

		p = appendp(ctxt, p);
		p->as = AMOVQ;
		p->from.type = D_SP;
		p->to.type = D_INDIR+D_BX;
		p->to.offset = 0; // Panic.argp
		if(ctxt->headtype == Hnacl) {
			p->as = AMOVL;
			p->to.type = D_INDIR+D_R15;
			p->to.scale = 1;
			p->to.index = D_BX;
		}

		// end: landing pad for both conditional jumps.
		p = appendp(ctxt, p);
		p->as = ANOP;
		p1->pcond = p;
		p2->pcond = p;
	}

	if(ctxt->debugzerostack && autoffset && !(cursym->text->from.scale&NOSPLIT)) {
		// 6l -Z means zero the stack frame on entry.
		// This slows down function calls but can help avoid
		// false positives in garbage collection.
		//
		//	MOVQ SP, DI
		//	MOVQ $(autoffset/8), CX
		//	MOVQ $0, AX
		//	REP
		//	STOSQ
		p = appendp(ctxt, p);
		p->as = AMOVQ;
		p->from.type = D_SP;
		p->to.type = D_DI;

		p = appendp(ctxt, p);
		p->as = AMOVQ;
		p->from.type = D_CONST;
		p->from.offset = autoffset/8;
		p->to.type = D_CX;

		p = appendp(ctxt, p);
		p->as = AMOVQ;
		p->from.type = D_CONST;
		p->from.offset = 0;
		p->to.type = D_AX;

		p = appendp(ctxt, p);
		p->as = AREP;

		p = appendp(ctxt, p);
		p->as = ASTOSQ;
	}

	// Walk the rest of the function, starting at the last prologue
	// instruction. deltasp tracks how far SP currently is below its
	// value at function entry (frame plus outstanding pushes).
	for(; p != nil; p = p->link) {
		pcsize = p->mode/8;
		// Locals are addressed relative to the current SP; parameters
		// additionally skip pcsize bytes (p->mode/8).
		a = p->from.type;
		if(a == D_AUTO)
			p->from.offset += deltasp;
		if(a == D_PARAM)
			p->from.offset += deltasp + pcsize;
		a = p->to.type;
		if(a == D_AUTO)
			p->to.offset += deltasp;
		if(a == D_PARAM)
			p->to.offset += deltasp + pcsize;

		// PUSH/POP variants adjust deltasp and record spadj; only RET
		// falls out of the switch to the epilogue code below.
		switch(p->as) {
		default:
			continue;
		case APUSHL:
		case APUSHFL:
			deltasp += 4;
			p->spadj = 4;
			continue;
		case APUSHQ:
		case APUSHFQ:
			deltasp += 8;
			p->spadj = 8;
			continue;
		case APUSHW:
		case APUSHFW:
			deltasp += 2;
			p->spadj = 2;
			continue;
		case APOPL:
		case APOPFL:
			deltasp -= 4;
			p->spadj = -4;
			continue;
		case APOPQ:
		case APOPFQ:
			deltasp -= 8;
			p->spadj = -8;
			continue;
		case APOPW:
		case APOPFW:
			deltasp -= 2;
			p->spadj = -2;
			continue;
		case ARET:
			break;
		}

		// At a RET every push must have been matched by a pop.
		if(autoffset != deltasp)
			ctxt->diag("unbalanced PUSH/POP");

		if(autoffset) {
			// Reuse the RET instruction for the frame release and
			// append a fresh RET after it.
			p->as = AADJSP;
			p->from.type = D_CONST;
			p->from.offset = -autoffset;
			p->spadj = -autoffset;
			p = appendp(ctxt, p);
			p->as = ARET;
			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p->spadj = +autoffset;
		}
		if(p->to.sym) // retjmp
			p->as = AJMP;
	}
}
   618  
   619  static void
   620  indir_cx(Link *ctxt, Addr *a)
   621  {
   622  	if(ctxt->headtype == Hnacl) {
   623  		a->type = D_INDIR + D_R15;
   624  		a->index = D_CX;
   625  		a->scale = 1;
   626  		return;
   627  	}
   628  
   629  	a->type = D_INDIR+D_CX;
   630  }
   631  
   632  // Append code to p to load g into cx.
   633  // Overwrites p with the first instruction (no first appendp).
   634  // Overwriting p is unusual but it lets use this in both the
   635  // prologue (caller must call appendp first) and in the epilogue.
   636  // Returns last new instruction.
   637  static Prog*
   638  load_g_cx(Link *ctxt, Prog *p)
   639  {	
   640  	Prog *next;
   641  
   642  	p->as = AMOVQ;
   643  	if(ctxt->arch->ptrsize == 4)
   644  		p->as = AMOVL;
   645  	p->from.type = D_INDIR+D_TLS;
   646  	p->from.offset = 0;
   647  	p->to.type = D_CX;
   648  	
   649  	next = p->link;
   650  	progedit(ctxt, p);
   651  	while(p->link != next)
   652  		p = p->link;
   653  	
   654  	if(p->from.index == D_TLS)
   655  		p->from.scale = 2;
   656  
   657  	return p;
   658  }
   659  
   660  // Append code to p to check for stack split.
   661  // Appends to (does not overwrite) p.
   662  // Assumes g is in CX.
   663  // Returns last new instruction.
   664  // On return, *jmpok is the instruction that should jump
   665  // to the stack frame allocation if no split is needed.
   666  static Prog*
   667  stacksplit(Link *ctxt, Prog *p, int32 framesize, int32 textarg, int noctxt, Prog **jmpok)
   668  {
   669  	Prog *q, *q1;
   670  	int cmp, lea, mov, sub;
   671  
   672  	USED(textarg);
   673  	cmp = ACMPQ;
   674  	lea = ALEAQ;
   675  	mov = AMOVQ;
   676  	sub = ASUBQ;
   677  
   678  	if(ctxt->headtype == Hnacl) {
   679  		cmp = ACMPL;
   680  		lea = ALEAL;
   681  		mov = AMOVL;
   682  		sub = ASUBL;
   683  	}
   684  
   685  	q1 = nil;
   686  	if(framesize <= StackSmall) {
   687  		// small stack: SP <= stackguard
   688  		//	CMPQ SP, stackguard
   689  		p = appendp(ctxt, p);
   690  		p->as = cmp;
   691  		p->from.type = D_SP;
   692  		indir_cx(ctxt, &p->to);
   693  		p->to.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
   694  		if(ctxt->cursym->cfunc)
   695  			p->to.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
   696  	} else if(framesize <= StackBig) {
   697  		// large stack: SP-framesize <= stackguard-StackSmall
   698  		//	LEAQ -xxx(SP), AX
   699  		//	CMPQ AX, stackguard
   700  		p = appendp(ctxt, p);
   701  		p->as = lea;
   702  		p->from.type = D_INDIR+D_SP;
   703  		p->from.offset = -(framesize-StackSmall);
   704  		p->to.type = D_AX;
   705  
   706  		p = appendp(ctxt, p);
   707  		p->as = cmp;
   708  		p->from.type = D_AX;
   709  		indir_cx(ctxt, &p->to);
   710  		p->to.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
   711  		if(ctxt->cursym->cfunc)
   712  			p->to.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
   713  	} else {
   714  		// Such a large stack we need to protect against wraparound.
   715  		// If SP is close to zero:
   716  		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
   717  		// The +StackGuard on both sides is required to keep the left side positive:
   718  		// SP is allowed to be slightly below stackguard. See stack.h.
   719  		//
   720  		// Preemption sets stackguard to StackPreempt, a very large value.
   721  		// That breaks the math above, so we have to check for that explicitly.
   722  		//	MOVQ	stackguard, CX
   723  		//	CMPQ	CX, $StackPreempt
   724  		//	JEQ	label-of-call-to-morestack
   725  		//	LEAQ	StackGuard(SP), AX
   726  		//	SUBQ	CX, AX
   727  		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))
   728  
   729  		p = appendp(ctxt, p);
   730  		p->as = mov;
   731  		indir_cx(ctxt, &p->from);
   732  		p->from.offset = 2*ctxt->arch->ptrsize;	// G.stackguard0
   733  		if(ctxt->cursym->cfunc)
   734  			p->from.offset = 3*ctxt->arch->ptrsize;	// G.stackguard1
   735  		p->to.type = D_SI;
   736  
   737  		p = appendp(ctxt, p);
   738  		p->as = cmp;
   739  		p->from.type = D_SI;
   740  		p->to.type = D_CONST;
   741  		p->to.offset = StackPreempt;
   742  
   743  		p = appendp(ctxt, p);
   744  		p->as = AJEQ;
   745  		p->to.type = D_BRANCH;
   746  		q1 = p;
   747  
   748  		p = appendp(ctxt, p);
   749  		p->as = lea;
   750  		p->from.type = D_INDIR+D_SP;
   751  		p->from.offset = StackGuard;
   752  		p->to.type = D_AX;
   753  		
   754  		p = appendp(ctxt, p);
   755  		p->as = sub;
   756  		p->from.type = D_SI;
   757  		p->to.type = D_AX;
   758  		
   759  		p = appendp(ctxt, p);
   760  		p->as = cmp;
   761  		p->from.type = D_AX;
   762  		p->to.type = D_CONST;
   763  		p->to.offset = framesize+(StackGuard-StackSmall);
   764  	}					
   765  
   766  	// common
   767  	p = appendp(ctxt, p);
   768  	p->as = AJHI;
   769  	p->to.type = D_BRANCH;
   770  	q = p;
   771  
   772  	p = appendp(ctxt, p);
   773  	p->as = ACALL;
   774  	p->to.type = D_BRANCH;
   775  	if(ctxt->cursym->cfunc)
   776  		p->to.sym = linklookup(ctxt, "runtime.morestackc", 0);
   777  	else
   778  		p->to.sym = ctxt->symmorestack[noctxt];
   779  	
   780  	p = appendp(ctxt, p);
   781  	p->as = AJMP;
   782  	p->to.type = D_BRANCH;
   783  	p->pcond = ctxt->cursym->text->link;
   784  	
   785  	if(q != nil)
   786  		q->pcond = p->link;
   787  	if(q1 != nil)
   788  		q1->pcond = q->link;
   789  
   790  	*jmpok = q;
   791  	return p;
   792  }
   793  
   794  static void xfol(Link*, Prog*, Prog**);
   795  
   796  static void
   797  follow(Link *ctxt, LSym *s)
   798  {
   799  	Prog *firstp, *lastp;
   800  
   801  	ctxt->cursym = s;
   802  
   803  	firstp = ctxt->arch->prg();
   804  	lastp = firstp;
   805  	xfol(ctxt, s->text, &lastp);
   806  	lastp->link = nil;
   807  	s->text = firstp->link;
   808  }
   809  
   810  static int
   811  nofollow(int a)
   812  {
   813  	switch(a) {
   814  	case AJMP:
   815  	case ARET:
   816  	case AIRETL:
   817  	case AIRETQ:
   818  	case AIRETW:
   819  	case ARETFL:
   820  	case ARETFQ:
   821  	case ARETFW:
   822  	case AUNDEF:
   823  		return 1;
   824  	}
   825  	return 0;
   826  }
   827  
   828  static int
   829  pushpop(int a)
   830  {
   831  	switch(a) {
   832  	case APUSHL:
   833  	case APUSHFL:
   834  	case APUSHQ:
   835  	case APUSHFQ:
   836  	case APUSHW:
   837  	case APUSHFW:
   838  	case APOPL:
   839  	case APOPFL:
   840  	case APOPQ:
   841  	case APOPFQ:
   842  	case APOPW:
   843  	case APOPFW:
   844  		return 1;
   845  	}
   846  	return 0;
   847  }
   848  
   849  static int
   850  relinv(int a)
   851  {
   852  	switch(a) {
   853  	case AJEQ:	return AJNE;
   854  	case AJNE:	return AJEQ;
   855  	case AJLE:	return AJGT;
   856  	case AJLS:	return AJHI;
   857  	case AJLT:	return AJGE;
   858  	case AJMI:	return AJPL;
   859  	case AJGE:	return AJLT;
   860  	case AJPL:	return AJMI;
   861  	case AJGT:	return AJLE;
   862  	case AJHI:	return AJLS;
   863  	case AJCS:	return AJCC;
   864  	case AJCC:	return AJCS;
   865  	case AJPS:	return AJPC;
   866  	case AJPC:	return AJPS;
   867  	case AJOS:	return AJOC;
   868  	case AJOC:	return AJOS;
   869  	}
   870  	sysfatal("unknown relation: %s", anames6[a]);
   871  	return 0;
   872  }
   873  
// xfol lays out the instructions reachable from p by appending them to
// the list whose current tail is *last; *last is advanced as
// instructions are added. An instruction is marked (mark = 1) when it
// is emitted. When the walk arrives at an instruction that is already
// marked, it either appends copies of a short run of instructions or
// appends a new JMP to the existing one. For a conditional branch it
// recurses on one successor and continues the loop on the other.
static void
xfol(Link *ctxt, Prog *p, Prog **last)
{
	Prog *q;
	int i;
	int a;

loop:
	if(p == nil)
		return;
	// A JMP with a known target (other than a TEXT) is not emitted:
	// layout simply continues at its target.
	if(p->as == AJMP)
	if((q = p->pcond) != nil && q->as != ATEXT) {
		/* mark instruction as done and continue layout at target of jump */
		p->mark = 1;
		p = q;
		if(p->mark == 0)
			goto loop;
	}
	if(p->mark) {
		/* 
		 * p goes here, but already used it elsewhere.
		 * copy up to 4 instructions or else branch to other copy.
		 */
		// Scan ahead from p; i counts non-NOP instructions only.
		for(i=0,q=p; i<4; i++,q=q->link) {
			if(q == nil)
				break;
			if(q == *last)
				break;
			a = q->as;
			if(a == ANOP) {
				i--;
				continue;
			}
			if(nofollow(a) || pushpop(a))	
				break;	// NOTE(rsc): arm does goto copy
			// Keep scanning unless q is a branch (not CALL/LOOP)
			// whose target has not been laid out yet.
			if(q->pcond == nil || q->pcond->mark)
				continue;
			if(a == ACALL || a == ALOOP)
				continue;
			// Found one: append copies of the instructions starting
			// at p (NOPs skipped) until the copy of a branch with
			// opcode a and an unplaced target has been appended.
			for(;;) {
				if(p->as == ANOP) {
					p = p->link;
					continue;
				}
				q = copyp(ctxt, p);
				p = p->link;
				q->mark = 1;
				(*last)->link = q;
				*last = q;
				if(q->as != a || q->pcond == nil || q->pcond->mark)
					continue;

				// Invert the copied branch and swap its two
				// successors, so the old target becomes the
				// fall-through, then lay that path out.
				q->as = relinv(q->as);
				p = q->pcond;
				q->pcond = q->link;
				q->link = p;
				xfol(ctxt, q->link, last);
				p = q->link;
				if(p->mark)
					return;
				goto loop;
			}
		} /* */
		// Nothing was copied: emit a new JMP to the already-placed p.
		q = ctxt->arch->prg();
		q->as = AJMP;
		q->lineno = p->lineno;
		q->to.type = D_BRANCH;
		q->to.offset = p->pc;
		q->pcond = p;
		p = q;
	}
	
	/* emit p */
	p->mark = 1;
	(*last)->link = p;
	*last = p;
	a = p->as;

	/* continue loop with what comes after p */
	if(nofollow(a))
		return;
	if(p->pcond != nil && a != ACALL) {
		/*
		 * some kind of conditional branch.
		 * recurse to follow one path.
		 * continue loop on the other.
		 */
		// NOTE(review): brchain is defined elsewhere; presumably it
		// skips over chains of unconditional jumps - confirm.
		if((q = brchain(ctxt, p->pcond)) != nil)
			p->pcond = q;
		if((q = brchain(ctxt, p->link)) != nil)
			p->link = q;
		if(p->from.type == D_CONST) {
			if(p->from.offset == 1) {
				/*
				 * expect conditional jump to be taken.
				 * rewrite so that's the fall-through case.
				 */
				p->as = relinv(a);
				q = p->link;
				p->link = p->pcond;
				p->pcond = q;
			}
		} else {			
			// If the fall-through successor is already placed,
			// invert the branch so the other successor becomes the
			// fall-through. LOOP is never inverted.
			q = p->link;
			if(q->mark)
			if(a != ALOOP) {
				p->as = relinv(a);
				p->link = p->pcond;
				p->pcond = q;
			}
		}
		xfol(ctxt, p->link, last);
		if(p->pcond->mark)
			return;
		p = p->pcond;
		goto loop;
	}
	p = p->link;
	goto loop;
}
   994  
   995  static Prog*
   996  prg(void)
   997  {
   998  	Prog *p;
   999  
  1000  	p = emallocz(sizeof(*p));
  1001  	*p = zprg;
  1002  	return p;
  1003  }
  1004  
  1005  LinkArch linkamd64 = {
  1006  	.name = "amd64",
  1007  	.thechar = '6',
  1008  	.endian = LittleEndian,
  1009  
  1010  	.addstacksplit = addstacksplit,
  1011  	.assemble = span6,
  1012  	.datasize = datasize,
  1013  	.follow = follow,
  1014  	.iscall = iscall,
  1015  	.isdata = isdata,
  1016  	.prg = prg,
  1017  	.progedit = progedit,
  1018  	.settextflag = settextflag,
  1019  	.symtype = symtype,
  1020  	.textflag = textflag,
  1021  
  1022  	.minlc = 1,
  1023  	.ptrsize = 8,
  1024  	.regsize = 8,
  1025  
  1026  	.D_ADDR = D_ADDR,
  1027  	.D_AUTO = D_AUTO,
  1028  	.D_BRANCH = D_BRANCH,
  1029  	.D_CONST = D_CONST,
  1030  	.D_EXTERN = D_EXTERN,
  1031  	.D_FCONST = D_FCONST,
  1032  	.D_NONE = D_NONE,
  1033  	.D_PARAM = D_PARAM,
  1034  	.D_SCONST = D_SCONST,
  1035  	.D_STATIC = D_STATIC,
  1036  
  1037  	.ACALL = ACALL,
  1038  	.ADATA = ADATA,
  1039  	.AEND = AEND,
  1040  	.AFUNCDATA = AFUNCDATA,
  1041  	.AGLOBL = AGLOBL,
  1042  	.AJMP = AJMP,
  1043  	.ANOP = ANOP,
  1044  	.APCDATA = APCDATA,
  1045  	.ARET = ARET,
  1046  	.ATEXT = ATEXT,
  1047  	.ATYPE = ATYPE,
  1048  	.AUSEFIELD = AUSEFIELD,
  1049  };
  1050  
  1051  LinkArch linkamd64p32 = {
  1052  	.name = "amd64p32",
  1053  	.thechar = '6',
  1054  	.endian = LittleEndian,
  1055  
  1056  	.addstacksplit = addstacksplit,
  1057  	.assemble = span6,
  1058  	.datasize = datasize,
  1059  	.follow = follow,
  1060  	.iscall = iscall,
  1061  	.isdata = isdata,
  1062  	.prg = prg,
  1063  	.progedit = progedit,
  1064  	.settextflag = settextflag,
  1065  	.symtype = symtype,
  1066  	.textflag = textflag,
  1067  
  1068  	.minlc = 1,
  1069  	.ptrsize = 4,
  1070  	.regsize = 8,
  1071  
  1072  	.D_ADDR = D_ADDR,
  1073  	.D_AUTO = D_AUTO,
  1074  	.D_BRANCH = D_BRANCH,
  1075  	.D_CONST = D_CONST,
  1076  	.D_EXTERN = D_EXTERN,
  1077  	.D_FCONST = D_FCONST,
  1078  	.D_NONE = D_NONE,
  1079  	.D_PARAM = D_PARAM,
  1080  	.D_SCONST = D_SCONST,
  1081  	.D_STATIC = D_STATIC,
  1082  
  1083  	.ACALL = ACALL,
  1084  	.ADATA = ADATA,
  1085  	.AEND = AEND,
  1086  	.AFUNCDATA = AFUNCDATA,
  1087  	.AGLOBL = AGLOBL,
  1088  	.AJMP = AJMP,
  1089  	.ANOP = ANOP,
  1090  	.APCDATA = APCDATA,
  1091  	.ARET = ARET,
  1092  	.ATEXT = ATEXT,
  1093  	.ATYPE = ATYPE,
  1094  	.AUSEFIELD = AUSEFIELD,
  1095  };