github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/6g/peep.c (about)

     1  // Derived from Inferno utils/6c/peep.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/peep.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  #include <u.h>
    32  #include <libc.h>
    33  #include "gg.h"
    34  #include "opt.h"
    35  
    36  static void	conprop(Reg *r);
    37  static void elimshortmov(Reg *r);
    38  static int prevl(Reg *r, int reg);
    39  static void pushback(Reg *r);
    40  static int regconsttyp(Adr*);
    41  
    42  // do we need the carry bit
    43  static int
    44  needc(Prog *p)
    45  {
    46  	while(p != P) {
    47  		switch(p->as) {
    48  		case AADCL:
    49  		case AADCQ:
    50  		case ASBBL:
    51  		case ASBBQ:
    52  		case ARCRB:
    53  		case ARCRW:
    54  		case ARCRL:
    55  		case ARCRQ:
    56  			return 1;
    57  		case AADDB:
    58  		case AADDW:
    59  		case AADDL:
    60  		case AADDQ:
    61  		case ASUBB:
    62  		case ASUBW:
    63  		case ASUBL:
    64  		case ASUBQ:
    65  		case AJMP:
    66  		case ARET:
    67  		case ACALL:
    68  			return 0;
    69  		default:
    70  			if(p->to.type == D_BRANCH)
    71  				return 0;
    72  		}
    73  		p = p->link;
    74  	}
    75  	return 0;
    76  }
    77  
    78  static Reg*
    79  rnops(Reg *r)
    80  {
    81  	Prog *p;
    82  	Reg *r1;
    83  
    84  	if(r != R)
    85  	for(;;) {
    86  		p = r->prog;
    87  		if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
    88  			break;
    89  		r1 = uniqs(r);
    90  		if(r1 == R)
    91  			break;
    92  		r = r1;
    93  	}
    94  	return r;
    95  }
    96  
/*
 * peep runs the peephole passes over the Reg list that starts at firstr.
 * In order, it:
 *	1. gives a Reg to every Prog lying between two existing Regs,
 *	2. widens byte/word operations (elimshortmov),
 *	3. removes repeated constant loads (conprop),
 *	4. repeats copy propagation, redundant-extension removal and
 *	   ADD/SUB $±1 -> INC/DEC rewriting until a pass changes nothing,
 *	5. removes MOVLQZX R,R where prevl shows R was already truncated,
 *	   and turns register-to-register MOVSD into MOVAPD,
 *	6. moves loads earlier (pushback).
 */
void
peep(void)
{
	Reg *r, *r1, *r2;
	Prog *p, *p1;
	int t;

	/*
	 * complete R structure
	 */
	t = 0;
	for(r=firstr; r!=R; r=r1) {
		r1 = r->link;
		if(r1 == R)
			break;
		// walk the Progs strictly between r->prog and r1->prog
		p = r->prog->link;
		while(p != r1->prog)
		switch(p->as) {
		default:
			// allocate a Reg for p and splice it between r and r1,
			// both in the link chain and in the p1/s1 flow edges.
			r2 = rega();
			r->link = r2;
			r2->link = r1;

			r2->prog = p;
			p->reg = r2;

			r2->p1 = r;
			r->s1 = r2;
			r2->s1 = r1;
			r1->p1 = r2;

			r = r2;
			t++;
			// fall through to advance p

		// these pseudo-instructions get no Reg; just step past them
		case ADATA:
		case AGLOBL:
		case ANAME:
		case ASIGNAME:
		case ALOCALS:
		case ATYPE:
			p = p->link;
		}
	}

	// byte, word arithmetic elimination.
	// (the argument is ignored; elimshortmov restarts from firstr)
	elimshortmov(r);

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	for(r=firstr; r!=R; r=r->link) {
		p = r->prog;
		switch(p->as) {
		case ALEAL:
		case ALEAQ:
			// LEA of a symbol into a register
			if(regtyp(&p->to))
			if(p->from.sym != S)
				conprop(r);
			break;

		case AMOVB:
		case AMOVW:
		case AMOVL:
		case AMOVQ:
		case AMOVSS:
		case AMOVSD:
			// MOV of a constant into a register
			if(regtyp(&p->to))
			if(p->from.type == D_CONST)
				conprop(r);
			break;
		}
	}

	// loop1 is repeated as long as a pass makes at least one change (t != 0).
loop1:
	if(debug['P'] && debug['v'])
		dumpit("loop1", firstr);

	t = 0;
	for(r=firstr; r!=R; r=r->link) {
		p = r->prog;
		switch(p->as) {
		case AMOVL:
		case AMOVQ:
		case AMOVSS:
		case AMOVSD:
			// register-to-register move: delete it if copy propagation
			// succeeds, either directly or after subprop has swapped
			// the two registers.
			if(regtyp(&p->to))
			if(regtyp(&p->from)) {
				if(copyprop(r)) {
					excise(r);
					t++;
				} else
				if(subprop(r) && copyprop(r)) {
					excise(r);
					t++;
				}
			}
			break;

		case AMOVBLZX:
		case AMOVWLZX:
		case AMOVBLSX:
		case AMOVWLSX:
			// the same extension applied again to the register just
			// written (ignoring NOPs in between) becomes a plain MOVL.
			if(regtyp(&p->to)) {
				r1 = rnops(uniqs(r));
				if(r1 != R) {
					p1 = r1->prog;
					if(p->as == p1->as && p->to.type == p1->from.type){
						p1->as = AMOVL;
						t++;
					}
				}
			}
			break;

		case AMOVBQSX:
		case AMOVBQZX:
		case AMOVWQSX:
		case AMOVWQZX:
		case AMOVLQSX:
		case AMOVLQZX:
		case AMOVQL:
			// as above, but the repeated instruction becomes a MOVQ.
			if(regtyp(&p->to)) {
				r1 = rnops(uniqs(r));
				if(r1 != R) {
					p1 = r1->prog;
					if(p->as == p1->as && p->to.type == p1->from.type){
						p1->as = AMOVQ;
						t++;
					}
				}
			}
			break;

		case AADDL:
		case AADDQ:
		case AADDW:
			// ADD $-1 -> DEC and ADD $1 -> INC, unless needc says a
			// later instruction depends on the carry flag.
			// These rewrites do not count as a change (t is untouched).
			if(p->from.type != D_CONST || needc(p->link))
				break;
			if(p->from.offset == -1){
				if(p->as == AADDQ)
					p->as = ADECQ;
				else
				if(p->as == AADDL)
					p->as = ADECL;
				else
					p->as = ADECW;
				p->from = zprog.from;
				break;
			}
			if(p->from.offset == 1){
				if(p->as == AADDQ)
					p->as = AINCQ;
				else if(p->as == AADDL)
					p->as = AINCL;
				else
					p->as = AINCW;
				p->from = zprog.from;
				break;
			}
			break;

		case ASUBL:
		case ASUBQ:
		case ASUBW:
			// SUB $-1 -> INC and SUB $1 -> DEC, under the same
			// carry condition as the ADD case.
			if(p->from.type != D_CONST || needc(p->link))
				break;
			if(p->from.offset == -1) {
				if(p->as == ASUBQ)
					p->as = AINCQ;
				else
				if(p->as == ASUBL)
					p->as = AINCL;
				else
					p->as = AINCW;
				p->from = zprog.from;
				break;
			}
			if(p->from.offset == 1){
				if(p->as == ASUBQ)
					p->as = ADECQ;
				else
				if(p->as == ASUBL)
					p->as = ADECL;
				else
					p->as = ADECW;
				p->from = zprog.from;
				break;
			}
			break;
		}
	}
	if(t)
		goto loop1;

	// MOVLQZX removal.
	// The MOVLQZX exists to avoid being confused for a
	// MOVL that is just copying 32-bit data around during
	// copyprop.  Now that copyprop is done, remove MOVLQZX R1, R2
	// if it is dominated by an earlier ADDL/MOVL/etc into R1 that
	// will have already cleared the high bits.
	// (The code below only removes the instruction when source and
	// destination are the same register.)
	//
	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one.  We only use the lower one, but
	// the processor can do better if we do moves using both.
	for(r=firstr; r!=R; r=r->link) {
		p = r->prog;
		if(p->as == AMOVLQZX)
		if(regtyp(&p->from))
		if(p->from.type == p->to.type)
		if(prevl(r, p->from.type))
			excise(r);

		if(p->as == AMOVSD)
		if(regtyp(&p->from))
		if(regtyp(&p->to))
			p->as = AMOVAPD;
	}

	// load pipelining
	// push any load from memory as early as possible
	// to give it time to complete before use.
	for(r=firstr; r!=R; r=r->link) {
		p = r->prog;
		switch(p->as) {
		case AMOVB:
		case AMOVW:
		case AMOVL:
		case AMOVQ:
		case AMOVLQZX:
			// destination is a register and the source is neither a
			// register nor a constant/address operand
			if(regtyp(&p->to) && !regconsttyp(&p->from))
				pushback(r);
		}
	}
}
   334  
   335  static void
   336  pushback(Reg *r0)
   337  {
   338  	Reg *r, *b;
   339  	Prog *p0, *p, t;
   340  	
   341  	b = R;
   342  	p0 = r0->prog;
   343  	for(r=uniqp(r0); r!=R && uniqs(r)!=R; r=uniqp(r)) {
   344  		p = r->prog;
   345  		if(p->as != ANOP) {
   346  			if(!regconsttyp(&p->from) || !regtyp(&p->to))
   347  				break;
   348  			if(copyu(p, &p0->to, A) || copyu(p0, &p->to, A))
   349  				break;
   350  		}
   351  		if(p->as == ACALL)
   352  			break;
   353  		b = r;
   354  	}
   355  	
   356  	if(b == R) {
   357  		if(debug['v']) {
   358  			print("no pushback: %P\n", r0->prog);
   359  			if(r)
   360  				print("\t%P [%d]\n", r->prog, uniqs(r)!=R);
   361  		}
   362  		return;
   363  	}
   364  
   365  	if(debug['v']) {
   366  		print("pushback\n");
   367  		for(r=b;; r=r->link) {
   368  			print("\t%P\n", r->prog);
   369  			if(r == r0)
   370  				break;
   371  		}
   372  	}
   373  
   374  	t = *r0->prog;
   375  	for(r=uniqp(r0);; r=uniqp(r)) {
   376  		p0 = r->link->prog;
   377  		p = r->prog;
   378  		p0->as = p->as;
   379  		p0->lineno = p->lineno;
   380  		p0->from = p->from;
   381  		p0->to = p->to;
   382  
   383  		if(r == b)
   384  			break;
   385  	}
   386  	p0 = r->prog;
   387  	p0->as = t.as;
   388  	p0->lineno = t.lineno;
   389  	p0->from = t.from;
   390  	p0->to = t.to;
   391  
   392  	if(debug['v']) {
   393  		print("\tafter\n");
   394  		for(r=b;; r=r->link) {
   395  			print("\t%P\n", r->prog);
   396  			if(r == r0)
   397  				break;
   398  		}
   399  	}
   400  }
   401  
   402  void
   403  excise(Reg *r)
   404  {
   405  	Prog *p;
   406  
   407  	p = r->prog;
   408  	if(debug['P'] && debug['v'])
   409  		print("%P ===delete===\n", p);
   410  
   411  	p->as = ANOP;
   412  	p->from = zprog.from;
   413  	p->to = zprog.to;
   414  
   415  	ostats.ndelmov++;
   416  }
   417  
   418  Reg*
   419  uniqp(Reg *r)
   420  {
   421  	Reg *r1;
   422  
   423  	r1 = r->p1;
   424  	if(r1 == R) {
   425  		r1 = r->p2;
   426  		if(r1 == R || r1->p2link != R)
   427  			return R;
   428  	} else
   429  		if(r->p2 != R)
   430  			return R;
   431  	return r1;
   432  }
   433  
   434  Reg*
   435  uniqs(Reg *r)
   436  {
   437  	Reg *r1;
   438  
   439  	r1 = r->s1;
   440  	if(r1 == R) {
   441  		r1 = r->s2;
   442  		if(r1 == R)
   443  			return R;
   444  	} else
   445  		if(r->s2 != R)
   446  			return R;
   447  	return r1;
   448  }
   449  
   450  int
   451  regtyp(Adr *a)
   452  {
   453  	int t;
   454  
   455  	t = a->type;
   456  	if(t >= D_AX && t <= D_R15)
   457  		return 1;
   458  	if(t >= D_X0 && t <= D_X0+15)
   459  		return 1;
   460  	return 0;
   461  }
   462  
   463  // movb elimination.
   464  // movb is simulated by the linker
   465  // when a register other than ax, bx, cx, dx
   466  // is used, so rewrite to other instructions
   467  // when possible.  a movb into a register
   468  // can smash the entire 32-bit register without
   469  // causing any trouble.
   470  static void
   471  elimshortmov(Reg *r)
   472  {
   473  	Prog *p;
   474  
   475  	USED(r);
   476  	for(r=firstr; r!=R; r=r->link) {
   477  		p = r->prog;
   478  		if(regtyp(&p->to)) {
   479  			switch(p->as) {
   480  			case AINCB:
   481  			case AINCW:
   482  				p->as = AINCQ;
   483  				break;
   484  			case ADECB:
   485  			case ADECW:
   486  				p->as = ADECQ;
   487  				break;
   488  			case ANEGB:
   489  			case ANEGW:
   490  				p->as = ANEGQ;
   491  				break;
   492  			case ANOTB:
   493  			case ANOTW:
   494  				p->as = ANOTQ;
   495  				break;
   496  			}
   497  			if(regtyp(&p->from) || p->from.type == D_CONST) {
   498  				// move or artihmetic into partial register.
   499  				// from another register or constant can be movl.
   500  				// we don't switch to 64-bit arithmetic if it can
   501  				// change how the carry bit is set (and the carry bit is needed).
   502  				switch(p->as) {
   503  				case AMOVB:
   504  				case AMOVW:
   505  					p->as = AMOVQ;
   506  					break;
   507  				case AADDB:
   508  				case AADDW:
   509  					if(!needc(p->link))
   510  						p->as = AADDQ;
   511  					break;
   512  				case ASUBB:
   513  				case ASUBW:
   514  					if(!needc(p->link))
   515  						p->as = ASUBQ;
   516  					break;
   517  				case AMULB:
   518  				case AMULW:
   519  					p->as = AMULQ;
   520  					break;
   521  				case AIMULB:
   522  				case AIMULW:
   523  					p->as = AIMULQ;
   524  					break;
   525  				case AANDB:
   526  				case AANDW:
   527  					p->as = AANDQ;
   528  					break;
   529  				case AORB:
   530  				case AORW:
   531  					p->as = AORQ;
   532  					break;
   533  				case AXORB:
   534  				case AXORW:
   535  					p->as = AXORQ;
   536  					break;
   537  				case ASHLB:
   538  				case ASHLW:
   539  					p->as = ASHLQ;
   540  					break;
   541  				}
   542  			} else if(p->from.type >= D_NONE) {
   543  				// explicit zero extension, but don't
   544  				// do that if source is a byte register
   545  				// (only AH can occur and it's forbidden).
   546  				switch(p->as) {
   547  				case AMOVB:
   548  					p->as = AMOVBQZX;
   549  					break;
   550  				case AMOVW:
   551  					p->as = AMOVWQZX;
   552  					break;
   553  				}
   554  			}
   555  		}
   556  	}
   557  }
   558  
   559  static int
   560  regconsttyp(Adr *a)
   561  {
   562  	if(regtyp(a))
   563  		return 1;
   564  	switch(a->type) {
   565  	case D_CONST:
   566  	case D_FCONST:
   567  	case D_SCONST:
   568  	case D_ADDR:
   569  		return 1;
   570  	}
   571  	return 0;
   572  }
   573  
   574  // is reg guaranteed to be truncated by a previous L instruction?
   575  static int
   576  prevl(Reg *r0, int reg)
   577  {
   578  	Prog *p;
   579  	Reg *r;
   580  
   581  	for(r=uniqp(r0); r!=R; r=uniqp(r)) {
   582  		p = r->prog;
   583  		if(p->to.type == reg) {
   584  			switch(p->as) {
   585  			case AADDL:
   586  			case AANDL:
   587  			case ADECL:
   588  			case ADIVL:
   589  			case AIDIVL:
   590  			case AIMULL:
   591  			case AINCL:
   592  			case AMOVL:
   593  			case AMULL:
   594  			case AORL:
   595  			case ARCLL:
   596  			case ARCRL:
   597  			case AROLL:
   598  			case ARORL:
   599  			case ASALL:
   600  			case ASARL:
   601  			case ASHLL:
   602  			case ASHRL:
   603  			case ASUBL:
   604  			case AXORL:
   605  				return 1;
   606  			}
   607  			return 0;
   608  		}
   609  	}
   610  	return 0;
   611  }
   612  
/*
 * the idea is to substitute
 * one register for another
 * from one MOV to another
 *	MOV	a, R0
 *	ADD	b, R0	/ no use of R1
 *	MOV	R0, R1
 * would be converted to
 *	MOV	a, R1
 *	ADD	b, R1
 *	MOV	R1, R0
 * hopefully, then the former or latter MOV
 * will be eliminated by copy propagation.
 *
 * r0 is the Reg of the final register-to-register MOV (v1 -> v2).
 * returns 1 if the substitution was performed, 0 if nothing was changed.
 */
int
subprop(Reg *r0)
{
	Prog *p;
	Adr *v1, *v2;
	Reg *r;
	int t;

	if(debug['P'] && debug['v'])
		print("subprop %P\n", r0->prog);
	p = r0->prog;
	// v1 and v2 point into r0's own Prog: source and destination of the MOV.
	v1 = &p->from;
	if(!regtyp(v1)) {
		if(debug['P'] && debug['v'])
			print("\tnot regtype %D; return 0\n", v1);
		return 0;
	}
	v2 = &p->to;
	if(!regtyp(v2)) {
		if(debug['P'] && debug['v'])
			print("\tnot regtype %D; return 0\n", v2);
		return 0;
	}
	// scan backwards through unique predecessors looking for the MOV
	// that loaded v1.
	for(r=uniqp(r0); r!=R; r=uniqp(r)) {
		if(debug['P'] && debug['v'])
			print("\t? %P\n", r->prog);
		if(uniqs(r) == R) {
			if(debug['P'] && debug['v'])
				print("\tno unique successor\n");
			break;
		}
		p = r->prog;
		switch(p->as) {
		case ACALL:
			if(debug['P'] && debug['v'])
				print("\tfound %P; return 0\n", p);
			return 0;

		case AIMULL:
		case AIMULQ:
		case AIMULW:
			// IMUL with an explicit destination is handled like any
			// other instruction; without one (D_NONE) give up.
			if(p->to.type != D_NONE)
				break;
			goto giveup;

		case ARCLB:
		case ARCLL:
		case ARCLQ:
		case ARCLW:
		case ARCRB:
		case ARCRL:
		case ARCRQ:
		case ARCRW:
		case AROLB:
		case AROLL:
		case AROLQ:
		case AROLW:
		case ARORB:
		case ARORL:
		case ARORQ:
		case ARORW:
		case ASALB:
		case ASALL:
		case ASALQ:
		case ASALW:
		case ASARB:
		case ASARL:
		case ASARQ:
		case ASARW:
		case ASHLB:
		case ASHLL:
		case ASHLQ:
		case ASHLW:
		case ASHRB:
		case ASHRL:
		case ASHRQ:
		case ASHRW:
			// shifts/rotates by a constant are fine; otherwise give up.
			if(p->from.type == D_CONST)
				break;
			goto giveup;

		// the remaining instructions always abandon the substitution
		case ADIVB:
		case ADIVL:
		case ADIVQ:
		case ADIVW:
		case AIDIVB:
		case AIDIVL:
		case AIDIVQ:
		case AIDIVW:
		case AIMULB:
		case AMULB:
		case AMULL:
		case AMULQ:
		case AMULW:

		case AREP:
		case AREPN:

		case ACWD:
		case ACDQ:
		case ACQO:

		case ASTOSB:
		case ASTOSL:
		case ASTOSQ:
		case AMOVSB:
		case AMOVSL:
		case AMOVSQ:
		giveup:
			if(debug['P'] && debug['v'])
				print("\tfound %P; return 0\n", p);
			return 0;

		case AMOVL:
		case AMOVQ:
		case AMOVSS:
		case AMOVSD:
			// a MOV whose destination is v1: the start of the range
			if(p->to.type == v1->type)
				goto gotit;
			break;
		}
		// an instruction in between must not mention v2 ...
		if(copyau(&p->from, v2) ||
		   copyau(&p->to, v2)) {
			if(debug['P'] && debug['v'])
				print("\tcopyau %D failed\n", v2);
			break;
		}
		// ... and v1 must be replaceable by v2 in it (dry run, f=0).
		if(copysub(&p->from, v1, v2, 0) ||
		   copysub(&p->to, v1, v2, 0)) {
			if(debug['P'] && debug['v'])
				print("\tcopysub failed\n");
			break;
		}
	}
	if(debug['P'] && debug['v'])
		print("\tran off end; return 0\n");
	return 0;

gotit:
	// retarget the defining MOV to v2 ...
	copysub(&p->to, v1, v2, 1);
	if(debug['P']) {
		print("gotit: %D->%D\n%P", v1, v2, r->prog);
		if(p->from.type == v2->type)
			print(" excise");
		print("\n");
	}
	// ... replace v1 by v2 in every instruction up to (not including) r0 ...
	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
		p = r->prog;
		copysub(&p->from, v1, v2, 1);
		copysub(&p->to, v1, v2, 1);
		if(debug['P'])
			print("%P\n", r->prog);
	}
	// ... and swap the operands of the final MOV at r0.
	t = v1->type;
	v1->type = v2->type;
	v2->type = t;
	if(debug['P'])
		print("%P last\n", r->prog);
	return 1;
}
   787  
   788  /*
   789   * The idea is to remove redundant copies.
   790   *	v1->v2	F=0
   791   *	(use v2	s/v2/v1/)*
   792   *	set v1	F=1
   793   *	use v2	return fail
   794   *	-----------------
   795   *	v1->v2	F=0
   796   *	(use v2	s/v2/v1/)*
   797   *	set v1	F=1
   798   *	set v2	return success
   799   */
   800  int
   801  copyprop(Reg *r0)
   802  {
   803  	Prog *p;
   804  	Adr *v1, *v2;
   805  	Reg *r;
   806  
   807  	if(debug['P'] && debug['v'])
   808  		print("copyprop %P\n", r0->prog);
   809  	p = r0->prog;
   810  	v1 = &p->from;
   811  	v2 = &p->to;
   812  	if(copyas(v1, v2))
   813  		return 1;
   814  	for(r=firstr; r!=R; r=r->link)
   815  		r->active = 0;
   816  	return copy1(v1, v2, r0->s1, 0);
   817  }
   818  
   819  int
   820  copy1(Adr *v1, Adr *v2, Reg *r, int f)
   821  {
   822  	int t;
   823  	Prog *p;
   824  
   825  	if(r->active) {
   826  		if(debug['P'])
   827  			print("act set; return 1\n");
   828  		return 1;
   829  	}
   830  	r->active = 1;
   831  	if(debug['P'])
   832  		print("copy %D->%D f=%d\n", v1, v2, f);
   833  	for(; r != R; r = r->s1) {
   834  		p = r->prog;
   835  		if(debug['P'])
   836  			print("%P", p);
   837  		if(!f && uniqp(r) == R) {
   838  			f = 1;
   839  			if(debug['P'])
   840  				print("; merge; f=%d", f);
   841  		}
   842  		t = copyu(p, v2, A);
   843  		switch(t) {
   844  		case 2:	/* rar, cant split */
   845  			if(debug['P'])
   846  				print("; %D rar; return 0\n", v2);
   847  			return 0;
   848  
   849  		case 3:	/* set */
   850  			if(debug['P'])
   851  				print("; %D set; return 1\n", v2);
   852  			return 1;
   853  
   854  		case 1:	/* used, substitute */
   855  		case 4:	/* use and set */
   856  			if(f) {
   857  				if(!debug['P'])
   858  					return 0;
   859  				if(t == 4)
   860  					print("; %D used+set and f=%d; return 0\n", v2, f);
   861  				else
   862  					print("; %D used and f=%d; return 0\n", v2, f);
   863  				return 0;
   864  			}
   865  			if(copyu(p, v2, v1)) {
   866  				if(debug['P'])
   867  					print("; sub fail; return 0\n");
   868  				return 0;
   869  			}
   870  			if(debug['P'])
   871  				print("; sub %D/%D", v2, v1);
   872  			if(t == 4) {
   873  				if(debug['P'])
   874  					print("; %D used+set; return 1\n", v2);
   875  				return 1;
   876  			}
   877  			break;
   878  		}
   879  		if(!f) {
   880  			t = copyu(p, v1, A);
   881  			if(!f && (t == 2 || t == 3 || t == 4)) {
   882  				f = 1;
   883  				if(debug['P'])
   884  					print("; %D set and !f; f=%d", v1, f);
   885  			}
   886  		}
   887  		if(debug['P'])
   888  			print("\n");
   889  		if(r->s2)
   890  			if(!copy1(v1, v2, r->s2, f))
   891  				return 0;
   892  	}
   893  	return 1;
   894  }
   895  
/*
 * copyu classifies how instruction p touches operand v and,
 * when s is not A, substitutes s for v in p.
 *
 * with s == A (query only), return
 * 1 if v only used (and substitute),
 * 2 if read-alter-rewrite
 * 3 if set
 * 4 if set and used
 * 0 otherwise (not touched)
 *
 * with s != A, the operands of p are rewritten through copysub and
 * the result is nonzero if the substitution could not be made
 * (the early "return 2" checks still apply).
 */
int
copyu(Prog *p, Adr *v, Adr *s)
{

	switch(p->as) {

	default:
		// any opcode not listed below is treated as read-alter-rewrite
		if(debug['P'])
			print("unknown op %A\n", p->as);
		/* SBBL; ADCL; FLD1; SAHF */
		return 2;


	case ANEGB:
	case ANEGW:
	case ANEGL:
	case ANEGQ:
	case ANOTB:
	case ANOTW:
	case ANOTL:
	case ANOTQ:
		// one-operand read-modify-write of p->to
		if(copyas(&p->to, v))
			return 2;
		break;

	case ALEAL:	/* lhs addr, rhs store */
	case ALEAQ:
		if(copyas(&p->from, v))
			return 2;
		// fall through: the destination is handled like a MOV


	case ANOP:	/* rhs store */
	case AMOVL:
	case AMOVQ:
	case AMOVBLSX:
	case AMOVBLZX:
	case AMOVBQSX:
	case AMOVBQZX:
	case AMOVLQSX:
	case AMOVLQZX:
	case AMOVWLSX:
	case AMOVWLZX:
	case AMOVWQSX:
	case AMOVWQZX:
	case AMOVQL:

	case AMOVSS:
	case AMOVSD:
	case ACVTSD2SL:
	case ACVTSD2SQ:
	case ACVTSD2SS:
	case ACVTSL2SD:
	case ACVTSL2SS:
	case ACVTSQ2SD:
	case ACVTSQ2SS:
	case ACVTSS2SD:
	case ACVTSS2SL:
	case ACVTSS2SQ:
	case ACVTTSD2SL:
	case ACVTTSD2SQ:
	case ACVTTSS2SL:
	case ACVTTSS2SQ:
		// destination is v: it is set; it is also used if the
		// source mentions v.  when substituting, only the source
		// is rewritten.
		if(copyas(&p->to, v)) {
			if(s != A)
				return copysub(&p->from, v, s, 1);
			if(copyau(&p->from, v))
				return 4;
			return 3;
		}
		goto caseread;

	case ARCLB:
	case ARCLL:
	case ARCLQ:
	case ARCLW:
	case ARCRB:
	case ARCRL:
	case ARCRQ:
	case ARCRW:
	case AROLB:
	case AROLL:
	case AROLQ:
	case AROLW:
	case ARORB:
	case ARORL:
	case ARORQ:
	case ARORW:
	case ASALB:
	case ASALL:
	case ASALQ:
	case ASALW:
	case ASARB:
	case ASARL:
	case ASARQ:
	case ASARW:
	case ASHLB:
	case ASHLL:
	case ASHLQ:
	case ASHLW:
	case ASHRB:
	case ASHRL:
	case ASHRQ:
	case ASHRW:
		if(copyas(&p->to, v))
			return 2;
		// a shift count held in CX cannot be substituted
		if(copyas(&p->from, v))
			if(p->from.type == D_CX)
				return 2;
		goto caseread;

	case AADDB:	/* rhs rar */
	case AADDL:
	case AADDQ:
	case AADDW:
	case AANDB:
	case AANDL:
	case AANDQ:
	case AANDW:
	case ADECL:
	case ADECQ:
	case ADECW:
	case AINCL:
	case AINCQ:
	case AINCW:
	case ASUBB:
	case ASUBL:
	case ASUBQ:
	case ASUBW:
	case AORB:
	case AORL:
	case AORQ:
	case AORW:
	case AXORB:
	case AXORL:
	case AXORQ:
	case AXORW:
	case AMOVB:
	case AMOVW:

	case AADDSD:
	case AADDSS:
	case ACMPSD:
	case ACMPSS:
	case ADIVSD:
	case ADIVSS:
	case AMAXSD:
	case AMAXSS:
	case AMINSD:
	case AMINSS:
	case AMULSD:
	case AMULSS:
	case ARCPSS:
	case ARSQRTSS:
	case ASQRTSD:
	case ASQRTSS:
	case ASUBSD:
	case ASUBSS:
	case AXORPD:
		if(copyas(&p->to, v))
			return 2;
		goto caseread;

	case ACMPL:	/* read only */
	case ACMPW:
	case ACMPB:
	case ACMPQ:

	case ACOMISD:
	case ACOMISS:
	case AUCOMISD:
	case AUCOMISS:
	caseread:
		// both operands are only read: substitute in both when
		// asked to, otherwise report a use if either mentions v.
		if(s != A) {
			if(copysub(&p->from, v, s, 1))
				return 1;
			return copysub(&p->to, v, s, 1);
		}
		if(copyau(&p->from, v))
			return 1;
		if(copyau(&p->to, v))
			return 1;
		break;

	case AJGE:	/* no reference */
	case AJNE:
	case AJLE:
	case AJEQ:
	case AJHI:
	case AJLS:
	case AJMI:
	case AJPL:
	case AJGT:
	case AJLT:
	case AJCC:
	case AJCS:

	case AADJSP:
	case AWAIT:
	case ACLD:
		break;

	case AIMULL:
	case AIMULQ:
	case AIMULW:
		if(p->to.type != D_NONE) {
			if(copyas(&p->to, v))
				return 2;
			goto caseread;
		}
		// fall through: with no explicit destination (D_NONE),
		// IMUL is handled with the AX/DX group below

	case ADIVB:
	case ADIVL:
	case ADIVQ:
	case ADIVW:
	case AIDIVB:
	case AIDIVL:
	case AIDIVQ:
	case AIDIVW:
	case AIMULB:
	case AMULB:
	case AMULL:
	case AMULQ:
	case AMULW:

	case ACWD:
	case ACDQ:
	case ACQO:
		// AX and DX are never substitutable across these
		if(v->type == D_AX || v->type == D_DX)
			return 2;
		goto caseread;

	case AREP:
	case AREPN:
		// likewise CX
		if(v->type == D_CX)
			return 2;
		goto caseread;

	case AMOVSB:
	case AMOVSL:
	case AMOVSQ:
		// likewise DI and SI
		if(v->type == D_DI || v->type == D_SI)
			return 2;
		goto caseread;

	case ASTOSB:
	case ASTOSL:
	case ASTOSQ:
		// likewise AX and DI
		if(v->type == D_AX || v->type == D_DI)
			return 2;
		goto caseread;

	case AJMP:	/* funny */
		// only the jump target operand can mention v
		if(s != A) {
			if(copysub(&p->to, v, s, 1))
				return 1;
			return 0;
		}
		if(copyau(&p->to, v))
			return 1;
		return 0;

	case ARET:	/* funny */
		// substitution is refused; a query reports "set"
		if(s != A)
			return 1;
		return 3;

	case ACALL:	/* funny */
		// external registers, the argument register and the
		// register named in p->from are never substitutable
		if(REGEXT && v->type <= REGEXT && v->type > exregoffset)
			return 2;
		if(REGARG >= 0 && v->type == (uchar)REGARG)
			return 2;
		if(v->type == p->from.type)
			return 2;

		if(s != A) {
			if(copysub(&p->to, v, s, 1))
				return 1;
			return 0;
		}
		// a query reports v as set by the call, and also used
		// when the call target mentions it
		if(copyau(&p->to, v))
			return 4;
		return 3;

	case ATEXT:	/* funny */
		if(REGARG >= 0 && v->type == (uchar)REGARG)
			return 3;
		return 0;
	}
	return 0;
}
  1194  
  1195  /*
  1196   * direct reference,
  1197   * could be set/use depending on
  1198   * semantics
  1199   */
  1200  int
  1201  copyas(Adr *a, Adr *v)
  1202  {
  1203  	if(a->type != v->type)
  1204  		return 0;
  1205  	if(regtyp(v))
  1206  		return 1;
  1207  	if(v->type == D_AUTO || v->type == D_PARAM)
  1208  		if(v->offset == a->offset)
  1209  			return 1;
  1210  	return 0;
  1211  }
  1212  
  1213  /*
  1214   * either direct or indirect
  1215   */
  1216  int
  1217  copyau(Adr *a, Adr *v)
  1218  {
  1219  
  1220  	if(copyas(a, v)) {
  1221  		if(debug['P'] && debug['v'])
  1222  			print("\tcopyau: copyas returned 1\n");
  1223  		return 1;
  1224  	}
  1225  	if(regtyp(v)) {
  1226  		if(a->type-D_INDIR == v->type) {
  1227  			if(debug['P'] && debug['v'])
  1228  				print("\tcopyau: found indir use - return 1\n");
  1229  			return 1;
  1230  		}
  1231  		if(a->index == v->type) {
  1232  			if(debug['P'] && debug['v'])
  1233  				print("\tcopyau: found index use - return 1\n");
  1234  			return 1;
  1235  		}
  1236  	}
  1237  	return 0;
  1238  }
  1239  
  1240  /*
  1241   * substitute s for v in a
  1242   * return failure to substitute
  1243   */
  1244  int
  1245  copysub(Adr *a, Adr *v, Adr *s, int f)
  1246  {
  1247  	int t;
  1248  
  1249  	if(copyas(a, v)) {
  1250  		t = s->type;
  1251  		if(t >= D_AX && t <= D_R15 || t >= D_X0 && t <= D_X0+15) {
  1252  			if(f)
  1253  				a->type = t;
  1254  		}
  1255  		return 0;
  1256  	}
  1257  	if(regtyp(v)) {
  1258  		t = v->type;
  1259  		if(a->type == t+D_INDIR) {
  1260  			if((s->type == D_BP || s->type == D_R13) && a->index != D_NONE)
  1261  				return 1;	/* can't use BP-base with index */
  1262  			if(f)
  1263  				a->type = s->type+D_INDIR;
  1264  //			return 0;
  1265  		}
  1266  		if(a->index == t) {
  1267  			if(f)
  1268  				a->index = s->type;
  1269  			return 0;
  1270  		}
  1271  		return 0;
  1272  	}
  1273  	return 0;
  1274  }
  1275  
  1276  static void
  1277  conprop(Reg *r0)
  1278  {
  1279  	Reg *r;
  1280  	Prog *p, *p0;
  1281  	int t;
  1282  	Adr *v0;
  1283  
  1284  	p0 = r0->prog;
  1285  	v0 = &p0->to;
  1286  	r = r0;
  1287  
  1288  loop:
  1289  	r = uniqs(r);
  1290  	if(r == R || r == r0)
  1291  		return;
  1292  	if(uniqp(r) == R)
  1293  		return;
  1294  
  1295  	p = r->prog;
  1296  	t = copyu(p, v0, A);
  1297  	switch(t) {
  1298  	case 0:	// miss
  1299  	case 1:	// use
  1300  		goto loop;
  1301  
  1302  	case 2:	// rar
  1303  	case 4:	// use and set
  1304  		break;
  1305  
  1306  	case 3:	// set
  1307  		if(p->as == p0->as)
  1308  		if(p->from.type == p0->from.type)
  1309  		if(p->from.node == p0->from.node)
  1310  		if(p->from.offset == p0->from.offset)
  1311  		if(p->from.scale == p0->from.scale)
  1312  		if(p->from.u.vval == p0->from.u.vval)
  1313  		if(p->from.index == p0->from.index) {
  1314  			excise(r);
  1315  			goto loop;
  1316  		}
  1317  		break;
  1318  	}
  1319  }