github.com/rsc/go@v0.0.0-20150416155037-e040fd465409/src/cmd/8g/ggen.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"cmd/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

func defframe(ptxt *obj.Prog) {
	var n *gc.Node

	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)
	for l := gc.Curfn.Func.Dcl; l != nil; l = l.Next {
		n = l.N
		if !n.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatal("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatal("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}
		if lo != hi && n.Xoffset+n.Type.Width == lo-int64(2*gc.Widthptr) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax)
}

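// zerorange zeroes the stack words from frame+lo up to frame+hi, reusing a
// zeroed AX across calls (tracked through *ax). Small ranges are cleared with
// unrolled MOVLs, medium ranges with a call into runtime.duffzero, and large
// ranges with a REP STOSL loop.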
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if *ax == 0 {
		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		*ax = 1
	}

	if cnt <= int64(4*gc.Widthreg) {
		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
			p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
		}
	} else if !gc.Nacl && cnt <= int64(128*gc.Widthreg) {
		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
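		// The offset into runtime.duffzero selects how many words get
		// cleared; the constants 1 and 128 match the duffzero body in
		// ../../runtime/asm_386.s (see the magic-constants note in
		// clearfat below).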
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 1*(128-cnt/int64(gc.Widthreg)))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
	} else {
		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSL, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

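// appendpp allocates a new Prog with the given opcode and operands and links
// it into the instruction list immediately after p, returning the new Prog.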
func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = int16(as)
	q.Lineno = p.Lineno
	q.From.Type = int16(ftype)
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = int16(ttype)
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

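// clearfat zeroes the "fat" (multiword) value nl in place. Small values are
// cleared with component moves or unrolled MOVL/MOVB stores; larger values go
// through runtime.duffzero or a REP STOSL loop, mirroring the strategy used
// by zerorange above.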
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := uint32(nl.Type.Width)

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 4 // bytes
	q := w / 4 // quads

	if q < 4 {
		// Write sequence of MOV 0, off(base) instead of using STOSL.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSL loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		var n1 gc.Node
		gc.Regalloc(&n1, gc.Types[gc.Tptr], nil)

		gc.Agen(nl, &n1)
		n1.Op = gc.OINDREG
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		for ; q > 0; q-- {
			n1.Type = z.Type
			gins(x86.AMOVL, &z, &n1)
			n1.Xoffset += 4
		}

		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
		for ; c > 0; c-- {
			n1.Type = z.Type
			gins(x86.AMOVB, &z, &n1)
			n1.Xoffset++
		}

		gc.Regfree(&n1)
		return
	}

	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI)
	gc.Agen(nl, &n1)
	gconreg(x86.AMOVL, 0, x86.REG_AX)

	if q > 128 || (q >= 4 && gc.Nacl) {
		gconreg(x86.AMOVL, int64(q), x86.REG_CX)
		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSL, nil, nil) // STOSL AX,*(DI)+
	} else if q >= 4 {
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))

		// 1 and 128 = magic constants: see ../../runtime/asm_386.s
		p.To.Offset = 1 * (128 - int64(q))
	} else {
		for q > 0 {
			gins(x86.ASTOSL, nil, nil) // STOSL AX,*(DI)+
			q--
		}
	}

	for c > 0 {
		gins(x86.ASTOSB, nil, nil) // STOSB AL,*(DI)+
		c--
	}
}

/*
 * generate division.
 * caller must set:
 *	ax = allocated AX register
 *	dx = allocated DX register
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := 0
	if gc.Issigned[t.Etype] {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && gc.Mpgetfix(nl.Val.U.Xval) != -1<<uint64(t.Width*8-1) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && gc.Mpgetfix(nr.Val.U.Xval) != -1 {
			check = 0
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = 0
	}

	var t1 gc.Node
	gc.Tempname(&t1, t)
	var t2 gc.Node
	gc.Tempname(&t2, t)
	if t0 != t {
		var t3 gc.Node
		gc.Tempname(&t3, t0)
		var t4 gc.Node
		gc.Tempname(&t4, t0)
		gc.Cgen(nl, &t3)
		gc.Cgen(nr, &t4)

		// Convert.
		gmove(&t3, &t1)

		gmove(&t4, &t2)
	} else {
		gc.Cgen(nl, &t1)
		gc.Cgen(nr, &t2)
	}

	var n1 gc.Node
	if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) {
		gc.Regalloc(&n1, t, res)
	} else {
		gc.Regalloc(&n1, t, nil)
	}
	gmove(&t2, &n1)
	gmove(&t1, ax)
	var p2 *obj.Prog
	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	if check != 0 {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, ax)

			gmove(ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

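	// Set up DX before the divide: a zero high word for unsigned operands,
	// or a sign extension of AX (CWD/CDQ via OEXTEND) for signed operands.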
	if !gc.Issigned[t.Etype] {
		var nz gc.Node
		gc.Nodconst(&nz, t, 0)
		gmove(&nz, dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(optoas(op, t), &n1, nil)
	gc.Regfree(&n1)

	if op == gc.ODIV {
		gmove(ax, res)
	} else {
		gmove(dx, res)
	}
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
}

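// savex saves the current value of register dr (typically AX or DX) to a
// temporary if the register is live and is not the destination, then
// allocates dr as x for use by the caller. restx undoes the save.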
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := int(reg[dr])
	gc.Nodreg(x, gc.Types[gc.TINT32], dr)

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	if r > 0 && !gc.Samereg(x, res) {
		gc.Tempname(oldx, gc.Types[gc.TINT32])
		gmove(x, oldx)
	}

	gc.Regalloc(x, t, x)
}

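// restx releases the register held in x and, if savex spilled its previous
// contents to oldx, restores them.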
func restx(x *gc.Node, oldx *gc.Node) {
	gc.Regfree(x)

	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT32]
		gmove(oldx, x)
	}
}

/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
func cgen_div(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if gc.Is64(nl.Type) {
		gc.Fatal("cgen_div %v", gc.Tconv(nl.Type, 0))
	}

	var t *gc.Type
	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT32]
	} else {
		t = gc.Types[gc.TUINT32]
	}
	var ax gc.Node
	var oldax gc.Node
	savex(x86.REG_AX, &ax, &oldax, res, t)
	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	dodiv(op, nl, nr, res, &ax, &dx)
	restx(&dx, &olddx)
	restx(&ax, &oldax)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if nl.Type.Width > 4 {
		gc.Fatal("cgen_shift %v", gc.Tconv(nl.Type, 0))
	}

	w := int(nl.Type.Width * 8)

	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n2 gc.Node
		gc.Tempname(&n2, nl.Type)
		gc.Cgen(nl, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&n2, &n1)
		sc := uint64(gc.Mpgetfix(nr.Val.U.Xval))
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			gins(a, ncon(uint32(w)-1), &n1)

			gins(a, ncon(uint32(w)-1), &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

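	// The variable shift count must live in CX (CL), so spill CX to a
	// temporary if it is already in use and is not the result register.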
	var oldcx gc.Node
	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
	if reg[x86.REG_CX] > 1 && !gc.Samereg(&cx, res) {
		gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
		gmove(&cx, &oldcx)
	}

	var n1 gc.Node
	var nt gc.Node
	if nr.Type.Width > 4 {
		gc.Tempname(&nt, nr.Type)
		n1 = nt
	} else {
		gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
		gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift count in CX
	}

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
	} else {
		gc.Cgen(nr, &n1)
		gc.Cgen(nl, &n2)
	}

	// test and fix up large shifts
	if bounded {
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift count in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			splitclean()
		}
	} else {
		var p1 *obj.Prog
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift count in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
			p2 := gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
			splitclean()
			gc.Patch(p2, gc.Pc)
		} else {
			gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		}

		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gins(a, ncon(uint32(w)-1), &n2)
		} else {
			gmove(ncon(0), &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		gmove(&oldcx, &cx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// copy from byte to full registers
	t := gc.Types[gc.TUINT32]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT32]
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		tmp := nl
		nl = nr
		nr = tmp
	}

	var nt gc.Node
	gc.Tempname(&nt, nl.Type)
	gc.Cgen(nl, &nt)
	var n1 gc.Node
	gc.Regalloc(&n1, t, res)
	gc.Cgen(nr, &n1)
	var n2 gc.Node
	gc.Regalloc(&n2, t, nil)
	gmove(&nt, &n2)
	a := optoas(op, t)
	gins(a, &n2, &n1)
	gc.Regfree(&n2)
	gmove(&n1, res)
	gc.Regfree(&n1)

	return true
}

/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var n1 gc.Node
	var n2 gc.Node
	var ax gc.Node
	var dx gc.Node

	t := nl.Type
	a := optoas(gc.OHMUL, t)

	// gen nl in n1.
	gc.Tempname(&n1, t)

	gc.Cgen(nl, &n1)

	// gen nr in n2.
	gc.Regalloc(&n2, t, res)

	gc.Cgen(nr, &n2)

	// multiply.
	gc.Nodreg(&ax, t, x86.REG_AX)

	gmove(&n2, &ax)
	gins(a, &n1, nil)
	gc.Regfree(&n2)

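	// For a 1-byte multiply the high half of the product is left in AH
	// rather than DX, so copy it into DX before the common move below.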
	if t.Width == 1 {
		// byte multiply behaves differently.
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}

/*
 * generate floating-point operation.
 */
func cgen_float(n *gc.Node, res *gc.Node) {
	nl := n.Left
	switch n.Op {
	case gc.OEQ,
		gc.ONE,
		gc.OLT,
		gc.OLE,
		gc.OGE:
		p1 := gc.Gbranch(obj.AJMP, nil, 0)
		p2 := gc.Pc
		gmove(gc.Nodbool(true), res)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
		gc.Bgen(n, true, 0, p2)
		gmove(gc.Nodbool(false), res)
		gc.Patch(p3, gc.Pc)
		return

	case gc.OPLUS:
		gc.Cgen(nl, res)
		return

	case gc.OCONV:
		if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) {
			gc.Cgen(nl, res)
			return
		}

		var n2 gc.Node
		gc.Tempname(&n2, n.Type)
		var n1 gc.Node
		gc.Mgen(nl, &n1, res)
		gmove(&n1, &n2)
		gmove(&n2, res)
		gc.Mfree(&n1)
		return
	}

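	// Remaining ops are floating-point arithmetic; dispatch to the 387 or
	// SSE2 code generator depending on the target's floating-point mode.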
	if gc.Thearch.Use387 {
		cgen_float387(n, res)
	} else {
		cgen_floatsse(n, res)
	}
}

// floating-point.  387 (not SSE2)
func cgen_float387(n *gc.Node, res *gc.Node) {
	var f0 gc.Node
	var f1 gc.Node

	nl := n.Left
	nr := n.Right
	gc.Nodreg(&f0, nl.Type, x86.REG_F0)
	gc.Nodreg(&f1, n.Type, x86.REG_F0+1)
	if nr != nil {
		// binary
		if nl.Ullman >= nr.Ullman {
			gc.Cgen(nl, &f0)
			if nr.Addable {
				gins(foptoas(int(n.Op), n.Type, 0), nr, &f0)
			} else {
				gc.Cgen(nr, &f0)
				gins(foptoas(int(n.Op), n.Type, Fpop), &f0, &f1)
			}
		} else {
			gc.Cgen(nr, &f0)
			if nl.Addable {
				gins(foptoas(int(n.Op), n.Type, Frev), nl, &f0)
			} else {
				gc.Cgen(nl, &f0)
				gins(foptoas(int(n.Op), n.Type, Frev|Fpop), &f0, &f1)
			}
		}

		gmove(&f0, res)
		return
	}

	// unary
	gc.Cgen(nl, &f0)

	if n.Op != gc.OCONV && n.Op != gc.OPLUS {
		gins(foptoas(int(n.Op), n.Type, 0), nil, nil)
	}
	gmove(&f0, res)
	return
}

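// cgen_floatsse generates floating-point arithmetic using SSE2 instructions.
// OMINUS and OCOM are rewritten as a multiplication by -1 before falling into
// the common binary-operation paths.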
func cgen_floatsse(n *gc.Node, res *gc.Node) {
	var a int

	nl := n.Left
	nr := n.Right
	switch n.Op {
	default:
		gc.Dump("cgen_floatsse", n)
		gc.Fatal("cgen_floatsse %v", gc.Oconv(int(n.Op), 0))
		return

	case gc.OMINUS,
		gc.OCOM:
		nr = gc.Nodintconst(-1)
		gc.Convlit(&nr, n.Type)
		a = foptoas(gc.OMUL, nl.Type, 0)
		goto sbop

		// symmetric binary
	case gc.OADD,
		gc.OMUL:
		a = foptoas(int(n.Op), nl.Type, 0)

		goto sbop

		// asymmetric binary
	case gc.OSUB,
		gc.OMOD,
		gc.ODIV:
		a = foptoas(int(n.Op), nl.Type, 0)

		goto abop
	}

sbop: // symmetric binary
	if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL {
		r := nl
		nl = nr
		nr = r
	}

abop: // asymmetric binary
	if nl.Ullman >= nr.Ullman {
		var nt gc.Node
		gc.Tempname(&nt, nl.Type)
		gc.Cgen(nl, &nt)
		var n2 gc.Node
		gc.Mgen(nr, &n2, nil)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&nt, &n1)
		gins(a, &n2, &n1)
		gmove(&n1, res)
		gc.Regfree(&n1)
		gc.Mfree(&n2)
	} else {
		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, res)
		gc.Cgen(nr, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, nil)
		gc.Cgen(nl, &n1)
		gins(a, &n2, &n1)
		gc.Regfree(&n2)
		gmove(&n1, res)
		gc.Regfree(&n1)
	}

	return
}

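// bgen_float generates a branch to `to` for the floating-point comparison n
// when its truth value matches true_. For the false case it inverts the test
// by recursing, because simply complementing the condition code (brcom) is
// not valid for floats once NaN is involved.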
func bgen_float(n *gc.Node, true_ int, likely int, to *obj.Prog) {
	nl := n.Left
	nr := n.Right
	a := int(n.Op)
	if true_ == 0 {
		// brcom is not valid on floats when NaN is involved.
		p1 := gc.Gbranch(obj.AJMP, nil, 0)

		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// No need to avoid re-genning ninit.
		bgen_float(n, 1, -likely, p2)

		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p2, gc.Pc)
		return
	}

	var tmp gc.Node
	var et int
	var n2 gc.Node
	var ax gc.Node
	if !gc.Thearch.Use387 {
		if !nl.Addable {
			var n1 gc.Node
			gc.Tempname(&n1, nl.Type)
			gc.Cgen(nl, &n1)
			nl = &n1
		}

		if !nr.Addable {
			var tmp gc.Node
			gc.Tempname(&tmp, nr.Type)
			gc.Cgen(nr, &tmp)
			nr = &tmp
		}

		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, nil)
		gmove(nr, &n2)
		nr = &n2

		if nl.Op != gc.OREGISTER {
			var n3 gc.Node
			gc.Regalloc(&n3, nl.Type, nil)
			gmove(nl, &n3)
			nl = &n3
		}

		if a == gc.OGE || a == gc.OGT {
			// only < and <= work right with NaN; reverse if needed
			r := nr

			nr = nl
			nl = r
			a = gc.Brrev(a)
		}

		gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
		if nl.Op == gc.OREGISTER {
			gc.Regfree(nl)
		}
		gc.Regfree(nr)
		goto ret
	} else {
		goto x87
	}

x87:
	a = gc.Brrev(a) // because the args are stacked
	if a == gc.OGE || a == gc.OGT {
		// only < and <= work right with NaN; reverse if needed
		r := nr

		nr = nl
		nl = r
		a = gc.Brrev(a)
	}

	gc.Nodreg(&tmp, nr.Type, x86.REG_F0)
	gc.Nodreg(&n2, nr.Type, x86.REG_F0+1)
	gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
	et = gc.Simsimtype(nr.Type)
	if et == gc.TFLOAT64 {
		if nl.Ullman > nr.Ullman {
			gc.Cgen(nl, &tmp)
			gc.Cgen(nr, &tmp)
			gins(x86.AFXCHD, &tmp, &n2)
		} else {
			gc.Cgen(nr, &tmp)
			gc.Cgen(nl, &tmp)
		}

		gins(x86.AFUCOMIP, &tmp, &n2)
		gins(x86.AFMOVDP, &tmp, &tmp) // annoying pop but still better than STSW+SAHF
	} else {
		// TODO(rsc): The moves back and forth to memory
		// here are for truncating the value to 32 bits.
		// This handles 32-bit comparison but presumably
		// all the other ops have the same problem.
		// We need to figure out what the right general
		// solution is, besides telling people to use float64.
		var t1 gc.Node
		gc.Tempname(&t1, gc.Types[gc.TFLOAT32])

		var t2 gc.Node
		gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
		gc.Cgen(nr, &t1)
		gc.Cgen(nl, &t2)
		gmove(&t2, &tmp)
		gins(x86.AFCOMFP, &t1, &tmp)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
	}

	goto ret

ret:
	if a == gc.OEQ {
		// neither NE nor P
		p1 := gc.Gbranch(x86.AJNE, nil, -likely)

		p2 := gc.Gbranch(x86.AJPS, nil, -likely)
		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p1, gc.Pc)
		gc.Patch(p2, gc.Pc)
	} else if a == gc.ONE {
		// either NE or P
		gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to)

		gc.Patch(gc.Gbranch(x86.AJPS, nil, likely), to)
	} else {
		gc.Patch(gc.Gbranch(optoas(a, nr.Type), nil, likely), to)
	}
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p.Lineno==1 in generated wrappers
			gc.Warnl(int(p.Lineno), "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = x86.ACMPL
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
		}
		p2.To.Offset = 0
	}
}

// addr += index*width if possible.
func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
	switch width {
	case 1, 2, 4, 8:
		p1 := gins(x86.ALEAL, index, addr)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Scale = int16(width)
		p1.From.Index = p1.From.Reg
		p1.From.Reg = p1.To.Reg
		return true
	}
	return false
}

// res = runtime.getg()
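// The g pointer is fetched with two loads: the first reads the TLS base
// register, the second loads g from that base. The obj layer is expected to
// rewrite this pair into the target platform's actual TLS access sequence.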
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}