github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/compile/internal/x86/ggen.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package x86
     6  
     7  import (
     8  	"cmd/compile/internal/gc"
     9  	"cmd/internal/obj"
    10  	"cmd/internal/obj/x86"
    11  )
    12  
    13  func defframe(ptxt *obj.Prog) {
    14  	var n *gc.Node
    15  
    16  	// fill in argument size, stack size
    17  	ptxt.To.Type = obj.TYPE_TEXTSIZE
    18  
    19  	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
    20  	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
    21  	ptxt.To.Offset = int64(frame)
    22  
    23  	// insert code to zero ambiguously live variables
    24  	// so that the garbage collector only sees initialized values
    25  	// when it looks for pointers.
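        	// Walk the function's declarations below, collecting needzero
        	// PAUTO variables into frame ranges that are then cleared by
        	// zerorange.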
    26  	p := ptxt
    27  
    28  	hi := int64(0)
    29  	lo := hi
    30  	ax := uint32(0)
    31  	for l := gc.Curfn.Func.Dcl; l != nil; l = l.Next {
    32  		n = l.N
    33  		if !n.Name.Needzero {
    34  			continue
    35  		}
    36  		if n.Class != gc.PAUTO {
    37  			gc.Fatalf("needzero class %d", n.Class)
    38  		}
    39  		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
    40  			gc.Fatalf("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
    41  		}
    42  		if lo != hi && n.Xoffset+n.Type.Width == lo-int64(2*gc.Widthptr) {
    43  			// merge with range we already have
    44  			lo = n.Xoffset
    45  
    46  			continue
    47  		}
    48  
    49  		// zero old range
    50  		p = zerorange(p, int64(frame), lo, hi, &ax)
    51  
    52  		// set new range
    53  		hi = n.Xoffset + n.Type.Width
    54  
    55  		lo = n.Xoffset
    56  	}
    57  
    58  	// zero final range
    59  	zerorange(p, int64(frame), lo, hi, &ax)
    60  }
    61  
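        // zerorange zeroes the cnt = hi-lo bytes of the frame starting at
        // frame+lo(SP), returning the last instruction emitted. AX is loaded
        // with zero at most once per function (*ax records whether that has
        // happened). Small ranges use unrolled MOVL stores, medium ranges
        // jump into runtime.duffzero, and large ranges (or any non-small
        // range under NaCl) fall back to REP STOSL.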
    62  func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
    63  	cnt := hi - lo
    64  	if cnt == 0 {
    65  		return p
    66  	}
    67  	if *ax == 0 {
    68  		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
    69  		*ax = 1
    70  	}
    71  
    72  	if cnt <= int64(4*gc.Widthreg) {
    73  		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
    74  			p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
    75  		}
    76  	} else if !gc.Nacl && cnt <= int64(128*gc.Widthreg) {
    77  		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
    78  		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 1*(128-cnt/int64(gc.Widthreg)))
    79  		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
    80  	} else {
    81  		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
    82  		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
    83  		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
    84  		p = appendpp(p, x86.ASTOSL, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
    85  	}
    86  
    87  	return p
    88  }
    89  
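        // appendpp allocates a new instruction with the given opcode and
        // source/destination operands, links it into the list immediately
        // after p, and returns it.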
    90  func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
    91  	q := gc.Ctxt.NewProg()
    92  	gc.Clearp(q)
    93  	q.As = int16(as)
    94  	q.Lineno = p.Lineno
    95  	q.From.Type = int16(ftype)
    96  	q.From.Reg = int16(freg)
    97  	q.From.Offset = foffset
    98  	q.To.Type = int16(ttype)
    99  	q.To.Reg = int16(treg)
   100  	q.To.Offset = toffset
   101  	q.Link = p.Link
   102  	p.Link = q
   103  	return q
   104  }
   105  
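        // clearfat zeroes the multi-word value nl. Simple composite types are
        // handled by Componentgen; otherwise values of fewer than four words
        // are cleared with unrolled MOVL/MOVB stores, mid-sized values call
        // runtime.duffzero, and larger values (or DUFFZERO-sized values under
        // NaCl) use REP STOSL, with any trailing bytes cleared by STOSB.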
   106  func clearfat(nl *gc.Node) {
   107  	/* clear a fat object */
   108  	if gc.Debug['g'] != 0 {
   109  		gc.Dump("\nclearfat", nl)
   110  	}
   111  
   112  	w := uint32(nl.Type.Width)
   113  
   114  	// Avoid taking the address for simple enough types.
   115  	if gc.Componentgen(nil, nl) {
   116  		return
   117  	}
   118  
   119  	c := w % 4 // bytes
   120  	q := w / 4 // quads
   121  
   122  	if q < 4 {
   123  		// Write sequence of MOV 0, off(base) instead of using STOSL.
   124  		// The hope is that although the code will be slightly longer,
   125  		// the MOVs will have no dependencies and pipeline better
   126  		// than the unrolled STOSL loop.
   127  		// NOTE: Must use agen, not igen, so that optimizer sees address
   128  		// being taken. We are not writing on field boundaries.
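        		// For example (illustrative only), clearing a 10-byte object at
        		// off(reg) emits roughly:
        		//	MOVL $0, 0(reg)
        		//	MOVL $0, 4(reg)
        		//	MOVB $0, 8(reg)
        		//	MOVB $0, 9(reg)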
   129  		var n1 gc.Node
   130  		gc.Regalloc(&n1, gc.Types[gc.Tptr], nil)
   131  
   132  		gc.Agen(nl, &n1)
   133  		n1.Op = gc.OINDREG
   134  		var z gc.Node
   135  		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
   136  		for ; q > 0; q-- {
   137  			n1.Type = z.Type
   138  			gins(x86.AMOVL, &z, &n1)
   139  			n1.Xoffset += 4
   140  		}
   141  
   142  		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
   143  		for ; c > 0; c-- {
   144  			n1.Type = z.Type
   145  			gins(x86.AMOVB, &z, &n1)
   146  			n1.Xoffset++
   147  		}
   148  
   149  		gc.Regfree(&n1)
   150  		return
   151  	}
   152  
   153  	var n1 gc.Node
   154  	gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI)
   155  	gc.Agen(nl, &n1)
   156  	gconreg(x86.AMOVL, 0, x86.REG_AX)
   157  
   158  	if q > 128 || (q >= 4 && gc.Nacl) {
   159  		gconreg(x86.AMOVL, int64(q), x86.REG_CX)
   160  		gins(x86.AREP, nil, nil)   // repeat
   161  		gins(x86.ASTOSL, nil, nil) // STOSL AX,*(DI)+
   162  	} else if q >= 4 {
   163  		p := gins(obj.ADUFFZERO, nil, nil)
   164  		p.To.Type = obj.TYPE_ADDR
   165  		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
   166  
   167  		// 1 and 128 = magic constants: see ../../runtime/asm_386.s
   168  		p.To.Offset = 1 * (128 - int64(q))
   169  	} else {
   170  		for q > 0 {
   171  			gins(x86.ASTOSL, nil, nil) // STOSL AX,*(DI)+
   172  			q--
   173  		}
   174  	}
   175  
   176  	for c > 0 {
   177  		gins(x86.ASTOSB, nil, nil) // STOSB AL,*(DI)+
   178  		c--
   179  	}
   180  }
   181  
   182  var panicdiv *gc.Node
   183  
   184  /*
   185   * generate division.
   186   * caller must set:
   187   *	ax = allocated AX register
   188   *	dx = allocated DX register
   189   * generates one of:
   190   *	res = nl / nr
   191   *	res = nl % nr
   192   * according to op.
   193   */
   194  func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) {
   195  	// Have to be careful about handling
   196  	// most negative int divided by -1 correctly.
   197  	// The hardware will trap.
   198  	// Also the byte divide instruction needs AH,
   199  	// which we otherwise don't have to deal with.
   200  	// Easiest way to avoid for int8, int16: use int32.
   201  	// For int32 and int64, use explicit test.
   202  	// Could use int64 hw for int32.
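        	// For a checked signed 32-bit divide the generated code is roughly
        	// (illustrative, register allocation aside):
        	//	CMPL  divisor, $-1
        	//	JNE   body
        	//	NEGL  AX          // a / -1 = -a (a % -1 stores 0 instead)
        	//	JMP   done
        	// body:
        	//	CDQ               // sign-extend AX into DX (unsigned: MOVL $0, DX)
        	//	IDIVL divisor
        	// done: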
   203  	t := nl.Type
   204  
   205  	t0 := t
   206  	check := false
   207  	if gc.Issigned[t.Etype] {
   208  		check = true
   209  		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -1<<uint64(t.Width*8-1) {
   210  			check = false
   211  		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
   212  			check = false
   213  		}
   214  	}
   215  
   216  	if t.Width < 4 {
   217  		if gc.Issigned[t.Etype] {
   218  			t = gc.Types[gc.TINT32]
   219  		} else {
   220  			t = gc.Types[gc.TUINT32]
   221  		}
   222  		check = false
   223  	}
   224  
   225  	var t1 gc.Node
   226  	gc.Tempname(&t1, t)
   227  	var t2 gc.Node
   228  	gc.Tempname(&t2, t)
   229  	if t0 != t {
   230  		var t3 gc.Node
   231  		gc.Tempname(&t3, t0)
   232  		var t4 gc.Node
   233  		gc.Tempname(&t4, t0)
   234  		gc.Cgen(nl, &t3)
   235  		gc.Cgen(nr, &t4)
   236  
   237  		// Convert.
   238  		gmove(&t3, &t1)
   239  
   240  		gmove(&t4, &t2)
   241  	} else {
   242  		gc.Cgen(nl, &t1)
   243  		gc.Cgen(nr, &t2)
   244  	}
   245  
   246  	var n1 gc.Node
   247  	if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) {
   248  		gc.Regalloc(&n1, t, res)
   249  	} else {
   250  		gc.Regalloc(&n1, t, nil)
   251  	}
   252  	gmove(&t2, &n1)
   253  	gmove(&t1, ax)
   254  	var p2 *obj.Prog
   255  	var n4 gc.Node
   256  	if gc.Nacl {
   257  		// Native Client does not relay the divide-by-zero trap
   258  		// to the executing program, so we must insert a check
   259  		// for ourselves.
   260  		gc.Nodconst(&n4, t, 0)
   261  
   262  		gins(optoas(gc.OCMP, t), &n1, &n4)
   263  		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
   264  		if panicdiv == nil {
   265  			panicdiv = gc.Sysfunc("panicdivide")
   266  		}
   267  		gc.Ginscall(panicdiv, -1)
   268  		gc.Patch(p1, gc.Pc)
   269  	}
   270  
   271  	if check {
   272  		gc.Nodconst(&n4, t, -1)
   273  		gins(optoas(gc.OCMP, t), &n1, &n4)
   274  		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
   275  		if op == gc.ODIV {
   276  			// a / (-1) is -a.
   277  			gins(optoas(gc.OMINUS, t), nil, ax)
   278  
   279  			gmove(ax, res)
   280  		} else {
   281  			// a % (-1) is 0.
   282  			gc.Nodconst(&n4, t, 0)
   283  
   284  			gmove(&n4, res)
   285  		}
   286  
   287  		p2 = gc.Gbranch(obj.AJMP, nil, 0)
   288  		gc.Patch(p1, gc.Pc)
   289  	}
   290  
   291  	if !gc.Issigned[t.Etype] {
   292  		var nz gc.Node
   293  		gc.Nodconst(&nz, t, 0)
   294  		gmove(&nz, dx)
   295  	} else {
   296  		gins(optoas(gc.OEXTEND, t), nil, nil)
   297  	}
   298  	gins(optoas(op, t), &n1, nil)
   299  	gc.Regfree(&n1)
   300  
   301  	if op == gc.ODIV {
   302  		gmove(ax, res)
   303  	} else {
   304  		gmove(dx, res)
   305  	}
   306  	if check {
   307  		gc.Patch(p2, gc.Pc)
   308  	}
   309  }
   310  
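        // savex prepares the fixed register dr for use by dodiv: if dr holds
        // a live value that is not the destination, that value is spilled to
        // the stack temporary oldx first, and dr is then allocated as *x with
        // type t. restx undoes the allocation and restores the spilled value.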
   311  func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
   312  	r := gc.GetReg(dr)
   313  	gc.Nodreg(x, gc.Types[gc.TINT32], dr)
   314  
   315  	// save current ax and dx if they are live
   316  	// and not the destination
   317  	*oldx = gc.Node{}
   318  
   319  	if r > 0 && !gc.Samereg(x, res) {
   320  		gc.Tempname(oldx, gc.Types[gc.TINT32])
   321  		gmove(x, oldx)
   322  	}
   323  
   324  	gc.Regalloc(x, t, x)
   325  }
   326  
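        // restx frees the register held by x and, if savex spilled the
        // original contents to oldx, moves them back.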
   327  func restx(x *gc.Node, oldx *gc.Node) {
   328  	gc.Regfree(x)
   329  
   330  	if oldx.Op != 0 {
   331  		x.Type = gc.Types[gc.TINT32]
   332  		gmove(oldx, x)
   333  	}
   334  }
   335  
   336  /*
   337   * generate division according to op, one of:
   338   *	res = nl / nr
   339   *	res = nl % nr
   340   */
   341  func cgen_div(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
   342  	if gc.Is64(nl.Type) {
   343  		gc.Fatalf("cgen_div %v", nl.Type)
   344  	}
   345  
   346  	var t *gc.Type
   347  	if gc.Issigned[nl.Type.Etype] {
   348  		t = gc.Types[gc.TINT32]
   349  	} else {
   350  		t = gc.Types[gc.TUINT32]
   351  	}
   352  	var ax gc.Node
   353  	var oldax gc.Node
   354  	savex(x86.REG_AX, &ax, &oldax, res, t)
   355  	var olddx gc.Node
   356  	var dx gc.Node
   357  	savex(x86.REG_DX, &dx, &olddx, res, t)
   358  	dodiv(op, nl, nr, res, &ax, &dx)
   359  	restx(&dx, &olddx)
   360  	restx(&ax, &oldax)
   361  }
   362  
   363  /*
   364   * generate shift according to op, one of:
   365   *	res = nl << nr
   366   *	res = nl >> nr
   367   */
   368  func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
   369  	if nl.Type.Width > 4 {
   370  		gc.Fatalf("cgen_shift %v", nl.Type)
   371  	}
   372  
   373  	w := int(nl.Type.Width * 8)
   374  
   375  	a := optoas(op, nl.Type)
   376  
   377  	if nr.Op == gc.OLITERAL {
   378  		var n2 gc.Node
   379  		gc.Tempname(&n2, nl.Type)
   380  		gc.Cgen(nl, &n2)
   381  		var n1 gc.Node
   382  		gc.Regalloc(&n1, nl.Type, res)
   383  		gmove(&n2, &n1)
   384  		sc := uint64(nr.Int())
   385  		if sc >= uint64(nl.Type.Width*8) {
   386  			// large shift gets 2 shifts by width-1
   387  			gins(a, ncon(uint32(w)-1), &n1)
   388  
   389  			gins(a, ncon(uint32(w)-1), &n1)
   390  		} else {
   391  			gins(a, nr, &n1)
   392  		}
   393  		gmove(&n1, res)
   394  		gc.Regfree(&n1)
   395  		return
   396  	}
   397  
   398  	var oldcx gc.Node
   399  	var cx gc.Node
   400  	gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
   401  	if gc.GetReg(x86.REG_CX) > 1 && !gc.Samereg(&cx, res) {
   402  		gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
   403  		gmove(&cx, &oldcx)
   404  	}
   405  
   406  	var n1 gc.Node
   407  	var nt gc.Node
   408  	if nr.Type.Width > 4 {
   409  		gc.Tempname(&nt, nr.Type)
   410  		n1 = nt
   411  	} else {
   412  		gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
   413  		gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
   414  	}
   415  
   416  	var n2 gc.Node
   417  	if gc.Samereg(&cx, res) {
   418  		gc.Regalloc(&n2, nl.Type, nil)
   419  	} else {
   420  		gc.Regalloc(&n2, nl.Type, res)
   421  	}
   422  	if nl.Ullman >= nr.Ullman {
   423  		gc.Cgen(nl, &n2)
   424  		gc.Cgen(nr, &n1)
   425  	} else {
   426  		gc.Cgen(nr, &n1)
   427  		gc.Cgen(nl, &n2)
   428  	}
   429  
   430  	// test and fix up large shifts
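        	// For a bounded shift (the count is known to be < width) only the
        	// low 32 bits of a 64-bit count need to be loaded into CX. An
        	// unbounded count is compared against the width: counts >= width
        	// produce 0, except for signed right shifts, which are rewritten
        	// as a shift by width-1 so the result is all sign bits.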
   431  	if bounded {
   432  		if nr.Type.Width > 4 {
   433  			// delayed reg alloc
   434  			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
   435  
   436  			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
   437  			var lo gc.Node
   438  			var hi gc.Node
   439  			split64(&nt, &lo, &hi)
   440  			gmove(&lo, &n1)
   441  			splitclean()
   442  		}
   443  	} else {
   444  		var p1 *obj.Prog
   445  		if nr.Type.Width > 4 {
   446  			// delayed reg alloc
   447  			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
   448  
   449  			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
   450  			var lo gc.Node
   451  			var hi gc.Node
   452  			split64(&nt, &lo, &hi)
   453  			gmove(&lo, &n1)
   454  			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
   455  			p2 := gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
   456  			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
   457  			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
   458  			splitclean()
   459  			gc.Patch(p2, gc.Pc)
   460  		} else {
   461  			gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
   462  			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
   463  		}
   464  
   465  		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
   466  			gins(a, ncon(uint32(w)-1), &n2)
   467  		} else {
   468  			gmove(ncon(0), &n2)
   469  		}
   470  
   471  		gc.Patch(p1, gc.Pc)
   472  	}
   473  
   474  	gins(a, &n1, &n2)
   475  
   476  	if oldcx.Op != 0 {
   477  		gmove(&oldcx, &cx)
   478  	}
   479  
   480  	gmove(&n2, res)
   481  
   482  	gc.Regfree(&n1)
   483  	gc.Regfree(&n2)
   484  }
   485  
   486  /*
   487   * generate byte multiply:
   488   *	res = nl * nr
   489   * there is no 2-operand byte multiply instruction so
   490   * we do a full-width multiplication and truncate afterwards.
   491   */
   492  func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
   493  	if optoas(op, nl.Type) != x86.AIMULB {
   494  		return false
   495  	}
   496  
   497  	// copy from byte to full registers
   498  	t := gc.Types[gc.TUINT32]
   499  
   500  	if gc.Issigned[nl.Type.Etype] {
   501  		t = gc.Types[gc.TINT32]
   502  	}
   503  
   504  	// largest ullman on left.
   505  	if nl.Ullman < nr.Ullman {
   506  		nl, nr = nr, nl
   507  	}
   508  
   509  	var nt gc.Node
   510  	gc.Tempname(&nt, nl.Type)
   511  	gc.Cgen(nl, &nt)
   512  	var n1 gc.Node
   513  	gc.Regalloc(&n1, t, res)
   514  	gc.Cgen(nr, &n1)
   515  	var n2 gc.Node
   516  	gc.Regalloc(&n2, t, nil)
   517  	gmove(&nt, &n2)
   518  	a := optoas(op, t)
   519  	gins(a, &n2, &n1)
   520  	gc.Regfree(&n2)
   521  	gmove(&n1, res)
   522  	gc.Regfree(&n1)
   523  
   524  	return true
   525  }
   526  
   527  /*
   528   * generate high multiply:
   529   *   res = (nl*nr) >> width
   530   */
   531  func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
   532  	var n1 gc.Node
   533  	var n2 gc.Node
   534  	var ax gc.Node
   535  	var dx gc.Node
   536  
   537  	t := nl.Type
   538  	a := optoas(gc.OHMUL, t)
   539  
   540  	// gen nl in n1.
   541  	gc.Tempname(&n1, t)
   542  
   543  	gc.Cgen(nl, &n1)
   544  
   545  	// gen nr in n2.
   546  	gc.Regalloc(&n2, t, res)
   547  
   548  	gc.Cgen(nr, &n2)
   549  
   550  	// multiply.
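        	// The one-operand multiply leaves the high half of the product in
        	// DX (in AH for byte operands), so the result is read from DX
        	// below; the byte case first copies AH into DX.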
   551  	gc.Nodreg(&ax, t, x86.REG_AX)
   552  
   553  	gmove(&n2, &ax)
   554  	gins(a, &n1, nil)
   555  	gc.Regfree(&n2)
   556  
   557  	if t.Width == 1 {
   558  		// byte multiply behaves differently.
   559  		gc.Nodreg(&ax, t, x86.REG_AH)
   560  
   561  		gc.Nodreg(&dx, t, x86.REG_DX)
   562  		gmove(&ax, &dx)
   563  	}
   564  
   565  	gc.Nodreg(&dx, t, x86.REG_DX)
   566  	gmove(&dx, res)
   567  }
   568  
   569  /*
   570   * generate floating-point operation.
   571   */
   572  func cgen_float(n *gc.Node, res *gc.Node) {
   573  	nl := n.Left
   574  	switch n.Op {
   575  	case gc.OEQ,
   576  		gc.ONE,
   577  		gc.OLT,
   578  		gc.OLE,
   579  		gc.OGE:
   580  		p1 := gc.Gbranch(obj.AJMP, nil, 0)
   581  		p2 := gc.Pc
   582  		gmove(gc.Nodbool(true), res)
   583  		p3 := gc.Gbranch(obj.AJMP, nil, 0)
   584  		gc.Patch(p1, gc.Pc)
   585  		gc.Bgen(n, true, 0, p2)
   586  		gmove(gc.Nodbool(false), res)
   587  		gc.Patch(p3, gc.Pc)
   588  		return
   589  
   590  	case gc.OPLUS:
   591  		gc.Cgen(nl, res)
   592  		return
   593  
   594  	case gc.OCONV:
   595  		if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) {
   596  			gc.Cgen(nl, res)
   597  			return
   598  		}
   599  
   600  		var n2 gc.Node
   601  		gc.Tempname(&n2, n.Type)
   602  		var n1 gc.Node
   603  		gc.Mgen(nl, &n1, res)
   604  		gmove(&n1, &n2)
   605  		gmove(&n2, res)
   606  		gc.Mfree(&n1)
   607  		return
   608  	}
   609  
   610  	if gc.Thearch.Use387 {
   611  		cgen_float387(n, res)
   612  	} else {
   613  		cgen_floatsse(n, res)
   614  	}
   615  }
   616  
   617  // floating-point.  387 (not SSE2)
   618  func cgen_float387(n *gc.Node, res *gc.Node) {
   619  	var f0 gc.Node
   620  	var f1 gc.Node
   621  
   622  	nl := n.Left
   623  	nr := n.Right
   624  	gc.Nodreg(&f0, nl.Type, x86.REG_F0)
   625  	gc.Nodreg(&f1, n.Type, x86.REG_F0+1)
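        	// f0 and f1 name the top two slots of the x87 register stack
        	// (ST0 and ST1); operands are computed onto the stack and combined
        	// with the instruction selected by foptoas.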
   626  	if nr != nil {
   627  		// binary
   628  		if nl.Ullman >= nr.Ullman {
   629  			gc.Cgen(nl, &f0)
   630  			if nr.Addable {
   631  				gins(foptoas(n.Op, n.Type, 0), nr, &f0)
   632  			} else {
   633  				gc.Cgen(nr, &f0)
   634  				gins(foptoas(n.Op, n.Type, Fpop), &f0, &f1)
   635  			}
   636  		} else {
   637  			gc.Cgen(nr, &f0)
   638  			if nl.Addable {
   639  				gins(foptoas(n.Op, n.Type, Frev), nl, &f0)
   640  			} else {
   641  				gc.Cgen(nl, &f0)
   642  				gins(foptoas(n.Op, n.Type, Frev|Fpop), &f0, &f1)
   643  			}
   644  		}
   645  
   646  		gmove(&f0, res)
   647  		return
   648  	}
   649  
   650  	// unary
   651  	gc.Cgen(nl, &f0)
   652  
   653  	if n.Op != gc.OCONV && n.Op != gc.OPLUS {
   654  		gins(foptoas(n.Op, n.Type, 0), nil, nil)
   655  	}
   656  	gmove(&f0, res)
   657  	return
   658  }
   659  
   660  func cgen_floatsse(n *gc.Node, res *gc.Node) {
   661  	var a int
   662  
   663  	nl := n.Left
   664  	nr := n.Right
   665  	switch n.Op {
   666  	default:
   667  		gc.Dump("cgen_floatsse", n)
   668  		gc.Fatalf("cgen_floatsse %v", gc.Oconv(int(n.Op), 0))
   669  		return
   670  
   671  	case gc.OMINUS,
   672  		gc.OCOM:
   673  		nr = gc.Nodintconst(-1)
   674  		gc.Convlit(&nr, n.Type)
   675  		a = foptoas(gc.OMUL, nl.Type, 0)
   676  		goto sbop
   677  
   678  		// symmetric binary
   679  	case gc.OADD,
   680  		gc.OMUL:
   681  		a = foptoas(n.Op, nl.Type, 0)
   682  
   683  		goto sbop
   684  
   685  		// asymmetric binary
   686  	case gc.OSUB,
   687  		gc.OMOD,
   688  		gc.ODIV:
   689  		a = foptoas(n.Op, nl.Type, 0)
   690  
   691  		goto abop
   692  	}
   693  
   694  sbop: // symmetric binary
   695  	if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL {
   696  		nl, nr = nr, nl
   697  	}
   698  
   699  abop: // asymmetric binary
   700  	if nl.Ullman >= nr.Ullman {
   701  		var nt gc.Node
   702  		gc.Tempname(&nt, nl.Type)
   703  		gc.Cgen(nl, &nt)
   704  		var n2 gc.Node
   705  		gc.Mgen(nr, &n2, nil)
   706  		var n1 gc.Node
   707  		gc.Regalloc(&n1, nl.Type, res)
   708  		gmove(&nt, &n1)
   709  		gins(a, &n2, &n1)
   710  		gmove(&n1, res)
   711  		gc.Regfree(&n1)
   712  		gc.Mfree(&n2)
   713  	} else {
   714  		var n2 gc.Node
   715  		gc.Regalloc(&n2, nr.Type, res)
   716  		gc.Cgen(nr, &n2)
   717  		var n1 gc.Node
   718  		gc.Regalloc(&n1, nl.Type, nil)
   719  		gc.Cgen(nl, &n1)
   720  		gins(a, &n2, &n1)
   721  		gc.Regfree(&n2)
   722  		gmove(&n1, res)
   723  		gc.Regfree(&n1)
   724  	}
   725  
   726  	return
   727  }
   728  
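        // bgen_float branches to 'to' if the floating-point comparison n has
        // truth value wantTrue. NaN makes float comparisons unordered, so the
        // complement of a test cannot simply be taken: !wantTrue is handled by
        // re-generating the positive test and branching around it, and OEQ/ONE
        // must also consult the parity flag set by an unordered compare.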
   729  func bgen_float(n *gc.Node, wantTrue bool, likely int, to *obj.Prog) {
   730  	nl := n.Left
   731  	nr := n.Right
   732  	op := n.Op
   733  	if !wantTrue {
   734  		// brcom is not valid on floats when NaN is involved.
   735  		p1 := gc.Gbranch(obj.AJMP, nil, 0)
   736  		p2 := gc.Gbranch(obj.AJMP, nil, 0)
   737  		gc.Patch(p1, gc.Pc)
   738  
   739  		// No need to avoid re-genning ninit.
   740  		bgen_float(n, true, -likely, p2)
   741  
   742  		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
   743  		gc.Patch(p2, gc.Pc)
   744  		return
   745  	}
   746  
   747  	if gc.Thearch.Use387 {
   748  		op = gc.Brrev(op) // because the args are stacked
   749  		if op == gc.OGE || op == gc.OGT {
   750  			// only < and <= work right with NaN; reverse if needed
   751  			nl, nr = nr, nl
   752  			op = gc.Brrev(op)
   753  		}
   754  
   755  		var ax, n2, tmp gc.Node
   756  		gc.Nodreg(&tmp, nr.Type, x86.REG_F0)
   757  		gc.Nodreg(&n2, nr.Type, x86.REG_F0+1)
   758  		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
   759  		if gc.Simsimtype(nr.Type) == gc.TFLOAT64 {
   760  			if nl.Ullman > nr.Ullman {
   761  				gc.Cgen(nl, &tmp)
   762  				gc.Cgen(nr, &tmp)
   763  				gins(x86.AFXCHD, &tmp, &n2)
   764  			} else {
   765  				gc.Cgen(nr, &tmp)
   766  				gc.Cgen(nl, &tmp)
   767  			}
   768  
   769  			gins(x86.AFUCOMIP, &tmp, &n2)
   770  			gins(x86.AFMOVDP, &tmp, &tmp) // annoying pop but still better than STSW+SAHF
   771  		} else {
   772  			// TODO(rsc): The moves back and forth to memory
   773  			// here are for truncating the value to 32 bits.
   774  			// This handles 32-bit comparison but presumably
   775  			// all the other ops have the same problem.
   776  			// We need to figure out what the right general
   777  			// solution is, besides telling people to use float64.
   778  			var t1 gc.Node
   779  			gc.Tempname(&t1, gc.Types[gc.TFLOAT32])
   780  
   781  			var t2 gc.Node
   782  			gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
   783  			gc.Cgen(nr, &t1)
   784  			gc.Cgen(nl, &t2)
   785  			gmove(&t2, &tmp)
   786  			gins(x86.AFCOMFP, &t1, &tmp)
   787  			gins(x86.AFSTSW, nil, &ax)
   788  			gins(x86.ASAHF, nil, nil)
   789  		}
   790  	} else {
   791  		// Not 387
   792  		if !nl.Addable {
   793  			nl = gc.CgenTemp(nl)
   794  		}
   795  		if !nr.Addable {
   796  			nr = gc.CgenTemp(nr)
   797  		}
   798  
   799  		var n2 gc.Node
   800  		gc.Regalloc(&n2, nr.Type, nil)
   801  		gmove(nr, &n2)
   802  		nr = &n2
   803  
   804  		if nl.Op != gc.OREGISTER {
   805  			var n3 gc.Node
   806  			gc.Regalloc(&n3, nl.Type, nil)
   807  			gmove(nl, &n3)
   808  			nl = &n3
   809  		}
   810  
   811  		if op == gc.OGE || op == gc.OGT {
   812  			// only < and <= work right with NaN; reverse if needed
   813  			nl, nr = nr, nl
   814  			op = gc.Brrev(op)
   815  		}
   816  
   817  		gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
   818  		if nl.Op == gc.OREGISTER {
   819  			gc.Regfree(nl)
   820  		}
   821  		gc.Regfree(nr)
   822  	}
   823  
   824  	switch op {
   825  	case gc.OEQ:
   826  		// neither NE nor P
   827  		p1 := gc.Gbranch(x86.AJNE, nil, -likely)
   828  		p2 := gc.Gbranch(x86.AJPS, nil, -likely)
   829  		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
   830  		gc.Patch(p1, gc.Pc)
   831  		gc.Patch(p2, gc.Pc)
   832  	case gc.ONE:
   833  		// either NE or P
   834  		gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to)
   835  		gc.Patch(gc.Gbranch(x86.AJPS, nil, likely), to)
   836  	default:
   837  		gc.Patch(gc.Gbranch(optoas(op, nr.Type), nil, likely), to)
   838  	}
   839  }
   840  
   841  // Called after regopt and peep have run.
   842  // Expand CHECKNIL pseudo-op into actual nil pointer check.
   843  func expandchecks(firstp *obj.Prog) {
   844  	var p1 *obj.Prog
   845  	var p2 *obj.Prog
   846  
   847  	for p := firstp; p != nil; p = p.Link {
   848  		if p.As != obj.ACHECKNIL {
   849  			continue
   850  		}
   851  		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p.Lineno==1 in generated wrappers
   852  			gc.Warnl(int(p.Lineno), "generated nil check")
   853  		}
   854  
   855  		// check is
   856  		//	CMP arg, $0
   857  		//	JNE 2(PC) (likely)
   858  		//	MOV AX, 0
   859  		p1 = gc.Ctxt.NewProg()
   860  
   861  		p2 = gc.Ctxt.NewProg()
   862  		gc.Clearp(p1)
   863  		gc.Clearp(p2)
   864  		p1.Link = p2
   865  		p2.Link = p.Link
   866  		p.Link = p1
   867  		p1.Lineno = p.Lineno
   868  		p2.Lineno = p.Lineno
   869  		p1.Pc = 9999
   870  		p2.Pc = 9999
   871  		p.As = x86.ACMPL
   872  		p.To.Type = obj.TYPE_CONST
   873  		p.To.Offset = 0
   874  		p1.As = x86.AJNE
   875  		p1.From.Type = obj.TYPE_CONST
   876  		p1.From.Offset = 1 // likely
   877  		p1.To.Type = obj.TYPE_BRANCH
   878  		p1.To.Val = p2.Link
   879  
   880  		// crash by write to memory address 0.
   881  		// if possible, since we know arg is 0, use 0(arg),
   882  		// which will be shorter to encode than plain 0.
   883  		p2.As = x86.AMOVL
   884  
   885  		p2.From.Type = obj.TYPE_REG
   886  		p2.From.Reg = x86.REG_AX
   887  		if regtyp(&p.From) {
   888  			p2.To.Type = obj.TYPE_MEM
   889  			p2.To.Reg = p.From.Reg
   890  		} else {
   891  			p2.To.Type = obj.TYPE_MEM
   892  		}
   893  		p2.To.Offset = 0
   894  	}
   895  }
   896  
   897  // addr += index*width if possible.
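        // It does so by rewriting a LEAL so that its source operand becomes
        // (addr)(index*width), leaving the sum in addr's register; widths
        // other than 1, 2, 4, or 8 report false so the caller can fall back.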
   898  func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
   899  	switch width {
   900  	case 1, 2, 4, 8:
   901  		p1 := gins(x86.ALEAL, index, addr)
   902  		p1.From.Type = obj.TYPE_MEM
   903  		p1.From.Scale = int16(width)
   904  		p1.From.Index = p1.From.Reg
   905  		p1.From.Reg = p1.To.Reg
   906  		return true
   907  	}
   908  	return false
   909  }
   910  
   911  // res = runtime.getg()
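        // The g pointer is loaded with the usual two-instruction TLS sequence
        // (roughly MOVL TLS, r followed by MOVL (r)(TLS*1), r), which the obj
        // back end rewrites into the form required by the target's TLS model.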
   912  func getg(res *gc.Node) {
   913  	var n1 gc.Node
   914  	gc.Regalloc(&n1, res.Type, res)
   915  	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
   916  	p := gins(mov, nil, &n1)
   917  	p.From.Type = obj.TYPE_REG
   918  	p.From.Reg = x86.REG_TLS
   919  	p = gins(mov, nil, &n1)
   920  	p.From = p.To
   921  	p.From.Type = obj.TYPE_MEM
   922  	p.From.Index = x86.REG_TLS
   923  	p.From.Scale = 1
   924  	gmove(&n1, res)
   925  	gc.Regfree(&n1)
   926  }