github.com/euank/go@v0.0.0-20160829210321-495514729181/src/cmd/compile/internal/x86/ggen.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package x86
     6  
     7  import (
     8  	"cmd/compile/internal/gc"
     9  	"cmd/internal/obj"
    10  	"cmd/internal/obj/x86"
    11  )
    12  
    13  func defframe(ptxt *obj.Prog) {
    14  	// fill in argument size, stack size
    15  	ptxt.To.Type = obj.TYPE_TEXTSIZE
    16  
    17  	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.ArgWidth(), int64(gc.Widthptr)))
    18  	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
    19  	ptxt.To.Offset = int64(frame)
    20  
    21  	// insert code to zero ambiguously live variables
    22  	// so that the garbage collector only sees initialized values
    23  	// when it looks for pointers.
    24  	p := ptxt
    25  
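         	// lo and hi delimit the run of ambiguously live stack offsets
         	// collected so far; ax records whether AX already holds zero so
         	// zerorange only clears it once.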
    26  	hi := int64(0)
    27  	lo := hi
    28  	ax := uint32(0)
    29  	for _, n := range gc.Curfn.Func.Dcl {
    30  		if !n.Name.Needzero {
    31  			continue
    32  		}
    33  		if n.Class != gc.PAUTO {
    34  			gc.Fatalf("needzero class %d", n.Class)
    35  		}
    36  		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
    37  			gc.Fatalf("var %v has size %d offset %d", gc.Nconv(n, gc.FmtLong), int(n.Type.Width), int(n.Xoffset))
    38  		}
    39  		if lo != hi && n.Xoffset+n.Type.Width == lo-int64(2*gc.Widthptr) {
    40  			// merge with range we already have
    41  			lo = n.Xoffset
    42  
    43  			continue
    44  		}
    45  
    46  		// zero old range
    47  		p = zerorange(p, int64(frame), lo, hi, &ax)
    48  
    49  		// set new range
    50  		hi = n.Xoffset + n.Type.Width
    51  
    52  		lo = n.Xoffset
    53  	}
    54  
    55  	// zero final range
    56  	zerorange(p, int64(frame), lo, hi, &ax)
    57  }
    58  
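         // zerorange appends instructions after p that zero the stack words at
         // offsets [lo, hi) within the frame. *ax tracks whether AX has already
         // been loaded with zero, so it is cleared at most once per function.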
    59  func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
    60  	cnt := hi - lo
    61  	if cnt == 0 {
    62  		return p
    63  	}
    64  	if *ax == 0 {
    65  		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
    66  		*ax = 1
    67  	}
    68  
    69  	if cnt <= int64(4*gc.Widthreg) {
    70  		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
    71  			p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
    72  		}
    73  	} else if !gc.Nacl && cnt <= int64(128*gc.Widthreg) {
    74  		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
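         		// Jump into the middle of duffzero: on 386 each STOSL in its body is
         		// one byte long, so an offset of 1*(128-cnt/Widthreg) executes exactly
         		// cnt/Widthreg stores (same magic constants as noted in clearfat below).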
    75  		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 1*(128-cnt/int64(gc.Widthreg)))
    76  		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
    77  	} else {
    78  		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
    79  		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
    80  		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
    81  		p = appendpp(p, x86.ASTOSL, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
    82  	}
    83  
    84  	return p
    85  }
    86  
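         // appendpp allocates a new Prog with the given opcode and operands,
         // links it into the instruction list immediately after p, and returns it.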
    87  func appendpp(p *obj.Prog, as obj.As, ftype obj.AddrType, freg int, foffset int64, ttype obj.AddrType, treg int, toffset int64) *obj.Prog {
    88  	q := gc.Ctxt.NewProg()
    89  	gc.Clearp(q)
    90  	q.As = as
    91  	q.Lineno = p.Lineno
    92  	q.From.Type = ftype
    93  	q.From.Reg = int16(freg)
    94  	q.From.Offset = foffset
    95  	q.To.Type = ttype
    96  	q.To.Reg = int16(treg)
    97  	q.To.Offset = toffset
    98  	q.Link = p.Link
    99  	p.Link = q
   100  	return q
   101  }
   102  
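         // clearfat zeroes the memory occupied by nl. Small values are cleared
         // with individual MOVs, mid-sized ones via duffzero, and large ones
         // with REP STOSL.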
   103  func clearfat(nl *gc.Node) {
   104  	/* clear a fat object */
   105  	if gc.Debug['g'] != 0 {
   106  		gc.Dump("\nclearfat", nl)
   107  	}
   108  
   109  	w := uint32(nl.Type.Width)
   110  
   111  	// Avoid taking the address for simple enough types.
   112  	if gc.Componentgen(nil, nl) {
   113  		return
   114  	}
   115  
   116  	c := w % 4 // bytes
   117  	q := w / 4 // quads
   118  
   119  	if q < 4 {
   120  		// Write sequence of MOV 0, off(base) instead of using STOSL.
   121  		// The hope is that although the code will be slightly longer,
   122  		// the MOVs will have no dependencies and pipeline better
   123  		// than the unrolled STOSL loop.
   124  		// NOTE: Must use agen, not igen, so that optimizer sees address
   125  		// being taken. We are not writing on field boundaries.
   126  		var n1 gc.Node
   127  		gc.Regalloc(&n1, gc.Types[gc.Tptr], nil)
   128  
   129  		gc.Agen(nl, &n1)
   130  		n1.Op = gc.OINDREG
   131  		var z gc.Node
   132  		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
   133  		for ; q > 0; q-- {
   134  			n1.Type = z.Type
   135  			gins(x86.AMOVL, &z, &n1)
   136  			n1.Xoffset += 4
   137  		}
   138  
   139  		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
   140  		for ; c > 0; c-- {
   141  			n1.Type = z.Type
   142  			gins(x86.AMOVB, &z, &n1)
   143  			n1.Xoffset++
   144  		}
   145  
   146  		gc.Regfree(&n1)
   147  		return
   148  	}
   149  
   150  	var n1 gc.Node
   151  	gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI)
   152  	gc.Agen(nl, &n1)
   153  	gconreg(x86.AMOVL, 0, x86.REG_AX)
   154  
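         	// Very large objects (or any duffzero-sized object under NaCl, which
         	// does not use DUFFZERO) are cleared with REP STOSL; mid-sized objects
         	// jump into duffzero; anything smaller falls through to plain STOSLs.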
   155  	if q > 128 || (q >= 4 && gc.Nacl) {
   156  		gconreg(x86.AMOVL, int64(q), x86.REG_CX)
   157  		gins(x86.AREP, nil, nil)   // repeat
    158  		gins(x86.ASTOSL, nil, nil) // STOSL EAX,*(DI)+
   159  	} else if q >= 4 {
   160  		p := gins(obj.ADUFFZERO, nil, nil)
   161  		p.To.Type = obj.TYPE_ADDR
   162  		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
   163  
    164  		// 1 and 128 = magic constants: see ../../../../runtime/duff_386.s
   165  		p.To.Offset = 1 * (128 - int64(q))
   166  	} else {
   167  		for q > 0 {
    168  			gins(x86.ASTOSL, nil, nil) // STOSL EAX,*(DI)+
   169  			q--
   170  		}
   171  	}
   172  
   173  	for c > 0 {
   174  		gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+
   175  		c--
   176  	}
   177  }
   178  
   179  var panicdiv *gc.Node
   180  
   181  /*
   182   * generate division.
   183   * caller must set:
   184   *	ax = allocated AX register
   185   *	dx = allocated DX register
   186   * generates one of:
   187   *	res = nl / nr
   188   *	res = nl % nr
   189   * according to op.
   190   */
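         // For a signed 32-bit divide, the core of the emitted sequence is roughly
         // (illustrative only; the AX/DX bookkeeping and the -1 and NaCl
         // zero-divisor checks are omitted):
         //	MOVL	nl, AX
         //	CDQ           // sign-extend AX into DX (the OEXTEND instruction)
         //	IDIVL	nr    // quotient in AX, remainder in DX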
   191  func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) {
   192  	// Have to be careful about handling
   193  	// most negative int divided by -1 correctly.
   194  	// The hardware will trap.
   195  	// Also the byte divide instruction needs AH,
   196  	// which we otherwise don't have to deal with.
   197  	// Easiest way to avoid for int8, int16: use int32.
   198  	// For int32 and int64, use explicit test.
   199  	// Could use int64 hw for int32.
   200  	t := nl.Type
   201  
   202  	t0 := t
   203  	check := false
   204  	if t.IsSigned() {
   205  		check = true
   206  		if gc.Isconst(nl, gc.CTINT) && nl.Int64() != -1<<uint64(t.Width*8-1) {
   207  			check = false
   208  		} else if gc.Isconst(nr, gc.CTINT) && nr.Int64() != -1 {
   209  			check = false
   210  		}
   211  	}
   212  
   213  	if t.Width < 4 {
   214  		if t.IsSigned() {
   215  			t = gc.Types[gc.TINT32]
   216  		} else {
   217  			t = gc.Types[gc.TUINT32]
   218  		}
   219  		check = false
   220  	}
   221  
   222  	var t1 gc.Node
   223  	gc.Tempname(&t1, t)
   224  	var t2 gc.Node
   225  	gc.Tempname(&t2, t)
   226  	if t0 != t {
   227  		var t3 gc.Node
   228  		gc.Tempname(&t3, t0)
   229  		var t4 gc.Node
   230  		gc.Tempname(&t4, t0)
   231  		gc.Cgen(nl, &t3)
   232  		gc.Cgen(nr, &t4)
   233  
   234  		// Convert.
   235  		gmove(&t3, &t1)
   236  
   237  		gmove(&t4, &t2)
   238  	} else {
   239  		gc.Cgen(nl, &t1)
   240  		gc.Cgen(nr, &t2)
   241  	}
   242  
   243  	var n1 gc.Node
   244  	if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) {
   245  		gc.Regalloc(&n1, t, res)
   246  	} else {
   247  		gc.Regalloc(&n1, t, nil)
   248  	}
   249  	gmove(&t2, &n1)
   250  	gmove(&t1, ax)
   251  	var p2 *obj.Prog
   252  	var n4 gc.Node
   253  	if gc.Nacl {
   254  		// Native Client does not relay the divide-by-zero trap
   255  		// to the executing program, so we must insert a check
   256  		// for ourselves.
   257  		gc.Nodconst(&n4, t, 0)
   258  
   259  		gins(optoas(gc.OCMP, t), &n1, &n4)
   260  		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
   261  		if panicdiv == nil {
   262  			panicdiv = gc.Sysfunc("panicdivide")
   263  		}
   264  		gc.Ginscall(panicdiv, -1)
   265  		gc.Patch(p1, gc.Pc)
   266  	}
   267  
   268  	if check {
   269  		gc.Nodconst(&n4, t, -1)
   270  		gins(optoas(gc.OCMP, t), &n1, &n4)
   271  		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
   272  		if op == gc.ODIV {
   273  			// a / (-1) is -a.
   274  			gins(optoas(gc.OMINUS, t), nil, ax)
   275  
   276  			gmove(ax, res)
   277  		} else {
   278  			// a % (-1) is 0.
   279  			gc.Nodconst(&n4, t, 0)
   280  
   281  			gmove(&n4, res)
   282  		}
   283  
   284  		p2 = gc.Gbranch(obj.AJMP, nil, 0)
   285  		gc.Patch(p1, gc.Pc)
   286  	}
   287  
   288  	if !t.IsSigned() {
   289  		var nz gc.Node
   290  		gc.Nodconst(&nz, t, 0)
   291  		gmove(&nz, dx)
   292  	} else {
   293  		gins(optoas(gc.OEXTEND, t), nil, nil)
   294  	}
   295  	gins(optoas(op, t), &n1, nil)
   296  	gc.Regfree(&n1)
   297  
   298  	if op == gc.ODIV {
   299  		gmove(ax, res)
   300  	} else {
   301  		gmove(dx, res)
   302  	}
   303  	if check {
   304  		gc.Patch(p2, gc.Pc)
   305  	}
   306  }
   307  
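         // savex claims machine register dr for x. If the register is live and is
         // not the destination res, its current value is first spilled to a
         // temporary recorded in *oldx so restx can restore it later.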
   308  func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
   309  	r := gc.GetReg(dr)
   310  	gc.Nodreg(x, gc.Types[gc.TINT32], dr)
   311  
   312  	// save current ax and dx if they are live
   313  	// and not the destination
   314  	*oldx = gc.Node{}
   315  
   316  	if r > 0 && !gc.Samereg(x, res) {
   317  		gc.Tempname(oldx, gc.Types[gc.TINT32])
   318  		gmove(x, oldx)
   319  	}
   320  
   321  	gc.Regalloc(x, t, x)
   322  }
   323  
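         // restx releases x and, if savex spilled the register's previous value,
         // restores it from oldx.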
   324  func restx(x *gc.Node, oldx *gc.Node) {
   325  	gc.Regfree(x)
   326  
   327  	if oldx.Op != 0 {
   328  		x.Type = gc.Types[gc.TINT32]
   329  		gmove(oldx, x)
   330  	}
   331  }
   332  
   333  /*
   334   * generate division according to op, one of:
   335   *	res = nl / nr
   336   *	res = nl % nr
   337   */
   338  func cgen_div(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
   339  	if gc.Is64(nl.Type) {
   340  		gc.Fatalf("cgen_div %v", nl.Type)
   341  	}
   342  
   343  	var t *gc.Type
   344  	if nl.Type.IsSigned() {
   345  		t = gc.Types[gc.TINT32]
   346  	} else {
   347  		t = gc.Types[gc.TUINT32]
   348  	}
   349  	var ax gc.Node
   350  	var oldax gc.Node
   351  	savex(x86.REG_AX, &ax, &oldax, res, t)
   352  	var olddx gc.Node
   353  	var dx gc.Node
   354  	savex(x86.REG_DX, &dx, &olddx, res, t)
   355  	dodiv(op, nl, nr, res, &ax, &dx)
   356  	restx(&dx, &olddx)
   357  	restx(&ax, &oldax)
   358  }
   359  
   360  /*
   361   * generate shift according to op, one of:
   362   *	res = nl << nr
   363   *	res = nl >> nr
   364   */
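         // When the shift count is not provably in range, the emitted fix-up is
         // roughly (illustrative; 32-bit left shift with the count already in CX):
         //	CMPL	CX, $32
         //	JCS	ok            // unsigned count < width
         //	MOVL	$0, dst       // or SARL $31, dst for a signed right shift
         // ok:
         //	SHLL	CX, dst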
   365  func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
   366  	if nl.Type.Width > 4 {
   367  		gc.Fatalf("cgen_shift %v", nl.Type)
   368  	}
   369  
   370  	w := int(nl.Type.Width * 8)
   371  
   372  	a := optoas(op, nl.Type)
   373  
   374  	if nr.Op == gc.OLITERAL {
   375  		var n2 gc.Node
   376  		gc.Tempname(&n2, nl.Type)
   377  		gc.Cgen(nl, &n2)
   378  		var n1 gc.Node
   379  		gc.Regalloc(&n1, nl.Type, res)
   380  		gmove(&n2, &n1)
   381  		sc := uint64(nr.Int64())
   382  		if sc >= uint64(nl.Type.Width*8) {
   383  			// large shift gets 2 shifts by width-1
   384  			gins(a, ncon(uint32(w)-1), &n1)
   385  
   386  			gins(a, ncon(uint32(w)-1), &n1)
   387  		} else {
   388  			gins(a, nr, &n1)
   389  		}
   390  		gmove(&n1, res)
   391  		gc.Regfree(&n1)
   392  		return
   393  	}
   394  
   395  	var oldcx gc.Node
   396  	var cx gc.Node
   397  	gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
   398  	if gc.GetReg(x86.REG_CX) > 1 && !gc.Samereg(&cx, res) {
   399  		gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
   400  		gmove(&cx, &oldcx)
   401  	}
   402  
   403  	var n1 gc.Node
   404  	var nt gc.Node
   405  	if nr.Type.Width > 4 {
   406  		gc.Tempname(&nt, nr.Type)
   407  		n1 = nt
   408  	} else {
   409  		gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
    410  		gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift count in CX
   411  	}
   412  
   413  	var n2 gc.Node
   414  	if gc.Samereg(&cx, res) {
   415  		gc.Regalloc(&n2, nl.Type, nil)
   416  	} else {
   417  		gc.Regalloc(&n2, nl.Type, res)
   418  	}
   419  	if nl.Ullman >= nr.Ullman {
   420  		gc.Cgen(nl, &n2)
   421  		gc.Cgen(nr, &n1)
   422  	} else {
   423  		gc.Cgen(nr, &n1)
   424  		gc.Cgen(nl, &n2)
   425  	}
   426  
   427  	// test and fix up large shifts
   428  	if bounded {
   429  		if nr.Type.Width > 4 {
   430  			// delayed reg alloc
   431  			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
   432  
    433  			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift count in CX
   434  			var lo gc.Node
   435  			var hi gc.Node
   436  			split64(&nt, &lo, &hi)
   437  			gmove(&lo, &n1)
   438  			splitclean()
   439  		}
   440  	} else {
   441  		var p1 *obj.Prog
   442  		if nr.Type.Width > 4 {
   443  			// delayed reg alloc
   444  			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
   445  
    446  			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift count in CX
   447  			var lo gc.Node
   448  			var hi gc.Node
   449  			split64(&nt, &lo, &hi)
   450  			gmove(&lo, &n1)
   451  			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
   452  			p2 := gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
   453  			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
   454  			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
   455  			splitclean()
   456  			gc.Patch(p2, gc.Pc)
   457  		} else {
   458  			gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
   459  			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
   460  		}
   461  
   462  		if op == gc.ORSH && nl.Type.IsSigned() {
   463  			gins(a, ncon(uint32(w)-1), &n2)
   464  		} else {
   465  			gmove(ncon(0), &n2)
   466  		}
   467  
   468  		gc.Patch(p1, gc.Pc)
   469  	}
   470  
   471  	gins(a, &n1, &n2)
   472  
   473  	if oldcx.Op != 0 {
   474  		gmove(&oldcx, &cx)
   475  	}
   476  
   477  	gmove(&n2, res)
   478  
   479  	gc.Regfree(&n1)
   480  	gc.Regfree(&n2)
   481  }
   482  
   483  /*
   484   * generate byte multiply:
   485   *	res = nl * nr
   486   * there is no 2-operand byte multiply instruction so
   487   * we do a full-width multiplication and truncate afterwards.
   488   */
   489  func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
   490  	if optoas(op, nl.Type) != x86.AIMULB {
   491  		return false
   492  	}
   493  
   494  	// copy from byte to full registers
   495  	t := gc.Types[gc.TUINT32]
   496  
   497  	if nl.Type.IsSigned() {
   498  		t = gc.Types[gc.TINT32]
   499  	}
   500  
   501  	// largest ullman on left.
   502  	if nl.Ullman < nr.Ullman {
   503  		nl, nr = nr, nl
   504  	}
   505  
   506  	var nt gc.Node
   507  	gc.Tempname(&nt, nl.Type)
   508  	gc.Cgen(nl, &nt)
   509  	var n1 gc.Node
   510  	gc.Regalloc(&n1, t, res)
   511  	gc.Cgen(nr, &n1)
   512  	var n2 gc.Node
   513  	gc.Regalloc(&n2, t, nil)
   514  	gmove(&nt, &n2)
   515  	a := optoas(op, t)
   516  	gins(a, &n2, &n1)
   517  	gc.Regfree(&n2)
   518  	gmove(&n1, res)
   519  	gc.Regfree(&n1)
   520  
   521  	return true
   522  }
   523  
   524  /*
   525   * generate high multiply:
   526   *   res = (nl*nr) >> width
   527   */
   528  func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
   529  	var n1 gc.Node
   530  	var n2 gc.Node
   531  
   532  	t := nl.Type
   533  	a := optoas(gc.OHMUL, t)
   534  
   535  	// gen nl in n1.
   536  	gc.Tempname(&n1, t)
   537  	gc.Cgen(nl, &n1)
   538  
   539  	// gen nr in n2.
   540  	gc.Regalloc(&n2, t, res)
   541  	gc.Cgen(nr, &n2)
   542  
   543  	var ax, oldax, dx, olddx gc.Node
   544  	savex(x86.REG_AX, &ax, &oldax, res, gc.Types[gc.TUINT32])
   545  	savex(x86.REG_DX, &dx, &olddx, res, gc.Types[gc.TUINT32])
   546  
   547  	gmove(&n2, &ax)
   548  	gins(a, &n1, nil)
   549  	gc.Regfree(&n2)
   550  
   551  	if t.Width == 1 {
   552  		// byte multiply behaves differently.
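         		// The 8-bit multiply leaves the high half of the product in AH
         		// rather than DX, so copy AH into DX before the common move below.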
   553  		var byteAH, byteDX gc.Node
   554  		gc.Nodreg(&byteAH, t, x86.REG_AH)
   555  		gc.Nodreg(&byteDX, t, x86.REG_DX)
   556  		gmove(&byteAH, &byteDX)
   557  	}
   558  
   559  	gmove(&dx, res)
   560  
   561  	restx(&ax, &oldax)
   562  	restx(&dx, &olddx)
   563  }
   564  
   565  /*
   566   * generate floating-point operation.
   567   */
   568  func cgen_float(n *gc.Node, res *gc.Node) {
   569  	nl := n.Left
   570  	switch n.Op {
   571  	case gc.OEQ,
   572  		gc.ONE,
   573  		gc.OLT,
   574  		gc.OLE,
   575  		gc.OGE:
   576  		p1 := gc.Gbranch(obj.AJMP, nil, 0)
   577  		p2 := gc.Pc
   578  		gmove(gc.Nodbool(true), res)
   579  		p3 := gc.Gbranch(obj.AJMP, nil, 0)
   580  		gc.Patch(p1, gc.Pc)
   581  		gc.Bgen(n, true, 0, p2)
   582  		gmove(gc.Nodbool(false), res)
   583  		gc.Patch(p3, gc.Pc)
   584  		return
   585  
   586  	case gc.OPLUS:
   587  		gc.Cgen(nl, res)
   588  		return
   589  
   590  	case gc.OCONV:
   591  		if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) {
   592  			gc.Cgen(nl, res)
   593  			return
   594  		}
   595  
   596  		var n2 gc.Node
   597  		gc.Tempname(&n2, n.Type)
   598  		var n1 gc.Node
   599  		gc.Mgen(nl, &n1, res)
   600  		gmove(&n1, &n2)
   601  		gmove(&n2, res)
   602  		gc.Mfree(&n1)
   603  		return
   604  	}
   605  
   606  	if gc.Thearch.Use387 {
   607  		cgen_float387(n, res)
   608  	} else {
   609  		cgen_floatsse(n, res)
   610  	}
   611  }
   612  
   613  // floating-point.  387 (not SSE2)
   614  func cgen_float387(n *gc.Node, res *gc.Node) {
   615  	var f0 gc.Node
   616  	var f1 gc.Node
   617  
   618  	nl := n.Left
   619  	nr := n.Right
   620  	gc.Nodreg(&f0, nl.Type, x86.REG_F0)
   621  	gc.Nodreg(&f1, n.Type, x86.REG_F0+1)
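         	// f0 and f1 name the top two slots of the x87 register stack (ST0 and ST1).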
   622  	if nr != nil {
   623  		// binary
   624  		if nl.Ullman >= nr.Ullman {
   625  			gc.Cgen(nl, &f0)
   626  			if nr.Addable {
   627  				gins(foptoas(n.Op, n.Type, 0), nr, &f0)
   628  			} else {
   629  				gc.Cgen(nr, &f0)
   630  				gins(foptoas(n.Op, n.Type, Fpop), &f0, &f1)
   631  			}
   632  		} else {
   633  			gc.Cgen(nr, &f0)
   634  			if nl.Addable {
   635  				gins(foptoas(n.Op, n.Type, Frev), nl, &f0)
   636  			} else {
   637  				gc.Cgen(nl, &f0)
   638  				gins(foptoas(n.Op, n.Type, Frev|Fpop), &f0, &f1)
   639  			}
   640  		}
   641  
   642  		gmove(&f0, res)
   643  		return
   644  	}
   645  
   646  	// unary
   647  	gc.Cgen(nl, &f0)
   648  
   649  	if n.Op != gc.OCONV && n.Op != gc.OPLUS {
   650  		gins(foptoas(n.Op, n.Type, 0), nil, nil)
   651  	}
   652  	gmove(&f0, res)
   653  	return
   654  }
   655  
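         // cgen_floatsse generates SSE2 code for the floating-point expression n,
         // leaving the result in res.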
   656  func cgen_floatsse(n *gc.Node, res *gc.Node) {
   657  	var a obj.As
   658  
   659  	nl := n.Left
   660  	nr := n.Right
   661  	switch n.Op {
   662  	default:
   663  		gc.Dump("cgen_floatsse", n)
   664  		gc.Fatalf("cgen_floatsse %v", n.Op)
   665  		return
   666  
   667  	case gc.OMINUS,
   668  		gc.OCOM:
   669  		nr = gc.NegOne(n.Type)
   670  		a = foptoas(gc.OMUL, nl.Type, 0)
   671  		goto sbop
   672  
   673  		// symmetric binary
   674  	case gc.OADD,
   675  		gc.OMUL:
   676  		a = foptoas(n.Op, nl.Type, 0)
   677  
   678  		goto sbop
   679  
   680  		// asymmetric binary
   681  	case gc.OSUB,
   682  		gc.OMOD,
   683  		gc.ODIV:
   684  		a = foptoas(n.Op, nl.Type, 0)
   685  
   686  		goto abop
   687  	}
   688  
   689  sbop: // symmetric binary
   690  	if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL {
   691  		nl, nr = nr, nl
   692  	}
   693  
   694  abop: // asymmetric binary
   695  	if nl.Ullman >= nr.Ullman {
   696  		var nt gc.Node
   697  		gc.Tempname(&nt, nl.Type)
   698  		gc.Cgen(nl, &nt)
   699  		var n2 gc.Node
   700  		gc.Mgen(nr, &n2, nil)
   701  		var n1 gc.Node
   702  		gc.Regalloc(&n1, nl.Type, res)
   703  		gmove(&nt, &n1)
   704  		gins(a, &n2, &n1)
   705  		gmove(&n1, res)
   706  		gc.Regfree(&n1)
   707  		gc.Mfree(&n2)
   708  	} else {
   709  		var n2 gc.Node
   710  		gc.Regalloc(&n2, nr.Type, res)
   711  		gc.Cgen(nr, &n2)
   712  		var n1 gc.Node
   713  		gc.Regalloc(&n1, nl.Type, nil)
   714  		gc.Cgen(nl, &n1)
   715  		gins(a, &n2, &n1)
   716  		gc.Regfree(&n2)
   717  		gmove(&n1, res)
   718  		gc.Regfree(&n1)
   719  	}
   720  
   721  	return
   722  }
   723  
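         // bgen_float generates a branch to `to` when the floating-point comparison
         // n evaluates to wantTrue. Because either operand may be NaN, the false
         // case cannot simply invert the condition; it re-generates the true case
         // with the branches rearranged.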
   724  func bgen_float(n *gc.Node, wantTrue bool, likely int, to *obj.Prog) {
   725  	nl := n.Left
   726  	nr := n.Right
   727  	op := n.Op
   728  	if !wantTrue {
   729  		// brcom is not valid on floats when NaN is involved.
   730  		p1 := gc.Gbranch(obj.AJMP, nil, 0)
   731  		p2 := gc.Gbranch(obj.AJMP, nil, 0)
   732  		gc.Patch(p1, gc.Pc)
   733  
   734  		// No need to avoid re-genning ninit.
   735  		bgen_float(n, true, -likely, p2)
   736  
   737  		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
   738  		gc.Patch(p2, gc.Pc)
   739  		return
   740  	}
   741  
   742  	if gc.Thearch.Use387 {
   743  		op = gc.Brrev(op) // because the args are stacked
   744  		if op == gc.OGE || op == gc.OGT {
   745  			// only < and <= work right with NaN; reverse if needed
   746  			nl, nr = nr, nl
   747  			op = gc.Brrev(op)
   748  		}
   749  
   750  		var ax, n2, tmp gc.Node
   751  		gc.Nodreg(&tmp, nr.Type, x86.REG_F0)
   752  		gc.Nodreg(&n2, nr.Type, x86.REG_F0+1)
   753  		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
   754  		if gc.Simsimtype(nr.Type) == gc.TFLOAT64 {
   755  			if nl.Ullman > nr.Ullman {
   756  				gc.Cgen(nl, &tmp)
   757  				gc.Cgen(nr, &tmp)
   758  				gins(x86.AFXCHD, &tmp, &n2)
   759  			} else {
   760  				gc.Cgen(nr, &tmp)
   761  				gc.Cgen(nl, &tmp)
   762  			}
   763  			gins(x86.AFUCOMPP, &tmp, &n2)
   764  		} else {
   765  			// TODO(rsc): The moves back and forth to memory
   766  			// here are for truncating the value to 32 bits.
   767  			// This handles 32-bit comparison but presumably
   768  			// all the other ops have the same problem.
   769  			// We need to figure out what the right general
   770  			// solution is, besides telling people to use float64.
   771  			var t1 gc.Node
   772  			gc.Tempname(&t1, gc.Types[gc.TFLOAT32])
   773  
   774  			var t2 gc.Node
   775  			gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
   776  			gc.Cgen(nr, &t1)
   777  			gc.Cgen(nl, &t2)
   778  			gmove(&t2, &tmp)
   779  			gins(x86.AFCOMFP, &t1, &tmp)
   780  		}
   781  		gins(x86.AFSTSW, nil, &ax)
   782  		gins(x86.ASAHF, nil, nil)
   783  	} else {
   784  		// Not 387
   785  		if !nl.Addable {
   786  			nl = gc.CgenTemp(nl)
   787  		}
   788  		if !nr.Addable {
   789  			nr = gc.CgenTemp(nr)
   790  		}
   791  
   792  		var n2 gc.Node
   793  		gc.Regalloc(&n2, nr.Type, nil)
   794  		gmove(nr, &n2)
   795  		nr = &n2
   796  
   797  		if nl.Op != gc.OREGISTER {
   798  			var n3 gc.Node
   799  			gc.Regalloc(&n3, nl.Type, nil)
   800  			gmove(nl, &n3)
   801  			nl = &n3
   802  		}
   803  
   804  		if op == gc.OGE || op == gc.OGT {
   805  			// only < and <= work right with NopN; reverse if needed
   806  			nl, nr = nr, nl
   807  			op = gc.Brrev(op)
   808  		}
   809  
   810  		gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
   811  		if nl.Op == gc.OREGISTER {
   812  			gc.Regfree(nl)
   813  		}
   814  		gc.Regfree(nr)
   815  	}
   816  
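         	// An unordered comparison (either operand NaN) sets the parity flag,
         	// so OEQ must also require PF clear, and ONE accepts ZF clear or PF set.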
   817  	switch op {
   818  	case gc.OEQ:
   819  		// neither NE nor P
   820  		p1 := gc.Gbranch(x86.AJNE, nil, -likely)
   821  		p2 := gc.Gbranch(x86.AJPS, nil, -likely)
   822  		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
   823  		gc.Patch(p1, gc.Pc)
   824  		gc.Patch(p2, gc.Pc)
   825  	case gc.ONE:
   826  		// either NE or P
   827  		gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to)
   828  		gc.Patch(gc.Gbranch(x86.AJPS, nil, likely), to)
   829  	default:
   830  		gc.Patch(gc.Gbranch(optoas(op, nr.Type), nil, likely), to)
   831  	}
   832  }
   833  
   834  // Called after regopt and peep have run.
   835  // Expand CHECKNIL pseudo-op into actual nil pointer check.
   836  func expandchecks(firstp *obj.Prog) {
   837  	var p1 *obj.Prog
   838  	var p2 *obj.Prog
   839  
   840  	for p := firstp; p != nil; p = p.Link {
   841  		if p.As != obj.ACHECKNIL {
   842  			continue
   843  		}
   844  		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
   845  			gc.Warnl(p.Lineno, "generated nil check")
   846  		}
   847  
   848  		// check is
   849  		//	CMP arg, $0
   850  		//	JNE 2(PC) (likely)
   851  		//	MOV AX, 0
   852  		p1 = gc.Ctxt.NewProg()
   853  
   854  		p2 = gc.Ctxt.NewProg()
   855  		gc.Clearp(p1)
   856  		gc.Clearp(p2)
   857  		p1.Link = p2
   858  		p2.Link = p.Link
   859  		p.Link = p1
   860  		p1.Lineno = p.Lineno
   861  		p2.Lineno = p.Lineno
   862  		p1.Pc = 9999
   863  		p2.Pc = 9999
   864  		p.As = x86.ACMPL
   865  		p.To.Type = obj.TYPE_CONST
   866  		p.To.Offset = 0
   867  		p1.As = x86.AJNE
   868  		p1.From.Type = obj.TYPE_CONST
   869  		p1.From.Offset = 1 // likely
   870  		p1.To.Type = obj.TYPE_BRANCH
   871  		p1.To.Val = p2.Link
   872  
   873  		// crash by write to memory address 0.
   874  		// if possible, since we know arg is 0, use 0(arg),
   875  		// which will be shorter to encode than plain 0.
   876  		p2.As = x86.AMOVL
   877  
   878  		p2.From.Type = obj.TYPE_REG
   879  		p2.From.Reg = x86.REG_AX
   880  		if regtyp(&p.From) {
   881  			p2.To.Type = obj.TYPE_MEM
   882  			p2.To.Reg = p.From.Reg
   883  		} else {
   884  			p2.To.Type = obj.TYPE_MEM
   885  		}
   886  		p2.To.Offset = 0
   887  	}
   888  }
   889  
   890  // addr += index*width if possible.
   891  func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
   892  	switch width {
   893  	case 1, 2, 4, 8:
   894  		p1 := gins(x86.ALEAL, index, addr)
   895  		p1.From.Type = obj.TYPE_MEM
   896  		p1.From.Scale = int16(width)
   897  		p1.From.Index = p1.From.Reg
   898  		p1.From.Reg = p1.To.Reg
   899  		return true
   900  	}
   901  	return false
   902  }
   903  
   904  // res = runtime.getg()
   905  func getg(res *gc.Node) {
   906  	var n1 gc.Node
   907  	gc.Regalloc(&n1, res.Type, res)
   908  	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
   909  	p := gins(mov, nil, &n1)
   910  	p.From.Type = obj.TYPE_REG
   911  	p.From.Reg = x86.REG_TLS
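         	// The load above fetches the TLS base; the next load uses the
         	// (reg)(TLS*1) pseudo-form, which the assembler rewrites into the
         	// platform's actual g access.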
   912  	p = gins(mov, nil, &n1)
   913  	p.From = p.To
   914  	p.From.Type = obj.TYPE_MEM
   915  	p.From.Index = x86.REG_TLS
   916  	p.From.Scale = 1
   917  	gmove(&n1, res)
   918  	gc.Regfree(&n1)
   919  }