github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/cmd/compile/internal/amd64/ggen.go (about)

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

func defframe(ptxt *obj.Prog) {
	var n *gc.Node

	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)

	// iterate through declarations - they are sorted in decreasing xoffset order.
	for l := gc.Curfn.Func.Dcl; l != nil; l = l.Next {
		n = l.N
		if !n.Name.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatal("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatal("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}

		if lo != hi && n.Xoffset+n.Type.Width >= lo-int64(2*gc.Widthreg) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax)
}
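
// Illustrative example (not taken from a real frame): with Widthreg = 8 and two
// needzero PAUTO variables at Xoffset -16 (width 8) and -40 (width 16), the
// first sets lo = -16, hi = -8; the second ends at -40+16 = -24, which is
// within 2*Widthreg of lo, so the ranges merge (lo becomes -40) and a single
// zerorange call clears the whole 32-byte span [-40,-8).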

// DUFFZERO consists of repeated blocks of 4 MOVs + ADD,
// with 4 STOSQs at the very end.
// The trailing STOSQs prevent the need for a DI preadjustment
// for small numbers of words to clear.
// See runtime/mkduff.go.
const (
	dzBlocks    = 31 // number of MOV/ADD blocks
	dzBlockLen  = 4  // number of clears per block
	dzBlockSize = 19 // size of instructions in a single block
	dzMovSize   = 4  // size of single MOV instruction w/ offset
	dzAddSize   = 4  // size of single ADD instruction
	dzDIStep    = 8  // number of bytes cleared by each MOV instruction

	dzTailLen  = 4 // number of final STOSQ instructions
	dzTailSize = 2 // size of single STOSQ instruction

	dzSize = dzBlocks*dzBlockSize + dzTailLen*dzTailSize // total size of DUFFZERO routine
)
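
// With these constants, dzSize = 31*19 + 4*2 = 597 bytes of DUFFZERO body, and
// the routine can clear at most 31*4 + 4 = 128 quadwords (1024 bytes), which is
// the cnt <= 128*Widthreg cutoff that zerorange below uses before falling back
// to REP STOSQ.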

// dzDI returns the pre-adjustment to DI for a call to DUFFZERO.
// q is the number of words to zero.
func dzDI(q int64) int64 {
	if q < dzTailLen {
		return 0
	}
	q -= dzTailLen
	if q%dzBlockLen == 0 {
		return 0
	}
	return -dzDIStep * (dzBlockLen - q%dzBlockLen)
}
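
// For example, dzDI(7) = -dzDIStep*(dzBlockLen-3) = -8: the entry point chosen
// by dzOff below skips the first MOV of the last block, so DI is biased back by
// one word to keep the executed MOVs aligned with the start of the range.
// Counts that fit entirely in the STOSQ tail, such as dzDI(2), need no bias.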

// dzOff returns the offset for a jump into DUFFZERO.
// q is the number of words to zero.
func dzOff(q int64) int64 {
	off := int64(dzSize)
	if q < dzTailLen {
		return off - q*dzTailSize
	}
	off -= dzTailLen * dzTailSize
	q -= dzTailLen
	blocks, steps := q/dzBlockLen, q%dzBlockLen
	off -= dzBlockSize * blocks
	if steps > 0 {
		off -= dzAddSize + dzMovSize*steps
	}
	return off
}
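
// Continuing the q = 7 example: dzOff(7) = 597 - 8 - (4 + 3*4) = 573, so the
// call enters DUFFZERO 24 bytes before its end and executes three MOVs, the
// block-closing ADD and the four tail STOSQs: seven quadword stores in total.
// For q = 2 the result is 597 - 2*2 = 593, jumping straight to the last two
// STOSQs.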

func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if *ax == 0 {
		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		*ax = 1
	}

	if cnt%int64(gc.Widthreg) != 0 {
		// should only happen with nacl
		if cnt%int64(gc.Widthptr) != 0 {
			gc.Fatal("zerorange count not a multiple of widthptr %d", cnt)
		}
		p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
		lo += int64(gc.Widthptr)
		cnt -= int64(gc.Widthptr)
	}

	if cnt <= int64(4*gc.Widthreg) {
		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
		}
	} else if !gc.Nacl && (cnt <= int64(128*gc.Widthreg)) {
		q := cnt / int64(gc.Widthreg)
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo+dzDI(q), obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(q))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
	} else {
		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}
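
// In short, with Widthreg = 8: up to 32 bytes are zeroed with unrolled
// MOVQ AX, off(SP) stores, up to 1024 bytes (and not on NaCl) with a
// LEAQ off(SP), DI followed by a DUFFZERO call into runtime.duffzero at the
// dzOff/dzDI-computed entry point, and anything larger with
// MOVQ $(cnt/8), CX; LEAQ off(SP), DI; REP; STOSQ. The *ax flag simply
// avoids re-emitting MOVQ $0, AX for every range in the same frame.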

func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = int16(as)
	q.Lineno = p.Lineno
	q.From.Type = int16(ftype)
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = int16(ttype)
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}
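
// appendpp splices a new instruction into the Prog list immediately after p
// and returns it, so callers can keep chaining. For example, the first call in
// zerorange above, appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG,
// x86.REG_AX, 0), inserts a MOVQ $0, AX at the current insertion point.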

var panicdiv *gc.Node

/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := 0
	if gc.Issigned[t.Etype] {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -(1<<uint64(t.Width*8-1)) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = 0
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = 0
	}

	a := optoas(op, t)

	var n3 gc.Node
	gc.Regalloc(&n3, t0, nil)
	var ax gc.Node
	var oldax gc.Node
	if nl.Ullman >= nr.Ullman {
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
		gc.Regalloc(&ax, t0, &ax) // mark ax live during cgen
		gc.Cgen(nr, &n3)
		gc.Regfree(&ax)
	} else {
		gc.Cgen(nr, &n3)
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
	}

	if t != t0 {
		// Convert
		ax1 := ax

		n31 := n3
		ax.Type = t
		n3.Type = t
		gmove(&ax1, &ax)
		gmove(&n31, &n3)
	}

	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	var p2 *obj.Prog
	if check != 0 {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &ax)

			gmove(&ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	if !gc.Issigned[t.Etype] {
		gc.Nodconst(&n4, t, 0)
		gmove(&n4, &dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(a, &n3, nil)
	gc.Regfree(&n3)
	if op == gc.ODIV {
		gmove(&ax, res)
	} else {
		gmove(&dx, res)
	}
	restx(&dx, &olddx)
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
	restx(&ax, &oldax)
}
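
// The check != 0 path is what gives IDIV Go's defined overflow behavior: for a
// signed type the spec makes MinInt / -1 wrap back to MinInt (and MinInt % -1
// equal 0), while the hardware instruction would fault, so the compare against
// -1 routes those cases through NEG (for ODIV) or a constant 0 (for OMOD)
// instead of the divide. Divide-by-zero is left to the hardware trap, which
// the runtime turns into a panic, except on NaCl where the explicit
// panicdivide call above stands in for the missing trap.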

/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it.  if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register.  caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := uint8(gc.GetReg(dr))

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	gc.Nodreg(x, t, dr)
	if r > 1 && !gc.Samereg(x, res) {
		gc.Regalloc(oldx, gc.Types[gc.TINT64], nil)
		x.Type = gc.Types[gc.TINT64]
		gmove(x, oldx)
		x.Type = t
		oldx.Etype = r // squirrel away old r value
		gc.SetReg(dr, 1)
	}
}

func restx(x *gc.Node, oldx *gc.Node) {
	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT64]
		gc.SetReg(int(x.Reg), int(oldx.Etype))
		gmove(oldx, x)
		gc.Regfree(oldx)
	}
}
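
// savex and restx are used as a bracket around code that clobbers a fixed
// register, as in dodiv above: savex(x86.REG_AX, &ax, &oldax, res, t0) parks
// any live AX value in a freshly allocated register, the division then uses
// AX/DX freely, and restx(&ax, &oldax) moves the old value (and the saved
// allocation count) back once the result has been copied to res.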

/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	t := nl.Type
	a := optoas(gc.OHMUL, t)
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	var n1 gc.Node
	gc.Cgenr(nl, &n1, res)
	var n2 gc.Node
	gc.Cgenr(nr, &n2, nil)
	var ax gc.Node
	gc.Nodreg(&ax, t, x86.REG_AX)
	gmove(&n1, &ax)
	gins(a, &n2, nil)
	gc.Regfree(&n2)
	gc.Regfree(&n1)

	var dx gc.Node
	if t.Width == 1 {
		// byte multiply behaves differently.
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}
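
// This relies on the one-operand MUL/IMUL forms: with nl in AX, the
// gins(a, &n2, nil) multiply leaves the low half of the product in AX and the
// high half in DX, so the >>width result is just a move out of DX. Byte
// multiplies put the whole 16-bit product in AX instead, which is why the high
// byte is first copied from AH into DX.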

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	rcx := gc.GetReg(x86.REG_CX)
	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT64], x86.REG_CX)

	var oldcx gc.Node
	if rcx > 0 && !gc.Samereg(&cx, res) {
		gc.Regalloc(&oldcx, gc.Types[gc.TUINT64], nil)
		gmove(&cx, &oldcx)
	}

	cx.Type = tcount

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)
		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		cx.Type = gc.Types[gc.TUINT64]
		gmove(&oldcx, &cx)
		gc.Regfree(&oldcx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
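
// The large-shift handling exists because x86 masks variable shift counts mod
// 32 or 64, while Go defines shifts by counts >= the operand width: when
// !bounded and the count is at least the width, a signed right-shift operand
// is pre-shifted by width-1 (leaving 0 or -1, which the final SAR cannot
// change) and every other operand is zeroed, so the following CL-count shift
// is harmless. The constant case at the top gets the same effect from two
// shifts by width-1.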

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	// generate operands in "8-bit" registers.
	var n1b gc.Node
	gc.Regalloc(&n1b, nl.Type, res)

	gc.Cgen(nl, &n1b)
	var n2b gc.Node
	gc.Regalloc(&n2b, nr.Type, nil)
	gc.Cgen(nr, &n2b)

	// perform full-width multiplication.
	t := gc.Types[gc.TUINT64]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT64]
	}
	var n1 gc.Node
	gc.Nodreg(&n1, t, int(n1b.Reg))
	var n2 gc.Node
	gc.Nodreg(&n2, t, int(n2b.Reg))
	a := optoas(op, t)
	gins(a, &n2, &n1)

	// truncate.
	gmove(&n1, res)

	gc.Regfree(&n1b)
	gc.Regfree(&n2b)
	return true
}
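
// The widen-then-truncate trick is sound because Go's integer multiply is
// defined modulo 2^width: for int8 operands -3 and 100, for example, the
// 64-bit multiply yields -300 and the final gmove keeps only the low byte,
// giving the wrapped int8 result -44.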

func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := nl.Type.Width

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 8 // bytes
	q := w / 8 // quads

	if q < 4 {
		// Write sequence of MOV 0, off(base) instead of using STOSQ.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSQ loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		var n1 gc.Node
		gc.Agenr(nl, &n1, nil)

		n1.Op = gc.OINDREG
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		for ; q > 0; q-- {
			n1.Type = z.Type
			gins(x86.AMOVQ, &z, &n1)
			n1.Xoffset += 8
		}

		if c >= 4 {
			gc.Nodconst(&z, gc.Types[gc.TUINT32], 0)
			n1.Type = z.Type
			gins(x86.AMOVL, &z, &n1)
			n1.Xoffset += 4
			c -= 4
		}

		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
		for ; c > 0; c-- {
			n1.Type = z.Type
			gins(x86.AMOVB, &z, &n1)
			n1.Xoffset++
		}

		gc.Regfree(&n1)
		return
	}

	var oldn1 gc.Node
	var n1 gc.Node
	savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
	gc.Agen(nl, &n1)

	var ax gc.Node
	var oldax gc.Node
	savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr])
	gconreg(x86.AMOVL, 0, x86.REG_AX)

	if q > 128 || gc.Nacl {
		gconreg(movptr, q, x86.REG_CX)
		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSQ, nil, nil) // STOSQ AX,*(DI)+
	} else {
		if di := dzDI(q); di != 0 {
			gconreg(addptr, di, x86.REG_DI)
		}
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = dzOff(q)
	}

	z := ax
	di := n1
	if w >= 8 && c >= 4 {
		di.Op = gc.OINDREG
		z.Type = gc.Types[gc.TINT64]
		di.Type = z.Type
		p := gins(x86.AMOVQ, &z, &di)
		p.To.Scale = 1
		p.To.Offset = c - 8
	} else if c >= 4 {
		di.Op = gc.OINDREG
		z.Type = gc.Types[gc.TINT32]
		di.Type = z.Type
		gins(x86.AMOVL, &z, &di)
		if c > 4 {
			p := gins(x86.AMOVL, &z, &di)
			p.To.Scale = 1
			p.To.Offset = c - 4
		}
	} else {
		for c > 0 {
			gins(x86.ASTOSB, nil, nil) // STOSB AL,*(DI)+
			c--
		}
	}

	restx(&n1, &oldn1)
	restx(&ax, &oldax)
}
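
// The strategy mirrors zerorange: objects smaller than 4 quadwords get
// unrolled MOVQ/MOVL/MOVB stores through the OINDREG node, objects up to 128
// quadwords (off NaCl) go through DUFFZERO with the same dzDI/dzOff entry
// arithmetic, and larger ones use REP STOSQ. After the bulk clear, DI points
// just past the q quadwords, so the remaining c bytes are finished with a
// single overlapping 8-byte store at offset c-8 when c >= 4, or with STOSB
// for a 1-3 byte tail.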

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p.Lineno==1 in generated wrappers
			gc.Warnl(int(p.Lineno), "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = int16(cmpptr)
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = x86.REG_NONE
		}

		p2.To.Offset = 0
	}
}
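
// For a CHECKNIL whose argument lives in register R, the rewrite in place of
// the pseudo-op is therefore roughly:
//	CMP  R, $0      // cmpptr, the pointer-width compare
//	JNE  2(PC)      // From.Offset = 1 marks the branch as likely
//	MOVL AX, (R)    // faulting store; plain MOVL AX, 0 if R is not a register
// The store faults only when R is nil, and the runtime turns that fault into
// the nil pointer panic, keeping the non-nil path free of any call overhead.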

// addr += index*width if possible.
func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
	switch width {
	case 1, 2, 4, 8:
		p1 := gins(x86.ALEAQ, index, addr)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Scale = int16(width)
		p1.From.Index = p1.From.Reg
		p1.From.Reg = p1.To.Reg
		return true
	}
	return false
}
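
// In other words, for the hardware-supported scales 1, 2, 4 and 8 the update
// becomes a single
//	LEAQ (addr)(index*width), addr
// and the function reports true; for any other width the caller must fall
// back to an explicit multiply and add.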

// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}
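
// The two instructions form the standard TLS idiom on amd64, roughly
//	MOVQ TLS, r
//	MOVQ (r)(TLS*1), r
// which the obj back end later rewrites into the OS-specific load of the g
// pointer from thread-local storage before the result is moved into res.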