github.com/rsc/tmp@v0.0.0-20240517235954-6deaab19748b/bootstrap/bootstrap6g/ggen.go

// Do not edit. Bootstrap copy of /Users/rsc/g/go/src/cmd/6g/ggen.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"rsc.io/tmp/bootstrap/internal/gc"
	"rsc.io/tmp/bootstrap/internal/obj"
	"rsc.io/tmp/bootstrap/internal/obj/x86"
)

func defframe(ptxt *obj.Prog) {
	var n *gc.Node

	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)

	// iterate through declarations - they are sorted in decreasing xoffset order.
	for l := gc.Curfn.Func.Dcl; l != nil; l = l.Next {
		n = l.N
		if !n.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatal("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatal("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}

		if lo != hi && n.Xoffset+n.Type.Width >= lo-int64(2*gc.Widthreg) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax)
}
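
// For example (illustrative, using Widthreg = 8 on amd64): two ambiguously
// live 8-byte slots separated by a 16-byte gap satisfy the merge test above
// (n.Xoffset+n.Type.Width >= lo-16) and are cleared by a single zerorange
// call that also zeroes the gap, while a 24-byte gap flushes the pending
// range and starts a new one, producing two separate calls.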

// DUFFZERO consists of repeated blocks of 4 MOVs + ADD,
// with 4 STOSQs at the very end.
// The trailing STOSQs prevent the need for a DI preadjustment
// for small numbers of words to clear.
// See runtime/mkduff.go.
const (
	dzBlocks    = 31 // number of MOV/ADD blocks
	dzBlockLen  = 4  // number of clears per block
	dzBlockSize = 19 // size of instructions in a single block
	dzMovSize   = 4  // size of single MOV instruction w/ offset
	dzAddSize   = 4  // size of single ADD instruction
	dzDIStep    = 8  // number of bytes cleared by each MOV instruction

	dzTailLen  = 4 // number of final STOSQ instructions
	dzTailSize = 2 // size of single STOSQ instruction

	dzSize = dzBlocks*dzBlockSize + dzTailLen*dzTailSize // total size of DUFFZERO routine
)

// dzDI returns the pre-adjustment to DI for a call to DUFFZERO.
// q is the number of words to zero.
func dzDI(q int64) int64 {
	if q < dzTailLen {
		return 0
	}
	q -= dzTailLen
	if q%dzBlockLen == 0 {
		return 0
	}
	return -dzDIStep * (dzBlockLen - q%dzBlockLen)
}

// dzOff returns the offset for a jump into DUFFZERO.
// q is the number of words to zero.
func dzOff(q int64) int64 {
	off := int64(dzSize)
	if q < dzTailLen {
		return off - q*dzTailSize
	}
	off -= dzTailLen * dzTailSize
	q -= dzTailLen
	blocks, steps := q/dzBlockLen, q%dzBlockLen
	off -= dzBlockSize * blocks
	if steps > 0 {
		off -= dzAddSize + dzMovSize*steps
	}
	return off
}
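
// Worked examples from the constants above (illustrative): dzSize is
// 31*19 + 4*2 = 597 bytes, and the whole routine clears 31*4 + 4 = 128
// words (1024 bytes). dzOff(128) = 0 and dzDI(128) = 0, a jump to the very
// start of DUFFZERO. dzOff(3) = 597 - 3*2 = 591 jumps to the last three
// STOSQs with no DI adjustment. dzOff(7) = 573 and dzDI(7) = -8: the jump
// enters partway into the last MOV/ADD block and DI is backed up one word
// so the remaining MOVs' offsets land on the words to be cleared.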
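// zerorange zeroes the stack bytes in [frame+lo, frame+hi) (offsets from SP),
// appending instructions after p and returning the last one written. *ax
// records whether AX has already been zeroed, so the MOVQ $0, AX is emitted
// at most once per frame. Ranges of up to 4 words are cleared with individual
// MOVQs, up to 128 words (when not on NaCl) with a call into DUFFZERO, and
// anything larger with REP STOSQ.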
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if *ax == 0 {
		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		*ax = 1
	}

	if cnt%int64(gc.Widthreg) != 0 {
		// should only happen with nacl
		if cnt%int64(gc.Widthptr) != 0 {
			gc.Fatal("zerorange count not a multiple of widthptr %d", cnt)
		}
		p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
		lo += int64(gc.Widthptr)
		cnt -= int64(gc.Widthptr)
	}

	if cnt <= int64(4*gc.Widthreg) {
		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
		}
	} else if !gc.Nacl && (cnt <= int64(128*gc.Widthreg)) {
		q := cnt / int64(gc.Widthreg)
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo+dzDI(q), obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(q))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
	} else {
		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

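// appendpp inserts a new instruction with the given opcode and from/to
// operands after p in the instruction list and returns it.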
func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = int16(as)
	q.Lineno = p.Lineno
	q.From.Type = int16(ftype)
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = int16(ttype)
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

var panicdiv *gc.Node

/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := 0
	if gc.Issigned[t.Etype] {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && gc.Mpgetfix(nl.Val.U.Xval) != -(1<<uint64(t.Width*8-1)) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && gc.Mpgetfix(nr.Val.U.Xval) != -1 {
			check = 0
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = 0
	}

	a := optoas(op, t)

	var n3 gc.Node
	gc.Regalloc(&n3, t0, nil)
	var ax gc.Node
	var oldax gc.Node
	if nl.Ullman >= nr.Ullman {
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
		gc.Regalloc(&ax, t0, &ax) // mark ax live during cgen
		gc.Cgen(nr, &n3)
		gc.Regfree(&ax)
	} else {
		gc.Cgen(nr, &n3)
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
	}

	if t != t0 {
		// Convert
		ax1 := ax

		n31 := n3
		ax.Type = t
		n3.Type = t
		gmove(&ax1, &ax)
		gmove(&n31, &n3)
	}

	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	var p2 *obj.Prog
	if check != 0 {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &ax)

			gmove(&ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	if !gc.Issigned[t.Etype] {
		gc.Nodconst(&n4, t, 0)
		gmove(&n4, &dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(a, &n3, nil)
	gc.Regfree(&n3)
	if op == gc.ODIV {
		gmove(&ax, res)
	} else {
		gmove(&dx, res)
	}
	restx(&dx, &olddx)
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
	restx(&ax, &oldax)
}
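
// For example (illustrative): for int64 operands, IDIV faults with a divide
// error when the quotient does not fit, so math.MinInt64 / -1 cannot be
// executed directly. The check branch above compares the divisor with -1 at
// run time (unless a constant operand already rules the case out) and yields
// -a for a / -1 and 0 for a % -1 without issuing the divide.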

/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it.  if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register.  caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := int(reg[dr])

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	gc.Nodreg(x, t, dr)
	if r > 1 && !gc.Samereg(x, res) {
		gc.Regalloc(oldx, gc.Types[gc.TINT64], nil)
		x.Type = gc.Types[gc.TINT64]
		gmove(x, oldx)
		x.Type = t
		oldx.Ostk = int32(r) // squirrel away old r value
		reg[dr] = 1
	}
}

func restx(x *gc.Node, oldx *gc.Node) {
	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT64]
		reg[x.Reg] = uint8(oldx.Ostk)
		gmove(oldx, x)
		gc.Regfree(oldx)
	}
}

/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	t := nl.Type
	a := optoas(gc.OHMUL, t)
	if nl.Ullman < nr.Ullman {
		tmp := nl
		nl = nr
		nr = tmp
	}

	var n1 gc.Node
	gc.Cgenr(nl, &n1, res)
	var n2 gc.Node
	gc.Cgenr(nr, &n2, nil)
	var ax gc.Node
	gc.Nodreg(&ax, t, x86.REG_AX)
	gmove(&n1, &ax)
	gins(a, &n2, nil)
	gc.Regfree(&n2)
	gc.Regfree(&n1)

	var dx gc.Node
	if t.Width == 1 {
		// byte multiply behaves differently.
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(gc.Mpgetfix(nr.Val.U.Xval))
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	rcx := int(reg[x86.REG_CX])
	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift count in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT64], x86.REG_CX)

	var oldcx gc.Node
	if rcx > 0 && !gc.Samereg(&cx, res) {
		gc.Regalloc(&oldcx, gc.Types[gc.TUINT64], nil)
		gmove(&cx, &oldcx)
	}

	cx.Type = tcount

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)
		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		cx.Type = gc.Types[gc.TUINT64]
		gmove(&oldcx, &cx)
		gc.Regfree(&oldcx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
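
// For example (illustrative): for a uint64 shifted by a variable count that
// is not known to be bounded, the fixup above compares the count in CX with
// 64 and, when it is >= 64, replaces the operand with 0 (or pre-shifts it by
// 63 for a signed >>, leaving 0 or -1); the final variable shift then cannot
// change the value, which matches Go's semantics for shift counts at or
// beyond the operand width.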

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		tmp := nl
		nl = nr
		nr = tmp
	}

	// generate operands in "8-bit" registers.
	var n1b gc.Node
	gc.Regalloc(&n1b, nl.Type, res)

	gc.Cgen(nl, &n1b)
	var n2b gc.Node
	gc.Regalloc(&n2b, nr.Type, nil)
	gc.Cgen(nr, &n2b)

	// perform full-width multiplication.
	t := gc.Types[gc.TUINT64]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT64]
	}
	var n1 gc.Node
	gc.Nodreg(&n1, t, int(n1b.Reg))
	var n2 gc.Node
	gc.Nodreg(&n2, t, int(n2b.Reg))
	a := optoas(op, t)
	gins(a, &n2, &n1)

	// truncate.
	gmove(&n1, res)

	gc.Regfree(&n1b)
	gc.Regfree(&n2b)
	return true
}

func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := nl.Type.Width

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 8 // bytes
	q := w / 8 // quads

	if q < 4 {
		// Write sequence of MOV 0, off(base) instead of using STOSQ.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSQ loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		var n1 gc.Node
		gc.Agenr(nl, &n1, nil)

		n1.Op = gc.OINDREG
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		for {
			tmp14 := q
			q--
			if tmp14 <= 0 {
				break
			}
			n1.Type = z.Type
			gins(x86.AMOVQ, &z, &n1)
			n1.Xoffset += 8
		}

		if c >= 4 {
			gc.Nodconst(&z, gc.Types[gc.TUINT32], 0)
			n1.Type = z.Type
			gins(x86.AMOVL, &z, &n1)
			n1.Xoffset += 4
			c -= 4
		}

		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
		for {
			tmp15 := c
			c--
			if tmp15 <= 0 {
				break
			}
			n1.Type = z.Type
			gins(x86.AMOVB, &z, &n1)
			n1.Xoffset++
		}

		gc.Regfree(&n1)
		return
	}

	var oldn1 gc.Node
	var n1 gc.Node
	savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
	gc.Agen(nl, &n1)

	var ax gc.Node
	var oldax gc.Node
	savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr])
	gconreg(x86.AMOVL, 0, x86.REG_AX)

	if q > 128 || gc.Nacl {
		gconreg(movptr, q, x86.REG_CX)
		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSQ, nil, nil) // STOSQ AX,*(DI)+
	} else {
		if di := dzDI(q); di != 0 {
			gconreg(addptr, di, x86.REG_DI)
		}
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = dzOff(q)
	}

	z := ax
	di := n1
	if w >= 8 && c >= 4 {
		di.Op = gc.OINDREG
		z.Type = gc.Types[gc.TINT64]
		di.Type = z.Type
		p := gins(x86.AMOVQ, &z, &di)
		p.To.Scale = 1
		p.To.Offset = c - 8
	} else if c >= 4 {
		di.Op = gc.OINDREG
		z.Type = gc.Types[gc.TINT32]
		di.Type = z.Type
		gins(x86.AMOVL, &z, &di)
		if c > 4 {
			p := gins(x86.AMOVL, &z, &di)
			p.To.Scale = 1
			p.To.Offset = c - 4
		}
	} else {
		for c > 0 {
			gins(x86.ASTOSB, nil, nil) // STOSB AL,*(DI)+
			c--
		}
	}

	restx(&n1, &oldn1)
	restx(&ax, &oldax)
}
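
// For example (illustrative): clearing a 30-byte object gives q = 3, c = 6,
// so the unrolled path above emits three MOVQs, one MOVL (leaving c = 2),
// and two MOVBs. A 200-byte object (q = 25, c = 0) takes the DUFFZERO path,
// and anything over 128 quads (or any size on NaCl) falls back to REP STOSQ;
// leftover bytes after those paths are finished with the overlapping
// MOVQ/MOVL stores or the STOSB loop above.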

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p.Lineno == 1 in generated wrappers
			gc.Warnl(int(p.Lineno), "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = int16(cmpptr)
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = x86.REG_NONE
		}

		p2.To.Offset = 0
	}
}

// addr += index*width if possible.
func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
	switch width {
	case 1, 2, 4, 8:
		p1 := gins(x86.ALEAQ, index, addr)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Scale = int16(width)
		p1.From.Index = p1.From.Reg
		p1.From.Reg = p1.To.Reg
		return true
	}
	return false
}
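
// For example (illustrative): with width 8 the instruction emitted above
// ends up as LEAQ (addrReg)(indexReg*8), addrReg, folding the multiply and
// add into a single address computation. Widths other than 1, 2, 4, and 8
// cannot be expressed as an LEA scale, so the function reports false and
// leaves the addition to the caller.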

// res = runtime.getg()
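// The two moves below form the pair
//	MOVQ TLS, r
//	MOVQ (r)(TLS*1), r
// which the obj/x86 back end is expected to rewrite into the actual
// thread-local-storage access for the target platform, leaving a pointer to
// the current g in r.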
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}