github.com/euank/go@v0.0.0-20160829210321-495514729181/src/cmd/compile/internal/s390x/ggen.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s390x

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/s390x"
	"fmt"
)

// clearLoopCutoff is the (somewhat arbitrary) size in bytes above which it is
// better to emit a loop of clear instructions (e.g. XCs) rather than an
// unrolled sequence of individual instructions.
// Must be between 256 and 4096.
const clearLoopCutoff = 1024

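// defframe records the argument and frame sizes on the function's TEXT
// instruction and emits code to zero ambiguously live stack variables so
// the garbage collector never sees uninitialized pointer slots.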
func defframe(ptxt *obj.Prog) {
	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.ArgWidth(), int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi

	// iterate through declarations - they are sorted in decreasing xoffset order.
	for _, n := range gc.Curfn.Func.Dcl {
		if !n.Name.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatalf("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatalf("var %v has size %d offset %d", gc.Nconv(n, gc.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}

		if lo != hi && n.Xoffset+n.Type.Width >= lo-int64(2*gc.Widthreg) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi)
}

// zerorange clears the stack in the given range.
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}

	// Adjust the frame to account for LR.
	frame += gc.Ctxt.FixedFrameSize()
	offset := frame + lo
	reg := int16(s390x.REGSP)

	// If the offset cannot fit in a 12-bit unsigned displacement then we
	// need to create a copy of the stack pointer that we can adjust.
	// We also need to do this if we are going to loop.
	if offset < 0 || offset > 4096-clearLoopCutoff || cnt > clearLoopCutoff {
		p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, offset, obj.TYPE_REG, s390x.REGRT1, 0)
		p.Reg = int16(s390x.REGSP)
		reg = s390x.REGRT1
		offset = 0
	}

	// Generate a loop of large clears.
	if cnt > clearLoopCutoff {
		n := cnt - (cnt % 256)
		end := int16(s390x.REGRT2)
		p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, offset+n, obj.TYPE_REG, end, 0)
		p.Reg = reg
		p = appendpp(p, s390x.AXC, obj.TYPE_MEM, reg, offset, obj.TYPE_MEM, reg, offset)
		p.From3 = new(obj.Addr)
		p.From3.Type = obj.TYPE_CONST
		p.From3.Offset = 256
		pl := p
		p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, 256, obj.TYPE_REG, reg, 0)
		p = appendpp(p, s390x.ACMP, obj.TYPE_REG, reg, 0, obj.TYPE_REG, end, 0)
		p = appendpp(p, s390x.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0)
		gc.Patch(p, pl)

		cnt -= n
	}

	// Generate remaining clear instructions without a loop.
	for cnt > 0 {
		n := cnt

		// Can clear at most 256 bytes per instruction.
		if n > 256 {
			n = 256
		}

		switch n {
		// Handle very small clears with move instructions.
		case 8, 4, 2, 1:
			ins := s390x.AMOVB
			switch n {
			case 8:
				ins = s390x.AMOVD
			case 4:
				ins = s390x.AMOVW
			case 2:
				ins = s390x.AMOVH
			}
			p = appendpp(p, ins, obj.TYPE_CONST, 0, 0, obj.TYPE_MEM, reg, offset)

		// Handle clears that would require multiple move instructions with XC.
		default:
			p = appendpp(p, s390x.AXC, obj.TYPE_MEM, reg, offset, obj.TYPE_MEM, reg, offset)
			p.From3 = new(obj.Addr)
			p.From3.Type = obj.TYPE_CONST
			p.From3.Offset = n
		}

		cnt -= n
		offset += n
	}

	return p
}

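// appendpp appends a new instruction with the given opcode and operands
// after p in the instruction list and returns it.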
func appendpp(p *obj.Prog, as obj.As, ftype obj.AddrType, freg int16, foffset int64, ttype obj.AddrType, treg int16, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = as
	q.Lineno = p.Lineno
	q.From.Type = ftype
	q.From.Reg = freg
	q.From.Offset = foffset
	q.To.Type = ttype
	q.To.Reg = treg
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

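// ginsnop generates a no-op instruction (OR R0, R0).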
func ginsnop() {
	var reg gc.Node
	gc.Nodreg(&reg, gc.Types[gc.TINT], s390x.REG_R0)
	gins(s390x.AOR, &reg, &reg)
}

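// panicdiv caches the Node for runtime.panicdivide; it is initialized
// lazily by dodiv.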
var panicdiv *gc.Node

/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// the most negative int divided by -1 correctly;
	// the hardware generates an undefined result.
	// Division by zero also needs an explicit trap, since
	// the hardware silently produces an undefined result.
	// DIVW leaves an unpredictable result in the upper 32 bits,
	// so always use DIVD/DIVDU.
	t := nl.Type

	t0 := t
	check := 0
	if t.IsSigned() {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && nl.Int64() != -(1<<uint64(t.Width*8-1)) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int64() != -1 {
			check = 0
		}
	}

	if t.Width < 8 {
		if t.IsSigned() {
			t = gc.Types[gc.TINT64]
		} else {
			t = gc.Types[gc.TUINT64]
		}
		check = 0
	}

	a := optoas(gc.ODIV, t)

	var tl gc.Node
	gc.Regalloc(&tl, t0, nil)
	var tr gc.Node
	gc.Regalloc(&tr, t0, nil)
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &tl)
		gc.Cgen(nr, &tr)
	} else {
		gc.Cgen(nr, &tr)
		gc.Cgen(nl, &tl)
	}

	if t != t0 {
		// Convert
		tl2 := tl

		tr2 := tr
		tl.Type = t
		tr.Type = t
		gmove(&tl2, &tl)
		gmove(&tr2, &tr)
	}

	// Handle divide-by-zero panic.
	p1 := gins(optoas(gc.OCMP, t), &tr, nil)

	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = s390x.REGZERO
	p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1)
	if panicdiv == nil {
		panicdiv = gc.Sysfunc("panicdivide")
	}
	gc.Ginscall(panicdiv, -1)
	gc.Patch(p1, gc.Pc)

	var p2 *obj.Prog
	if check != 0 {
		var nm1 gc.Node
		gc.Nodconst(&nm1, t, -1)
		gins(optoas(gc.OCMP, t), &tr, &nm1)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &tl)

			gmove(&tl, res)
		} else {
			// a % (-1) is 0.
			var nz gc.Node
			gc.Nodconst(&nz, t, 0)

			gmove(&nz, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	p1 = gins(a, &tr, &tl)
	if op == gc.ODIV {
		gc.Regfree(&tr)
		gmove(&tl, res)
	} else {
		// A%B = A-(A/B*B)
		var tm gc.Node
		gc.Regalloc(&tm, t, nil)

		// patch div to use the 3 register form
		// TODO(minux): add gins3?
		p1.Reg = p1.To.Reg

		p1.To.Reg = tm.Reg
		gins(optoas(gc.OMUL, t), &tr, &tm)
		gc.Regfree(&tr)
		gins(optoas(gc.OSUB, t), &tm, &tl)
		gc.Regfree(&tm)
		gmove(&tl, res)
	}

	gc.Regfree(&tl)
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
}

/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// put the operand with the larger Ullman number on the left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	t := nl.Type
	w := int(t.Width) * 8
	var n1 gc.Node
	gc.Cgenr(nl, &n1, res)
	var n2 gc.Node
	gc.Cgenr(nr, &n2, nil)
	switch gc.Simtype[t.Etype] {
	case gc.TINT8,
		gc.TINT16,
		gc.TINT32:
		gins(optoas(gc.OMUL, t), &n2, &n1)
		p := gins(s390x.ASRAD, nil, &n1)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(w)

	case gc.TUINT8,
		gc.TUINT16,
		gc.TUINT32:
		gins(optoas(gc.OMUL, t), &n2, &n1)
		p := gins(s390x.ASRD, nil, &n1)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(w)

	case gc.TINT64:
		gins(s390x.AMULHD, &n2, &n1)

	case gc.TUINT64:
		gins(s390x.AMULHDU, &n2, &n1)

	default:
		gc.Fatalf("cgen_hmul %v", t)
	}

	gc.Cgen(&n1, res)
	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int64())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	var n1 gc.Node
	gc.Regalloc(&n1, nr.Type, nil) // to hold the shift count
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear the high bits of the shift count

	var n2 gc.Node
	gc.Regalloc(&n2, nl.Type, res)

	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, 1)
		if op == gc.ORSH && nl.Type.IsSigned() {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

// clearfat clears (i.e. replaces with zeros) the value pointed to by nl.
func clearfat(nl *gc.Node) {
	if gc.Debug['g'] != 0 {
		fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width)
	}

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	var dst gc.Node
	gc.Regalloc(&dst, gc.Types[gc.Tptr], nil)
	gc.Agen(nl, &dst)

	var boff int64
	w := nl.Type.Width
	if w > clearLoopCutoff {
		// Generate a loop clearing 256 bytes per iteration using XCs.
		var end gc.Node
		gc.Regalloc(&end, gc.Types[gc.Tptr], nil)
		p := gins(s390x.AMOVD, &dst, &end)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w - (w % 256)

		p = gins(s390x.AXC, &dst, &dst)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = 0
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 0
		p.From3 = new(obj.Addr)
		p.From3.Offset = 256
		p.From3.Type = obj.TYPE_CONST
		pl := p

		ginscon(s390x.AADD, 256, &dst)
		gins(s390x.ACMP, &dst, &end)
		gc.Patch(gc.Gbranch(s390x.ABNE, nil, 0), pl)
		gc.Regfree(&end)
		w = w % 256
	}

	// Generate instructions to clear the remaining memory.
	for w > 0 {
		n := w

		// Can clear at most 256 bytes per instruction.
		if n > 256 {
			n = 256
		}

		switch n {
		// Handle very small clears using moves.
		case 8, 4, 2, 1:
			ins := s390x.AMOVB
			switch n {
			case 8:
				ins = s390x.AMOVD
			case 4:
				ins = s390x.AMOVW
			case 2:
				ins = s390x.AMOVH
			}
			p := gins(ins, nil, &dst)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = boff

		// Handle clears that would require multiple moves with an XC.
		default:
			p := gins(s390x.AXC, &dst, &dst)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = boff
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = boff
			p.From3 = new(obj.Addr)
			p.From3.Offset = n
			p.From3.Type = obj.TYPE_CONST
		}

		boff += n
		w -= n
	}

	gc.Regfree(&dst)
}

// expandchecks expands CHECKNIL pseudo-ops into actual nil pointer checks.
// It is called after regopt and peep have run.
func expandchecks(firstp *obj.Prog) {
	for p := firstp; p != nil; p = p.Link {
		if gc.Debug_checknil != 0 && gc.Ctxt.Debugvlog != 0 {
			fmt.Printf("expandchecks: %v\n", p)
		}
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p.Lineno==1 in generated wrappers
			gc.Warnl(p.Lineno, "generated nil check")
		}
		if p.From.Type != obj.TYPE_REG {
			gc.Fatalf("invalid nil check %v\n", p)
		}

		// check is
		//	CMPBNE arg, $0, 2(PC) [likely]
		//	MOVD   R0, 0(R0)
		p1 := gc.Ctxt.NewProg()

		gc.Clearp(p1)
		p1.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p1.Pc = 9999
		p.As = s390x.ACMPBNE
		p.From3 = new(obj.Addr)
		p.From3.Type = obj.TYPE_CONST
		p.From3.Offset = 0

		p.To.Type = obj.TYPE_BRANCH
		p.To.Val = p1.Link

		// crash by writing to memory address 0.
		p1.As = s390x.AMOVD

		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = s390x.REGZERO
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = s390x.REGZERO
		p1.To.Offset = 0
	}
}

// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Nodreg(&n1, res.Type, s390x.REGG)
	gmove(&n1, res)
}