github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/s390x/ggen.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package s390x
     6  
     7  import (
     8  	"github.com/go-asm/go/cmd/compile/base"
     9  	"github.com/go-asm/go/cmd/compile/objw"
    10  	"github.com/go-asm/go/cmd/obj"
    11  	"github.com/go-asm/go/cmd/obj/s390x"
    12  )
    13  
// clearLoopCutoff is the (somewhat arbitrary) byte count above which it is
// better to emit a loop of clear instructions (e.g. XCs) rather than just
// generating multiple instructions (i.e. loop unrolling).
// Must be between 256 and 4096.
const clearLoopCutoff = 1024
    19  
    20  // zerorange clears the stack in the given range.
    21  func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
    22  	if cnt == 0 {
    23  		return p
    24  	}
    25  
    26  	// Adjust the frame to account for LR.
    27  	off += base.Ctxt.Arch.FixedFrameSize
    28  	reg := int16(s390x.REGSP)
    29  
    30  	// If the off cannot fit in a 12-bit unsigned displacement then we
    31  	// need to create a copy of the stack pointer that we can adjust.
    32  	// We also need to do this if we are going to loop.
    33  	if off < 0 || off > 4096-clearLoopCutoff || cnt > clearLoopCutoff {
    34  		p = pp.Append(p, s390x.AADD, obj.TYPE_CONST, 0, off, obj.TYPE_REG, s390x.REGRT1, 0)
    35  		p.Reg = int16(s390x.REGSP)
    36  		reg = s390x.REGRT1
    37  		off = 0
    38  	}
    39  
    40  	// Generate a loop of large clears.
    41  	if cnt > clearLoopCutoff {
    42  		ireg := int16(s390x.REGRT2) // register holds number of remaining loop iterations
    43  		p = pp.Append(p, s390x.AMOVD, obj.TYPE_CONST, 0, cnt/256, obj.TYPE_REG, ireg, 0)
    44  		p = pp.Append(p, s390x.ACLEAR, obj.TYPE_CONST, 0, 256, obj.TYPE_MEM, reg, off)
    45  		pl := p
    46  		p = pp.Append(p, s390x.AADD, obj.TYPE_CONST, 0, 256, obj.TYPE_REG, reg, 0)
    47  		p = pp.Append(p, s390x.ABRCTG, obj.TYPE_REG, ireg, 0, obj.TYPE_BRANCH, 0, 0)
    48  		p.To.SetTarget(pl)
    49  		cnt = cnt % 256
    50  	}
    51  
    52  	// Generate remaining clear instructions without a loop.
    53  	for cnt > 0 {
    54  		n := cnt
    55  
    56  		// Can clear at most 256 bytes per instruction.
    57  		if n > 256 {
    58  			n = 256
    59  		}
    60  
    61  		switch n {
    62  		// Handle very small clears with move instructions.
    63  		case 8, 4, 2, 1:
    64  			ins := s390x.AMOVB
    65  			switch n {
    66  			case 8:
    67  				ins = s390x.AMOVD
    68  			case 4:
    69  				ins = s390x.AMOVW
    70  			case 2:
    71  				ins = s390x.AMOVH
    72  			}
    73  			p = pp.Append(p, ins, obj.TYPE_CONST, 0, 0, obj.TYPE_MEM, reg, off)
    74  
    75  		// Handle clears that would require multiple move instructions with CLEAR (assembled as XC).
    76  		default:
    77  			p = pp.Append(p, s390x.ACLEAR, obj.TYPE_CONST, 0, n, obj.TYPE_MEM, reg, off)
    78  		}
    79  
    80  		cnt -= n
    81  		off += n
    82  	}
    83  
    84  	return p
    85  }
    86  
    87  func ginsnop(pp *objw.Progs) *obj.Prog {
    88  	return pp.Prog(s390x.ANOPH)
    89  }