github.com/AR1011/wazero@v1.0.5/internal/engine/wazevo/backend/isa/arm64/machine.go (about)

     1  package arm64
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math"
     7  	"strings"
     8  
     9  	"github.com/AR1011/wazero/internal/engine/wazevo/backend"
    10  	"github.com/AR1011/wazero/internal/engine/wazevo/backend/regalloc"
    11  	"github.com/AR1011/wazero/internal/engine/wazevo/ssa"
    12  	"github.com/AR1011/wazero/internal/engine/wazevo/wazevoapi"
    13  )
    14  
type (
	// machine implements backend.Machine.
	machine struct {
		// compiler is the backend.Compiler driving this machine.
		compiler backend.Compiler
		// currentABI is the ABI of the function currently being compiled (set by InitializeABI).
		currentABI *abiImpl
		// currentSSABlk is the ssa.BasicBlock currently being lowered (set by StartBlock).
		currentSSABlk ssa.BasicBlock
		// abis maps ssa.SignatureID to the ABI implementation.
		abis      []abiImpl
		// instrPool allocates and recycles instruction values across compilations.
		instrPool wazevoapi.Pool[instruction]
		// rootInstr is the root instruction of the currently-compiled function.
		rootInstr *instruction
		// perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock.
		perBlockHead, perBlockEnd *instruction
		// pendingInstructions are the instructions which are not yet emitted into the instruction list.
		pendingInstructions []*instruction
		// regAllocFn adapts this machine for the register allocator.
		regAllocFn          regAllocFunctionImpl
		// nextLabel is the last label handed out by allocateLabel; starts at invalidLabel.
		nextLabel           label

		// ssaBlockIDToLabels maps an SSA block ID to the label.
		ssaBlockIDToLabels []label
		// labelToInstructions maps a label to the instructions of the region which the label represents.
		labelPositions     map[label]*labelPosition
		// orderedBlockLabels holds the labelPositions in block layout order.
		orderedBlockLabels []*labelPosition
		// labelPositionPool allocates and recycles labelPosition values.
		labelPositionPool  wazevoapi.Pool[labelPosition]

		// addendsWorkQueue is used during address lowering, defined here for reuse.
		addendsWorkQueue queue[ssa.Value]
		// addends32 is used during address lowering, defined here for reuse.
		addends32        queue[addend32]
		// addends64 is used during address lowering, defined here for reuse.
		addends64              queue[regalloc.VReg]
		// unresolvedAddressModes are instructions whose address modes refer to the
		// arg/result stack space and are fixed up in ResolveRelativeAddresses.
		unresolvedAddressModes []*instruction

		// condBrRelocs holds the conditional branches which need offset relocation.
		condBrRelocs []condBrReloc

		// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
		// During the execution of the function, the stack looks like:
		//
		//
		//            (high address)
		//          +-----------------+
		//          |     .......     |
		//          |      ret Y      |
		//          |     .......     |
		//          |      ret 0      |
		//          |      arg X      |
		//          |     .......     |
		//          |      arg 1      |
		//          |      arg 0      |
		//          |      xxxxx      |
		//          |   ReturnAddress |
		//          +-----------------+   <<-|
		//          |   ...........   |      |
		//          |   spill slot M  |      | <--- spillSlotSize
		//          |   ............  |      |
		//          |   spill slot 2  |      |
		//          |   spill slot 1  |   <<-+
		//          |   clobbered N   |
		//          |   ...........   |
		//          |   clobbered 1   |
		//          |   clobbered 0   |
		//   SP---> +-----------------+
		//             (low address)
		//
		// and it represents the size of the space between FP and the first spilled slot. This must be a multiple of 16.
		// Also note that this is only known after register allocation.
		spillSlotSize int64
		spillSlots    map[regalloc.VRegID]int64 // regalloc.VRegID to offset.
		// clobberedRegs holds real-register backed VRegs saved at the function prologue, and restored at the epilogue.
		clobberedRegs []regalloc.VReg

		// maxRequiredStackSizeForCalls is the maximum stack space needed by any call made by this function.
		maxRequiredStackSizeForCalls int64
		// stackBoundsCheckDisabled suppresses the prologue stack-bounds check when true.
		stackBoundsCheckDisabled     bool

		// regAllocStarted is true once register allocation has begun; see allocateInstr.
		regAllocStarted bool
	}

	// addend32 is a 32-bit addend paired with the extension applied to it
	// during address lowering.
	addend32 struct {
		r   regalloc.VReg
		ext extendOp
	}

	// label represents a position in the generated code which is either
	// a real instruction or the constant pool (e.g. jump tables).
	//
	// This is exactly the same as the traditional "label" in assembly code.
	label uint32

	// labelPosition represents the regions of the generated code which the label represents.
	labelPosition struct {
		l            label
		// begin and end delimit the instruction region of this label (inclusive).
		begin, end   *instruction
		// binarySize is the encoded size in bytes of the region; binaryOffset is
		// its offset from the function start. Both are computed in ResolveRelativeAddresses.
		binarySize   int64
		binaryOffset int64
	}

	// condBrReloc records a conditional branch whose offset could not be
	// encoded directly and may need a trampoline.
	condBrReloc struct {
		// cbr is the conditional branch instruction to relocate.
		cbr *instruction
		// currentLabelPos is the labelPosition within which condBr is defined.
		currentLabelPos *labelPosition
		// Next block's labelPosition.
		nextLabel label
		// offset is the binary offset of cbr from the function start.
		offset    int64
	}
)
   120  
const (
	// invalidLabel is the zero value of label, i.e. "no label assigned yet".
	invalidLabel = 0
	// returnLabel is the pseudo label representing the return site of the function.
	returnLabel  = math.MaxUint32
)
   125  
   126  // NewBackend returns a new backend for arm64.
   127  func NewBackend() backend.Machine {
   128  	m := &machine{
   129  		instrPool:         wazevoapi.NewPool[instruction](resetInstruction),
   130  		labelPositionPool: wazevoapi.NewPool[labelPosition](resetLabelPosition),
   131  		labelPositions:    make(map[label]*labelPosition),
   132  		spillSlots:        make(map[regalloc.VRegID]int64),
   133  		nextLabel:         invalidLabel,
   134  	}
   135  	m.regAllocFn.m = m
   136  	m.regAllocFn.labelToRegAllocBlockIndex = make(map[label]int)
   137  	return m
   138  }
   139  
   140  // Reset implements backend.Machine.
   141  func (m *machine) Reset() {
   142  	m.regAllocStarted = false
   143  	m.instrPool.Reset()
   144  	m.labelPositionPool.Reset()
   145  	m.currentSSABlk = nil
   146  	for l := label(0); l <= m.nextLabel; l++ {
   147  		delete(m.labelPositions, l)
   148  	}
   149  	m.pendingInstructions = m.pendingInstructions[:0]
   150  	m.clobberedRegs = m.clobberedRegs[:0]
   151  	for key := range m.spillSlots {
   152  		m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key))
   153  	}
   154  	for _, key := range m.clobberedRegs {
   155  		delete(m.spillSlots, regalloc.VRegID(key))
   156  	}
   157  	m.clobberedRegs = m.clobberedRegs[:0]
   158  	m.orderedBlockLabels = m.orderedBlockLabels[:0]
   159  	m.regAllocFn.reset()
   160  	m.spillSlotSize = 0
   161  	m.unresolvedAddressModes = m.unresolvedAddressModes[:0]
   162  	m.rootInstr = nil
   163  	m.ssaBlockIDToLabels = m.ssaBlockIDToLabels[:0]
   164  	m.perBlockHead, m.perBlockEnd = nil, nil
   165  	m.maxRequiredStackSizeForCalls = 0
   166  	m.nextLabel = invalidLabel
   167  }
   168  
   169  // InitializeABI implements backend.Machine InitializeABI.
   170  func (m *machine) InitializeABI(sig *ssa.Signature) {
   171  	m.currentABI = m.getOrCreateABIImpl(sig)
   172  }
   173  
   174  // DisableStackCheck implements backend.Machine DisableStackCheck.
   175  func (m *machine) DisableStackCheck() {
   176  	m.stackBoundsCheckDisabled = true
   177  }
   178  
   179  // ABI implements backend.Machine.
   180  func (m *machine) ABI() backend.FunctionABI {
   181  	return m.currentABI
   182  }
   183  
   184  // allocateLabel allocates an unused label.
   185  func (m *machine) allocateLabel() label {
   186  	m.nextLabel++
   187  	return m.nextLabel
   188  }
   189  
   190  // SetCompiler implements backend.Machine.
   191  func (m *machine) SetCompiler(ctx backend.Compiler) {
   192  	m.compiler = ctx
   193  }
   194  
   195  // StartLoweringFunction implements backend.Machine.
   196  func (m *machine) StartLoweringFunction(max ssa.BasicBlockID) {
   197  	imax := int(max)
   198  	if len(m.ssaBlockIDToLabels) <= imax {
   199  		// Eagerly allocate labels for the blocks since the underlying slice will be used for the next iteration.
   200  		m.ssaBlockIDToLabels = append(m.ssaBlockIDToLabels, make([]label, imax+1)...)
   201  	}
   202  }
   203  
   204  // EndLoweringFunction implements backend.Machine.
   205  func (m *machine) EndLoweringFunction() {}
   206  
   207  // StartBlock implements backend.Machine.
   208  func (m *machine) StartBlock(blk ssa.BasicBlock) {
   209  	m.currentSSABlk = blk
   210  
   211  	l := m.ssaBlockIDToLabels[m.currentSSABlk.ID()]
   212  	if l == invalidLabel {
   213  		l = m.allocateLabel()
   214  		m.ssaBlockIDToLabels[blk.ID()] = l
   215  	}
   216  
   217  	end := m.allocateNop()
   218  	m.perBlockHead, m.perBlockEnd = end, end
   219  
   220  	labelPos, ok := m.labelPositions[l]
   221  	if !ok {
   222  		labelPos = m.allocateLabelPosition(l)
   223  		m.labelPositions[l] = labelPos
   224  	}
   225  	m.orderedBlockLabels = append(m.orderedBlockLabels, labelPos)
   226  	labelPos.begin, labelPos.end = end, end
   227  	m.regAllocFn.addBlock(blk, l, labelPos)
   228  }
   229  
   230  // EndBlock implements backend.Machine.
   231  func (m *machine) EndBlock() {
   232  	// Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions.
   233  	m.insertAtPerBlockHead(m.allocateNop())
   234  
   235  	l := m.ssaBlockIDToLabels[m.currentSSABlk.ID()]
   236  	m.labelPositions[l].begin = m.perBlockHead
   237  
   238  	if m.currentSSABlk.EntryBlock() {
   239  		m.rootInstr = m.perBlockHead
   240  	}
   241  }
   242  
   243  func (m *machine) insert(i *instruction) {
   244  	m.pendingInstructions = append(m.pendingInstructions, i)
   245  }
   246  
   247  func (m *machine) insertBrTargetLabel() label {
   248  	nop, l := m.allocateBrTarget()
   249  	m.insert(nop)
   250  	return l
   251  }
   252  
   253  func (m *machine) allocateBrTarget() (nop *instruction, l label) {
   254  	l = m.allocateLabel()
   255  	nop = m.allocateInstr()
   256  	nop.asNop0WithLabel(l)
   257  	pos := m.allocateLabelPosition(l)
   258  	pos.begin, pos.end = nop, nop
   259  	m.labelPositions[l] = pos
   260  	return
   261  }
   262  
   263  func (m *machine) allocateLabelPosition(la label) *labelPosition {
   264  	l := m.labelPositionPool.Allocate()
   265  	l.l = la
   266  	return l
   267  }
   268  
   269  func resetLabelPosition(l *labelPosition) {
   270  	*l = labelPosition{}
   271  }
   272  
   273  // FlushPendingInstructions implements backend.Machine.
   274  func (m *machine) FlushPendingInstructions() {
   275  	l := len(m.pendingInstructions)
   276  	if l == 0 {
   277  		return
   278  	}
   279  	for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order.
   280  		m.insertAtPerBlockHead(m.pendingInstructions[i])
   281  	}
   282  	m.pendingInstructions = m.pendingInstructions[:0]
   283  }
   284  
   285  func (m *machine) insertAtPerBlockHead(i *instruction) {
   286  	if m.perBlockHead == nil {
   287  		m.perBlockHead = i
   288  		m.perBlockEnd = i
   289  		return
   290  	}
   291  	i.next = m.perBlockHead
   292  	m.perBlockHead.prev = i
   293  	m.perBlockHead = i
   294  }
   295  
   296  // String implements backend.Machine.
   297  func (l label) String() string {
   298  	return fmt.Sprintf("L%d", l)
   299  }
   300  
   301  // allocateInstr allocates an instruction.
   302  func (m *machine) allocateInstr() *instruction {
   303  	instr := m.instrPool.Allocate()
   304  	if !m.regAllocStarted {
   305  		instr.addedBeforeRegAlloc = true
   306  	}
   307  	return instr
   308  }
   309  
   310  func resetInstruction(i *instruction) {
   311  	*i = instruction{}
   312  }
   313  
   314  func (m *machine) allocateNop() *instruction {
   315  	instr := m.allocateInstr()
   316  	instr.asNop0()
   317  	return instr
   318  }
   319  
// resolveAddressingMode finalizes the address mode of i, which referred to the
// argument or result stack space whose SP-relative offsets (arg0offset and
// ret0offset) only become known once the frame layout is fixed.
//
// If the resulting immediate fits the unsigned-imm12 form for the access size,
// the address mode is rewritten in place; otherwise the offset is materialized
// into the temporary register by instructions inserted just before i, and a
// register+register address mode is used instead.
func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
	amode := &i.amode
	switch amode.kind {
	case addressModeKindResultStackSpace:
		amode.imm += ret0offset
	case addressModeKindArgStackSpace:
		amode.imm += arg0offset
	default:
		panic("BUG")
	}

	// The access width decides which immediate encodings are available.
	var sizeInBits byte
	switch i.kind {
	case store8, uLoad8:
		sizeInBits = 8
	case store16, uLoad16:
		sizeInBits = 16
	case store32, fpuStore32, uLoad32, fpuLoad32:
		sizeInBits = 32
	case store64, fpuStore64, uLoad64, fpuLoad64:
		sizeInBits = 64
	case fpuStore128, fpuLoad128:
		sizeInBits = 128
	default:
		panic("BUG")
	}

	if offsetFitsInAddressModeKindRegUnsignedImm12(sizeInBits, amode.imm) {
		amode.kind = addressModeKindRegUnsignedImm12
	} else {
		// This case, we load the offset into the temporary register,
		// and then use it as the index register.
		newPrev := m.lowerConstantI64AndInsert(i.prev, tmpRegVReg, amode.imm)
		linkInstr(newPrev, i)
		*amode = addressMode{kind: addressModeKindRegReg, rn: amode.rn, rm: tmpRegVReg, extOp: extendOpUXTX /* indicates rm reg is 64-bit */}
	}
}
   357  
   358  // ResolveRelativeAddresses implements backend.Machine.
   359  func (m *machine) ResolveRelativeAddresses(ctx context.Context) {
   360  	if len(m.unresolvedAddressModes) > 0 {
   361  		arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP()
   362  		for _, i := range m.unresolvedAddressModes {
   363  			m.resolveAddressingMode(arg0offset, ret0offset, i)
   364  		}
   365  	}
   366  
   367  	// Reuse the slice to gather the unresolved conditional branches.
   368  	cbrs := m.condBrRelocs[:0]
   369  
   370  	var fn string
   371  	var fnIndex int
   372  	var labelToSSABlockID map[label]ssa.BasicBlockID
   373  	if wazevoapi.PerfMapEnabled {
   374  		fn = wazevoapi.GetCurrentFunctionName(ctx)
   375  		labelToSSABlockID = make(map[label]ssa.BasicBlockID)
   376  		for i, l := range m.ssaBlockIDToLabels {
   377  			labelToSSABlockID[l] = ssa.BasicBlockID(i)
   378  		}
   379  		fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)
   380  	}
   381  
   382  	// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
   383  	var offset int64
   384  	for i, pos := range m.orderedBlockLabels {
   385  		pos.binaryOffset = offset
   386  		var size int64
   387  		for cur := pos.begin; ; cur = cur.next {
   388  			switch cur.kind {
   389  			case nop0:
   390  				l := cur.nop0Label()
   391  				if pos, ok := m.labelPositions[l]; ok {
   392  					pos.binaryOffset = offset + size
   393  				}
   394  			case condBr:
   395  				if !cur.condBrOffsetResolved() {
   396  					var nextLabel label
   397  					if i < len(m.orderedBlockLabels)-1 {
   398  						// Note: this is only used when the block ends with fallthrough,
   399  						// therefore can be safely assumed that the next block exists when it's needed.
   400  						nextLabel = m.orderedBlockLabels[i+1].l
   401  					}
   402  					cbrs = append(cbrs, condBrReloc{
   403  						cbr: cur, currentLabelPos: pos, offset: offset + size,
   404  						nextLabel: nextLabel,
   405  					})
   406  				}
   407  			}
   408  			size += cur.size()
   409  			if cur == pos.end {
   410  				break
   411  			}
   412  		}
   413  
   414  		if wazevoapi.PerfMapEnabled {
   415  			if size > 0 {
   416  				l := pos.l
   417  				var labelStr string
   418  				if blkID, ok := labelToSSABlockID[l]; ok {
   419  					labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID)
   420  				} else {
   421  					labelStr = l.String()
   422  				}
   423  				wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr))
   424  			}
   425  		}
   426  
   427  		pos.binarySize = size
   428  		offset += size
   429  	}
   430  
   431  	// Before resolving any offsets, we need to check if all the conditional branches can be resolved.
   432  	var needRerun bool
   433  	for i := range cbrs {
   434  		reloc := &cbrs[i]
   435  		cbr := reloc.cbr
   436  		offset := reloc.offset
   437  
   438  		target := cbr.condBrLabel()
   439  		offsetOfTarget := m.labelPositions[target].binaryOffset
   440  		diff := offsetOfTarget - offset
   441  		if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
   442  			// This case the conditional branch is too huge. We place the trampoline instructions at the end of the current block,
   443  			// and jump to it.
   444  			m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel)
   445  			// Then, we need to recall this function to fix up the label offsets
   446  			// as they have changed after the trampoline is inserted.
   447  			needRerun = true
   448  		}
   449  	}
   450  	if needRerun {
   451  		m.ResolveRelativeAddresses(ctx)
   452  		if wazevoapi.PerfMapEnabled {
   453  			wazevoapi.PerfMap.Clear()
   454  		}
   455  		return
   456  	}
   457  
   458  	var currentOffset int64
   459  	for cur := m.rootInstr; cur != nil; cur = cur.next {
   460  		switch cur.kind {
   461  		case br:
   462  			target := cur.brLabel()
   463  			offsetOfTarget := m.labelPositions[target].binaryOffset
   464  			diff := offsetOfTarget - currentOffset
   465  			divided := diff >> 2
   466  			if divided < minSignedInt26 || divided > maxSignedInt26 {
   467  				// This means the currently compiled single function is extremely large.
   468  				panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range")
   469  			}
   470  			cur.brOffsetResolve(diff)
   471  		case condBr:
   472  			if !cur.condBrOffsetResolved() {
   473  				target := cur.condBrLabel()
   474  				offsetOfTarget := m.labelPositions[target].binaryOffset
   475  				diff := offsetOfTarget - currentOffset
   476  				if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
   477  					panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly")
   478  				}
   479  				cur.condBrOffsetResolve(diff)
   480  			}
   481  		case brTableSequence:
   482  			for i := range cur.targets {
   483  				l := label(cur.targets[i])
   484  				offsetOfTarget := m.labelPositions[l].binaryOffset
   485  				diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin)
   486  				cur.targets[i] = uint32(diff)
   487  			}
   488  			cur.brTableSequenceOffsetsResolved()
   489  		case emitSourceOffsetInfo:
   490  			m.compiler.AddSourceOffsetInfo(currentOffset, cur.sourceOffsetInfo())
   491  		}
   492  		currentOffset += cur.size()
   493  	}
   494  }
   495  
   496  const (
   497  	maxSignedInt26 int64 = 1<<25 - 1
   498  	minSignedInt26 int64 = -(1 << 25)
   499  
   500  	maxSignedInt19 int64 = 1<<19 - 1
   501  	minSignedInt19 int64 = -(1 << 19)
   502  )
   503  
// insertConditionalJumpTrampoline rewrites the conditional branch cbr, whose
// original target is out of the conditional-branch offset range, so that it
// jumps to a trampoline placed at the end of currentBlk; the trampoline then
// jumps to the original target with an unconditional branch (26-bit range).
func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) {
	cur := currentBlk.end
	originalTarget := cbr.condBrLabel()
	endNext := cur.next

	if cur.kind != br {
		// The block does not end with an unconditional branch, i.e. it falls
		// through to the next block. Insert an explicit jump to the next block so
		// the trampoline instructions below are only reached via the conditional
		// branch, never by fallthrough. (If the block already ends with an
		// unconditional branch, the trampoline can simply go right after it.)
		skip := m.allocateInstr()
		skip.asBr(nextLabel)
		cur = linkInstr(cur, skip)
	}

	// Retarget cbr at a fresh label placed at the trampoline.
	cbrNewTargetInstr, cbrNewTargetLabel := m.allocateBrTarget()
	cbr.setCondBrTargets(cbrNewTargetLabel)
	cur = linkInstr(cur, cbrNewTargetInstr)

	// Then insert the unconditional branch to the original, which should be possible to get encoded
	// as 26-bit offset should be enough for any practical application.
	br := m.allocateInstr()
	br.asBr(originalTarget)
	cur = linkInstr(cur, br)

	// Update the end of the current block.
	currentBlk.end = cur

	linkInstr(cur, endNext)
}
   532  
   533  func (m *machine) getOrAllocateSSABlockLabel(blk ssa.BasicBlock) label {
   534  	if blk.ReturnBlock() {
   535  		return returnLabel
   536  	}
   537  	l := m.ssaBlockIDToLabels[blk.ID()]
   538  	if l == invalidLabel {
   539  		l = m.allocateLabel()
   540  		m.ssaBlockIDToLabels[blk.ID()] = l
   541  	}
   542  	return l
   543  }
   544  
   545  // LinkAdjacentBlocks implements backend.Machine.
   546  func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) {
   547  	prevLabelPos := m.labelPositions[m.getOrAllocateSSABlockLabel(prev)]
   548  	nextLabelPos := m.labelPositions[m.getOrAllocateSSABlockLabel(next)]
   549  	prevLabelPos.end.next = nextLabelPos.begin
   550  }
   551  
   552  // Format implements backend.Machine.
   553  func (m *machine) Format() string {
   554  	begins := map[*instruction]label{}
   555  	for l, pos := range m.labelPositions {
   556  		begins[pos.begin] = l
   557  	}
   558  
   559  	irBlocks := map[label]ssa.BasicBlockID{}
   560  	for i, l := range m.ssaBlockIDToLabels {
   561  		irBlocks[l] = ssa.BasicBlockID(i)
   562  	}
   563  
   564  	var lines []string
   565  	for cur := m.rootInstr; cur != nil; cur = cur.next {
   566  		if l, ok := begins[cur]; ok {
   567  			var labelStr string
   568  			if blkID, ok := irBlocks[l]; ok {
   569  				labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID)
   570  			} else {
   571  				labelStr = fmt.Sprintf("%s:", l)
   572  			}
   573  			lines = append(lines, labelStr)
   574  		}
   575  		if cur.kind == nop0 {
   576  			continue
   577  		}
   578  		lines = append(lines, "\t"+cur.String())
   579  	}
   580  	return "\n" + strings.Join(lines, "\n") + "\n"
   581  }
   582  
   583  // InsertReturn implements backend.Machine.
   584  func (m *machine) InsertReturn() {
   585  	i := m.allocateInstr()
   586  	i.asRet(m.currentABI)
   587  	m.insert(i)
   588  }
   589  
   590  func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 {
   591  	offset, ok := m.spillSlots[id]
   592  	if !ok {
   593  		offset = m.spillSlotSize
   594  		// TODO: this should be aligned depending on the `size` to use Imm12 offset load/store as much as possible.
   595  		m.spillSlots[id] = offset
   596  		m.spillSlotSize += int64(size)
   597  	}
   598  	return offset + 16 // spill slot starts above the clobbered registers and the frame size.
   599  }
   600  
   601  func (m *machine) clobberedRegSlotSize() int64 {
   602  	return int64(len(m.clobberedRegs) * 16)
   603  }
   604  
   605  func (m *machine) arg0OffsetFromSP() int64 {
   606  	return m.frameSize() +
   607  		16 + // 16-byte aligned return address
   608  		16 // frame size saved below the clobbered registers.
   609  }
   610  
   611  func (m *machine) ret0OffsetFromSP() int64 {
   612  	return m.arg0OffsetFromSP() + m.currentABI.argStackSize
   613  }
   614  
   615  func (m *machine) requiredStackSize() int64 {
   616  	return m.maxRequiredStackSizeForCalls +
   617  		m.frameSize() +
   618  		16 + // 16-byte aligned return address.
   619  		16 // frame size saved below the clobbered registers.
   620  }
   621  
   622  func (m *machine) frameSize() int64 {
   623  	s := m.clobberedRegSlotSize() + m.spillSlotSize
   624  	if s&0xf != 0 {
   625  		panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s))
   626  	}
   627  	return s
   628  }