github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go (about)

     1  package arm64
     2  
     3  import (
     4  	"github.com/wasilibs/wazerox/internal/engine/wazevo/backend"
     5  	"github.com/wasilibs/wazerox/internal/engine/wazevo/backend/regalloc"
     6  	"github.com/wasilibs/wazerox/internal/engine/wazevo/ssa"
     7  	"github.com/wasilibs/wazerox/internal/engine/wazevo/wazevoapi"
     8  )
     9  
// calleeSavedRegistersSorted lists, in ascending register order, the registers
// preserved across a Go-function call by saving them into the execution
// context (see saveRegistersInExecutionContext / restoreRegistersInExecutionContext);
// each entry gets an unconditional 16-byte slot there.
//
// NOTE(review): the set is this backend's own convention, not plain AAPCS64:
// x18 (platform register) and x27 are excluded — x27 is presumably reserved as
// the temporary register (tmpRegVReg); TODO confirm against the register
// definitions elsewhere in this package.
var calleeSavedRegistersSorted = []regalloc.VReg{
	x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg,
	v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
}
    14  
// CompileGoFunctionTrampoline implements backend.Machine.
//
// It emits the machine code bridging a Wasm-level call into a host (Go)
// function identified by exitCode: callee-saved registers are spilled into the
// execution context, the Wasm-style stack arguments are copied into a region
// the Go side can view as []uint64, execution exits the VM so the Go function
// runs, and on re-entry the results are copied back before returning.
// The encoded bytes of the whole sequence are returned.
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
	argBegin := 1 // Skips exec context by default.
	if needModuleContextPtr {
		argBegin++
	}

	abi := &abiImpl{m: m}
	abi.init(sig)
	m.currentABI = abi

	// Seed the instruction list with a nop so `cur` is always a valid link target.
	cur := m.allocateInstr()
	cur.asNop0()
	m.rootInstr = cur

	// Execution context is always the first argument.
	execCtrPtr := x0VReg

	// In the following, we create the following stack layout:
	//
	//                   (high address)
	//     SP ------> +-----------------+  <----+
	//                |     .......     |       |
	//                |      ret Y      |       |
	//                |     .......     |       |
	//                |      ret 0      |       |
	//                |      arg X      |       |  size_of_arg_ret
	//                |     .......     |       |
	//                |      arg 1      |       |
	//                |      arg 0      |  <----+ <-------- originalArg0Reg
	//                | size_of_arg_ret |
	//                |  ReturnAddress  |
	//                +-----------------+ <----+
	//                |      xxxx       |      |  ;; might be padded to make it 16-byte aligned.
	//           +--->|  arg[N]/ret[M]  |      |
	//  sliceSize|    |   ............  |      | goCallStackSize
	//           |    |  arg[1]/ret[1]  |      |
	//           +--->|  arg[0]/ret[0]  | <----+ <-------- arg0ret0AddrReg
	//                |    sliceSize    |
	//                |   frame_size    |
	//                +-----------------+
	//                   (low address)
	//
	// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
	// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
	// the arguments/return values.

	// First of all, to update the SP, and create "ReturnAddress + size_of_arg_ret".
	cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)

	const frameInfoSize = 16 // == frame_size + sliceSize.

	// Next, we should allocate the stack for the Go function call if necessary.
	goCallStackSize, sliceSizeInBytes := goFunctionCallRequiredStackSize(sig, argBegin)
	cur = m.insertStackBoundsCheck(goCallStackSize+frameInfoSize, cur)

	originalArg0Reg := x17VReg // Caller save, so we can use it for whatever we want.
	if m.currentABI.alignedArgResultStackSlotSize() > 0 {
		// At this point, SP points to `ReturnAddress`, so add 16 to get the original arg 0 slot.
		cur = m.addsAddOrSubStackPointer(cur, originalArg0Reg, frameInfoSize, true)
	}

	// Save the callee saved registers.
	cur = m.saveRegistersInExecutionContext(cur, calleeSavedRegistersSorted)

	// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
	if needModuleContextPtr {
		offset := wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.I64()
		if !offsetFitsInAddressModeKindRegUnsignedImm12(64, offset) {
			panic("BUG: too large or un-aligned offset for goFunctionCallCalleeModuleContextOpaque in execution context")
		}

		// Module context is always the second argument.
		moduleCtrPtr := x1VReg
		store := m.allocateInstr()
		amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
		store.asStore(operandNR(moduleCtrPtr), amode, 64)
		cur = linkInstr(cur, store)
	}

	// Advances the stack pointer.
	cur = m.addsAddOrSubStackPointer(cur, spVReg, goCallStackSize, false)

	// Copy the pointer to x15VReg.
	arg0ret0AddrReg := x15VReg // Caller save, so we can use it for whatever we want.
	copySp := m.allocateInstr()
	copySp.asMove64(arg0ret0AddrReg, spVReg)
	cur = linkInstr(cur, copySp)

	// Copy each argument into the Go-call region. Register args come straight
	// from their ABI register; stack args are first loaded from the original
	// Wasm stack slots via originalArg0Reg (post-indexed, so it advances).
	for i := range abi.args[argBegin:] {
		arg := &abi.args[argBegin+i]
		store := m.allocateInstr()
		var v regalloc.VReg
		if arg.Kind == backend.ABIArgKindReg {
			v = arg.Reg
		} else {
			cur, v = m.goFunctionCallLoadStackArg(cur, originalArg0Reg, arg,
				// Caller save, so we can use it for whatever we want.
				x11VReg, v11VReg)
		}

		// Every value takes one 8-byte uint64 slot, except v128 which takes 16 bytes.
		var sizeInBits byte
		if arg.Type == ssa.TypeV128 {
			sizeInBits = 128
		} else {
			sizeInBits = 64
		}
		store.asStore(operandNR(v),
			addressMode{
				kind: addressModeKindPostIndex,
				rn:   arg0ret0AddrReg, imm: int64(sizeInBits / 8),
			}, sizeInBits)
		cur = linkInstr(cur, store)
	}

	// Finally, now that we've advanced SP to arg[0]/ret[0], we allocate `frame_size + sliceSize`.
	// Note: sliceSize is stored as a count of uint64 elements (bytes/8), not bytes.
	var frameSizeReg, sliceSizeReg regalloc.VReg
	if goCallStackSize > 0 {
		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, goCallStackSize)
		frameSizeReg = tmpRegVReg
		cur = m.lowerConstantI64AndInsert(cur, x16VReg, sliceSizeInBytes/8)
		sliceSizeReg = x16VReg
	} else {
		frameSizeReg = xzrVReg
		sliceSizeReg = xzrVReg
	}
	_amode := addressModePreOrPostIndex(spVReg, -16, true)
	storeP := m.allocateInstr()
	storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)
	cur = linkInstr(cur, storeP)

	// Set the exit status on the execution context.
	cur = m.setExitCode(cur, x0VReg, exitCode)

	// Save the current stack pointer.
	cur = m.saveCurrentStackPointer(cur, x0VReg)

	// Exit the execution.
	cur = m.storeReturnAddressAndExit(cur)

	// ---- Everything below runs after the Go function returned and execution re-entered here. ----

	// After the call, we need to restore the callee saved registers.
	cur = m.restoreRegistersInExecutionContext(cur, calleeSavedRegistersSorted)

	// Get the pointer to the arg[0]/ret[0]: We need to skip `frame_size + sliceSize`.
	if len(abi.rets) > 0 {
		cur = m.addsAddOrSubStackPointer(cur, arg0ret0AddrReg, frameInfoSize, true)
	}

	// Advances the SP so that it points to `ReturnAddress`.
	cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)
	ldr := m.allocateInstr()
	// And load the return address.
	ldr.asULoad(operandNR(lrVReg),
		addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
	cur = linkInstr(cur, ldr)

	// originalRet0Reg will point at the first Wasm-stack result slot, which sits
	// argStackSize above SP (results follow the argument area).
	originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want.
	if m.currentABI.retStackSize > 0 {
		cur = m.addsAddOrSubStackPointer(cur, originalRet0Reg, m.currentABI.argStackSize, true)
	}

	// Make the SP point to the original address (above the result slot).
	if s := m.currentABI.alignedArgResultStackSlotSize(); s > 0 {
		cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
	}

	// Copy the results out of the Go-call region (walked via arg0ret0AddrReg,
	// post-indexed) into their ABI locations: either directly into the result
	// register, or via a temporary into the Wasm result stack slot.
	for i := range abi.rets {
		r := &abi.rets[i]
		if r.Kind == backend.ABIArgKindReg {
			loadIntoReg := m.allocateInstr()
			mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
			switch r.Type {
			case ssa.TypeI32:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoReg.asULoad(operandNR(r.Reg), mode, 32)
			case ssa.TypeI64:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoReg.asULoad(operandNR(r.Reg), mode, 64)
			case ssa.TypeF32:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32)
			case ssa.TypeF64:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64)
			case ssa.TypeV128:
				mode.imm = 16
				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128)
			default:
				panic("TODO")
			}
			cur = linkInstr(cur, loadIntoReg)
		} else {
			// First we need to load the value to a temporary just like ^^.
			intTmp, floatTmp := x11VReg, v11VReg
			loadIntoTmpReg := m.allocateInstr()
			mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
			var resultReg regalloc.VReg
			switch r.Type {
			case ssa.TypeI32:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32)
				resultReg = intTmp
			case ssa.TypeI64:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64)
				resultReg = intTmp
			case ssa.TypeF32:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32)
				resultReg = floatTmp
			case ssa.TypeF64:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64)
				resultReg = floatTmp
			case ssa.TypeV128:
				mode.imm = 16
				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128)
				resultReg = floatTmp
			default:
				panic("TODO")
			}
			cur = linkInstr(cur, loadIntoTmpReg)
			cur = m.goFunctionCallStoreStackResult(cur, originalRet0Reg, r, resultReg)
		}
	}

	ret := m.allocateInstr()
	ret.asRet(nil)
	linkInstr(cur, ret)

	m.encode(m.rootInstr)
	return m.compiler.Buf()
}
   248  
   249  func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
   250  	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
   251  	for _, v := range regs {
   252  		store := m.allocateInstr()
   253  		var sizeInBits byte
   254  		switch v.RegType() {
   255  		case regalloc.RegTypeInt:
   256  			sizeInBits = 64
   257  		case regalloc.RegTypeFloat:
   258  			sizeInBits = 128
   259  		}
   260  		store.asStore(operandNR(v),
   261  			addressMode{
   262  				kind: addressModeKindRegUnsignedImm12,
   263  				// Execution context is always the first argument.
   264  				rn: x0VReg, imm: offset,
   265  			}, sizeInBits)
   266  		store.prev = cur
   267  		cur.next = store
   268  		cur = store
   269  		offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally store regs at the offset of multiple of 16.
   270  	}
   271  	return cur
   272  }
   273  
   274  func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
   275  	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
   276  	for _, v := range regs {
   277  		load := m.allocateInstr()
   278  		var as func(dst operand, amode addressMode, sizeInBits byte)
   279  		var sizeInBits byte
   280  		switch v.RegType() {
   281  		case regalloc.RegTypeInt:
   282  			as = load.asULoad
   283  			sizeInBits = 64
   284  		case regalloc.RegTypeFloat:
   285  			as = load.asFpuLoad
   286  			sizeInBits = 128
   287  		}
   288  		as(operandNR(v),
   289  			addressMode{
   290  				kind: addressModeKindRegUnsignedImm12,
   291  				// Execution context is always the first argument.
   292  				rn: x0VReg, imm: offset,
   293  			}, sizeInBits)
   294  		cur = linkInstr(cur, load)
   295  		offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16.
   296  	}
   297  	return cur
   298  }
   299  
   300  func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction {
   301  	m.pendingInstructions = m.pendingInstructions[:0]
   302  	m.lowerConstantI64(dst, v)
   303  	for _, instr := range m.pendingInstructions {
   304  		cur = linkInstr(cur, instr)
   305  	}
   306  	return cur
   307  }
   308  
   309  func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction {
   310  	m.pendingInstructions = m.pendingInstructions[:0]
   311  	m.lowerConstantI32(dst, v)
   312  	for _, instr := range m.pendingInstructions {
   313  		cur = linkInstr(cur, instr)
   314  	}
   315  	return cur
   316  }
   317  
   318  func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode wazevoapi.ExitCode) *instruction {
   319  	constReg := x17VReg // caller-saved, so we can use it.
   320  	cur = m.lowerConstantI32AndInsert(cur, constReg, int32(exitCode))
   321  
   322  	// Set the exit status on the execution context.
   323  	setExistStatus := m.allocateInstr()
   324  	setExistStatus.asStore(operandNR(constReg),
   325  		addressMode{
   326  			kind: addressModeKindRegUnsignedImm12,
   327  			rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
   328  		}, 32)
   329  	cur = linkInstr(cur, setExistStatus)
   330  	return cur
   331  }
   332  
   333  func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {
   334  	// Read the return address into tmp, and store it in the execution context.
   335  	adr := m.allocateInstr()
   336  	adr.asAdr(tmpRegVReg, exitSequenceSize+8)
   337  	cur = linkInstr(cur, adr)
   338  
   339  	storeReturnAddr := m.allocateInstr()
   340  	storeReturnAddr.asStore(operandNR(tmpRegVReg),
   341  		addressMode{
   342  			kind: addressModeKindRegUnsignedImm12,
   343  			// Execution context is always the first argument.
   344  			rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
   345  		}, 64)
   346  	cur = linkInstr(cur, storeReturnAddr)
   347  
   348  	// Exit the execution.
   349  	trapSeq := m.allocateInstr()
   350  	trapSeq.asExitSequence(x0VReg)
   351  	cur = linkInstr(cur, trapSeq)
   352  	return cur
   353  }
   354  
   355  func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VReg) *instruction {
   356  	// Save the current stack pointer:
   357  	// 	mov tmp, sp,
   358  	// 	str tmp, [exec_ctx, #stackPointerBeforeGoCall]
   359  	movSp := m.allocateInstr()
   360  	movSp.asMove64(tmpRegVReg, spVReg)
   361  	cur = linkInstr(cur, movSp)
   362  
   363  	strSp := m.allocateInstr()
   364  	strSp.asStore(operandNR(tmpRegVReg),
   365  		addressMode{
   366  			kind: addressModeKindRegUnsignedImm12,
   367  			rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
   368  		}, 64)
   369  	cur = linkInstr(cur, strSp)
   370  	return cur
   371  }
   372  
   373  // goFunctionCallRequiredStackSize returns the size of the stack required for the Go function call.
   374  func goFunctionCallRequiredStackSize(sig *ssa.Signature, argBegin int) (ret, retUnaligned int64) {
   375  	var paramNeededInBytes, resultNeededInBytes int64
   376  	for _, p := range sig.Params[argBegin:] {
   377  		s := int64(p.Size())
   378  		if s < 8 {
   379  			s = 8 // We use uint64 for all basic types, except SIMD v128.
   380  		}
   381  		paramNeededInBytes += s
   382  	}
   383  	for _, r := range sig.Results {
   384  		s := int64(r.Size())
   385  		if s < 8 {
   386  			s = 8 // We use uint64 for all basic types, except SIMD v128.
   387  		}
   388  		resultNeededInBytes += s
   389  	}
   390  
   391  	if paramNeededInBytes > resultNeededInBytes {
   392  		ret = paramNeededInBytes
   393  	} else {
   394  		ret = resultNeededInBytes
   395  	}
   396  	retUnaligned = ret
   397  	// Align to 16 bytes.
   398  	ret = (ret + 15) &^ 15
   399  	return
   400  }
   401  
   402  func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {
   403  	load := m.allocateInstr()
   404  	var result regalloc.VReg
   405  	mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
   406  	switch arg.Type {
   407  	case ssa.TypeI32:
   408  		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
   409  		load.asULoad(operandNR(intVReg), mode, 32)
   410  		result = intVReg
   411  	case ssa.TypeI64:
   412  		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
   413  		load.asULoad(operandNR(intVReg), mode, 64)
   414  		result = intVReg
   415  	case ssa.TypeF32:
   416  		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
   417  		load.asFpuLoad(operandNR(floatVReg), mode, 32)
   418  		result = floatVReg
   419  	case ssa.TypeF64:
   420  		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
   421  		load.asFpuLoad(operandNR(floatVReg), mode, 64)
   422  		result = floatVReg
   423  	case ssa.TypeV128:
   424  		mode.imm = 16
   425  		load.asFpuLoad(operandNR(floatVReg), mode, 128)
   426  		result = floatVReg
   427  	default:
   428  		panic("TODO")
   429  	}
   430  
   431  	cur = linkInstr(cur, load)
   432  	return cur, result
   433  }
   434  
   435  func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {
   436  	store := m.allocateInstr()
   437  	mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
   438  	var sizeInBits byte
   439  	switch result.Type {
   440  	case ssa.TypeI32, ssa.TypeF32:
   441  		mode.imm = 8
   442  		sizeInBits = 32
   443  	case ssa.TypeI64, ssa.TypeF64:
   444  		mode.imm = 8
   445  		sizeInBits = 64
   446  	case ssa.TypeV128:
   447  		mode.imm = 16
   448  		sizeInBits = 128
   449  	default:
   450  		panic("TODO")
   451  	}
   452  	store.asStore(operandNR(resultVReg), mode, sizeInBits)
   453  	return linkInstr(cur, store)
   454  }