github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go

package arm64

import (
	"github.com/wasilibs/wazerox/internal/engine/wazevo/backend"
	"github.com/wasilibs/wazerox/internal/engine/wazevo/backend/regalloc"
	"github.com/wasilibs/wazerox/internal/engine/wazevo/ssa"
	"github.com/wasilibs/wazerox/internal/engine/wazevo/wazevoapi"
)

var calleeSavedRegistersSorted = []regalloc.VReg{
	x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg,
	v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
}

// CompileGoFunctionTrampoline implements backend.Machine.
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
	argBegin := 1 // Skips exec context by default.
	if needModuleContextPtr {
		argBegin++
	}

	abi := &abiImpl{m: m}
	abi.init(sig)
	m.currentABI = abi

	cur := m.allocateInstr()
	cur.asNop0()
	m.rootInstr = cur

	// Execution context is always the first argument.
	execCtrPtr := x0VReg

	// In the following, we create this stack layout:
	//
	//                     (high address)
	//    SP ------> +-----------------+  <----+
	//               |     .......     |       |
	//               |      ret Y      |       |
	//               |     .......     |       |
	//               |      ret 0      |       |
	//               |      arg X      |       |  size_of_arg_ret
	//               |     .......     |       |
	//               |      arg 1      |       |
	//               |      arg 0      |  <----+ <-------- originalArg0Reg
	//               | size_of_arg_ret |
	//               |  ReturnAddress  |
	//               +-----------------+ <----+
	//               |      xxxx       |      |  ;; might be padded to make it 16-byte aligned.
	//          +--->|  arg[N]/ret[M]  |      |
	// sliceSize|    |  ............   |      | goCallStackSize
	//          |    |  arg[1]/ret[1]  |      |
	//          +--->|  arg[0]/ret[0]  | <----+ <-------- arg0ret0AddrReg
	//               |    sliceSize    |
	//               |   frame_size    |
	//               +-----------------+
	//                     (low address)
	//
	// where the region "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go function,
	// and is therefore accessed as the usual []uint64. That is where we pass arguments to, and
	// receive return values from, the Go function.

	// First of all, we update the SP and create the "ReturnAddress + size_of_arg_ret" slots.
	cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)

	const frameInfoSize = 16 // == frame_size + sliceSize.

	// Next, we allocate the stack for the Go function call if necessary.
	goCallStackSize, sliceSizeInBytes := goFunctionCallRequiredStackSize(sig, argBegin)
	cur = m.insertStackBoundsCheck(goCallStackSize+frameInfoSize, cur)

	originalArg0Reg := x17VReg // Caller-saved, so we can use it for whatever we want.
	if m.currentABI.alignedArgResultStackSlotSize() > 0 {
		// At this point, SP points to `ReturnAddress`, so add 16 to get the original arg 0 slot.
		cur = m.addsAddOrSubStackPointer(cur, originalArg0Reg, frameInfoSize, true)
	}

	// Save the callee-saved registers.
	cur = m.saveRegistersInExecutionContext(cur, calleeSavedRegistersSorted)

	// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
	if needModuleContextPtr {
		offset := wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.I64()
		if !offsetFitsInAddressModeKindRegUnsignedImm12(64, offset) {
			panic("BUG: too large or un-aligned offset for goFunctionCallCalleeModuleContextOpaque in execution context")
		}

		// Module context is always the second argument.
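		// Saving this opaque pointer into the execution context is, presumably,
		// how the Go-side callee later recovers which module instance made the call.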
		moduleCtrPtr := x1VReg
		store := m.allocateInstr()
		amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
		store.asStore(operandNR(moduleCtrPtr), amode, 64)
		cur = linkInstr(cur, store)
	}

	// Advance (lower) the stack pointer to allocate the arg[i]/ret[j] region.
	cur = m.addsAddOrSubStackPointer(cur, spVReg, goCallStackSize, false)

	// Copy the SP (which now points to arg[0]/ret[0]) into x15VReg.
	arg0ret0AddrReg := x15VReg // Caller-saved, so we can use it for whatever we want.
	copySp := m.allocateInstr()
	copySp.asMove64(arg0ret0AddrReg, spVReg)
	cur = linkInstr(cur, copySp)

	for i := range abi.args[argBegin:] {
		arg := &abi.args[argBegin+i]
		store := m.allocateInstr()
		var v regalloc.VReg
		if arg.Kind == backend.ABIArgKindReg {
			v = arg.Reg
		} else {
			cur, v = m.goFunctionCallLoadStackArg(cur, originalArg0Reg, arg,
				// Caller-saved, so we can use them for whatever we want.
				x11VReg, v11VReg)
		}

		var sizeInBits byte
		if arg.Type == ssa.TypeV128 {
			sizeInBits = 128
		} else {
			sizeInBits = 64
		}
		store.asStore(operandNR(v),
			addressMode{
				kind: addressModeKindPostIndex,
				rn:   arg0ret0AddrReg, imm: int64(sizeInBits / 8),
			}, sizeInBits)
		cur = linkInstr(cur, store)
	}

	// Finally, now that we've advanced SP to arg[0]/ret[0], we allocate `frame_size + sliceSize`.
	var frameSizeReg, sliceSizeReg regalloc.VReg
	if goCallStackSize > 0 {
		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, goCallStackSize)
		frameSizeReg = tmpRegVReg
		cur = m.lowerConstantI64AndInsert(cur, x16VReg, sliceSizeInBytes/8)
		sliceSizeReg = x16VReg
	} else {
		frameSizeReg = xzrVReg
		sliceSizeReg = xzrVReg
	}
	_amode := addressModePreOrPostIndex(spVReg, -16, true)
	storeP := m.allocateInstr()
	storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)
	cur = linkInstr(cur, storeP)

	// Set the exit status on the execution context.
	cur = m.setExitCode(cur, x0VReg, exitCode)

	// Save the current stack pointer.
	cur = m.saveCurrentStackPointer(cur, x0VReg)

	// Exit the execution.
	cur = m.storeReturnAddressAndExit(cur)

	// After the call, we need to restore the callee-saved registers.
	cur = m.restoreRegistersInExecutionContext(cur, calleeSavedRegistersSorted)

	// Get the pointer to arg[0]/ret[0]: we need to skip `frame_size + sliceSize`.
	if len(abi.rets) > 0 {
		cur = m.addsAddOrSubStackPointer(cur, arg0ret0AddrReg, frameInfoSize, true)
	}

	// Advance the SP so that it points to `ReturnAddress`.
	cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)
	ldr := m.allocateInstr()
	// And load the return address.
	ldr.asULoad(operandNR(lrVReg),
		addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after load. */), 64)
	cur = linkInstr(cur, ldr)

	originalRet0Reg := x17VReg // Caller-saved, so we can use it for whatever we want.
	if m.currentABI.retStackSize > 0 {
		cur = m.addsAddOrSubStackPointer(cur, originalRet0Reg, m.currentABI.argStackSize, true)
	}

	// Make the SP point to the original address (above the result slot).
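	// (alignedArgResultStackSlotSize is, presumably, the 16-byte-aligned size of
	// the "size_of_arg_ret" region in the diagram above.)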
	if s := m.currentABI.alignedArgResultStackSlotSize(); s > 0 {
		cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
	}

	for i := range abi.rets {
		r := &abi.rets[i]
		if r.Kind == backend.ABIArgKindReg {
			loadIntoReg := m.allocateInstr()
			mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
			switch r.Type {
			case ssa.TypeI32:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoReg.asULoad(operandNR(r.Reg), mode, 32)
			case ssa.TypeI64:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoReg.asULoad(operandNR(r.Reg), mode, 64)
			case ssa.TypeF32:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32)
			case ssa.TypeF64:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64)
			case ssa.TypeV128:
				mode.imm = 16
				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128)
			default:
				panic("TODO")
			}
			cur = linkInstr(cur, loadIntoReg)
		} else {
			// First we need to load the value into a temporary, just like above.
			intTmp, floatTmp := x11VReg, v11VReg
			loadIntoTmpReg := m.allocateInstr()
			mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
			var resultReg regalloc.VReg
			switch r.Type {
			case ssa.TypeI32:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32)
				resultReg = intTmp
			case ssa.TypeI64:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64)
				resultReg = intTmp
			case ssa.TypeF32:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32)
				resultReg = floatTmp
			case ssa.TypeF64:
				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64)
				resultReg = floatTmp
			case ssa.TypeV128:
				mode.imm = 16
				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128)
				resultReg = floatTmp
			default:
				panic("TODO")
			}
			cur = linkInstr(cur, loadIntoTmpReg)
			cur = m.goFunctionCallStoreStackResult(cur, originalRet0Reg, r, resultReg)
		}
	}

	ret := m.allocateInstr()
	ret.asRet(nil)
	linkInstr(cur, ret)

	m.encode(m.rootInstr)
	return m.compiler.Buf()
}

func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
	for _, v := range regs {
		store := m.allocateInstr()
		var sizeInBits byte
		switch v.RegType() {
		case regalloc.RegTypeInt:
			sizeInBits = 64
		case regalloc.RegTypeFloat:
			sizeInBits = 128
		}
		store.asStore(operandNR(v),
			addressMode{
				kind: addressModeKindRegUnsignedImm12,
				// Execution context is always the first argument.
				rn: x0VReg, imm: offset,
			}, sizeInBits)
		store.prev = cur
		cur.next = store
		cur = store
		offset += 16 // The imm12 offset must be 16-byte aligned for vector regs, so we unconditionally store regs at offsets that are multiples of 16.
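		// (This wastes 8 bytes per 64-bit integer register, but keeps this loop
		// and the restore loop below in lockstep with a single offset stride.)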
	}
	return cur
}

func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
	for _, v := range regs {
		load := m.allocateInstr()
		var as func(dst operand, amode addressMode, sizeInBits byte)
		var sizeInBits byte
		switch v.RegType() {
		case regalloc.RegTypeInt:
			as = load.asULoad
			sizeInBits = 64
		case regalloc.RegTypeFloat:
			as = load.asFpuLoad
			sizeInBits = 128
		}
		as(operandNR(v),
			addressMode{
				kind: addressModeKindRegUnsignedImm12,
				// Execution context is always the first argument.
				rn: x0VReg, imm: offset,
			}, sizeInBits)
		cur = linkInstr(cur, load)
		offset += 16 // The imm12 offset must be 16-byte aligned for vector regs, so we unconditionally load regs at offsets that are multiples of 16.
	}
	return cur
}

func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction {
	m.pendingInstructions = m.pendingInstructions[:0]
	m.lowerConstantI64(dst, v)
	for _, instr := range m.pendingInstructions {
		cur = linkInstr(cur, instr)
	}
	return cur
}

func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction {
	m.pendingInstructions = m.pendingInstructions[:0]
	m.lowerConstantI32(dst, v)
	for _, instr := range m.pendingInstructions {
		cur = linkInstr(cur, instr)
	}
	return cur
}

func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode wazevoapi.ExitCode) *instruction {
	constReg := x17VReg // Caller-saved, so we can use it.
	cur = m.lowerConstantI32AndInsert(cur, constReg, int32(exitCode))

	// Set the exit status on the execution context.
	setExitStatus := m.allocateInstr()
	setExitStatus.asStore(operandNR(constReg),
		addressMode{
			kind: addressModeKindRegUnsignedImm12,
			rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
		}, 32)
	cur = linkInstr(cur, setExitStatus)
	return cur
}

func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {
	// Read the return address into tmp, and store it in the execution context.
	adr := m.allocateInstr()
	adr.asAdr(tmpRegVReg, exitSequenceSize+8)
	cur = linkInstr(cur, adr)

	storeReturnAddr := m.allocateInstr()
	storeReturnAddr.asStore(operandNR(tmpRegVReg),
		addressMode{
			kind: addressModeKindRegUnsignedImm12,
			// Execution context is always the first argument.
			rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
		}, 64)
	cur = linkInstr(cur, storeReturnAddr)

	// Exit the execution.
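	// The exit sequence presumably transfers control back to the Go runtime side,
	// which performs the Go call and later resumes execution at the return
	// address saved just above.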
	trapSeq := m.allocateInstr()
	trapSeq.asExitSequence(x0VReg)
	cur = linkInstr(cur, trapSeq)
	return cur
}

func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VReg) *instruction {
	// Save the current stack pointer:
	//	mov tmp, sp
	//	str tmp, [exec_ctx, #stackPointerBeforeGoCall]
	movSp := m.allocateInstr()
	movSp.asMove64(tmpRegVReg, spVReg)
	cur = linkInstr(cur, movSp)

	strSp := m.allocateInstr()
	strSp.asStore(operandNR(tmpRegVReg),
		addressMode{
			kind: addressModeKindRegUnsignedImm12,
			rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
		}, 64)
	cur = linkInstr(cur, strSp)
	return cur
}

// goFunctionCallRequiredStackSize returns the size of the stack required for the Go function call.
// ret is aligned to 16 bytes; retUnaligned is the same value before alignment.
func goFunctionCallRequiredStackSize(sig *ssa.Signature, argBegin int) (ret, retUnaligned int64) {
	var paramNeededInBytes, resultNeededInBytes int64
	for _, p := range sig.Params[argBegin:] {
		s := int64(p.Size())
		if s < 8 {
			s = 8 // We use uint64 for all basic types, except SIMD v128.
		}
		paramNeededInBytes += s
	}
	for _, r := range sig.Results {
		s := int64(r.Size())
		if s < 8 {
			s = 8 // We use uint64 for all basic types, except SIMD v128.
		}
		resultNeededInBytes += s
	}

	// The arg/ret slice is shared between parameters and results, so it must be
	// large enough for whichever needs more space.
	if paramNeededInBytes > resultNeededInBytes {
		ret = paramNeededInBytes
	} else {
		ret = resultNeededInBytes
	}
	retUnaligned = ret
	// Align to 16 bytes.
	ret = (ret + 15) &^ 15
	return
}

func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {
	load := m.allocateInstr()
	var result regalloc.VReg
	mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
	switch arg.Type {
	case ssa.TypeI32:
		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
		load.asULoad(operandNR(intVReg), mode, 32)
		result = intVReg
	case ssa.TypeI64:
		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
		load.asULoad(operandNR(intVReg), mode, 64)
		result = intVReg
	case ssa.TypeF32:
		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
		load.asFpuLoad(operandNR(floatVReg), mode, 32)
		result = floatVReg
	case ssa.TypeF64:
		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
		load.asFpuLoad(operandNR(floatVReg), mode, 64)
		result = floatVReg
	case ssa.TypeV128:
		mode.imm = 16
		load.asFpuLoad(operandNR(floatVReg), mode, 128)
		result = floatVReg
	default:
		panic("TODO")
	}

	cur = linkInstr(cur, load)
	return cur, result
}

func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {
	store := m.allocateInstr()
	mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
	var sizeInBits byte
	switch result.Type {
	case ssa.TypeI32, ssa.TypeF32:
		mode.imm = 8
		sizeInBits = 32
	case ssa.TypeI64, ssa.TypeF64:
		mode.imm = 8
		sizeInBits = 64
	case ssa.TypeV128:
		mode.imm = 16
		sizeInBits = 128
	default:
		panic("TODO")
	}
	store.asStore(operandNR(resultVReg), mode, sizeInBits)
	return linkInstr(cur, store)
}
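As an aside, the `(ret + 15) &^ 15` rounding above is the usual align-up idiom. The following standalone sketch (hypothetical, not part of this package) walks through the arithmetic for an assumed (i32, v128) -> (i64) signature, using the widening rule documented in the comments above: every basic type occupies 8 bytes in the Go-call slice, and v128 occupies 16.

package main

import "fmt"

// alignUp16 mirrors the rounding in goFunctionCallRequiredStackSize:
// (n + 15) &^ 15 rounds a byte count up to the next multiple of 16,
// which keeps the AArch64 stack pointer 16-byte aligned.
func alignUp16(n int64) int64 { return (n + 15) &^ 15 }

func main() {
	// Hypothetical signature (i32, v128) -> (i64):
	// params need 8 (i32 widened to uint64) + 16 (v128) = 24 bytes;
	// results need 8 bytes (i64). The shared arg[i]/ret[j] slice must hold
	// max(24, 8) = 24 bytes, i.e. a []uint64 of length 3 (the sliceSize).
	paramBytes, resultBytes := int64(8+16), int64(8)
	unaligned := paramBytes
	if resultBytes > unaligned {
		unaligned = resultBytes
	}
	fmt.Println(unaligned, alignUp16(unaligned)) // prints: 24 32
}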