github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go

package amd64

import (
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)

var calleeSavedVRegs = []regalloc.VReg{
	rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
	xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
}

// CompileGoFunctionTrampoline implements backend.Machine.
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
	ectx := m.ectx
	argBegin := 1 // Skips exec context by default.
	if needModuleContextPtr {
		argBegin++
	}

	abi := &backend.FunctionABI{}
	abi.Init(sig, intArgResultRegs, floatArgResultRegs)
	m.currentABI = abi

	cur := m.allocateNop()
	ectx.RootInstr = cur

	// Execution context is always the first argument.
	execCtxPtr := raxVReg

	// First we update RBP and RSP just like the normal prologue.
	//
	//                (high address)                     (high address)
	//   RBP ----> +-----------------+               +-----------------+
	//             |     .......     |               |     .......     |
	//             |      ret Y      |               |      ret Y      |
	//             |     .......     |               |     .......     |
	//             |      ret 0      |               |      ret 0      |
	//             |      arg X      |               |      arg X      |
	//             |     .......     |     ====>     |     .......     |
	//             |      arg 1      |               |      arg 1      |
	//             |      arg 0      |               |      arg 0      |
	//             |   Return Addr   |               |   Return Addr   |
	//   RSP ----> +-----------------+               |    Caller_RBP   |
	//                (low address)                  +-----------------+ <----- RSP, RBP
	//                                                  (low address)
	//
	cur = m.setupRBPRSP(cur)

	goSliceSizeAligned, goSliceSizeAlignedUnaligned := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
	cur = m.insertStackBoundsCheck(goSliceSizeAligned+8 /* size of the Go slice */, cur)

	// Save the callee-saved registers.
	cur = m.saveRegistersInExecutionContext(cur, execCtxPtr, calleeSavedVRegs)

	if needModuleContextPtr {
		moduleCtxPtr := rbxVReg // Module context is always the second argument.
		mem := m.newAmodeImmReg(
			wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.U32(),
			execCtxPtr)
		store := m.allocateInstr().asMovRM(moduleCtxPtr, newOperandMem(mem), 8)
		cur = linkInstr(cur, store)
	}

	// Now let's advance the RSP to the stack slot for the arguments.
	//
	//                (high address)                     (high address)
	//             +-----------------+               +-----------------+
	//             |     .......     |               |     .......     |
	//             |      ret Y      |               |      ret Y      |
	//             |     .......     |               |     .......     |
	//             |      ret 0      |               |      ret 0      |
	//             |      arg X      |               |      arg X      |
	//             |     .......     |    =======>   |     .......     |
	//             |      arg 1      |               |      arg 1      |
	//             |      arg 0      |               |      arg 0      |
	//             |   Return Addr   |               |   Return Addr   |
	//             |   Caller_RBP    |               |   Caller_RBP    |
	// RBP,RSP --> +-----------------+               +-----------------+ <----- RBP
	//                (low address)                  |  arg[N]/ret[M]  |
	//                                               |    ..........   |
	//                                               |  arg[1]/ret[1]  |
	//                                               |  arg[0]/ret[0]  |
	//                                               +-----------------+ <----- RSP
	//                                                  (low address)
	//
	// where the region "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go function,
	// and is accessed as the usual []uint64. That is where we pass arguments to, and receive
	// return values from, the Go function.
	cur = m.addRSP(-int32(goSliceSizeAligned), cur)

	// Next, store all the arguments to the stack in the typical Wasm stack style.
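	//
	// Illustrative note (not from the original source): for a hypothetical Wasm signature
	// (i32, f64, v128) -> i64, the loop below writes the i32 at offset 0, the f64 at offset 8,
	// and the v128 at offset 16 of the Go slice region: every scalar occupies a full 8-byte
	// slot regardless of its width, and a v128 occupies two slots.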
	var offsetInGoSlice int32
	for i := range abi.Args[argBegin:] {
		arg := &abi.Args[argBegin+i]
		var v regalloc.VReg
		if arg.Kind == backend.ABIArgKindReg {
			v = arg.Reg
		} else {
			// We have already saved the callee-saved registers, so we can use them as scratch.
			if arg.Type.IsInt() {
				v = r15VReg
			} else {
				v = xmm15VReg
			}
			mem := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
			load := m.allocateInstr()
			switch arg.Type {
			case ssa.TypeI32:
				load.asMovzxRmR(extModeLQ, mem, v)
			case ssa.TypeI64:
				load.asMov64MR(mem, v)
			case ssa.TypeF32:
				load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
			case ssa.TypeF64:
				load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
			case ssa.TypeV128:
				load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
			default:
				panic("BUG")
			}
			cur = linkInstr(cur, load)
		}

		store := m.allocateInstr()
		mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
		switch arg.Type {
		case ssa.TypeI32:
			store.asMovRM(v, mem, 4)
			offsetInGoSlice += 8 // always uint64 rep.
		case ssa.TypeI64:
			store.asMovRM(v, mem, 8)
			offsetInGoSlice += 8
		case ssa.TypeF32:
			store.asXmmMovRM(sseOpcodeMovss, v, mem)
			offsetInGoSlice += 8 // always uint64 rep.
		case ssa.TypeF64:
			store.asXmmMovRM(sseOpcodeMovsd, v, mem)
			offsetInGoSlice += 8
		case ssa.TypeV128:
			store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
			offsetInGoSlice += 16
		default:
			panic("BUG")
		}
		cur = linkInstr(cur, store)
	}

	// Finally we push the size of the slice to the stack so the stack looks like:
	//
	//          (high address)
	//       +-----------------+
	//       |     .......     |
	//       |      ret Y      |
	//       |     .......     |
	//       |      ret 0      |
	//       |      arg X      |
	//       |     .......     |
	//       |      arg 1      |
	//       |      arg 0      |
	//       |   Return Addr   |
	//       |   Caller_RBP    |
	//       +-----------------+ <----- RBP
	//       |  arg[N]/ret[M]  |
	//       |    ..........   |
	//       |  arg[1]/ret[1]  |
	//       |  arg[0]/ret[0]  |
	//       |    slice size   |
	//       +-----------------+ <----- RSP
	//          (low address)
	//
	//	push $sliceSize
	cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandImm32(uint32(goSliceSizeAlignedUnaligned))))

	// Load the exitCode to the register.
	exitCodeReg := r12VReg // Callee-saved, and already saved above.
	cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(exitCode), false))

	saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtxPtr, exitCodeReg)
	cur = linkInstr(cur, setExitCode)
	cur = linkInstr(cur, saveRsp)
	cur = linkInstr(cur, saveRbp)

	// Ready to exit the execution.
	cur = m.storeReturnAddressAndExit(cur, execCtxPtr)

	// We don't need the slice size anymore, so pop it.
	cur = m.addRSP(8, cur)

	// Ready to set up the results.
	offsetInGoSlice = 0
	// To avoid the result clobbering the execution context pointer, we track the offset of a
	// result assigned to that register and defer loading it until the end of this function.
	var argOverlapWithExecCtxOffset int32 = -1
	for i := range abi.Rets {
		r := &abi.Rets[i]
		var v regalloc.VReg
		isRegResult := r.Kind == backend.ABIArgKindReg
		if isRegResult {
			v = r.Reg
			if v.RealReg() == execCtxPtr.RealReg() {
				argOverlapWithExecCtxOffset = offsetInGoSlice
				offsetInGoSlice += 8 // always uint64 rep.
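				// Loading this result now would clobber the execution context pointer in
				// RAX, which is still needed below; it is loaded after
				// restoreRegistersInExecutionContext at the end of this function.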
				continue
			}
		} else {
			if r.Type.IsInt() {
				v = r15VReg
			} else {
				v = xmm15VReg
			}
		}

		load := m.allocateInstr()
		mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
		switch r.Type {
		case ssa.TypeI32:
			load.asMovzxRmR(extModeLQ, mem, v)
			offsetInGoSlice += 8 // always uint64 rep.
		case ssa.TypeI64:
			load.asMov64MR(mem, v)
			offsetInGoSlice += 8
		case ssa.TypeF32:
			load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
			offsetInGoSlice += 8 // always uint64 rep.
		case ssa.TypeF64:
			load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
			offsetInGoSlice += 8
		case ssa.TypeV128:
			load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
			offsetInGoSlice += 16
		default:
			panic("BUG")
		}
		cur = linkInstr(cur, load)

		if !isRegResult {
			// We need to store it back to the result slot above rbp.
			store := m.allocateInstr()
			mem := newOperandMem(m.newAmodeImmReg(uint32(abi.ArgStackSize+r.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
			switch r.Type {
			case ssa.TypeI32:
				store.asMovRM(v, mem, 4)
			case ssa.TypeI64:
				store.asMovRM(v, mem, 8)
			case ssa.TypeF32:
				store.asXmmMovRM(sseOpcodeMovss, v, mem)
			case ssa.TypeF64:
				store.asXmmMovRM(sseOpcodeMovsd, v, mem)
			case ssa.TypeV128:
				store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
			default:
				panic("BUG")
			}
			cur = linkInstr(cur, store)
		}
	}

	// Before return, we need to restore the callee-saved registers.
	cur = m.restoreRegistersInExecutionContext(cur, execCtxPtr, calleeSavedVRegs)

	if argOverlapWithExecCtxOffset >= 0 {
		// At this point execCtxPtr is no longer used, so we can finally load the deferred
		// result into the register that overlaps with the execution context pointer.
		mem := newOperandMem(m.newAmodeImmReg(uint32(argOverlapWithExecCtxOffset), rspVReg))
		load := m.allocateInstr().asMov64MR(mem, execCtxPtr)
		cur = linkInstr(cur, load)
	}

	// Finally ready to return.
	cur = m.revertRBPRSP(cur)
	linkInstr(cur, m.allocateInstr().asRet())

	m.encodeWithoutSSA(ectx.RootInstr)
	return m.c.Buf()
}

func (m *machine) saveRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
	for _, v := range regs {
		store := m.allocateInstr()
		mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
		switch v.RegType() {
		case regalloc.RegTypeInt:
			store.asMovRM(v, mem, 8)
		case regalloc.RegTypeFloat:
			store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
		default:
			panic("BUG")
		}
		cur = linkInstr(cur, store)
		offset += 16 // See the execution context struct: each register slot is unconditionally 16 bytes.
	}
	return cur
}

func (m *machine) restoreRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
	for _, v := range regs {
		load := m.allocateInstr()
		mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
		switch v.RegType() {
		case regalloc.RegTypeInt:
			load.asMov64MR(mem, v)
		case regalloc.RegTypeFloat:
			load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
		default:
			panic("BUG")
		}
		cur = linkInstr(cur, load)
		offset += 16 // See the execution context struct: each register slot is unconditionally 16 bytes.
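		// Note: these loads must mirror the stores in saveRegistersInExecutionContext,
		// including the 16-byte stride, so that each register is restored from exactly
		// the slot it was saved to.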
	}
	return cur
}

func (m *machine) storeReturnAddressAndExit(cur *instruction, execCtx regalloc.VReg) *instruction {
	readRip := m.allocateInstr()
	cur = linkInstr(cur, readRip)

	ripReg := r12VReg // Callee-saved, and already saved above.
	saveRip := m.allocateInstr().asMovRM(
		ripReg,
		newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)),
		8,
	)
	cur = linkInstr(cur, saveRip)

	exit := m.allocateExitSeq(execCtx)
	cur = linkInstr(cur, exit)

	nop, l := m.allocateBrTarget()
	cur = linkInstr(cur, nop)
	readRip.asLEA(newOperandLabel(l), ripReg)
	return cur
}

// stackGrowSaveVRegs is the set of registers that must be saved/restored while growing the stack
// when there is insufficient stack space left. Basically these are all the allocatable registers
// except for RSP, RBP, and RAX, which holds the execution context pointer. The ExecCtx pointer is
// always the first argument, so we don't need to save it.
var stackGrowSaveVRegs = []regalloc.VReg{
	rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
	rcxVReg, rbxVReg, rsiVReg, rdiVReg, r8VReg, r9VReg, r10VReg, r11VReg,
	xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
	xmm0VReg, xmm1VReg, xmm2VReg, xmm3VReg, xmm4VReg, xmm5VReg, xmm6VReg, xmm7VReg,
}

// CompileStackGrowCallSequence implements backend.Machine.
func (m *machine) CompileStackGrowCallSequence() []byte {
	ectx := m.ectx

	cur := m.allocateNop()
	ectx.RootInstr = cur

	cur = m.setupRBPRSP(cur)

	// Execution context is always the first argument.
	execCtxPtr := raxVReg

	// Save the callee-saved and argument registers.
	cur = m.saveRegistersInExecutionContext(cur, execCtxPtr, stackGrowSaveVRegs)

	// Load the exitCode to the register.
	exitCodeReg := r12VReg // Already saved.
	cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(wazevoapi.ExitCodeGrowStack), false))

	saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtxPtr, exitCodeReg)
	cur = linkInstr(cur, setExitCode)
	cur = linkInstr(cur, saveRsp)
	cur = linkInstr(cur, saveRbp)

	// Ready to exit the execution.
	cur = m.storeReturnAddressAndExit(cur, execCtxPtr)

	// After the exit, restore the saved registers.
	cur = m.restoreRegistersInExecutionContext(cur, execCtxPtr, stackGrowSaveVRegs)

	// Finally ready to return.
	cur = m.revertRBPRSP(cur)
	linkInstr(cur, m.allocateInstr().asRet())

	m.encodeWithoutSSA(ectx.RootInstr)
	return m.c.Buf()
}

// insertStackBoundsCheck inserts instructions after `cur` to check the stack bounds; if there is
// not enough stack space for the function, it exits the execution and tries growing the stack in
// the Go world.
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
	//	sub $requiredStackSize, %rsp                                ;; Temporarily update the sp.
	//	cmp ExecutionContextOffsetStackBottomPtr(%rax), %rsp        ;; Compare the stack bottom and the sp.
	//	ja .ok
	//	add $requiredStackSize, %rsp                                ;; Reverse the temporary update.
	//	pushq %r15                                                  ;; Save the temporary.
	//	mov $requiredStackSize, %r15
	//	mov %r15, ExecutionContextOffsetStackGrowRequiredSize(%rax) ;; Set the required size in the execution context.
	//	popq %r15                                                   ;; Restore the temporary.
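	//	;; (Note: %r15 must hold the guest value again before the call below, since the
	//	;; stack-grow trampoline saves and later restores all allocatable registers,
	//	;; including %r15, via the execution context.)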
	//	callq *ExecutionContextOffsetStackGrowCallTrampolineAddress(%rax) ;; Call the Go function to grow the stack.
	//	jmp .cont
	// .ok:
	//	add $requiredStackSize, %rsp                                ;; Reverse the temporary update.
	// .cont:
	cur = m.addRSP(-int32(requiredStackSize), cur)
	cur = linkInstr(cur, m.allocateInstr().asCmpRmiR(true,
		newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackBottomPtr.U32(), raxVReg)),
		rspVReg, true))

	ja := m.allocateInstr()
	cur = linkInstr(cur, ja)

	cur = m.addRSP(int32(requiredStackSize), cur)

	// Save the temporary.
	cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r15VReg)))
	// Load the required size to the temporary.
	cur = linkInstr(cur, m.allocateInstr().asImm(r15VReg, uint64(requiredStackSize), true))
	// Set the required size in the execution context.
	cur = linkInstr(cur, m.allocateInstr().asMovRM(r15VReg,
		newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.U32(), raxVReg)), 8))
	// Restore the temporary.
	cur = linkInstr(cur, m.allocateInstr().asPop64(r15VReg))
	// Call the Go function to grow the stack.
	cur = linkInstr(cur, m.allocateInstr().asCallIndirect(newOperandMem(m.newAmodeImmReg(
		wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.U32(), raxVReg)), nil))
	// Jump to the continuation.
	jmpToCont := m.allocateInstr()
	cur = linkInstr(cur, jmpToCont)

	// .ok:
	okInstr, ok := m.allocateBrTarget()
	cur = linkInstr(cur, okInstr)
	ja.asJmpIf(condNBE, newOperandLabel(ok)) // condNBE encodes `ja` (jump if not below or equal).
	// On the ok path, we only need to reverse the temporary update.
	cur = m.addRSP(int32(requiredStackSize), cur)

	// .cont:
	contInstr, cont := m.allocateBrTarget()
	cur = linkInstr(cur, contInstr)
	jmpToCont.asJmp(newOperandLabel(cont))

	return cur
}
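// Illustrative note (not from the original source): with requiredStackSize = 32, the sequence
// above moves RSP down by 32 bytes and takes the fast path if RSP still lies above the stack
// bottom recorded in the execution context; otherwise it undoes the adjustment, records 32 in
// the execution context's StackGrowRequiredSize field, and calls through the stack-grow
// trampoline address held in the execution context before continuing at .cont.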