package arm64

import (
	"fmt"

	"github.com/AR1011/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/AR1011/wazero/internal/engine/wazevo/wazevoapi"
)

// SetupPrologue implements backend.Machine.
//
// It inserts the function prologue right after the root (initial) instruction:
//  1. save the return address (lr) plus size_of_arg_ret below SP,
//  2. optionally check stack bounds (unless disabled),
//  3. save clobbered (callee-saved) registers,
//  4. allocate the spill-slot area,
//  5. push the frame size for stack unwinding.
func (m *machine) SetupPrologue() {
	cur := m.rootInstr
	// Remember the instruction that currently follows the root so the
	// prologue can be spliced in between root and the original body.
	prevInitInst := cur.next

	//
	//    (high address)                    (high address)
	// SP----> +-----------------+               +------------------+ <----+
	//         |     .......     |               |     .......      |      |
	//         |      ret Y      |               |      ret Y       |      |
	//         |     .......     |               |     .......      |      |
	//         |      ret 0      |               |      ret 0       |      |
	//         |      arg X      |               |      arg X       |      |  size_of_arg_ret.
	//         |     .......     |     ====>     |     .......      |      |
	//         |      arg 1      |               |      arg 1       |      |
	//         |      arg 0      |               |      arg 0       | <----+
	//         |-----------------|               | size_of_arg_ret  |
	//                                           |  return address  |
	//                                           +------------------+ <---- SP
	//    (low address)                          (low address)

	// Saves the return address (lr) and the size_of_arg_ret below the SP.
	// size_of_arg_ret is used for stack unwinding.
	cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)

	if !m.stackBoundsCheckDisabled {
		cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
	}

	// Decrement SP if spillSlotSize > 0.
	// Sanity check: having spill slots with a zero spillSlotSize is an
	// internal inconsistency, so fail loudly rather than emit a bad frame.
	if m.spillSlotSize == 0 && len(m.spillSlots) != 0 {
		panic(fmt.Sprintf("BUG: spillSlotSize=%d, spillSlots=%v\n", m.spillSlotSize, m.spillSlots))
	}

	if regs := m.clobberedRegs; len(regs) > 0 {
		//
		//            (high address)                  (high address)
		//          +-----------------+             +-----------------+
		//          |     .......     |             |     .......     |
		//          |      ret Y      |             |      ret Y      |
		//          |     .......     |             |     .......     |
		//          |      ret 0      |             |      ret 0      |
		//          |      arg X      |             |      arg X      |
		//          |     .......     |             |     .......     |
		//          |      arg 1      |             |      arg 1      |
		//          |      arg 0      |             |      arg 0      |
		//          | size_of_arg_ret |             | size_of_arg_ret |
		//          |  ReturnAddress  |             |  ReturnAddress  |
		// SP---->  +-----------------+    ====>    +-----------------+
		//             (low address)                |   clobbered M   |
		//                                          |   ............  |
		//                                          |   clobbered 0   |
		//                                          +-----------------+ <----- SP
		//                                             (low address)
		//
		_amode := addressModePreOrPostIndex(spVReg,
			-16,  // stack pointer must be 16-byte aligned.
			true, // Decrement before store.
		)
		for _, vr := range regs {
			// TODO: pair stores to reduce the number of instructions.
			store := m.allocateInstr()
			store.asStore(operandNR(vr), _amode, regTypeToRegisterSizeInBits(vr.RegType()))
			cur = linkInstr(cur, store)
		}
	}

	if size := m.spillSlotSize; size > 0 {
		// Check if size is 16-byte aligned.
		if size&0xf != 0 {
			panic(fmt.Errorf("BUG: spill slot size %d is not 16-byte aligned", size))
		}

		// add=false: subtract `size` from SP to carve out the spill area.
		cur = m.addsAddOrSubStackPointer(cur, spVReg, size, false)

		// At this point, the stack looks like:
		//
		//            (high address)
		//          +------------------+
		//          |     .......      |
		//          |      ret Y       |
		//          |     .......      |
		//          |      ret 0       |
		//          |      arg X       |
		//          |     .......      |
		//          |      arg 1       |
		//          |      arg 0       |
		//          | size_of_arg_ret  |
		//          |   ReturnAddress  |
		//          +------------------+
		//          |    clobbered M   |
		//          |   ............   |
		//          |    clobbered 0   |
		//          |   spill slot N   |
		//          |   ............   |
		//          |   spill slot 2   |
		//          |   spill slot 0   |
		// SP---->  +------------------+
		//             (low address)
	}

	// We push the frame size into the stack to make it possible to unwind stack:
	//
	//
	//            (high address)                  (high address)
	//         +-----------------+             +-----------------+
	//         |     .......     |             |     .......     |
	//         |      ret Y      |             |      ret Y      |
	//         |     .......     |             |     .......     |
	//         |      ret 0      |             |      ret 0      |
	//         |      arg X      |             |      arg X      |
	//         |     .......     |             |     .......     |
	//         |      arg 1      |             |      arg 1      |
	//         |      arg 0      |             |      arg 0      |
	//         | size_of_arg_ret |             | size_of_arg_ret |
	//         |  ReturnAddress  |             |  ReturnAddress  |
	//         +-----------------+     ==>     +-----------------+ <----+
	//         |   clobbered M   |             |   clobbered M   |      |
	//         |   ............  |             |   ............  |      |
	//         |   clobbered 2   |             |   clobbered 2   |      |
	//         |   clobbered 1   |             |   clobbered 1   |      | frame size
	//         |   clobbered 0   |             |   clobbered 0   |      |
	//         |   spill slot N  |             |   spill slot N  |      |
	//         |   ............  |             |   ............  |      |
	//         |   spill slot 0  |             |   spill slot 0  | <----+
	// SP--->  +-----------------+             |     xxxxxx      |  ;; unused space to make it 16-byte aligned.
	//                                         |   frame_size    |
	//                                         +-----------------+ <---- SP
	//            (low address)
	//
	cur = m.createFrameSizeSlot(cur, m.frameSize())

	// Re-attach the original body after the freshly emitted prologue.
	linkInstr(cur, prevInitInst)
}

// createReturnAddrAndSizeOfArgRetSlot emits instructions after `cur` that
// reserve the argument/result stack area (if any) and store the pair
// (lr, size_of_arg_ret) below SP, returning the last emitted instruction.
func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruction {
	// First we decrement the stack pointer to point the arg0 slot.
	var sizeOfArgRetReg regalloc.VReg
	s := m.currentABI.alignedArgResultStackSlotSize()
	if s > 0 {
		// Materialize the constant into the temporary register, then SP -= s.
		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
		sizeOfArgRetReg = tmpRegVReg

		subSp := m.allocateInstr()
		subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
		cur = linkInstr(cur, subSp)
	} else {
		// No arg/result area: store zero (xzr) as the recorded size.
		sizeOfArgRetReg = xzrVReg
	}

	// Saves the return address (lr) and the size_of_arg_ret below the SP.
	// size_of_arg_ret is used for stack unwinding.
	pstr := m.allocateInstr()
	amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */)
	pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)
	cur = linkInstr(cur, pstr)
	return cur
}

// createFrameSizeSlot emits instructions after `cur` that push the frame
// size `s` (zero is stored via xzr) below SP in a 16-byte-aligned slot,
// returning the last emitted instruction. The slot is consumed by the
// epilogue and by stack unwinding.
func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {
	var frameSizeReg regalloc.VReg
	if s > 0 {
		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
		frameSizeReg = tmpRegVReg
	} else {
		frameSizeReg = xzrVReg
	}
	_amode := addressModePreOrPostIndex(spVReg,
		-16,  // stack pointer must be 16-byte aligned.
		true, // Decrement before store.
	)
	store := m.allocateInstr()
	store.asStore(operandNR(frameSizeReg), _amode, 64)
	cur = linkInstr(cur, store)
	return cur
}

// SetupEpilogue implements backend.Machine.
//
// It walks the whole instruction list, inserting the epilogue sequence
// immediately before every `ret`, and opportunistically unlinking
// self-copies (mov x, x) left over from register allocation.
func (m *machine) SetupEpilogue() {
	for cur := m.rootInstr; cur != nil; cur = cur.next {
		if cur.kind == ret {
			m.setupEpilogueAfter(cur.prev)
			continue
		}

		// Removes the redundant copy instruction.
		// TODO: doing this in `SetupEpilogue` seems weird. Find a better home.
		if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() {
			prev, next := cur.prev, cur.next
			// Remove the copy instruction.
			prev.next = next
			if next != nil {
				next.prev = prev
			}
		}
	}
}

// setupEpilogueAfter emits the epilogue after `cur` (the instruction just
// before a `ret`): it pops the frame_size slot, frees the spill area,
// restores clobbered registers, reloads lr, and frees the arg/result area,
// mirroring SetupPrologue in reverse.
func (m *machine) setupEpilogueAfter(cur *instruction) {
	prevNext := cur.next

	// We've stored the frame size in the prologue, and now that we are about to return from this function, we won't need it anymore.
	cur = m.addsAddOrSubStackPointer(cur, spVReg, 16, true)

	if s := m.spillSlotSize; s > 0 {
		// Adjust SP to the original value:
		//
		//            (high address)                        (high address)
		//          +-----------------+                  +-----------------+
		//          |     .......     |                  |     .......     |
		//          |      ret Y      |                  |      ret Y      |
		//          |     .......     |                  |     .......     |
		//          |      ret 0      |                  |      ret 0      |
		//          |      arg X      |                  |      arg X      |
		//          |     .......     |                  |     .......     |
		//          |      arg 1      |                  |      arg 1      |
		//          |      arg 0      |                  |      arg 0      |
		//          |      xxxxx      |                  |      xxxxx      |
		//          |  ReturnAddress  |                  |  ReturnAddress  |
		//          +-----------------+      ====>       +-----------------+
		//          |   clobbered M   |                  |   clobbered M   |
		//          |   ............  |                  |   ............  |
		//          |   clobbered 1   |                  |   clobbered 1   |
		//          |   clobbered 0   |                  |   clobbered 0   |
		//          |   spill slot N  |                  +-----------------+ <---- SP
		//          |   ............  |
		//          |   spill slot 0  |
		// SP---> +-----------------+
		//            (low address)
		//
		cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
	}

	// First we need to restore the clobbered registers.
	if len(m.clobberedRegs) > 0 {
		//            (high address)
		//          +-----------------+                      +-----------------+
		//          |     .......     |                      |     .......     |
		//          |      ret Y      |                      |      ret Y      |
		//          |     .......     |                      |     .......     |
		//          |      ret 0      |                      |      ret 0      |
		//          |      arg X      |                      |      arg X      |
		//          |     .......     |                      |     .......     |
		//          |      arg 1      |                      |      arg 1      |
		//          |      arg 0      |                      |      arg 0      |
		//          |      xxxxx      |                      |      xxxxx      |
		//          |  ReturnAddress  |                      |  ReturnAddress  |
		//          +-----------------+      ========>       +-----------------+ <---- SP
		//          |   clobbered M   |
		//          |   clobbered 1   |
		//          |   ...........   |
		//          |   clobbered 0   |
		// SP---> +-----------------+
		//            (low address)

		// Registers were pushed in slice order, so pop them in reverse.
		l := len(m.clobberedRegs) - 1
		for i := range m.clobberedRegs {
			vr := m.clobberedRegs[l-i] // reverse order to restore.
			load := m.allocateInstr()
			amode := addressModePreOrPostIndex(spVReg,
				16,    // stack pointer must be 16-byte aligned.
				false, // Increment SP after the load (post-index).
			)
			// TODO: pair loads to reduce the number of instructions.
			switch regTypeToRegisterSizeInBits(vr.RegType()) {
			case 64: // save int reg.
				load.asULoad(operandNR(vr), amode, 64)
			case 128: // save vector reg.
				load.asFpuLoad(operandNR(vr), amode, 128)
			}
			cur = linkInstr(cur, load)
		}
	}

	// Reload the return address (lr).
	//
	//            +-----------------+          +-----------------+
	//            |     .......     |          |     .......     |
	//            |      ret Y      |          |      ret Y      |
	//            |     .......     |          |     .......     |
	//            |      ret 0      |          |      ret 0      |
	//            |      arg X      |          |      arg X      |
	//            |     .......     |   ===>   |     .......     |
	//            |      arg 1      |          |      arg 1      |
	//            |      arg 0      |          |      arg 0      |
	//            |      xxxxx      |          +-----------------+ <---- SP
	//            |  ReturnAddress  |
	// SP----> +-----------------+

	ldr := m.allocateInstr()
	ldr.asULoad(operandNR(lrVReg),
		addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
	cur = linkInstr(cur, ldr)

	// Finally free the argument/result stack area reserved by the prologue.
	if s := m.currentABI.alignedArgResultStackSlotSize(); s > 0 {
		cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
	}

	// Re-attach the `ret` (and anything after it) behind the epilogue.
	linkInstr(cur, prevNext)
}

// saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient
// stack space left. Basically this is the combination of CalleeSavedRegisters plus argument registers except for x0,
// which always points to the execution context whenever the native code is entered from Go.
var saveRequiredRegs = []regalloc.VReg{
	x1VReg, x2VReg, x3VReg, x4VReg, x5VReg, x6VReg, x7VReg,
	x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg, lrVReg,
	v0VReg, v1VReg, v2VReg, v3VReg, v4VReg, v5VReg, v6VReg, v7VReg,
	v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
}

// insertStackBoundsCheck will insert the instructions after `cur` to check the
// stack bounds, and if there's no sufficient spaces required for the function,
// exit the execution and try growing it in Go world.
//
// TODO: we should be able to share the instructions across all the functions to reduce the size of compiled executable.
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
	// SP adjustments are always 16-byte multiples; anything else is a
	// compiler bug upstream of this point.
	if requiredStackSize%16 != 0 {
		panic("BUG")
	}

	if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {
		// sub tmp, sp, #requiredStackSize
		sub := m.allocateInstr()
		sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true)
		cur = linkInstr(cur, sub)
	} else {
		// This case, we first load the requiredStackSize into the temporary register,
		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
		// Then subtract it.
		sub := m.allocateInstr()
		sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true)
		cur = linkInstr(cur, sub)
	}

	tmp2 := x11VReg // Callee save, so it is safe to use it here in the prologue.

	// ldr tmp2, [executionContext #StackBottomPtr]
	ldr := m.allocateInstr()
	ldr.asULoad(operandNR(tmp2), addressMode{
		kind: addressModeKindRegUnsignedImm12,
		rn:   x0VReg, // execution context is always the first argument.
		imm:  wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(),
	}, 64)
	cur = linkInstr(cur, ldr)

	// subs xzr, tmp, tmp2
	// Compare (sp - requiredStackSize) against the stack bottom; result is
	// discarded into xzr, only the condition flags matter.
	subs := m.allocateInstr()
	subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true)
	cur = linkInstr(cur, subs)

	// b.ge #imm
	// Branch over the grow sequence when there is enough space. The target
	// offset is unknown until the sequence is emitted; resolved below.
	cbr := m.allocateInstr()
	cbr.asCondBr(ge.asCond(), invalidLabel, false /* ignored */)
	cur = linkInstr(cur, cbr)

	// Set the required stack size and set it to the exec context.
	{
		// First load the requiredStackSize into the temporary register,
		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
		setRequiredStackSize := m.allocateInstr()
		setRequiredStackSize.asStore(operandNR(tmpRegVReg),
			addressMode{
				kind: addressModeKindRegUnsignedImm12,
				// Execution context is always the first argument.
				rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
			}, 64)

		cur = linkInstr(cur, setRequiredStackSize)
	}

	ldrAddress := m.allocateInstr()
	ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
		kind: addressModeKindRegUnsignedImm12,
		rn:   x0VReg, // execution context is always the first argument
		imm:  wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
	}, 64)
	cur = linkInstr(cur, ldrAddress)

	// Then jumps to the stack grow call sequence's address, meaning
	// transferring the control to the code compiled by CompileStackGrowCallSequence.
	bl := m.allocateInstr()
	bl.asCallIndirect(tmpRegVReg, nil)
	cur = linkInstr(cur, bl)

	// Now that we know the entire code, we can finalize how many bytes
	// we have to skip when the stack size is sufficient.
	var cbrOffset int64
	for _cur := cbr; ; _cur = _cur.next {
		cbrOffset += _cur.size()
		if _cur == cur {
			break
		}
	}
	cbr.condBrOffsetResolve(cbrOffset)
	return cur
}

// CompileStackGrowCallSequence implements backend.Machine.
//
// It compiles the shared trampoline jumped to by insertStackBoundsCheck:
// save all registers in saveRequiredRegs into the execution context, record
// the current SP, exit to Go with ExitCodeGrowStack, then (after Go grows
// the stack and re-enters) restore the registers and return to the caller.
func (m *machine) CompileStackGrowCallSequence() []byte {
	cur := m.allocateInstr()
	cur.asNop0()
	m.rootInstr = cur

	// Save the callee saved and argument registers.
	cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs)

	// Save the current stack pointer.
	cur = m.saveCurrentStackPointer(cur, x0VReg)

	// Set the exit status on the execution context.
	cur = m.setExitCode(cur, x0VReg, wazevoapi.ExitCodeGrowStack)

	// Exit the execution.
	cur = m.storeReturnAddressAndExit(cur)

	// After the exit, restore the saved registers.
	cur = m.restoreRegistersInExecutionContext(cur, saveRequiredRegs)

	// Then goes back the original address of this stack grow call.
	ret := m.allocateInstr()
	ret.asRet(nil)
	linkInstr(cur, ret)

	m.encode(m.rootInstr)
	return m.compiler.Buf()
}

// addsAddOrSubStackPointer links after `cur` the instruction sequence that
// adds (add=true) or subtracts (add=false) `diff` to/from the stack pointer,
// writing the result to `rd`, and returns the last linked instruction.
// It reuses m.pendingInstructions as a scratch buffer.
func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction {
	m.pendingInstructions = m.pendingInstructions[:0]
	m.insertAddOrSubStackPointer(rd, diff, add)
	for _, inserted := range m.pendingInstructions {
		cur = linkInstr(cur, inserted)
	}
	return cur
}