github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/wazevo/backend/isa/arm64/abi.go

package arm64

import (
	"github.com/wasilibs/wazerox/internal/engine/wazevo/backend"
	"github.com/wasilibs/wazerox/internal/engine/wazevo/backend/regalloc"
	"github.com/wasilibs/wazerox/internal/engine/wazevo/ssa"
)

// References:
// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture
// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard

const xArgRetRegMax, vArgRetRegMax = x7, v7 // x0-x7 & v0-v7.

var regInfo = &regalloc.RegisterInfo{
	AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
		// We don't allocate:
		// - x18: Reserved by macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
		// - x28: Reserved by the Go runtime.
		// - x27 (=tmpReg): reserved for the reason described on tmpReg.
		regalloc.RegTypeInt: {
			x8, x9, x10, x11, x12, x13, x14, x15,
			x16, x17, x19, x20, x21, x22, x23, x24, x25,
			x26, x29, x30,
			// These are the argument/return registers. Less preferred in the allocation.
			x7, x6, x5, x4, x3, x2, x1, x0,
		},
		regalloc.RegTypeFloat: {
			v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
			v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
			// These are the argument/return registers. Less preferred in the allocation.
			v7, v6, v5, v4, v3, v2, v1, v0,
		},
	},
	CalleeSavedRegisters: [regalloc.RealRegsNumMax]bool{
		x19: true, x20: true, x21: true, x22: true, x23: true, x24: true, x25: true, x26: true, x28: true,
		v18: true, v19: true, v20: true, v21: true, v22: true, v23: true, v24: true, v25: true, v26: true,
		v27: true, v28: true, v29: true, v30: true, v31: true,
	},
	CallerSavedRegisters: [regalloc.RealRegsNumMax]bool{
		x0: true, x1: true, x2: true, x3: true, x4: true, x5: true, x6: true, x7: true, x8: true, x9: true, x10: true,
		x11: true, x12: true, x13: true, x14: true, x15: true, x16: true, x17: true, x29: true, x30: true,
		v0: true, v1: true, v2: true, v3: true, v4: true, v5: true, v6: true, v7: true, v8: true, v9: true, v10: true,
		v11: true, v12: true, v13: true, v14: true, v15: true, v16: true, v17: true,
	},
	RealRegToVReg: []regalloc.VReg{
		x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg,
		v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg,
	},
	RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
	RealRegType: func(r regalloc.RealReg) regalloc.RegType {
		if r < v0 {
			return regalloc.RegTypeInt
		}
		return regalloc.RegTypeFloat
	},
}

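// Illustrative note (not part of the original source): with the constants above,
// setABIArgs below assigns integer parameters/results to x0-x7 and floating-point or
// vector ones to v0-v7, each class counted independently; anything beyond that spills
// to the stack in 8-byte slots (16 bytes for v128). For example, a hypothetical
// signature
//
//	(i64, f32, i64, f64, v128, i32 x8) -> i64
//
// would be assigned as
//
//	i64 -> x0, f32 -> v0, i64 -> x1, f64 -> v1, v128 -> v2,
//	the first six i32 -> x2..x7, the remaining two i32 -> stack offsets 0 and 8,
//	result i64 -> x0
//
// so argStackSize would be 16 and retStackSize 0 for this signature.
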
// abiImpl implements backend.FunctionABI.
type abiImpl struct {
	m                          *machine
	args, rets                 []backend.ABIArg
	argStackSize, retStackSize int64

	argRealRegs []regalloc.VReg
	retRealRegs []regalloc.VReg
}

// getOrCreateABIImpl returns the abiImpl for the given signature, creating and
// caching it per ssa.SignatureID on first use.
func (m *machine) getOrCreateABIImpl(sig *ssa.Signature) *abiImpl {
	if int(sig.ID) >= len(m.abis) {
		m.abis = append(m.abis, make([]abiImpl, int(sig.ID)+1)...)
	}

	abi := &m.abis[sig.ID]
	if abi.m != nil {
		return abi
	}

	abi.m = m
	abi.init(sig)
	return abi
}

// init initializes the abiImpl for the given signature.
func (a *abiImpl) init(sig *ssa.Signature) {
	if len(a.rets) < len(sig.Results) {
		a.rets = make([]backend.ABIArg, len(sig.Results))
	}
	a.rets = a.rets[:len(sig.Results)]
	a.retStackSize = a.setABIArgs(a.rets, sig.Results)
	if argsNum := len(sig.Params); len(a.args) < argsNum {
		a.args = make([]backend.ABIArg, argsNum)
	}
	a.args = a.args[:len(sig.Params)]
	a.argStackSize = a.setABIArgs(a.args, sig.Params)

	// Gather the real registers used by the arguments/returns.
	a.retRealRegs = a.retRealRegs[:0]
	for i := range a.rets {
		r := &a.rets[i]
		if r.Kind == backend.ABIArgKindReg {
			a.retRealRegs = append(a.retRealRegs, r.Reg)
		}
	}
	a.argRealRegs = a.argRealRegs[:0]
	for i := range a.args {
		arg := &a.args[i]
		if arg.Kind == backend.ABIArgKindReg {
			reg := arg.Reg
			a.argRealRegs = append(a.argRealRegs, reg)
		}
	}
}

// setABIArgs sets the ABI arguments in the given slice. This assumes that len(s) >= len(types),
// where if len(s) > len(types), the last elements of s are for the multi-return slot.
func (a *abiImpl) setABIArgs(s []backend.ABIArg, types []ssa.Type) (stackSize int64) {
	var stackOffset int64
	nextX, nextV := x0, v0
	for i, typ := range types {
		arg := &s[i]
		arg.Index = i
		arg.Type = typ
		if typ.IsInt() {
			if nextX > xArgRetRegMax {
				arg.Kind = backend.ABIArgKindStack
				const slotSize = 8 // Align 8 bytes.
				arg.Offset = stackOffset
				stackOffset += slotSize
			} else {
				arg.Kind = backend.ABIArgKindReg
				arg.Reg = regalloc.FromRealReg(nextX, regalloc.RegTypeInt)
				nextX++
			}
		} else {
			if nextV > vArgRetRegMax {
				arg.Kind = backend.ABIArgKindStack
				slotSize := int64(8) // Align at least 8 bytes.
				if typ.Bits() == 128 { // Vector.
					slotSize = 16
				}
				arg.Offset = stackOffset
				stackOffset += slotSize
			} else {
				arg.Kind = backend.ABIArgKindReg
				arg.Reg = regalloc.FromRealReg(nextV, regalloc.RegTypeFloat)
				nextV++
			}
		}
	}
	return stackOffset
}

// CalleeGenFunctionArgsToVRegs implements backend.FunctionABI.
func (a *abiImpl) CalleeGenFunctionArgsToVRegs(args []ssa.Value) {
	for i, ssaArg := range args {
		if !ssaArg.Valid() {
			continue
		}
		reg := a.m.compiler.VRegOf(ssaArg)
		arg := &a.args[i]
		if arg.Kind == backend.ABIArgKindReg {
			a.m.InsertMove(reg, arg.Reg, arg.Type)
		} else {
			// TODO: we could use a pair load if there are consecutive loads for the same type.
			//
			//            (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      |
			//          |      arg X      |
			//          |     .......     |
			//          |      arg 1      |
			//          |      arg 0      | <-|
			//          |  ReturnAddress  |   |
			//          +-----------------+   |
			//          |   ...........   |   |
			//          |   spill slot M  |   | argStackOffset: is unknown at this point of compilation.
			//          |   ............  |   |
			//          |   spill slot 2  |   |
			//          |   spill slot 1  |   |
			//          |   clobbered 0   |   |
			//          |   clobbered 1   |   |
			//          |   ...........   |   |
			//          |   clobbered N   |   |
			//  SP----> +-----------------+ <-+
			//             (low address)

			m := a.m
			bits := arg.Type.Bits()
			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `argStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
			load := m.allocateInstr()
			switch arg.Type {
			case ssa.TypeI32, ssa.TypeI64:
				load.asULoad(operandNR(reg), amode, bits)
			case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
				load.asFpuLoad(operandNR(reg), amode, bits)
			default:
				panic("BUG")
			}
			m.insert(load)
			a.m.unresolvedAddressModes = append(a.m.unresolvedAddressModes, load)
		}
	}
}

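// Illustrative note (not part of the original source): for a stack-passed i64
// argument with arg.Offset == 8, the code above conceptually emits
//
//	ldr <vreg>, [sp, #(argStackOffset + 8)]
//
// but argStackOffset (the distance from SP up to "arg 0" in the diagram) is not
// known until the spill and clobbered-register slots are finalized. The load is
// therefore appended to unresolvedAddressModes so that a later pass can fold
// argStackOffset into the immediate.
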
// CalleeGenVRegsToFunctionReturns implements backend.FunctionABI.
func (a *abiImpl) CalleeGenVRegsToFunctionReturns(rets []ssa.Value) {
	l := len(rets) - 1
	for i := range rets {
		// Reverse order to avoid overwriting the stack returns that are still in the return registers.
		ret := rets[l-i]
		r := &a.rets[l-i]
		reg := a.m.compiler.VRegOf(ret)
		if def := a.m.compiler.ValueDefinition(ret); def.IsFromInstr() {
			// Constant instructions are inlined.
			if inst := def.Instr; inst.Constant() {
				a.m.InsertLoadConstant(inst, reg)
			}
		}
		if r.Kind == backend.ABIArgKindReg {
			a.m.InsertMove(r.Reg, reg, ret.Type())
		} else {
			// TODO: we could use a pair store if there are consecutive stores for the same type.
			//
			//            (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      | <-+
			//          |      arg X      |   |
			//          |     .......     |   |
			//          |      arg 1      |   |
			//          |      arg 0      |   |
			//          |  ReturnAddress  |   |
			//          +-----------------+   |
			//          |   ...........   |   |
			//          |   spill slot M  |   | retStackOffset: is unknown at this point of compilation.
			//          |   ............  |   |
			//          |   spill slot 2  |   |
			//          |   spill slot 1  |   |
			//          |   clobbered 0   |   |
			//          |   clobbered 1   |   |
			//          |   ...........   |   |
			//          |   clobbered N   |   |
			//  SP----> +-----------------+ <-+
			//             (low address)

			bits := r.Type.Bits()

			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `retStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
			store := a.m.allocateInstr()
			store.asStore(operandNR(reg), amode, bits)
			a.m.insert(store)
			a.m.unresolvedAddressModes = append(a.m.unresolvedAddressModes, store)
		}
	}
}

// callerGenVRegToFunctionArg is the caller-side counterpart of CalleeGenFunctionArgsToVRegs;
// it is used to generate the caller side of the function call, placing the value in reg into
// the register or stack slot expected for the argIndex-th argument.
func (a *abiImpl) callerGenVRegToFunctionArg(argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) {
	arg := &a.args[argIndex]
	if def != nil && def.IsFromInstr() {
		// Constant instructions are inlined.
		if inst := def.Instr; inst.Constant() {
			a.m.InsertLoadConstant(inst, reg)
		}
	}
	if arg.Kind == backend.ABIArgKindReg {
		a.m.InsertMove(arg.Reg, reg, arg.Type)
	} else {
		// TODO: we could use a pair store if there are consecutive stores for the same type.
		//
		// Note that at this point, the stack pointer is already adjusted.
		bits := arg.Type.Bits()
		amode := a.m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false)
		store := a.m.allocateInstr()
		store.asStore(operandNR(reg), amode, bits)
		a.m.insert(store)
	}
}

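// Illustrative note (not part of the original source): on both the callee side
// (CalleeGenVRegsToFunctionReturns above) and the caller side (callerGenVRegToFunctionArg
// above), a value defined by a constant instruction is materialized directly into the
// target virtual register via InsertLoadConstant rather than being computed elsewhere and
// moved. For a hypothetical direct call like f(42), the lowering loads 42 into the
// argument's virtual register and then moves it into x0, the first integer argument
// register.
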
281 // 282 // Note that at this point, stack pointer is already adjusted. 283 bits := arg.Type.Bits() 284 amode := a.m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false) 285 store := a.m.allocateInstr() 286 store.asStore(operandNR(reg), amode, bits) 287 a.m.insert(store) 288 } 289 } 290 291 func (a *abiImpl) callerGenFunctionReturnVReg(retIndex int, reg regalloc.VReg, slotBegin int64) { 292 r := &a.rets[retIndex] 293 if r.Kind == backend.ABIArgKindReg { 294 a.m.InsertMove(reg, r.Reg, r.Type) 295 } else { 296 // TODO: we could use pair load if there's consecutive loads for the same type. 297 amode := a.m.resolveAddressModeForOffset(a.argStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false) 298 ldr := a.m.allocateInstr() 299 switch r.Type { 300 case ssa.TypeI32, ssa.TypeI64: 301 ldr.asULoad(operandNR(reg), amode, r.Type.Bits()) 302 case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: 303 ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits()) 304 default: 305 panic("BUG") 306 } 307 a.m.insert(ldr) 308 } 309 } 310 311 func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) { 312 m.pendingInstructions = m.pendingInstructions[:0] 313 mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) 314 for _, instr := range m.pendingInstructions { 315 cur = linkInstr(cur, instr) 316 } 317 return cur, mode 318 } 319 320 func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode { 321 if rn.RegType() != regalloc.RegTypeInt { 322 panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64)) 323 } 324 var amode addressMode 325 if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) { 326 amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} 327 } else if offsetFitsInAddressModeKindRegSignedImm9(offset) { 328 amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} 329 } else { 330 var indexReg regalloc.VReg 331 if allowTmpRegUse { 332 m.lowerConstantI64(tmpRegVReg, offset) 333 indexReg = tmpRegVReg 334 } else { 335 indexReg = m.compiler.AllocateVReg(ssa.TypeI64) 336 m.lowerConstantI64(indexReg, offset) 337 } 338 amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} 339 } 340 return amode 341 } 342 343 func (a *abiImpl) alignedArgResultStackSlotSize() int64 { 344 stackSlotSize := a.retStackSize + a.argStackSize 345 // Align stackSlotSize to 16 bytes. 346 stackSlotSize = (stackSlotSize + 15) &^ 15 347 return stackSlotSize 348 } 349 350 func (m *machine) lowerCall(si *ssa.Instruction) { 351 isDirectCall := si.Opcode() == ssa.OpcodeCall 352 var indirectCalleePtr ssa.Value 353 var directCallee ssa.FuncRef 354 var sigID ssa.SignatureID 355 var args []ssa.Value 356 if isDirectCall { 357 directCallee, sigID, args = si.CallData() 358 } else { 359 indirectCalleePtr, sigID, args = si.CallIndirectData() 360 } 361 calleeABI := m.getOrCreateABIImpl(m.compiler.SSABuilder().ResolveSignature(sigID)) 362 363 stackSlotSize := calleeABI.alignedArgResultStackSlotSize() 364 if m.maxRequiredStackSizeForCalls < stackSlotSize+16 { 365 m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame. 
366 } 367 368 for i, arg := range args { 369 reg := m.compiler.VRegOf(arg) 370 def := m.compiler.ValueDefinition(arg) 371 calleeABI.callerGenVRegToFunctionArg(i, reg, def, stackSlotSize) 372 } 373 374 if isDirectCall { 375 call := m.allocateInstr() 376 call.asCall(directCallee, calleeABI) 377 m.insert(call) 378 } else { 379 ptr := m.compiler.VRegOf(indirectCalleePtr) 380 callInd := m.allocateInstr() 381 callInd.asCallIndirect(ptr, calleeABI) 382 m.insert(callInd) 383 } 384 385 var index int 386 r1, rs := si.Returns() 387 if r1.Valid() { 388 calleeABI.callerGenFunctionReturnVReg(0, m.compiler.VRegOf(r1), stackSlotSize) 389 index++ 390 } 391 392 for _, r := range rs { 393 calleeABI.callerGenFunctionReturnVReg(index, m.compiler.VRegOf(r), stackSlotSize) 394 index++ 395 } 396 } 397 398 func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) { 399 if imm12Operand, ok := asImm12Operand(uint64(diff)); ok { 400 alu := m.allocateInstr() 401 var ao aluOp 402 if add { 403 ao = aluOpAdd 404 } else { 405 ao = aluOpSub 406 } 407 alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true) 408 m.insert(alu) 409 } else { 410 m.lowerConstantI64(tmpRegVReg, diff) 411 alu := m.allocateInstr() 412 var ao aluOp 413 if add { 414 ao = aluOpAdd 415 } else { 416 ao = aluOpSub 417 } 418 alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true) 419 m.insert(alu) 420 } 421 }