github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/wazevo/backend/isa/arm64/abi.go

package arm64

import (
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/ssa"
)

// References:
// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture
// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard

const xArgRetRegMax, vArgRetRegMax = x7, v7 // x0-x7 & v0-v7.

var regInfo = &regalloc.RegisterInfo{
	AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
		// We don't allocate:
		// - x18: reserved by macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
		// - x28: reserved by the Go runtime.
		// - x27 (=tmpReg): reserved for the reason described on tmpReg.
		regalloc.RegTypeInt: {
			x8, x9, x10, x11, x12, x13, x14, x15,
			x16, x17, x19, x20, x21, x22, x23, x24, x25,
			x26, x29, x30,
			// These are the argument/return registers. Less preferred in the allocation.
			x7, x6, x5, x4, x3, x2, x1, x0,
		},
		regalloc.RegTypeFloat: {
			v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
			v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
			// These are the argument/return registers. Less preferred in the allocation.
			v7, v6, v5, v4, v3, v2, v1, v0,
		},
	},
	CalleeSavedRegisters: [regalloc.RealRegsNumMax]bool{
		x19: true, x20: true, x21: true, x22: true, x23: true, x24: true, x25: true, x26: true, x28: true,
		v18: true, v19: true, v20: true, v21: true, v22: true, v23: true, v24: true, v25: true, v26: true,
		v27: true, v28: true, v29: true, v30: true, v31: true,
	},
	CallerSavedRegisters: [regalloc.RealRegsNumMax]bool{
		x0: true, x1: true, x2: true, x3: true, x4: true, x5: true, x6: true, x7: true, x8: true, x9: true, x10: true,
		x11: true, x12: true, x13: true, x14: true, x15: true, x16: true, x17: true, x29: true, x30: true,
		v0: true, v1: true, v2: true, v3: true, v4: true, v5: true, v6: true, v7: true, v8: true, v9: true, v10: true,
		v11: true, v12: true, v13: true, v14: true, v15: true, v16: true, v17: true,
	},
	RealRegToVReg: []regalloc.VReg{
		x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg,
		v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg,
	},
	RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
	RealRegType: func(r regalloc.RealReg) regalloc.RegType {
		if r < v0 {
			return regalloc.RegTypeInt
		}
		return regalloc.RegTypeFloat
	},
}
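
// Illustrative note (not part of the original source): the tables above are a compact description
// of the arm64 register conventions this backend uses. A minimal sketch of what one would expect
// them to answer, given only the definitions above:
//
//	regInfo.RealRegType(x9)           // regalloc.RegTypeInt (integer registers order below v0)
//	regInfo.RealRegType(v9)           // regalloc.RegTypeFloat
//	regInfo.CalleeSavedRegisters[x19] // true: must be preserved across calls
//	regInfo.CallerSavedRegisters[x9]  // true: may be clobbered by a call
//
// The argument/return registers x0-x7 and v0-v7 appear last in AllocatableRegisters, which
// presumably steers the allocator toward the other registers first so that live incoming
// arguments are not clobbered unnecessarily.
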
// abiImpl implements backend.FunctionABI.
type abiImpl struct {
	m                          *machine
	args, rets                 []backend.ABIArg
	argStackSize, retStackSize int64

	argRealRegs []regalloc.VReg
	retRealRegs []regalloc.VReg
}

func (m *machine) getOrCreateABIImpl(sig *ssa.Signature) *abiImpl {
	if int(sig.ID) >= len(m.abis) {
		m.abis = append(m.abis, make([]abiImpl, int(sig.ID)+1)...)
	}

	abi := &m.abis[sig.ID]
	if abi.m != nil {
		return abi
	}

	abi.m = m
	abi.init(sig)
	return abi
}

// init initializes the abiImpl for the given signature.
func (a *abiImpl) init(sig *ssa.Signature) {
	if len(a.rets) < len(sig.Results) {
		a.rets = make([]backend.ABIArg, len(sig.Results))
	}
	a.rets = a.rets[:len(sig.Results)]
	a.retStackSize = a.setABIArgs(a.rets, sig.Results)
	if argsNum := len(sig.Params); len(a.args) < argsNum {
		a.args = make([]backend.ABIArg, argsNum)
	}
	a.args = a.args[:len(sig.Params)]
	a.argStackSize = a.setABIArgs(a.args, sig.Params)

	// Gather the real registers used by the arguments/returns.
	a.retRealRegs = a.retRealRegs[:0]
	for i := range a.rets {
		r := &a.rets[i]
		if r.Kind == backend.ABIArgKindReg {
			a.retRealRegs = append(a.retRealRegs, r.Reg)
		}
	}
	a.argRealRegs = a.argRealRegs[:0]
	for i := range a.args {
		arg := &a.args[i]
		if arg.Kind == backend.ABIArgKindReg {
			reg := arg.Reg
			a.argRealRegs = append(a.argRealRegs, reg)
		}
	}
}

// setABIArgs sets the ABI arguments in the given slice. This assumes that len(s) >= len(types);
// if len(s) > len(types), the trailing elements of s are for the multi-return slot.
func (a *abiImpl) setABIArgs(s []backend.ABIArg, types []ssa.Type) (stackSize int64) {
	var stackOffset int64
	nextX, nextV := x0, v0
	for i, typ := range types {
		arg := &s[i]
		arg.Index = i
		arg.Type = typ
		if typ.IsInt() {
			if nextX > xArgRetRegMax {
				arg.Kind = backend.ABIArgKindStack
				const slotSize = 8 // Align 8 bytes.
				arg.Offset = stackOffset
				stackOffset += slotSize
			} else {
				arg.Kind = backend.ABIArgKindReg
				arg.Reg = regalloc.FromRealReg(nextX, regalloc.RegTypeInt)
				nextX++
			}
		} else {
			if nextV > vArgRetRegMax {
				arg.Kind = backend.ABIArgKindStack
				slotSize := int64(8)   // Align at least 8 bytes.
				if typ.Bits() == 128 { // Vector.
					slotSize = 16
				}
				arg.Offset = stackOffset
				stackOffset += slotSize
			} else {
				arg.Kind = backend.ABIArgKindReg
				arg.Reg = regalloc.FromRealReg(nextV, regalloc.RegTypeFloat)
				nextV++
			}
		}
	}
	return stackOffset
}
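
// Illustrative note (not part of the original source): a rough worked example of setABIArgs for a
// hypothetical signature with params (i64 x 9, f32, v128 x 9), following the logic above:
//
//	i64 #0..#7  -> x0..x7 (ABIArgKindReg)
//	i64 #8      -> stack, Offset 0 (8-byte slot)
//	f32         -> v0
//	v128 #0..#6 -> v1..v7
//	v128 #7, #8 -> stack, Offsets 8 and 24 (16-byte slots)
//
// yielding stackSize = 40. The integer and vector register cursors advance independently, while
// the stack offset is a single cursor shared by both classes in declaration order.
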
// CalleeGenFunctionArgsToVRegs implements backend.FunctionABI.
func (a *abiImpl) CalleeGenFunctionArgsToVRegs(args []ssa.Value) {
	for i, ssaArg := range args {
		if !ssaArg.Valid() {
			continue
		}
		reg := a.m.compiler.VRegOf(ssaArg)
		arg := &a.args[i]
		if arg.Kind == backend.ABIArgKindReg {
			a.m.InsertMove(reg, arg.Reg, arg.Type)
		} else {
			// TODO: we could use a pair load if there are consecutive loads for the same type.
			//
			//            (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      |
			//          |      arg X      |
			//          |     .......     |
			//          |      arg 1      |
			//          |      arg 0      | <-|
			//          |  ReturnAddress  |   |
			//          +-----------------+   |
			//          |   ...........   |   |
			//          |  spill slot M   |   | argStackOffset: is unknown at this point of compilation.
			//          |   ...........   |   |
			//          |  spill slot 2   |   |
			//          |  spill slot 1   |   |
			//          |   clobbered 0   |   |
			//          |   clobbered 1   |   |
			//          |   ...........   |   |
			//          |   clobbered N   |   |
			//   SP---> +-----------------+ <-+
			//             (low address)

			m := a.m
			bits := arg.Type.Bits()
			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `argStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
			load := m.allocateInstr()
			switch arg.Type {
			case ssa.TypeI32, ssa.TypeI64:
				load.asULoad(operandNR(reg), amode, bits)
			case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
				load.asFpuLoad(operandNR(reg), amode, bits)
			default:
				panic("BUG")
			}
			m.insert(load)
			a.m.unresolvedAddressModes = append(a.m.unresolvedAddressModes, load)
		}
	}
}

// CalleeGenVRegsToFunctionReturns implements backend.FunctionABI.
func (a *abiImpl) CalleeGenVRegsToFunctionReturns(rets []ssa.Value) {
	l := len(rets) - 1
	for i := range rets {
		// Iterate in reverse order to avoid overwriting the stack-bound returns that may still reside in the return registers.
		ret := rets[l-i]
		r := &a.rets[l-i]
		reg := a.m.compiler.VRegOf(ret)
		if def := a.m.compiler.ValueDefinition(ret); def.IsFromInstr() {
			// Constant instructions are inlined.
			if inst := def.Instr; inst.Constant() {
				a.m.InsertLoadConstant(inst, reg)
			}
		}
		if r.Kind == backend.ABIArgKindReg {
			a.m.InsertMove(r.Reg, reg, ret.Type())
		} else {
			// TODO: we could use a pair store if there are consecutive stores for the same type.
			//
			//            (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      | <-+
			//          |      arg X      |   |
			//          |     .......     |   |
			//          |      arg 1      |   |
			//          |      arg 0      |   |
			//          |  ReturnAddress  |   |
			//          +-----------------+   |
			//          |   ...........   |   |
			//          |  spill slot M   |   | retStackOffset: is unknown at this point of compilation.
			//          |   ...........   |   |
			//          |  spill slot 2   |   |
			//          |  spill slot 1   |   |
			//          |   clobbered 0   |   |
			//          |   clobbered 1   |   |
			//          |   ...........   |   |
			//          |   clobbered N   |   |
			//   SP---> +-----------------+ <-+
			//             (low address)

			bits := r.Type.Bits()

			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `retStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
			store := a.m.allocateInstr()
			store.asStore(operandNR(reg), amode, bits)
			a.m.insert(store)
			a.m.unresolvedAddressModes = append(a.m.unresolvedAddressModes, store)
		}
	}
}
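
// Illustrative note (not part of the original source): for a hypothetical signature returning
// (i64 x 9), the callee side sketched above moves results #0..#7 into x0..x7 and emits a store
// for result #8 with kind addressModeKindResultStackSpace at offset 0. The final SP-relative
// displacement cannot be computed yet, so the instruction is appended to unresolvedAddressModes
// and adjusted once the size of the spill/clobbered-register area below the return address is known.
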
// callerGenVRegToFunctionArg is the counterpart of CalleeGenFunctionArgsToVRegs, and is used to
// generate the caller side of the function call.
func (a *abiImpl) callerGenVRegToFunctionArg(argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) {
	arg := &a.args[argIndex]
	if def != nil && def.IsFromInstr() {
		// Constant instructions are inlined.
		if inst := def.Instr; inst.Constant() {
			a.m.InsertLoadConstant(inst, reg)
		}
	}
	if arg.Kind == backend.ABIArgKindReg {
		a.m.InsertMove(arg.Reg, reg, arg.Type)
	} else {
		// TODO: we could use a pair store if there are consecutive stores for the same type.
		//
		// Note that at this point, the stack pointer is already adjusted.
		bits := arg.Type.Bits()
		amode := a.m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false)
		store := a.m.allocateInstr()
		store.asStore(operandNR(reg), amode, bits)
		a.m.insert(store)
	}
}

func (a *abiImpl) callerGenFunctionReturnVReg(retIndex int, reg regalloc.VReg, slotBegin int64) {
	r := &a.rets[retIndex]
	if r.Kind == backend.ABIArgKindReg {
		a.m.InsertMove(reg, r.Reg, r.Type)
	} else {
		// TODO: we could use a pair load if there are consecutive loads for the same type.
		amode := a.m.resolveAddressModeForOffset(a.argStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false)
		ldr := a.m.allocateInstr()
		switch r.Type {
		case ssa.TypeI32, ssa.TypeI64:
			ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
			ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
		default:
			panic("BUG")
		}
		a.m.insert(ldr)
	}
}

func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
	exct := m.executableContext
	exct.PendingInstructions = exct.PendingInstructions[:0]
	mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
	for _, instr := range exct.PendingInstructions {
		cur = linkInstr(cur, instr)
	}
	return cur, mode
}

func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
	if rn.RegType() != regalloc.RegTypeInt {
		panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
	}
	var amode addressMode
	if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
	} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
	} else {
		var indexReg regalloc.VReg
		if allowTmpRegUse {
			m.lowerConstantI64(tmpRegVReg, offset)
			indexReg = tmpRegVReg
		} else {
			indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
			m.lowerConstantI64(indexReg, offset)
		}
		amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
	}
	return amode
}
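
// Illustrative note (not part of the original source): a minimal sketch of how
// resolveAddressModeForOffset is expected to choose an addressing mode, assuming the usual arm64
// encodings (an unsigned, size-scaled 12-bit immediate and a signed 9-bit immediate):
//
//	resolveAddressModeForOffset(32, 64, rn, false)     // small aligned offset -> addressModeKindRegUnsignedImm12
//	resolveAddressModeForOffset(-16, 64, rn, false)    // small negative offset -> addressModeKindRegSignedImm9
//	resolveAddressModeForOffset(1<<20+4, 64, rn, true) // not encodable -> offset materialized into tmpReg, addressModeKindRegReg
//
// When allowTmpRegUse is false, the fallback path allocates a fresh i64 virtual register instead of
// clobbering tmpReg, presumably so the helper can be used where tmpReg may already be in use.
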
func (a *abiImpl) alignedArgResultStackSlotSize() int64 {
	stackSlotSize := a.retStackSize + a.argStackSize
	// Align stackSlotSize to 16 bytes.
	stackSlotSize = (stackSlotSize + 15) &^ 15
	return stackSlotSize
}

func (m *machine) lowerCall(si *ssa.Instruction) {
	isDirectCall := si.Opcode() == ssa.OpcodeCall
	var indirectCalleePtr ssa.Value
	var directCallee ssa.FuncRef
	var sigID ssa.SignatureID
	var args []ssa.Value
	if isDirectCall {
		directCallee, sigID, args = si.CallData()
	} else {
		indirectCalleePtr, sigID, args = si.CallIndirectData()
	}
	calleeABI := m.getOrCreateABIImpl(m.compiler.SSABuilder().ResolveSignature(sigID))

	stackSlotSize := calleeABI.alignedArgResultStackSlotSize()
	if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
		m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame.
	}

	for i, arg := range args {
		reg := m.compiler.VRegOf(arg)
		def := m.compiler.ValueDefinition(arg)
		calleeABI.callerGenVRegToFunctionArg(i, reg, def, stackSlotSize)
	}

	if isDirectCall {
		call := m.allocateInstr()
		call.asCall(directCallee, calleeABI)
		m.insert(call)
	} else {
		ptr := m.compiler.VRegOf(indirectCalleePtr)
		callInd := m.allocateInstr()
		callInd.asCallIndirect(ptr, calleeABI)
		m.insert(callInd)
	}

	var index int
	r1, rs := si.Returns()
	if r1.Valid() {
		calleeABI.callerGenFunctionReturnVReg(0, m.compiler.VRegOf(r1), stackSlotSize)
		index++
	}

	for _, r := range rs {
		calleeABI.callerGenFunctionReturnVReg(index, m.compiler.VRegOf(r), stackSlotSize)
		index++
	}
}

func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
	// Select the ALU op once; the branches below differ only in how the immediate is materialized.
	var ao aluOp
	if add {
		ao = aluOpAdd
	} else {
		ao = aluOpSub
	}
	if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
		alu := m.allocateInstr()
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
		m.insert(alu)
	} else {
		m.lowerConstantI64(tmpRegVReg, diff)
		alu := m.allocateInstr()
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
		m.insert(alu)
	}
}
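
// Illustrative note (not part of the original source): a minimal sketch of insertAddOrSubStackPointer,
// assuming asImm12Operand accepts the usual arm64 imm12 encoding (a 12-bit value, optionally shifted
// left by 12):
//
//	m.insertAddOrSubStackPointer(spVReg, 32, false)   // a single subtraction of #32 from SP
//	m.insertAddOrSubStackPointer(spVReg, 4100, false) // 4100 is not encodable: the constant is loaded
//	                                                  // into tmpReg first, then subtracted from SP
//
// Similarly, lowerCall reserves alignedArgResultStackSlotSize()+16 bytes of call stack; e.g. a callee
// with argStackSize=40 and retStackSize=8 contributes at least 64 bytes to maxRequiredStackSizeForCalls.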