github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/isa/arm64/abi.go

package arm64

import (
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// References:
// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture
// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard

var (
	intParamResultRegs   = []regalloc.RealReg{x0, x1, x2, x3, x4, x5, x6, x7}
	floatParamResultRegs = []regalloc.RealReg{v0, v1, v2, v3, v4, v5, v6, v7}
)

var regInfo = &regalloc.RegisterInfo{
	AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
		// We don't allocate:
		// - x18: Reserved by macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
		// - x28: Reserved by the Go runtime.
		// - x27 (=tmpReg): for the reason described on tmpReg.
		regalloc.RegTypeInt: {
			x8, x9, x10, x11, x12, x13, x14, x15,
			x16, x17, x19, x20, x21, x22, x23, x24, x25,
			x26, x29, x30,
			// These are the argument/return registers. Less preferred in the allocation.
			x7, x6, x5, x4, x3, x2, x1, x0,
		},
		regalloc.RegTypeFloat: {
			v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
			v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
			// These are the argument/return registers. Less preferred in the allocation.
			v7, v6, v5, v4, v3, v2, v1, v0,
		},
	},
	CalleeSavedRegisters: regalloc.NewRegSet(
		x19, x20, x21, x22, x23, x24, x25, x26, x28,
		v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
	),
	CallerSavedRegisters: regalloc.NewRegSet(
		x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x29, x30,
		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
	),
	RealRegToVReg: []regalloc.VReg{
		x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg,
		v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg,
	},
	RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
	RealRegType: func(r regalloc.RealReg) regalloc.RegType {
		if r < v0 {
			return regalloc.RegTypeInt
		}
		return regalloc.RegTypeFloat
	},
}

// ArgsResultsRegs implements backend.Machine.
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
	return intParamResultRegs, floatParamResultRegs
}
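
// For illustration: intParamResultRegs and floatParamResultRegs are consumed
// independently per class, so a signature such as (i32, f64, i64, f32) would
// be assigned x0, v0, x1, v1 (assuming backend.FunctionABI hands out the
// registers of each class in order), and parameters beyond the eighth of a
// class fall back to the argument stack space handled by LowerParams below.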

// LowerParams implements backend.FunctionABI.
func (m *machine) LowerParams(args []ssa.Value) {
	a := m.currentABI

	for i, ssaArg := range args {
		if !ssaArg.Valid() {
			continue
		}
		reg := m.compiler.VRegOf(ssaArg)
		arg := &a.Args[i]
		if arg.Kind == backend.ABIArgKindReg {
			m.InsertMove(reg, arg.Reg, arg.Type)
		} else {
			// TODO: we could use a pair load if there are consecutive loads for the same type.
			//
			//             (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      |
			//          |      arg X      |
			//          |     .......     |
			//          |      arg 1      |
			//          |      arg 0      |  <-|
			//          |  ReturnAddress  |    |
			//          +-----------------+    |
			//          |   ...........   |    |
			//          |   clobbered M   |    | argStackOffset: is unknown at this point of compilation.
			//          |   ...........   |    |
			//          |   clobbered 0   |    |
			//          |  spill slot N   |    |
			//          |   ...........   |    |
			//          |  spill slot 0   |    |
			//  SP--->  +-----------------+  <-+
			//             (low address)

			bits := arg.Type.Bits()
			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `argStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
			load := m.allocateInstr()
			switch arg.Type {
			case ssa.TypeI32, ssa.TypeI64:
				load.asULoad(operandNR(reg), amode, bits)
			case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
				load.asFpuLoad(operandNR(reg), amode, bits)
			default:
				panic("BUG")
			}
			m.insert(load)
			m.unresolvedAddressModes = append(m.unresolvedAddressModes, load)
		}
	}
}
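
// Note on the loads above: instructions using addressModeKindArgStackSpace are
// collected in m.unresolvedAddressModes because their final immediates depend
// on the sizes of the spill-slot and clobbered-register areas, which are not
// known yet. A later phase resolves each of these into a concrete SP-relative
// offset (roughly sp + argStackOffset + arg.Offset in the diagram above).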

// LowerReturns lowers the given returns.
func (m *machine) LowerReturns(rets []ssa.Value) {
	a := m.currentABI

	l := len(rets) - 1
	for i := range rets {
		// Iterate in reverse order to avoid overwriting stack-bound return values that are still in the return registers.
		ret := rets[l-i]
		r := &a.Rets[l-i]
		reg := m.compiler.VRegOf(ret)
		if def := m.compiler.ValueDefinition(ret); def.IsFromInstr() {
			// Constant instructions are inlined.
			if inst := def.Instr; inst.Constant() {
				val := inst.Return()
				valType := val.Type()
				v := inst.ConstantVal()
				m.insertLoadConstant(v, valType, reg)
			}
		}
		if r.Kind == backend.ABIArgKindReg {
			m.InsertMove(r.Reg, reg, ret.Type())
		} else {
			// TODO: we could use a pair store if there are consecutive stores for the same type.
			//
			//             (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      |  <-+
			//          |      arg X      |    |
			//          |     .......     |    |
			//          |      arg 1      |    |
			//          |      arg 0      |    |
			//          |  ReturnAddress  |    |
			//          +-----------------+    |
			//          |   ...........   |    |
			//          |  spill slot M   |    | retStackOffset: is unknown at this point of compilation.
			//          |   ...........   |    |
			//          |  spill slot 2   |    |
			//          |  spill slot 1   |    |
			//          |   clobbered 0   |    |
			//          |   clobbered 1   |    |
			//          |   ...........   |    |
			//          |   clobbered N   |    |
			//  SP--->  +-----------------+  <-+
			//             (low address)

			bits := r.Type.Bits()

			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `retStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
			store := m.allocateInstr()
			store.asStore(operandNR(reg), amode, bits)
			m.insert(store)
			m.unresolvedAddressModes = append(m.unresolvedAddressModes, store)
		}
	}
}

// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg; it is used to generate the
// caller side of the function call.
func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) {
	arg := &a.Args[argIndex]
	if def != nil && def.IsFromInstr() {
		// Constant instructions are inlined.
		if inst := def.Instr; inst.Constant() {
			val := inst.Return()
			valType := val.Type()
			v := inst.ConstantVal()
			m.insertLoadConstant(v, valType, reg)
		}
	}
	if arg.Kind == backend.ABIArgKindReg {
		m.InsertMove(arg.Reg, reg, arg.Type)
	} else {
		// TODO: we could use a pair store if there are consecutive stores for the same type.
		//
		// Note that at this point, the stack pointer is already adjusted.
		bits := arg.Type.Bits()
		amode := m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false)
		store := m.allocateInstr()
		store.asStore(operandNR(reg), amode, bits)
		m.insert(store)
	}
}

// callerGenFunctionReturnVReg moves the retIndex-th return value of the callee into reg on the
// caller side, either from the corresponding return register or from the result stack space.
func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, slotBegin int64) {
	r := &a.Rets[retIndex]
	if r.Kind == backend.ABIArgKindReg {
		m.InsertMove(reg, r.Reg, r.Type)
	} else {
		// TODO: we could use a pair load if there are consecutive loads for the same type.
		amode := m.resolveAddressModeForOffset(a.ArgStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false)
		ldr := m.allocateInstr()
		switch r.Type {
		case ssa.TypeI32, ssa.TypeI64:
			ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
			ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
		default:
			panic("BUG")
		}
		m.insert(ldr)
	}
}

// resolveAddressModeForOffsetAndInsert is like resolveAddressModeForOffset, but also links any
// instructions emitted to materialize the offset after cur, returning the new tail and the mode.
func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
	exct := m.executableContext
	exct.PendingInstructions = exct.PendingInstructions[:0]
	mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
	for _, instr := range exct.PendingInstructions {
		cur = linkInstr(cur, instr)
	}
	return cur, mode
}

// resolveAddressModeForOffset returns an addressMode for rn+offset for an access of dstBits bits,
// preferring the immediate forms and otherwise materializing the offset into an index register
// (tmpRegVReg if allowTmpRegUse, or a freshly allocated virtual register).
func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
	if rn.RegType() != regalloc.RegTypeInt {
		panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
	}
	var amode addressMode
	if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
	} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
	} else {
		var indexReg regalloc.VReg
		if allowTmpRegUse {
			m.lowerConstantI64(tmpRegVReg, offset)
			indexReg = tmpRegVReg
		} else {
			indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
			m.lowerConstantI64(indexReg, offset)
		}
		amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
	}
	return amode
}
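
// Illustrative examples for resolveAddressModeForOffset, assuming a 64-bit access
// (dstBits == 64) and the usual arm64 load/store encodings (the scaled unsigned
// 12-bit immediate covers 0..32760 in multiples of 8, the unscaled signed 9-bit
// immediate covers -256..255):
//
//	offset = 16      -> addressModeKindRegUnsignedImm12, e.g. ldr x1, [rn, #16]
//	offset = -16     -> addressModeKindRegSignedImm9,    e.g. ldur x1, [rn, #-16]
//	offset = 1 << 20 -> addressModeKindRegReg; the offset is first materialized
//	                    into an index register via lowerConstantI64.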

// lowerCall lowers ssa.OpcodeCall and ssa.OpcodeCallIndirect instructions.
func (m *machine) lowerCall(si *ssa.Instruction) {
	isDirectCall := si.Opcode() == ssa.OpcodeCall
	var indirectCalleePtr ssa.Value
	var directCallee ssa.FuncRef
	var sigID ssa.SignatureID
	var args []ssa.Value
	if isDirectCall {
		directCallee, sigID, args = si.CallData()
	} else {
		indirectCalleePtr, sigID, args, _ /* on arm64, the calling convention is compatible with the Go runtime */ = si.CallIndirectData()
	}
	calleeABI := m.compiler.GetFunctionABI(m.compiler.SSABuilder().ResolveSignature(sigID))

	stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
	if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
		m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame.
	}

	for i, arg := range args {
		reg := m.compiler.VRegOf(arg)
		def := m.compiler.ValueDefinition(arg)
		m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
	}

	if isDirectCall {
		call := m.allocateInstr()
		call.asCall(directCallee, calleeABI)
		m.insert(call)
	} else {
		ptr := m.compiler.VRegOf(indirectCalleePtr)
		callInd := m.allocateInstr()
		callInd.asCallIndirect(ptr, calleeABI)
		m.insert(callInd)
	}

	var index int
	r1, rs := si.Returns()
	if r1.Valid() {
		m.callerGenFunctionReturnVReg(calleeABI, 0, m.compiler.VRegOf(r1), stackSlotSize)
		index++
	}

	for _, r := range rs {
		m.callerGenFunctionReturnVReg(calleeABI, index, m.compiler.VRegOf(r), stackSlotSize)
		index++
	}
}

// insertAddOrSubStackPointer inserts instructions to compute rd = sp + diff (or sp - diff when add is false).
func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
	if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
		alu := m.allocateInstr()
		var ao aluOp
		if add {
			ao = aluOpAdd
		} else {
			ao = aluOpSub
		}
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
		m.insert(alu)
	} else {
		m.lowerConstantI64(tmpRegVReg, diff)
		alu := m.allocateInstr()
		var ao aluOp
		if add {
			ao = aluOpAdd
		} else {
			ao = aluOpSub
		}
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
		m.insert(alu)
	}
}
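
// Illustrative note on insertAddOrSubStackPointer: when diff can be encoded as an
// arm64 add/sub immediate (12 bits, optionally shifted left by 12), a single
// instruction is emitted, e.g. diff = 32 gives `add rd, sp, #32`; otherwise the
// constant is first materialized into tmpRegVReg and a register-register
// add/sub is emitted, e.g. for diff = 0x12345.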