github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/isa/arm64/abi.go

package arm64

import (
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// References:
// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture
// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard

var (
	intParamResultRegs   = []regalloc.RealReg{x0, x1, x2, x3, x4, x5, x6, x7}
	floatParamResultRegs = []regalloc.RealReg{v0, v1, v2, v3, v4, v5, v6, v7}
)
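
// Illustrative example only, assuming the in-order, per-type assignment described in the references above:
// for a signature like (i64, f64, i32) -> (i64, f32), the parameters would be assigned to x0, v0, x1 and
// the results to x0, v0, with any overflow placed in the argument/result stack space.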

var regInfo = &regalloc.RegisterInfo{
	AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
		// We don't allocate:
		// - x18: Reserved by macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
		// - x28: Reserved by the Go runtime.
		// - x27 (= tmpReg): Reserved as the temporary register, for the reason described on tmpReg.
		regalloc.RegTypeInt: {
			x8, x9, x10, x11, x12, x13, x14, x15,
			x16, x17, x19, x20, x21, x22, x23, x24, x25,
			x26, x29, x30,
			// These are the argument/return registers. Less preferred in the allocation.
			x7, x6, x5, x4, x3, x2, x1, x0,
		},
		regalloc.RegTypeFloat: {
			v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
			v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
			// These are the argument/return registers. Less preferred in the allocation.
			v7, v6, v5, v4, v3, v2, v1, v0,
		},
	},
	CalleeSavedRegisters: regalloc.NewRegSet(
		x19, x20, x21, x22, x23, x24, x25, x26, x28,
		v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
	),
	CallerSavedRegisters: regalloc.NewRegSet(
		x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x29, x30,
		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
	),
	RealRegToVReg: []regalloc.VReg{
		x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg,
		v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg,
	},
	RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
	RealRegType: func(r regalloc.RealReg) regalloc.RegType {
		if r < v0 {
			return regalloc.RegTypeInt
		}
		return regalloc.RegTypeFloat
	},
}

// ArgsResultsRegs implements backend.Machine.
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
	return intParamResultRegs, floatParamResultRegs
}

// LowerParams implements backend.FunctionABI.
func (m *machine) LowerParams(args []ssa.Value) {
	a := m.currentABI

	for i, ssaArg := range args {
		if !ssaArg.Valid() {
			continue
		}
		reg := m.compiler.VRegOf(ssaArg)
		arg := &a.Args[i]
		if arg.Kind == backend.ABIArgKindReg {
			m.InsertMove(reg, arg.Reg, arg.Type)
		} else {
			// TODO: we could use a pair load if there are consecutive loads of the same type.
			//
			//            (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      |
			//          |      arg X      |
			//          |     .......     |
			//          |      arg 1      |
			//          |      arg 0      |    <-|
			//          |   ReturnAddress |      |
			//          +-----------------+      |
			//          |   ...........   |      |
			//          |   clobbered  M  |      |   argStackOffset: unknown at this point of compilation.
			//          |   ............  |      |
			//          |   clobbered  0  |      |
			//          |   spill slot N  |      |
			//          |   ...........   |      |
			//          |   spill slot 0  |      |
			//   SP---> +-----------------+    <-+
			//             (low address)

			bits := arg.Type.Bits()
			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `argStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
			load := m.allocateInstr()
			switch arg.Type {
			case ssa.TypeI32, ssa.TypeI64:
				load.asULoad(operandNR(reg), amode, bits)
			case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
				load.asFpuLoad(operandNR(reg), amode, bits)
			default:
				panic("BUG")
			}
			m.insert(load)
			m.unresolvedAddressModes = append(m.unresolvedAddressModes, load)
		}
	}
}

// LowerReturns lowers the given returns.
func (m *machine) LowerReturns(rets []ssa.Value) {
	a := m.currentABI

	l := len(rets) - 1
	for i := range rets {
		// Process in reverse order so that values destined for the stack return slots, which may currently
		// live in the return registers, are stored before those registers are overwritten.
		ret := rets[l-i]
		r := &a.Rets[l-i]
		reg := m.compiler.VRegOf(ret)
		if def := m.compiler.ValueDefinition(ret); def.IsFromInstr() {
			// Constant instructions are inlined.
			if inst := def.Instr; inst.Constant() {
				val := inst.Return()
				valType := val.Type()
				v := inst.ConstantVal()
				m.insertLoadConstant(v, valType, reg)
			}
		}
		if r.Kind == backend.ABIArgKindReg {
			m.InsertMove(r.Reg, reg, ret.Type())
		} else {
			// TODO: we could use a pair store if there are consecutive stores of the same type.
			//
			//            (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      |    <-+
			//          |      arg X      |      |
			//          |     .......     |      |
			//          |      arg 1      |      |
			//          |      arg 0      |      |
			//          |   ReturnAddress |      |
			//          +-----------------+      |
			//          |   ...........   |      |
			//          |   spill slot M  |      |   retStackOffset: unknown at this point of compilation.
			//          |   ............  |      |
			//          |   spill slot 2  |      |
			//          |   spill slot 1  |      |
			//          |   clobbered 0   |      |
			//          |   clobbered 1   |      |
			//          |   ...........   |      |
			//          |   clobbered N   |      |
			//   SP---> +-----------------+    <-+
			//             (low address)

			bits := r.Type.Bits()

			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `retStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
			store := m.allocateInstr()
			store.asStore(operandNR(reg), amode, bits)
			m.insert(store)
			m.unresolvedAddressModes = append(m.unresolvedAddressModes, store)
		}
	}
}

// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg; it is used to generate the caller side
// of the function call, moving the value in `reg` into the register or stack slot required for the
// argIndex-th argument of the callee's ABI `a`.
func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) {
	arg := &a.Args[argIndex]
	if def != nil && def.IsFromInstr() {
		// Constant instructions are inlined.
		if inst := def.Instr; inst.Constant() {
			val := inst.Return()
			valType := val.Type()
			v := inst.ConstantVal()
			m.insertLoadConstant(v, valType, reg)
		}
	}
	if arg.Kind == backend.ABIArgKindReg {
		m.InsertMove(arg.Reg, reg, arg.Type)
	} else {
		// TODO: we could use a pair store if there are consecutive stores of the same type.
		//
		// Note that at this point, the stack pointer is already adjusted.
		bits := arg.Type.Bits()
		amode := m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false)
		store := m.allocateInstr()
		store.asStore(operandNR(reg), amode, bits)
		m.insert(store)
	}
}

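// callerGenFunctionReturnVReg is the counterpart of callerGenVRegToFunctionArg for return values: on the
// caller side of a function call, it moves the retIndex-th result of the callee's ABI `a` into `reg`,
// either from the corresponding return register or by loading it from the result stack space.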
func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, slotBegin int64) {
	r := &a.Rets[retIndex]
	if r.Kind == backend.ABIArgKindReg {
		m.InsertMove(reg, r.Reg, r.Type)
	} else {
		// TODO: we could use a pair load if there are consecutive loads of the same type.
		amode := m.resolveAddressModeForOffset(a.ArgStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false)
		ldr := m.allocateInstr()
		switch r.Type {
		case ssa.TypeI32, ssa.TypeI64:
			ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
			ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
		default:
			panic("BUG")
		}
		m.insert(ldr)
	}
}

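// resolveAddressModeForOffsetAndInsert works like resolveAddressModeForOffset, except that any instructions
// needed to materialize the offset are linked after `cur`. It returns the last linked instruction together
// with the resolved address mode.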
func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
	exct := m.executableContext
	exct.PendingInstructions = exct.PendingInstructions[:0]
	mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
	for _, instr := range exct.PendingInstructions {
		cur = linkInstr(cur, instr)
	}
	return cur, mode
}

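// resolveAddressModeForOffset returns an addressMode for a dstBits-wide access at `offset` bytes from the
// pointer in `rn`. It prefers the unsigned 12-bit immediate form, then the signed 9-bit immediate form;
// otherwise the offset is materialized into a register (tmpRegVReg if allowTmpRegUse is true, or a freshly
// allocated virtual register) and the register-register form is used.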
func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
	if rn.RegType() != regalloc.RegTypeInt {
		panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
	}
	var amode addressMode
	if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
	} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
	} else {
		var indexReg regalloc.VReg
		if allowTmpRegUse {
			m.lowerConstantI64(tmpRegVReg, offset)
			indexReg = tmpRegVReg
		} else {
			indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
			m.lowerConstantI64(indexReg, offset)
		}
		amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
	}
	return amode
}

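// lowerCall lowers ssa.OpcodeCall and ssa.OpcodeCallIndirect: it moves the arguments into the locations
// required by the callee's ABI, emits the direct or indirect call instruction, and then copies the results
// back into the virtual registers of the instruction's return values. It also updates the maximum stack
// space required for the argument/result slots plus the return-address frame.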
func (m *machine) lowerCall(si *ssa.Instruction) {
	isDirectCall := si.Opcode() == ssa.OpcodeCall
	var indirectCalleePtr ssa.Value
	var directCallee ssa.FuncRef
	var sigID ssa.SignatureID
	var args []ssa.Value
	if isDirectCall {
		directCallee, sigID, args = si.CallData()
	} else {
		indirectCalleePtr, sigID, args, _ /* on arm64, the calling convention is compatible with the Go runtime */ = si.CallIndirectData()
	}
	calleeABI := m.compiler.GetFunctionABI(m.compiler.SSABuilder().ResolveSignature(sigID))

	stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
	if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
		m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame.
	}

	for i, arg := range args {
		reg := m.compiler.VRegOf(arg)
		def := m.compiler.ValueDefinition(arg)
		m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
	}

	if isDirectCall {
		call := m.allocateInstr()
		call.asCall(directCallee, calleeABI)
		m.insert(call)
	} else {
		ptr := m.compiler.VRegOf(indirectCalleePtr)
		callInd := m.allocateInstr()
		callInd.asCallIndirect(ptr, calleeABI)
		m.insert(callInd)
	}

	var index int
	r1, rs := si.Returns()
	if r1.Valid() {
		m.callerGenFunctionReturnVReg(calleeABI, 0, m.compiler.VRegOf(r1), stackSlotSize)
		index++
	}

	for _, r := range rs {
		m.callerGenFunctionReturnVReg(calleeABI, index, m.compiler.VRegOf(r), stackSlotSize)
		index++
	}
}

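// insertAddOrSubStackPointer inserts an instruction that computes SP plus (or minus, when add is false) diff
// into rd. It uses the immediate form of ADD/SUB when diff fits in an imm12 operand, and otherwise
// materializes diff into tmpRegVReg and uses the register form.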
func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
	var ao aluOp
	if add {
		ao = aluOpAdd
	} else {
		ao = aluOpSub
	}
	if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
		alu := m.allocateInstr()
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
		m.insert(alu)
	} else {
		m.lowerConstantI64(tmpRegVReg, diff)
		alu := m.allocateInstr()
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
		m.insert(alu)
	}
}