github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/wazevo/backend/isa/arm64/abi.go

package arm64

import (
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/ssa"
)

// References:
// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture
// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard

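// Following the ABIs referenced above, this backend passes the first eight
// integer arguments/results in x0-x7 and the first eight floating-point/vector
// arguments/results in v0-v7; any remaining values go on the stack
// (see setABIArgs below).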
const xArgRetRegMax, vArgRetRegMax = x7, v7 // x0-x7 & v0-v7.

var regInfo = &regalloc.RegisterInfo{
	AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
		// We don't allocate:
		// - x18: reserved by macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
		// - x28: reserved by the Go runtime.
		// - x27 (= tmpReg): for the reason described in the comment on tmpReg.
		regalloc.RegTypeInt: {
			x8, x9, x10, x11, x12, x13, x14, x15,
			x16, x17, x19, x20, x21, x22, x23, x24, x25,
			x26, x29, x30,
			// These are the argument/return registers, so they are less preferred in the allocation.
			x7, x6, x5, x4, x3, x2, x1, x0,
		},
		regalloc.RegTypeFloat: {
			v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
			v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
			// These are the argument/return registers, so they are less preferred in the allocation.
			v7, v6, v5, v4, v3, v2, v1, v0,
		},
	},
	CalleeSavedRegisters: [regalloc.RealRegsNumMax]bool{
		x19: true, x20: true, x21: true, x22: true, x23: true, x24: true, x25: true, x26: true, x28: true,
		v18: true, v19: true, v20: true, v21: true, v22: true, v23: true, v24: true, v25: true, v26: true,
		v27: true, v28: true, v29: true, v30: true, v31: true,
	},
	CallerSavedRegisters: [regalloc.RealRegsNumMax]bool{
		x0: true, x1: true, x2: true, x3: true, x4: true, x5: true, x6: true, x7: true, x8: true, x9: true, x10: true,
		x11: true, x12: true, x13: true, x14: true, x15: true, x16: true, x17: true, x29: true, x30: true,
		v0: true, v1: true, v2: true, v3: true, v4: true, v5: true, v6: true, v7: true, v8: true, v9: true, v10: true,
		v11: true, v12: true, v13: true, v14: true, v15: true, v16: true, v17: true,
	},
	RealRegToVReg: []regalloc.VReg{
		x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg,
		v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg,
	},
	RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
	RealRegType: func(r regalloc.RealReg) regalloc.RegType {
		if r < v0 {
			return regalloc.RegTypeInt
		}
		return regalloc.RegTypeFloat
	},
}

// abiImpl implements backend.FunctionABI.
type abiImpl struct {
	m                          *machine
	args, rets                 []backend.ABIArg
	argStackSize, retStackSize int64

	argRealRegs []regalloc.VReg
	retRealRegs []regalloc.VReg
}

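// getOrCreateABIImpl returns the cached abiImpl for the given signature,
// growing m.abis as needed; a non-nil abi.m marks an entry as already initialized.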
func (m *machine) getOrCreateABIImpl(sig *ssa.Signature) *abiImpl {
	if int(sig.ID) >= len(m.abis) {
		m.abis = append(m.abis, make([]abiImpl, int(sig.ID)+1)...)
	}

	abi := &m.abis[sig.ID]
	if abi.m != nil {
		return abi
	}

	abi.m = m
	abi.init(sig)
	return abi
}

// init initializes the abiImpl for the given signature.
func (a *abiImpl) init(sig *ssa.Signature) {
	if len(a.rets) < len(sig.Results) {
		a.rets = make([]backend.ABIArg, len(sig.Results))
	}
	a.rets = a.rets[:len(sig.Results)]
	a.retStackSize = a.setABIArgs(a.rets, sig.Results)
	if argsNum := len(sig.Params); len(a.args) < argsNum {
		a.args = make([]backend.ABIArg, argsNum)
	}
	a.args = a.args[:len(sig.Params)]
	a.argStackSize = a.setABIArgs(a.args, sig.Params)

	// Gather the real registers used by the arguments/returns.
	a.retRealRegs = a.retRealRegs[:0]
	for i := range a.rets {
		r := &a.rets[i]
		if r.Kind == backend.ABIArgKindReg {
			a.retRealRegs = append(a.retRealRegs, r.Reg)
		}
	}
	a.argRealRegs = a.argRealRegs[:0]
	for i := range a.args {
		arg := &a.args[i]
		if arg.Kind == backend.ABIArgKindReg {
			reg := arg.Reg
			a.argRealRegs = append(a.argRealRegs, reg)
		}
	}
}

// setABIArgs sets the ABI arguments in the given slice. This assumes len(s) >= len(types);
// if len(s) > len(types), the trailing elements of s are for the multi-return slot.
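// For example, a signature (i64, f32, i64) -> i64 gets x0, v0, and x1 for its
// parameters and x0 for its result, with no stack space needed; only once
// x0-x7 (or v0-v7) are exhausted do values spill to 8-byte (16-byte for v128)
// stack slots.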
func (a *abiImpl) setABIArgs(s []backend.ABIArg, types []ssa.Type) (stackSize int64) {
	var stackOffset int64
	nextX, nextV := x0, v0
	for i, typ := range types {
		arg := &s[i]
		arg.Index = i
		arg.Type = typ
		if typ.IsInt() {
			if nextX > xArgRetRegMax {
				arg.Kind = backend.ABIArgKindStack
				const slotSize = 8 // Align 8 bytes.
				arg.Offset = stackOffset
				stackOffset += slotSize
			} else {
				arg.Kind = backend.ABIArgKindReg
				arg.Reg = regalloc.FromRealReg(nextX, regalloc.RegTypeInt)
				nextX++
			}
		} else {
			if nextV > vArgRetRegMax {
				arg.Kind = backend.ABIArgKindStack
				slotSize := int64(8)   // Align at least 8 bytes.
				if typ.Bits() == 128 { // Vector.
					slotSize = 16
				}
				arg.Offset = stackOffset
				stackOffset += slotSize
			} else {
				arg.Kind = backend.ABIArgKindReg
				arg.Reg = regalloc.FromRealReg(nextV, regalloc.RegTypeFloat)
				nextV++
			}
		}
	}
	return stackOffset
}

// CalleeGenFunctionArgsToVRegs implements backend.FunctionABI.
func (a *abiImpl) CalleeGenFunctionArgsToVRegs(args []ssa.Value) {
	for i, ssaArg := range args {
		if !ssaArg.Valid() {
			continue
		}
		reg := a.m.compiler.VRegOf(ssaArg)
		arg := &a.args[i]
		if arg.Kind == backend.ABIArgKindReg {
			a.m.InsertMove(reg, arg.Reg, arg.Type)
		} else {
			// TODO: we could use pair loads if there are consecutive loads of the same type.
			//
			//            (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      |
			//          |      arg X      |
			//          |     .......     |
			//          |      arg 1      |
			//          |      arg 0      |    <-+
			//          |   ReturnAddress |      |
			//          +-----------------+      |
			//          |   ...........   |      |
			//          |   spill slot M  |      |   argStackOffset is unknown at this point of compilation.
			//          |   ............  |      |
			//          |   spill slot 2  |      |
			//          |   spill slot 1  |      |
			//          |   clobbered 0   |      |
			//          |   clobbered 1   |      |
			//          |   ...........   |      |
			//          |   clobbered N   |      |
			//   SP---> +-----------------+    <-+
			//             (low address)

			m := a.m
			bits := arg.Type.Bits()
			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `argStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
			load := m.allocateInstr()
			switch arg.Type {
			case ssa.TypeI32, ssa.TypeI64:
				load.asULoad(operandNR(reg), amode, bits)
			case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
				load.asFpuLoad(operandNR(reg), amode, bits)
			default:
				panic("BUG")
			}
			m.insert(load)
			a.m.unresolvedAddressModes = append(a.m.unresolvedAddressModes, load)
		}
	}
}

// CalleeGenVRegsToFunctionReturns implements backend.FunctionABI.
func (a *abiImpl) CalleeGenVRegsToFunctionReturns(rets []ssa.Value) {
	l := len(rets) - 1
	for i := range rets {
		// Iterate in reverse order so that stack-bound returns, which may still live
		// in the return registers, are stored before those registers are overwritten.
		ret := rets[l-i]
		r := &a.rets[l-i]
		reg := a.m.compiler.VRegOf(ret)
		if def := a.m.compiler.ValueDefinition(ret); def.IsFromInstr() {
			// Constant instructions are inlined.
			if inst := def.Instr; inst.Constant() {
				a.m.InsertLoadConstant(inst, reg)
			}
		}
		if r.Kind == backend.ABIArgKindReg {
			a.m.InsertMove(r.Reg, reg, ret.Type())
		} else {
			// TODO: we could use pair stores if there are consecutive stores of the same type.
			//
			//            (high address)
			//          +-----------------+
			//          |     .......     |
			//          |      ret Y      |
			//          |     .......     |
			//          |      ret 0      |    <-+
			//          |      arg X      |      |
			//          |     .......     |      |
			//          |      arg 1      |      |
			//          |      arg 0      |      |
			//          |   ReturnAddress |      |
			//          +-----------------+      |
			//          |   ...........   |      |
			//          |   spill slot M  |      |   retStackOffset is unknown at this point of compilation.
			//          |   ............  |      |
			//          |   spill slot 2  |      |
			//          |   spill slot 1  |      |
			//          |   clobbered 0   |      |
			//          |   clobbered 1   |      |
			//          |   ...........   |      |
			//          |   clobbered N   |      |
			//   SP---> +-----------------+    <-+
			//             (low address)

			bits := r.Type.Bits()

			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `retStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
			store := a.m.allocateInstr()
			store.asStore(operandNR(reg), amode, bits)
			a.m.insert(store)
			a.m.unresolvedAddressModes = append(a.m.unresolvedAddressModes, store)
		}
	}
}

// callerGenVRegToFunctionArg is the counterpart of CalleeGenFunctionArgsToVRegs,
// used to generate the caller side of the function call: it moves or stores the
// argIndex-th argument value in reg into its ABI location.
func (a *abiImpl) callerGenVRegToFunctionArg(argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) {
	arg := &a.args[argIndex]
	if def != nil && def.IsFromInstr() {
		// Constant instructions are inlined.
		if inst := def.Instr; inst.Constant() {
			a.m.InsertLoadConstant(inst, reg)
		}
	}
	if arg.Kind == backend.ABIArgKindReg {
		a.m.InsertMove(arg.Reg, reg, arg.Type)
	} else {
		// TODO: we could use pair stores if there are consecutive stores of the same type.
		//
		// Note that at this point, the stack pointer is already adjusted.
		bits := arg.Type.Bits()
		amode := a.m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false)
		store := a.m.allocateInstr()
		store.asStore(operandNR(reg), amode, bits)
		a.m.insert(store)
	}
}

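// callerGenFunctionReturnVReg is the counterpart of CalleeGenVRegsToFunctionReturns:
// after the call, it copies the retIndex-th return value into reg, either from its
// return register or from its stack slot relative to SP.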
func (a *abiImpl) callerGenFunctionReturnVReg(retIndex int, reg regalloc.VReg, slotBegin int64) {
	r := &a.rets[retIndex]
	if r.Kind == backend.ABIArgKindReg {
		a.m.InsertMove(reg, r.Reg, r.Type)
	} else {
		// TODO: we could use pair loads if there are consecutive loads of the same type.
		amode := a.m.resolveAddressModeForOffset(a.argStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false)
		ldr := a.m.allocateInstr()
		switch r.Type {
		case ssa.TypeI32, ssa.TypeI64:
			ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
			ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
		default:
			panic("BUG")
		}
		a.m.insert(ldr)
	}
}

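// resolveAddressModeForOffsetAndInsert works like resolveAddressModeForOffset, but
// also links any instructions generated while materializing the offset (collected
// in PendingInstructions) after cur, returning the new tail and the resolved mode.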
func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
	exct := m.executableContext
	exct.PendingInstructions = exct.PendingInstructions[:0]
	mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
	for _, instr := range exct.PendingInstructions {
		cur = linkInstr(cur, instr)
	}
	return cur, mode
}

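// resolveAddressModeForOffset returns an addressMode for rn+offset, preferring the
// unsigned scaled 12-bit immediate form, then the signed 9-bit immediate form, and
// otherwise materializing the offset into a register (tmpReg if allowTmpRegUse is
// true, else a freshly allocated vreg) and using the register-register mode.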
func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
	if rn.RegType() != regalloc.RegTypeInt {
		panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
	}
	var amode addressMode
	if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
	} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
	} else {
		var indexReg regalloc.VReg
		if allowTmpRegUse {
			m.lowerConstantI64(tmpRegVReg, offset)
			indexReg = tmpRegVReg
		} else {
			indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
			m.lowerConstantI64(indexReg, offset)
		}
		amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates that the index rm is 64-bit */}
	}
	return amode
}

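// alignedArgResultStackSlotSize returns the total stack space used by stack-passed
// arguments and results, rounded up to the 16-byte SP alignment that arm64 requires.
// For example, 24 bytes of raw slots rounds up to 32.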
func (a *abiImpl) alignedArgResultStackSlotSize() int64 {
	stackSlotSize := a.retStackSize + a.argStackSize
	// Align stackSlotSize to 16 bytes: adding 15 and clearing the low 4 bits rounds up.
	stackSlotSize = (stackSlotSize + 15) &^ 15
	return stackSlotSize
}

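// lowerCall lowers ssa.OpcodeCall and ssa.OpcodeCallIndirect: it records the stack
// space the call needs, moves or stores each argument into its ABI location, emits
// the direct or indirect call, and copies each return value back into its vreg.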
func (m *machine) lowerCall(si *ssa.Instruction) {
	isDirectCall := si.Opcode() == ssa.OpcodeCall
	var indirectCalleePtr ssa.Value
	var directCallee ssa.FuncRef
	var sigID ssa.SignatureID
	var args []ssa.Value
	if isDirectCall {
		directCallee, sigID, args = si.CallData()
	} else {
		indirectCalleePtr, sigID, args = si.CallIndirectData()
	}
	calleeABI := m.getOrCreateABIImpl(m.compiler.SSABuilder().ResolveSignature(sigID))

	stackSlotSize := calleeABI.alignedArgResultStackSlotSize()
	if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
		m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame.
	}

	for i, arg := range args {
		reg := m.compiler.VRegOf(arg)
		def := m.compiler.ValueDefinition(arg)
		calleeABI.callerGenVRegToFunctionArg(i, reg, def, stackSlotSize)
	}

	if isDirectCall {
		call := m.allocateInstr()
		call.asCall(directCallee, calleeABI)
		m.insert(call)
	} else {
		ptr := m.compiler.VRegOf(indirectCalleePtr)
		callInd := m.allocateInstr()
		callInd.asCallIndirect(ptr, calleeABI)
		m.insert(callInd)
	}

	var index int
	r1, rs := si.Returns()
	if r1.Valid() {
		calleeABI.callerGenFunctionReturnVReg(0, m.compiler.VRegOf(r1), stackSlotSize)
		index++
	}

	for _, r := range rs {
		calleeABI.callerGenFunctionReturnVReg(index, m.compiler.VRegOf(r), stackSlotSize)
		index++
	}
}

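// insertAddOrSubStackPointer emits rd = SP + diff (or SP - diff when add is false),
// using the 12-bit immediate form when diff fits and otherwise materializing diff
// into tmpReg first.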
func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
	ao := aluOpSub
	if add {
		ao = aluOpAdd
	}
	if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
		alu := m.allocateInstr()
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
		m.insert(alu)
	} else {
		m.lowerConstantI64(tmpRegVReg, diff)
		alu := m.allocateInstr()
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
		m.insert(alu)
	}
}