github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/wazevo/backend/isa/arm64/machine.go

package arm64

import (
	"context"
	"fmt"
	"strings"

	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/ssa"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/wazevoapi"
)

type (
	// machine implements backend.Machine.
	machine struct {
		compiler          backend.Compiler
		executableContext *backend.ExecutableContextT[instruction]
		currentABI        *abiImpl
		// abis maps ssa.SignatureID to the ABI implementation.
		abis []abiImpl

		regAllocFn regAllocFunctionImpl

		// addendsWorkQueue is used during address lowering, defined here for reuse.
		addendsWorkQueue queue[ssa.Value]
		addends32        queue[addend32]
		// addends64 is used during address lowering, defined here for reuse.
		addends64              queue[regalloc.VReg]
		unresolvedAddressModes []*instruction

		// condBrRelocs holds the conditional branches which need offset relocation.
		condBrRelocs []condBrReloc

		// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
		// During the execution of the function, the stack looks like:
		//
		//
		//            (high address)
		//          +-----------------+
		//          |     .......     |
		//          |      ret Y      |
		//          |     .......     |
		//          |      ret 0      |
		//          |      arg X      |
		//          |     .......     |
		//          |      arg 1      |
		//          |      arg 0      |
		//          |      xxxxx      |
		//          |  ReturnAddress  |
		//          +-----------------+   <<-|
		//          |   ...........   |      |
		//          |   spill slot M  |      | <--- spillSlotSize
		//          |   ............  |      |
		//          |   spill slot 2  |      |
		//          |   spill slot 1  |   <<-+
		//          |   clobbered N   |
		//          |   ...........   |
		//          |   clobbered 1   |
		//          |   clobbered 0   |
		//   SP---> +-----------------+
		//             (low address)
		//
		// and it represents the size of the space between FP and the first spilled slot. This must be a multiple of 16.
		// Also note that this is only known after register allocation.
		spillSlotSize int64
		spillSlots    map[regalloc.VRegID]int64 // regalloc.VRegID to offset.
		// clobberedRegs holds real-register backed VRegs saved at the function prologue, and restored at the epilogue.
		clobberedRegs []regalloc.VReg

		maxRequiredStackSizeForCalls int64
		stackBoundsCheckDisabled     bool

		regAllocStarted bool
	}

	addend32 struct {
		r   regalloc.VReg
		ext extendOp
	}

	condBrReloc struct {
		cbr *instruction
		// currentLabelPos is the labelPosition within which condBr is defined.
		currentLabelPos *labelPosition
		// nextLabel is the label of the next block.
		nextLabel label
		offset    int64
	}

	labelPosition = backend.LabelPosition[instruction]
	label         = backend.Label
)

const (
	labelReturn  = backend.LabelReturn
	labelInvalid = backend.LabelInvalid
)

// NewBackend returns a new backend for arm64.
func NewBackend() backend.Machine {
	m := &machine{
		spillSlots:        make(map[regalloc.VRegID]int64),
		executableContext: newExecutableContext(),
	}
	m.regAllocFn.m = m
	m.regAllocFn.labelToRegAllocBlockIndex = make(map[label]int)
	return m
}

func newExecutableContext() *backend.ExecutableContextT[instruction] {
	return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0)
}

// ExecutableContext implements backend.Machine.
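// The returned context holds the instruction pool, the per-label positions, and the ordered
// block labels that the relocation and formatting passes in this file iterate over.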
func (m *machine) ExecutableContext() backend.ExecutableContext {
	return m.executableContext
}

// Reset implements backend.Machine.
func (m *machine) Reset() {
	m.regAllocStarted = false
	m.clobberedRegs = m.clobberedRegs[:0]
	for key := range m.spillSlots {
		m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key))
	}
	for _, key := range m.clobberedRegs {
		delete(m.spillSlots, regalloc.VRegID(key))
	}
	m.clobberedRegs = m.clobberedRegs[:0]
	m.regAllocFn.reset()
	m.spillSlotSize = 0
	m.unresolvedAddressModes = m.unresolvedAddressModes[:0]
	m.maxRequiredStackSizeForCalls = 0
	m.executableContext.Reset()
}

// InitializeABI implements backend.Machine InitializeABI.
func (m *machine) InitializeABI(sig *ssa.Signature) {
	m.currentABI = m.getOrCreateABIImpl(sig)
}

// DisableStackCheck implements backend.Machine DisableStackCheck.
func (m *machine) DisableStackCheck() {
	m.stackBoundsCheckDisabled = true
}

// ABI implements backend.Machine.
func (m *machine) ABI() backend.FunctionABI {
	return m.currentABI
}

// SetCompiler implements backend.Machine.
func (m *machine) SetCompiler(ctx backend.Compiler) {
	m.compiler = ctx
}

// StartBlock implements backend.Machine.
func (m *machine) StartBlock(blk ssa.BasicBlock) {
	l, pos := m.executableContext.StartBlock(blk)
	m.regAllocFn.addBlock(blk, l, pos)
}

func (m *machine) insert(i *instruction) {
	ectx := m.executableContext
	ectx.PendingInstructions = append(ectx.PendingInstructions, i)
}

func (m *machine) insertBrTargetLabel() label {
	nop, l := m.allocateBrTarget()
	m.insert(nop)
	return l
}

func (m *machine) allocateBrTarget() (nop *instruction, l label) {
	ectx := m.executableContext
	l = ectx.AllocateLabel()
	nop = m.allocateInstr()
	nop.asNop0WithLabel(l)
	pos := ectx.AllocateLabelPosition(l)
	pos.Begin, pos.End = nop, nop
	ectx.LabelPositions[l] = pos
	return
}

// allocateInstr allocates an instruction.
func (m *machine) allocateInstr() *instruction {
	instr := m.executableContext.InstructionPool.Allocate()
	if !m.regAllocStarted {
		instr.addedBeforeRegAlloc = true
	}
	return instr
}

func resetInstruction(i *instruction) {
	*i = instruction{}
}

func (m *machine) allocateNop() *instruction {
	instr := m.allocateInstr()
	instr.asNop0()
	return instr
}

func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
	amode := &i.amode
	switch amode.kind {
	case addressModeKindResultStackSpace:
		amode.imm += ret0offset
	case addressModeKindArgStackSpace:
		amode.imm += arg0offset
	default:
		panic("BUG")
	}

	var sizeInBits byte
	switch i.kind {
	case store8, uLoad8:
		sizeInBits = 8
	case store16, uLoad16:
		sizeInBits = 16
	case store32, fpuStore32, uLoad32, fpuLoad32:
		sizeInBits = 32
	case store64, fpuStore64, uLoad64, fpuLoad64:
		sizeInBits = 64
	case fpuStore128, fpuLoad128:
		sizeInBits = 128
	default:
		panic("BUG")
	}

	if offsetFitsInAddressModeKindRegUnsignedImm12(sizeInBits, amode.imm) {
		amode.kind = addressModeKindRegUnsignedImm12
	} else {
		// In this case, we load the offset into the temporary register,
		// and then use it as the index register.
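		// For example, a stack access whose resolved offset does not fit the scaled unsigned
		// 12-bit immediate is rewritten as a register-register access: the offset is
		// materialized into the temporary register and used as the 64-bit index (rm) below.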
		newPrev := m.lowerConstantI64AndInsert(i.prev, tmpRegVReg, amode.imm)
		linkInstr(newPrev, i)
		*amode = addressMode{kind: addressModeKindRegReg, rn: amode.rn, rm: tmpRegVReg, extOp: extendOpUXTX /* indicates rm reg is 64-bit */}
	}
}

// ResolveRelativeAddresses implements backend.Machine.
func (m *machine) ResolveRelativeAddresses(ctx context.Context) {
	if len(m.unresolvedAddressModes) > 0 {
		arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP()
		for _, i := range m.unresolvedAddressModes {
			m.resolveAddressingMode(arg0offset, ret0offset, i)
		}
	}

	// Reuse the slice to gather the unresolved conditional branches.
	cbrs := m.condBrRelocs[:0]
	ectx := m.executableContext

	var fn string
	var fnIndex int
	var labelToSSABlockID map[label]ssa.BasicBlockID
	if wazevoapi.PerfMapEnabled {
		fn = wazevoapi.GetCurrentFunctionName(ctx)
		labelToSSABlockID = make(map[label]ssa.BasicBlockID)
		for i, l := range ectx.SsaBlockIDToLabels {
			labelToSSABlockID[l] = ssa.BasicBlockID(i)
		}
		fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)
	}

	// Next, in order to determine the offsets of relative jumps, we have to calculate the binary size of each block (label position).
	var offset int64
	for i, pos := range ectx.OrderedBlockLabels {
		pos.BinaryOffset = offset
		var size int64
		for cur := pos.Begin; ; cur = cur.next {
			switch cur.kind {
			case nop0:
				l := cur.nop0Label()
				if pos, ok := ectx.LabelPositions[l]; ok {
					pos.BinaryOffset = offset + size
				}
			case condBr:
				if !cur.condBrOffsetResolved() {
					var nextLabel label
					if i < len(ectx.OrderedBlockLabels)-1 {
						// Note: this is only used when the block ends with fallthrough,
						// therefore it can be safely assumed that the next block exists when it's needed.
						nextLabel = ectx.OrderedBlockLabels[i+1].L
					}
					cbrs = append(cbrs, condBrReloc{
						cbr: cur, currentLabelPos: pos, offset: offset + size,
						nextLabel: nextLabel,
					})
				}
			}
			size += cur.size()
			if cur == pos.End {
				break
			}
		}

		if wazevoapi.PerfMapEnabled {
			if size > 0 {
				l := pos.L
				var labelStr string
				if blkID, ok := labelToSSABlockID[l]; ok {
					labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID)
				} else {
					labelStr = l.String()
				}
				wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr))
			}
		}

		pos.BinarySize = size
		offset += size
	}

	// Before resolving any offsets, we need to check if all the conditional branches can be resolved.
	var needRerun bool
	for i := range cbrs {
		reloc := &cbrs[i]
		cbr := reloc.cbr
		offset := reloc.offset

		target := cbr.condBrLabel()
		offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
		diff := offsetOfTarget - offset
		if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
			// In this case, the conditional branch target is too far away. We place trampoline
			// instructions at the end of the current block and make the conditional branch jump to them instead.
			m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel)
			// Then, we need to call this function again to fix up the label offsets,
			// as they have changed after the trampoline is inserted.
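			// (The rerun below recomputes every BinaryOffset with the newly inserted
			// trampolines counted in the block sizes.)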
			needRerun = true
		}
	}
	if needRerun {
		m.ResolveRelativeAddresses(ctx)
		if wazevoapi.PerfMapEnabled {
			wazevoapi.PerfMap.Clear()
		}
		return
	}

	var currentOffset int64
	for cur := ectx.RootInstr; cur != nil; cur = cur.next {
		switch cur.kind {
		case br:
			target := cur.brLabel()
			offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
			diff := offsetOfTarget - currentOffset
			divided := diff >> 2
			if divided < minSignedInt26 || divided > maxSignedInt26 {
				// This means the currently compiled single function is extremely large.
				panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range")
			}
			cur.brOffsetResolve(diff)
		case condBr:
			if !cur.condBrOffsetResolved() {
				target := cur.condBrLabel()
				offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
				diff := offsetOfTarget - currentOffset
				if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
					panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly")
				}
				cur.condBrOffsetResolve(diff)
			}
		case brTableSequence:
			for i := range cur.targets {
				l := label(cur.targets[i])
				offsetOfTarget := ectx.LabelPositions[l].BinaryOffset
				diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin)
				cur.targets[i] = uint32(diff)
			}
			cur.brTableSequenceOffsetsResolved()
		case emitSourceOffsetInfo:
			m.compiler.AddSourceOffsetInfo(currentOffset, cur.sourceOffsetInfo())
		}
		currentOffset += cur.size()
	}
}

const (
	maxSignedInt26 int64 = 1<<25 - 1
	minSignedInt26 int64 = -(1 << 25)

	maxSignedInt19 int64 = 1<<19 - 1
	minSignedInt19 int64 = -(1 << 19)
)

func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) {
	cur := currentBlk.End
	originalTarget := cbr.condBrLabel()
	endNext := cur.next

	if cur.kind != br {
		// If the current block ends with an unconditional branch, we can just insert the trampoline after it.
		// Otherwise, we need to insert a "skip" branch so that fall-through execution jumps over the trampoline instructions.
		skip := m.allocateInstr()
		skip.asBr(nextLabel)
		cur = linkInstr(cur, skip)
	}

	cbrNewTargetInstr, cbrNewTargetLabel := m.allocateBrTarget()
	cbr.setCondBrTargets(cbrNewTargetLabel)
	cur = linkInstr(cur, cbrNewTargetInstr)

	// Then insert the unconditional branch to the original target, which should be encodable
	// since the 26-bit offset range is enough for any practical function size.
	br := m.allocateInstr()
	br.asBr(originalTarget)
	cur = linkInstr(cur, br)

	// Update the end of the current block.
	currentBlk.End = cur

	linkInstr(cur, endNext)
}

// Format implements backend.Machine.
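// The returned string lists each label (with its corresponding SSA block, if any) followed by
// its instructions, one per line, primarily for debugging output.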
func (m *machine) Format() string {
	ectx := m.executableContext
	begins := map[*instruction]label{}
	for l, pos := range ectx.LabelPositions {
		begins[pos.Begin] = l
	}

	irBlocks := map[label]ssa.BasicBlockID{}
	for i, l := range ectx.SsaBlockIDToLabels {
		irBlocks[l] = ssa.BasicBlockID(i)
	}

	var lines []string
	for cur := ectx.RootInstr; cur != nil; cur = cur.next {
		if l, ok := begins[cur]; ok {
			var labelStr string
			if blkID, ok := irBlocks[l]; ok {
				labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID)
			} else {
				labelStr = fmt.Sprintf("%s:", l)
			}
			lines = append(lines, labelStr)
		}
		if cur.kind == nop0 {
			continue
		}
		lines = append(lines, "\t"+cur.String())
	}
	return "\n" + strings.Join(lines, "\n") + "\n"
}

// InsertReturn implements backend.Machine.
func (m *machine) InsertReturn() {
	i := m.allocateInstr()
	i.asRet(m.currentABI)
	m.insert(i)
}

func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 {
	offset, ok := m.spillSlots[id]
	if !ok {
		offset = m.spillSlotSize
		// TODO: this should be aligned depending on the `size` to use Imm12 offset load/store as much as possible.
		m.spillSlots[id] = offset
		m.spillSlotSize += int64(size)
	}
	return offset + 16 // spill slot starts above the clobbered registers and the frame size.
}

func (m *machine) clobberedRegSlotSize() int64 {
	return int64(len(m.clobberedRegs) * 16)
}

func (m *machine) arg0OffsetFromSP() int64 {
	return m.frameSize() +
		16 + // 16-byte aligned return address
		16 // frame size saved below the clobbered registers.
}

func (m *machine) ret0OffsetFromSP() int64 {
	return m.arg0OffsetFromSP() + m.currentABI.argStackSize
}

func (m *machine) requiredStackSize() int64 {
	return m.maxRequiredStackSizeForCalls +
		m.frameSize() +
		16 + // 16-byte aligned return address.
		16 // frame size saved below the clobbered registers.
}

func (m *machine) frameSize() int64 {
	s := m.clobberedRegSlotSize() + m.spillSlotSize
	if s&0xf != 0 {
		panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s))
	}
	return s
}
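// Frame layout example for the arithmetic above: with two clobbered registers (2*16 = 32 bytes),
// 16 bytes of spill slots, and no stack-passed arguments or results for callees, frameSize() is 48,
// arg0OffsetFromSP() is 48+16+16 = 80, and requiredStackSize() is 0+48+16+16 = 80.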