package backend

import (
	"context"
	"encoding/hex"
	"fmt"

	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/ssa"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/wazevoapi"
)

// NewCompiler returns a new Compiler that can generate a machine code.
func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler {
	return newCompiler(ctx, mach, builder)
}

// newCompiler constructs the concrete *compiler and registers it with the
// given Machine via SetCompiler so the machine backend can call back into
// the compiler during lowering.
func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler {
	c := &compiler{
		mach: mach, ssaBuilder: builder,
		// Virtual-register IDs below VRegIDNonReservedBegin are reserved
		// for the register allocator's own use.
		nextVRegID: regalloc.VRegIDNonReservedBegin,
		regAlloc:   regalloc.NewAllocator(mach.RegisterInfo()),
	}
	mach.SetCompiler(c)
	return c
}

// Compiler is the backend of wazevo which takes ssa.Builder and Machine,
// and uses the information there to emit the final machine code.
type Compiler interface {
	// SSABuilder returns the ssa.Builder used by this compiler.
	SSABuilder() ssa.Builder

	// Compile executes the following steps:
	//	1. Lower()
	//	2. RegAlloc()
	//	3. Finalize()
	//	4. Encode()
	//
	// Each step can be called individually for testing purpose, therefore they are exposed in this interface too.
	//
	// The returned byte slices are the machine code and the relocation information for the machine code.
	// The caller is responsible for copying them immediately since the compiler may reuse the buffer.
	Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error)

	// Lower lowers the given ssa.Instruction to the machine-specific instructions.
	Lower()

	// RegAlloc performs the register allocation after Lower is called.
	RegAlloc()

	// Finalize performs the finalization of the compilation. This must be called after RegAlloc.
	Finalize(ctx context.Context)

	// Encode encodes the machine code to the buffer.
	Encode()

	// Buf returns the buffer of the encoded machine code. This is only used for testing purpose.
	Buf() []byte

	// Format returns the debug string of the current state of the compiler.
	Format() string

	// Init initializes the internal state of the compiler for the next compilation.
	Init()

	// AllocateVReg allocates a new virtual register of the given type.
	AllocateVReg(typ ssa.Type) regalloc.VReg

	// ValueDefinition returns the definition of the given value.
	ValueDefinition(ssa.Value) *SSAValueDefinition

	// VRegOf returns the virtual register of the given ssa.Value.
	VRegOf(value ssa.Value) regalloc.VReg

	// TypeOf returns the ssa.Type of the given virtual register.
	TypeOf(regalloc.VReg) ssa.Type

	// MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,
	// and a refcount of 1. That means, the instruction can be merged/swapped within the current instruction group.
	MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool

	// MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode,
	// this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid.
	//
	// Note: caller should be careful to avoid excessive allocation on opcodes slice.
	MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode

	// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
	AddRelocationInfo(funcRef ssa.FuncRef)

	// AddSourceOffsetInfo appends the source offset information for the given offset.
	AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)

	// SourceOffsetInfo returns the source offset information for the current buffer offset.
	SourceOffsetInfo() []SourceOffsetInfo

	// Emit4Bytes appends 4 bytes to the buffer. Used during the code emission.
	Emit4Bytes(b uint32)
}

// RelocationInfo represents the relocation information for a call instruction.
type RelocationInfo struct {
	// Offset represents the offset from the beginning of the machine code of either a function or the entire module.
	Offset int64
	// FuncRef is the target function of the call instruction.
	FuncRef ssa.FuncRef
}

// compiler implements Compiler.
//
// The struct is reused across compilations: Init resets the per-function
// state while retaining backing storage to reduce allocations.
type compiler struct {
	// mach is the target-specific machine backend.
	mach Machine
	// currentGID is the instruction group ID currently being lowered (see setCurrentGroupID).
	currentGID ssa.InstructionGroupID
	// ssaBuilder holds the input SSA to be lowered.
	ssaBuilder ssa.Builder
	// nextVRegID is the next virtual register ID to be allocated.
	nextVRegID regalloc.VRegID
	// ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.
	ssaValueToVRegs [] /* ValueID to */ regalloc.VReg
	// ssaValueDefinitions maps ssa.ValueID to its definition.
	ssaValueDefinitions []SSAValueDefinition
	// ssaValueRefCounts is a cached list obtained by ssa.Builder.ValueRefCounts().
	ssaValueRefCounts []int
	// returnVRegs is the list of virtual registers that store the return values.
	returnVRegs []regalloc.VReg
	// regAlloc performs register allocation over the lowered function (see RegAlloc).
	regAlloc regalloc.Allocator
	// varEdges, varEdgeTypes and constEdges are scratch storage reset by Init;
	// presumably used while resolving block-argument moves during lowering —
	// NOTE(review): verify against the Machine implementations.
	varEdges     [][2]regalloc.VReg
	varEdgeTypes []ssa.Type
	constEdges   []struct {
		cInst *ssa.Instruction
		dst   regalloc.VReg
	}
	// vRegSet, vRegIDs, tempRegs and tmpVals appear to be scratch buffers
	// reused to avoid allocations; they are not reset in Init here.
	vRegSet  []bool
	vRegIDs  []regalloc.VRegID
	tempRegs []regalloc.VReg
	tmpVals  []ssa.Value
	// ssaTypeOfVRegID maps a virtual register ID to its ssa.Type (written by AllocateVReg).
	ssaTypeOfVRegID [] /* VRegID to */ ssa.Type
	// buf accumulates the encoded machine code (see Emit4Bytes/Buf).
	buf []byte
	// relocations are the entries recorded via AddRelocationInfo.
	relocations []RelocationInfo
	// sourceOffsets are the entries recorded via AddSourceOffsetInfo.
	sourceOffsets []SourceOffsetInfo
}

// SourceOffsetInfo is a data to associate the source offset with the executable offset.
type SourceOffsetInfo struct {
	// SourceOffset is the source offset in the original source code.
	SourceOffset ssa.SourceOffset
	// ExecutableOffset is the offset in the compiled executable.
	ExecutableOffset int64
}

// Compile implements Compiler.Compile.
func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
	// Step 1: lower the SSA into the machine-specific IR.
	c.Lower()
	if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) {
		fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
	}
	if wazevoapi.DeterministicCompilationVerifierEnabled {
		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
	}
	// Step 2: register allocation.
	c.RegAlloc()
	if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) {
		fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
	}
	if wazevoapi.DeterministicCompilationVerifierEnabled {
		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
	}
	// Step 3: prologue/epilogue setup and relative-address resolution.
	c.Finalize(ctx)
	if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) {
		fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
	}
	if wazevoapi.DeterministicCompilationVerifierEnabled {
		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format())
	}
	// Step 4: encode the finalized instructions into c.buf.
	c.Encode()
	if wazevoapi.DeterministicCompilationVerifierEnabled {
		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "Encoded Machine code", hex.EncodeToString(c.buf))
	}
	// The returned slices alias the compiler's internal buffers; per the
	// Compiler.Compile contract the caller must copy them immediately.
	return c.buf, c.relocations, nil
}

// RegAlloc implements Compiler.RegAlloc.
func (c *compiler) RegAlloc() {
	regAllocFn := c.mach.Function()
	c.regAlloc.DoAllocation(regAllocFn)
}

// Finalize implements Compiler.Finalize. Per the interface contract, this
// must be called only after RegAlloc: prologue/epilogue setup depends on
// the allocation result, and address resolution runs last.
func (c *compiler) Finalize(ctx context.Context) {
	c.mach.SetupPrologue()
	c.mach.SetupEpilogue()
	c.mach.ResolveRelativeAddresses(ctx)
}

// Encode implements Compiler.Encode.
194 func (c *compiler) Encode() { 195 c.mach.Encode() 196 } 197 198 // setCurrentGroupID sets the current instruction group ID. 199 func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) { 200 c.currentGID = gid 201 } 202 203 // assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder. 204 func (c *compiler) assignVirtualRegisters() { 205 builder := c.ssaBuilder 206 refCounts := builder.ValueRefCounts() 207 c.ssaValueRefCounts = refCounts 208 209 need := len(refCounts) 210 if need >= len(c.ssaValueToVRegs) { 211 c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, need+1)...) 212 } 213 if need >= len(c.ssaValueDefinitions) { 214 c.ssaValueDefinitions = append(c.ssaValueDefinitions, make([]SSAValueDefinition, need+1)...) 215 } 216 217 for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() { 218 // First we assign a virtual register to each parameter. 219 for i := 0; i < blk.Params(); i++ { 220 p := blk.Param(i) 221 pid := p.ID() 222 typ := p.Type() 223 vreg := c.AllocateVReg(typ) 224 c.ssaValueToVRegs[pid] = vreg 225 c.ssaValueDefinitions[pid] = SSAValueDefinition{BlockParamValue: p, BlkParamVReg: vreg} 226 c.ssaTypeOfVRegID[vreg.ID()] = p.Type() 227 } 228 229 // Assigns each value to a virtual register produced by instructions. 
230 for cur := blk.Root(); cur != nil; cur = cur.Next() { 231 r, rs := cur.Returns() 232 var N int 233 if r.Valid() { 234 id := r.ID() 235 ssaTyp := r.Type() 236 typ := r.Type() 237 vReg := c.AllocateVReg(typ) 238 c.ssaValueToVRegs[id] = vReg 239 c.ssaValueDefinitions[id] = SSAValueDefinition{ 240 Instr: cur, 241 N: 0, 242 RefCount: refCounts[id], 243 } 244 c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp 245 N++ 246 } 247 for _, r := range rs { 248 id := r.ID() 249 ssaTyp := r.Type() 250 vReg := c.AllocateVReg(ssaTyp) 251 c.ssaValueToVRegs[id] = vReg 252 c.ssaValueDefinitions[id] = SSAValueDefinition{ 253 Instr: cur, 254 N: N, 255 RefCount: refCounts[id], 256 } 257 c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp 258 N++ 259 } 260 } 261 } 262 263 for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ { 264 typ := retBlk.Param(i).Type() 265 vReg := c.AllocateVReg(typ) 266 c.returnVRegs = append(c.returnVRegs, vReg) 267 c.ssaTypeOfVRegID[vReg.ID()] = typ 268 } 269 } 270 271 // AllocateVReg implements Compiler.AllocateVReg. 272 func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg { 273 regType := regalloc.RegTypeOf(typ) 274 r := regalloc.VReg(c.nextVRegID).SetRegType(regType) 275 276 id := r.ID() 277 if int(id) >= len(c.ssaTypeOfVRegID) { 278 c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...) 279 } 280 c.ssaTypeOfVRegID[id] = typ 281 c.nextVRegID++ 282 return r 283 } 284 285 // Init implements Compiler.Init. 286 func (c *compiler) Init() { 287 c.currentGID = 0 288 c.nextVRegID = regalloc.VRegIDNonReservedBegin 289 c.returnVRegs = c.returnVRegs[:0] 290 c.mach.Reset() 291 c.varEdges = c.varEdges[:0] 292 c.constEdges = c.constEdges[:0] 293 c.regAlloc.Reset() 294 c.buf = c.buf[:0] 295 c.sourceOffsets = c.sourceOffsets[:0] 296 c.relocations = c.relocations[:0] 297 } 298 299 // ValueDefinition implements Compiler.ValueDefinition. 
300 func (c *compiler) ValueDefinition(value ssa.Value) *SSAValueDefinition { 301 return &c.ssaValueDefinitions[value.ID()] 302 } 303 304 // VRegOf implements Compiler.VRegOf. 305 func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg { 306 return c.ssaValueToVRegs[value.ID()] 307 } 308 309 // Format implements Compiler.Format. 310 func (c *compiler) Format() string { 311 return c.mach.Format() 312 } 313 314 // TypeOf implements Compiler.Format. 315 func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type { 316 return c.ssaTypeOfVRegID[v.ID()] 317 } 318 319 // MatchInstr implements Compiler.MatchInstr. 320 func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool { 321 instr := def.Instr 322 return def.IsFromInstr() && 323 instr.Opcode() == opcode && 324 instr.GroupID() == c.currentGID && 325 def.RefCount < 2 326 } 327 328 // MatchInstrOneOf implements Compiler.MatchInstrOneOf. 329 func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode { 330 instr := def.Instr 331 if !def.IsFromInstr() { 332 return ssa.OpcodeInvalid 333 } 334 335 if instr.GroupID() != c.currentGID { 336 return ssa.OpcodeInvalid 337 } 338 339 if def.RefCount >= 2 { 340 return ssa.OpcodeInvalid 341 } 342 343 opcode := instr.Opcode() 344 for _, op := range opcodes { 345 if opcode == op { 346 return opcode 347 } 348 } 349 return ssa.OpcodeInvalid 350 } 351 352 // SSABuilder implements Compiler .SSABuilder. 353 func (c *compiler) SSABuilder() ssa.Builder { 354 return c.ssaBuilder 355 } 356 357 // AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo. 358 func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) { 359 c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{ 360 SourceOffset: sourceOffset, 361 ExecutableOffset: executableOffset, 362 }) 363 } 364 365 // SourceOffsetInfo implements Compiler.SourceOffsetInfo. 
366 func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo { 367 return c.sourceOffsets 368 } 369 370 // AddRelocationInfo implements Compiler.AddRelocationInfo. 371 func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) { 372 c.relocations = append(c.relocations, RelocationInfo{ 373 Offset: int64(len(c.buf)), 374 FuncRef: funcRef, 375 }) 376 } 377 378 // Emit4Bytes implements Compiler.Add4Bytes. 379 func (c *compiler) Emit4Bytes(b uint32) { 380 c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24)) 381 } 382 383 // Buf implements Compiler.Buf. 384 func (c *compiler) Buf() []byte { 385 return c.buf 386 }