github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/wazevo/backend/compiler.go (about)

     1  package backend
     2  
     3  import (
     4  	"context"
     5  	"encoding/hex"
     6  	"fmt"
     7  
     8  	"github.com/wasilibs/wazerox/internal/engine/wazevo/backend/regalloc"
     9  	"github.com/wasilibs/wazerox/internal/engine/wazevo/ssa"
    10  	"github.com/wasilibs/wazerox/internal/engine/wazevo/wazevoapi"
    11  )
    12  
    13  // NewCompiler returns a new Compiler that can generate a machine code.
    14  func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler {
    15  	return newCompiler(ctx, mach, builder)
    16  }
    17  
    18  func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler {
    19  	c := &compiler{
    20  		mach: mach, ssaBuilder: builder,
    21  		nextVRegID: regalloc.VRegIDNonReservedBegin,
    22  		regAlloc:   regalloc.NewAllocator(mach.RegisterInfo()),
    23  	}
    24  	mach.SetCompiler(c)
    25  	return c
    26  }
    27  
     28  // Compiler is the backend of wazevo which takes an ssa.Builder and a Machine,
     29  // and uses the information there to emit the final machine code.
     30  type Compiler interface {
     31  	// SSABuilder returns the ssa.Builder used by this compiler.
     32  	SSABuilder() ssa.Builder
     33  
     34  	// Compile executes the following steps:
     35  	// 	1. Lower()
     36  	// 	2. RegAlloc()
     37  	// 	3. Finalize()
     38  	// 	4. Encode()
     39  	//
     40  	// Each step can be called individually for testing purposes, therefore they are exposed in this interface too.
     41  	//
     42  	// The returned slices are the machine code and the relocation information for the machine code.
     43  	// The caller is responsible for copying them immediately since the compiler may reuse the buffers (see Init).
     44  	Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error)
     45  
     46  	// Lower lowers the given ssa.Instruction to the machine-specific instructions.
     47  	Lower()
     48  
     49  	// RegAlloc performs the register allocation. This must be called after Lower.
     50  	RegAlloc()
     51  
     52  	// Finalize performs the finalization of the compilation. This must be called after RegAlloc.
     53  	Finalize()
     54  
     55  	// Encode encodes the machine code to the buffer.
     56  	Encode()
     57  
     58  	// Buf returns the buffer of the encoded machine code. This is only used for testing purposes.
     59  	Buf() []byte
     60  
     61  	// Format returns the debug string of the current state of the compiler.
     62  	Format() string
     63  
     64  	// Init resets the internal state of the compiler for the next compilation.
     65  	Init()
     66  
     67  	// AllocateVReg allocates a new virtual register of the given type.
     68  	AllocateVReg(typ ssa.Type) regalloc.VReg
     69  
     70  	// ValueDefinition returns the definition of the given value.
     71  	ValueDefinition(ssa.Value) *SSAValueDefinition
     72  
     73  	// VRegOf returns the virtual register of the given ssa.Value.
     74  	VRegOf(value ssa.Value) regalloc.VReg
     75  
     76  	// TypeOf returns the ssa.Type of the given virtual register.
     77  	TypeOf(regalloc.VReg) ssa.Type
     78  
     79  	// MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,
     80  	// and a refcount of less than 2. That means, the instruction can be merged/swapped within the current instruction group.
     81  	MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool
     82  
     83  	// MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If the instruction's opcode is one of
     84  	// the given opcodes, this returns that opcode. Otherwise, this returns ssa.OpcodeInvalid.
     85  	//
     86  	// Note: caller should be careful to avoid excessive allocation on opcodes slice.
     87  	MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode
     88  
     89  	// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
     90  	AddRelocationInfo(funcRef ssa.FuncRef)
     91  
     92  	// AddSourceOffsetInfo appends the source offset information for the given executable offset.
     93  	AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)
     94  
     95  	// SourceOffsetInfo returns all the source offset information recorded so far via AddSourceOffsetInfo.
     96  	SourceOffsetInfo() []SourceOffsetInfo
     97  
     98  	// Emit4Bytes appends the 4 bytes of b to the buffer in little-endian order. Used during the code emission.
     99  	Emit4Bytes(b uint32)
    100  }
   101  
    102  // RelocationInfo represents the relocation information for a call instruction.
    103  type RelocationInfo struct {
    104  	// Offset represents the offset from the beginning of the machine code of either a function or the entire module.
    105  	Offset int64
    106  	// FuncRef is the target function of the call instruction.
    107  	FuncRef ssa.FuncRef
    108  }
   109  
    110  // compiler implements Compiler.
    111  type compiler struct {
    112  	mach       Machine // mach is the Machine this compiler delegates to; it receives this compiler via SetCompiler.
    113  	currentGID ssa.InstructionGroupID // currentGID is set by setCurrentGroupID and compared in MatchInstr/MatchInstrOneOf.
    114  	ssaBuilder ssa.Builder // ssaBuilder is the source of SSA blocks/values; returned by SSABuilder.
    115  	// nextVRegID is the next virtual register ID to be allocated.
    116  	nextVRegID regalloc.VRegID
    117  	// ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.
    118  	ssaValueToVRegs [] /* ssa.ValueID to */ regalloc.VReg
    119  	// ssaValueDefinitions maps ssa.ValueID to its definition.
    120  	ssaValueDefinitions []SSAValueDefinition
    121  	// ssaValueRefCounts is a cached list obtained by ssa.Builder.ValueRefCounts().
    122  	ssaValueRefCounts []int
    123  	// returnVRegs is the list of virtual registers that store the return values.
    124  	returnVRegs  []regalloc.VReg
    125  	regAlloc     regalloc.Allocator // regAlloc runs the register allocation in RegAlloc and is reset in Init.
    126  	varEdges     [][2]regalloc.VReg
    127  	varEdgeTypes []ssa.Type
    128  	constEdges   []struct {
    129  		cInst *ssa.Instruction
    130  		dst   regalloc.VReg
    131  	}
    132  	vRegSet         []bool
    133  	vRegIDs         []regalloc.VRegID
    134  	tempRegs        []regalloc.VReg
    135  	tmpVals         []ssa.Value
    136  	ssaTypeOfVRegID [] /* VRegID to */ ssa.Type
    137  	buf             []byte // buf holds the emitted machine code (see Emit4Bytes); truncated, not freed, by Init.
    138  	relocations     []RelocationInfo // relocations is appended to by AddRelocationInfo and returned by Compile.
    139  	sourceOffsets   []SourceOffsetInfo // sourceOffsets is appended to by AddSourceOffsetInfo.
    140  }
   141  
    142  // SourceOffsetInfo associates a source offset with an executable offset.
    143  type SourceOffsetInfo struct {
    144  	// SourceOffset is the source offset in the original source code.
    145  	SourceOffset ssa.SourceOffset
    146  	// ExecutableOffset is the offset in the compiled executable.
    147  	ExecutableOffset int64
    148  }
   149  
   150  // Compile implements Compiler.Compile.
   151  func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
   152  	c.Lower()
   153  	if wazevoapi.PrintSSAToBackendIRLowering {
   154  		fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
   155  	}
   156  	if wazevoapi.DeterministicCompilationVerifierEnabled {
   157  		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
   158  	}
   159  	c.RegAlloc()
   160  	if wazevoapi.PrintRegisterAllocated {
   161  		fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
   162  	}
   163  	if wazevoapi.DeterministicCompilationVerifierEnabled {
   164  		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
   165  	}
   166  	c.Finalize()
   167  	if wazevoapi.PrintFinalizedMachineCode {
   168  		fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
   169  	}
   170  	if wazevoapi.DeterministicCompilationVerifierEnabled {
   171  		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format())
   172  	}
   173  	c.Encode()
   174  	if wazevoapi.DeterministicCompilationVerifierEnabled {
   175  		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "Encoded Machine code", hex.EncodeToString(c.buf))
   176  	}
   177  	return c.buf, c.relocations, nil
   178  }
   179  
   180  // RegAlloc implements Compiler.RegAlloc.
   181  func (c *compiler) RegAlloc() {
   182  	regAllocFn := c.mach.Function()
   183  	c.regAlloc.DoAllocation(regAllocFn)
   184  }
   185  
   186  // Finalize implements Compiler.Finalize.
   187  func (c *compiler) Finalize() {
   188  	c.mach.SetupPrologue()
   189  	c.mach.SetupEpilogue()
   190  	c.mach.ResolveRelativeAddresses()
   191  }
   192  
   193  // Encode implements Compiler.Encode.
   194  func (c *compiler) Encode() {
   195  	c.mach.Encode()
   196  }
   197  
   198  // setCurrentGroupID sets the current instruction group ID.
   199  func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) {
   200  	c.currentGID = gid
   201  }
   202  
   203  // assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder.
   204  func (c *compiler) assignVirtualRegisters() {
   205  	builder := c.ssaBuilder
   206  	refCounts := builder.ValueRefCounts()
   207  	c.ssaValueRefCounts = refCounts
   208  
   209  	need := len(refCounts)
   210  	if need >= len(c.ssaValueToVRegs) {
   211  		c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, need+1)...)
   212  	}
   213  	if need >= len(c.ssaValueDefinitions) {
   214  		c.ssaValueDefinitions = append(c.ssaValueDefinitions, make([]SSAValueDefinition, need+1)...)
   215  	}
   216  
   217  	for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
   218  		// First we assign a virtual register to each parameter.
   219  		for i := 0; i < blk.Params(); i++ {
   220  			p := blk.Param(i)
   221  			pid := p.ID()
   222  			typ := p.Type()
   223  			vreg := c.AllocateVReg(typ)
   224  			c.ssaValueToVRegs[pid] = vreg
   225  			c.ssaValueDefinitions[pid] = SSAValueDefinition{BlockParamValue: p, BlkParamVReg: vreg}
   226  			c.ssaTypeOfVRegID[vreg.ID()] = p.Type()
   227  		}
   228  
   229  		// Assigns each value to a virtual register produced by instructions.
   230  		for cur := blk.Root(); cur != nil; cur = cur.Next() {
   231  			r, rs := cur.Returns()
   232  			var N int
   233  			if r.Valid() {
   234  				id := r.ID()
   235  				ssaTyp := r.Type()
   236  				typ := r.Type()
   237  				vReg := c.AllocateVReg(typ)
   238  				c.ssaValueToVRegs[id] = vReg
   239  				c.ssaValueDefinitions[id] = SSAValueDefinition{
   240  					Instr:    cur,
   241  					N:        0,
   242  					RefCount: refCounts[id],
   243  				}
   244  				c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
   245  				N++
   246  			}
   247  			for _, r := range rs {
   248  				id := r.ID()
   249  				ssaTyp := r.Type()
   250  				vReg := c.AllocateVReg(ssaTyp)
   251  				c.ssaValueToVRegs[id] = vReg
   252  				c.ssaValueDefinitions[id] = SSAValueDefinition{
   253  					Instr:    cur,
   254  					N:        N,
   255  					RefCount: refCounts[id],
   256  				}
   257  				c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
   258  				N++
   259  			}
   260  		}
   261  	}
   262  
   263  	for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ {
   264  		typ := retBlk.Param(i).Type()
   265  		vReg := c.AllocateVReg(typ)
   266  		c.returnVRegs = append(c.returnVRegs, vReg)
   267  		c.ssaTypeOfVRegID[vReg.ID()] = typ
   268  	}
   269  }
   270  
   271  // AllocateVReg implements Compiler.AllocateVReg.
   272  func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg {
   273  	regType := regalloc.RegTypeOf(typ)
   274  	r := regalloc.VReg(c.nextVRegID).SetRegType(regType)
   275  
   276  	id := r.ID()
   277  	if int(id) >= len(c.ssaTypeOfVRegID) {
   278  		c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...)
   279  	}
   280  	c.ssaTypeOfVRegID[id] = typ
   281  	c.nextVRegID++
   282  	return r
   283  }
   284  
   285  // Init implements Compiler.Init.
   286  func (c *compiler) Init() {
   287  	c.currentGID = 0
   288  	c.nextVRegID = regalloc.VRegIDNonReservedBegin
   289  	c.returnVRegs = c.returnVRegs[:0]
   290  	c.mach.Reset()
   291  	c.varEdges = c.varEdges[:0]
   292  	c.constEdges = c.constEdges[:0]
   293  	c.regAlloc.Reset()
   294  	c.buf = c.buf[:0]
   295  	c.sourceOffsets = c.sourceOffsets[:0]
   296  	c.relocations = c.relocations[:0]
   297  }
   298  
   299  // ValueDefinition implements Compiler.ValueDefinition.
   300  func (c *compiler) ValueDefinition(value ssa.Value) *SSAValueDefinition {
   301  	return &c.ssaValueDefinitions[value.ID()]
   302  }
   303  
   304  // VRegOf implements Compiler.VRegOf.
   305  func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg {
   306  	return c.ssaValueToVRegs[value.ID()]
   307  }
   308  
   309  // Format implements Compiler.Format.
   310  func (c *compiler) Format() string {
   311  	return c.mach.Format()
   312  }
   313  
   314  // TypeOf implements Compiler.Format.
   315  func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type {
   316  	return c.ssaTypeOfVRegID[v.ID()]
   317  }
   318  
   319  // MatchInstr implements Compiler.MatchInstr.
   320  func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool {
   321  	instr := def.Instr
   322  	return def.IsFromInstr() &&
   323  		instr.Opcode() == opcode &&
   324  		instr.GroupID() == c.currentGID &&
   325  		def.RefCount < 2
   326  }
   327  
   328  // MatchInstrOneOf implements Compiler.MatchInstrOneOf.
   329  func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode {
   330  	instr := def.Instr
   331  	if !def.IsFromInstr() {
   332  		return ssa.OpcodeInvalid
   333  	}
   334  
   335  	if instr.GroupID() != c.currentGID {
   336  		return ssa.OpcodeInvalid
   337  	}
   338  
   339  	if def.RefCount >= 2 {
   340  		return ssa.OpcodeInvalid
   341  	}
   342  
   343  	opcode := instr.Opcode()
   344  	for _, op := range opcodes {
   345  		if opcode == op {
   346  			return opcode
   347  		}
   348  	}
   349  	return ssa.OpcodeInvalid
   350  }
   351  
   352  // SSABuilder implements Compiler .SSABuilder.
   353  func (c *compiler) SSABuilder() ssa.Builder {
   354  	return c.ssaBuilder
   355  }
   356  
   357  // AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo.
   358  func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) {
   359  	c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{
   360  		SourceOffset:     sourceOffset,
   361  		ExecutableOffset: executableOffset,
   362  	})
   363  }
   364  
   365  // SourceOffsetInfo implements Compiler.SourceOffsetInfo.
   366  func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
   367  	return c.sourceOffsets
   368  }
   369  
   370  // AddRelocationInfo implements Compiler.AddRelocationInfo.
   371  func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
   372  	c.relocations = append(c.relocations, RelocationInfo{
   373  		Offset:  int64(len(c.buf)),
   374  		FuncRef: funcRef,
   375  	})
   376  }
   377  
   378  // Emit4Bytes implements Compiler.Add4Bytes.
   379  func (c *compiler) Emit4Bytes(b uint32) {
   380  	c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24))
   381  }
   382  
   383  // Buf implements Compiler.Buf.
   384  func (c *compiler) Buf() []byte {
   385  	return c.buf
   386  }