github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/asm/assembler.go (about)

     1  package asm
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  )
     7  
// Register represents an architecture-specific register.
// It is stored in a single byte.
type Register byte
    10  
// NilRegister is the only architecture-independent register, and
// can be used to indicate that no register is specified.
// Its value is zero, i.e. the zero value of Register.
const NilRegister Register = 0
    14  
// Instruction represents an architecture-specific instruction.
type Instruction uint16 // 16 bits wide to accommodate the high cardinality of vector ops
    17  
// ConditionalRegisterState represents an architecture-specific state of
// the conditional register. It is stored in a single byte.
type ConditionalRegisterState byte
    21  
// ConditionalRegisterStateUnset is the only architecture-independent conditional state, and
// can be used to indicate that no conditional state is specified.
// Its value is zero, i.e. the zero value of ConditionalRegisterState.
const ConditionalRegisterStateUnset ConditionalRegisterState = 0
    25  
// Node represents a node in the linked list of assembled operations.
// It embeds fmt.Stringer, so every implementation also provides String().
type Node interface {
	fmt.Stringer

	// AssignJumpTarget assigns the given target node as the destination of
	// the jump instruction represented by this Node.
	AssignJumpTarget(target Node)

	// AssignDestinationConstant assigns the given constant as the destination
	// of the instruction for this Node.
	AssignDestinationConstant(value ConstantValue)

	// AssignSourceConstant assigns the given constant as the source
	// of the instruction for this Node.
	AssignSourceConstant(value ConstantValue)

	// OffsetInBinary returns the offset of this Node in the assembled binary.
	OffsetInBinary() NodeOffsetInBinary
}
    45  
// NodeOffsetInBinary represents the offset of a node in the final binary.
// It is an alias of uint64 (not a distinct type), so values are interchangeable
// with uint64 without conversion.
type NodeOffsetInBinary = uint64
    48  
// ConstantValue represents a constant value used in an instruction.
// It is an alias of int64 (not a distinct type), so values are interchangeable
// with int64 without conversion.
type ConstantValue = int64
    51  
    52  // StaticConst represents an arbitrary constant bytes which are pooled and emitted by assembler into the binary.
    53  // These constants can be referenced by instructions.
    54  type StaticConst struct {
    55  	// offsetFinalizedCallbacks holds callbacks which are called when .OffsetInBinary is finalized by assembler implementation.
    56  	offsetFinalizedCallbacks []func(offsetOfConstInBinary uint64)
    57  
    58  	Raw []byte
    59  	// OffsetInBinary is the offset of this static const in the result binary.
    60  	OffsetInBinary uint64
    61  }
    62  
    63  // NewStaticConst returns the pointer to the new NewStaticConst for given bytes.
    64  func NewStaticConst(raw []byte) *StaticConst {
    65  	return &StaticConst{Raw: raw}
    66  }
    67  
    68  // AddOffsetFinalizedCallback adds a callback into offsetFinalizedCallbacks.
    69  func (s *StaticConst) AddOffsetFinalizedCallback(cb func(offsetOfConstInBinary uint64)) {
    70  	s.offsetFinalizedCallbacks = append(s.offsetFinalizedCallbacks, cb)
    71  }
    72  
    73  // SetOffsetInBinary finalizes the offset of this StaticConst, and invokes callbacks.
    74  func (s *StaticConst) SetOffsetInBinary(offset uint64) {
    75  	s.OffsetInBinary = offset
    76  	for _, cb := range s.offsetFinalizedCallbacks {
    77  		cb(offset)
    78  	}
    79  }
    80  
    81  // StaticConstPool holds a bulk of StaticConst which are yet to be emitted into the binary.
    82  type StaticConstPool struct {
    83  	// addedConsts is used to deduplicate the consts to reduce the final size of binary.
    84  	// Note: we can use map on .consts field and remove this field,
    85  	// but we have the separate field for deduplication in order to have deterministic assembling behavior.
    86  	addedConsts map[*StaticConst]struct{}
    87  
    88  	Consts []*StaticConst
    89  	// FirstUseOffsetInBinary holds the offset of the first instruction which accesses this const pool .
    90  	FirstUseOffsetInBinary NodeOffsetInBinary
    91  	// PoolSizeInBytes is the current size of the pool in bytes.
    92  	PoolSizeInBytes int
    93  }
    94  
    95  func NewStaticConstPool() StaticConstPool {
    96  	return StaticConstPool{addedConsts: map[*StaticConst]struct{}{}, FirstUseOffsetInBinary: math.MaxUint64}
    97  }
    98  
    99  // Reset resets the *StaticConstPool for reuse.
   100  func (p *StaticConstPool) Reset() {
   101  	for _, c := range p.Consts {
   102  		delete(p.addedConsts, c)
   103  	}
   104  	// Reuse the slice to avoid re-allocations.
   105  	p.Consts = p.Consts[:0]
   106  	p.PoolSizeInBytes = 0
   107  	p.FirstUseOffsetInBinary = math.MaxUint64
   108  }
   109  
   110  // Empty returns true if StaticConstPool is empty.
   111  func (p *StaticConstPool) Empty() bool {
   112  	return p.FirstUseOffsetInBinary == math.MaxUint64
   113  }
   114  
   115  // AddConst adds a *StaticConst into the pool if it's not already added.
   116  func (p *StaticConstPool) AddConst(c *StaticConst, useOffset NodeOffsetInBinary) {
   117  	if _, ok := p.addedConsts[c]; ok {
   118  		return
   119  	}
   120  
   121  	if p.Empty() {
   122  		p.FirstUseOffsetInBinary = useOffset
   123  	}
   124  
   125  	c.offsetFinalizedCallbacks = c.offsetFinalizedCallbacks[:0]
   126  
   127  	p.Consts = append(p.Consts, c)
   128  	p.PoolSizeInBytes += len(c.Raw)
   129  	p.addedConsts[c] = struct{}{}
   130  }
   131  
// AssemblerBase is the common interface for assemblers among multiple architectures.
//
// Note: some of the methods can be implemented in an arch-independent way, but not all can be
// implemented as such. However, we intentionally put such arch-dependent methods here
// in order to provide the common documentation interface.
type AssemblerBase interface {
	// Reset resets the state of the Assembler implementation and marks it ready for
	// the compilation of a new function.
	Reset()

	// Assemble produces the final binary for the assembled operations.
	// NOTE(review): presumably the binary is written into the given Buffer — confirm
	// against the Buffer definition and the implementations.
	Assemble(Buffer) error

	// SetJumpTargetOnNext instructs the assembler that the next node must be
	// assigned to the given node's jump destination.
	SetJumpTargetOnNext(node Node)

	// BuildJumpTable calculates the offsets between the first instruction `initialInstructions[0]`
	// and others (e.g. initialInstructions[3]), and writes the calculated offsets into the pre-allocated
	// `table` StaticConst in little endian.
	BuildJumpTable(table *StaticConst, initialInstructions []Node)

	// AllocateNOP allocates a Node for a NOP instruction.
	AllocateNOP() Node

	// Add appends the given `Node` to the assembled linked list.
	Add(Node)

	// CompileStandAlone adds an instruction which takes no arguments.
	CompileStandAlone(instruction Instruction) Node

	// CompileConstToRegister adds an instruction where the source operand is `value` as a constant and
	// the destination is the `destinationReg` register.
	CompileConstToRegister(instruction Instruction, value ConstantValue, destinationReg Register) Node

	// CompileRegisterToRegister adds an instruction where source and destination operands are registers.
	CompileRegisterToRegister(instruction Instruction, from, to Register)

	// CompileMemoryToRegister adds an instruction where the source operand is the memory address specified by
	// `sourceBaseReg+sourceOffsetConst` and the destination is the `destinationReg` register.
	CompileMemoryToRegister(
		instruction Instruction,
		sourceBaseReg Register,
		sourceOffsetConst ConstantValue,
		destinationReg Register,
	)

	// CompileRegisterToMemory adds an instruction where the source operand is the `sourceRegister` register and
	// the destination is the memory address specified by `destinationBaseRegister+destinationOffsetConst`.
	CompileRegisterToMemory(
		instruction Instruction,
		sourceRegister Register,
		destinationBaseRegister Register,
		destinationOffsetConst ConstantValue,
	)

	// CompileJump adds a jump-type instruction and returns the corresponding Node in the assembled linked list.
	CompileJump(jmpInstruction Instruction) Node

	// CompileJumpToRegister adds a jump-type instruction whose destination is the memory address specified by
	// the `reg` register.
	CompileJumpToRegister(jmpInstruction Instruction, reg Register)

	// CompileReadInstructionAddress adds an ADR instruction to set the absolute address of the "target instruction"
	// into destinationRegister. The "target instruction" is specified by the beforeAcquisitionTargetInstruction
	// argument: it is the instruction right after the instruction of the beforeAcquisitionTargetInstruction type.
	//
	// For example, if `beforeAcquisitionTargetInstruction == RET` and we have the instruction sequence like
	// `ADR -> X -> Y -> ... -> RET -> MOV`, then the `ADR` instruction emitted by this function sets the absolute
	// address of the `MOV` instruction into the destination register.
	CompileReadInstructionAddress(destinationRegister Register, beforeAcquisitionTargetInstruction Instruction)
}
   202  
// JumpTableMaximumOffset represents the limit on the size of a jump table in bytes.
// The limit matters when users try loading an extremely large WebAssembly binary which contains
// a br_table statement with approximately 4294967296 (2^32) targets. Realistically speaking, that kind
// of binary could result in more than ten gigabytes of native compiled code where we have to care about
// huge stacks whose height might exceed the 32-bit range, and such a huge stack doesn't work with the
// current implementation.
const JumpTableMaximumOffset = math.MaxUint32