wa-lang.org/wazero@v1.0.2/internal/asm/assembler.go (about)

     1  package asm
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  )
     7  
// Register identifies an architecture-specific register.
//
// The value NilRegister is the only architecture-independent one.
type Register byte
    10  
// NilRegister is the only architecture-independent register, and
// can be used to indicate that no register is specified.
// It is the zero value of Register.
const NilRegister Register = 0
    14  
// Instruction identifies an architecture-specific instruction.
//
// It is a 16-bit value in order to accommodate the high cardinality of vector ops.
type Instruction uint16
    17  
// ConditionalRegisterState represents an architecture-specific state of the
// conditional register.
type ConditionalRegisterState byte
    21  
// ConditionalRegisterStateUnset is the only architecture-independent conditional state, and
// can be used to indicate that no conditional state is specified.
// It is the zero value of ConditionalRegisterState.
const ConditionalRegisterStateUnset ConditionalRegisterState = 0
    25  
// Node represents a node in the linked list of assembled operations.
//
// Every Node is also a fmt.Stringer.
type Node interface {
	fmt.Stringer

	// AssignJumpTarget assigns the given target node as the destination of
	// the jump instruction for this Node.
	AssignJumpTarget(target Node)

	// AssignDestinationConstant assigns the given constant as the destination
	// of the instruction for this Node.
	AssignDestinationConstant(value ConstantValue)

	// AssignSourceConstant assigns the given constant as the source
	// of the instruction for this Node.
	AssignSourceConstant(value ConstantValue)

	// OffsetInBinary returns the offset of this Node in the assembled binary.
	OffsetInBinary() NodeOffsetInBinary
}
    45  
// NodeOffsetInBinary represents the offset of a Node in the final binary.
// It is an alias of uint64, not a distinct type.
type NodeOffsetInBinary = uint64
    48  
// ConstantValue represents a constant value used in an instruction.
// It is an alias of int64, not a distinct type.
type ConstantValue = int64
    51  
    52  // StaticConst represents an arbitrary constant bytes which are pooled and emitted by assembler into the binary.
    53  // These constants can be referenced by instructions.
    54  type StaticConst struct {
    55  	Raw []byte
    56  	// OffsetInBinary is the offset of this static const in the result binary.
    57  	offsetInBinary uint64
    58  	// offsetFinalizedCallbacks holds callbacks which are called when .OffsetInBinary is finalized by assembler implementation.
    59  	offsetFinalizedCallbacks []func(offsetOfConstInBinary uint64)
    60  }
    61  
    62  // NewStaticConst returns the pointer to the new NewStaticConst for given bytes.
    63  func NewStaticConst(raw []byte) *StaticConst {
    64  	return &StaticConst{Raw: raw}
    65  }
    66  
    67  // AddOffsetFinalizedCallback adds a callback into offsetFinalizedCallbacks.
    68  func (s *StaticConst) AddOffsetFinalizedCallback(cb func(offsetOfConstInBinary uint64)) {
    69  	s.offsetFinalizedCallbacks = append(s.offsetFinalizedCallbacks, cb)
    70  }
    71  
    72  // SetOffsetInBinary finalizes the offset of this StaticConst, and invokes callbacks.
    73  func (s *StaticConst) SetOffsetInBinary(offset uint64) {
    74  	s.offsetInBinary = offset
    75  	for _, cb := range s.offsetFinalizedCallbacks {
    76  		cb(offset)
    77  	}
    78  }
    79  
    80  // StaticConstPool holds a bulk of StaticConst which are yet to be emitted into the binary.
    81  type StaticConstPool struct {
    82  	// FirstUseOffsetInBinary holds the offset of the first instruction which accesses this const pool .
    83  	FirstUseOffsetInBinary *NodeOffsetInBinary
    84  	Consts                 []*StaticConst
    85  	// addedConsts is used to deduplicate the consts to reduce the final size of binary.
    86  	// Note: we can use map on .consts field and remove this field,
    87  	// but we have the separate field for deduplication in order to have deterministic assembling behavior.
    88  	addedConsts map[*StaticConst]struct{}
    89  	// PoolSizeInBytes is the current size of the pool in bytes.
    90  	PoolSizeInBytes int
    91  }
    92  
    93  // NewStaticConstPool returns the pointer to a new StaticConstPool.
    94  func NewStaticConstPool() *StaticConstPool {
    95  	return &StaticConstPool{addedConsts: map[*StaticConst]struct{}{}}
    96  }
    97  
    98  // AddConst adds a *StaticConst into the pool if it's not already added.
    99  func (p *StaticConstPool) AddConst(c *StaticConst, useOffset NodeOffsetInBinary) {
   100  	if _, ok := p.addedConsts[c]; ok {
   101  		return
   102  	}
   103  
   104  	if p.FirstUseOffsetInBinary == nil {
   105  		p.FirstUseOffsetInBinary = &useOffset
   106  	}
   107  
   108  	p.Consts = append(p.Consts, c)
   109  	p.PoolSizeInBytes += len(c.Raw)
   110  	p.addedConsts[c] = struct{}{}
   111  }
   112  
// AssemblerBase is the common interface for assemblers among multiple architectures.
//
// Note: some of the methods can be implemented in an arch-independent way, but not all can be
// implemented as such. However, we intentionally put such arch-dependent methods here
// in order to provide the common documentation interface.
type AssemblerBase interface {
	// Assemble produces the final binary for the assembled operations.
	Assemble() ([]byte, error)

	// SetJumpTargetOnNext instructs the assembler that the next node must be
	// assigned as the jump destination of each of the given nodes.
	SetJumpTargetOnNext(nodes ...Node)

	// BuildJumpTable calculates the offsets between the first instruction `initialInstructions[0]`
	// and others (e.g. initialInstructions[3]), and writes the calculated offsets into the pre-allocated
	// `table` StaticConst in little endian.
	BuildJumpTable(table *StaticConst, initialInstructions []Node)

	// CompileStandAlone adds an instruction which takes no arguments.
	CompileStandAlone(instruction Instruction) Node

	// CompileConstToRegister adds an instruction where the source operand is `value` as a constant and
	// the destination is the `destinationReg` register.
	CompileConstToRegister(instruction Instruction, value ConstantValue, destinationReg Register) Node

	// CompileRegisterToRegister adds an instruction where the source (`from`) and
	// destination (`to`) operands are registers.
	CompileRegisterToRegister(instruction Instruction, from, to Register)

	// CompileMemoryToRegister adds an instruction where the source operand is the memory address specified by
	// `sourceBaseReg+sourceOffsetConst` and the destination is the `destinationReg` register.
	CompileMemoryToRegister(
		instruction Instruction,
		sourceBaseReg Register,
		sourceOffsetConst ConstantValue,
		destinationReg Register,
	)

	// CompileRegisterToMemory adds an instruction where the source operand is the `sourceRegister` register and
	// the destination is the memory address specified by `destinationBaseRegister+destinationOffsetConst`.
	CompileRegisterToMemory(
		instruction Instruction,
		sourceRegister Register,
		destinationBaseRegister Register,
		destinationOffsetConst ConstantValue,
	)

	// CompileJump adds a jump-type instruction and returns the corresponding Node in the assembled linked list.
	CompileJump(jmpInstruction Instruction) Node

	// CompileJumpToRegister adds a jump-type instruction whose destination is the memory address
	// specified by the `reg` register.
	CompileJumpToRegister(jmpInstruction Instruction, reg Register)

	// CompileReadInstructionAddress adds an ADR instruction to set the absolute address of the "target instruction"
	// into destinationRegister. The "target instruction" is specified by the beforeAcquisitionTargetInstruction
	// argument: the target is "the instruction right after the instruction of type
	// beforeAcquisitionTargetInstruction".
	//
	// For example, if `beforeAcquisitionTargetInstruction == RET` and we have the instruction sequence like
	// `ADR -> X -> Y -> ... -> RET -> MOV`, then the `ADR` instruction emitted by this function sets the absolute
	// address of the `MOV` instruction into the destination register.
	CompileReadInstructionAddress(destinationRegister Register, beforeAcquisitionTargetInstruction Instruction)
}
   173  
// JumpTableMaximumOffset represents the limit on the size of jump table in bytes.
//
// The limit is only relevant when users try loading an extremely large WebAssembly binary
// which contains a br_table statement with approximately 4294967296 (2^32) targets.
// Realistically speaking, that kind of binary could result in more than ten gigabytes of
// native compiled code where we have to care about huge stacks whose height might exceed
// 32-bit range, and such a huge stack doesn't work with the current implementation.
const JumpTableMaximumOffset = math.MaxUint32