github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/asm/assembler.go (about) 1 package asm 2 3 import ( 4 "fmt" 5 "math" 6 ) 7 8 // Register represents architecture-specific registers. 9 type Register byte 10 11 // NilRegister is the only architecture-independent register, and 12 // can be used to indicate that no register is specified. 13 const NilRegister Register = 0 14 15 // Instruction represents architecture-specific instructions. 16 type Instruction uint16 // to accommodate the high cardinality of vector ops 17 18 // ConditionalRegisterState represents architecture-specific conditional 19 // register's states. 20 type ConditionalRegisterState byte 21 22 // ConditionalRegisterStateUnset is the only architecture-independent conditional state, and 23 // can be used to indicate that no conditional state is specified. 24 const ConditionalRegisterStateUnset ConditionalRegisterState = 0 25 26 // Node represents a node in the linked list of assembled operations. 27 type Node interface { 28 fmt.Stringer 29 30 // AssignJumpTarget assigns the given target node as the destination of 31 // jump instruction for this Node. 32 AssignJumpTarget(target Node) 33 34 // AssignDestinationConstant assigns the given constant as the destination 35 // of the instruction for this node. 36 AssignDestinationConstant(value ConstantValue) 37 38 // AssignSourceConstant assigns the given constant as the source 39 // of the instruction for this node. 40 AssignSourceConstant(value ConstantValue) 41 42 // OffsetInBinary returns the offset of this node in the assembled binary. 43 OffsetInBinary() NodeOffsetInBinary 44 } 45 46 // NodeOffsetInBinary represents an offset of this node in the final binary. 47 type NodeOffsetInBinary = uint64 48 49 // ConstantValue represents a constant value used in an instruction. 50 type ConstantValue = int64 51 52 // StaticConst represents an arbitrary constant bytes which are pooled and emitted by assembler into the binary. 53 // These constants can be referenced by instructions. 54 type StaticConst struct { 55 // offsetFinalizedCallbacks holds callbacks which are called when .OffsetInBinary is finalized by assembler implementation. 56 offsetFinalizedCallbacks []func(offsetOfConstInBinary uint64) 57 58 Raw []byte 59 // OffsetInBinary is the offset of this static const in the result binary. 60 OffsetInBinary uint64 61 } 62 63 // NewStaticConst returns the pointer to the new NewStaticConst for given bytes. 64 func NewStaticConst(raw []byte) *StaticConst { 65 return &StaticConst{Raw: raw} 66 } 67 68 // AddOffsetFinalizedCallback adds a callback into offsetFinalizedCallbacks. 69 func (s *StaticConst) AddOffsetFinalizedCallback(cb func(offsetOfConstInBinary uint64)) { 70 s.offsetFinalizedCallbacks = append(s.offsetFinalizedCallbacks, cb) 71 } 72 73 // SetOffsetInBinary finalizes the offset of this StaticConst, and invokes callbacks. 74 func (s *StaticConst) SetOffsetInBinary(offset uint64) { 75 s.OffsetInBinary = offset 76 for _, cb := range s.offsetFinalizedCallbacks { 77 cb(offset) 78 } 79 } 80 81 // StaticConstPool holds a bulk of StaticConst which are yet to be emitted into the binary. 82 type StaticConstPool struct { 83 // addedConsts is used to deduplicate the consts to reduce the final size of binary. 84 // Note: we can use map on .consts field and remove this field, 85 // but we have the separate field for deduplication in order to have deterministic assembling behavior. 86 addedConsts map[*StaticConst]struct{} 87 88 Consts []*StaticConst 89 // FirstUseOffsetInBinary holds the offset of the first instruction which accesses this const pool . 90 FirstUseOffsetInBinary NodeOffsetInBinary 91 // PoolSizeInBytes is the current size of the pool in bytes. 92 PoolSizeInBytes int 93 } 94 95 func NewStaticConstPool() StaticConstPool { 96 return StaticConstPool{addedConsts: map[*StaticConst]struct{}{}, FirstUseOffsetInBinary: math.MaxUint64} 97 } 98 99 // Reset resets the *StaticConstPool for reuse. 100 func (p *StaticConstPool) Reset() { 101 for _, c := range p.Consts { 102 delete(p.addedConsts, c) 103 } 104 // Reuse the slice to avoid re-allocations. 105 p.Consts = p.Consts[:0] 106 p.PoolSizeInBytes = 0 107 p.FirstUseOffsetInBinary = math.MaxUint64 108 } 109 110 // Empty returns true if StaticConstPool is empty. 111 func (p *StaticConstPool) Empty() bool { 112 return p.FirstUseOffsetInBinary == math.MaxUint64 113 } 114 115 // AddConst adds a *StaticConst into the pool if it's not already added. 116 func (p *StaticConstPool) AddConst(c *StaticConst, useOffset NodeOffsetInBinary) { 117 if _, ok := p.addedConsts[c]; ok { 118 return 119 } 120 121 if p.Empty() { 122 p.FirstUseOffsetInBinary = useOffset 123 } 124 125 c.offsetFinalizedCallbacks = c.offsetFinalizedCallbacks[:0] 126 127 p.Consts = append(p.Consts, c) 128 p.PoolSizeInBytes += len(c.Raw) 129 p.addedConsts[c] = struct{}{} 130 } 131 132 // AssemblerBase is the common interface for assemblers among multiple architectures. 133 // 134 // Note: some of them can be implemented in an arch-independent way, but not all can be 135 // implemented as such. However, we intentionally put such arch-dependant methods here 136 // in order to provide the common documentation interface. 137 type AssemblerBase interface { 138 // Reset resets the state of Assembler implementation and mark it ready for 139 // the compilation of the new function compilation. 140 Reset() 141 142 // Assemble produces the final binary for the assembled operations. 143 Assemble(Buffer) error 144 145 // SetJumpTargetOnNext instructs the assembler that the next node must be 146 // assigned to the given node's jump destination. 147 SetJumpTargetOnNext(node Node) 148 149 // BuildJumpTable calculates the offsets between the first instruction `initialInstructions[0]` 150 // and others (e.g. initialInstructions[3]), and wrote the calculated offsets into pre-allocated 151 // `table` StaticConst in little endian. 152 BuildJumpTable(table *StaticConst, initialInstructions []Node) 153 154 // AllocateNOP allocates Node for NOP instruction. 155 AllocateNOP() Node 156 157 // Add appends the given `Node` in the assembled linked list. 158 Add(Node) 159 160 // CompileStandAlone adds an instruction to take no arguments. 161 CompileStandAlone(instruction Instruction) Node 162 163 // CompileConstToRegister adds an instruction where source operand is `value` as constant and destination is `destinationReg` register. 164 CompileConstToRegister(instruction Instruction, value ConstantValue, destinationReg Register) Node 165 166 // CompileRegisterToRegister adds an instruction where source and destination operands are registers. 167 CompileRegisterToRegister(instruction Instruction, from, to Register) 168 169 // CompileMemoryToRegister adds an instruction where source operands is the memory address specified by `sourceBaseReg+sourceOffsetConst` 170 // and the destination is `destinationReg` register. 171 CompileMemoryToRegister( 172 instruction Instruction, 173 sourceBaseReg Register, 174 sourceOffsetConst ConstantValue, 175 destinationReg Register, 176 ) 177 178 // CompileRegisterToMemory adds an instruction where source operand is `sourceRegister` register and the destination is the 179 // memory address specified by `destinationBaseRegister+destinationOffsetConst`. 180 CompileRegisterToMemory( 181 instruction Instruction, 182 sourceRegister Register, 183 destinationBaseRegister Register, 184 destinationOffsetConst ConstantValue, 185 ) 186 187 // CompileJump adds jump-type instruction and returns the corresponding Node in the assembled linked list. 188 CompileJump(jmpInstruction Instruction) Node 189 190 // CompileJumpToRegister adds jump-type instruction whose destination is the memory address specified by `reg` register. 191 CompileJumpToRegister(jmpInstruction Instruction, reg Register) 192 193 // CompileReadInstructionAddress adds an ADR instruction to set the absolute address of "target instruction" 194 // into destinationRegister. "target instruction" is specified by beforeTargetInst argument and 195 // the target is determined by "the instruction right after beforeTargetInst type". 196 // 197 // For example, if `beforeTargetInst == RET` and we have the instruction sequence like 198 // `ADR -> X -> Y -> ... -> RET -> MOV`, then the `ADR` instruction emitted by this function set the absolute 199 // address of `MOV` instruction into the destination register. 200 CompileReadInstructionAddress(destinationRegister Register, beforeAcquisitionTargetInstruction Instruction) 201 } 202 203 // JumpTableMaximumOffset represents the limit on the size of jump table in bytes. 204 // When users try loading an extremely large WebAssembly binary which contains a br_table 205 // statement with approximately 4294967296 (2^32) targets. Realistically speaking, that kind of binary 206 // could result in more than ten gigabytes of native compiled code where we have to care about 207 // huge stacks whose height might exceed 32-bit range, and such huge stack doesn't work with the 208 // current implementation. 209 const JumpTableMaximumOffset = math.MaxUint32