github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/interpreter/interpreter.go (about)

     1  package interpreter
     2  
     3  import (
     4  	"context"
     5  	"encoding/binary"
     6  	"errors"
     7  	"fmt"
     8  	"math"
     9  	"math/bits"
    10  	"sync"
    11  	"unsafe"
    12  
    13  	"github.com/bananabytelabs/wazero/api"
    14  	"github.com/bananabytelabs/wazero/experimental"
    15  	"github.com/bananabytelabs/wazero/internal/filecache"
    16  	"github.com/bananabytelabs/wazero/internal/internalapi"
    17  	"github.com/bananabytelabs/wazero/internal/moremath"
    18  	"github.com/bananabytelabs/wazero/internal/wasm"
    19  	"github.com/bananabytelabs/wazero/internal/wasmdebug"
    20  	"github.com/bananabytelabs/wazero/internal/wasmruntime"
    21  	"github.com/bananabytelabs/wazero/internal/wazeroir"
    22  )
    23  
// callStackCeiling is the maximum WebAssembly call frame stack height. Once a
// callEngine holds this many frames, pushFrame panics with
// wasmruntime.ErrRuntimeStackOverflow instead of overflowing the Go runtime stack.
//
// The default value should suffice for most use cases. Those wishing to change
// it can do so via `go build -ldflags`.
var callStackCeiling = 2000
    29  
// engine is an interpreter implementation of wasm.Engine.
type engine struct {
	// enabledFeatures is the feature set given to NewEngine; CompileModule
	// forwards it to the wazeroir compiler.
	enabledFeatures api.CoreFeatures
	// compiledFunctions maps a module ID to the functions compiled for that
	// module. Guarded by mux.
	compiledFunctions map[wasm.ModuleID][]compiledFunction
	// mux guards compiledFunctions.
	mux sync.RWMutex
	// labelAddressResolutionCache is the temporary cache used to map LabelKind -> FrameID -> the index to the body.
	// lowerIR fills it while lowering one function and truncates it again before returning.
	labelAddressResolutionCache [wazeroir.LabelKindNum][]uint64
}
    38  
    39  func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine {
    40  	return &engine{
    41  		enabledFeatures:   enabledFeatures,
    42  		compiledFunctions: map[wasm.ModuleID][]compiledFunction{},
    43  	}
    44  }
    45  
    46  // Close implements the same method as documented on wasm.Engine.
    47  func (e *engine) Close() (err error) {
    48  	return
    49  }
    50  
    51  // CompiledModuleCount implements the same method as documented on wasm.Engine.
    52  func (e *engine) CompiledModuleCount() uint32 {
    53  	return uint32(len(e.compiledFunctions))
    54  }
    55  
    56  // DeleteCompiledModule implements the same method as documented on wasm.Engine.
    57  func (e *engine) DeleteCompiledModule(m *wasm.Module) {
    58  	e.deleteCompiledFunctions(m)
    59  }
    60  
    61  func (e *engine) deleteCompiledFunctions(module *wasm.Module) {
    62  	e.mux.Lock()
    63  	defer e.mux.Unlock()
    64  	delete(e.compiledFunctions, module.ID)
    65  }
    66  
    67  func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) {
    68  	e.mux.Lock()
    69  	defer e.mux.Unlock()
    70  	e.compiledFunctions[module.ID] = fs
    71  }
    72  
    73  func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) {
    74  	e.mux.RLock()
    75  	defer e.mux.RUnlock()
    76  	fs, ok = e.compiledFunctions[module.ID]
    77  	return
    78  }
    79  
// moduleEngine implements wasm.ModuleEngine
type moduleEngine struct {
	// functions are the functions of a module instance.
	// The index is module instance-scoped: NewModuleEngine places the
	// module-defined functions after the ImportFunctionCount leading slots,
	// which ResolveImportedFunction fills in.
	functions []function

	// parentEngine holds the *engine from which this module engine was created.
	parentEngine *engine
}
    89  
    90  // GetGlobalValue implements the same method as documented on wasm.ModuleEngine.
    91  func (e *moduleEngine) GetGlobalValue(wasm.Index) (lo, hi uint64) {
    92  	panic("BUG: GetGlobalValue should never be called on interpreter mode")
    93  }
    94  
    95  // OwnsGlobals implements the same method as documented on wasm.ModuleEngine.
    96  func (e *moduleEngine) OwnsGlobals() bool { return false }
    97  
// callEngine holds the execution state behind one api.Function returned by
// moduleEngine.NewFunction. The value stack and the call frames are shared
// across all the function calls originating from the same Call execution.
//
// This implements api.Function.
type callEngine struct {
	internalapi.WazeroOnlyType

	// stack contains the operands.
	// Note that all the values are represented as uint64.
	stack []uint64

	// frames are the function call stack.
	frames []*callFrame

	// f is the initial function for this call engine.
	f *function

	// stackIterator is handed to Listeners so they can walk frames and stack.
	// It is reset before and cleared after each such use.
	stackIterator stackIterator
}
   118  
   119  func (e *moduleEngine) newCallEngine(compiled *function) *callEngine {
   120  	return &callEngine{f: compiled}
   121  }
   122  
// pushValue pushes v onto the top of the operand stack.
func (ce *callEngine) pushValue(v uint64) {
	ce.stack = append(ce.stack, v)
}
   126  
// pushValues pushes every element of v onto the operand stack in order, so
// that the last element of v ends up on top.
func (ce *callEngine) pushValues(v []uint64) {
	ce.stack = append(ce.stack, v...)
}
   130  
   131  func (ce *callEngine) popValue() (v uint64) {
   132  	// No need to check stack bound
   133  	// as we can assume that all the operations
   134  	// are valid thanks to validateFunction
   135  	// at module validation phase
   136  	// and wazeroir translation
   137  	// before compilation.
   138  	stackTopIndex := len(ce.stack) - 1
   139  	v = ce.stack[stackTopIndex]
   140  	ce.stack = ce.stack[:stackTopIndex]
   141  	return
   142  }
   143  
   144  func (ce *callEngine) popValues(v []uint64) {
   145  	stackTopIndex := len(ce.stack) - len(v)
   146  	copy(v, ce.stack[stackTopIndex:])
   147  	ce.stack = ce.stack[:stackTopIndex]
   148  }
   149  
   150  // peekValues peeks api.ValueType values from the stack and returns them.
   151  func (ce *callEngine) peekValues(count int) []uint64 {
   152  	if count == 0 {
   153  		return nil
   154  	}
   155  	stackLen := len(ce.stack)
   156  	return ce.stack[stackLen-count : stackLen]
   157  }
   158  
   159  func (ce *callEngine) drop(raw uint64) {
   160  	r := wazeroir.InclusiveRangeFromU64(raw)
   161  	if r.Start == -1 {
   162  		return
   163  	} else if r.Start == 0 {
   164  		ce.stack = ce.stack[:int32(len(ce.stack))-1-r.End]
   165  	} else {
   166  		newStack := ce.stack[:int32(len(ce.stack))-1-r.End]
   167  		newStack = append(newStack, ce.stack[int32(len(ce.stack))-r.Start:]...)
   168  		ce.stack = newStack
   169  	}
   170  }
   171  
   172  func (ce *callEngine) pushFrame(frame *callFrame) {
   173  	if callStackCeiling <= len(ce.frames) {
   174  		panic(wasmruntime.ErrRuntimeStackOverflow)
   175  	}
   176  	ce.frames = append(ce.frames, frame)
   177  }
   178  
   179  func (ce *callEngine) popFrame() (frame *callFrame) {
   180  	// No need to check stack bound as we can assume that all the operations are valid thanks to validateFunction at
   181  	// module validation phase and wazeroir translation before compilation.
   182  	oneLess := len(ce.frames) - 1
   183  	frame = ce.frames[oneLess]
   184  	ce.frames = ce.frames[:oneLess]
   185  	return
   186  }
   187  
// callFrame is one entry of callEngine.frames and describes a function
// invocation that is in progress.
type callFrame struct {
	// pc is the program counter representing the current position in f.parent.body.
	pc uint64
	// f is the compiled function used in this function frame.
	f *function
	// base is the length of the operand stack at the time this frame was
	// created. stackIterator.Next truncates its view of the stack to this
	// index when stepping to the frame.
	base int
}
   197  
// compiledFunction is the engine-scoped result of compiling one entry of a
// module's code section. NewModuleEngine points every instantiated function
// at the compiledFunction it was created from.
type compiledFunction struct {
	// source is the module this function was compiled from.
	source *wasm.Module
	// body holds the wazeroir operations, with branch targets resolved by
	// lowerIR to indexes into body. It stays nil for host functions.
	body []wazeroir.UnionOperation
	// listener, when non-nil, is notified around calls to this function.
	listener experimental.FunctionListener
	// offsetsInWasmBinary maps an index in body to an offset in the Wasm
	// binary. It is only populated when the compilation result carries offsets.
	offsetsInWasmBinary []uint64
	// hostFn is the code section's GoFunc for host functions, and nil otherwise.
	hostFn interface{}
	// ensureTermination is the flag passed to CompileModule; call consults it
	// to decide whether the module is closed once the context is done.
	ensureTermination bool
	// index is the function's index in the module: the import count plus the
	// position in the code section.
	index wasm.Index
}
   207  
// function is a compiledFunction bound to a module instance.
type function struct {
	// funcType is the signature, pointing into the source module's type section.
	funcType *wasm.FunctionType
	// moduleInstance is the instance that was given to NewModuleEngine when
	// this function was created.
	moduleInstance *wasm.ModuleInstance
	// typeID is the instance's type ID for funcType. It is compared against
	// the expected type ID on indirect calls and table lookups.
	typeID wasm.FunctionTypeID
	// parent is the compiled code this function executes.
	parent *compiledFunction
}
   214  
   215  // functionFromUintptr resurrects the original *function from the given uintptr
   216  // which comes from either funcref table or OpcodeRefFunc instruction.
   217  func functionFromUintptr(ptr uintptr) *function {
   218  	// Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector.
   219  	//
   220  	// For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr"
   221  	// subroutine wanrs as "checkptr: pointer arithmetic result points to invalid allocation"
   222  	// https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69
   223  	var wrapped *uintptr = &ptr
   224  	return *(**function)(unsafe.Pointer(wrapped))
   225  }
   226  
// stackIterator implements experimental.StackIterator.
type stackIterator struct {
	// stack is the view of the operand stack for the current position; Next
	// truncates it to the base of the frame it steps to.
	stack []uint64
	// frames are the call frames that have not been visited yet; Next consumes
	// them from the end.
	frames []*callFrame
	// started is false until the first call to Next, which only exposes the
	// function given to reset.
	started bool
	// fn is the function at the current position.
	fn *function
	// pc is the program counter at the current position.
	pc uint64
}
   235  
   236  func (si *stackIterator) reset(stack []uint64, frames []*callFrame, f *function) {
   237  	si.fn = f
   238  	si.pc = 0
   239  	si.stack = stack
   240  	si.frames = frames
   241  	si.started = false
   242  }
   243  
   244  func (si *stackIterator) clear() {
   245  	si.stack = nil
   246  	si.frames = nil
   247  	si.started = false
   248  	si.fn = nil
   249  }
   250  
   251  // Next implements the same method as documented on experimental.StackIterator.
   252  func (si *stackIterator) Next() bool {
   253  	if !si.started {
   254  		si.started = true
   255  		return true
   256  	}
   257  
   258  	if len(si.frames) == 0 {
   259  		return false
   260  	}
   261  
   262  	frame := si.frames[len(si.frames)-1]
   263  	si.stack = si.stack[:frame.base]
   264  	si.fn = frame.f
   265  	si.pc = frame.pc
   266  	si.frames = si.frames[:len(si.frames)-1]
   267  	return true
   268  }
   269  
   270  // Function implements the same method as documented on
   271  // experimental.StackIterator.
   272  func (si *stackIterator) Function() experimental.InternalFunction {
   273  	return internalFunction{si.fn}
   274  }
   275  
   276  // ProgramCounter implements the same method as documented on
   277  // experimental.StackIterator.
   278  func (si *stackIterator) ProgramCounter() experimental.ProgramCounter {
   279  	return experimental.ProgramCounter(si.pc)
   280  }
   281  
// internalFunction implements experimental.InternalFunction by wrapping the
// *function it describes.
type internalFunction struct{ *function }
   284  
   285  // Definition implements the same method as documented on
   286  // experimental.InternalFunction.
   287  func (f internalFunction) Definition() api.FunctionDefinition {
   288  	return f.definition()
   289  }
   290  
   291  // SourceOffsetForPC implements the same method as documented on
   292  // experimental.InternalFunction.
   293  func (f internalFunction) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 {
   294  	offsetsMap := f.parent.offsetsInWasmBinary
   295  	if uint64(pc) < uint64(len(offsetsMap)) {
   296  		return offsetsMap[pc]
   297  	}
   298  	return 0
   299  }
   300  
// callFrameStackSize is the call frame size passed to the wazeroir compiler.
// The interpreter doesn't maintain call frames in the value stack, so the
// zero size is passed to the IR.
const callFrameStackSize = 0
   303  
   304  // CompileModule implements the same method as documented on wasm.Engine.
   305  func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error {
   306  	if _, ok := e.getCompiledFunctions(module); ok { // cache hit!
   307  		return nil
   308  	}
   309  
   310  	funcs := make([]compiledFunction, len(module.FunctionSection))
   311  	irCompiler, err := wazeroir.NewCompiler(e.enabledFeatures, callFrameStackSize, module, ensureTermination)
   312  	if err != nil {
   313  		return err
   314  	}
   315  	imported := module.ImportFunctionCount
   316  	for i := range module.CodeSection {
   317  		var lsn experimental.FunctionListener
   318  		if i < len(listeners) {
   319  			lsn = listeners[i]
   320  		}
   321  
   322  		compiled := &funcs[i]
   323  		// If this is the host function, there's nothing to do as the runtime representation of
   324  		// host function in interpreter is its Go function itself as opposed to Wasm functions,
   325  		// which need to be compiled down to wazeroir.
   326  		if codeSeg := &module.CodeSection[i]; codeSeg.GoFunc != nil {
   327  			compiled.hostFn = codeSeg.GoFunc
   328  		} else {
   329  			ir, err := irCompiler.Next()
   330  			if err != nil {
   331  				return err
   332  			}
   333  			err = e.lowerIR(ir, compiled)
   334  			if err != nil {
   335  				def := module.FunctionDefinition(uint32(i) + module.ImportFunctionCount)
   336  				return fmt.Errorf("failed to lower func[%s] to wazeroir: %w", def.DebugName(), err)
   337  			}
   338  		}
   339  		compiled.source = module
   340  		compiled.ensureTermination = ensureTermination
   341  		compiled.listener = lsn
   342  		compiled.index = imported + uint32(i)
   343  	}
   344  	e.addCompiledFunctions(module, funcs)
   345  	return nil
   346  }
   347  
   348  // NewModuleEngine implements the same method as documented on wasm.Engine.
   349  func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInstance) (wasm.ModuleEngine, error) {
   350  	me := &moduleEngine{
   351  		parentEngine: e,
   352  		functions:    make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)),
   353  	}
   354  
   355  	codes, ok := e.getCompiledFunctions(module)
   356  	if !ok {
   357  		return nil, errors.New("source module must be compiled before instantiation")
   358  	}
   359  
   360  	for i := range codes {
   361  		c := &codes[i]
   362  		offset := i + int(module.ImportFunctionCount)
   363  		typeIndex := module.FunctionSection[i]
   364  		me.functions[offset] = function{
   365  			moduleInstance: instance,
   366  			typeID:         instance.TypeIDs[typeIndex],
   367  			funcType:       &module.TypeSection[typeIndex],
   368  			parent:         c,
   369  		}
   370  	}
   371  	return me, nil
   372  }
   373  
   374  // lowerIR lowers the wazeroir operations to engine friendly struct.
   375  func (e *engine) lowerIR(ir *wazeroir.CompilationResult, ret *compiledFunction) error {
   376  	// Copy the body from the result.
   377  	ret.body = make([]wazeroir.UnionOperation, len(ir.Operations))
   378  	copy(ret.body, ir.Operations)
   379  	// Also copy the offsets if necessary.
   380  	if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 {
   381  		ret.offsetsInWasmBinary = make([]uint64, len(offsets))
   382  		copy(ret.offsetsInWasmBinary, offsets)
   383  	}
   384  
   385  	// First, we iterate all labels, and resolve the address.
   386  	for i := range ret.body {
   387  		op := &ret.body[i]
   388  		switch op.Kind {
   389  		case wazeroir.OperationKindLabel:
   390  			label := wazeroir.Label(op.U1)
   391  			address := uint64(i)
   392  
   393  			kind, fid := label.Kind(), label.FrameID()
   394  			frameToAddresses := e.labelAddressResolutionCache[label.Kind()]
   395  			// Expand the slice if necessary.
   396  			if diff := fid - len(frameToAddresses) + 1; diff > 0 {
   397  				for j := 0; j < diff; j++ {
   398  					frameToAddresses = append(frameToAddresses, 0)
   399  				}
   400  			}
   401  			frameToAddresses[fid] = address
   402  			e.labelAddressResolutionCache[kind] = frameToAddresses
   403  		}
   404  	}
   405  
   406  	// Then resolve the label as the index to the body.
   407  	for i := range ret.body {
   408  		op := &ret.body[i]
   409  		switch op.Kind {
   410  		case wazeroir.OperationKindBr:
   411  			e.setLabelAddress(&op.U1, wazeroir.Label(op.U1))
   412  		case wazeroir.OperationKindBrIf:
   413  			e.setLabelAddress(&op.U1, wazeroir.Label(op.U1))
   414  			e.setLabelAddress(&op.U2, wazeroir.Label(op.U2))
   415  		case wazeroir.OperationKindBrTable:
   416  			for j := 0; j < len(op.Us); j += 2 {
   417  				target := op.Us[j]
   418  				e.setLabelAddress(&op.Us[j], wazeroir.Label(target))
   419  			}
   420  		}
   421  	}
   422  
   423  	// Reuses the slices for the subsequent compilation, so clear the content here.
   424  	for i := range e.labelAddressResolutionCache {
   425  		e.labelAddressResolutionCache[i] = e.labelAddressResolutionCache[i][:0]
   426  	}
   427  	return nil
   428  }
   429  
   430  func (e *engine) setLabelAddress(op *uint64, label wazeroir.Label) {
   431  	if label.IsReturnTarget() {
   432  		// Jmp to the end of the possible binary.
   433  		*op = math.MaxUint64
   434  	} else {
   435  		*op = e.labelAddressResolutionCache[label.Kind()][label.FrameID()]
   436  	}
   437  }
   438  
   439  // ResolveImportedFunction implements wasm.ModuleEngine.
   440  func (e *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {
   441  	imported := importedModuleEngine.(*moduleEngine)
   442  	e.functions[index] = imported.functions[indexInImportedModule]
   443  }
   444  
// ResolveImportedMemory implements wasm.ModuleEngine.
//
// This is a no-op in the interpreter.
func (e *moduleEngine) ResolveImportedMemory(wasm.ModuleEngine) {}
   447  
// DoneInstantiation implements wasm.ModuleEngine.
//
// This is a no-op in the interpreter.
func (e *moduleEngine) DoneInstantiation() {}
   450  
   451  // FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine.
   452  func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference {
   453  	return uintptr(unsafe.Pointer(&e.functions[funcIndex]))
   454  }
   455  
   456  // NewFunction implements the same method as documented on wasm.ModuleEngine.
   457  func (e *moduleEngine) NewFunction(index wasm.Index) (ce api.Function) {
   458  	// Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to
   459  	// code on close aren't locked, neither is this read.
   460  	compiled := &e.functions[index]
   461  	return e.newCallEngine(compiled)
   462  }
   463  
   464  // LookupFunction implements the same method as documented on wasm.ModuleEngine.
   465  func (e *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) {
   466  	if tableOffset >= uint32(len(t.References)) {
   467  		panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   468  	}
   469  	rawPtr := t.References[tableOffset]
   470  	if rawPtr == 0 {
   471  		panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   472  	}
   473  
   474  	tf := functionFromUintptr(rawPtr)
   475  	if tf.typeID != typeId {
   476  		panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   477  	}
   478  	return tf.moduleInstance, tf.parent.index
   479  }
   480  
   481  // Definition implements the same method as documented on api.Function.
   482  func (ce *callEngine) Definition() api.FunctionDefinition {
   483  	return ce.f.definition()
   484  }
   485  
   486  func (f *function) definition() api.FunctionDefinition {
   487  	compiled := f.parent
   488  	return compiled.source.FunctionDefinition(compiled.index)
   489  }
   490  
   491  // Call implements the same method as documented on api.Function.
   492  func (ce *callEngine) Call(ctx context.Context, params ...uint64) (results []uint64, err error) {
   493  	ft := ce.f.funcType
   494  	if n := ft.ParamNumInUint64; n != len(params) {
   495  		return nil, fmt.Errorf("expected %d params, but passed %d", n, len(params))
   496  	}
   497  	return ce.call(ctx, params, nil)
   498  }
   499  
   500  // CallWithStack implements the same method as documented on api.Function.
   501  func (ce *callEngine) CallWithStack(ctx context.Context, stack []uint64) error {
   502  	params, results, err := wasm.SplitCallStack(ce.f.funcType, stack)
   503  	if err != nil {
   504  		return err
   505  	}
   506  	_, err = ce.call(ctx, params, results)
   507  	return err
   508  }
   509  
   510  func (ce *callEngine) call(ctx context.Context, params, results []uint64) (_ []uint64, err error) {
   511  	m := ce.f.moduleInstance
   512  	if ce.f.parent.ensureTermination {
   513  		select {
   514  		case <-ctx.Done():
   515  			// If the provided context is already done, close the call context
   516  			// and return the error.
   517  			m.CloseWithCtxErr(ctx)
   518  			return nil, m.FailIfClosed()
   519  		default:
   520  		}
   521  	}
   522  
   523  	defer func() {
   524  		// If the module closed during the call, and the call didn't err for another reason, set an ExitError.
   525  		if err == nil {
   526  			err = m.FailIfClosed()
   527  		}
   528  		// TODO: ^^ Will not fail if the function was imported from a closed module.
   529  
   530  		if v := recover(); v != nil {
   531  			err = ce.recoverOnCall(ctx, m, v)
   532  		}
   533  	}()
   534  
   535  	ce.pushValues(params)
   536  
   537  	if ce.f.parent.ensureTermination {
   538  		done := m.CloseModuleOnCanceledOrTimeout(ctx)
   539  		defer done()
   540  	}
   541  
   542  	ce.callFunction(ctx, m, ce.f)
   543  
   544  	// This returns a safe copy of the results, instead of a slice view. If we
   545  	// returned a re-slice, the caller could accidentally or purposefully
   546  	// corrupt the stack of subsequent calls.
   547  	ft := ce.f.funcType
   548  	if results == nil && ft.ResultNumInUint64 > 0 {
   549  		results = make([]uint64, ft.ResultNumInUint64)
   550  	}
   551  	ce.popValues(results)
   552  	return results, nil
   553  }
   554  
// functionListenerInvocation captures arguments needed to perform function
// listener invocations when unwinding the call stack.
type functionListenerInvocation struct {
	// FunctionListener is the listener to notify.
	experimental.FunctionListener
	// def is the definition of the function the listener is attached to.
	def api.FunctionDefinition
}
   561  
   562  // recoverOnCall takes the recovered value `recoverOnCall`, and wraps it
   563  // with the call frame stack traces. Also, reset the state of callEngine
   564  // so that it can be used for the subsequent calls.
   565  func (ce *callEngine) recoverOnCall(ctx context.Context, m *wasm.ModuleInstance, v interface{}) (err error) {
   566  	builder := wasmdebug.NewErrorBuilder()
   567  	frameCount := len(ce.frames)
   568  	functionListeners := make([]functionListenerInvocation, 0, 16)
   569  
   570  	for i := 0; i < frameCount; i++ {
   571  		frame := ce.popFrame()
   572  		f := frame.f
   573  		def := f.definition()
   574  		var sources []string
   575  		if parent := frame.f.parent; parent.body != nil && len(parent.offsetsInWasmBinary) > 0 {
   576  			sources = parent.source.DWARFLines.Line(parent.offsetsInWasmBinary[frame.pc])
   577  		}
   578  		builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
   579  		if f.parent.listener != nil {
   580  			functionListeners = append(functionListeners, functionListenerInvocation{
   581  				FunctionListener: f.parent.listener,
   582  				def:              f.definition(),
   583  			})
   584  		}
   585  	}
   586  
   587  	err = builder.FromRecovered(v)
   588  	for i := range functionListeners {
   589  		functionListeners[i].Abort(ctx, m, functionListeners[i].def, err)
   590  	}
   591  
   592  	// Allows the reuse of CallEngine.
   593  	ce.stack, ce.frames = ce.stack[:0], ce.frames[:0]
   594  	return
   595  }
   596  
   597  func (ce *callEngine) callFunction(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   598  	if f.parent.hostFn != nil {
   599  		ce.callGoFuncWithStack(ctx, m, f)
   600  	} else if lsn := f.parent.listener; lsn != nil {
   601  		ce.callNativeFuncWithListener(ctx, m, f, lsn)
   602  	} else {
   603  		ce.callNativeFunc(ctx, m, f)
   604  	}
   605  }
   606  
   607  func (ce *callEngine) callGoFunc(ctx context.Context, m *wasm.ModuleInstance, f *function, stack []uint64) {
   608  	typ := f.funcType
   609  	lsn := f.parent.listener
   610  	if lsn != nil {
   611  		params := stack[:typ.ParamNumInUint64]
   612  		ce.stackIterator.reset(ce.stack, ce.frames, f)
   613  		lsn.Before(ctx, m, f.definition(), params, &ce.stackIterator)
   614  		ce.stackIterator.clear()
   615  	}
   616  	frame := &callFrame{f: f, base: len(ce.stack)}
   617  	ce.pushFrame(frame)
   618  
   619  	fn := f.parent.hostFn
   620  	switch fn := fn.(type) {
   621  	case api.GoModuleFunction:
   622  		fn.Call(ctx, m, stack)
   623  	case api.GoFunction:
   624  		fn.Call(ctx, stack)
   625  	}
   626  
   627  	ce.popFrame()
   628  	if lsn != nil {
   629  		// TODO: This doesn't get the error due to use of panic to propagate them.
   630  		results := stack[:typ.ResultNumInUint64]
   631  		lsn.After(ctx, m, f.definition(), results)
   632  	}
   633  }
   634  
   635  func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   636  	frame := &callFrame{f: f, base: len(ce.stack)}
   637  	moduleInst := f.moduleInstance
   638  	functions := moduleInst.Engine.(*moduleEngine).functions
   639  	memoryInst := moduleInst.MemoryInstance
   640  	globals := moduleInst.Globals
   641  	tables := moduleInst.Tables
   642  	typeIDs := moduleInst.TypeIDs
   643  	dataInstances := moduleInst.DataInstances
   644  	elementInstances := moduleInst.ElementInstances
   645  	ce.pushFrame(frame)
   646  	body := frame.f.parent.body
   647  	bodyLen := uint64(len(body))
   648  	for frame.pc < bodyLen {
   649  		op := &body[frame.pc]
   650  		// TODO: add description of each operation/case
   651  		// on, for example, how many args are used,
   652  		// how the stack is modified, etc.
   653  		switch op.Kind {
   654  		case wazeroir.OperationKindBuiltinFunctionCheckExitCode:
   655  			if err := m.FailIfClosed(); err != nil {
   656  				panic(err)
   657  			}
   658  			frame.pc++
   659  		case wazeroir.OperationKindUnreachable:
   660  			panic(wasmruntime.ErrRuntimeUnreachable)
   661  		case wazeroir.OperationKindBr:
   662  			frame.pc = op.U1
   663  		case wazeroir.OperationKindBrIf:
   664  			if ce.popValue() > 0 {
   665  				ce.drop(op.U3)
   666  				frame.pc = op.U1
   667  			} else {
   668  				frame.pc = op.U2
   669  			}
   670  		case wazeroir.OperationKindBrTable:
   671  			v := ce.popValue()
   672  			defaultAt := uint64(len(op.Us))/2 - 1
   673  			if v > defaultAt {
   674  				v = defaultAt
   675  			}
   676  			v *= 2
   677  			ce.drop(op.Us[v+1])
   678  			frame.pc = op.Us[v]
   679  		case wazeroir.OperationKindCall:
   680  			ce.callFunction(ctx, f.moduleInstance, &functions[op.U1])
   681  			frame.pc++
   682  		case wazeroir.OperationKindCallIndirect:
   683  			offset := ce.popValue()
   684  			table := tables[op.U2]
   685  			if offset >= uint64(len(table.References)) {
   686  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   687  			}
   688  			rawPtr := table.References[offset]
   689  			if rawPtr == 0 {
   690  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   691  			}
   692  
   693  			tf := functionFromUintptr(rawPtr)
   694  			if tf.typeID != typeIDs[op.U1] {
   695  				panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   696  			}
   697  
   698  			ce.callFunction(ctx, f.moduleInstance, tf)
   699  			frame.pc++
   700  		case wazeroir.OperationKindDrop:
   701  			ce.drop(op.U1)
   702  			frame.pc++
   703  		case wazeroir.OperationKindSelect:
   704  			c := ce.popValue()
   705  			if op.B3 { // Target is vector.
   706  				x2Hi, x2Lo := ce.popValue(), ce.popValue()
   707  				if c == 0 {
   708  					_, _ = ce.popValue(), ce.popValue() // discard the x1's lo and hi bits.
   709  					ce.pushValue(x2Lo)
   710  					ce.pushValue(x2Hi)
   711  				}
   712  			} else {
   713  				v2 := ce.popValue()
   714  				if c == 0 {
   715  					_ = ce.popValue()
   716  					ce.pushValue(v2)
   717  				}
   718  			}
   719  			frame.pc++
   720  		case wazeroir.OperationKindPick:
   721  			index := len(ce.stack) - 1 - int(op.U1)
   722  			ce.pushValue(ce.stack[index])
   723  			if op.B3 { // V128 value target.
   724  				ce.pushValue(ce.stack[index+1])
   725  			}
   726  			frame.pc++
   727  		case wazeroir.OperationKindSet:
   728  			if op.B3 { // V128 value target.
   729  				lowIndex := len(ce.stack) - 1 - int(op.U1)
   730  				highIndex := lowIndex + 1
   731  				hi, lo := ce.popValue(), ce.popValue()
   732  				ce.stack[lowIndex], ce.stack[highIndex] = lo, hi
   733  			} else {
   734  				index := len(ce.stack) - 1 - int(op.U1)
   735  				ce.stack[index] = ce.popValue()
   736  			}
   737  			frame.pc++
   738  		case wazeroir.OperationKindGlobalGet:
   739  			g := globals[op.U1]
   740  			ce.pushValue(g.Val)
   741  			if g.Type.ValType == wasm.ValueTypeV128 {
   742  				ce.pushValue(g.ValHi)
   743  			}
   744  			frame.pc++
   745  		case wazeroir.OperationKindGlobalSet:
   746  			g := globals[op.U1]
   747  			if g.Type.ValType == wasm.ValueTypeV128 {
   748  				g.ValHi = ce.popValue()
   749  			}
   750  			g.Val = ce.popValue()
   751  			frame.pc++
   752  		case wazeroir.OperationKindLoad:
   753  			offset := ce.popMemoryOffset(op)
   754  			switch wazeroir.UnsignedType(op.B1) {
   755  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
   756  				if val, ok := memoryInst.ReadUint32Le(offset); !ok {
   757  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   758  				} else {
   759  					ce.pushValue(uint64(val))
   760  				}
   761  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
   762  				if val, ok := memoryInst.ReadUint64Le(offset); !ok {
   763  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   764  				} else {
   765  					ce.pushValue(val)
   766  				}
   767  			}
   768  			frame.pc++
   769  		case wazeroir.OperationKindLoad8:
   770  			val, ok := memoryInst.ReadByte(ce.popMemoryOffset(op))
   771  			if !ok {
   772  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   773  			}
   774  
   775  			switch wazeroir.SignedInt(op.B1) {
   776  			case wazeroir.SignedInt32:
   777  				ce.pushValue(uint64(uint32(int8(val))))
   778  			case wazeroir.SignedInt64:
   779  				ce.pushValue(uint64(int8(val)))
   780  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
   781  				ce.pushValue(uint64(val))
   782  			}
   783  			frame.pc++
   784  		case wazeroir.OperationKindLoad16:
   785  
   786  			val, ok := memoryInst.ReadUint16Le(ce.popMemoryOffset(op))
   787  			if !ok {
   788  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   789  			}
   790  
   791  			switch wazeroir.SignedInt(op.B1) {
   792  			case wazeroir.SignedInt32:
   793  				ce.pushValue(uint64(uint32(int16(val))))
   794  			case wazeroir.SignedInt64:
   795  				ce.pushValue(uint64(int16(val)))
   796  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
   797  				ce.pushValue(uint64(val))
   798  			}
   799  			frame.pc++
   800  		case wazeroir.OperationKindLoad32:
   801  			val, ok := memoryInst.ReadUint32Le(ce.popMemoryOffset(op))
   802  			if !ok {
   803  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   804  			}
   805  
   806  			if op.B1 == 1 { // Signed
   807  				ce.pushValue(uint64(int32(val)))
   808  			} else {
   809  				ce.pushValue(uint64(val))
   810  			}
   811  			frame.pc++
   812  		case wazeroir.OperationKindStore:
   813  			val := ce.popValue()
   814  			offset := ce.popMemoryOffset(op)
   815  			switch wazeroir.UnsignedType(op.B1) {
   816  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
   817  				if !memoryInst.WriteUint32Le(offset, uint32(val)) {
   818  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   819  				}
   820  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
   821  				if !memoryInst.WriteUint64Le(offset, val) {
   822  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   823  				}
   824  			}
   825  			frame.pc++
   826  		case wazeroir.OperationKindStore8:
   827  			val := byte(ce.popValue())
   828  			offset := ce.popMemoryOffset(op)
   829  			if !memoryInst.WriteByte(offset, val) {
   830  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   831  			}
   832  			frame.pc++
   833  		case wazeroir.OperationKindStore16:
   834  			val := uint16(ce.popValue())
   835  			offset := ce.popMemoryOffset(op)
   836  			if !memoryInst.WriteUint16Le(offset, val) {
   837  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   838  			}
   839  			frame.pc++
   840  		case wazeroir.OperationKindStore32:
   841  			val := uint32(ce.popValue())
   842  			offset := ce.popMemoryOffset(op)
   843  			if !memoryInst.WriteUint32Le(offset, val) {
   844  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   845  			}
   846  			frame.pc++
   847  		case wazeroir.OperationKindMemorySize:
   848  			ce.pushValue(uint64(memoryInst.PageSize()))
   849  			frame.pc++
   850  		case wazeroir.OperationKindMemoryGrow:
   851  			n := ce.popValue()
   852  			if res, ok := memoryInst.Grow(uint32(n)); !ok {
   853  				ce.pushValue(uint64(0xffffffff)) // = -1 in signed 32-bit integer.
   854  			} else {
   855  				ce.pushValue(uint64(res))
   856  			}
   857  			frame.pc++
   858  		case wazeroir.OperationKindConstI32, wazeroir.OperationKindConstI64,
   859  			wazeroir.OperationKindConstF32, wazeroir.OperationKindConstF64:
   860  			ce.pushValue(op.U1)
   861  			frame.pc++
   862  		case wazeroir.OperationKindEq:
   863  			var b bool
   864  			switch wazeroir.UnsignedType(op.B1) {
   865  			case wazeroir.UnsignedTypeI32:
   866  				v2, v1 := ce.popValue(), ce.popValue()
   867  				b = uint32(v1) == uint32(v2)
   868  			case wazeroir.UnsignedTypeI64:
   869  				v2, v1 := ce.popValue(), ce.popValue()
   870  				b = v1 == v2
   871  			case wazeroir.UnsignedTypeF32:
   872  				v2, v1 := ce.popValue(), ce.popValue()
   873  				b = math.Float32frombits(uint32(v2)) == math.Float32frombits(uint32(v1))
   874  			case wazeroir.UnsignedTypeF64:
   875  				v2, v1 := ce.popValue(), ce.popValue()
   876  				b = math.Float64frombits(v2) == math.Float64frombits(v1)
   877  			}
   878  			if b {
   879  				ce.pushValue(1)
   880  			} else {
   881  				ce.pushValue(0)
   882  			}
   883  			frame.pc++
   884  		case wazeroir.OperationKindNe:
   885  			var b bool
   886  			switch wazeroir.UnsignedType(op.B1) {
   887  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeI64:
   888  				v2, v1 := ce.popValue(), ce.popValue()
   889  				b = v1 != v2
   890  			case wazeroir.UnsignedTypeF32:
   891  				v2, v1 := ce.popValue(), ce.popValue()
   892  				b = math.Float32frombits(uint32(v2)) != math.Float32frombits(uint32(v1))
   893  			case wazeroir.UnsignedTypeF64:
   894  				v2, v1 := ce.popValue(), ce.popValue()
   895  				b = math.Float64frombits(v2) != math.Float64frombits(v1)
   896  			}
   897  			if b {
   898  				ce.pushValue(1)
   899  			} else {
   900  				ce.pushValue(0)
   901  			}
   902  			frame.pc++
   903  		case wazeroir.OperationKindEqz:
   904  			if ce.popValue() == 0 {
   905  				ce.pushValue(1)
   906  			} else {
   907  				ce.pushValue(0)
   908  			}
   909  			frame.pc++
   910  		case wazeroir.OperationKindLt:
   911  			v2 := ce.popValue()
   912  			v1 := ce.popValue()
   913  			var b bool
   914  			switch wazeroir.SignedType(op.B1) {
   915  			case wazeroir.SignedTypeInt32:
   916  				b = int32(v1) < int32(v2)
   917  			case wazeroir.SignedTypeInt64:
   918  				b = int64(v1) < int64(v2)
   919  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
   920  				b = v1 < v2
   921  			case wazeroir.SignedTypeFloat32:
   922  				b = math.Float32frombits(uint32(v1)) < math.Float32frombits(uint32(v2))
   923  			case wazeroir.SignedTypeFloat64:
   924  				b = math.Float64frombits(v1) < math.Float64frombits(v2)
   925  			}
   926  			if b {
   927  				ce.pushValue(1)
   928  			} else {
   929  				ce.pushValue(0)
   930  			}
   931  			frame.pc++
   932  		case wazeroir.OperationKindGt:
   933  			v2 := ce.popValue()
   934  			v1 := ce.popValue()
   935  			var b bool
   936  			switch wazeroir.SignedType(op.B1) {
   937  			case wazeroir.SignedTypeInt32:
   938  				b = int32(v1) > int32(v2)
   939  			case wazeroir.SignedTypeInt64:
   940  				b = int64(v1) > int64(v2)
   941  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
   942  				b = v1 > v2
   943  			case wazeroir.SignedTypeFloat32:
   944  				b = math.Float32frombits(uint32(v1)) > math.Float32frombits(uint32(v2))
   945  			case wazeroir.SignedTypeFloat64:
   946  				b = math.Float64frombits(v1) > math.Float64frombits(v2)
   947  			}
   948  			if b {
   949  				ce.pushValue(1)
   950  			} else {
   951  				ce.pushValue(0)
   952  			}
   953  			frame.pc++
   954  		case wazeroir.OperationKindLe:
   955  			v2 := ce.popValue()
   956  			v1 := ce.popValue()
   957  			var b bool
   958  			switch wazeroir.SignedType(op.B1) {
   959  			case wazeroir.SignedTypeInt32:
   960  				b = int32(v1) <= int32(v2)
   961  			case wazeroir.SignedTypeInt64:
   962  				b = int64(v1) <= int64(v2)
   963  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
   964  				b = v1 <= v2
   965  			case wazeroir.SignedTypeFloat32:
   966  				b = math.Float32frombits(uint32(v1)) <= math.Float32frombits(uint32(v2))
   967  			case wazeroir.SignedTypeFloat64:
   968  				b = math.Float64frombits(v1) <= math.Float64frombits(v2)
   969  			}
   970  			if b {
   971  				ce.pushValue(1)
   972  			} else {
   973  				ce.pushValue(0)
   974  			}
   975  			frame.pc++
   976  		case wazeroir.OperationKindGe:
   977  			v2 := ce.popValue()
   978  			v1 := ce.popValue()
   979  			var b bool
   980  			switch wazeroir.SignedType(op.B1) {
   981  			case wazeroir.SignedTypeInt32:
   982  				b = int32(v1) >= int32(v2)
   983  			case wazeroir.SignedTypeInt64:
   984  				b = int64(v1) >= int64(v2)
   985  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
   986  				b = v1 >= v2
   987  			case wazeroir.SignedTypeFloat32:
   988  				b = math.Float32frombits(uint32(v1)) >= math.Float32frombits(uint32(v2))
   989  			case wazeroir.SignedTypeFloat64:
   990  				b = math.Float64frombits(v1) >= math.Float64frombits(v2)
   991  			}
   992  			if b {
   993  				ce.pushValue(1)
   994  			} else {
   995  				ce.pushValue(0)
   996  			}
   997  			frame.pc++
   998  		case wazeroir.OperationKindAdd:
   999  			v2 := ce.popValue()
  1000  			v1 := ce.popValue()
  1001  			switch wazeroir.UnsignedType(op.B1) {
  1002  			case wazeroir.UnsignedTypeI32:
  1003  				v := uint32(v1) + uint32(v2)
  1004  				ce.pushValue(uint64(v))
  1005  			case wazeroir.UnsignedTypeI64:
  1006  				ce.pushValue(v1 + v2)
  1007  			case wazeroir.UnsignedTypeF32:
  1008  				ce.pushValue(addFloat32bits(uint32(v1), uint32(v2)))
  1009  			case wazeroir.UnsignedTypeF64:
  1010  				v := math.Float64frombits(v1) + math.Float64frombits(v2)
  1011  				ce.pushValue(math.Float64bits(v))
  1012  			}
  1013  			frame.pc++
  1014  		case wazeroir.OperationKindSub:
  1015  			v2 := ce.popValue()
  1016  			v1 := ce.popValue()
  1017  			switch wazeroir.UnsignedType(op.B1) {
  1018  			case wazeroir.UnsignedTypeI32:
  1019  				ce.pushValue(uint64(uint32(v1) - uint32(v2)))
  1020  			case wazeroir.UnsignedTypeI64:
  1021  				ce.pushValue(v1 - v2)
  1022  			case wazeroir.UnsignedTypeF32:
  1023  				ce.pushValue(subFloat32bits(uint32(v1), uint32(v2)))
  1024  			case wazeroir.UnsignedTypeF64:
  1025  				v := math.Float64frombits(v1) - math.Float64frombits(v2)
  1026  				ce.pushValue(math.Float64bits(v))
  1027  			}
  1028  			frame.pc++
  1029  		case wazeroir.OperationKindMul:
  1030  			v2 := ce.popValue()
  1031  			v1 := ce.popValue()
  1032  			switch wazeroir.UnsignedType(op.B1) {
  1033  			case wazeroir.UnsignedTypeI32:
  1034  				ce.pushValue(uint64(uint32(v1) * uint32(v2)))
  1035  			case wazeroir.UnsignedTypeI64:
  1036  				ce.pushValue(v1 * v2)
  1037  			case wazeroir.UnsignedTypeF32:
  1038  				ce.pushValue(mulFloat32bits(uint32(v1), uint32(v2)))
  1039  			case wazeroir.UnsignedTypeF64:
  1040  				v := math.Float64frombits(v2) * math.Float64frombits(v1)
  1041  				ce.pushValue(math.Float64bits(v))
  1042  			}
  1043  			frame.pc++
  1044  		case wazeroir.OperationKindClz:
  1045  			v := ce.popValue()
  1046  			if op.B1 == 0 {
  1047  				// UnsignedInt32
  1048  				ce.pushValue(uint64(bits.LeadingZeros32(uint32(v))))
  1049  			} else {
  1050  				// UnsignedInt64
  1051  				ce.pushValue(uint64(bits.LeadingZeros64(v)))
  1052  			}
  1053  			frame.pc++
  1054  		case wazeroir.OperationKindCtz:
  1055  			v := ce.popValue()
  1056  			if op.B1 == 0 {
  1057  				// UnsignedInt32
  1058  				ce.pushValue(uint64(bits.TrailingZeros32(uint32(v))))
  1059  			} else {
  1060  				// UnsignedInt64
  1061  				ce.pushValue(uint64(bits.TrailingZeros64(v)))
  1062  			}
  1063  			frame.pc++
  1064  		case wazeroir.OperationKindPopcnt:
  1065  			v := ce.popValue()
  1066  			if op.B1 == 0 {
  1067  				// UnsignedInt32
  1068  				ce.pushValue(uint64(bits.OnesCount32(uint32(v))))
  1069  			} else {
  1070  				// UnsignedInt64
  1071  				ce.pushValue(uint64(bits.OnesCount64(v)))
  1072  			}
  1073  			frame.pc++
  1074  		case wazeroir.OperationKindDiv:
  1075  			// If an integer, check we won't divide by zero.
  1076  			t := wazeroir.SignedType(op.B1)
  1077  			v2, v1 := ce.popValue(), ce.popValue()
  1078  			switch t {
  1079  			case wazeroir.SignedTypeFloat32, wazeroir.SignedTypeFloat64: // not integers
  1080  			default:
  1081  				if v2 == 0 {
  1082  					panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1083  				}
  1084  			}
  1085  
  1086  			switch t {
  1087  			case wazeroir.SignedTypeInt32:
  1088  				d := int32(v2)
  1089  				n := int32(v1)
  1090  				if n == math.MinInt32 && d == -1 {
  1091  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1092  				}
  1093  				ce.pushValue(uint64(uint32(n / d)))
  1094  			case wazeroir.SignedTypeInt64:
  1095  				d := int64(v2)
  1096  				n := int64(v1)
  1097  				if n == math.MinInt64 && d == -1 {
  1098  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1099  				}
  1100  				ce.pushValue(uint64(n / d))
  1101  			case wazeroir.SignedTypeUint32:
  1102  				d := uint32(v2)
  1103  				n := uint32(v1)
  1104  				ce.pushValue(uint64(n / d))
  1105  			case wazeroir.SignedTypeUint64:
  1106  				d := v2
  1107  				n := v1
  1108  				ce.pushValue(n / d)
  1109  			case wazeroir.SignedTypeFloat32:
  1110  				ce.pushValue(divFloat32bits(uint32(v1), uint32(v2)))
  1111  			case wazeroir.SignedTypeFloat64:
  1112  				ce.pushValue(math.Float64bits(math.Float64frombits(v1) / math.Float64frombits(v2)))
  1113  			}
  1114  			frame.pc++
  1115  		case wazeroir.OperationKindRem:
  1116  			v2, v1 := ce.popValue(), ce.popValue()
  1117  			if v2 == 0 {
  1118  				panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1119  			}
  1120  			switch wazeroir.SignedInt(op.B1) {
  1121  			case wazeroir.SignedInt32:
  1122  				d := int32(v2)
  1123  				n := int32(v1)
  1124  				ce.pushValue(uint64(uint32(n % d)))
  1125  			case wazeroir.SignedInt64:
  1126  				d := int64(v2)
  1127  				n := int64(v1)
  1128  				ce.pushValue(uint64(n % d))
  1129  			case wazeroir.SignedUint32:
  1130  				d := uint32(v2)
  1131  				n := uint32(v1)
  1132  				ce.pushValue(uint64(n % d))
  1133  			case wazeroir.SignedUint64:
  1134  				d := v2
  1135  				n := v1
  1136  				ce.pushValue(n % d)
  1137  			}
  1138  			frame.pc++
  1139  		case wazeroir.OperationKindAnd:
  1140  			v2 := ce.popValue()
  1141  			v1 := ce.popValue()
  1142  			if op.B1 == 0 {
  1143  				// UnsignedInt32
  1144  				ce.pushValue(uint64(uint32(v2) & uint32(v1)))
  1145  			} else {
  1146  				// UnsignedInt64
  1147  				ce.pushValue(uint64(v2 & v1))
  1148  			}
  1149  			frame.pc++
  1150  		case wazeroir.OperationKindOr:
  1151  			v2 := ce.popValue()
  1152  			v1 := ce.popValue()
  1153  			if op.B1 == 0 {
  1154  				// UnsignedInt32
  1155  				ce.pushValue(uint64(uint32(v2) | uint32(v1)))
  1156  			} else {
  1157  				// UnsignedInt64
  1158  				ce.pushValue(uint64(v2 | v1))
  1159  			}
  1160  			frame.pc++
  1161  		case wazeroir.OperationKindXor:
  1162  			v2 := ce.popValue()
  1163  			v1 := ce.popValue()
  1164  			if op.B1 == 0 {
  1165  				// UnsignedInt32
  1166  				ce.pushValue(uint64(uint32(v2) ^ uint32(v1)))
  1167  			} else {
  1168  				// UnsignedInt64
  1169  				ce.pushValue(uint64(v2 ^ v1))
  1170  			}
  1171  			frame.pc++
  1172  		case wazeroir.OperationKindShl:
  1173  			v2 := ce.popValue()
  1174  			v1 := ce.popValue()
  1175  			if op.B1 == 0 {
  1176  				// UnsignedInt32
  1177  				ce.pushValue(uint64(uint32(v1) << (uint32(v2) % 32)))
  1178  			} else {
  1179  				// UnsignedInt64
  1180  				ce.pushValue(v1 << (v2 % 64))
  1181  			}
  1182  			frame.pc++
  1183  		case wazeroir.OperationKindShr:
  1184  			v2 := ce.popValue()
  1185  			v1 := ce.popValue()
  1186  			switch wazeroir.SignedInt(op.B1) {
  1187  			case wazeroir.SignedInt32:
  1188  				ce.pushValue(uint64(uint32(int32(v1) >> (uint32(v2) % 32))))
  1189  			case wazeroir.SignedInt64:
  1190  				ce.pushValue(uint64(int64(v1) >> (v2 % 64)))
  1191  			case wazeroir.SignedUint32:
  1192  				ce.pushValue(uint64(uint32(v1) >> (uint32(v2) % 32)))
  1193  			case wazeroir.SignedUint64:
  1194  				ce.pushValue(v1 >> (v2 % 64))
  1195  			}
  1196  			frame.pc++
  1197  		case wazeroir.OperationKindRotl:
  1198  			v2 := ce.popValue()
  1199  			v1 := ce.popValue()
  1200  			if op.B1 == 0 {
  1201  				// UnsignedInt32
  1202  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), int(v2))))
  1203  			} else {
  1204  				// UnsignedInt64
  1205  				ce.pushValue(uint64(bits.RotateLeft64(v1, int(v2))))
  1206  			}
  1207  			frame.pc++
  1208  		case wazeroir.OperationKindRotr:
  1209  			v2 := ce.popValue()
  1210  			v1 := ce.popValue()
  1211  			if op.B1 == 0 {
  1212  				// UnsignedInt32
  1213  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), -int(v2))))
  1214  			} else {
  1215  				// UnsignedInt64
  1216  				ce.pushValue(uint64(bits.RotateLeft64(v1, -int(v2))))
  1217  			}
  1218  			frame.pc++
  1219  		case wazeroir.OperationKindAbs:
  1220  			if op.B1 == 0 {
  1221  				// Float32
  1222  				const mask uint32 = 1 << 31
  1223  				ce.pushValue(uint64(uint32(ce.popValue()) &^ mask))
  1224  			} else {
  1225  				// Float64
  1226  				const mask uint64 = 1 << 63
  1227  				ce.pushValue(ce.popValue() &^ mask)
  1228  			}
  1229  			frame.pc++
  1230  		case wazeroir.OperationKindNeg:
  1231  			if op.B1 == 0 {
  1232  				// Float32
  1233  				v := -math.Float32frombits(uint32(ce.popValue()))
  1234  				ce.pushValue(uint64(math.Float32bits(v)))
  1235  			} else {
  1236  				// Float64
  1237  				v := -math.Float64frombits(ce.popValue())
  1238  				ce.pushValue(math.Float64bits(v))
  1239  			}
  1240  			frame.pc++
  1241  		case wazeroir.OperationKindCeil:
  1242  			if op.B1 == 0 {
  1243  				// Float32
  1244  				v := moremath.WasmCompatCeilF32(math.Float32frombits(uint32(ce.popValue())))
  1245  				ce.pushValue(uint64(math.Float32bits(v)))
  1246  			} else {
  1247  				// Float64
  1248  				v := moremath.WasmCompatCeilF64(math.Float64frombits(ce.popValue()))
  1249  				ce.pushValue(math.Float64bits(v))
  1250  			}
  1251  			frame.pc++
  1252  		case wazeroir.OperationKindFloor:
  1253  			if op.B1 == 0 {
  1254  				// Float32
  1255  				v := moremath.WasmCompatFloorF32(math.Float32frombits(uint32(ce.popValue())))
  1256  				ce.pushValue(uint64(math.Float32bits(v)))
  1257  			} else {
  1258  				// Float64
  1259  				v := moremath.WasmCompatFloorF64(math.Float64frombits(ce.popValue()))
  1260  				ce.pushValue(math.Float64bits(v))
  1261  			}
  1262  			frame.pc++
  1263  		case wazeroir.OperationKindTrunc:
  1264  			if op.B1 == 0 {
  1265  				// Float32
  1266  				v := moremath.WasmCompatTruncF32(math.Float32frombits(uint32(ce.popValue())))
  1267  				ce.pushValue(uint64(math.Float32bits(v)))
  1268  			} else {
  1269  				// Float64
  1270  				v := moremath.WasmCompatTruncF64(math.Float64frombits(ce.popValue()))
  1271  				ce.pushValue(math.Float64bits(v))
  1272  			}
  1273  			frame.pc++
  1274  		case wazeroir.OperationKindNearest:
  1275  			if op.B1 == 0 {
  1276  				// Float32
  1277  				f := math.Float32frombits(uint32(ce.popValue()))
  1278  				ce.pushValue(uint64(math.Float32bits(moremath.WasmCompatNearestF32(f))))
  1279  			} else {
  1280  				// Float64
  1281  				f := math.Float64frombits(ce.popValue())
  1282  				ce.pushValue(math.Float64bits(moremath.WasmCompatNearestF64(f)))
  1283  			}
  1284  			frame.pc++
  1285  		case wazeroir.OperationKindSqrt:
  1286  			if op.B1 == 0 {
  1287  				// Float32
  1288  				v := math.Sqrt(float64(math.Float32frombits(uint32(ce.popValue()))))
  1289  				ce.pushValue(uint64(math.Float32bits(float32(v))))
  1290  			} else {
  1291  				// Float64
  1292  				v := math.Sqrt(math.Float64frombits(ce.popValue()))
  1293  				ce.pushValue(math.Float64bits(v))
  1294  			}
  1295  			frame.pc++
  1296  		case wazeroir.OperationKindMin:
  1297  			if op.B1 == 0 {
  1298  				// Float32
  1299  				ce.pushValue(WasmCompatMin32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1300  			} else {
  1301  				v2 := math.Float64frombits(ce.popValue())
  1302  				v1 := math.Float64frombits(ce.popValue())
  1303  				ce.pushValue(math.Float64bits(moremath.WasmCompatMin64(v1, v2)))
  1304  			}
  1305  			frame.pc++
  1306  		case wazeroir.OperationKindMax:
  1307  			if op.B1 == 0 {
  1308  				ce.pushValue(WasmCompatMax32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1309  			} else {
  1310  				// Float64
  1311  				v2 := math.Float64frombits(ce.popValue())
  1312  				v1 := math.Float64frombits(ce.popValue())
  1313  				ce.pushValue(math.Float64bits(moremath.WasmCompatMax64(v1, v2)))
  1314  			}
  1315  			frame.pc++
  1316  		case wazeroir.OperationKindCopysign:
  1317  			if op.B1 == 0 {
  1318  				// Float32
  1319  				v2 := uint32(ce.popValue())
  1320  				v1 := uint32(ce.popValue())
  1321  				const signbit = 1 << 31
  1322  				ce.pushValue(uint64(v1&^signbit | v2&signbit))
  1323  			} else {
  1324  				// Float64
  1325  				v2 := ce.popValue()
  1326  				v1 := ce.popValue()
  1327  				const signbit = 1 << 63
  1328  				ce.pushValue(v1&^signbit | v2&signbit)
  1329  			}
  1330  			frame.pc++
  1331  		case wazeroir.OperationKindI32WrapFromI64:
  1332  			ce.pushValue(uint64(uint32(ce.popValue())))
  1333  			frame.pc++
  1334  		case wazeroir.OperationKindITruncFromF:
  1335  			if op.B1 == 0 {
  1336  				// Float32
  1337  				switch wazeroir.SignedInt(op.B2) {
  1338  				case wazeroir.SignedInt32:
  1339  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1340  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1341  						if op.B3 {
  1342  							// non-trapping conversion must cast nan to zero.
  1343  							v = 0
  1344  						} else {
  1345  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1346  						}
  1347  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1348  						if op.B3 {
  1349  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1350  							if v < 0 {
  1351  								v = math.MinInt32
  1352  							} else {
  1353  								v = math.MaxInt32
  1354  							}
  1355  						} else {
  1356  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1357  						}
  1358  					}
  1359  					ce.pushValue(uint64(uint32(int32(v))))
  1360  				case wazeroir.SignedInt64:
  1361  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1362  					res := int64(v)
  1363  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1364  						if op.B3 {
  1365  							// non-trapping conversion must cast nan to zero.
  1366  							res = 0
  1367  						} else {
  1368  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1369  						}
  1370  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1371  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1372  						// and that's why we use '>=' not '>' to check overflow.
  1373  						if op.B3 {
  1374  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1375  							if v < 0 {
  1376  								res = math.MinInt64
  1377  							} else {
  1378  								res = math.MaxInt64
  1379  							}
  1380  						} else {
  1381  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1382  						}
  1383  					}
  1384  					ce.pushValue(uint64(res))
  1385  				case wazeroir.SignedUint32:
  1386  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1387  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1388  						if op.B3 {
  1389  							// non-trapping conversion must cast nan to zero.
  1390  							v = 0
  1391  						} else {
  1392  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1393  						}
  1394  					} else if v < 0 || v > math.MaxUint32 {
  1395  						if op.B3 {
  1396  							// non-trapping conversion must "saturate" the value for overflowing source.
  1397  							if v < 0 {
  1398  								v = 0
  1399  							} else {
  1400  								v = math.MaxUint32
  1401  							}
  1402  						} else {
  1403  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1404  						}
  1405  					}
  1406  					ce.pushValue(uint64(uint32(v)))
  1407  				case wazeroir.SignedUint64:
  1408  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1409  					res := uint64(v)
  1410  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1411  						if op.B3 {
  1412  							// non-trapping conversion must cast nan to zero.
  1413  							res = 0
  1414  						} else {
  1415  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1416  						}
  1417  					} else if v < 0 || v >= math.MaxUint64 {
  1418  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1419  						// and that's why we use '>=' not '>' to check overflow.
  1420  						if op.B3 {
  1421  							// non-trapping conversion must "saturate" the value for overflowing source.
  1422  							if v < 0 {
  1423  								res = 0
  1424  							} else {
  1425  								res = math.MaxUint64
  1426  							}
  1427  						} else {
  1428  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1429  						}
  1430  					}
  1431  					ce.pushValue(res)
  1432  				}
  1433  			} else {
  1434  				// Float64
  1435  				switch wazeroir.SignedInt(op.B2) {
  1436  				case wazeroir.SignedInt32:
  1437  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1438  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1439  						if op.B3 {
  1440  							// non-trapping conversion must cast nan to zero.
  1441  							v = 0
  1442  						} else {
  1443  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1444  						}
  1445  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1446  						if op.B3 {
  1447  							// non-trapping conversion must "saturate" the value for overflowing source.
  1448  							if v < 0 {
  1449  								v = math.MinInt32
  1450  							} else {
  1451  								v = math.MaxInt32
  1452  							}
  1453  						} else {
  1454  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1455  						}
  1456  					}
  1457  					ce.pushValue(uint64(uint32(int32(v))))
  1458  				case wazeroir.SignedInt64:
  1459  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1460  					res := int64(v)
  1461  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1462  						if op.B3 {
  1463  							// non-trapping conversion must cast nan to zero.
  1464  							res = 0
  1465  						} else {
  1466  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1467  						}
  1468  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1469  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1470  						// and that's why we use '>=' not '>' to check overflow.
  1471  						if op.B3 {
  1472  							// non-trapping conversion must "saturate" the value for overflowing source.
  1473  							if v < 0 {
  1474  								res = math.MinInt64
  1475  							} else {
  1476  								res = math.MaxInt64
  1477  							}
  1478  						} else {
  1479  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1480  						}
  1481  					}
  1482  					ce.pushValue(uint64(res))
  1483  				case wazeroir.SignedUint32:
  1484  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1485  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1486  						if op.B3 {
  1487  							// non-trapping conversion must cast nan to zero.
  1488  							v = 0
  1489  						} else {
  1490  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1491  						}
  1492  					} else if v < 0 || v > math.MaxUint32 {
  1493  						if op.B3 {
  1494  							// non-trapping conversion must "saturate" the value for overflowing source.
  1495  							if v < 0 {
  1496  								v = 0
  1497  							} else {
  1498  								v = math.MaxUint32
  1499  							}
  1500  						} else {
  1501  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1502  						}
  1503  					}
  1504  					ce.pushValue(uint64(uint32(v)))
  1505  				case wazeroir.SignedUint64:
  1506  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1507  					res := uint64(v)
  1508  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1509  						if op.B3 {
  1510  							// non-trapping conversion must cast nan to zero.
  1511  							res = 0
  1512  						} else {
  1513  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1514  						}
  1515  					} else if v < 0 || v >= math.MaxUint64 {
  1516  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1517  						// and that's why we use '>=' not '>' to check overflow.
  1518  						if op.B3 {
  1519  							// non-trapping conversion must "saturate" the value for overflowing source.
  1520  							if v < 0 {
  1521  								res = 0
  1522  							} else {
  1523  								res = math.MaxUint64
  1524  							}
  1525  						} else {
  1526  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1527  						}
  1528  					}
  1529  					ce.pushValue(res)
  1530  				}
  1531  			}
  1532  			frame.pc++
  1533  		case wazeroir.OperationKindFConvertFromI:
  1534  			switch wazeroir.SignedInt(op.B1) {
  1535  			case wazeroir.SignedInt32:
  1536  				if op.B2 == 0 {
  1537  					// Float32
  1538  					v := float32(int32(ce.popValue()))
  1539  					ce.pushValue(uint64(math.Float32bits(v)))
  1540  				} else {
  1541  					// Float64
  1542  					v := float64(int32(ce.popValue()))
  1543  					ce.pushValue(math.Float64bits(v))
  1544  				}
  1545  			case wazeroir.SignedInt64:
  1546  				if op.B2 == 0 {
  1547  					// Float32
  1548  					v := float32(int64(ce.popValue()))
  1549  					ce.pushValue(uint64(math.Float32bits(v)))
  1550  				} else {
  1551  					// Float64
  1552  					v := float64(int64(ce.popValue()))
  1553  					ce.pushValue(math.Float64bits(v))
  1554  				}
  1555  			case wazeroir.SignedUint32:
  1556  				if op.B2 == 0 {
  1557  					// Float32
  1558  					v := float32(uint32(ce.popValue()))
  1559  					ce.pushValue(uint64(math.Float32bits(v)))
  1560  				} else {
  1561  					// Float64
  1562  					v := float64(uint32(ce.popValue()))
  1563  					ce.pushValue(math.Float64bits(v))
  1564  				}
  1565  			case wazeroir.SignedUint64:
  1566  				if op.B2 == 0 {
  1567  					// Float32
  1568  					v := float32(ce.popValue())
  1569  					ce.pushValue(uint64(math.Float32bits(v)))
  1570  				} else {
  1571  					// Float64
  1572  					v := float64(ce.popValue())
  1573  					ce.pushValue(math.Float64bits(v))
  1574  				}
  1575  			}
  1576  			frame.pc++
  1577  		case wazeroir.OperationKindF32DemoteFromF64:
  1578  			v := float32(math.Float64frombits(ce.popValue()))
  1579  			ce.pushValue(uint64(math.Float32bits(v)))
  1580  			frame.pc++
  1581  		case wazeroir.OperationKindF64PromoteFromF32:
  1582  			v := float64(math.Float32frombits(uint32(ce.popValue())))
  1583  			ce.pushValue(math.Float64bits(v))
  1584  			frame.pc++
  1585  		case wazeroir.OperationKindExtend:
  1586  			if op.B1 == 1 {
  1587  				// Signed.
  1588  				v := int64(int32(ce.popValue()))
  1589  				ce.pushValue(uint64(v))
  1590  			} else {
  1591  				v := uint64(uint32(ce.popValue()))
  1592  				ce.pushValue(v)
  1593  			}
  1594  			frame.pc++
  1595  		case wazeroir.OperationKindSignExtend32From8:
  1596  			v := uint32(int8(ce.popValue()))
  1597  			ce.pushValue(uint64(v))
  1598  			frame.pc++
  1599  		case wazeroir.OperationKindSignExtend32From16:
  1600  			v := uint32(int16(ce.popValue()))
  1601  			ce.pushValue(uint64(v))
  1602  			frame.pc++
  1603  		case wazeroir.OperationKindSignExtend64From8:
  1604  			v := int64(int8(ce.popValue()))
  1605  			ce.pushValue(uint64(v))
  1606  			frame.pc++
  1607  		case wazeroir.OperationKindSignExtend64From16:
  1608  			v := int64(int16(ce.popValue()))
  1609  			ce.pushValue(uint64(v))
  1610  			frame.pc++
  1611  		case wazeroir.OperationKindSignExtend64From32:
  1612  			v := int64(int32(ce.popValue()))
  1613  			ce.pushValue(uint64(v))
  1614  			frame.pc++
  1615  		case wazeroir.OperationKindMemoryInit:
  1616  			dataInstance := dataInstances[op.U1]
  1617  			copySize := ce.popValue()
  1618  			inDataOffset := ce.popValue()
  1619  			inMemoryOffset := ce.popValue()
  1620  			if inDataOffset+copySize > uint64(len(dataInstance)) ||
  1621  				inMemoryOffset+copySize > uint64(len(memoryInst.Buffer)) {
  1622  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1623  			} else if copySize != 0 {
  1624  				copy(memoryInst.Buffer[inMemoryOffset:inMemoryOffset+copySize], dataInstance[inDataOffset:])
  1625  			}
  1626  			frame.pc++
  1627  		case wazeroir.OperationKindDataDrop:
  1628  			dataInstances[op.U1] = nil
  1629  			frame.pc++
  1630  		case wazeroir.OperationKindMemoryCopy:
  1631  			memLen := uint64(len(memoryInst.Buffer))
  1632  			copySize := ce.popValue()
  1633  			sourceOffset := ce.popValue()
  1634  			destinationOffset := ce.popValue()
  1635  			if sourceOffset+copySize > memLen || destinationOffset+copySize > memLen {
  1636  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1637  			} else if copySize != 0 {
  1638  				copy(memoryInst.Buffer[destinationOffset:],
  1639  					memoryInst.Buffer[sourceOffset:sourceOffset+copySize])
  1640  			}
  1641  			frame.pc++
  1642  		case wazeroir.OperationKindMemoryFill:
  1643  			fillSize := ce.popValue()
  1644  			value := byte(ce.popValue())
  1645  			offset := ce.popValue()
  1646  			if fillSize+offset > uint64(len(memoryInst.Buffer)) {
  1647  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1648  			} else if fillSize != 0 {
  1649  				// Uses the copy trick for faster filling buffer.
  1650  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1651  				buf := memoryInst.Buffer[offset : offset+fillSize]
  1652  				buf[0] = value
  1653  				for i := 1; i < len(buf); i *= 2 {
  1654  					copy(buf[i:], buf[:i])
  1655  				}
  1656  			}
  1657  			frame.pc++
  1658  		case wazeroir.OperationKindTableInit:
  1659  			elementInstance := elementInstances[op.U1]
  1660  			copySize := ce.popValue()
  1661  			inElementOffset := ce.popValue()
  1662  			inTableOffset := ce.popValue()
  1663  			table := tables[op.U2]
  1664  			if inElementOffset+copySize > uint64(len(elementInstance)) ||
  1665  				inTableOffset+copySize > uint64(len(table.References)) {
  1666  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1667  			} else if copySize != 0 {
  1668  				copy(table.References[inTableOffset:inTableOffset+copySize], elementInstance[inElementOffset:])
  1669  			}
  1670  			frame.pc++
  1671  		case wazeroir.OperationKindElemDrop:
  1672  			elementInstances[op.U1] = nil
  1673  			frame.pc++
  1674  		case wazeroir.OperationKindTableCopy:
  1675  			srcTable, dstTable := tables[op.U1].References, tables[op.U2].References
  1676  			copySize := ce.popValue()
  1677  			sourceOffset := ce.popValue()
  1678  			destinationOffset := ce.popValue()
  1679  			if sourceOffset+copySize > uint64(len(srcTable)) || destinationOffset+copySize > uint64(len(dstTable)) {
  1680  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1681  			} else if copySize != 0 {
  1682  				copy(dstTable[destinationOffset:], srcTable[sourceOffset:sourceOffset+copySize])
  1683  			}
  1684  			frame.pc++
  1685  		case wazeroir.OperationKindRefFunc:
  1686  			ce.pushValue(uint64(uintptr(unsafe.Pointer(&functions[op.U1]))))
  1687  			frame.pc++
  1688  		case wazeroir.OperationKindTableGet:
  1689  			table := tables[op.U1]
  1690  
  1691  			offset := ce.popValue()
  1692  			if offset >= uint64(len(table.References)) {
  1693  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1694  			}
  1695  
  1696  			ce.pushValue(uint64(table.References[offset]))
  1697  			frame.pc++
  1698  		case wazeroir.OperationKindTableSet:
  1699  			table := tables[op.U1]
  1700  			ref := ce.popValue()
  1701  
  1702  			offset := ce.popValue()
  1703  			if offset >= uint64(len(table.References)) {
  1704  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1705  			}
  1706  
  1707  			table.References[offset] = uintptr(ref) // externrefs are opaque uint64.
  1708  			frame.pc++
  1709  		case wazeroir.OperationKindTableSize:
  1710  			table := tables[op.U1]
  1711  			ce.pushValue(uint64(len(table.References)))
  1712  			frame.pc++
  1713  		case wazeroir.OperationKindTableGrow:
  1714  			table := tables[op.U1]
  1715  			num, ref := ce.popValue(), ce.popValue()
  1716  			ret := table.Grow(uint32(num), uintptr(ref))
  1717  			ce.pushValue(uint64(ret))
  1718  			frame.pc++
  1719  		case wazeroir.OperationKindTableFill:
  1720  			table := tables[op.U1]
  1721  			num := ce.popValue()
  1722  			ref := uintptr(ce.popValue())
  1723  			offset := ce.popValue()
  1724  			if num+offset > uint64(len(table.References)) {
  1725  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1726  			} else if num > 0 {
  1727  				// Uses the copy trick for faster filling the region with the value.
  1728  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1729  				targetRegion := table.References[offset : offset+num]
  1730  				targetRegion[0] = ref
  1731  				for i := 1; i < len(targetRegion); i *= 2 {
  1732  					copy(targetRegion[i:], targetRegion[:i])
  1733  				}
  1734  			}
  1735  			frame.pc++
  1736  		case wazeroir.OperationKindV128Const:
  1737  			lo, hi := op.U1, op.U2
  1738  			ce.pushValue(lo)
  1739  			ce.pushValue(hi)
  1740  			frame.pc++
		case wazeroir.OperationKindV128Add:
			// Lane-wise addition of two 128-bit vectors. Each vector occupies two stack slots with the
			// high 64 bits on top, so the right operand (y) is popped before the left operand (x).
			yHigh, yLow := ce.popValue(), ce.popValue()
			xHigh, xLow := ce.popValue(), ce.popValue()
			// op.B1 selects the lane shape. The result is pushed low half first, then high half.
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Each byte lane is added in uint8 arithmetic, so a carry wraps inside the lane instead of
				// leaking into the neighbouring lane.
				ce.pushValue(
					uint64(uint8(xLow>>8)+uint8(yLow>>8))<<8 | uint64(uint8(xLow)+uint8(yLow)) |
						uint64(uint8(xLow>>24)+uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)+uint8(yLow>>16))<<16 |
						uint64(uint8(xLow>>40)+uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)+uint8(yLow>>32))<<32 |
						uint64(uint8(xLow>>56)+uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)+uint8(yLow>>48))<<48,
				)
				ce.pushValue(
					uint64(uint8(xHigh>>8)+uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)+uint8(yHigh)) |
						uint64(uint8(xHigh>>24)+uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)+uint8(yHigh>>16))<<16 |
						uint64(uint8(xHigh>>40)+uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)+uint8(yHigh>>32))<<32 |
						uint64(uint8(xHigh>>56)+uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)+uint8(yHigh>>48))<<48,
				)
			case wazeroir.ShapeI16x8:
				// In Go `>>` binds tighter than `+`, so `xLow>>16+yLow>>16` is `(xLow>>16)+(yLow>>16)`;
				// truncating that sum to uint16 gives the same lane value as adding the truncated lanes.
				ce.pushValue(
					uint64(uint16(xLow>>16+yLow>>16))<<16 | uint64(uint16(xLow)+uint16(yLow)) |
						uint64(uint16(xLow>>48+yLow>>48))<<48 | uint64(uint16(xLow>>32+yLow>>32))<<32,
				)
				ce.pushValue(
					uint64(uint16(xHigh>>16)+uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)+uint16(yHigh)) |
						uint64(uint16(xHigh>>48)+uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)+uint16(yHigh>>32))<<32,
				)
			case wazeroir.ShapeI32x4:
				// Two 32-bit lanes per 64-bit half, each added with 32-bit wraparound.
				ce.pushValue(uint64(uint32(xLow>>32)+uint32(yLow>>32))<<32 | uint64(uint32(xLow)+uint32(yLow)))
				ce.pushValue(uint64(uint32(xHigh>>32)+uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)+uint32(yHigh)))
			case wazeroir.ShapeI64x2:
				// One 64-bit lane per half; native uint64 addition already wraps.
				ce.pushValue(xLow + yLow)
				ce.pushValue(xHigh + yHigh)
			case wazeroir.ShapeF32x4:
				// Each 32-bit lane is handed to addFloat32bits as a raw bit pattern.
				ce.pushValue(
					addFloat32bits(uint32(xLow), uint32(yLow)) | addFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
				)
				ce.pushValue(
					addFloat32bits(uint32(xHigh), uint32(yHigh)) | addFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
				)
			case wazeroir.ShapeF64x2:
				// Each half is reinterpreted as a float64, added, and converted back to its bit pattern.
				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow)))
				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh)))
			}
			frame.pc++
		case wazeroir.OperationKindV128Sub:
			// Lane-wise subtraction (x - y) of two 128-bit vectors. Each vector occupies two stack slots
			// with the high 64 bits on top, so the right operand (y) is popped before the left operand (x).
			yHigh, yLow := ce.popValue(), ce.popValue()
			xHigh, xLow := ce.popValue(), ce.popValue()
			// op.B1 selects the lane shape. The result is pushed low half first, then high half.
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Each byte lane is subtracted in uint8 arithmetic, so a borrow wraps inside the lane
				// instead of leaking into the neighbouring lane.
				ce.pushValue(
					uint64(uint8(xLow>>8)-uint8(yLow>>8))<<8 | uint64(uint8(xLow)-uint8(yLow)) |
						uint64(uint8(xLow>>24)-uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)-uint8(yLow>>16))<<16 |
						uint64(uint8(xLow>>40)-uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)-uint8(yLow>>32))<<32 |
						uint64(uint8(xLow>>56)-uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)-uint8(yLow>>48))<<48,
				)
				ce.pushValue(
					uint64(uint8(xHigh>>8)-uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)-uint8(yHigh)) |
						uint64(uint8(xHigh>>24)-uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)-uint8(yHigh>>16))<<16 |
						uint64(uint8(xHigh>>40)-uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)-uint8(yHigh>>32))<<32 |
						uint64(uint8(xHigh>>56)-uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)-uint8(yHigh>>48))<<48,
				)
			case wazeroir.ShapeI16x8:
				// Four 16-bit lanes per 64-bit half, each subtracted with 16-bit wraparound.
				ce.pushValue(
					uint64(uint16(xLow>>16)-uint16(yLow>>16))<<16 | uint64(uint16(xLow)-uint16(yLow)) |
						uint64(uint16(xLow>>48)-uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)-uint16(yLow>>32))<<32,
				)
				ce.pushValue(
					uint64(uint16(xHigh>>16)-uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)-uint16(yHigh)) |
						uint64(uint16(xHigh>>48)-uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)-uint16(yHigh>>32))<<32,
				)
			case wazeroir.ShapeI32x4:
				// In Go `>>` binds tighter than `-`, so `xLow>>32-yLow>>32` is `(xLow>>32)-(yLow>>32)`;
				// truncating that difference to uint32 gives the same lane value as subtracting the truncated lanes.
				ce.pushValue(uint64(uint32(xLow>>32-yLow>>32))<<32 | uint64(uint32(xLow)-uint32(yLow)))
				ce.pushValue(uint64(uint32(xHigh>>32-yHigh>>32))<<32 | uint64(uint32(xHigh)-uint32(yHigh)))
			case wazeroir.ShapeI64x2:
				// One 64-bit lane per half; native uint64 subtraction already wraps.
				ce.pushValue(xLow - yLow)
				ce.pushValue(xHigh - yHigh)
			case wazeroir.ShapeF32x4:
				// Each 32-bit lane is handed to subFloat32bits as a raw bit pattern.
				ce.pushValue(
					subFloat32bits(uint32(xLow), uint32(yLow)) | subFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
				)
				ce.pushValue(
					subFloat32bits(uint32(xHigh), uint32(yHigh)) | subFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
				)
			case wazeroir.ShapeF64x2:
				// Each half is reinterpreted as a float64, subtracted, and converted back to its bit pattern.
				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow)))
				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh)))
			}
			frame.pc++
  1829  		case wazeroir.OperationKindV128Load:
  1830  			offset := ce.popMemoryOffset(op)
  1831  			switch op.B1 {
  1832  			case wazeroir.V128LoadType128:
  1833  				lo, ok := memoryInst.ReadUint64Le(offset)
  1834  				if !ok {
  1835  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1836  				}
  1837  				ce.pushValue(lo)
  1838  				hi, ok := memoryInst.ReadUint64Le(offset + 8)
  1839  				if !ok {
  1840  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1841  				}
  1842  				ce.pushValue(hi)
  1843  			case wazeroir.V128LoadType8x8s:
  1844  				data, ok := memoryInst.Read(offset, 8)
  1845  				if !ok {
  1846  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1847  				}
  1848  				ce.pushValue(
  1849  					uint64(uint16(int8(data[3])))<<48 | uint64(uint16(int8(data[2])))<<32 | uint64(uint16(int8(data[1])))<<16 | uint64(uint16(int8(data[0]))),
  1850  				)
  1851  				ce.pushValue(
  1852  					uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))),
  1853  				)
  1854  			case wazeroir.V128LoadType8x8u:
  1855  				data, ok := memoryInst.Read(offset, 8)
  1856  				if !ok {
  1857  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1858  				}
  1859  				ce.pushValue(
  1860  					uint64(data[3])<<48 | uint64(data[2])<<32 | uint64(data[1])<<16 | uint64(data[0]),
  1861  				)
  1862  				ce.pushValue(
  1863  					uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]),
  1864  				)
  1865  			case wazeroir.V128LoadType16x4s:
  1866  				data, ok := memoryInst.Read(offset, 8)
  1867  				if !ok {
  1868  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1869  				}
  1870  				ce.pushValue(
  1871  					uint64(int16(binary.LittleEndian.Uint16(data[2:])))<<32 |
  1872  						uint64(uint32(int16(binary.LittleEndian.Uint16(data)))),
  1873  				)
  1874  				ce.pushValue(
  1875  					uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 |
  1876  						uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))),
  1877  				)
  1878  			case wazeroir.V128LoadType16x4u:
  1879  				data, ok := memoryInst.Read(offset, 8)
  1880  				if !ok {
  1881  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1882  				}
  1883  				ce.pushValue(
  1884  					uint64(binary.LittleEndian.Uint16(data[2:]))<<32 | uint64(binary.LittleEndian.Uint16(data)),
  1885  				)
  1886  				ce.pushValue(
  1887  					uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])),
  1888  				)
  1889  			case wazeroir.V128LoadType32x2s:
  1890  				data, ok := memoryInst.Read(offset, 8)
  1891  				if !ok {
  1892  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1893  				}
  1894  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data))))
  1895  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:]))))
  1896  			case wazeroir.V128LoadType32x2u:
  1897  				data, ok := memoryInst.Read(offset, 8)
  1898  				if !ok {
  1899  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1900  				}
  1901  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data)))
  1902  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:])))
  1903  			case wazeroir.V128LoadType8Splat:
  1904  				v, ok := memoryInst.ReadByte(offset)
  1905  				if !ok {
  1906  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1907  				}
  1908  				v8 := uint64(v)<<56 | uint64(v)<<48 | uint64(v)<<40 | uint64(v)<<32 |
  1909  					uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v)
  1910  				ce.pushValue(v8)
  1911  				ce.pushValue(v8)
  1912  			case wazeroir.V128LoadType16Splat:
  1913  				v, ok := memoryInst.ReadUint16Le(offset)
  1914  				if !ok {
  1915  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1916  				}
  1917  				v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v)
  1918  				ce.pushValue(v4)
  1919  				ce.pushValue(v4)
  1920  			case wazeroir.V128LoadType32Splat:
  1921  				v, ok := memoryInst.ReadUint32Le(offset)
  1922  				if !ok {
  1923  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1924  				}
  1925  				vv := uint64(v)<<32 | uint64(v)
  1926  				ce.pushValue(vv)
  1927  				ce.pushValue(vv)
  1928  			case wazeroir.V128LoadType64Splat:
  1929  				lo, ok := memoryInst.ReadUint64Le(offset)
  1930  				if !ok {
  1931  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1932  				}
  1933  				ce.pushValue(lo)
  1934  				ce.pushValue(lo)
  1935  			case wazeroir.V128LoadType32zero:
  1936  				lo, ok := memoryInst.ReadUint32Le(offset)
  1937  				if !ok {
  1938  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1939  				}
  1940  				ce.pushValue(uint64(lo))
  1941  				ce.pushValue(0)
  1942  			case wazeroir.V128LoadType64zero:
  1943  				lo, ok := memoryInst.ReadUint64Le(offset)
  1944  				if !ok {
  1945  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1946  				}
  1947  				ce.pushValue(lo)
  1948  				ce.pushValue(0)
  1949  			}
  1950  			frame.pc++
		case wazeroir.OperationKindV128LoadLane:
			// Replaces one lane of the vector on the stack with a value read from memory.
			// The vector (high half on top) is popped first, then the memory operand.
			// op.B1 is the lane width in bits and op.B2 is the lane index.
			hi, lo := ce.popValue(), ce.popValue()
			offset := ce.popMemoryOffset(op)
			switch op.B1 {
			case 8:
				b, ok := memoryInst.ReadByte(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// Lanes 0-7 live in the low half, lanes 8-15 in the high half. s is the bit position of
				// the lane inside its half: clear the lane with the inverted mask, then OR in the new value.
				if op.B2 < 8 {
					s := op.B2 << 3
					lo = (lo & ^(0xff << s)) | uint64(b)<<s
				} else {
					s := (op.B2 - 8) << 3
					hi = (hi & ^(0xff << s)) | uint64(b)<<s
				}
			case 16:
				b, ok := memoryInst.ReadUint16Le(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// Lanes 0-3 live in the low half, lanes 4-7 in the high half.
				if op.B2 < 4 {
					s := op.B2 << 4
					lo = (lo & ^(0xff_ff << s)) | uint64(b)<<s
				} else {
					s := (op.B2 - 4) << 4
					hi = (hi & ^(0xff_ff << s)) | uint64(b)<<s
				}
			case 32:
				b, ok := memoryInst.ReadUint32Le(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// Lanes 0-1 live in the low half, lanes 2-3 in the high half.
				if op.B2 < 2 {
					s := op.B2 << 5
					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
				} else {
					s := (op.B2 - 2) << 5
					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
				}
			case 64:
				b, ok := memoryInst.ReadUint64Le(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// A 64-bit lane is a whole half: lane 0 is the low half, any other index the high half.
				if op.B2 == 0 {
					lo = b
				} else {
					hi = b
				}
			}
			// Push the updated vector back, low half first.
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
  2005  		case wazeroir.OperationKindV128Store:
  2006  			hi, lo := ce.popValue(), ce.popValue()
  2007  			offset := ce.popMemoryOffset(op)
  2008  			if ok := memoryInst.WriteUint64Le(offset, lo); !ok {
  2009  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2010  			}
  2011  			if ok := memoryInst.WriteUint64Le(offset+8, hi); !ok {
  2012  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2013  			}
  2014  			frame.pc++
		case wazeroir.OperationKindV128StoreLane:
			// Writes a single lane of the vector on the stack to memory.
			// The vector (high half on top) is popped first, then the memory operand.
			// op.B1 is the lane width in bits and op.B2 is the lane index.
			hi, lo := ce.popValue(), ce.popValue()
			offset := ce.popMemoryOffset(op)
			// ok stays false when op.B1 matches none of the widths below, which also ends in the panic.
			var ok bool
			switch op.B1 {
			case 8:
				// Lanes 0-7 come from the low half, lanes 8-15 from the high half.
				if op.B2 < 8 {
					ok = memoryInst.WriteByte(offset, byte(lo>>(op.B2*8)))
				} else {
					ok = memoryInst.WriteByte(offset, byte(hi>>((op.B2-8)*8)))
				}
			case 16:
				// Lanes 0-3 come from the low half, lanes 4-7 from the high half.
				if op.B2 < 4 {
					ok = memoryInst.WriteUint16Le(offset, uint16(lo>>(op.B2*16)))
				} else {
					ok = memoryInst.WriteUint16Le(offset, uint16(hi>>((op.B2-4)*16)))
				}
			case 32:
				// Lanes 0-1 come from the low half, lanes 2-3 from the high half.
				if op.B2 < 2 {
					ok = memoryInst.WriteUint32Le(offset, uint32(lo>>(op.B2*32)))
				} else {
					ok = memoryInst.WriteUint32Le(offset, uint32(hi>>((op.B2-2)*32)))
				}
			case 64:
				// A 64-bit lane is a whole half: lane 0 is the low half, any other index the high half.
				if op.B2 == 0 {
					ok = memoryInst.WriteUint64Le(offset, lo)
				} else {
					ok = memoryInst.WriteUint64Le(offset, hi)
				}
			}
			if !ok {
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			frame.pc++
		case wazeroir.OperationKindV128ReplaceLane:
			// Replaces one lane of a vector with a scalar. The scalar is on top of the stack, with the
			// vector (high half above low half) beneath it.
			// op.B1 is the lane shape and op.B2 is the lane index.
			v := ce.popValue()
			hi, lo := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Lanes 0-7 live in the low half, lanes 8-15 in the high half. s is the bit position of
				// the lane inside its half: clear the lane with the inverted mask, then OR in the new value.
				if op.B2 < 8 {
					s := op.B2 << 3
					lo = (lo & ^(0xff << s)) | uint64(byte(v))<<s
				} else {
					s := (op.B2 - 8) << 3
					hi = (hi & ^(0xff << s)) | uint64(byte(v))<<s
				}
			case wazeroir.ShapeI16x8:
				// Lanes 0-3 live in the low half, lanes 4-7 in the high half.
				if op.B2 < 4 {
					s := op.B2 << 4
					lo = (lo & ^(0xff_ff << s)) | uint64(uint16(v))<<s
				} else {
					s := (op.B2 - 4) << 4
					hi = (hi & ^(0xff_ff << s)) | uint64(uint16(v))<<s
				}
			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
				// Lanes 0-1 live in the low half, lanes 2-3 in the high half. Float lanes are handled
				// as raw 32-bit patterns, so they share the integer path.
				if op.B2 < 2 {
					s := op.B2 << 5
					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
				} else {
					s := (op.B2 - 2) << 5
					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
				}
			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
				// A 64-bit lane is a whole half: lane 0 is the low half, any other index the high half.
				if op.B2 == 0 {
					lo = v
				} else {
					hi = v
				}
			}
			// Push the updated vector back, low half first.
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
		case wazeroir.OperationKindV128ExtractLane:
			// Pops a vector (high half on top) and pushes the value of a single lane.
			// op.B1 is the lane shape, op.B2 the lane index, and op.B3 requests sign extension for the
			// 8-bit and 16-bit shapes.
			hi, lo := ce.popValue(), ce.popValue()
			var v uint64
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Lanes 0-7 come from the low half, lanes 8-15 from the high half.
				var u8 byte
				if op.B2 < 8 {
					u8 = byte(lo >> (op.B2 * 8))
				} else {
					u8 = byte(hi >> ((op.B2 - 8) * 8))
				}
				if op.B3 {
					// sign-extend.
					// The lane is widened to 32 bits with its sign; the upper 32 bits of the slot stay zero.
					v = uint64(uint32(int8(u8)))
				} else {
					v = uint64(u8)
				}
			case wazeroir.ShapeI16x8:
				// Lanes 0-3 come from the low half, lanes 4-7 from the high half.
				var u16 uint16
				if op.B2 < 4 {
					u16 = uint16(lo >> (op.B2 * 16))
				} else {
					u16 = uint16(hi >> ((op.B2 - 4) * 16))
				}
				if op.B3 {
					// sign-extend.
					// The lane is widened to 32 bits with its sign; the upper 32 bits of the slot stay zero.
					v = uint64(uint32(int16(u16)))
				} else {
					v = uint64(u16)
				}
			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
				// Lanes 0-1 come from the low half, lanes 2-3 from the high half. Float lanes are
				// returned as raw 32-bit patterns.
				if op.B2 < 2 {
					v = uint64(uint32(lo >> (op.B2 * 32)))
				} else {
					v = uint64(uint32(hi >> ((op.B2 - 2) * 32)))
				}
			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
				// A 64-bit lane is a whole half: lane 0 is the low half, any other index the high half.
				if op.B2 == 0 {
					v = lo
				} else {
					v = hi
				}
			}
			ce.pushValue(v)
			frame.pc++
  2132  		case wazeroir.OperationKindV128Splat:
  2133  			v := ce.popValue()
  2134  			var hi, lo uint64
  2135  			switch op.B1 {
  2136  			case wazeroir.ShapeI8x16:
  2137  				v8 := uint64(byte(v))<<56 | uint64(byte(v))<<48 | uint64(byte(v))<<40 | uint64(byte(v))<<32 |
  2138  					uint64(byte(v))<<24 | uint64(byte(v))<<16 | uint64(byte(v))<<8 | uint64(byte(v))
  2139  				hi, lo = v8, v8
  2140  			case wazeroir.ShapeI16x8:
  2141  				v4 := uint64(uint16(v))<<48 | uint64(uint16(v))<<32 | uint64(uint16(v))<<16 | uint64(uint16(v))
  2142  				hi, lo = v4, v4
  2143  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2144  				v2 := uint64(uint32(v))<<32 | uint64(uint32(v))
  2145  				lo, hi = v2, v2
  2146  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2147  				lo, hi = v, v
  2148  			}
  2149  			ce.pushValue(lo)
  2150  			ce.pushValue(hi)
  2151  			frame.pc++
  2152  		case wazeroir.OperationKindV128Swizzle:
  2153  			idxHi, idxLo := ce.popValue(), ce.popValue()
  2154  			baseHi, baseLo := ce.popValue(), ce.popValue()
  2155  			var newVal [16]byte
  2156  			for i := 0; i < 16; i++ {
  2157  				var id byte
  2158  				if i < 8 {
  2159  					id = byte(idxLo >> (i * 8))
  2160  				} else {
  2161  					id = byte(idxHi >> ((i - 8) * 8))
  2162  				}
  2163  				if id < 8 {
  2164  					newVal[i] = byte(baseLo >> (id * 8))
  2165  				} else if id < 16 {
  2166  					newVal[i] = byte(baseHi >> ((id - 8) * 8))
  2167  				}
  2168  			}
  2169  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2170  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2171  			frame.pc++
  2172  		case wazeroir.OperationKindV128Shuffle:
  2173  			xHi, xLo, yHi, yLo := ce.popValue(), ce.popValue(), ce.popValue(), ce.popValue()
  2174  			var newVal [16]byte
  2175  			for i, l := range op.Us {
  2176  				if l < 8 {
  2177  					newVal[i] = byte(yLo >> (l * 8))
  2178  				} else if l < 16 {
  2179  					newVal[i] = byte(yHi >> ((l - 8) * 8))
  2180  				} else if l < 24 {
  2181  					newVal[i] = byte(xLo >> ((l - 16) * 8))
  2182  				} else if l < 32 {
  2183  					newVal[i] = byte(xHi >> ((l - 24) * 8))
  2184  				}
  2185  			}
  2186  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2187  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2188  			frame.pc++
  2189  		case wazeroir.OperationKindV128AnyTrue:
  2190  			hi, lo := ce.popValue(), ce.popValue()
  2191  			if hi != 0 || lo != 0 {
  2192  				ce.pushValue(1)
  2193  			} else {
  2194  				ce.pushValue(0)
  2195  			}
  2196  			frame.pc++
		case wazeroir.OperationKindV128AllTrue:
			// Pushes 1 when every lane of the vector (shape given by op.B1) is non-zero, and 0 otherwise.
			// A shape not listed below leaves ret false, so 0 is pushed.
			hi, lo := ce.popValue(), ce.popValue()
			var ret bool
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Each 8-bit lane is isolated by shifting it down and truncating to uint8.
				ret = (uint8(lo) != 0) && (uint8(lo>>8) != 0) && (uint8(lo>>16) != 0) && (uint8(lo>>24) != 0) &&
					(uint8(lo>>32) != 0) && (uint8(lo>>40) != 0) && (uint8(lo>>48) != 0) && (uint8(lo>>56) != 0) &&
					(uint8(hi) != 0) && (uint8(hi>>8) != 0) && (uint8(hi>>16) != 0) && (uint8(hi>>24) != 0) &&
					(uint8(hi>>32) != 0) && (uint8(hi>>40) != 0) && (uint8(hi>>48) != 0) && (uint8(hi>>56) != 0)
			case wazeroir.ShapeI16x8:
				ret = (uint16(lo) != 0) && (uint16(lo>>16) != 0) && (uint16(lo>>32) != 0) && (uint16(lo>>48) != 0) &&
					(uint16(hi) != 0) && (uint16(hi>>16) != 0) && (uint16(hi>>32) != 0) && (uint16(hi>>48) != 0)
			case wazeroir.ShapeI32x4:
				ret = (uint32(lo) != 0) && (uint32(lo>>32) != 0) &&
					(uint32(hi) != 0) && (uint32(hi>>32) != 0)
			case wazeroir.ShapeI64x2:
				ret = (lo != 0) &&
					(hi != 0)
			}
			if ret {
				ce.pushValue(1)
			} else {
				ce.pushValue(0)
			}
			frame.pc++
  2222  		case wazeroir.OperationKindV128BitMask:
  2223  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitmask-extraction
  2224  			hi, lo := ce.popValue(), ce.popValue()
  2225  			var res uint64
  2226  			switch op.B1 {
  2227  			case wazeroir.ShapeI8x16:
  2228  				for i := 0; i < 8; i++ {
  2229  					if int8(lo>>(i*8)) < 0 {
  2230  						res |= 1 << i
  2231  					}
  2232  				}
  2233  				for i := 0; i < 8; i++ {
  2234  					if int8(hi>>(i*8)) < 0 {
  2235  						res |= 1 << (i + 8)
  2236  					}
  2237  				}
  2238  			case wazeroir.ShapeI16x8:
  2239  				for i := 0; i < 4; i++ {
  2240  					if int16(lo>>(i*16)) < 0 {
  2241  						res |= 1 << i
  2242  					}
  2243  				}
  2244  				for i := 0; i < 4; i++ {
  2245  					if int16(hi>>(i*16)) < 0 {
  2246  						res |= 1 << (i + 4)
  2247  					}
  2248  				}
  2249  			case wazeroir.ShapeI32x4:
  2250  				for i := 0; i < 2; i++ {
  2251  					if int32(lo>>(i*32)) < 0 {
  2252  						res |= 1 << i
  2253  					}
  2254  				}
  2255  				for i := 0; i < 2; i++ {
  2256  					if int32(hi>>(i*32)) < 0 {
  2257  						res |= 1 << (i + 2)
  2258  					}
  2259  				}
  2260  			case wazeroir.ShapeI64x2:
  2261  				if int64(lo) < 0 {
  2262  					res |= 0b01
  2263  				}
  2264  				if int(hi) < 0 {
  2265  					res |= 0b10
  2266  				}
  2267  			}
  2268  			ce.pushValue(res)
  2269  			frame.pc++
  2270  		case wazeroir.OperationKindV128And:
  2271  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2272  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2273  			ce.pushValue(x1Lo & x2Lo)
  2274  			ce.pushValue(x1Hi & x2Hi)
  2275  			frame.pc++
  2276  		case wazeroir.OperationKindV128Not:
  2277  			hi, lo := ce.popValue(), ce.popValue()
  2278  			ce.pushValue(^lo)
  2279  			ce.pushValue(^hi)
  2280  			frame.pc++
  2281  		case wazeroir.OperationKindV128Or:
  2282  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2283  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2284  			ce.pushValue(x1Lo | x2Lo)
  2285  			ce.pushValue(x1Hi | x2Hi)
  2286  			frame.pc++
  2287  		case wazeroir.OperationKindV128Xor:
  2288  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2289  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2290  			ce.pushValue(x1Lo ^ x2Lo)
  2291  			ce.pushValue(x1Hi ^ x2Hi)
  2292  			frame.pc++
  2293  		case wazeroir.OperationKindV128Bitselect:
  2294  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitwise-select
  2295  			cHi, cLo := ce.popValue(), ce.popValue()
  2296  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2297  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2298  			// v128.or(v128.and(v1, c), v128.and(v2, v128.not(c)))
  2299  			ce.pushValue((x1Lo & cLo) | (x2Lo & (^cLo)))
  2300  			ce.pushValue((x1Hi & cHi) | (x2Hi & (^cHi)))
  2301  			frame.pc++
  2302  		case wazeroir.OperationKindV128AndNot:
  2303  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2304  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2305  			ce.pushValue(x1Lo & (^x2Lo))
  2306  			ce.pushValue(x1Hi & (^x2Hi))
  2307  			frame.pc++
		case wazeroir.OperationKindV128Shl:
			// Lane-wise left shift. The shift amount is on top of the stack, with the vector (high half
			// above low half) beneath it. op.B1 is the lane shape.
			s := ce.popValue()
			hi, lo := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// The shift amount is taken modulo the lane width. Each lane is shifted down to bit 0,
				// shifted left by s, truncated to the lane width to drop overflowing bits, and moved back.
				s = s % 8
				lo = uint64(uint8(lo<<s)) |
					uint64(uint8((lo>>8)<<s))<<8 |
					uint64(uint8((lo>>16)<<s))<<16 |
					uint64(uint8((lo>>24)<<s))<<24 |
					uint64(uint8((lo>>32)<<s))<<32 |
					uint64(uint8((lo>>40)<<s))<<40 |
					uint64(uint8((lo>>48)<<s))<<48 |
					uint64(uint8((lo>>56)<<s))<<56
				hi = uint64(uint8(hi<<s)) |
					uint64(uint8((hi>>8)<<s))<<8 |
					uint64(uint8((hi>>16)<<s))<<16 |
					uint64(uint8((hi>>24)<<s))<<24 |
					uint64(uint8((hi>>32)<<s))<<32 |
					uint64(uint8((hi>>40)<<s))<<40 |
					uint64(uint8((hi>>48)<<s))<<48 |
					uint64(uint8((hi>>56)<<s))<<56
			case wazeroir.ShapeI16x8:
				s = s % 16
				lo = uint64(uint16(lo<<s)) |
					uint64(uint16((lo>>16)<<s))<<16 |
					uint64(uint16((lo>>32)<<s))<<32 |
					uint64(uint16((lo>>48)<<s))<<48
				hi = uint64(uint16(hi<<s)) |
					uint64(uint16((hi>>16)<<s))<<16 |
					uint64(uint16((hi>>32)<<s))<<32 |
					uint64(uint16((hi>>48)<<s))<<48
			case wazeroir.ShapeI32x4:
				s = s % 32
				lo = uint64(uint32(lo<<s)) | uint64(uint32((lo>>32)<<s))<<32
				hi = uint64(uint32(hi<<s)) | uint64(uint32((hi>>32)<<s))<<32
			case wazeroir.ShapeI64x2:
				// One 64-bit lane per half, so a plain shift of each half is enough.
				s = s % 64
				lo = lo << s
				hi = hi << s
			}
			// Push the shifted vector back, low half first.
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
		case wazeroir.OperationKindV128Shr:
			// Lane-wise right shift. The shift amount is on top of the stack, with the vector (high half
			// above low half) beneath it. op.B1 is the lane shape; op.B3 selects an arithmetic
			// (sign-preserving) shift instead of a logical one.
			s := ce.popValue()
			hi, lo := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// The shift amount is taken modulo the lane width.
				s = s % 8
				if op.B3 { // signed
					// Each lane is converted to int8 so that `>>` replicates the sign bit, then
					// converted back to uint8 before being placed at its position.
					lo = uint64(uint8(int8(lo)>>s)) |
						uint64(uint8(int8(lo>>8)>>s))<<8 |
						uint64(uint8(int8(lo>>16)>>s))<<16 |
						uint64(uint8(int8(lo>>24)>>s))<<24 |
						uint64(uint8(int8(lo>>32)>>s))<<32 |
						uint64(uint8(int8(lo>>40)>>s))<<40 |
						uint64(uint8(int8(lo>>48)>>s))<<48 |
						uint64(uint8(int8(lo>>56)>>s))<<56
					hi = uint64(uint8(int8(hi)>>s)) |
						uint64(uint8(int8(hi>>8)>>s))<<8 |
						uint64(uint8(int8(hi>>16)>>s))<<16 |
						uint64(uint8(int8(hi>>24)>>s))<<24 |
						uint64(uint8(int8(hi>>32)>>s))<<32 |
						uint64(uint8(int8(hi>>40)>>s))<<40 |
						uint64(uint8(int8(hi>>48)>>s))<<48 |
						uint64(uint8(int8(hi>>56)>>s))<<56
				} else {
					// Unsigned lanes are shifted as uint8, which fills the vacated bits with zeros.
					lo = uint64(uint8(lo)>>s) |
						uint64(uint8(lo>>8)>>s)<<8 |
						uint64(uint8(lo>>16)>>s)<<16 |
						uint64(uint8(lo>>24)>>s)<<24 |
						uint64(uint8(lo>>32)>>s)<<32 |
						uint64(uint8(lo>>40)>>s)<<40 |
						uint64(uint8(lo>>48)>>s)<<48 |
						uint64(uint8(lo>>56)>>s)<<56
					hi = uint64(uint8(hi)>>s) |
						uint64(uint8(hi>>8)>>s)<<8 |
						uint64(uint8(hi>>16)>>s)<<16 |
						uint64(uint8(hi>>24)>>s)<<24 |
						uint64(uint8(hi>>32)>>s)<<32 |
						uint64(uint8(hi>>40)>>s)<<40 |
						uint64(uint8(hi>>48)>>s)<<48 |
						uint64(uint8(hi>>56)>>s)<<56
				}
			case wazeroir.ShapeI16x8:
				s = s % 16
				if op.B3 { // signed
					lo = uint64(uint16(int16(lo)>>s)) |
						uint64(uint16(int16(lo>>16)>>s))<<16 |
						uint64(uint16(int16(lo>>32)>>s))<<32 |
						uint64(uint16(int16(lo>>48)>>s))<<48
					hi = uint64(uint16(int16(hi)>>s)) |
						uint64(uint16(int16(hi>>16)>>s))<<16 |
						uint64(uint16(int16(hi>>32)>>s))<<32 |
						uint64(uint16(int16(hi>>48)>>s))<<48
				} else {
					lo = uint64(uint16(lo)>>s) |
						uint64(uint16(lo>>16)>>s)<<16 |
						uint64(uint16(lo>>32)>>s)<<32 |
						uint64(uint16(lo>>48)>>s)<<48
					hi = uint64(uint16(hi)>>s) |
						uint64(uint16(hi>>16)>>s)<<16 |
						uint64(uint16(hi>>32)>>s)<<32 |
						uint64(uint16(hi>>48)>>s)<<48
				}
			case wazeroir.ShapeI32x4:
				s = s % 32
				// op.B3 set means a signed (arithmetic) shift, as in the other shapes.
				if op.B3 {
					lo = uint64(uint32(int32(lo)>>s)) | uint64(uint32(int32(lo>>32)>>s))<<32
					hi = uint64(uint32(int32(hi)>>s)) | uint64(uint32(int32(hi>>32)>>s))<<32
				} else {
					lo = uint64(uint32(lo)>>s) | uint64(uint32(lo>>32)>>s)<<32
					hi = uint64(uint32(hi)>>s) | uint64(uint32(hi>>32)>>s)<<32
				}
			case wazeroir.ShapeI64x2:
				// One 64-bit lane per half, so each half is shifted directly.
				s = s % 64
				if op.B3 { // signed
					lo = uint64(int64(lo) >> s)
					hi = uint64(int64(hi) >> s)
				} else {
					lo = lo >> s
					hi = hi >> s
				}

			}
			// Push the shifted vector back, low half first.
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
  2437  		case wazeroir.OperationKindV128Cmp:
  2438  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2439  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2440  			var result []bool
  2441  			switch op.B1 {
  2442  			case wazeroir.V128CmpTypeI8x16Eq:
  2443  				result = []bool{
  2444  					byte(x1Lo>>0) == byte(x2Lo>>0), byte(x1Lo>>8) == byte(x2Lo>>8),
  2445  					byte(x1Lo>>16) == byte(x2Lo>>16), byte(x1Lo>>24) == byte(x2Lo>>24),
  2446  					byte(x1Lo>>32) == byte(x2Lo>>32), byte(x1Lo>>40) == byte(x2Lo>>40),
  2447  					byte(x1Lo>>48) == byte(x2Lo>>48), byte(x1Lo>>56) == byte(x2Lo>>56),
  2448  					byte(x1Hi>>0) == byte(x2Hi>>0), byte(x1Hi>>8) == byte(x2Hi>>8),
  2449  					byte(x1Hi>>16) == byte(x2Hi>>16), byte(x1Hi>>24) == byte(x2Hi>>24),
  2450  					byte(x1Hi>>32) == byte(x2Hi>>32), byte(x1Hi>>40) == byte(x2Hi>>40),
  2451  					byte(x1Hi>>48) == byte(x2Hi>>48), byte(x1Hi>>56) == byte(x2Hi>>56),
  2452  				}
  2453  			case wazeroir.V128CmpTypeI8x16Ne:
  2454  				result = []bool{
  2455  					byte(x1Lo>>0) != byte(x2Lo>>0), byte(x1Lo>>8) != byte(x2Lo>>8),
  2456  					byte(x1Lo>>16) != byte(x2Lo>>16), byte(x1Lo>>24) != byte(x2Lo>>24),
  2457  					byte(x1Lo>>32) != byte(x2Lo>>32), byte(x1Lo>>40) != byte(x2Lo>>40),
  2458  					byte(x1Lo>>48) != byte(x2Lo>>48), byte(x1Lo>>56) != byte(x2Lo>>56),
  2459  					byte(x1Hi>>0) != byte(x2Hi>>0), byte(x1Hi>>8) != byte(x2Hi>>8),
  2460  					byte(x1Hi>>16) != byte(x2Hi>>16), byte(x1Hi>>24) != byte(x2Hi>>24),
  2461  					byte(x1Hi>>32) != byte(x2Hi>>32), byte(x1Hi>>40) != byte(x2Hi>>40),
  2462  					byte(x1Hi>>48) != byte(x2Hi>>48), byte(x1Hi>>56) != byte(x2Hi>>56),
  2463  				}
  2464  			case wazeroir.V128CmpTypeI8x16LtS:
  2465  				result = []bool{
  2466  					int8(x1Lo>>0) < int8(x2Lo>>0), int8(x1Lo>>8) < int8(x2Lo>>8),
  2467  					int8(x1Lo>>16) < int8(x2Lo>>16), int8(x1Lo>>24) < int8(x2Lo>>24),
  2468  					int8(x1Lo>>32) < int8(x2Lo>>32), int8(x1Lo>>40) < int8(x2Lo>>40),
  2469  					int8(x1Lo>>48) < int8(x2Lo>>48), int8(x1Lo>>56) < int8(x2Lo>>56),
  2470  					int8(x1Hi>>0) < int8(x2Hi>>0), int8(x1Hi>>8) < int8(x2Hi>>8),
  2471  					int8(x1Hi>>16) < int8(x2Hi>>16), int8(x1Hi>>24) < int8(x2Hi>>24),
  2472  					int8(x1Hi>>32) < int8(x2Hi>>32), int8(x1Hi>>40) < int8(x2Hi>>40),
  2473  					int8(x1Hi>>48) < int8(x2Hi>>48), int8(x1Hi>>56) < int8(x2Hi>>56),
  2474  				}
  2475  			case wazeroir.V128CmpTypeI8x16LtU:
  2476  				result = []bool{
  2477  					byte(x1Lo>>0) < byte(x2Lo>>0), byte(x1Lo>>8) < byte(x2Lo>>8),
  2478  					byte(x1Lo>>16) < byte(x2Lo>>16), byte(x1Lo>>24) < byte(x2Lo>>24),
  2479  					byte(x1Lo>>32) < byte(x2Lo>>32), byte(x1Lo>>40) < byte(x2Lo>>40),
  2480  					byte(x1Lo>>48) < byte(x2Lo>>48), byte(x1Lo>>56) < byte(x2Lo>>56),
  2481  					byte(x1Hi>>0) < byte(x2Hi>>0), byte(x1Hi>>8) < byte(x2Hi>>8),
  2482  					byte(x1Hi>>16) < byte(x2Hi>>16), byte(x1Hi>>24) < byte(x2Hi>>24),
  2483  					byte(x1Hi>>32) < byte(x2Hi>>32), byte(x1Hi>>40) < byte(x2Hi>>40),
  2484  					byte(x1Hi>>48) < byte(x2Hi>>48), byte(x1Hi>>56) < byte(x2Hi>>56),
  2485  				}
  2486  			case wazeroir.V128CmpTypeI8x16GtS:
  2487  				result = []bool{
  2488  					int8(x1Lo>>0) > int8(x2Lo>>0), int8(x1Lo>>8) > int8(x2Lo>>8),
  2489  					int8(x1Lo>>16) > int8(x2Lo>>16), int8(x1Lo>>24) > int8(x2Lo>>24),
  2490  					int8(x1Lo>>32) > int8(x2Lo>>32), int8(x1Lo>>40) > int8(x2Lo>>40),
  2491  					int8(x1Lo>>48) > int8(x2Lo>>48), int8(x1Lo>>56) > int8(x2Lo>>56),
  2492  					int8(x1Hi>>0) > int8(x2Hi>>0), int8(x1Hi>>8) > int8(x2Hi>>8),
  2493  					int8(x1Hi>>16) > int8(x2Hi>>16), int8(x1Hi>>24) > int8(x2Hi>>24),
  2494  					int8(x1Hi>>32) > int8(x2Hi>>32), int8(x1Hi>>40) > int8(x2Hi>>40),
  2495  					int8(x1Hi>>48) > int8(x2Hi>>48), int8(x1Hi>>56) > int8(x2Hi>>56),
  2496  				}
  2497  			case wazeroir.V128CmpTypeI8x16GtU:
  2498  				result = []bool{
  2499  					byte(x1Lo>>0) > byte(x2Lo>>0), byte(x1Lo>>8) > byte(x2Lo>>8),
  2500  					byte(x1Lo>>16) > byte(x2Lo>>16), byte(x1Lo>>24) > byte(x2Lo>>24),
  2501  					byte(x1Lo>>32) > byte(x2Lo>>32), byte(x1Lo>>40) > byte(x2Lo>>40),
  2502  					byte(x1Lo>>48) > byte(x2Lo>>48), byte(x1Lo>>56) > byte(x2Lo>>56),
  2503  					byte(x1Hi>>0) > byte(x2Hi>>0), byte(x1Hi>>8) > byte(x2Hi>>8),
  2504  					byte(x1Hi>>16) > byte(x2Hi>>16), byte(x1Hi>>24) > byte(x2Hi>>24),
  2505  					byte(x1Hi>>32) > byte(x2Hi>>32), byte(x1Hi>>40) > byte(x2Hi>>40),
  2506  					byte(x1Hi>>48) > byte(x2Hi>>48), byte(x1Hi>>56) > byte(x2Hi>>56),
  2507  				}
  2508  			case wazeroir.V128CmpTypeI8x16LeS:
  2509  				result = []bool{
  2510  					int8(x1Lo>>0) <= int8(x2Lo>>0), int8(x1Lo>>8) <= int8(x2Lo>>8),
  2511  					int8(x1Lo>>16) <= int8(x2Lo>>16), int8(x1Lo>>24) <= int8(x2Lo>>24),
  2512  					int8(x1Lo>>32) <= int8(x2Lo>>32), int8(x1Lo>>40) <= int8(x2Lo>>40),
  2513  					int8(x1Lo>>48) <= int8(x2Lo>>48), int8(x1Lo>>56) <= int8(x2Lo>>56),
  2514  					int8(x1Hi>>0) <= int8(x2Hi>>0), int8(x1Hi>>8) <= int8(x2Hi>>8),
  2515  					int8(x1Hi>>16) <= int8(x2Hi>>16), int8(x1Hi>>24) <= int8(x2Hi>>24),
  2516  					int8(x1Hi>>32) <= int8(x2Hi>>32), int8(x1Hi>>40) <= int8(x2Hi>>40),
  2517  					int8(x1Hi>>48) <= int8(x2Hi>>48), int8(x1Hi>>56) <= int8(x2Hi>>56),
  2518  				}
  2519  			case wazeroir.V128CmpTypeI8x16LeU:
  2520  				result = []bool{
  2521  					byte(x1Lo>>0) <= byte(x2Lo>>0), byte(x1Lo>>8) <= byte(x2Lo>>8),
  2522  					byte(x1Lo>>16) <= byte(x2Lo>>16), byte(x1Lo>>24) <= byte(x2Lo>>24),
  2523  					byte(x1Lo>>32) <= byte(x2Lo>>32), byte(x1Lo>>40) <= byte(x2Lo>>40),
  2524  					byte(x1Lo>>48) <= byte(x2Lo>>48), byte(x1Lo>>56) <= byte(x2Lo>>56),
  2525  					byte(x1Hi>>0) <= byte(x2Hi>>0), byte(x1Hi>>8) <= byte(x2Hi>>8),
  2526  					byte(x1Hi>>16) <= byte(x2Hi>>16), byte(x1Hi>>24) <= byte(x2Hi>>24),
  2527  					byte(x1Hi>>32) <= byte(x2Hi>>32), byte(x1Hi>>40) <= byte(x2Hi>>40),
  2528  					byte(x1Hi>>48) <= byte(x2Hi>>48), byte(x1Hi>>56) <= byte(x2Hi>>56),
  2529  				}
  2530  			case wazeroir.V128CmpTypeI8x16GeS:
  2531  				result = []bool{
  2532  					int8(x1Lo>>0) >= int8(x2Lo>>0), int8(x1Lo>>8) >= int8(x2Lo>>8),
  2533  					int8(x1Lo>>16) >= int8(x2Lo>>16), int8(x1Lo>>24) >= int8(x2Lo>>24),
  2534  					int8(x1Lo>>32) >= int8(x2Lo>>32), int8(x1Lo>>40) >= int8(x2Lo>>40),
  2535  					int8(x1Lo>>48) >= int8(x2Lo>>48), int8(x1Lo>>56) >= int8(x2Lo>>56),
  2536  					int8(x1Hi>>0) >= int8(x2Hi>>0), int8(x1Hi>>8) >= int8(x2Hi>>8),
  2537  					int8(x1Hi>>16) >= int8(x2Hi>>16), int8(x1Hi>>24) >= int8(x2Hi>>24),
  2538  					int8(x1Hi>>32) >= int8(x2Hi>>32), int8(x1Hi>>40) >= int8(x2Hi>>40),
  2539  					int8(x1Hi>>48) >= int8(x2Hi>>48), int8(x1Hi>>56) >= int8(x2Hi>>56),
  2540  				}
  2541  			case wazeroir.V128CmpTypeI8x16GeU:
  2542  				result = []bool{
  2543  					byte(x1Lo>>0) >= byte(x2Lo>>0), byte(x1Lo>>8) >= byte(x2Lo>>8),
  2544  					byte(x1Lo>>16) >= byte(x2Lo>>16), byte(x1Lo>>24) >= byte(x2Lo>>24),
  2545  					byte(x1Lo>>32) >= byte(x2Lo>>32), byte(x1Lo>>40) >= byte(x2Lo>>40),
  2546  					byte(x1Lo>>48) >= byte(x2Lo>>48), byte(x1Lo>>56) >= byte(x2Lo>>56),
  2547  					byte(x1Hi>>0) >= byte(x2Hi>>0), byte(x1Hi>>8) >= byte(x2Hi>>8),
  2548  					byte(x1Hi>>16) >= byte(x2Hi>>16), byte(x1Hi>>24) >= byte(x2Hi>>24),
  2549  					byte(x1Hi>>32) >= byte(x2Hi>>32), byte(x1Hi>>40) >= byte(x2Hi>>40),
  2550  					byte(x1Hi>>48) >= byte(x2Hi>>48), byte(x1Hi>>56) >= byte(x2Hi>>56),
  2551  				}
  2552  			case wazeroir.V128CmpTypeI16x8Eq:
  2553  				result = []bool{
  2554  					uint16(x1Lo>>0) == uint16(x2Lo>>0), uint16(x1Lo>>16) == uint16(x2Lo>>16),
  2555  					uint16(x1Lo>>32) == uint16(x2Lo>>32), uint16(x1Lo>>48) == uint16(x2Lo>>48),
  2556  					uint16(x1Hi>>0) == uint16(x2Hi>>0), uint16(x1Hi>>16) == uint16(x2Hi>>16),
  2557  					uint16(x1Hi>>32) == uint16(x2Hi>>32), uint16(x1Hi>>48) == uint16(x2Hi>>48),
  2558  				}
  2559  			case wazeroir.V128CmpTypeI16x8Ne:
  2560  				result = []bool{
  2561  					uint16(x1Lo>>0) != uint16(x2Lo>>0), uint16(x1Lo>>16) != uint16(x2Lo>>16),
  2562  					uint16(x1Lo>>32) != uint16(x2Lo>>32), uint16(x1Lo>>48) != uint16(x2Lo>>48),
  2563  					uint16(x1Hi>>0) != uint16(x2Hi>>0), uint16(x1Hi>>16) != uint16(x2Hi>>16),
  2564  					uint16(x1Hi>>32) != uint16(x2Hi>>32), uint16(x1Hi>>48) != uint16(x2Hi>>48),
  2565  				}
  2566  			case wazeroir.V128CmpTypeI16x8LtS:
  2567  				result = []bool{
  2568  					int16(x1Lo>>0) < int16(x2Lo>>0), int16(x1Lo>>16) < int16(x2Lo>>16),
  2569  					int16(x1Lo>>32) < int16(x2Lo>>32), int16(x1Lo>>48) < int16(x2Lo>>48),
  2570  					int16(x1Hi>>0) < int16(x2Hi>>0), int16(x1Hi>>16) < int16(x2Hi>>16),
  2571  					int16(x1Hi>>32) < int16(x2Hi>>32), int16(x1Hi>>48) < int16(x2Hi>>48),
  2572  				}
  2573  			case wazeroir.V128CmpTypeI16x8LtU:
  2574  				result = []bool{
  2575  					uint16(x1Lo>>0) < uint16(x2Lo>>0), uint16(x1Lo>>16) < uint16(x2Lo>>16),
  2576  					uint16(x1Lo>>32) < uint16(x2Lo>>32), uint16(x1Lo>>48) < uint16(x2Lo>>48),
  2577  					uint16(x1Hi>>0) < uint16(x2Hi>>0), uint16(x1Hi>>16) < uint16(x2Hi>>16),
  2578  					uint16(x1Hi>>32) < uint16(x2Hi>>32), uint16(x1Hi>>48) < uint16(x2Hi>>48),
  2579  				}
  2580  			case wazeroir.V128CmpTypeI16x8GtS:
  2581  				result = []bool{
  2582  					int16(x1Lo>>0) > int16(x2Lo>>0), int16(x1Lo>>16) > int16(x2Lo>>16),
  2583  					int16(x1Lo>>32) > int16(x2Lo>>32), int16(x1Lo>>48) > int16(x2Lo>>48),
  2584  					int16(x1Hi>>0) > int16(x2Hi>>0), int16(x1Hi>>16) > int16(x2Hi>>16),
  2585  					int16(x1Hi>>32) > int16(x2Hi>>32), int16(x1Hi>>48) > int16(x2Hi>>48),
  2586  				}
  2587  			case wazeroir.V128CmpTypeI16x8GtU:
  2588  				result = []bool{
  2589  					uint16(x1Lo>>0) > uint16(x2Lo>>0), uint16(x1Lo>>16) > uint16(x2Lo>>16),
  2590  					uint16(x1Lo>>32) > uint16(x2Lo>>32), uint16(x1Lo>>48) > uint16(x2Lo>>48),
  2591  					uint16(x1Hi>>0) > uint16(x2Hi>>0), uint16(x1Hi>>16) > uint16(x2Hi>>16),
  2592  					uint16(x1Hi>>32) > uint16(x2Hi>>32), uint16(x1Hi>>48) > uint16(x2Hi>>48),
  2593  				}
  2594  			case wazeroir.V128CmpTypeI16x8LeS:
  2595  				result = []bool{
  2596  					int16(x1Lo>>0) <= int16(x2Lo>>0), int16(x1Lo>>16) <= int16(x2Lo>>16),
  2597  					int16(x1Lo>>32) <= int16(x2Lo>>32), int16(x1Lo>>48) <= int16(x2Lo>>48),
  2598  					int16(x1Hi>>0) <= int16(x2Hi>>0), int16(x1Hi>>16) <= int16(x2Hi>>16),
  2599  					int16(x1Hi>>32) <= int16(x2Hi>>32), int16(x1Hi>>48) <= int16(x2Hi>>48),
  2600  				}
  2601  			case wazeroir.V128CmpTypeI16x8LeU:
  2602  				result = []bool{
  2603  					uint16(x1Lo>>0) <= uint16(x2Lo>>0), uint16(x1Lo>>16) <= uint16(x2Lo>>16),
  2604  					uint16(x1Lo>>32) <= uint16(x2Lo>>32), uint16(x1Lo>>48) <= uint16(x2Lo>>48),
  2605  					uint16(x1Hi>>0) <= uint16(x2Hi>>0), uint16(x1Hi>>16) <= uint16(x2Hi>>16),
  2606  					uint16(x1Hi>>32) <= uint16(x2Hi>>32), uint16(x1Hi>>48) <= uint16(x2Hi>>48),
  2607  				}
  2608  			case wazeroir.V128CmpTypeI16x8GeS:
  2609  				result = []bool{
  2610  					int16(x1Lo>>0) >= int16(x2Lo>>0), int16(x1Lo>>16) >= int16(x2Lo>>16),
  2611  					int16(x1Lo>>32) >= int16(x2Lo>>32), int16(x1Lo>>48) >= int16(x2Lo>>48),
  2612  					int16(x1Hi>>0) >= int16(x2Hi>>0), int16(x1Hi>>16) >= int16(x2Hi>>16),
  2613  					int16(x1Hi>>32) >= int16(x2Hi>>32), int16(x1Hi>>48) >= int16(x2Hi>>48),
  2614  				}
  2615  			case wazeroir.V128CmpTypeI16x8GeU:
  2616  				result = []bool{
  2617  					uint16(x1Lo>>0) >= uint16(x2Lo>>0), uint16(x1Lo>>16) >= uint16(x2Lo>>16),
  2618  					uint16(x1Lo>>32) >= uint16(x2Lo>>32), uint16(x1Lo>>48) >= uint16(x2Lo>>48),
  2619  					uint16(x1Hi>>0) >= uint16(x2Hi>>0), uint16(x1Hi>>16) >= uint16(x2Hi>>16),
  2620  					uint16(x1Hi>>32) >= uint16(x2Hi>>32), uint16(x1Hi>>48) >= uint16(x2Hi>>48),
  2621  				}
  2622  			case wazeroir.V128CmpTypeI32x4Eq:
  2623  				result = []bool{
  2624  					uint32(x1Lo>>0) == uint32(x2Lo>>0), uint32(x1Lo>>32) == uint32(x2Lo>>32),
  2625  					uint32(x1Hi>>0) == uint32(x2Hi>>0), uint32(x1Hi>>32) == uint32(x2Hi>>32),
  2626  				}
  2627  			case wazeroir.V128CmpTypeI32x4Ne:
  2628  				result = []bool{
  2629  					uint32(x1Lo>>0) != uint32(x2Lo>>0), uint32(x1Lo>>32) != uint32(x2Lo>>32),
  2630  					uint32(x1Hi>>0) != uint32(x2Hi>>0), uint32(x1Hi>>32) != uint32(x2Hi>>32),
  2631  				}
  2632  			case wazeroir.V128CmpTypeI32x4LtS:
  2633  				result = []bool{
  2634  					int32(x1Lo>>0) < int32(x2Lo>>0), int32(x1Lo>>32) < int32(x2Lo>>32),
  2635  					int32(x1Hi>>0) < int32(x2Hi>>0), int32(x1Hi>>32) < int32(x2Hi>>32),
  2636  				}
  2637  			case wazeroir.V128CmpTypeI32x4LtU:
  2638  				result = []bool{
  2639  					uint32(x1Lo>>0) < uint32(x2Lo>>0), uint32(x1Lo>>32) < uint32(x2Lo>>32),
  2640  					uint32(x1Hi>>0) < uint32(x2Hi>>0), uint32(x1Hi>>32) < uint32(x2Hi>>32),
  2641  				}
  2642  			case wazeroir.V128CmpTypeI32x4GtS:
  2643  				result = []bool{
  2644  					int32(x1Lo>>0) > int32(x2Lo>>0), int32(x1Lo>>32) > int32(x2Lo>>32),
  2645  					int32(x1Hi>>0) > int32(x2Hi>>0), int32(x1Hi>>32) > int32(x2Hi>>32),
  2646  				}
  2647  			case wazeroir.V128CmpTypeI32x4GtU:
  2648  				result = []bool{
  2649  					uint32(x1Lo>>0) > uint32(x2Lo>>0), uint32(x1Lo>>32) > uint32(x2Lo>>32),
  2650  					uint32(x1Hi>>0) > uint32(x2Hi>>0), uint32(x1Hi>>32) > uint32(x2Hi>>32),
  2651  				}
  2652  			case wazeroir.V128CmpTypeI32x4LeS:
  2653  				result = []bool{
  2654  					int32(x1Lo>>0) <= int32(x2Lo>>0), int32(x1Lo>>32) <= int32(x2Lo>>32),
  2655  					int32(x1Hi>>0) <= int32(x2Hi>>0), int32(x1Hi>>32) <= int32(x2Hi>>32),
  2656  				}
  2657  			case wazeroir.V128CmpTypeI32x4LeU:
  2658  				result = []bool{
  2659  					uint32(x1Lo>>0) <= uint32(x2Lo>>0), uint32(x1Lo>>32) <= uint32(x2Lo>>32),
  2660  					uint32(x1Hi>>0) <= uint32(x2Hi>>0), uint32(x1Hi>>32) <= uint32(x2Hi>>32),
  2661  				}
  2662  			case wazeroir.V128CmpTypeI32x4GeS:
  2663  				result = []bool{
  2664  					int32(x1Lo>>0) >= int32(x2Lo>>0), int32(x1Lo>>32) >= int32(x2Lo>>32),
  2665  					int32(x1Hi>>0) >= int32(x2Hi>>0), int32(x1Hi>>32) >= int32(x2Hi>>32),
  2666  				}
  2667  			case wazeroir.V128CmpTypeI32x4GeU:
  2668  				result = []bool{
  2669  					uint32(x1Lo>>0) >= uint32(x2Lo>>0), uint32(x1Lo>>32) >= uint32(x2Lo>>32),
  2670  					uint32(x1Hi>>0) >= uint32(x2Hi>>0), uint32(x1Hi>>32) >= uint32(x2Hi>>32),
  2671  				}
  2672  			case wazeroir.V128CmpTypeI64x2Eq:
  2673  				result = []bool{x1Lo == x2Lo, x1Hi == x2Hi}
  2674  			case wazeroir.V128CmpTypeI64x2Ne:
  2675  				result = []bool{x1Lo != x2Lo, x1Hi != x2Hi}
  2676  			case wazeroir.V128CmpTypeI64x2LtS:
  2677  				result = []bool{int64(x1Lo) < int64(x2Lo), int64(x1Hi) < int64(x2Hi)}
  2678  			case wazeroir.V128CmpTypeI64x2GtS:
  2679  				result = []bool{int64(x1Lo) > int64(x2Lo), int64(x1Hi) > int64(x2Hi)}
  2680  			case wazeroir.V128CmpTypeI64x2LeS:
  2681  				result = []bool{int64(x1Lo) <= int64(x2Lo), int64(x1Hi) <= int64(x2Hi)}
  2682  			case wazeroir.V128CmpTypeI64x2GeS:
  2683  				result = []bool{int64(x1Lo) >= int64(x2Lo), int64(x1Hi) >= int64(x2Hi)}
  2684  			case wazeroir.V128CmpTypeF32x4Eq:
  2685  				result = []bool{
  2686  					math.Float32frombits(uint32(x1Lo>>0)) == math.Float32frombits(uint32(x2Lo>>0)),
  2687  					math.Float32frombits(uint32(x1Lo>>32)) == math.Float32frombits(uint32(x2Lo>>32)),
  2688  					math.Float32frombits(uint32(x1Hi>>0)) == math.Float32frombits(uint32(x2Hi>>0)),
  2689  					math.Float32frombits(uint32(x1Hi>>32)) == math.Float32frombits(uint32(x2Hi>>32)),
  2690  				}
  2691  			case wazeroir.V128CmpTypeF32x4Ne:
  2692  				result = []bool{
  2693  					math.Float32frombits(uint32(x1Lo>>0)) != math.Float32frombits(uint32(x2Lo>>0)),
  2694  					math.Float32frombits(uint32(x1Lo>>32)) != math.Float32frombits(uint32(x2Lo>>32)),
  2695  					math.Float32frombits(uint32(x1Hi>>0)) != math.Float32frombits(uint32(x2Hi>>0)),
  2696  					math.Float32frombits(uint32(x1Hi>>32)) != math.Float32frombits(uint32(x2Hi>>32)),
  2697  				}
  2698  			case wazeroir.V128CmpTypeF32x4Lt:
  2699  				result = []bool{
  2700  					math.Float32frombits(uint32(x1Lo>>0)) < math.Float32frombits(uint32(x2Lo>>0)),
  2701  					math.Float32frombits(uint32(x1Lo>>32)) < math.Float32frombits(uint32(x2Lo>>32)),
  2702  					math.Float32frombits(uint32(x1Hi>>0)) < math.Float32frombits(uint32(x2Hi>>0)),
  2703  					math.Float32frombits(uint32(x1Hi>>32)) < math.Float32frombits(uint32(x2Hi>>32)),
  2704  				}
  2705  			case wazeroir.V128CmpTypeF32x4Gt:
  2706  				result = []bool{
  2707  					math.Float32frombits(uint32(x1Lo>>0)) > math.Float32frombits(uint32(x2Lo>>0)),
  2708  					math.Float32frombits(uint32(x1Lo>>32)) > math.Float32frombits(uint32(x2Lo>>32)),
  2709  					math.Float32frombits(uint32(x1Hi>>0)) > math.Float32frombits(uint32(x2Hi>>0)),
  2710  					math.Float32frombits(uint32(x1Hi>>32)) > math.Float32frombits(uint32(x2Hi>>32)),
  2711  				}
  2712  			case wazeroir.V128CmpTypeF32x4Le:
  2713  				result = []bool{
  2714  					math.Float32frombits(uint32(x1Lo>>0)) <= math.Float32frombits(uint32(x2Lo>>0)),
  2715  					math.Float32frombits(uint32(x1Lo>>32)) <= math.Float32frombits(uint32(x2Lo>>32)),
  2716  					math.Float32frombits(uint32(x1Hi>>0)) <= math.Float32frombits(uint32(x2Hi>>0)),
  2717  					math.Float32frombits(uint32(x1Hi>>32)) <= math.Float32frombits(uint32(x2Hi>>32)),
  2718  				}
  2719  			case wazeroir.V128CmpTypeF32x4Ge:
  2720  				result = []bool{
  2721  					math.Float32frombits(uint32(x1Lo>>0)) >= math.Float32frombits(uint32(x2Lo>>0)),
  2722  					math.Float32frombits(uint32(x1Lo>>32)) >= math.Float32frombits(uint32(x2Lo>>32)),
  2723  					math.Float32frombits(uint32(x1Hi>>0)) >= math.Float32frombits(uint32(x2Hi>>0)),
  2724  					math.Float32frombits(uint32(x1Hi>>32)) >= math.Float32frombits(uint32(x2Hi>>32)),
  2725  				}
  2726  			case wazeroir.V128CmpTypeF64x2Eq:
  2727  				result = []bool{
  2728  					math.Float64frombits(x1Lo) == math.Float64frombits(x2Lo),
  2729  					math.Float64frombits(x1Hi) == math.Float64frombits(x2Hi),
  2730  				}
  2731  			case wazeroir.V128CmpTypeF64x2Ne:
  2732  				result = []bool{
  2733  					math.Float64frombits(x1Lo) != math.Float64frombits(x2Lo),
  2734  					math.Float64frombits(x1Hi) != math.Float64frombits(x2Hi),
  2735  				}
  2736  			case wazeroir.V128CmpTypeF64x2Lt:
  2737  				result = []bool{
  2738  					math.Float64frombits(x1Lo) < math.Float64frombits(x2Lo),
  2739  					math.Float64frombits(x1Hi) < math.Float64frombits(x2Hi),
  2740  				}
  2741  			case wazeroir.V128CmpTypeF64x2Gt:
  2742  				result = []bool{
  2743  					math.Float64frombits(x1Lo) > math.Float64frombits(x2Lo),
  2744  					math.Float64frombits(x1Hi) > math.Float64frombits(x2Hi),
  2745  				}
  2746  			case wazeroir.V128CmpTypeF64x2Le:
  2747  				result = []bool{
  2748  					math.Float64frombits(x1Lo) <= math.Float64frombits(x2Lo),
  2749  					math.Float64frombits(x1Hi) <= math.Float64frombits(x2Hi),
  2750  				}
  2751  			case wazeroir.V128CmpTypeF64x2Ge:
  2752  				result = []bool{
  2753  					math.Float64frombits(x1Lo) >= math.Float64frombits(x2Lo),
  2754  					math.Float64frombits(x1Hi) >= math.Float64frombits(x2Hi),
  2755  				}
  2756  			}
  2757  
  2758  			var retLo, retHi uint64
  2759  			laneNum := len(result)
  2760  			switch laneNum {
  2761  			case 16:
  2762  				for i, b := range result {
  2763  					if b {
  2764  						if i < 8 {
  2765  							retLo |= 0xff << (i * 8)
  2766  						} else {
  2767  							retHi |= 0xff << ((i - 8) * 8)
  2768  						}
  2769  					}
  2770  				}
  2771  			case 8:
  2772  				for i, b := range result {
  2773  					if b {
  2774  						if i < 4 {
  2775  							retLo |= 0xffff << (i * 16)
  2776  						} else {
  2777  							retHi |= 0xffff << ((i - 4) * 16)
  2778  						}
  2779  					}
  2780  				}
  2781  			case 4:
  2782  				for i, b := range result {
  2783  					if b {
  2784  						if i < 2 {
  2785  							retLo |= 0xffff_ffff << (i * 32)
  2786  						} else {
  2787  							retHi |= 0xffff_ffff << ((i - 2) * 32)
  2788  						}
  2789  					}
  2790  				}
  2791  			case 2:
  2792  				if result[0] {
  2793  					retLo = ^uint64(0)
  2794  				}
  2795  				if result[1] {
  2796  					retHi = ^uint64(0)
  2797  				}
  2798  			}
  2799  
  2800  			ce.pushValue(retLo)
  2801  			ce.pushValue(retHi)
  2802  			frame.pc++
  2803  		case wazeroir.OperationKindV128AddSat:
  2804  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2805  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2806  
  2807  			var retLo, retHi uint64
  2808  
  2809  			// Lane-wise addition while saturating the overflowing values.
  2810  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-addition
  2811  			switch op.B1 {
  2812  			case wazeroir.ShapeI8x16:
  2813  				for i := 0; i < 16; i++ {
  2814  					var v, w byte
  2815  					if i < 8 {
  2816  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2817  					} else {
  2818  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2819  					}
  2820  
  2821  					var uv uint64
  2822  					if op.B3 { // signed
  2823  						if subbed := int64(int8(v)) + int64(int8(w)); subbed < math.MinInt8 {
  2824  							uv = uint64(byte(0x80))
  2825  						} else if subbed > math.MaxInt8 {
  2826  							uv = uint64(byte(0x7f))
  2827  						} else {
  2828  							uv = uint64(byte(int8(subbed)))
  2829  						}
  2830  					} else {
  2831  						if subbed := int64(v) + int64(w); subbed < 0 {
  2832  							uv = uint64(byte(0))
  2833  						} else if subbed > math.MaxUint8 {
  2834  							uv = uint64(byte(0xff))
  2835  						} else {
  2836  							uv = uint64(byte(subbed))
  2837  						}
  2838  					}
  2839  
  2840  					if i < 8 { // first 8 lanes are on lower 64bits.
  2841  						retLo |= uv << (i * 8)
  2842  					} else {
  2843  						retHi |= uv << ((i - 8) * 8)
  2844  					}
  2845  				}
  2846  			case wazeroir.ShapeI16x8:
  2847  				for i := 0; i < 8; i++ {
  2848  					var v, w uint16
  2849  					if i < 4 {
  2850  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  2851  					} else {
  2852  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  2853  					}
  2854  
  2855  					var uv uint64
  2856  					if op.B3 { // signed
  2857  						if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 {
  2858  							uv = uint64(uint16(0x8000))
  2859  						} else if added > math.MaxInt16 {
  2860  							uv = uint64(uint16(0x7fff))
  2861  						} else {
  2862  							uv = uint64(uint16(int16(added)))
  2863  						}
  2864  					} else {
  2865  						if added := int64(v) + int64(w); added < 0 {
  2866  							uv = uint64(uint16(0))
  2867  						} else if added > math.MaxUint16 {
  2868  							uv = uint64(uint16(0xffff))
  2869  						} else {
  2870  							uv = uint64(uint16(added))
  2871  						}
  2872  					}
  2873  
  2874  					if i < 4 { // first 4 lanes are on lower 64bits.
  2875  						retLo |= uv << (i * 16)
  2876  					} else {
  2877  						retHi |= uv << ((i - 4) * 16)
  2878  					}
  2879  				}
  2880  			}
  2881  
  2882  			ce.pushValue(retLo)
  2883  			ce.pushValue(retHi)
  2884  			frame.pc++
  2885  		case wazeroir.OperationKindV128SubSat:
  2886  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2887  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2888  
  2889  			var retLo, retHi uint64
  2890  
  2891  			// Lane-wise subtraction while saturating the overflowing values.
  2892  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-subtraction
  2893  			switch op.B1 {
  2894  			case wazeroir.ShapeI8x16:
  2895  				for i := 0; i < 16; i++ {
  2896  					var v, w byte
  2897  					if i < 8 {
  2898  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2899  					} else {
  2900  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2901  					}
  2902  
  2903  					var uv uint64
  2904  					if op.B3 { // signed
  2905  						if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 {
  2906  							uv = uint64(byte(0x80))
  2907  						} else if subbed > math.MaxInt8 {
  2908  							uv = uint64(byte(0x7f))
  2909  						} else {
  2910  							uv = uint64(byte(int8(subbed)))
  2911  						}
  2912  					} else {
  2913  						if subbed := int64(v) - int64(w); subbed < 0 {
  2914  							uv = uint64(byte(0))
  2915  						} else if subbed > math.MaxUint8 {
  2916  							uv = uint64(byte(0xff))
  2917  						} else {
  2918  							uv = uint64(byte(subbed))
  2919  						}
  2920  					}
  2921  
  2922  					if i < 8 {
  2923  						retLo |= uv << (i * 8)
  2924  					} else {
  2925  						retHi |= uv << ((i - 8) * 8)
  2926  					}
  2927  				}
  2928  			case wazeroir.ShapeI16x8:
  2929  				for i := 0; i < 8; i++ {
  2930  					var v, w uint16
  2931  					if i < 4 {
  2932  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  2933  					} else {
  2934  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  2935  					}
  2936  
  2937  					var uv uint64
  2938  					if op.B3 { // signed
  2939  						if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 {
  2940  							uv = uint64(uint16(0x8000))
  2941  						} else if subbed > math.MaxInt16 {
  2942  							uv = uint64(uint16(0x7fff))
  2943  						} else {
  2944  							uv = uint64(uint16(int16(subbed)))
  2945  						}
  2946  					} else {
  2947  						if subbed := int64(v) - int64(w); subbed < 0 {
  2948  							uv = uint64(uint16(0))
  2949  						} else if subbed > math.MaxUint16 {
  2950  							uv = uint64(uint16(0xffff))
  2951  						} else {
  2952  							uv = uint64(uint16(subbed))
  2953  						}
  2954  					}
  2955  
  2956  					if i < 4 {
  2957  						retLo |= uv << (i * 16)
  2958  					} else {
  2959  						retHi |= uv << ((i - 4) * 16)
  2960  					}
  2961  				}
  2962  			}
  2963  
  2964  			ce.pushValue(retLo)
  2965  			ce.pushValue(retHi)
  2966  			frame.pc++
  2967  		case wazeroir.OperationKindV128Mul:
  2968  			x2hi, x2lo := ce.popValue(), ce.popValue()
  2969  			x1hi, x1lo := ce.popValue(), ce.popValue()
  2970  			var retLo, retHi uint64
  2971  			switch op.B1 {
  2972  			case wazeroir.ShapeI16x8:
  2973  				retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) |
  2974  					(uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48)
  2975  				retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) |
  2976  					(uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48)
  2977  			case wazeroir.ShapeI32x4:
  2978  				retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32)
  2979  				retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32)
  2980  			case wazeroir.ShapeI64x2:
  2981  				retHi = x1hi * x2hi
  2982  				retLo = x1lo * x2lo
  2983  			case wazeroir.ShapeF32x4:
  2984  				retHi = mulFloat32bits(uint32(x1hi), uint32(x2hi)) | mulFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  2985  				retLo = mulFloat32bits(uint32(x1lo), uint32(x2lo)) | mulFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  2986  			case wazeroir.ShapeF64x2:
  2987  				retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi))
  2988  				retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo))
  2989  			}
  2990  			ce.pushValue(retLo)
  2991  			ce.pushValue(retHi)
  2992  			frame.pc++
  2993  		case wazeroir.OperationKindV128Div:
  2994  			x2hi, x2lo := ce.popValue(), ce.popValue()
  2995  			x1hi, x1lo := ce.popValue(), ce.popValue()
  2996  			var retLo, retHi uint64
  2997  			if op.B1 == wazeroir.ShapeF64x2 {
  2998  				retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi))
  2999  				retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo))
  3000  			} else {
  3001  				retHi = divFloat32bits(uint32(x1hi), uint32(x2hi)) | divFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3002  				retLo = divFloat32bits(uint32(x1lo), uint32(x2lo)) | divFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3003  			}
  3004  			ce.pushValue(retLo)
  3005  			ce.pushValue(retHi)
  3006  			frame.pc++
  3007  		case wazeroir.OperationKindV128Neg:
  3008  			hi, lo := ce.popValue(), ce.popValue()
  3009  			switch op.B1 {
  3010  			case wazeroir.ShapeI8x16:
  3011  				lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) |
  3012  					(uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) |
  3013  					(uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) |
  3014  					(uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56)
  3015  				hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) |
  3016  					(uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) |
  3017  					(uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) |
  3018  					(uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56)
  3019  			case wazeroir.ShapeI16x8:
  3020  				hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) |
  3021  					(uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48)
  3022  				lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) |
  3023  					(uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48)
  3024  			case wazeroir.ShapeI32x4:
  3025  				hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32)
  3026  				lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32)
  3027  			case wazeroir.ShapeI64x2:
  3028  				hi = -hi
  3029  				lo = -lo
  3030  			case wazeroir.ShapeF32x4:
  3031  				hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) |
  3032  					(uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32)
  3033  				lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) |
  3034  					(uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32)
  3035  			case wazeroir.ShapeF64x2:
  3036  				hi = math.Float64bits(-math.Float64frombits(hi))
  3037  				lo = math.Float64bits(-math.Float64frombits(lo))
  3038  			}
  3039  			ce.pushValue(lo)
  3040  			ce.pushValue(hi)
  3041  			frame.pc++
  3042  		case wazeroir.OperationKindV128Sqrt:
  3043  			hi, lo := ce.popValue(), ce.popValue()
  3044  			if op.B1 == wazeroir.ShapeF64x2 {
  3045  				hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi)))
  3046  				lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo)))
  3047  			} else {
  3048  				hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) |
  3049  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32)
  3050  				lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) |
  3051  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32)
  3052  			}
  3053  			ce.pushValue(lo)
  3054  			ce.pushValue(hi)
  3055  			frame.pc++
  3056  		case wazeroir.OperationKindV128Abs:
  3057  			hi, lo := ce.popValue(), ce.popValue()
  3058  			switch op.B1 {
  3059  			case wazeroir.ShapeI8x16:
  3060  				lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) |
  3061  					(uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) |
  3062  					(uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) |
  3063  					(uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56)
  3064  				hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) |
  3065  					(uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) |
  3066  					(uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) |
  3067  					(uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56)
  3068  			case wazeroir.ShapeI16x8:
  3069  				hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) |
  3070  					(uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48)
  3071  				lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) |
  3072  					(uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48)
  3073  			case wazeroir.ShapeI32x4:
  3074  				hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32)
  3075  				lo = uint64(i32Abs(uint32(lo))) | (uint64(i32Abs(uint32(lo>>32))) << 32)
  3076  			case wazeroir.ShapeI64x2:
  3077  				if int64(hi) < 0 {
  3078  					hi = -hi
  3079  				}
  3080  				if int64(lo) < 0 {
  3081  					lo = -lo
  3082  				}
  3083  			case wazeroir.ShapeF32x4:
  3084  				hi = hi &^ (1<<31 | 1<<63)
  3085  				lo = lo &^ (1<<31 | 1<<63)
  3086  			case wazeroir.ShapeF64x2:
  3087  				hi = hi &^ (1 << 63)
  3088  				lo = lo &^ (1 << 63)
  3089  			}
  3090  			ce.pushValue(lo)
  3091  			ce.pushValue(hi)
  3092  			frame.pc++
  3093  		case wazeroir.OperationKindV128Popcnt:
  3094  			hi, lo := ce.popValue(), ce.popValue()
  3095  			var retLo, retHi uint64
  3096  			for i := 0; i < 16; i++ {
  3097  				var v byte
  3098  				if i < 8 {
  3099  					v = byte(lo >> (i * 8))
  3100  				} else {
  3101  					v = byte(hi >> ((i - 8) * 8))
  3102  				}
  3103  
  3104  				var cnt uint64
  3105  				for i := 0; i < 8; i++ {
  3106  					if (v>>i)&0b1 != 0 {
  3107  						cnt++
  3108  					}
  3109  				}
  3110  
  3111  				if i < 8 {
  3112  					retLo |= cnt << (i * 8)
  3113  				} else {
  3114  					retHi |= cnt << ((i - 8) * 8)
  3115  				}
  3116  			}
  3117  			ce.pushValue(retLo)
  3118  			ce.pushValue(retHi)
  3119  			frame.pc++
  3120  		case wazeroir.OperationKindV128Min:
  3121  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3122  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3123  			var retLo, retHi uint64
  3124  			switch op.B1 {
  3125  			case wazeroir.ShapeI8x16:
  3126  				if op.B3 { // signed
  3127  					retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) |
  3128  						uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3129  						uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3130  						uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3131  					retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) |
  3132  						uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3133  						uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3134  						uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3135  				} else {
  3136  					retLo = uint64(i8MinU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) |
  3137  						uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3138  						uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3139  						uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3140  					retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) |
  3141  						uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3142  						uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3143  						uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3144  				}
  3145  			case wazeroir.ShapeI16x8:
  3146  				if op.B3 { // signed
  3147  					retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) |
  3148  						uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3149  						uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3150  						uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3151  					retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) |
  3152  						uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3153  						uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3154  						uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3155  				} else {
  3156  					retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) |
  3157  						uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3158  						uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3159  						uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3160  					retHi = uint64(i16MinU(uint16(x1hi), uint16(x2hi))) |
  3161  						uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3162  						uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3163  						uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3164  				}
  3165  			case wazeroir.ShapeI32x4:
  3166  				if op.B3 { // signed
  3167  					retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) |
  3168  						uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3169  					retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) |
  3170  						uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3171  				} else {
  3172  					retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) |
  3173  						uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3174  					retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) |
  3175  						uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3176  				}
  3177  			case wazeroir.ShapeF32x4:
  3178  				retHi = WasmCompatMin32bits(uint32(x1hi), uint32(x2hi)) |
  3179  					WasmCompatMin32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3180  				retLo = WasmCompatMin32bits(uint32(x1lo), uint32(x2lo)) |
  3181  					WasmCompatMin32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3182  			case wazeroir.ShapeF64x2:
  3183  				retHi = math.Float64bits(moremath.WasmCompatMin64(
  3184  					math.Float64frombits(x1hi),
  3185  					math.Float64frombits(x2hi),
  3186  				))
  3187  				retLo = math.Float64bits(moremath.WasmCompatMin64(
  3188  					math.Float64frombits(x1lo),
  3189  					math.Float64frombits(x2lo),
  3190  				))
  3191  			}
  3192  			ce.pushValue(retLo)
  3193  			ce.pushValue(retHi)
  3194  			frame.pc++
  3195  		case wazeroir.OperationKindV128Max:
  3196  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3197  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3198  			var retLo, retHi uint64
  3199  			switch op.B1 {
  3200  			case wazeroir.ShapeI8x16:
  3201  				if op.B3 { // signed
  3202  					retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) |
  3203  						uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3204  						uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3205  						uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3206  					retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) |
  3207  						uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3208  						uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3209  						uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3210  				} else {
  3211  					retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) |
  3212  						uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3213  						uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3214  						uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3215  					retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) |
  3216  						uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3217  						uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3218  						uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3219  				}
  3220  			case wazeroir.ShapeI16x8:
  3221  				if op.B3 { // signed
  3222  					retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) |
  3223  						uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3224  						uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3225  						uint64(i16MaxS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3226  					retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) |
  3227  						uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3228  						uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3229  						uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3230  				} else {
  3231  					retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) |
  3232  						uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3233  						uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3234  						uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3235  					retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) |
  3236  						uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3237  						uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3238  						uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3239  				}
  3240  			case wazeroir.ShapeI32x4:
  3241  				if op.B3 { // signed
  3242  					retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) |
  3243  						uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3244  					retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) |
  3245  						uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3246  				} else {
  3247  					retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) |
  3248  						uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3249  					retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) |
  3250  						uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3251  				}
  3252  			case wazeroir.ShapeF32x4:
  3253  				retHi = WasmCompatMax32bits(uint32(x1hi), uint32(x2hi)) |
  3254  					WasmCompatMax32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3255  				retLo = WasmCompatMax32bits(uint32(x1lo), uint32(x2lo)) |
  3256  					WasmCompatMax32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3257  			case wazeroir.ShapeF64x2:
  3258  				retHi = math.Float64bits(moremath.WasmCompatMax64(
  3259  					math.Float64frombits(x1hi),
  3260  					math.Float64frombits(x2hi),
  3261  				))
  3262  				retLo = math.Float64bits(moremath.WasmCompatMax64(
  3263  					math.Float64frombits(x1lo),
  3264  					math.Float64frombits(x2lo),
  3265  				))
  3266  			}
  3267  			ce.pushValue(retLo)
  3268  			ce.pushValue(retHi)
  3269  			frame.pc++
  3270  		case wazeroir.OperationKindV128AvgrU:
  3271  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3272  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3273  			var retLo, retHi uint64
  3274  			switch op.B1 {
  3275  			case wazeroir.ShapeI8x16:
  3276  				retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) |
  3277  					uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3278  					uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3279  					uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3280  				retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) |
  3281  					uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3282  					uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3283  					uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3284  			case wazeroir.ShapeI16x8:
  3285  				retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) |
  3286  					uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3287  					uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3288  					uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3289  				retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) |
  3290  					uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3291  					uint64(i16RoundingAverage(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3292  					uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3293  			}
  3294  			ce.pushValue(retLo)
  3295  			ce.pushValue(retHi)
  3296  			frame.pc++
  3297  		case wazeroir.OperationKindV128Pmin:
  3298  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3299  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3300  			var retLo, retHi uint64
  3301  			if op.B1 == wazeroir.ShapeF32x4 {
  3302  				if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) {
  3303  					retLo = x2lo & 0x00000000_ffffffff
  3304  				} else {
  3305  					retLo = x1lo & 0x00000000_ffffffff
  3306  				}
  3307  				if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) {
  3308  					retLo |= x2lo & 0xffffffff_00000000
  3309  				} else {
  3310  					retLo |= x1lo & 0xffffffff_00000000
  3311  				}
  3312  				if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) {
  3313  					retHi = x2hi & 0x00000000_ffffffff
  3314  				} else {
  3315  					retHi = x1hi & 0x00000000_ffffffff
  3316  				}
  3317  				if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) {
  3318  					retHi |= x2hi & 0xffffffff_00000000
  3319  				} else {
  3320  					retHi |= x1hi & 0xffffffff_00000000
  3321  				}
  3322  			} else {
  3323  				if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) {
  3324  					retLo = x2lo
  3325  				} else {
  3326  					retLo = x1lo
  3327  				}
  3328  				if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) {
  3329  					retHi = x2hi
  3330  				} else {
  3331  					retHi = x1hi
  3332  				}
  3333  			}
  3334  			ce.pushValue(retLo)
  3335  			ce.pushValue(retHi)
  3336  			frame.pc++
  3337  		case wazeroir.OperationKindV128Pmax:
  3338  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3339  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3340  			var retLo, retHi uint64
  3341  			if op.B1 == wazeroir.ShapeF32x4 {
  3342  				if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) {
  3343  					retLo = x2lo & 0x00000000_ffffffff
  3344  				} else {
  3345  					retLo = x1lo & 0x00000000_ffffffff
  3346  				}
  3347  				if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) {
  3348  					retLo |= x2lo & 0xffffffff_00000000
  3349  				} else {
  3350  					retLo |= x1lo & 0xffffffff_00000000
  3351  				}
  3352  				if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) {
  3353  					retHi = x2hi & 0x00000000_ffffffff
  3354  				} else {
  3355  					retHi = x1hi & 0x00000000_ffffffff
  3356  				}
  3357  				if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) {
  3358  					retHi |= x2hi & 0xffffffff_00000000
  3359  				} else {
  3360  					retHi |= x1hi & 0xffffffff_00000000
  3361  				}
  3362  			} else {
  3363  				if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) {
  3364  					retLo = x2lo
  3365  				} else {
  3366  					retLo = x1lo
  3367  				}
  3368  				if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) {
  3369  					retHi = x2hi
  3370  				} else {
  3371  					retHi = x1hi
  3372  				}
  3373  			}
  3374  			ce.pushValue(retLo)
  3375  			ce.pushValue(retHi)
  3376  			frame.pc++
  3377  		case wazeroir.OperationKindV128Ceil:
  3378  			hi, lo := ce.popValue(), ce.popValue()
  3379  			if op.B1 == wazeroir.ShapeF32x4 {
  3380  				lo = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo))))) |
  3381  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3382  				hi = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi))))) |
  3383  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3384  			} else {
  3385  				lo = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(lo)))
  3386  				hi = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(hi)))
  3387  			}
  3388  			ce.pushValue(lo)
  3389  			ce.pushValue(hi)
  3390  			frame.pc++
  3391  		case wazeroir.OperationKindV128Floor:
  3392  			hi, lo := ce.popValue(), ce.popValue()
  3393  			if op.B1 == wazeroir.ShapeF32x4 {
  3394  				lo = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo))))) |
  3395  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3396  				hi = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi))))) |
  3397  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3398  			} else {
  3399  				lo = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(lo)))
  3400  				hi = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(hi)))
  3401  			}
  3402  			ce.pushValue(lo)
  3403  			ce.pushValue(hi)
  3404  			frame.pc++
  3405  		case wazeroir.OperationKindV128Trunc:
  3406  			hi, lo := ce.popValue(), ce.popValue()
  3407  			if op.B1 == wazeroir.ShapeF32x4 {
  3408  				lo = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo))))) |
  3409  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3410  				hi = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi))))) |
  3411  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3412  			} else {
  3413  				lo = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(lo)))
  3414  				hi = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(hi)))
  3415  			}
  3416  			ce.pushValue(lo)
  3417  			ce.pushValue(hi)
  3418  			frame.pc++
  3419  		case wazeroir.OperationKindV128Nearest:
  3420  			hi, lo := ce.popValue(), ce.popValue()
  3421  			if op.B1 == wazeroir.ShapeF32x4 {
  3422  				lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) |
  3423  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3424  				hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) |
  3425  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3426  			} else {
  3427  				lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo)))
  3428  				hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi)))
  3429  			}
  3430  			ce.pushValue(lo)
  3431  			ce.pushValue(hi)
  3432  			frame.pc++
  3433  		case wazeroir.OperationKindV128Extend:
  3434  			hi, lo := ce.popValue(), ce.popValue()
  3435  			var origin uint64
  3436  			if op.B3 { // use lower 64 bits
  3437  				origin = lo
  3438  			} else {
  3439  				origin = hi
  3440  			}
  3441  
  3442  			signed := op.B2 == 1
  3443  
  3444  			var retHi, retLo uint64
  3445  			switch op.B1 {
  3446  			case wazeroir.ShapeI8x16:
  3447  				for i := 0; i < 8; i++ {
  3448  					v8 := byte(origin >> (i * 8))
  3449  
  3450  					var v16 uint16
  3451  					if signed {
  3452  						v16 = uint16(int8(v8))
  3453  					} else {
  3454  						v16 = uint16(v8)
  3455  					}
  3456  
  3457  					if i < 4 {
  3458  						retLo |= uint64(v16) << (i * 16)
  3459  					} else {
  3460  						retHi |= uint64(v16) << ((i - 4) * 16)
  3461  					}
  3462  				}
  3463  			case wazeroir.ShapeI16x8:
  3464  				for i := 0; i < 4; i++ {
  3465  					v16 := uint16(origin >> (i * 16))
  3466  
  3467  					var v32 uint32
  3468  					if signed {
  3469  						v32 = uint32(int16(v16))
  3470  					} else {
  3471  						v32 = uint32(v16)
  3472  					}
  3473  
  3474  					if i < 2 {
  3475  						retLo |= uint64(v32) << (i * 32)
  3476  					} else {
  3477  						retHi |= uint64(v32) << ((i - 2) * 32)
  3478  					}
  3479  				}
  3480  			case wazeroir.ShapeI32x4:
  3481  				v32Lo := uint32(origin)
  3482  				v32Hi := uint32(origin >> 32)
  3483  				if signed {
  3484  					retLo = uint64(int32(v32Lo))
  3485  					retHi = uint64(int32(v32Hi))
  3486  				} else {
  3487  					retLo = uint64(v32Lo)
  3488  					retHi = uint64(v32Hi)
  3489  				}
  3490  			}
  3491  			ce.pushValue(retLo)
  3492  			ce.pushValue(retHi)
  3493  			frame.pc++
  3494  		case wazeroir.OperationKindV128ExtMul:
  3495  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3496  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3497  			var x1, x2 uint64
  3498  			if op.B3 { // use lower 64 bits
  3499  				x1, x2 = x1Lo, x2Lo
  3500  			} else {
  3501  				x1, x2 = x1Hi, x2Hi
  3502  			}
  3503  
  3504  			signed := op.B2 == 1
  3505  
  3506  			var retLo, retHi uint64
  3507  			switch op.B1 {
  3508  			case wazeroir.ShapeI8x16:
  3509  				for i := 0; i < 8; i++ {
  3510  					v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8))
  3511  
  3512  					var v16 uint16
  3513  					if signed {
  3514  						v16 = uint16(int16(int8(v1)) * int16(int8(v2)))
  3515  					} else {
  3516  						v16 = uint16(v1) * uint16(v2)
  3517  					}
  3518  
  3519  					if i < 4 {
  3520  						retLo |= uint64(v16) << (i * 16)
  3521  					} else {
  3522  						retHi |= uint64(v16) << ((i - 4) * 16)
  3523  					}
  3524  				}
  3525  			case wazeroir.ShapeI16x8:
  3526  				for i := 0; i < 4; i++ {
  3527  					v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16))
  3528  
  3529  					var v32 uint32
  3530  					if signed {
  3531  						v32 = uint32(int32(int16(v1)) * int32(int16(v2)))
  3532  					} else {
  3533  						v32 = uint32(v1) * uint32(v2)
  3534  					}
  3535  
  3536  					if i < 2 {
  3537  						retLo |= uint64(v32) << (i * 32)
  3538  					} else {
  3539  						retHi |= uint64(v32) << ((i - 2) * 32)
  3540  					}
  3541  				}
  3542  			case wazeroir.ShapeI32x4:
  3543  				v1Lo, v2Lo := uint32(x1), uint32(x2)
  3544  				v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32)
  3545  				if signed {
  3546  					retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo)))
  3547  					retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi)))
  3548  				} else {
  3549  					retLo = uint64(v1Lo) * uint64(v2Lo)
  3550  					retHi = uint64(v1Hi) * uint64(v2Hi)
  3551  				}
  3552  			}
  3553  
  3554  			ce.pushValue(retLo)
  3555  			ce.pushValue(retHi)
  3556  			frame.pc++
  3557  		case wazeroir.OperationKindV128Q15mulrSatS:
  3558  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  3559  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  3560  			var retLo, retHi uint64
  3561  			for i := 0; i < 8; i++ {
  3562  				var v, w int16
  3563  				if i < 4 {
  3564  					v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16)))
  3565  				} else {
  3566  					v, w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16)))
  3567  				}
  3568  
  3569  				var uv uint64
  3570  				// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication
  3571  				if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 {
  3572  					uv = uint64(uint16(0x8000))
  3573  				} else if calc > math.MaxInt16 {
  3574  					uv = uint64(uint16(0x7fff))
  3575  				} else {
  3576  					uv = uint64(uint16(int16(calc)))
  3577  				}
  3578  
  3579  				if i < 4 {
  3580  					retLo |= uv << (i * 16)
  3581  				} else {
  3582  					retHi |= uv << ((i - 4) * 16)
  3583  				}
  3584  			}
  3585  
  3586  			ce.pushValue(retLo)
  3587  			ce.pushValue(retHi)
  3588  			frame.pc++
  3589  		case wazeroir.OperationKindV128ExtAddPairwise:
  3590  			hi, lo := ce.popValue(), ce.popValue()
  3591  
  3592  			signed := op.B3
  3593  
  3594  			var retLo, retHi uint64
  3595  			switch op.B1 {
  3596  			case wazeroir.ShapeI8x16:
  3597  				for i := 0; i < 8; i++ {
  3598  					var v1, v2 byte
  3599  					if i < 4 {
  3600  						v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8))
  3601  					} else {
  3602  						v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8))
  3603  					}
  3604  
  3605  					var v16 uint16
  3606  					if signed {
  3607  						v16 = uint16(int16(int8(v1)) + int16(int8(v2)))
  3608  					} else {
  3609  						v16 = uint16(v1) + uint16(v2)
  3610  					}
  3611  
  3612  					if i < 4 {
  3613  						retLo |= uint64(v16) << (i * 16)
  3614  					} else {
  3615  						retHi |= uint64(v16) << ((i - 4) * 16)
  3616  					}
  3617  				}
  3618  			case wazeroir.ShapeI16x8:
  3619  				for i := 0; i < 4; i++ {
  3620  					var v1, v2 uint16
  3621  					if i < 2 {
  3622  						v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16))
  3623  					} else {
  3624  						v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16))
  3625  					}
  3626  
  3627  					var v32 uint32
  3628  					if signed {
  3629  						v32 = uint32(int32(int16(v1)) + int32(int16(v2)))
  3630  					} else {
  3631  						v32 = uint32(v1) + uint32(v2)
  3632  					}
  3633  
  3634  					if i < 2 {
  3635  						retLo |= uint64(v32) << (i * 32)
  3636  					} else {
  3637  						retHi |= uint64(v32) << ((i - 2) * 32)
  3638  					}
  3639  				}
  3640  			}
  3641  			ce.pushValue(retLo)
  3642  			ce.pushValue(retHi)
  3643  			frame.pc++
  3644  		case wazeroir.OperationKindV128FloatPromote:
  3645  			_, toPromote := ce.popValue(), ce.popValue()
  3646  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote)))))
  3647  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote >> 32)))))
  3648  			frame.pc++
  3649  		case wazeroir.OperationKindV128FloatDemote:
  3650  			hi, lo := ce.popValue(), ce.popValue()
  3651  			ce.pushValue(
  3652  				uint64(math.Float32bits(float32(math.Float64frombits(lo)))) |
  3653  					(uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32),
  3654  			)
  3655  			ce.pushValue(0)
  3656  			frame.pc++
  3657  		case wazeroir.OperationKindV128FConvertFromI:
  3658  			hi, lo := ce.popValue(), ce.popValue()
  3659  			v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32)
  3660  			signed := op.B3
  3661  
  3662  			var retLo, retHi uint64
  3663  			switch op.B1 { // Destination shape.
  3664  			case wazeroir.ShapeF32x4: // f32x4 from signed/unsigned i32x4
  3665  				if signed {
  3666  					retLo = uint64(math.Float32bits(float32(int32(v1)))) |
  3667  						(uint64(math.Float32bits(float32(int32(v2)))) << 32)
  3668  					retHi = uint64(math.Float32bits(float32(int32(v3)))) |
  3669  						(uint64(math.Float32bits(float32(int32(v4)))) << 32)
  3670  				} else {
  3671  					retLo = uint64(math.Float32bits(float32(v1))) |
  3672  						(uint64(math.Float32bits(float32(v2))) << 32)
  3673  					retHi = uint64(math.Float32bits(float32(v3))) |
  3674  						(uint64(math.Float32bits(float32(v4))) << 32)
  3675  				}
  3676  			case wazeroir.ShapeF64x2: // f64x2 from signed/unsigned i32x4
  3677  				if signed {
  3678  					retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2)))
  3679  				} else {
  3680  					retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2))
  3681  				}
  3682  			}
  3683  
  3684  			ce.pushValue(retLo)
  3685  			ce.pushValue(retHi)
  3686  			frame.pc++
  3687  		case wazeroir.OperationKindV128Narrow:
  3688  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3689  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3690  			signed := op.B3
  3691  
  3692  			var retLo, retHi uint64
  3693  			switch op.B1 {
  3694  			case wazeroir.ShapeI16x8: // signed/unsigned i16x8 to i8x16
  3695  				for i := 0; i < 8; i++ {
  3696  					var v16 uint16
  3697  					if i < 4 {
  3698  						v16 = uint16(x1Lo >> (i * 16))
  3699  					} else {
  3700  						v16 = uint16(x1Hi >> ((i - 4) * 16))
  3701  					}
  3702  
  3703  					var v byte
  3704  					if signed {
  3705  						if s := int16(v16); s > math.MaxInt8 {
  3706  							v = math.MaxInt8
  3707  						} else if s < math.MinInt8 {
  3708  							s = math.MinInt8
  3709  							v = byte(s)
  3710  						} else {
  3711  							v = byte(v16)
  3712  						}
  3713  					} else {
  3714  						if s := int16(v16); s > math.MaxUint8 {
  3715  							v = math.MaxUint8
  3716  						} else if s < 0 {
  3717  							v = 0
  3718  						} else {
  3719  							v = byte(v16)
  3720  						}
  3721  					}
  3722  					retLo |= uint64(v) << (i * 8)
  3723  				}
  3724  				for i := 0; i < 8; i++ {
  3725  					var v16 uint16
  3726  					if i < 4 {
  3727  						v16 = uint16(x2Lo >> (i * 16))
  3728  					} else {
  3729  						v16 = uint16(x2Hi >> ((i - 4) * 16))
  3730  					}
  3731  
  3732  					var v byte
  3733  					if signed {
  3734  						if s := int16(v16); s > math.MaxInt8 {
  3735  							v = math.MaxInt8
  3736  						} else if s < math.MinInt8 {
  3737  							s = math.MinInt8
  3738  							v = byte(s)
  3739  						} else {
  3740  							v = byte(v16)
  3741  						}
  3742  					} else {
  3743  						if s := int16(v16); s > math.MaxUint8 {
  3744  							v = math.MaxUint8
  3745  						} else if s < 0 {
  3746  							v = 0
  3747  						} else {
  3748  							v = byte(v16)
  3749  						}
  3750  					}
  3751  					retHi |= uint64(v) << (i * 8)
  3752  				}
  3753  			case wazeroir.ShapeI32x4: // signed/unsigned i32x4 to i16x8
  3754  				for i := 0; i < 4; i++ {
  3755  					var v32 uint32
  3756  					if i < 2 {
  3757  						v32 = uint32(x1Lo >> (i * 32))
  3758  					} else {
  3759  						v32 = uint32(x1Hi >> ((i - 2) * 32))
  3760  					}
  3761  
  3762  					var v uint16
  3763  					if signed {
  3764  						if s := int32(v32); s > math.MaxInt16 {
  3765  							v = math.MaxInt16
  3766  						} else if s < math.MinInt16 {
  3767  							s = math.MinInt16
  3768  							v = uint16(s)
  3769  						} else {
  3770  							v = uint16(v32)
  3771  						}
  3772  					} else {
  3773  						if s := int32(v32); s > math.MaxUint16 {
  3774  							v = math.MaxUint16
  3775  						} else if s < 0 {
  3776  							v = 0
  3777  						} else {
  3778  							v = uint16(v32)
  3779  						}
  3780  					}
  3781  					retLo |= uint64(v) << (i * 16)
  3782  				}
  3783  
  3784  				for i := 0; i < 4; i++ {
  3785  					var v32 uint32
  3786  					if i < 2 {
  3787  						v32 = uint32(x2Lo >> (i * 32))
  3788  					} else {
  3789  						v32 = uint32(x2Hi >> ((i - 2) * 32))
  3790  					}
  3791  
  3792  					var v uint16
  3793  					if signed {
  3794  						if s := int32(v32); s > math.MaxInt16 {
  3795  							v = math.MaxInt16
  3796  						} else if s < math.MinInt16 {
  3797  							s = math.MinInt16
  3798  							v = uint16(s)
  3799  						} else {
  3800  							v = uint16(v32)
  3801  						}
  3802  					} else {
  3803  						if s := int32(v32); s > math.MaxUint16 {
  3804  							v = math.MaxUint16
  3805  						} else if s < 0 {
  3806  							v = 0
  3807  						} else {
  3808  							v = uint16(v32)
  3809  						}
  3810  					}
  3811  					retHi |= uint64(v) << (i * 16)
  3812  				}
  3813  			}
  3814  			ce.pushValue(retLo)
  3815  			ce.pushValue(retHi)
  3816  			frame.pc++
  3817  		case wazeroir.OperationKindV128Dot:
  3818  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3819  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3820  			ce.pushValue(
  3821  				uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) |
  3822  					(uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32),
  3823  			)
  3824  			ce.pushValue(
  3825  				uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) |
  3826  					(uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32),
  3827  			)
  3828  			frame.pc++
  3829  		case wazeroir.OperationKindV128ITruncSatFromF:
  3830  			hi, lo := ce.popValue(), ce.popValue()
  3831  			signed := op.B3
  3832  			var retLo, retHi uint64
  3833  
  3834  			switch op.B1 {
  3835  			case wazeroir.ShapeF32x4: // f32x4 to i32x4
  3836  				for i, f64 := range [4]float64{
  3837  					math.Trunc(float64(math.Float32frombits(uint32(lo)))),
  3838  					math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))),
  3839  					math.Trunc(float64(math.Float32frombits(uint32(hi)))),
  3840  					math.Trunc(float64(math.Float32frombits(uint32(hi >> 32)))),
  3841  				} {
  3842  
  3843  					var v uint32
  3844  					if math.IsNaN(f64) {
  3845  						v = 0
  3846  					} else if signed {
  3847  						if f64 < math.MinInt32 {
  3848  							f64 = math.MinInt32
  3849  						} else if f64 > math.MaxInt32 {
  3850  							f64 = math.MaxInt32
  3851  						}
  3852  						v = uint32(int32(f64))
  3853  					} else {
  3854  						if f64 < 0 {
  3855  							f64 = 0
  3856  						} else if f64 > math.MaxUint32 {
  3857  							f64 = math.MaxUint32
  3858  						}
  3859  						v = uint32(f64)
  3860  					}
  3861  
  3862  					if i < 2 {
  3863  						retLo |= uint64(v) << (i * 32)
  3864  					} else {
  3865  						retHi |= uint64(v) << ((i - 2) * 32)
  3866  					}
  3867  				}
  3868  
  3869  			case wazeroir.ShapeF64x2: // f64x2 to i32x4
  3870  				for i, f := range [2]float64{
  3871  					math.Trunc(math.Float64frombits(lo)),
  3872  					math.Trunc(math.Float64frombits(hi)),
  3873  				} {
  3874  					var v uint32
  3875  					if math.IsNaN(f) {
  3876  						v = 0
  3877  					} else if signed {
  3878  						if f < math.MinInt32 {
  3879  							f = math.MinInt32
  3880  						} else if f > math.MaxInt32 {
  3881  							f = math.MaxInt32
  3882  						}
  3883  						v = uint32(int32(f))
  3884  					} else {
  3885  						if f < 0 {
  3886  							f = 0
  3887  						} else if f > math.MaxUint32 {
  3888  							f = math.MaxUint32
  3889  						}
  3890  						v = uint32(f)
  3891  					}
  3892  
  3893  					retLo |= uint64(v) << (i * 32)
  3894  				}
  3895  			}
  3896  
  3897  			ce.pushValue(retLo)
  3898  			ce.pushValue(retHi)
  3899  			frame.pc++
  3900  		default:
  3901  			frame.pc++
  3902  		}
  3903  	}
  3904  	ce.popFrame()
  3905  }
  3906  
  3907  func WasmCompatMax32bits(v1, v2 uint32) uint64 {
  3908  	return uint64(math.Float32bits(moremath.WasmCompatMax32(
  3909  		math.Float32frombits(v1),
  3910  		math.Float32frombits(v2),
  3911  	)))
  3912  }
  3913  
  3914  func WasmCompatMin32bits(v1, v2 uint32) uint64 {
  3915  	return uint64(math.Float32bits(moremath.WasmCompatMin32(
  3916  		math.Float32frombits(v1),
  3917  		math.Float32frombits(v2),
  3918  	)))
  3919  }
  3920  
  3921  func addFloat32bits(v1, v2 uint32) uint64 {
  3922  	return uint64(math.Float32bits(math.Float32frombits(v1) + math.Float32frombits(v2)))
  3923  }
  3924  
  3925  func subFloat32bits(v1, v2 uint32) uint64 {
  3926  	return uint64(math.Float32bits(math.Float32frombits(v1) - math.Float32frombits(v2)))
  3927  }
  3928  
  3929  func mulFloat32bits(v1, v2 uint32) uint64 {
  3930  	return uint64(math.Float32bits(math.Float32frombits(v1) * math.Float32frombits(v2)))
  3931  }
  3932  
  3933  func divFloat32bits(v1, v2 uint32) uint64 {
  3934  	return uint64(math.Float32bits(math.Float32frombits(v1) / math.Float32frombits(v2)))
  3935  }
  3936  
  3937  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  3938  func flt32(z1, z2 float32) bool {
  3939  	if z1 != z1 || z2 != z2 {
  3940  		return false
  3941  	} else if z1 == z2 {
  3942  		return false
  3943  	} else if math.IsInf(float64(z1), 1) {
  3944  		return false
  3945  	} else if math.IsInf(float64(z1), -1) {
  3946  		return true
  3947  	} else if math.IsInf(float64(z2), 1) {
  3948  		return true
  3949  	} else if math.IsInf(float64(z2), -1) {
  3950  		return false
  3951  	}
  3952  	return z1 < z2
  3953  }
  3954  
  3955  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  3956  func flt64(z1, z2 float64) bool {
  3957  	if z1 != z1 || z2 != z2 {
  3958  		return false
  3959  	} else if z1 == z2 {
  3960  		return false
  3961  	} else if math.IsInf(z1, 1) {
  3962  		return false
  3963  	} else if math.IsInf(z1, -1) {
  3964  		return true
  3965  	} else if math.IsInf(z2, 1) {
  3966  		return true
  3967  	} else if math.IsInf(z2, -1) {
  3968  		return false
  3969  	}
  3970  	return z1 < z2
  3971  }
  3972  
  3973  func i8RoundingAverage(v1, v2 byte) byte {
  3974  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  3975  	return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2)
  3976  }
  3977  
  3978  func i16RoundingAverage(v1, v2 uint16) uint16 {
  3979  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  3980  	return uint16((uint32(v1) + uint32(v2) + 1) / 2)
  3981  }
  3982  
  3983  func i8Abs(v byte) byte {
  3984  	if i := int8(v); i < 0 {
  3985  		return byte(-i)
  3986  	} else {
  3987  		return byte(i)
  3988  	}
  3989  }
  3990  
  3991  func i8MaxU(v1, v2 byte) byte {
  3992  	if v1 < v2 {
  3993  		return v2
  3994  	} else {
  3995  		return v1
  3996  	}
  3997  }
  3998  
  3999  func i8MinU(v1, v2 byte) byte {
  4000  	if v1 > v2 {
  4001  		return v2
  4002  	} else {
  4003  		return v1
  4004  	}
  4005  }
  4006  
  4007  func i8MaxS(v1, v2 byte) byte {
  4008  	if int8(v1) < int8(v2) {
  4009  		return v2
  4010  	} else {
  4011  		return v1
  4012  	}
  4013  }
  4014  
  4015  func i8MinS(v1, v2 byte) byte {
  4016  	if int8(v1) > int8(v2) {
  4017  		return v2
  4018  	} else {
  4019  		return v1
  4020  	}
  4021  }
  4022  
  4023  func i16MaxU(v1, v2 uint16) uint16 {
  4024  	if v1 < v2 {
  4025  		return v2
  4026  	} else {
  4027  		return v1
  4028  	}
  4029  }
  4030  
  4031  func i16MinU(v1, v2 uint16) uint16 {
  4032  	if v1 > v2 {
  4033  		return v2
  4034  	} else {
  4035  		return v1
  4036  	}
  4037  }
  4038  
  4039  func i16MaxS(v1, v2 uint16) uint16 {
  4040  	if int16(v1) < int16(v2) {
  4041  		return v2
  4042  	} else {
  4043  		return v1
  4044  	}
  4045  }
  4046  
  4047  func i16MinS(v1, v2 uint16) uint16 {
  4048  	if int16(v1) > int16(v2) {
  4049  		return v2
  4050  	} else {
  4051  		return v1
  4052  	}
  4053  }
  4054  
  4055  func i32MaxU(v1, v2 uint32) uint32 {
  4056  	if v1 < v2 {
  4057  		return v2
  4058  	} else {
  4059  		return v1
  4060  	}
  4061  }
  4062  
  4063  func i32MinU(v1, v2 uint32) uint32 {
  4064  	if v1 > v2 {
  4065  		return v2
  4066  	} else {
  4067  		return v1
  4068  	}
  4069  }
  4070  
  4071  func i32MaxS(v1, v2 uint32) uint32 {
  4072  	if int32(v1) < int32(v2) {
  4073  		return v2
  4074  	} else {
  4075  		return v1
  4076  	}
  4077  }
  4078  
  4079  func i32MinS(v1, v2 uint32) uint32 {
  4080  	if int32(v1) > int32(v2) {
  4081  		return v2
  4082  	} else {
  4083  		return v1
  4084  	}
  4085  }
  4086  
  4087  func i16Abs(v uint16) uint16 {
  4088  	if i := int16(v); i < 0 {
  4089  		return uint16(-i)
  4090  	} else {
  4091  		return uint16(i)
  4092  	}
  4093  }
  4094  
  4095  func i32Abs(v uint32) uint32 {
  4096  	if i := int32(v); i < 0 {
  4097  		return uint32(-i)
  4098  	} else {
  4099  		return uint32(i)
  4100  	}
  4101  }
  4102  
  4103  func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, m *wasm.ModuleInstance, f *function, fnl experimental.FunctionListener) context.Context {
  4104  	def, typ := f.definition(), f.funcType
  4105  
  4106  	ce.stackIterator.reset(ce.stack, ce.frames, f)
  4107  	fnl.Before(ctx, m, def, ce.peekValues(typ.ParamNumInUint64), &ce.stackIterator)
  4108  	ce.stackIterator.clear()
  4109  	ce.callNativeFunc(ctx, m, f)
  4110  	fnl.After(ctx, m, def, ce.peekValues(typ.ResultNumInUint64))
  4111  	return ctx
  4112  }
  4113  
  4114  // popMemoryOffset takes a memory offset off the stack for use in load and store instructions.
  4115  // As the top of stack value is 64-bit, this ensures it is in range before returning it.
  4116  func (ce *callEngine) popMemoryOffset(op *wazeroir.UnionOperation) uint32 {
  4117  	offset := op.U2 + ce.popValue()
  4118  	if offset > math.MaxUint32 {
  4119  		panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4120  	}
  4121  	return uint32(offset)
  4122  }
  4123  
  4124  func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleInstance, f *function) {
  4125  	typ := f.funcType
  4126  	paramLen := typ.ParamNumInUint64
  4127  	resultLen := typ.ResultNumInUint64
  4128  	stackLen := paramLen
  4129  
  4130  	// In the interpreter engine, ce.stack may only have capacity to store
  4131  	// parameters. Grow when there are more results than parameters.
  4132  	if growLen := resultLen - paramLen; growLen > 0 {
  4133  		for i := 0; i < growLen; i++ {
  4134  			ce.stack = append(ce.stack, 0)
  4135  		}
  4136  		stackLen += growLen
  4137  	}
  4138  
  4139  	// Pass the stack elements to the go function.
  4140  	stack := ce.stack[len(ce.stack)-stackLen:]
  4141  	ce.callGoFunc(ctx, m, f, stack)
  4142  
  4143  	// Shrink the stack when there were more parameters than results.
  4144  	if shrinkLen := paramLen - resultLen; shrinkLen > 0 {
  4145  		ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen]
  4146  	}
  4147  }