github.com/tetratelabs/wazero@v1.2.1/internal/engine/interpreter/interpreter.go (about)

     1  package interpreter
     2  
     3  import (
     4  	"context"
     5  	"encoding/binary"
     6  	"errors"
     7  	"fmt"
     8  	"math"
     9  	"math/bits"
    10  	"sync"
    11  	"unsafe"
    12  
    13  	"github.com/tetratelabs/wazero/api"
    14  	"github.com/tetratelabs/wazero/experimental"
    15  	"github.com/tetratelabs/wazero/internal/filecache"
    16  	"github.com/tetratelabs/wazero/internal/internalapi"
    17  	"github.com/tetratelabs/wazero/internal/moremath"
    18  	"github.com/tetratelabs/wazero/internal/wasm"
    19  	"github.com/tetratelabs/wazero/internal/wasmdebug"
    20  	"github.com/tetratelabs/wazero/internal/wasmruntime"
    21  	"github.com/tetratelabs/wazero/internal/wazeroir"
    22  )
    23  
// callStackCeiling is the maximum WebAssembly call frame stack height. This allows wazero to raise
// wasmruntime.ErrRuntimeStackOverflow (see callEngine.pushFrame) instead of overflowing the Go runtime.
//
// The default value should suffice for most use cases. Those wishing to change this can via `go build -ldflags`.
var callStackCeiling = 2000
    29  
// engine is an interpreter implementation of wasm.Engine
type engine struct {
	// enabledFeatures is the feature set given to NewEngine; it is passed to
	// wazeroir.NewCompiler when compiling a module.
	enabledFeatures   api.CoreFeatures
	compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mux.
	// mux is held by addCompiledFunctions, getCompiledFunctions and
	// deleteCompiledFunctions while they touch compiledFunctions.
	mux               sync.RWMutex
	// labelAddressResolutionCache is the temporary cache used to map LabelKind -> FrameID -> the index to the body.
	// It is filled by lowerIR and truncated to zero length again before lowerIR returns.
	labelAddressResolutionCache [wazeroir.LabelKindNum][]uint64
}
    38  
    39  func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine {
    40  	return &engine{
    41  		enabledFeatures:   enabledFeatures,
    42  		compiledFunctions: map[wasm.ModuleID][]compiledFunction{},
    43  	}
    44  }
    45  
    46  // Close implements the same method as documented on wasm.Engine.
    47  func (e *engine) Close() (err error) {
    48  	return
    49  }
    50  
    51  // CompiledModuleCount implements the same method as documented on wasm.Engine.
    52  func (e *engine) CompiledModuleCount() uint32 {
    53  	return uint32(len(e.compiledFunctions))
    54  }
    55  
    56  // DeleteCompiledModule implements the same method as documented on wasm.Engine.
    57  func (e *engine) DeleteCompiledModule(m *wasm.Module) {
    58  	e.deleteCompiledFunctions(m)
    59  }
    60  
    61  func (e *engine) deleteCompiledFunctions(module *wasm.Module) {
    62  	e.mux.Lock()
    63  	defer e.mux.Unlock()
    64  	delete(e.compiledFunctions, module.ID)
    65  }
    66  
    67  func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) {
    68  	e.mux.Lock()
    69  	defer e.mux.Unlock()
    70  	e.compiledFunctions[module.ID] = fs
    71  }
    72  
    73  func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) {
    74  	e.mux.RLock()
    75  	defer e.mux.RUnlock()
    76  	fs, ok = e.compiledFunctions[module.ID]
    77  	return
    78  }
    79  
// moduleEngine implements wasm.ModuleEngine
type moduleEngine struct {
	// functions are the functions of a module instance.
	// The index is module instance-scoped: NewModuleEngine places the module's own
	// functions after the imported ones, and ResolveImportedFunction fills in the
	// imported entries.
	functions []function

	// parentEngine holds *engine from which this module engine is created from.
	parentEngine *engine
}
    89  
// callEngine holds context per moduleEngine.Call, and shared across all the
// function calls originating from the same moduleEngine.Call execution.
//
// This implements api.Function.
type callEngine struct {
	internalapi.WazeroOnlyType

	// stack contains the operands.
	// Note that all the values are represented as uint64.
	stack []uint64

	// frames are the function call stack.
	// pushFrame panics once callStackCeiling frames are present.
	frames []*callFrame

	// f is the initial function for this call engine.
	f *function

	// stackIterator is handed to function listeners so they can walk frames and stack.
	// It is reset before a listener's Before hook runs and cleared afterwards.
	stackIterator stackIterator
}
   110  
   111  func (e *moduleEngine) newCallEngine(compiled *function) *callEngine {
   112  	return &callEngine{f: compiled}
   113  }
   114  
   115  func (ce *callEngine) pushValue(v uint64) {
   116  	ce.stack = append(ce.stack, v)
   117  }
   118  
   119  func (ce *callEngine) pushValues(v []uint64) {
   120  	ce.stack = append(ce.stack, v...)
   121  }
   122  
   123  func (ce *callEngine) popValue() (v uint64) {
   124  	// No need to check stack bound
   125  	// as we can assume that all the operations
   126  	// are valid thanks to validateFunction
   127  	// at module validation phase
   128  	// and wazeroir translation
   129  	// before compilation.
   130  	stackTopIndex := len(ce.stack) - 1
   131  	v = ce.stack[stackTopIndex]
   132  	ce.stack = ce.stack[:stackTopIndex]
   133  	return
   134  }
   135  
   136  func (ce *callEngine) popValues(v []uint64) {
   137  	stackTopIndex := len(ce.stack) - len(v)
   138  	copy(v, ce.stack[stackTopIndex:])
   139  	ce.stack = ce.stack[:stackTopIndex]
   140  }
   141  
   142  // peekValues peeks api.ValueType values from the stack and returns them.
   143  func (ce *callEngine) peekValues(count int) []uint64 {
   144  	if count == 0 {
   145  		return nil
   146  	}
   147  	stackLen := len(ce.stack)
   148  	return ce.stack[stackLen-count : stackLen]
   149  }
   150  
   151  func (ce *callEngine) drop(raw uint64) {
   152  	r := wazeroir.InclusiveRangeFromU64(raw)
   153  	if r.Start == -1 {
   154  		return
   155  	} else if r.Start == 0 {
   156  		ce.stack = ce.stack[:int32(len(ce.stack))-1-r.End]
   157  	} else {
   158  		newStack := ce.stack[:int32(len(ce.stack))-1-r.End]
   159  		newStack = append(newStack, ce.stack[int32(len(ce.stack))-r.Start:]...)
   160  		ce.stack = newStack
   161  	}
   162  }
   163  
   164  func (ce *callEngine) pushFrame(frame *callFrame) {
   165  	if callStackCeiling <= len(ce.frames) {
   166  		panic(wasmruntime.ErrRuntimeStackOverflow)
   167  	}
   168  	ce.frames = append(ce.frames, frame)
   169  }
   170  
   171  func (ce *callEngine) popFrame() (frame *callFrame) {
   172  	// No need to check stack bound as we can assume that all the operations are valid thanks to validateFunction at
   173  	// module validation phase and wazeroir translation before compilation.
   174  	oneLess := len(ce.frames) - 1
   175  	frame = ce.frames[oneLess]
   176  	ce.frames = ce.frames[:oneLess]
   177  	return
   178  }
   179  
// callFrame is one entry of callEngine.frames and records the execution state
// of a single function invocation.
type callFrame struct {
	// pc is the program counter representing the current position in code.body.
	pc uint64
	// f is the compiled function used in this function frame.
	f *function
	// base index in the frame of this function, used to detect the count of
	// values on the stack. It is set to the operand stack length at the time
	// the frame is created.
	base int
}
   189  
// compiledFunction is the engine-level result of compiling one function of a
// module in CompileModule. It is shared by every function instantiated
// from it (see function.parent).
type compiledFunction struct {
	// source is the module this function was compiled from.
	source              *wasm.Module
	// body holds the wazeroir operations copied by lowerIR, with branch targets
	// rewritten to indexes into this slice. It is left nil for host functions.
	body                []wazeroir.UnionOperation
	// listener is the optional function listener for this function; nil when absent.
	listener            experimental.FunctionListener
	// offsetsInWasmBinary is copied from the IR's source offsets when the IR
	// provides any, and is indexed by program counter.
	offsetsInWasmBinary []uint64
	// hostFn is the Go function of a host function; non-nil marks this as a host function.
	hostFn              interface{}
	// ensureTermination is the flag given to CompileModule; callEngine.call consults it
	// to decide whether to watch the context for cancellation.
	ensureTermination   bool
	// index is the function's index in the module, offset by the number of imported functions.
	index               wasm.Index
}
   199  
// function is a compiledFunction bound to a module instance. Entries are
// created by NewModuleEngine and copied across module engines by
// ResolveImportedFunction.
type function struct {
	// funcType points at the function's type in the source module's TypeSection.
	funcType       *wasm.FunctionType
	// moduleInstance is the instance this function belongs to.
	moduleInstance *wasm.ModuleInstance
	// typeID is compared against the expected type ID on indirect calls and table lookups.
	typeID         wasm.FunctionTypeID
	// parent is the compiled code backing this function.
	parent         *compiledFunction
}
   206  
   207  // functionFromUintptr resurrects the original *function from the given uintptr
   208  // which comes from either funcref table or OpcodeRefFunc instruction.
   209  func functionFromUintptr(ptr uintptr) *function {
   210  	// Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector.
   211  	//
   212  	// For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr"
   213  	// subroutine wanrs as "checkptr: pointer arithmetic result points to invalid allocation"
   214  	// https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69
   215  	var wrapped *uintptr = &ptr
   216  	return *(**function)(unsafe.Pointer(wrapped))
   217  }
   218  
// stackIterator implements experimental.StackIterator.
type stackIterator struct {
	// stack is the view of the operand stack for the current position; Next
	// truncates it to the base of the frame it moves to.
	stack   []uint64
	// frames are the call frames not yet visited; Next consumes them from the end.
	frames  []*callFrame
	// started is false until the first call to Next, which only flips it.
	started bool
	// fn is the function at the current position.
	fn      *function
	// pc is the program counter at the current position (zero right after reset).
	pc      uint64
}
   227  
   228  func (si *stackIterator) reset(stack []uint64, frames []*callFrame, f *function) {
   229  	si.fn = f
   230  	si.pc = 0
   231  	si.stack = stack
   232  	si.frames = frames
   233  	si.started = false
   234  }
   235  
   236  func (si *stackIterator) clear() {
   237  	si.stack = nil
   238  	si.frames = nil
   239  	si.started = false
   240  	si.fn = nil
   241  }
   242  
   243  // Next implements the same method as documented on experimental.StackIterator.
   244  func (si *stackIterator) Next() bool {
   245  	if !si.started {
   246  		si.started = true
   247  		return true
   248  	}
   249  
   250  	if len(si.frames) == 0 {
   251  		return false
   252  	}
   253  
   254  	frame := si.frames[len(si.frames)-1]
   255  	si.stack = si.stack[:frame.base]
   256  	si.fn = frame.f
   257  	si.pc = frame.pc
   258  	si.frames = si.frames[:len(si.frames)-1]
   259  	return true
   260  }
   261  
   262  // Function implements the same method as documented on
   263  // experimental.StackIterator.
   264  func (si *stackIterator) Function() experimental.InternalFunction {
   265  	return internalFunction{si.fn}
   266  }
   267  
   268  // ProgramCounter implements the same method as documented on
   269  // experimental.StackIterator.
   270  func (si *stackIterator) ProgramCounter() experimental.ProgramCounter {
   271  	return experimental.ProgramCounter(si.pc)
   272  }
   273  
   274  // Parameters implements the same method as documented on
   275  // experimental.StackIterator.
   276  func (si *stackIterator) Parameters() []uint64 {
   277  	paramsCount := si.fn.funcType.ParamNumInUint64
   278  	top := len(si.stack)
   279  	return si.stack[top-paramsCount:]
   280  }
   281  
// internalFunction implements experimental.InternalFunction.
// It embeds *function, so the function's fields and methods are reachable
// directly on the wrapper.
type internalFunction struct{ *function }
   284  
   285  // Definition implements the same method as documented on
   286  // experimental.InternalFunction.
   287  func (f internalFunction) Definition() api.FunctionDefinition {
   288  	return f.definition()
   289  }
   290  
   291  // SourceOffsetForPC implements the same method as documented on
   292  // experimental.InternalFunction.
   293  func (f internalFunction) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 {
   294  	offsetsMap := f.parent.offsetsInWasmBinary
   295  	if uint64(pc) < uint64(len(offsetsMap)) {
   296  		return offsetsMap[pc]
   297  	}
   298  	return 0
   299  }
   300  
// callFrameStackSize is passed to wazeroir.NewCompiler by CompileModule.
// Interpreter mode doesn't maintain call frames in the stack, so pass the zero size to the IR.
const callFrameStackSize = 0
   303  
   304  // CompileModule implements the same method as documented on wasm.Engine.
   305  func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error {
   306  	if _, ok := e.getCompiledFunctions(module); ok { // cache hit!
   307  		return nil
   308  	}
   309  
   310  	funcs := make([]compiledFunction, len(module.FunctionSection))
   311  	irCompiler, err := wazeroir.NewCompiler(e.enabledFeatures, callFrameStackSize, module, ensureTermination)
   312  	if err != nil {
   313  		return err
   314  	}
   315  	imported := module.ImportFunctionCount
   316  	for i := range module.CodeSection {
   317  		var lsn experimental.FunctionListener
   318  		if i < len(listeners) {
   319  			lsn = listeners[i]
   320  		}
   321  
   322  		compiled := &funcs[i]
   323  		// If this is the host function, there's nothing to do as the runtime representation of
   324  		// host function in interpreter is its Go function itself as opposed to Wasm functions,
   325  		// which need to be compiled down to wazeroir.
   326  		if codeSeg := &module.CodeSection[i]; codeSeg.GoFunc != nil {
   327  			compiled.hostFn = codeSeg.GoFunc
   328  		} else {
   329  			ir, err := irCompiler.Next()
   330  			if err != nil {
   331  				return err
   332  			}
   333  			err = e.lowerIR(ir, compiled)
   334  			if err != nil {
   335  				def := module.FunctionDefinition(uint32(i) + module.ImportFunctionCount)
   336  				return fmt.Errorf("failed to lower func[%s] to wazeroir: %w", def.DebugName(), err)
   337  			}
   338  		}
   339  		compiled.source = module
   340  		compiled.ensureTermination = ensureTermination
   341  		compiled.listener = lsn
   342  		compiled.index = imported + uint32(i)
   343  	}
   344  	e.addCompiledFunctions(module, funcs)
   345  	return nil
   346  }
   347  
   348  // NewModuleEngine implements the same method as documented on wasm.Engine.
   349  func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInstance) (wasm.ModuleEngine, error) {
   350  	me := &moduleEngine{
   351  		parentEngine: e,
   352  		functions:    make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)),
   353  	}
   354  
   355  	codes, ok := e.getCompiledFunctions(module)
   356  	if !ok {
   357  		return nil, errors.New("source module must be compiled before instantiation")
   358  	}
   359  
   360  	for i := range codes {
   361  		c := &codes[i]
   362  		offset := i + int(module.ImportFunctionCount)
   363  		typeIndex := module.FunctionSection[i]
   364  		me.functions[offset] = function{
   365  			moduleInstance: instance,
   366  			typeID:         instance.TypeIDs[typeIndex],
   367  			funcType:       &module.TypeSection[typeIndex],
   368  			parent:         c,
   369  		}
   370  	}
   371  	return me, nil
   372  }
   373  
   374  // lowerIR lowers the wazeroir operations to engine friendly struct.
   375  func (e *engine) lowerIR(ir *wazeroir.CompilationResult, ret *compiledFunction) error {
   376  	// Copy the body from the result.
   377  	ret.body = make([]wazeroir.UnionOperation, len(ir.Operations))
   378  	copy(ret.body, ir.Operations)
   379  	// Also copy the offsets if necessary.
   380  	if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 {
   381  		ret.offsetsInWasmBinary = make([]uint64, len(offsets))
   382  		copy(ret.offsetsInWasmBinary, offsets)
   383  	}
   384  
   385  	// First, we iterate all labels, and resolve the address.
   386  	for i := range ret.body {
   387  		op := &ret.body[i]
   388  		switch op.Kind {
   389  		case wazeroir.OperationKindLabel:
   390  			label := wazeroir.Label(op.U1)
   391  			address := uint64(i)
   392  
   393  			kind, fid := label.Kind(), label.FrameID()
   394  			frameToAddresses := e.labelAddressResolutionCache[label.Kind()]
   395  			// Expand the slice if necessary.
   396  			if diff := fid - len(frameToAddresses) + 1; diff > 0 {
   397  				for j := 0; j < diff; j++ {
   398  					frameToAddresses = append(frameToAddresses, 0)
   399  				}
   400  			}
   401  			frameToAddresses[fid] = address
   402  			e.labelAddressResolutionCache[kind] = frameToAddresses
   403  		}
   404  	}
   405  
   406  	// Then resolve the label as the index to the body.
   407  	for i := range ret.body {
   408  		op := &ret.body[i]
   409  		switch op.Kind {
   410  		case wazeroir.OperationKindBr:
   411  			e.setLabelAddress(&op.U1, wazeroir.Label(op.U1))
   412  		case wazeroir.OperationKindBrIf:
   413  			e.setLabelAddress(&op.U1, wazeroir.Label(op.U1))
   414  			e.setLabelAddress(&op.U2, wazeroir.Label(op.U2))
   415  		case wazeroir.OperationKindBrTable:
   416  			for j := 0; j < len(op.Us); j += 2 {
   417  				target := op.Us[j]
   418  				e.setLabelAddress(&op.Us[j], wazeroir.Label(target))
   419  			}
   420  		}
   421  	}
   422  
   423  	// Reuses the slices for the subsequent compilation, so clear the content here.
   424  	for i := range e.labelAddressResolutionCache {
   425  		e.labelAddressResolutionCache[i] = e.labelAddressResolutionCache[i][:0]
   426  	}
   427  	return nil
   428  }
   429  
   430  func (e *engine) setLabelAddress(op *uint64, label wazeroir.Label) {
   431  	if label.IsReturnTarget() {
   432  		// Jmp to the end of the possible binary.
   433  		*op = math.MaxUint64
   434  	} else {
   435  		*op = e.labelAddressResolutionCache[label.Kind()][label.FrameID()]
   436  	}
   437  }
   438  
   439  // ResolveImportedFunction implements wasm.ModuleEngine.
   440  func (e *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {
   441  	imported := importedModuleEngine.(*moduleEngine)
   442  	e.functions[index] = imported.functions[indexInImportedModule]
   443  }
   444  
   445  // FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine.
   446  func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference {
   447  	return uintptr(unsafe.Pointer(&e.functions[funcIndex]))
   448  }
   449  
   450  // NewFunction implements the same method as documented on wasm.ModuleEngine.
   451  func (e *moduleEngine) NewFunction(index wasm.Index) (ce api.Function) {
   452  	// Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to
   453  	// code on close aren't locked, neither is this read.
   454  	compiled := &e.functions[index]
   455  	return e.newCallEngine(compiled)
   456  }
   457  
   458  // LookupFunction implements the same method as documented on wasm.ModuleEngine.
   459  func (e *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (f api.Function, err error) {
   460  	if tableOffset >= uint32(len(t.References)) {
   461  		err = wasmruntime.ErrRuntimeInvalidTableAccess
   462  		return
   463  	}
   464  	rawPtr := t.References[tableOffset]
   465  	if rawPtr == 0 {
   466  		err = wasmruntime.ErrRuntimeInvalidTableAccess
   467  		return
   468  	}
   469  
   470  	tf := functionFromUintptr(rawPtr)
   471  	if tf.typeID != typeId {
   472  		err = wasmruntime.ErrRuntimeIndirectCallTypeMismatch
   473  		return
   474  	}
   475  
   476  	f = e.newCallEngine(tf)
   477  	return
   478  }
   479  
   480  // Definition implements the same method as documented on api.Function.
   481  func (ce *callEngine) Definition() api.FunctionDefinition {
   482  	return ce.f.definition()
   483  }
   484  
   485  func (f *function) definition() api.FunctionDefinition {
   486  	compiled := f.parent
   487  	return compiled.source.FunctionDefinition(compiled.index)
   488  }
   489  
   490  // Call implements the same method as documented on api.Function.
   491  func (ce *callEngine) Call(ctx context.Context, params ...uint64) (results []uint64, err error) {
   492  	ft := ce.f.funcType
   493  	if n := ft.ParamNumInUint64; n != len(params) {
   494  		return nil, fmt.Errorf("expected %d params, but passed %d", n, len(params))
   495  	}
   496  	return ce.call(ctx, params, nil)
   497  }
   498  
   499  // CallWithStack implements the same method as documented on api.Function.
   500  func (ce *callEngine) CallWithStack(ctx context.Context, stack []uint64) error {
   501  	params, results, err := wasm.SplitCallStack(ce.f.funcType, stack)
   502  	if err != nil {
   503  		return err
   504  	}
   505  	_, err = ce.call(ctx, params, results)
   506  	return err
   507  }
   508  
   509  func (ce *callEngine) call(ctx context.Context, params, results []uint64) (_ []uint64, err error) {
   510  	m := ce.f.moduleInstance
   511  	if ce.f.parent.ensureTermination {
   512  		select {
   513  		case <-ctx.Done():
   514  			// If the provided context is already done, close the call context
   515  			// and return the error.
   516  			m.CloseWithCtxErr(ctx)
   517  			return nil, m.FailIfClosed()
   518  		default:
   519  		}
   520  	}
   521  
   522  	defer func() {
   523  		// If the module closed during the call, and the call didn't err for another reason, set an ExitError.
   524  		if err == nil {
   525  			err = m.FailIfClosed()
   526  		}
   527  		// TODO: ^^ Will not fail if the function was imported from a closed module.
   528  
   529  		if v := recover(); v != nil {
   530  			err = ce.recoverOnCall(ctx, m, v)
   531  		}
   532  	}()
   533  
   534  	ce.pushValues(params)
   535  
   536  	if ce.f.parent.ensureTermination {
   537  		done := m.CloseModuleOnCanceledOrTimeout(ctx)
   538  		defer done()
   539  	}
   540  
   541  	ce.callFunction(ctx, m, ce.f)
   542  
   543  	// This returns a safe copy of the results, instead of a slice view. If we
   544  	// returned a re-slice, the caller could accidentally or purposefully
   545  	// corrupt the stack of subsequent calls.
   546  	ft := ce.f.funcType
   547  	if results == nil && ft.ResultNumInUint64 > 0 {
   548  		results = make([]uint64, ft.ResultNumInUint64)
   549  	}
   550  	ce.popValues(results)
   551  	return results, nil
   552  }
   553  
// functionListenerInvocation captures arguments needed to perform function
// listener invocations when unwinding the call stack.
type functionListenerInvocation struct {
	// FunctionListener is the listener whose Abort hook is invoked by recoverOnCall.
	experimental.FunctionListener
	// def is the definition of the function the listener is attached to.
	def api.FunctionDefinition
}
   560  
   561  // recoverOnCall takes the recovered value `recoverOnCall`, and wraps it
   562  // with the call frame stack traces. Also, reset the state of callEngine
   563  // so that it can be used for the subsequent calls.
   564  func (ce *callEngine) recoverOnCall(ctx context.Context, m *wasm.ModuleInstance, v interface{}) (err error) {
   565  	builder := wasmdebug.NewErrorBuilder()
   566  	frameCount := len(ce.frames)
   567  	functionListeners := make([]functionListenerInvocation, 0, 16)
   568  
   569  	for i := 0; i < frameCount; i++ {
   570  		frame := ce.popFrame()
   571  		f := frame.f
   572  		def := f.definition()
   573  		var sources []string
   574  		if parent := frame.f.parent; parent.body != nil && len(parent.offsetsInWasmBinary) > 0 {
   575  			sources = parent.source.DWARFLines.Line(parent.offsetsInWasmBinary[frame.pc])
   576  		}
   577  		builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
   578  		if f.parent.listener != nil {
   579  			functionListeners = append(functionListeners, functionListenerInvocation{
   580  				FunctionListener: f.parent.listener,
   581  				def:              f.definition(),
   582  			})
   583  		}
   584  	}
   585  
   586  	err = builder.FromRecovered(v)
   587  	for i := range functionListeners {
   588  		functionListeners[i].Abort(ctx, m, functionListeners[i].def, err)
   589  	}
   590  
   591  	// Allows the reuse of CallEngine.
   592  	ce.stack, ce.frames = ce.stack[:0], ce.frames[:0]
   593  	return
   594  }
   595  
   596  func (ce *callEngine) callFunction(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   597  	if f.parent.hostFn != nil {
   598  		ce.callGoFuncWithStack(ctx, m, f)
   599  	} else if lsn := f.parent.listener; lsn != nil {
   600  		ce.callNativeFuncWithListener(ctx, m, f, lsn)
   601  	} else {
   602  		ce.callNativeFunc(ctx, m, f)
   603  	}
   604  }
   605  
   606  func (ce *callEngine) callGoFunc(ctx context.Context, m *wasm.ModuleInstance, f *function, stack []uint64) {
   607  	typ := f.funcType
   608  	lsn := f.parent.listener
   609  	if lsn != nil {
   610  		params := stack[:typ.ParamNumInUint64]
   611  		ce.stackIterator.reset(ce.stack, ce.frames, f)
   612  		lsn.Before(ctx, m, f.definition(), params, &ce.stackIterator)
   613  		ce.stackIterator.clear()
   614  	}
   615  	frame := &callFrame{f: f, base: len(ce.stack)}
   616  	ce.pushFrame(frame)
   617  
   618  	fn := f.parent.hostFn
   619  	switch fn := fn.(type) {
   620  	case api.GoModuleFunction:
   621  		fn.Call(ctx, m, stack)
   622  	case api.GoFunction:
   623  		fn.Call(ctx, stack)
   624  	}
   625  
   626  	ce.popFrame()
   627  	if lsn != nil {
   628  		// TODO: This doesn't get the error due to use of panic to propagate them.
   629  		results := stack[:typ.ResultNumInUint64]
   630  		lsn.After(ctx, m, f.definition(), results)
   631  	}
   632  }
   633  
   634  func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   635  	frame := &callFrame{f: f, base: len(ce.stack)}
   636  	moduleInst := f.moduleInstance
   637  	functions := moduleInst.Engine.(*moduleEngine).functions
   638  	memoryInst := moduleInst.MemoryInstance
   639  	globals := moduleInst.Globals
   640  	tables := moduleInst.Tables
   641  	typeIDs := moduleInst.TypeIDs
   642  	dataInstances := moduleInst.DataInstances
   643  	elementInstances := moduleInst.ElementInstances
   644  	ce.pushFrame(frame)
   645  	body := frame.f.parent.body
   646  	bodyLen := uint64(len(body))
   647  	for frame.pc < bodyLen {
   648  		op := &body[frame.pc]
   649  		// TODO: add description of each operation/case
   650  		// on, for example, how many args are used,
   651  		// how the stack is modified, etc.
   652  		switch op.Kind {
   653  		case wazeroir.OperationKindBuiltinFunctionCheckExitCode:
   654  			if err := m.FailIfClosed(); err != nil {
   655  				panic(err)
   656  			}
   657  			frame.pc++
   658  		case wazeroir.OperationKindUnreachable:
   659  			panic(wasmruntime.ErrRuntimeUnreachable)
   660  		case wazeroir.OperationKindBr:
   661  			frame.pc = op.U1
   662  		case wazeroir.OperationKindBrIf:
   663  			if ce.popValue() > 0 {
   664  				ce.drop(op.U3)
   665  				frame.pc = op.U1
   666  			} else {
   667  				frame.pc = op.U2
   668  			}
   669  		case wazeroir.OperationKindBrTable:
   670  			v := ce.popValue()
   671  			defaultAt := uint64(len(op.Us))/2 - 1
   672  			if v > defaultAt {
   673  				v = defaultAt
   674  			}
   675  			v *= 2
   676  			ce.drop(op.Us[v+1])
   677  			frame.pc = op.Us[v]
   678  		case wazeroir.OperationKindCall:
   679  			ce.callFunction(ctx, f.moduleInstance, &functions[op.U1])
   680  			frame.pc++
   681  		case wazeroir.OperationKindCallIndirect:
   682  			offset := ce.popValue()
   683  			table := tables[op.U2]
   684  			if offset >= uint64(len(table.References)) {
   685  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   686  			}
   687  			rawPtr := table.References[offset]
   688  			if rawPtr == 0 {
   689  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   690  			}
   691  
   692  			tf := functionFromUintptr(rawPtr)
   693  			if tf.typeID != typeIDs[op.U1] {
   694  				panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   695  			}
   696  
   697  			ce.callFunction(ctx, f.moduleInstance, tf)
   698  			frame.pc++
   699  		case wazeroir.OperationKindDrop:
   700  			ce.drop(op.U1)
   701  			frame.pc++
   702  		case wazeroir.OperationKindSelect:
   703  			c := ce.popValue()
   704  			if op.B3 { // Target is vector.
   705  				x2Hi, x2Lo := ce.popValue(), ce.popValue()
   706  				if c == 0 {
   707  					_, _ = ce.popValue(), ce.popValue() // discard the x1's lo and hi bits.
   708  					ce.pushValue(x2Lo)
   709  					ce.pushValue(x2Hi)
   710  				}
   711  			} else {
   712  				v2 := ce.popValue()
   713  				if c == 0 {
   714  					_ = ce.popValue()
   715  					ce.pushValue(v2)
   716  				}
   717  			}
   718  			frame.pc++
   719  		case wazeroir.OperationKindPick:
   720  			index := len(ce.stack) - 1 - int(op.U1)
   721  			ce.pushValue(ce.stack[index])
   722  			if op.B3 { // V128 value target.
   723  				ce.pushValue(ce.stack[index+1])
   724  			}
   725  			frame.pc++
   726  		case wazeroir.OperationKindSet:
   727  			if op.B3 { // V128 value target.
   728  				lowIndex := len(ce.stack) - 1 - int(op.U1)
   729  				highIndex := lowIndex + 1
   730  				hi, lo := ce.popValue(), ce.popValue()
   731  				ce.stack[lowIndex], ce.stack[highIndex] = lo, hi
   732  			} else {
   733  				index := len(ce.stack) - 1 - int(op.U1)
   734  				ce.stack[index] = ce.popValue()
   735  			}
   736  			frame.pc++
   737  		case wazeroir.OperationKindGlobalGet:
   738  			g := globals[op.U1]
   739  			ce.pushValue(g.Val)
   740  			if g.Type.ValType == wasm.ValueTypeV128 {
   741  				ce.pushValue(g.ValHi)
   742  			}
   743  			frame.pc++
   744  		case wazeroir.OperationKindGlobalSet:
   745  			g := globals[op.U1]
   746  			if g.Type.ValType == wasm.ValueTypeV128 {
   747  				g.ValHi = ce.popValue()
   748  			}
   749  			g.Val = ce.popValue()
   750  			frame.pc++
   751  		case wazeroir.OperationKindLoad:
   752  			offset := ce.popMemoryOffset(op)
   753  			switch wazeroir.UnsignedType(op.B1) {
   754  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
   755  				if val, ok := memoryInst.ReadUint32Le(offset); !ok {
   756  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   757  				} else {
   758  					ce.pushValue(uint64(val))
   759  				}
   760  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
   761  				if val, ok := memoryInst.ReadUint64Le(offset); !ok {
   762  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   763  				} else {
   764  					ce.pushValue(val)
   765  				}
   766  			}
   767  			frame.pc++
   768  		case wazeroir.OperationKindLoad8:
   769  			val, ok := memoryInst.ReadByte(ce.popMemoryOffset(op))
   770  			if !ok {
   771  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   772  			}
   773  
   774  			switch wazeroir.SignedInt(op.B1) {
   775  			case wazeroir.SignedInt32:
   776  				ce.pushValue(uint64(uint32(int8(val))))
   777  			case wazeroir.SignedInt64:
   778  				ce.pushValue(uint64(int8(val)))
   779  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
   780  				ce.pushValue(uint64(val))
   781  			}
   782  			frame.pc++
   783  		case wazeroir.OperationKindLoad16:
   784  
   785  			val, ok := memoryInst.ReadUint16Le(ce.popMemoryOffset(op))
   786  			if !ok {
   787  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   788  			}
   789  
   790  			switch wazeroir.SignedInt(op.B1) {
   791  			case wazeroir.SignedInt32:
   792  				ce.pushValue(uint64(uint32(int16(val))))
   793  			case wazeroir.SignedInt64:
   794  				ce.pushValue(uint64(int16(val)))
   795  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
   796  				ce.pushValue(uint64(val))
   797  			}
   798  			frame.pc++
   799  		case wazeroir.OperationKindLoad32:
   800  			val, ok := memoryInst.ReadUint32Le(ce.popMemoryOffset(op))
   801  			if !ok {
   802  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   803  			}
   804  
   805  			if op.B1 == 1 { // Signed
   806  				ce.pushValue(uint64(int32(val)))
   807  			} else {
   808  				ce.pushValue(uint64(val))
   809  			}
   810  			frame.pc++
   811  		case wazeroir.OperationKindStore:
   812  			val := ce.popValue()
   813  			offset := ce.popMemoryOffset(op)
   814  			switch wazeroir.UnsignedType(op.B1) {
   815  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
   816  				if !memoryInst.WriteUint32Le(offset, uint32(val)) {
   817  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   818  				}
   819  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
   820  				if !memoryInst.WriteUint64Le(offset, val) {
   821  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   822  				}
   823  			}
   824  			frame.pc++
   825  		case wazeroir.OperationKindStore8:
   826  			val := byte(ce.popValue())
   827  			offset := ce.popMemoryOffset(op)
   828  			if !memoryInst.WriteByte(offset, val) {
   829  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   830  			}
   831  			frame.pc++
   832  		case wazeroir.OperationKindStore16:
   833  			val := uint16(ce.popValue())
   834  			offset := ce.popMemoryOffset(op)
   835  			if !memoryInst.WriteUint16Le(offset, val) {
   836  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   837  			}
   838  			frame.pc++
   839  		case wazeroir.OperationKindStore32:
   840  			val := uint32(ce.popValue())
   841  			offset := ce.popMemoryOffset(op)
   842  			if !memoryInst.WriteUint32Le(offset, val) {
   843  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   844  			}
   845  			frame.pc++
   846  		case wazeroir.OperationKindMemorySize:
   847  			ce.pushValue(uint64(memoryInst.PageSize()))
   848  			frame.pc++
   849  		case wazeroir.OperationKindMemoryGrow:
   850  			n := ce.popValue()
   851  			if res, ok := memoryInst.Grow(uint32(n)); !ok {
   852  				ce.pushValue(uint64(0xffffffff)) // = -1 in signed 32-bit integer.
   853  			} else {
   854  				ce.pushValue(uint64(res))
   855  			}
   856  			frame.pc++
   857  		case wazeroir.OperationKindConstI32, wazeroir.OperationKindConstI64,
   858  			wazeroir.OperationKindConstF32, wazeroir.OperationKindConstF64:
   859  			ce.pushValue(op.U1)
   860  			frame.pc++
   861  		case wazeroir.OperationKindEq:
   862  			var b bool
   863  			switch wazeroir.UnsignedType(op.B1) {
   864  			case wazeroir.UnsignedTypeI32:
   865  				v2, v1 := ce.popValue(), ce.popValue()
   866  				b = uint32(v1) == uint32(v2)
   867  			case wazeroir.UnsignedTypeI64:
   868  				v2, v1 := ce.popValue(), ce.popValue()
   869  				b = v1 == v2
   870  			case wazeroir.UnsignedTypeF32:
   871  				v2, v1 := ce.popValue(), ce.popValue()
   872  				b = math.Float32frombits(uint32(v2)) == math.Float32frombits(uint32(v1))
   873  			case wazeroir.UnsignedTypeF64:
   874  				v2, v1 := ce.popValue(), ce.popValue()
   875  				b = math.Float64frombits(v2) == math.Float64frombits(v1)
   876  			}
   877  			if b {
   878  				ce.pushValue(1)
   879  			} else {
   880  				ce.pushValue(0)
   881  			}
   882  			frame.pc++
   883  		case wazeroir.OperationKindNe:
   884  			var b bool
   885  			switch wazeroir.UnsignedType(op.B1) {
   886  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeI64:
   887  				v2, v1 := ce.popValue(), ce.popValue()
   888  				b = v1 != v2
   889  			case wazeroir.UnsignedTypeF32:
   890  				v2, v1 := ce.popValue(), ce.popValue()
   891  				b = math.Float32frombits(uint32(v2)) != math.Float32frombits(uint32(v1))
   892  			case wazeroir.UnsignedTypeF64:
   893  				v2, v1 := ce.popValue(), ce.popValue()
   894  				b = math.Float64frombits(v2) != math.Float64frombits(v1)
   895  			}
   896  			if b {
   897  				ce.pushValue(1)
   898  			} else {
   899  				ce.pushValue(0)
   900  			}
   901  			frame.pc++
   902  		case wazeroir.OperationKindEqz:
   903  			if ce.popValue() == 0 {
   904  				ce.pushValue(1)
   905  			} else {
   906  				ce.pushValue(0)
   907  			}
   908  			frame.pc++
   909  		case wazeroir.OperationKindLt:
   910  			v2 := ce.popValue()
   911  			v1 := ce.popValue()
   912  			var b bool
   913  			switch wazeroir.SignedType(op.B1) {
   914  			case wazeroir.SignedTypeInt32:
   915  				b = int32(v1) < int32(v2)
   916  			case wazeroir.SignedTypeInt64:
   917  				b = int64(v1) < int64(v2)
   918  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
   919  				b = v1 < v2
   920  			case wazeroir.SignedTypeFloat32:
   921  				b = math.Float32frombits(uint32(v1)) < math.Float32frombits(uint32(v2))
   922  			case wazeroir.SignedTypeFloat64:
   923  				b = math.Float64frombits(v1) < math.Float64frombits(v2)
   924  			}
   925  			if b {
   926  				ce.pushValue(1)
   927  			} else {
   928  				ce.pushValue(0)
   929  			}
   930  			frame.pc++
   931  		case wazeroir.OperationKindGt:
   932  			v2 := ce.popValue()
   933  			v1 := ce.popValue()
   934  			var b bool
   935  			switch wazeroir.SignedType(op.B1) {
   936  			case wazeroir.SignedTypeInt32:
   937  				b = int32(v1) > int32(v2)
   938  			case wazeroir.SignedTypeInt64:
   939  				b = int64(v1) > int64(v2)
   940  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
   941  				b = v1 > v2
   942  			case wazeroir.SignedTypeFloat32:
   943  				b = math.Float32frombits(uint32(v1)) > math.Float32frombits(uint32(v2))
   944  			case wazeroir.SignedTypeFloat64:
   945  				b = math.Float64frombits(v1) > math.Float64frombits(v2)
   946  			}
   947  			if b {
   948  				ce.pushValue(1)
   949  			} else {
   950  				ce.pushValue(0)
   951  			}
   952  			frame.pc++
   953  		case wazeroir.OperationKindLe:
   954  			v2 := ce.popValue()
   955  			v1 := ce.popValue()
   956  			var b bool
   957  			switch wazeroir.SignedType(op.B1) {
   958  			case wazeroir.SignedTypeInt32:
   959  				b = int32(v1) <= int32(v2)
   960  			case wazeroir.SignedTypeInt64:
   961  				b = int64(v1) <= int64(v2)
   962  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
   963  				b = v1 <= v2
   964  			case wazeroir.SignedTypeFloat32:
   965  				b = math.Float32frombits(uint32(v1)) <= math.Float32frombits(uint32(v2))
   966  			case wazeroir.SignedTypeFloat64:
   967  				b = math.Float64frombits(v1) <= math.Float64frombits(v2)
   968  			}
   969  			if b {
   970  				ce.pushValue(1)
   971  			} else {
   972  				ce.pushValue(0)
   973  			}
   974  			frame.pc++
   975  		case wazeroir.OperationKindGe:
   976  			v2 := ce.popValue()
   977  			v1 := ce.popValue()
   978  			var b bool
   979  			switch wazeroir.SignedType(op.B1) {
   980  			case wazeroir.SignedTypeInt32:
   981  				b = int32(v1) >= int32(v2)
   982  			case wazeroir.SignedTypeInt64:
   983  				b = int64(v1) >= int64(v2)
   984  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
   985  				b = v1 >= v2
   986  			case wazeroir.SignedTypeFloat32:
   987  				b = math.Float32frombits(uint32(v1)) >= math.Float32frombits(uint32(v2))
   988  			case wazeroir.SignedTypeFloat64:
   989  				b = math.Float64frombits(v1) >= math.Float64frombits(v2)
   990  			}
   991  			if b {
   992  				ce.pushValue(1)
   993  			} else {
   994  				ce.pushValue(0)
   995  			}
   996  			frame.pc++
   997  		case wazeroir.OperationKindAdd:
   998  			v2 := ce.popValue()
   999  			v1 := ce.popValue()
  1000  			switch wazeroir.UnsignedType(op.B1) {
  1001  			case wazeroir.UnsignedTypeI32:
  1002  				v := uint32(v1) + uint32(v2)
  1003  				ce.pushValue(uint64(v))
  1004  			case wazeroir.UnsignedTypeI64:
  1005  				ce.pushValue(v1 + v2)
  1006  			case wazeroir.UnsignedTypeF32:
  1007  				ce.pushValue(addFloat32bits(uint32(v1), uint32(v2)))
  1008  			case wazeroir.UnsignedTypeF64:
  1009  				v := math.Float64frombits(v1) + math.Float64frombits(v2)
  1010  				ce.pushValue(math.Float64bits(v))
  1011  			}
  1012  			frame.pc++
  1013  		case wazeroir.OperationKindSub:
  1014  			v2 := ce.popValue()
  1015  			v1 := ce.popValue()
  1016  			switch wazeroir.UnsignedType(op.B1) {
  1017  			case wazeroir.UnsignedTypeI32:
  1018  				ce.pushValue(uint64(uint32(v1) - uint32(v2)))
  1019  			case wazeroir.UnsignedTypeI64:
  1020  				ce.pushValue(v1 - v2)
  1021  			case wazeroir.UnsignedTypeF32:
  1022  				ce.pushValue(subFloat32bits(uint32(v1), uint32(v2)))
  1023  			case wazeroir.UnsignedTypeF64:
  1024  				v := math.Float64frombits(v1) - math.Float64frombits(v2)
  1025  				ce.pushValue(math.Float64bits(v))
  1026  			}
  1027  			frame.pc++
  1028  		case wazeroir.OperationKindMul:
  1029  			v2 := ce.popValue()
  1030  			v1 := ce.popValue()
  1031  			switch wazeroir.UnsignedType(op.B1) {
  1032  			case wazeroir.UnsignedTypeI32:
  1033  				ce.pushValue(uint64(uint32(v1) * uint32(v2)))
  1034  			case wazeroir.UnsignedTypeI64:
  1035  				ce.pushValue(v1 * v2)
  1036  			case wazeroir.UnsignedTypeF32:
  1037  				ce.pushValue(mulFloat32bits(uint32(v1), uint32(v2)))
  1038  			case wazeroir.UnsignedTypeF64:
  1039  				v := math.Float64frombits(v2) * math.Float64frombits(v1)
  1040  				ce.pushValue(math.Float64bits(v))
  1041  			}
  1042  			frame.pc++
  1043  		case wazeroir.OperationKindClz:
  1044  			v := ce.popValue()
  1045  			if op.B1 == 0 {
  1046  				// UnsignedInt32
  1047  				ce.pushValue(uint64(bits.LeadingZeros32(uint32(v))))
  1048  			} else {
  1049  				// UnsignedInt64
  1050  				ce.pushValue(uint64(bits.LeadingZeros64(v)))
  1051  			}
  1052  			frame.pc++
  1053  		case wazeroir.OperationKindCtz:
  1054  			v := ce.popValue()
  1055  			if op.B1 == 0 {
  1056  				// UnsignedInt32
  1057  				ce.pushValue(uint64(bits.TrailingZeros32(uint32(v))))
  1058  			} else {
  1059  				// UnsignedInt64
  1060  				ce.pushValue(uint64(bits.TrailingZeros64(v)))
  1061  			}
  1062  			frame.pc++
  1063  		case wazeroir.OperationKindPopcnt:
  1064  			v := ce.popValue()
  1065  			if op.B1 == 0 {
  1066  				// UnsignedInt32
  1067  				ce.pushValue(uint64(bits.OnesCount32(uint32(v))))
  1068  			} else {
  1069  				// UnsignedInt64
  1070  				ce.pushValue(uint64(bits.OnesCount64(v)))
  1071  			}
  1072  			frame.pc++
  1073  		case wazeroir.OperationKindDiv:
  1074  			// If an integer, check we won't divide by zero.
  1075  			t := wazeroir.SignedType(op.B1)
  1076  			v2, v1 := ce.popValue(), ce.popValue()
  1077  			switch t {
  1078  			case wazeroir.SignedTypeFloat32, wazeroir.SignedTypeFloat64: // not integers
  1079  			default:
  1080  				if v2 == 0 {
  1081  					panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1082  				}
  1083  			}
  1084  
  1085  			switch t {
  1086  			case wazeroir.SignedTypeInt32:
  1087  				d := int32(v2)
  1088  				n := int32(v1)
  1089  				if n == math.MinInt32 && d == -1 {
  1090  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1091  				}
  1092  				ce.pushValue(uint64(uint32(n / d)))
  1093  			case wazeroir.SignedTypeInt64:
  1094  				d := int64(v2)
  1095  				n := int64(v1)
  1096  				if n == math.MinInt64 && d == -1 {
  1097  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1098  				}
  1099  				ce.pushValue(uint64(n / d))
  1100  			case wazeroir.SignedTypeUint32:
  1101  				d := uint32(v2)
  1102  				n := uint32(v1)
  1103  				ce.pushValue(uint64(n / d))
  1104  			case wazeroir.SignedTypeUint64:
  1105  				d := v2
  1106  				n := v1
  1107  				ce.pushValue(n / d)
  1108  			case wazeroir.SignedTypeFloat32:
  1109  				ce.pushValue(divFloat32bits(uint32(v1), uint32(v2)))
  1110  			case wazeroir.SignedTypeFloat64:
  1111  				ce.pushValue(math.Float64bits(math.Float64frombits(v1) / math.Float64frombits(v2)))
  1112  			}
  1113  			frame.pc++
  1114  		case wazeroir.OperationKindRem:
  1115  			v2, v1 := ce.popValue(), ce.popValue()
  1116  			if v2 == 0 {
  1117  				panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1118  			}
  1119  			switch wazeroir.SignedInt(op.B1) {
  1120  			case wazeroir.SignedInt32:
  1121  				d := int32(v2)
  1122  				n := int32(v1)
  1123  				ce.pushValue(uint64(uint32(n % d)))
  1124  			case wazeroir.SignedInt64:
  1125  				d := int64(v2)
  1126  				n := int64(v1)
  1127  				ce.pushValue(uint64(n % d))
  1128  			case wazeroir.SignedUint32:
  1129  				d := uint32(v2)
  1130  				n := uint32(v1)
  1131  				ce.pushValue(uint64(n % d))
  1132  			case wazeroir.SignedUint64:
  1133  				d := v2
  1134  				n := v1
  1135  				ce.pushValue(n % d)
  1136  			}
  1137  			frame.pc++
  1138  		case wazeroir.OperationKindAnd:
  1139  			v2 := ce.popValue()
  1140  			v1 := ce.popValue()
  1141  			if op.B1 == 0 {
  1142  				// UnsignedInt32
  1143  				ce.pushValue(uint64(uint32(v2) & uint32(v1)))
  1144  			} else {
  1145  				// UnsignedInt64
  1146  				ce.pushValue(uint64(v2 & v1))
  1147  			}
  1148  			frame.pc++
  1149  		case wazeroir.OperationKindOr:
  1150  			v2 := ce.popValue()
  1151  			v1 := ce.popValue()
  1152  			if op.B1 == 0 {
  1153  				// UnsignedInt32
  1154  				ce.pushValue(uint64(uint32(v2) | uint32(v1)))
  1155  			} else {
  1156  				// UnsignedInt64
  1157  				ce.pushValue(uint64(v2 | v1))
  1158  			}
  1159  			frame.pc++
  1160  		case wazeroir.OperationKindXor:
  1161  			v2 := ce.popValue()
  1162  			v1 := ce.popValue()
  1163  			if op.B1 == 0 {
  1164  				// UnsignedInt32
  1165  				ce.pushValue(uint64(uint32(v2) ^ uint32(v1)))
  1166  			} else {
  1167  				// UnsignedInt64
  1168  				ce.pushValue(uint64(v2 ^ v1))
  1169  			}
  1170  			frame.pc++
  1171  		case wazeroir.OperationKindShl:
  1172  			v2 := ce.popValue()
  1173  			v1 := ce.popValue()
  1174  			if op.B1 == 0 {
  1175  				// UnsignedInt32
  1176  				ce.pushValue(uint64(uint32(v1) << (uint32(v2) % 32)))
  1177  			} else {
  1178  				// UnsignedInt64
  1179  				ce.pushValue(v1 << (v2 % 64))
  1180  			}
  1181  			frame.pc++
  1182  		case wazeroir.OperationKindShr:
  1183  			v2 := ce.popValue()
  1184  			v1 := ce.popValue()
  1185  			switch wazeroir.SignedInt(op.B1) {
  1186  			case wazeroir.SignedInt32:
  1187  				ce.pushValue(uint64(uint32(int32(v1) >> (uint32(v2) % 32))))
  1188  			case wazeroir.SignedInt64:
  1189  				ce.pushValue(uint64(int64(v1) >> (v2 % 64)))
  1190  			case wazeroir.SignedUint32:
  1191  				ce.pushValue(uint64(uint32(v1) >> (uint32(v2) % 32)))
  1192  			case wazeroir.SignedUint64:
  1193  				ce.pushValue(v1 >> (v2 % 64))
  1194  			}
  1195  			frame.pc++
  1196  		case wazeroir.OperationKindRotl:
  1197  			v2 := ce.popValue()
  1198  			v1 := ce.popValue()
  1199  			if op.B1 == 0 {
  1200  				// UnsignedInt32
  1201  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), int(v2))))
  1202  			} else {
  1203  				// UnsignedInt64
  1204  				ce.pushValue(uint64(bits.RotateLeft64(v1, int(v2))))
  1205  			}
  1206  			frame.pc++
  1207  		case wazeroir.OperationKindRotr:
  1208  			v2 := ce.popValue()
  1209  			v1 := ce.popValue()
  1210  			if op.B1 == 0 {
  1211  				// UnsignedInt32
  1212  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), -int(v2))))
  1213  			} else {
  1214  				// UnsignedInt64
  1215  				ce.pushValue(uint64(bits.RotateLeft64(v1, -int(v2))))
  1216  			}
  1217  			frame.pc++
  1218  		case wazeroir.OperationKindAbs:
  1219  			if op.B1 == 0 {
  1220  				// Float32
  1221  				const mask uint32 = 1 << 31
  1222  				ce.pushValue(uint64(uint32(ce.popValue()) &^ mask))
  1223  			} else {
  1224  				// Float64
  1225  				const mask uint64 = 1 << 63
  1226  				ce.pushValue(ce.popValue() &^ mask)
  1227  			}
  1228  			frame.pc++
  1229  		case wazeroir.OperationKindNeg:
  1230  			if op.B1 == 0 {
  1231  				// Float32
  1232  				v := -math.Float32frombits(uint32(ce.popValue()))
  1233  				ce.pushValue(uint64(math.Float32bits(v)))
  1234  			} else {
  1235  				// Float64
  1236  				v := -math.Float64frombits(ce.popValue())
  1237  				ce.pushValue(math.Float64bits(v))
  1238  			}
  1239  			frame.pc++
  1240  		case wazeroir.OperationKindCeil:
  1241  			if op.B1 == 0 {
  1242  				// Float32
  1243  				v := moremath.WasmCompatCeilF32(math.Float32frombits(uint32(ce.popValue())))
  1244  				ce.pushValue(uint64(math.Float32bits(v)))
  1245  			} else {
  1246  				// Float64
  1247  				v := moremath.WasmCompatCeilF64(math.Float64frombits(ce.popValue()))
  1248  				ce.pushValue(math.Float64bits(v))
  1249  			}
  1250  			frame.pc++
  1251  		case wazeroir.OperationKindFloor:
  1252  			if op.B1 == 0 {
  1253  				// Float32
  1254  				v := moremath.WasmCompatFloorF32(math.Float32frombits(uint32(ce.popValue())))
  1255  				ce.pushValue(uint64(math.Float32bits(v)))
  1256  			} else {
  1257  				// Float64
  1258  				v := moremath.WasmCompatFloorF64(math.Float64frombits(ce.popValue()))
  1259  				ce.pushValue(math.Float64bits(v))
  1260  			}
  1261  			frame.pc++
  1262  		case wazeroir.OperationKindTrunc:
  1263  			if op.B1 == 0 {
  1264  				// Float32
  1265  				v := moremath.WasmCompatTruncF32(math.Float32frombits(uint32(ce.popValue())))
  1266  				ce.pushValue(uint64(math.Float32bits(v)))
  1267  			} else {
  1268  				// Float64
  1269  				v := moremath.WasmCompatTruncF64(math.Float64frombits(ce.popValue()))
  1270  				ce.pushValue(math.Float64bits(v))
  1271  			}
  1272  			frame.pc++
  1273  		case wazeroir.OperationKindNearest:
  1274  			if op.B1 == 0 {
  1275  				// Float32
  1276  				f := math.Float32frombits(uint32(ce.popValue()))
  1277  				ce.pushValue(uint64(math.Float32bits(moremath.WasmCompatNearestF32(f))))
  1278  			} else {
  1279  				// Float64
  1280  				f := math.Float64frombits(ce.popValue())
  1281  				ce.pushValue(math.Float64bits(moremath.WasmCompatNearestF64(f)))
  1282  			}
  1283  			frame.pc++
  1284  		case wazeroir.OperationKindSqrt:
  1285  			if op.B1 == 0 {
  1286  				// Float32
  1287  				v := math.Sqrt(float64(math.Float32frombits(uint32(ce.popValue()))))
  1288  				ce.pushValue(uint64(math.Float32bits(float32(v))))
  1289  			} else {
  1290  				// Float64
  1291  				v := math.Sqrt(math.Float64frombits(ce.popValue()))
  1292  				ce.pushValue(math.Float64bits(v))
  1293  			}
  1294  			frame.pc++
  1295  		case wazeroir.OperationKindMin:
  1296  			if op.B1 == 0 {
  1297  				// Float32
  1298  				ce.pushValue(WasmCompatMin32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1299  			} else {
  1300  				v2 := math.Float64frombits(ce.popValue())
  1301  				v1 := math.Float64frombits(ce.popValue())
  1302  				ce.pushValue(math.Float64bits(moremath.WasmCompatMin64(v1, v2)))
  1303  			}
  1304  			frame.pc++
  1305  		case wazeroir.OperationKindMax:
  1306  			if op.B1 == 0 {
  1307  				ce.pushValue(WasmCompatMax32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1308  			} else {
  1309  				// Float64
  1310  				v2 := math.Float64frombits(ce.popValue())
  1311  				v1 := math.Float64frombits(ce.popValue())
  1312  				ce.pushValue(math.Float64bits(moremath.WasmCompatMax64(v1, v2)))
  1313  			}
  1314  			frame.pc++
  1315  		case wazeroir.OperationKindCopysign:
  1316  			if op.B1 == 0 {
  1317  				// Float32
  1318  				v2 := uint32(ce.popValue())
  1319  				v1 := uint32(ce.popValue())
  1320  				const signbit = 1 << 31
  1321  				ce.pushValue(uint64(v1&^signbit | v2&signbit))
  1322  			} else {
  1323  				// Float64
  1324  				v2 := ce.popValue()
  1325  				v1 := ce.popValue()
  1326  				const signbit = 1 << 63
  1327  				ce.pushValue(v1&^signbit | v2&signbit)
  1328  			}
  1329  			frame.pc++
  1330  		case wazeroir.OperationKindI32WrapFromI64:
  1331  			ce.pushValue(uint64(uint32(ce.popValue())))
  1332  			frame.pc++
  1333  		case wazeroir.OperationKindITruncFromF:
  1334  			if op.B1 == 0 {
  1335  				// Float32
  1336  				switch wazeroir.SignedInt(op.B2) {
  1337  				case wazeroir.SignedInt32:
  1338  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1339  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1340  						if op.B3 {
  1341  							// non-trapping conversion must cast nan to zero.
  1342  							v = 0
  1343  						} else {
  1344  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1345  						}
  1346  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1347  						if op.B3 {
  1348  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1349  							if v < 0 {
  1350  								v = math.MinInt32
  1351  							} else {
  1352  								v = math.MaxInt32
  1353  							}
  1354  						} else {
  1355  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1356  						}
  1357  					}
  1358  					ce.pushValue(uint64(uint32(int32(v))))
  1359  				case wazeroir.SignedInt64:
  1360  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1361  					res := int64(v)
  1362  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1363  						if op.B3 {
  1364  							// non-trapping conversion must cast nan to zero.
  1365  							res = 0
  1366  						} else {
  1367  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1368  						}
  1369  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1370  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1371  						// and that's why we use '>=' not '>' to check overflow.
  1372  						if op.B3 {
  1373  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1374  							if v < 0 {
  1375  								res = math.MinInt64
  1376  							} else {
  1377  								res = math.MaxInt64
  1378  							}
  1379  						} else {
  1380  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1381  						}
  1382  					}
  1383  					ce.pushValue(uint64(res))
  1384  				case wazeroir.SignedUint32:
  1385  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1386  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1387  						if op.B3 {
  1388  							// non-trapping conversion must cast nan to zero.
  1389  							v = 0
  1390  						} else {
  1391  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1392  						}
  1393  					} else if v < 0 || v > math.MaxUint32 {
  1394  						if op.B3 {
  1395  							// non-trapping conversion must "saturate" the value for overflowing source.
  1396  							if v < 0 {
  1397  								v = 0
  1398  							} else {
  1399  								v = math.MaxUint32
  1400  							}
  1401  						} else {
  1402  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1403  						}
  1404  					}
  1405  					ce.pushValue(uint64(uint32(v)))
  1406  				case wazeroir.SignedUint64:
  1407  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1408  					res := uint64(v)
  1409  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1410  						if op.B3 {
  1411  							// non-trapping conversion must cast nan to zero.
  1412  							res = 0
  1413  						} else {
  1414  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1415  						}
  1416  					} else if v < 0 || v >= math.MaxUint64 {
  1417  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1418  						// and that's why we use '>=' not '>' to check overflow.
  1419  						if op.B3 {
  1420  							// non-trapping conversion must "saturate" the value for overflowing source.
  1421  							if v < 0 {
  1422  								res = 0
  1423  							} else {
  1424  								res = math.MaxUint64
  1425  							}
  1426  						} else {
  1427  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1428  						}
  1429  					}
  1430  					ce.pushValue(res)
  1431  				}
  1432  			} else {
  1433  				// Float64
  1434  				switch wazeroir.SignedInt(op.B2) {
  1435  				case wazeroir.SignedInt32:
  1436  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1437  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1438  						if op.B3 {
  1439  							// non-trapping conversion must cast nan to zero.
  1440  							v = 0
  1441  						} else {
  1442  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1443  						}
  1444  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1445  						if op.B3 {
  1446  							// non-trapping conversion must "saturate" the value for overflowing source.
  1447  							if v < 0 {
  1448  								v = math.MinInt32
  1449  							} else {
  1450  								v = math.MaxInt32
  1451  							}
  1452  						} else {
  1453  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1454  						}
  1455  					}
  1456  					ce.pushValue(uint64(uint32(int32(v))))
  1457  				case wazeroir.SignedInt64:
  1458  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1459  					res := int64(v)
  1460  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1461  						if op.B3 {
  1462  							// non-trapping conversion must cast nan to zero.
  1463  							res = 0
  1464  						} else {
  1465  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1466  						}
  1467  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1468  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1469  						// and that's why we use '>=' not '>' to check overflow.
  1470  						if op.B3 {
  1471  							// non-trapping conversion must "saturate" the value for overflowing source.
  1472  							if v < 0 {
  1473  								res = math.MinInt64
  1474  							} else {
  1475  								res = math.MaxInt64
  1476  							}
  1477  						} else {
  1478  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1479  						}
  1480  					}
  1481  					ce.pushValue(uint64(res))
  1482  				case wazeroir.SignedUint32:
  1483  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1484  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1485  						if op.B3 {
  1486  							// non-trapping conversion must cast nan to zero.
  1487  							v = 0
  1488  						} else {
  1489  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1490  						}
  1491  					} else if v < 0 || v > math.MaxUint32 {
  1492  						if op.B3 {
  1493  							// non-trapping conversion must "saturate" the value for overflowing source.
  1494  							if v < 0 {
  1495  								v = 0
  1496  							} else {
  1497  								v = math.MaxUint32
  1498  							}
  1499  						} else {
  1500  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1501  						}
  1502  					}
  1503  					ce.pushValue(uint64(uint32(v)))
  1504  				case wazeroir.SignedUint64:
  1505  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1506  					res := uint64(v)
  1507  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1508  						if op.B3 {
  1509  							// non-trapping conversion must cast nan to zero.
  1510  							res = 0
  1511  						} else {
  1512  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1513  						}
  1514  					} else if v < 0 || v >= math.MaxUint64 {
  1515  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1516  						// and that's why we use '>=' not '>' to check overflow.
  1517  						if op.B3 {
  1518  							// non-trapping conversion must "saturate" the value for overflowing source.
  1519  							if v < 0 {
  1520  								res = 0
  1521  							} else {
  1522  								res = math.MaxUint64
  1523  							}
  1524  						} else {
  1525  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1526  						}
  1527  					}
  1528  					ce.pushValue(res)
  1529  				}
  1530  			}
  1531  			frame.pc++
  1532  		case wazeroir.OperationKindFConvertFromI:
  1533  			switch wazeroir.SignedInt(op.B1) {
  1534  			case wazeroir.SignedInt32:
  1535  				if op.B2 == 0 {
  1536  					// Float32
  1537  					v := float32(int32(ce.popValue()))
  1538  					ce.pushValue(uint64(math.Float32bits(v)))
  1539  				} else {
  1540  					// Float64
  1541  					v := float64(int32(ce.popValue()))
  1542  					ce.pushValue(math.Float64bits(v))
  1543  				}
  1544  			case wazeroir.SignedInt64:
  1545  				if op.B2 == 0 {
  1546  					// Float32
  1547  					v := float32(int64(ce.popValue()))
  1548  					ce.pushValue(uint64(math.Float32bits(v)))
  1549  				} else {
  1550  					// Float64
  1551  					v := float64(int64(ce.popValue()))
  1552  					ce.pushValue(math.Float64bits(v))
  1553  				}
  1554  			case wazeroir.SignedUint32:
  1555  				if op.B2 == 0 {
  1556  					// Float32
  1557  					v := float32(uint32(ce.popValue()))
  1558  					ce.pushValue(uint64(math.Float32bits(v)))
  1559  				} else {
  1560  					// Float64
  1561  					v := float64(uint32(ce.popValue()))
  1562  					ce.pushValue(math.Float64bits(v))
  1563  				}
  1564  			case wazeroir.SignedUint64:
  1565  				if op.B2 == 0 {
  1566  					// Float32
  1567  					v := float32(ce.popValue())
  1568  					ce.pushValue(uint64(math.Float32bits(v)))
  1569  				} else {
  1570  					// Float64
  1571  					v := float64(ce.popValue())
  1572  					ce.pushValue(math.Float64bits(v))
  1573  				}
  1574  			}
  1575  			frame.pc++
  1576  		case wazeroir.OperationKindF32DemoteFromF64:
  1577  			v := float32(math.Float64frombits(ce.popValue()))
  1578  			ce.pushValue(uint64(math.Float32bits(v)))
  1579  			frame.pc++
  1580  		case wazeroir.OperationKindF64PromoteFromF32:
  1581  			v := float64(math.Float32frombits(uint32(ce.popValue())))
  1582  			ce.pushValue(math.Float64bits(v))
  1583  			frame.pc++
  1584  		case wazeroir.OperationKindExtend:
  1585  			if op.B1 == 1 {
  1586  				// Signed.
  1587  				v := int64(int32(ce.popValue()))
  1588  				ce.pushValue(uint64(v))
  1589  			} else {
  1590  				v := uint64(uint32(ce.popValue()))
  1591  				ce.pushValue(v)
  1592  			}
  1593  			frame.pc++
  1594  		case wazeroir.OperationKindSignExtend32From8:
  1595  			v := uint32(int8(ce.popValue()))
  1596  			ce.pushValue(uint64(v))
  1597  			frame.pc++
  1598  		case wazeroir.OperationKindSignExtend32From16:
  1599  			v := uint32(int16(ce.popValue()))
  1600  			ce.pushValue(uint64(v))
  1601  			frame.pc++
  1602  		case wazeroir.OperationKindSignExtend64From8:
  1603  			v := int64(int8(ce.popValue()))
  1604  			ce.pushValue(uint64(v))
  1605  			frame.pc++
  1606  		case wazeroir.OperationKindSignExtend64From16:
  1607  			v := int64(int16(ce.popValue()))
  1608  			ce.pushValue(uint64(v))
  1609  			frame.pc++
  1610  		case wazeroir.OperationKindSignExtend64From32:
  1611  			v := int64(int32(ce.popValue()))
  1612  			ce.pushValue(uint64(v))
  1613  			frame.pc++
  1614  		case wazeroir.OperationKindMemoryInit:
  1615  			dataInstance := dataInstances[op.U1]
  1616  			copySize := ce.popValue()
  1617  			inDataOffset := ce.popValue()
  1618  			inMemoryOffset := ce.popValue()
  1619  			if inDataOffset+copySize > uint64(len(dataInstance)) ||
  1620  				inMemoryOffset+copySize > uint64(len(memoryInst.Buffer)) {
  1621  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1622  			} else if copySize != 0 {
  1623  				copy(memoryInst.Buffer[inMemoryOffset:inMemoryOffset+copySize], dataInstance[inDataOffset:])
  1624  			}
  1625  			frame.pc++
  1626  		case wazeroir.OperationKindDataDrop:
  1627  			dataInstances[op.U1] = nil
  1628  			frame.pc++
  1629  		case wazeroir.OperationKindMemoryCopy:
  1630  			memLen := uint64(len(memoryInst.Buffer))
  1631  			copySize := ce.popValue()
  1632  			sourceOffset := ce.popValue()
  1633  			destinationOffset := ce.popValue()
  1634  			if sourceOffset+copySize > memLen || destinationOffset+copySize > memLen {
  1635  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1636  			} else if copySize != 0 {
  1637  				copy(memoryInst.Buffer[destinationOffset:],
  1638  					memoryInst.Buffer[sourceOffset:sourceOffset+copySize])
  1639  			}
  1640  			frame.pc++
  1641  		case wazeroir.OperationKindMemoryFill:
  1642  			fillSize := ce.popValue()
  1643  			value := byte(ce.popValue())
  1644  			offset := ce.popValue()
  1645  			if fillSize+offset > uint64(len(memoryInst.Buffer)) {
  1646  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1647  			} else if fillSize != 0 {
  1648  				// Uses the copy trick for faster filling buffer.
  1649  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1650  				buf := memoryInst.Buffer[offset : offset+fillSize]
  1651  				buf[0] = value
  1652  				for i := 1; i < len(buf); i *= 2 {
  1653  					copy(buf[i:], buf[:i])
  1654  				}
  1655  			}
  1656  			frame.pc++
  1657  		case wazeroir.OperationKindTableInit:
  1658  			elementInstance := elementInstances[op.U1]
  1659  			copySize := ce.popValue()
  1660  			inElementOffset := ce.popValue()
  1661  			inTableOffset := ce.popValue()
  1662  			table := tables[op.U2]
  1663  			if inElementOffset+copySize > uint64(len(elementInstance.References)) ||
  1664  				inTableOffset+copySize > uint64(len(table.References)) {
  1665  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1666  			} else if copySize != 0 {
  1667  				copy(table.References[inTableOffset:inTableOffset+copySize], elementInstance.References[inElementOffset:])
  1668  			}
  1669  			frame.pc++
		case wazeroir.OperationKindElemDrop:
			// Clear the references of the element instance at index op.U1. Its length becomes zero,
			// so a later table.init with a non-zero size on this index fails the bounds check.
			elementInstances[op.U1].References = nil
			frame.pc++
  1673  		case wazeroir.OperationKindTableCopy:
  1674  			srcTable, dstTable := tables[op.U1].References, tables[op.U2].References
  1675  			copySize := ce.popValue()
  1676  			sourceOffset := ce.popValue()
  1677  			destinationOffset := ce.popValue()
  1678  			if sourceOffset+copySize > uint64(len(srcTable)) || destinationOffset+copySize > uint64(len(dstTable)) {
  1679  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1680  			} else if copySize != 0 {
  1681  				copy(dstTable[destinationOffset:], srcTable[sourceOffset:sourceOffset+copySize])
  1682  			}
  1683  			frame.pc++
  1684  		case wazeroir.OperationKindRefFunc:
  1685  			ce.pushValue(uint64(uintptr(unsafe.Pointer(&functions[op.U1]))))
  1686  			frame.pc++
  1687  		case wazeroir.OperationKindTableGet:
  1688  			table := tables[op.U1]
  1689  
  1690  			offset := ce.popValue()
  1691  			if offset >= uint64(len(table.References)) {
  1692  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1693  			}
  1694  
  1695  			ce.pushValue(uint64(table.References[offset]))
  1696  			frame.pc++
  1697  		case wazeroir.OperationKindTableSet:
  1698  			table := tables[op.U1]
  1699  			ref := ce.popValue()
  1700  
  1701  			offset := ce.popValue()
  1702  			if offset >= uint64(len(table.References)) {
  1703  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1704  			}
  1705  
  1706  			table.References[offset] = uintptr(ref) // externrefs are opaque uint64.
  1707  			frame.pc++
  1708  		case wazeroir.OperationKindTableSize:
  1709  			table := tables[op.U1]
  1710  			ce.pushValue(uint64(len(table.References)))
  1711  			frame.pc++
  1712  		case wazeroir.OperationKindTableGrow:
  1713  			table := tables[op.U1]
  1714  			num, ref := ce.popValue(), ce.popValue()
  1715  			ret := table.Grow(uint32(num), uintptr(ref))
  1716  			ce.pushValue(uint64(ret))
  1717  			frame.pc++
  1718  		case wazeroir.OperationKindTableFill:
  1719  			table := tables[op.U1]
  1720  			num := ce.popValue()
  1721  			ref := uintptr(ce.popValue())
  1722  			offset := ce.popValue()
  1723  			if num+offset > uint64(len(table.References)) {
  1724  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1725  			} else if num > 0 {
  1726  				// Uses the copy trick for faster filling the region with the value.
  1727  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1728  				targetRegion := table.References[offset : offset+num]
  1729  				targetRegion[0] = ref
  1730  				for i := 1; i < len(targetRegion); i *= 2 {
  1731  					copy(targetRegion[i:], targetRegion[:i])
  1732  				}
  1733  			}
  1734  			frame.pc++
  1735  		case wazeroir.OperationKindV128Const:
  1736  			lo, hi := op.U1, op.U2
  1737  			ce.pushValue(lo)
  1738  			ce.pushValue(hi)
  1739  			frame.pc++
		case wazeroir.OperationKindV128Add:
			// Lane-wise addition of two 128-bit vectors; op.B1 selects the lane shape.
			// The second operand (y) is on top of the stack, and each vector has its high half
			// above its low half.
			yHigh, yLow := ce.popValue(), ce.popValue()
			xHigh, xLow := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Each byte lane is extracted, added with uint8 wrap-around, and shifted back into place.
				ce.pushValue(
					uint64(uint8(xLow>>8)+uint8(yLow>>8))<<8 | uint64(uint8(xLow)+uint8(yLow)) |
						uint64(uint8(xLow>>24)+uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)+uint8(yLow>>16))<<16 |
						uint64(uint8(xLow>>40)+uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)+uint8(yLow>>32))<<32 |
						uint64(uint8(xLow>>56)+uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)+uint8(yLow>>48))<<48,
				)
				ce.pushValue(
					uint64(uint8(xHigh>>8)+uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)+uint8(yHigh)) |
						uint64(uint8(xHigh>>24)+uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)+uint8(yHigh>>16))<<16 |
						uint64(uint8(xHigh>>40)+uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)+uint8(yHigh>>32))<<32 |
						uint64(uint8(xHigh>>56)+uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)+uint8(yHigh>>48))<<48,
				)
			case wazeroir.ShapeI16x8:
				// In Go, shifts bind tighter than +, so xLow>>16+yLow>>16 is (xLow>>16)+(yLow>>16).
				// Truncating that sum to uint16 keeps only the lane's wrapped result, which matches
				// the per-lane form used for the high half below.
				ce.pushValue(
					uint64(uint16(xLow>>16+yLow>>16))<<16 | uint64(uint16(xLow)+uint16(yLow)) |
						uint64(uint16(xLow>>48+yLow>>48))<<48 | uint64(uint16(xLow>>32+yLow>>32))<<32,
				)
				ce.pushValue(
					uint64(uint16(xHigh>>16)+uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)+uint16(yHigh)) |
						uint64(uint16(xHigh>>48)+uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)+uint16(yHigh>>32))<<32,
				)
			case wazeroir.ShapeI32x4:
				ce.pushValue(uint64(uint32(xLow>>32)+uint32(yLow>>32))<<32 | uint64(uint32(xLow)+uint32(yLow)))
				ce.pushValue(uint64(uint32(xHigh>>32)+uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)+uint32(yHigh)))
			case wazeroir.ShapeI64x2:
				// Each stack slot is exactly one lane.
				ce.pushValue(xLow + yLow)
				ce.pushValue(xHigh + yHigh)
			case wazeroir.ShapeF32x4:
				// addFloat32bits is defined elsewhere; presumably it adds two float32 bit patterns
				// and returns the result's bits as a uint64 — confirm against its definition.
				ce.pushValue(
					addFloat32bits(uint32(xLow), uint32(yLow)) | addFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
				)
				ce.pushValue(
					addFloat32bits(uint32(xHigh), uint32(yHigh)) | addFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
				)
			case wazeroir.ShapeF64x2:
				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow)))
				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh)))
			}
			frame.pc++
		case wazeroir.OperationKindV128Sub:
			// Lane-wise subtraction (x - y) of two 128-bit vectors; op.B1 selects the lane shape.
			// The subtrahend (y) is on top of the stack, and each vector has its high half above
			// its low half.
			yHigh, yLow := ce.popValue(), ce.popValue()
			xHigh, xLow := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Each byte lane is extracted, subtracted with uint8 wrap-around, and shifted back into place.
				ce.pushValue(
					uint64(uint8(xLow>>8)-uint8(yLow>>8))<<8 | uint64(uint8(xLow)-uint8(yLow)) |
						uint64(uint8(xLow>>24)-uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)-uint8(yLow>>16))<<16 |
						uint64(uint8(xLow>>40)-uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)-uint8(yLow>>32))<<32 |
						uint64(uint8(xLow>>56)-uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)-uint8(yLow>>48))<<48,
				)
				ce.pushValue(
					uint64(uint8(xHigh>>8)-uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)-uint8(yHigh)) |
						uint64(uint8(xHigh>>24)-uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)-uint8(yHigh>>16))<<16 |
						uint64(uint8(xHigh>>40)-uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)-uint8(yHigh>>32))<<32 |
						uint64(uint8(xHigh>>56)-uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)-uint8(yHigh>>48))<<48,
				)
			case wazeroir.ShapeI16x8:
				ce.pushValue(
					uint64(uint16(xLow>>16)-uint16(yLow>>16))<<16 | uint64(uint16(xLow)-uint16(yLow)) |
						uint64(uint16(xLow>>48)-uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)-uint16(yLow>>32))<<32,
				)
				ce.pushValue(
					uint64(uint16(xHigh>>16)-uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)-uint16(yHigh)) |
						uint64(uint16(xHigh>>48)-uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)-uint16(yHigh>>32))<<32,
				)
			case wazeroir.ShapeI32x4:
				// In Go, shifts bind tighter than -, so xLow>>32-yLow>>32 is (xLow>>32)-(yLow>>32).
				// Truncating that difference to uint32 keeps only the upper lane's wrapped result.
				ce.pushValue(uint64(uint32(xLow>>32-yLow>>32))<<32 | uint64(uint32(xLow)-uint32(yLow)))
				ce.pushValue(uint64(uint32(xHigh>>32-yHigh>>32))<<32 | uint64(uint32(xHigh)-uint32(yHigh)))
			case wazeroir.ShapeI64x2:
				// Each stack slot is exactly one lane.
				ce.pushValue(xLow - yLow)
				ce.pushValue(xHigh - yHigh)
			case wazeroir.ShapeF32x4:
				// subFloat32bits is defined elsewhere; presumably it subtracts two float32 bit
				// patterns and returns the result's bits as a uint64 — confirm against its definition.
				ce.pushValue(
					subFloat32bits(uint32(xLow), uint32(yLow)) | subFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
				)
				ce.pushValue(
					subFloat32bits(uint32(xHigh), uint32(yHigh)) | subFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
				)
			case wazeroir.ShapeF64x2:
				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow)))
				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh)))
			}
			frame.pc++
  1828  		case wazeroir.OperationKindV128Load:
  1829  			offset := ce.popMemoryOffset(op)
  1830  			switch op.B1 {
  1831  			case wazeroir.V128LoadType128:
  1832  				lo, ok := memoryInst.ReadUint64Le(offset)
  1833  				if !ok {
  1834  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1835  				}
  1836  				ce.pushValue(lo)
  1837  				hi, ok := memoryInst.ReadUint64Le(offset + 8)
  1838  				if !ok {
  1839  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1840  				}
  1841  				ce.pushValue(hi)
  1842  			case wazeroir.V128LoadType8x8s:
  1843  				data, ok := memoryInst.Read(offset, 8)
  1844  				if !ok {
  1845  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1846  				}
  1847  				ce.pushValue(
  1848  					uint64(uint16(int8(data[3])))<<48 | uint64(uint16(int8(data[2])))<<32 | uint64(uint16(int8(data[1])))<<16 | uint64(uint16(int8(data[0]))),
  1849  				)
  1850  				ce.pushValue(
  1851  					uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))),
  1852  				)
  1853  			case wazeroir.V128LoadType8x8u:
  1854  				data, ok := memoryInst.Read(offset, 8)
  1855  				if !ok {
  1856  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1857  				}
  1858  				ce.pushValue(
  1859  					uint64(data[3])<<48 | uint64(data[2])<<32 | uint64(data[1])<<16 | uint64(data[0]),
  1860  				)
  1861  				ce.pushValue(
  1862  					uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]),
  1863  				)
  1864  			case wazeroir.V128LoadType16x4s:
  1865  				data, ok := memoryInst.Read(offset, 8)
  1866  				if !ok {
  1867  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1868  				}
  1869  				ce.pushValue(
  1870  					uint64(int16(binary.LittleEndian.Uint16(data[2:])))<<32 |
  1871  						uint64(uint32(int16(binary.LittleEndian.Uint16(data)))),
  1872  				)
  1873  				ce.pushValue(
  1874  					uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 |
  1875  						uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))),
  1876  				)
  1877  			case wazeroir.V128LoadType16x4u:
  1878  				data, ok := memoryInst.Read(offset, 8)
  1879  				if !ok {
  1880  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1881  				}
  1882  				ce.pushValue(
  1883  					uint64(binary.LittleEndian.Uint16(data[2:]))<<32 | uint64(binary.LittleEndian.Uint16(data)),
  1884  				)
  1885  				ce.pushValue(
  1886  					uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])),
  1887  				)
  1888  			case wazeroir.V128LoadType32x2s:
  1889  				data, ok := memoryInst.Read(offset, 8)
  1890  				if !ok {
  1891  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1892  				}
  1893  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data))))
  1894  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:]))))
  1895  			case wazeroir.V128LoadType32x2u:
  1896  				data, ok := memoryInst.Read(offset, 8)
  1897  				if !ok {
  1898  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1899  				}
  1900  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data)))
  1901  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:])))
  1902  			case wazeroir.V128LoadType8Splat:
  1903  				v, ok := memoryInst.ReadByte(offset)
  1904  				if !ok {
  1905  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1906  				}
  1907  				v8 := uint64(v)<<56 | uint64(v)<<48 | uint64(v)<<40 | uint64(v)<<32 |
  1908  					uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v)
  1909  				ce.pushValue(v8)
  1910  				ce.pushValue(v8)
  1911  			case wazeroir.V128LoadType16Splat:
  1912  				v, ok := memoryInst.ReadUint16Le(offset)
  1913  				if !ok {
  1914  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1915  				}
  1916  				v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v)
  1917  				ce.pushValue(v4)
  1918  				ce.pushValue(v4)
  1919  			case wazeroir.V128LoadType32Splat:
  1920  				v, ok := memoryInst.ReadUint32Le(offset)
  1921  				if !ok {
  1922  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1923  				}
  1924  				vv := uint64(v)<<32 | uint64(v)
  1925  				ce.pushValue(vv)
  1926  				ce.pushValue(vv)
  1927  			case wazeroir.V128LoadType64Splat:
  1928  				lo, ok := memoryInst.ReadUint64Le(offset)
  1929  				if !ok {
  1930  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1931  				}
  1932  				ce.pushValue(lo)
  1933  				ce.pushValue(lo)
  1934  			case wazeroir.V128LoadType32zero:
  1935  				lo, ok := memoryInst.ReadUint32Le(offset)
  1936  				if !ok {
  1937  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1938  				}
  1939  				ce.pushValue(uint64(lo))
  1940  				ce.pushValue(0)
  1941  			case wazeroir.V128LoadType64zero:
  1942  				lo, ok := memoryInst.ReadUint64Le(offset)
  1943  				if !ok {
  1944  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1945  				}
  1946  				ce.pushValue(lo)
  1947  				ce.pushValue(0)
  1948  			}
  1949  			frame.pc++
		case wazeroir.OperationKindV128LoadLane:
			// Loads one lane from memory into an existing vector, leaving the other lanes untouched.
			// op.B1 is the lane width in bits and op.B2 is the lane index. The vector is on top of
			// the stack (high half above low half), with the memory offset operand beneath it.
			hi, lo := ce.popValue(), ce.popValue()
			offset := ce.popMemoryOffset(op)
			switch op.B1 {
			case 8:
				b, ok := memoryInst.ReadByte(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// Lanes 0-7 live in the low half and lanes 8-15 in the high half. s is the lane's
				// bit position within its half; the mask clears the old lane before OR-ing in b.
				if op.B2 < 8 {
					s := op.B2 << 3
					lo = (lo & ^(0xff << s)) | uint64(b)<<s
				} else {
					s := (op.B2 - 8) << 3
					hi = (hi & ^(0xff << s)) | uint64(b)<<s
				}
			case 16:
				b, ok := memoryInst.ReadUint16Le(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// Lanes 0-3 live in the low half and lanes 4-7 in the high half.
				if op.B2 < 4 {
					s := op.B2 << 4
					lo = (lo & ^(0xff_ff << s)) | uint64(b)<<s
				} else {
					s := (op.B2 - 4) << 4
					hi = (hi & ^(0xff_ff << s)) | uint64(b)<<s
				}
			case 32:
				b, ok := memoryInst.ReadUint32Le(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// Lanes 0-1 live in the low half and lanes 2-3 in the high half.
				if op.B2 < 2 {
					s := op.B2 << 5
					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
				} else {
					s := (op.B2 - 2) << 5
					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
				}
			case 64:
				b, ok := memoryInst.ReadUint64Le(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// A 64-bit lane replaces an entire half.
				if op.B2 == 0 {
					lo = b
				} else {
					hi = b
				}
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
  2004  		case wazeroir.OperationKindV128Store:
  2005  			hi, lo := ce.popValue(), ce.popValue()
  2006  			offset := ce.popMemoryOffset(op)
  2007  			if ok := memoryInst.WriteUint64Le(offset, lo); !ok {
  2008  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2009  			}
  2010  			if ok := memoryInst.WriteUint64Le(offset+8, hi); !ok {
  2011  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2012  			}
  2013  			frame.pc++
		case wazeroir.OperationKindV128StoreLane:
			// Stores a single lane of a vector to memory. op.B1 is the lane width in bits and op.B2
			// is the lane index. The vector is on top of the stack (high half above low half), with
			// the memory offset operand beneath it.
			hi, lo := ce.popValue(), ce.popValue()
			offset := ce.popMemoryOffset(op)
			// ok stays false when op.B1 is none of the widths below, which also ends in the panic.
			var ok bool
			switch op.B1 {
			case 8:
				// Lanes 0-7 come from the low half, lanes 8-15 from the high half.
				if op.B2 < 8 {
					ok = memoryInst.WriteByte(offset, byte(lo>>(op.B2*8)))
				} else {
					ok = memoryInst.WriteByte(offset, byte(hi>>((op.B2-8)*8)))
				}
			case 16:
				// Lanes 0-3 come from the low half, lanes 4-7 from the high half.
				if op.B2 < 4 {
					ok = memoryInst.WriteUint16Le(offset, uint16(lo>>(op.B2*16)))
				} else {
					ok = memoryInst.WriteUint16Le(offset, uint16(hi>>((op.B2-4)*16)))
				}
			case 32:
				// Lanes 0-1 come from the low half, lanes 2-3 from the high half.
				if op.B2 < 2 {
					ok = memoryInst.WriteUint32Le(offset, uint32(lo>>(op.B2*32)))
				} else {
					ok = memoryInst.WriteUint32Le(offset, uint32(hi>>((op.B2-2)*32)))
				}
			case 64:
				// A 64-bit lane is an entire half.
				if op.B2 == 0 {
					ok = memoryInst.WriteUint64Le(offset, lo)
				} else {
					ok = memoryInst.WriteUint64Le(offset, hi)
				}
			}
			if !ok {
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			frame.pc++
		case wazeroir.OperationKindV128ReplaceLane:
			// Replaces one lane of a vector with a scalar. op.B1 is the lane shape and op.B2 is the
			// lane index. The scalar is on top of the stack, with the vector beneath it (high half
			// above low half).
			v := ce.popValue()
			hi, lo := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// s is the lane's bit position within its half; the mask clears the old lane before
				// OR-ing in the low byte of v.
				if op.B2 < 8 {
					s := op.B2 << 3
					lo = (lo & ^(0xff << s)) | uint64(byte(v))<<s
				} else {
					s := (op.B2 - 8) << 3
					hi = (hi & ^(0xff << s)) | uint64(byte(v))<<s
				}
			case wazeroir.ShapeI16x8:
				if op.B2 < 4 {
					s := op.B2 << 4
					lo = (lo & ^(0xff_ff << s)) | uint64(uint16(v))<<s
				} else {
					s := (op.B2 - 4) << 4
					hi = (hi & ^(0xff_ff << s)) | uint64(uint16(v))<<s
				}
			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
				// Integer and float lanes of the same width are handled identically, as raw bits.
				if op.B2 < 2 {
					s := op.B2 << 5
					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
				} else {
					s := (op.B2 - 2) << 5
					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
				}
			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
				// A 64-bit lane replaces an entire half.
				if op.B2 == 0 {
					lo = v
				} else {
					hi = v
				}
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
		case wazeroir.OperationKindV128ExtractLane:
			// Extracts one lane of a vector as a scalar. op.B1 is the lane shape, op.B2 is the lane
			// index, and op.B3 requests sign extension for the 8- and 16-bit shapes.
			hi, lo := ce.popValue(), ce.popValue()
			var v uint64
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				var u8 byte
				if op.B2 < 8 {
					u8 = byte(lo >> (op.B2 * 8))
				} else {
					u8 = byte(hi >> ((op.B2 - 8) * 8))
				}
				if op.B3 {
					// sign-extend.
					// The value is sign-extended to 32 bits only; the upper 32 bits of the stack
					// slot stay zero.
					v = uint64(uint32(int8(u8)))
				} else {
					v = uint64(u8)
				}
			case wazeroir.ShapeI16x8:
				var u16 uint16
				if op.B2 < 4 {
					u16 = uint16(lo >> (op.B2 * 16))
				} else {
					u16 = uint16(hi >> ((op.B2 - 4) * 16))
				}
				if op.B3 {
					// sign-extend.
					// As above, sign extension stops at 32 bits.
					v = uint64(uint32(int16(u16)))
				} else {
					v = uint64(u16)
				}
			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
				// Integer and float lanes of the same width are handled identically, as raw bits.
				if op.B2 < 2 {
					v = uint64(uint32(lo >> (op.B2 * 32)))
				} else {
					v = uint64(uint32(hi >> ((op.B2 - 2) * 32)))
				}
			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
				// A 64-bit lane is an entire half.
				if op.B2 == 0 {
					v = lo
				} else {
					v = hi
				}
			}
			ce.pushValue(v)
			frame.pc++
  2131  		case wazeroir.OperationKindV128Splat:
  2132  			v := ce.popValue()
  2133  			var hi, lo uint64
  2134  			switch op.B1 {
  2135  			case wazeroir.ShapeI8x16:
  2136  				v8 := uint64(byte(v))<<56 | uint64(byte(v))<<48 | uint64(byte(v))<<40 | uint64(byte(v))<<32 |
  2137  					uint64(byte(v))<<24 | uint64(byte(v))<<16 | uint64(byte(v))<<8 | uint64(byte(v))
  2138  				hi, lo = v8, v8
  2139  			case wazeroir.ShapeI16x8:
  2140  				v4 := uint64(uint16(v))<<48 | uint64(uint16(v))<<32 | uint64(uint16(v))<<16 | uint64(uint16(v))
  2141  				hi, lo = v4, v4
  2142  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2143  				v2 := uint64(uint32(v))<<32 | uint64(uint32(v))
  2144  				lo, hi = v2, v2
  2145  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2146  				lo, hi = v, v
  2147  			}
  2148  			ce.pushValue(lo)
  2149  			ce.pushValue(hi)
  2150  			frame.pc++
  2151  		case wazeroir.OperationKindV128Swizzle:
  2152  			idxHi, idxLo := ce.popValue(), ce.popValue()
  2153  			baseHi, baseLo := ce.popValue(), ce.popValue()
  2154  			var newVal [16]byte
  2155  			for i := 0; i < 16; i++ {
  2156  				var id byte
  2157  				if i < 8 {
  2158  					id = byte(idxLo >> (i * 8))
  2159  				} else {
  2160  					id = byte(idxHi >> ((i - 8) * 8))
  2161  				}
  2162  				if id < 8 {
  2163  					newVal[i] = byte(baseLo >> (id * 8))
  2164  				} else if id < 16 {
  2165  					newVal[i] = byte(baseHi >> ((id - 8) * 8))
  2166  				}
  2167  			}
  2168  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2169  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2170  			frame.pc++
  2171  		case wazeroir.OperationKindV128Shuffle:
  2172  			xHi, xLo, yHi, yLo := ce.popValue(), ce.popValue(), ce.popValue(), ce.popValue()
  2173  			var newVal [16]byte
  2174  			for i, l := range op.Us {
  2175  				if l < 8 {
  2176  					newVal[i] = byte(yLo >> (l * 8))
  2177  				} else if l < 16 {
  2178  					newVal[i] = byte(yHi >> ((l - 8) * 8))
  2179  				} else if l < 24 {
  2180  					newVal[i] = byte(xLo >> ((l - 16) * 8))
  2181  				} else if l < 32 {
  2182  					newVal[i] = byte(xHi >> ((l - 24) * 8))
  2183  				}
  2184  			}
  2185  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2186  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2187  			frame.pc++
  2188  		case wazeroir.OperationKindV128AnyTrue:
  2189  			hi, lo := ce.popValue(), ce.popValue()
  2190  			if hi != 0 || lo != 0 {
  2191  				ce.pushValue(1)
  2192  			} else {
  2193  				ce.pushValue(0)
  2194  			}
  2195  			frame.pc++
  2196  		case wazeroir.OperationKindV128AllTrue:
  2197  			hi, lo := ce.popValue(), ce.popValue()
  2198  			var ret bool
  2199  			switch op.B1 {
  2200  			case wazeroir.ShapeI8x16:
  2201  				ret = (uint8(lo) != 0) && (uint8(lo>>8) != 0) && (uint8(lo>>16) != 0) && (uint8(lo>>24) != 0) &&
  2202  					(uint8(lo>>32) != 0) && (uint8(lo>>40) != 0) && (uint8(lo>>48) != 0) && (uint8(lo>>56) != 0) &&
  2203  					(uint8(hi) != 0) && (uint8(hi>>8) != 0) && (uint8(hi>>16) != 0) && (uint8(hi>>24) != 0) &&
  2204  					(uint8(hi>>32) != 0) && (uint8(hi>>40) != 0) && (uint8(hi>>48) != 0) && (uint8(hi>>56) != 0)
  2205  			case wazeroir.ShapeI16x8:
  2206  				ret = (uint16(lo) != 0) && (uint16(lo>>16) != 0) && (uint16(lo>>32) != 0) && (uint16(lo>>48) != 0) &&
  2207  					(uint16(hi) != 0) && (uint16(hi>>16) != 0) && (uint16(hi>>32) != 0) && (uint16(hi>>48) != 0)
  2208  			case wazeroir.ShapeI32x4:
  2209  				ret = (uint32(lo) != 0) && (uint32(lo>>32) != 0) &&
  2210  					(uint32(hi) != 0) && (uint32(hi>>32) != 0)
  2211  			case wazeroir.ShapeI64x2:
  2212  				ret = (lo != 0) &&
  2213  					(hi != 0)
  2214  			}
  2215  			if ret {
  2216  				ce.pushValue(1)
  2217  			} else {
  2218  				ce.pushValue(0)
  2219  			}
  2220  			frame.pc++
  2221  		case wazeroir.OperationKindV128BitMask:
  2222  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitmask-extraction
  2223  			hi, lo := ce.popValue(), ce.popValue()
  2224  			var res uint64
  2225  			switch op.B1 {
  2226  			case wazeroir.ShapeI8x16:
  2227  				for i := 0; i < 8; i++ {
  2228  					if int8(lo>>(i*8)) < 0 {
  2229  						res |= 1 << i
  2230  					}
  2231  				}
  2232  				for i := 0; i < 8; i++ {
  2233  					if int8(hi>>(i*8)) < 0 {
  2234  						res |= 1 << (i + 8)
  2235  					}
  2236  				}
  2237  			case wazeroir.ShapeI16x8:
  2238  				for i := 0; i < 4; i++ {
  2239  					if int16(lo>>(i*16)) < 0 {
  2240  						res |= 1 << i
  2241  					}
  2242  				}
  2243  				for i := 0; i < 4; i++ {
  2244  					if int16(hi>>(i*16)) < 0 {
  2245  						res |= 1 << (i + 4)
  2246  					}
  2247  				}
  2248  			case wazeroir.ShapeI32x4:
  2249  				for i := 0; i < 2; i++ {
  2250  					if int32(lo>>(i*32)) < 0 {
  2251  						res |= 1 << i
  2252  					}
  2253  				}
  2254  				for i := 0; i < 2; i++ {
  2255  					if int32(hi>>(i*32)) < 0 {
  2256  						res |= 1 << (i + 2)
  2257  					}
  2258  				}
  2259  			case wazeroir.ShapeI64x2:
  2260  				if int64(lo) < 0 {
  2261  					res |= 0b01
  2262  				}
  2263  				if int(hi) < 0 {
  2264  					res |= 0b10
  2265  				}
  2266  			}
  2267  			ce.pushValue(res)
  2268  			frame.pc++
  2269  		case wazeroir.OperationKindV128And:
  2270  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2271  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2272  			ce.pushValue(x1Lo & x2Lo)
  2273  			ce.pushValue(x1Hi & x2Hi)
  2274  			frame.pc++
  2275  		case wazeroir.OperationKindV128Not:
  2276  			hi, lo := ce.popValue(), ce.popValue()
  2277  			ce.pushValue(^lo)
  2278  			ce.pushValue(^hi)
  2279  			frame.pc++
  2280  		case wazeroir.OperationKindV128Or:
  2281  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2282  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2283  			ce.pushValue(x1Lo | x2Lo)
  2284  			ce.pushValue(x1Hi | x2Hi)
  2285  			frame.pc++
  2286  		case wazeroir.OperationKindV128Xor:
  2287  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2288  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2289  			ce.pushValue(x1Lo ^ x2Lo)
  2290  			ce.pushValue(x1Hi ^ x2Hi)
  2291  			frame.pc++
  2292  		case wazeroir.OperationKindV128Bitselect:
  2293  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitwise-select
  2294  			cHi, cLo := ce.popValue(), ce.popValue()
  2295  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2296  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2297  			// v128.or(v128.and(v1, c), v128.and(v2, v128.not(c)))
  2298  			ce.pushValue((x1Lo & cLo) | (x2Lo & (^cLo)))
  2299  			ce.pushValue((x1Hi & cHi) | (x2Hi & (^cHi)))
  2300  			frame.pc++
  2301  		case wazeroir.OperationKindV128AndNot:
  2302  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2303  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2304  			ce.pushValue(x1Lo & (^x2Lo))
  2305  			ce.pushValue(x1Hi & (^x2Hi))
  2306  			frame.pc++
		case wazeroir.OperationKindV128Shl:
			// Lane-wise left shift. The shift amount is on top of the stack with the vector beneath
			// it; op.B1 selects the lane shape. The amount is reduced modulo the lane width.
			s := ce.popValue()
			hi, lo := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				s = s % 8
				// Each lane is moved down to bit 0, shifted, truncated to the lane width so that
				// bits shifted out do not spill into the neighbouring lane, and moved back.
				lo = uint64(uint8(lo<<s)) |
					uint64(uint8((lo>>8)<<s))<<8 |
					uint64(uint8((lo>>16)<<s))<<16 |
					uint64(uint8((lo>>24)<<s))<<24 |
					uint64(uint8((lo>>32)<<s))<<32 |
					uint64(uint8((lo>>40)<<s))<<40 |
					uint64(uint8((lo>>48)<<s))<<48 |
					uint64(uint8((lo>>56)<<s))<<56
				hi = uint64(uint8(hi<<s)) |
					uint64(uint8((hi>>8)<<s))<<8 |
					uint64(uint8((hi>>16)<<s))<<16 |
					uint64(uint8((hi>>24)<<s))<<24 |
					uint64(uint8((hi>>32)<<s))<<32 |
					uint64(uint8((hi>>40)<<s))<<40 |
					uint64(uint8((hi>>48)<<s))<<48 |
					uint64(uint8((hi>>56)<<s))<<56
			case wazeroir.ShapeI16x8:
				s = s % 16
				lo = uint64(uint16(lo<<s)) |
					uint64(uint16((lo>>16)<<s))<<16 |
					uint64(uint16((lo>>32)<<s))<<32 |
					uint64(uint16((lo>>48)<<s))<<48
				hi = uint64(uint16(hi<<s)) |
					uint64(uint16((hi>>16)<<s))<<16 |
					uint64(uint16((hi>>32)<<s))<<32 |
					uint64(uint16((hi>>48)<<s))<<48
			case wazeroir.ShapeI32x4:
				s = s % 32
				lo = uint64(uint32(lo<<s)) | uint64(uint32((lo>>32)<<s))<<32
				hi = uint64(uint32(hi<<s)) | uint64(uint32((hi>>32)<<s))<<32
			case wazeroir.ShapeI64x2:
				// Each half is exactly one lane.
				s = s % 64
				lo = lo << s
				hi = hi << s
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
		case wazeroir.OperationKindV128Shr:
			// Lane-wise right shift. The shift amount is on top of the stack with the vector beneath
			// it; op.B1 selects the lane shape and op.B3 selects a signed (arithmetic) shift over an
			// unsigned (logical) one. The amount is reduced modulo the lane width.
			s := ce.popValue()
			hi, lo := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				s = s % 8
				if op.B3 { // signed
					// Converting each lane to int8 before shifting makes >> replicate the sign bit;
					// converting back to uint8 keeps the result inside the lane.
					lo = uint64(uint8(int8(lo)>>s)) |
						uint64(uint8(int8(lo>>8)>>s))<<8 |
						uint64(uint8(int8(lo>>16)>>s))<<16 |
						uint64(uint8(int8(lo>>24)>>s))<<24 |
						uint64(uint8(int8(lo>>32)>>s))<<32 |
						uint64(uint8(int8(lo>>40)>>s))<<40 |
						uint64(uint8(int8(lo>>48)>>s))<<48 |
						uint64(uint8(int8(lo>>56)>>s))<<56
					hi = uint64(uint8(int8(hi)>>s)) |
						uint64(uint8(int8(hi>>8)>>s))<<8 |
						uint64(uint8(int8(hi>>16)>>s))<<16 |
						uint64(uint8(int8(hi>>24)>>s))<<24 |
						uint64(uint8(int8(hi>>32)>>s))<<32 |
						uint64(uint8(int8(hi>>40)>>s))<<40 |
						uint64(uint8(int8(hi>>48)>>s))<<48 |
						uint64(uint8(int8(hi>>56)>>s))<<56
				} else {
					// Truncating each lane to uint8 before shifting ensures zeros are shifted in
					// rather than bits of the neighbouring lane.
					lo = uint64(uint8(lo)>>s) |
						uint64(uint8(lo>>8)>>s)<<8 |
						uint64(uint8(lo>>16)>>s)<<16 |
						uint64(uint8(lo>>24)>>s)<<24 |
						uint64(uint8(lo>>32)>>s)<<32 |
						uint64(uint8(lo>>40)>>s)<<40 |
						uint64(uint8(lo>>48)>>s)<<48 |
						uint64(uint8(lo>>56)>>s)<<56
					hi = uint64(uint8(hi)>>s) |
						uint64(uint8(hi>>8)>>s)<<8 |
						uint64(uint8(hi>>16)>>s)<<16 |
						uint64(uint8(hi>>24)>>s)<<24 |
						uint64(uint8(hi>>32)>>s)<<32 |
						uint64(uint8(hi>>40)>>s)<<40 |
						uint64(uint8(hi>>48)>>s)<<48 |
						uint64(uint8(hi>>56)>>s)<<56
				}
			case wazeroir.ShapeI16x8:
				s = s % 16
				if op.B3 { // signed
					lo = uint64(uint16(int16(lo)>>s)) |
						uint64(uint16(int16(lo>>16)>>s))<<16 |
						uint64(uint16(int16(lo>>32)>>s))<<32 |
						uint64(uint16(int16(lo>>48)>>s))<<48
					hi = uint64(uint16(int16(hi)>>s)) |
						uint64(uint16(int16(hi>>16)>>s))<<16 |
						uint64(uint16(int16(hi>>32)>>s))<<32 |
						uint64(uint16(int16(hi>>48)>>s))<<48
				} else {
					lo = uint64(uint16(lo)>>s) |
						uint64(uint16(lo>>16)>>s)<<16 |
						uint64(uint16(lo>>32)>>s)<<32 |
						uint64(uint16(lo>>48)>>s)<<48
					hi = uint64(uint16(hi)>>s) |
						uint64(uint16(hi>>16)>>s)<<16 |
						uint64(uint16(hi>>32)>>s)<<32 |
						uint64(uint16(hi>>48)>>s)<<48
				}
			case wazeroir.ShapeI32x4:
				s = s % 32
				// op.B3 set means a signed shift, as in the other shapes.
				if op.B3 {
					lo = uint64(uint32(int32(lo)>>s)) | uint64(uint32(int32(lo>>32)>>s))<<32
					hi = uint64(uint32(int32(hi)>>s)) | uint64(uint32(int32(hi>>32)>>s))<<32
				} else {
					lo = uint64(uint32(lo)>>s) | uint64(uint32(lo>>32)>>s)<<32
					hi = uint64(uint32(hi)>>s) | uint64(uint32(hi>>32)>>s)<<32
				}
			case wazeroir.ShapeI64x2:
				// Each half is exactly one lane.
				s = s % 64
				if op.B3 { // signed
					lo = uint64(int64(lo) >> s)
					hi = uint64(int64(hi) >> s)
				} else {
					lo = lo >> s
					hi = hi >> s
				}

			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
  2436  		case wazeroir.OperationKindV128Cmp:
        			// Lane-wise comparison of two 128-bit vectors. Each vector occupies two
        			// uint64 stack slots; the high half is popped first, then the low half.
        			// x2 (the right-hand operand) is popped before x1 (the left-hand operand).
  2437  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2438  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
        			// result receives one boolean per lane, ordered from the least-significant
        			// lane of the low half up to the most-significant lane of the high half.
        			// op.B1 selects both the lane shape and the comparison. Signed variants
        			// reinterpret each lane as intN; unsigned variants use byte/uintN.
  2439  			var result []bool
  2440  			switch op.B1 {
  2441  			case wazeroir.V128CmpTypeI8x16Eq:
  2442  				result = []bool{
  2443  					byte(x1Lo>>0) == byte(x2Lo>>0), byte(x1Lo>>8) == byte(x2Lo>>8),
  2444  					byte(x1Lo>>16) == byte(x2Lo>>16), byte(x1Lo>>24) == byte(x2Lo>>24),
  2445  					byte(x1Lo>>32) == byte(x2Lo>>32), byte(x1Lo>>40) == byte(x2Lo>>40),
  2446  					byte(x1Lo>>48) == byte(x2Lo>>48), byte(x1Lo>>56) == byte(x2Lo>>56),
  2447  					byte(x1Hi>>0) == byte(x2Hi>>0), byte(x1Hi>>8) == byte(x2Hi>>8),
  2448  					byte(x1Hi>>16) == byte(x2Hi>>16), byte(x1Hi>>24) == byte(x2Hi>>24),
  2449  					byte(x1Hi>>32) == byte(x2Hi>>32), byte(x1Hi>>40) == byte(x2Hi>>40),
  2450  					byte(x1Hi>>48) == byte(x2Hi>>48), byte(x1Hi>>56) == byte(x2Hi>>56),
  2451  				}
  2452  			case wazeroir.V128CmpTypeI8x16Ne:
  2453  				result = []bool{
  2454  					byte(x1Lo>>0) != byte(x2Lo>>0), byte(x1Lo>>8) != byte(x2Lo>>8),
  2455  					byte(x1Lo>>16) != byte(x2Lo>>16), byte(x1Lo>>24) != byte(x2Lo>>24),
  2456  					byte(x1Lo>>32) != byte(x2Lo>>32), byte(x1Lo>>40) != byte(x2Lo>>40),
  2457  					byte(x1Lo>>48) != byte(x2Lo>>48), byte(x1Lo>>56) != byte(x2Lo>>56),
  2458  					byte(x1Hi>>0) != byte(x2Hi>>0), byte(x1Hi>>8) != byte(x2Hi>>8),
  2459  					byte(x1Hi>>16) != byte(x2Hi>>16), byte(x1Hi>>24) != byte(x2Hi>>24),
  2460  					byte(x1Hi>>32) != byte(x2Hi>>32), byte(x1Hi>>40) != byte(x2Hi>>40),
  2461  					byte(x1Hi>>48) != byte(x2Hi>>48), byte(x1Hi>>56) != byte(x2Hi>>56),
  2462  				}
  2463  			case wazeroir.V128CmpTypeI8x16LtS:
  2464  				result = []bool{
  2465  					int8(x1Lo>>0) < int8(x2Lo>>0), int8(x1Lo>>8) < int8(x2Lo>>8),
  2466  					int8(x1Lo>>16) < int8(x2Lo>>16), int8(x1Lo>>24) < int8(x2Lo>>24),
  2467  					int8(x1Lo>>32) < int8(x2Lo>>32), int8(x1Lo>>40) < int8(x2Lo>>40),
  2468  					int8(x1Lo>>48) < int8(x2Lo>>48), int8(x1Lo>>56) < int8(x2Lo>>56),
  2469  					int8(x1Hi>>0) < int8(x2Hi>>0), int8(x1Hi>>8) < int8(x2Hi>>8),
  2470  					int8(x1Hi>>16) < int8(x2Hi>>16), int8(x1Hi>>24) < int8(x2Hi>>24),
  2471  					int8(x1Hi>>32) < int8(x2Hi>>32), int8(x1Hi>>40) < int8(x2Hi>>40),
  2472  					int8(x1Hi>>48) < int8(x2Hi>>48), int8(x1Hi>>56) < int8(x2Hi>>56),
  2473  				}
  2474  			case wazeroir.V128CmpTypeI8x16LtU:
  2475  				result = []bool{
  2476  					byte(x1Lo>>0) < byte(x2Lo>>0), byte(x1Lo>>8) < byte(x2Lo>>8),
  2477  					byte(x1Lo>>16) < byte(x2Lo>>16), byte(x1Lo>>24) < byte(x2Lo>>24),
  2478  					byte(x1Lo>>32) < byte(x2Lo>>32), byte(x1Lo>>40) < byte(x2Lo>>40),
  2479  					byte(x1Lo>>48) < byte(x2Lo>>48), byte(x1Lo>>56) < byte(x2Lo>>56),
  2480  					byte(x1Hi>>0) < byte(x2Hi>>0), byte(x1Hi>>8) < byte(x2Hi>>8),
  2481  					byte(x1Hi>>16) < byte(x2Hi>>16), byte(x1Hi>>24) < byte(x2Hi>>24),
  2482  					byte(x1Hi>>32) < byte(x2Hi>>32), byte(x1Hi>>40) < byte(x2Hi>>40),
  2483  					byte(x1Hi>>48) < byte(x2Hi>>48), byte(x1Hi>>56) < byte(x2Hi>>56),
  2484  				}
  2485  			case wazeroir.V128CmpTypeI8x16GtS:
  2486  				result = []bool{
  2487  					int8(x1Lo>>0) > int8(x2Lo>>0), int8(x1Lo>>8) > int8(x2Lo>>8),
  2488  					int8(x1Lo>>16) > int8(x2Lo>>16), int8(x1Lo>>24) > int8(x2Lo>>24),
  2489  					int8(x1Lo>>32) > int8(x2Lo>>32), int8(x1Lo>>40) > int8(x2Lo>>40),
  2490  					int8(x1Lo>>48) > int8(x2Lo>>48), int8(x1Lo>>56) > int8(x2Lo>>56),
  2491  					int8(x1Hi>>0) > int8(x2Hi>>0), int8(x1Hi>>8) > int8(x2Hi>>8),
  2492  					int8(x1Hi>>16) > int8(x2Hi>>16), int8(x1Hi>>24) > int8(x2Hi>>24),
  2493  					int8(x1Hi>>32) > int8(x2Hi>>32), int8(x1Hi>>40) > int8(x2Hi>>40),
  2494  					int8(x1Hi>>48) > int8(x2Hi>>48), int8(x1Hi>>56) > int8(x2Hi>>56),
  2495  				}
  2496  			case wazeroir.V128CmpTypeI8x16GtU:
  2497  				result = []bool{
  2498  					byte(x1Lo>>0) > byte(x2Lo>>0), byte(x1Lo>>8) > byte(x2Lo>>8),
  2499  					byte(x1Lo>>16) > byte(x2Lo>>16), byte(x1Lo>>24) > byte(x2Lo>>24),
  2500  					byte(x1Lo>>32) > byte(x2Lo>>32), byte(x1Lo>>40) > byte(x2Lo>>40),
  2501  					byte(x1Lo>>48) > byte(x2Lo>>48), byte(x1Lo>>56) > byte(x2Lo>>56),
  2502  					byte(x1Hi>>0) > byte(x2Hi>>0), byte(x1Hi>>8) > byte(x2Hi>>8),
  2503  					byte(x1Hi>>16) > byte(x2Hi>>16), byte(x1Hi>>24) > byte(x2Hi>>24),
  2504  					byte(x1Hi>>32) > byte(x2Hi>>32), byte(x1Hi>>40) > byte(x2Hi>>40),
  2505  					byte(x1Hi>>48) > byte(x2Hi>>48), byte(x1Hi>>56) > byte(x2Hi>>56),
  2506  				}
  2507  			case wazeroir.V128CmpTypeI8x16LeS:
  2508  				result = []bool{
  2509  					int8(x1Lo>>0) <= int8(x2Lo>>0), int8(x1Lo>>8) <= int8(x2Lo>>8),
  2510  					int8(x1Lo>>16) <= int8(x2Lo>>16), int8(x1Lo>>24) <= int8(x2Lo>>24),
  2511  					int8(x1Lo>>32) <= int8(x2Lo>>32), int8(x1Lo>>40) <= int8(x2Lo>>40),
  2512  					int8(x1Lo>>48) <= int8(x2Lo>>48), int8(x1Lo>>56) <= int8(x2Lo>>56),
  2513  					int8(x1Hi>>0) <= int8(x2Hi>>0), int8(x1Hi>>8) <= int8(x2Hi>>8),
  2514  					int8(x1Hi>>16) <= int8(x2Hi>>16), int8(x1Hi>>24) <= int8(x2Hi>>24),
  2515  					int8(x1Hi>>32) <= int8(x2Hi>>32), int8(x1Hi>>40) <= int8(x2Hi>>40),
  2516  					int8(x1Hi>>48) <= int8(x2Hi>>48), int8(x1Hi>>56) <= int8(x2Hi>>56),
  2517  				}
  2518  			case wazeroir.V128CmpTypeI8x16LeU:
  2519  				result = []bool{
  2520  					byte(x1Lo>>0) <= byte(x2Lo>>0), byte(x1Lo>>8) <= byte(x2Lo>>8),
  2521  					byte(x1Lo>>16) <= byte(x2Lo>>16), byte(x1Lo>>24) <= byte(x2Lo>>24),
  2522  					byte(x1Lo>>32) <= byte(x2Lo>>32), byte(x1Lo>>40) <= byte(x2Lo>>40),
  2523  					byte(x1Lo>>48) <= byte(x2Lo>>48), byte(x1Lo>>56) <= byte(x2Lo>>56),
  2524  					byte(x1Hi>>0) <= byte(x2Hi>>0), byte(x1Hi>>8) <= byte(x2Hi>>8),
  2525  					byte(x1Hi>>16) <= byte(x2Hi>>16), byte(x1Hi>>24) <= byte(x2Hi>>24),
  2526  					byte(x1Hi>>32) <= byte(x2Hi>>32), byte(x1Hi>>40) <= byte(x2Hi>>40),
  2527  					byte(x1Hi>>48) <= byte(x2Hi>>48), byte(x1Hi>>56) <= byte(x2Hi>>56),
  2528  				}
  2529  			case wazeroir.V128CmpTypeI8x16GeS:
  2530  				result = []bool{
  2531  					int8(x1Lo>>0) >= int8(x2Lo>>0), int8(x1Lo>>8) >= int8(x2Lo>>8),
  2532  					int8(x1Lo>>16) >= int8(x2Lo>>16), int8(x1Lo>>24) >= int8(x2Lo>>24),
  2533  					int8(x1Lo>>32) >= int8(x2Lo>>32), int8(x1Lo>>40) >= int8(x2Lo>>40),
  2534  					int8(x1Lo>>48) >= int8(x2Lo>>48), int8(x1Lo>>56) >= int8(x2Lo>>56),
  2535  					int8(x1Hi>>0) >= int8(x2Hi>>0), int8(x1Hi>>8) >= int8(x2Hi>>8),
  2536  					int8(x1Hi>>16) >= int8(x2Hi>>16), int8(x1Hi>>24) >= int8(x2Hi>>24),
  2537  					int8(x1Hi>>32) >= int8(x2Hi>>32), int8(x1Hi>>40) >= int8(x2Hi>>40),
  2538  					int8(x1Hi>>48) >= int8(x2Hi>>48), int8(x1Hi>>56) >= int8(x2Hi>>56),
  2539  				}
  2540  			case wazeroir.V128CmpTypeI8x16GeU:
  2541  				result = []bool{
  2542  					byte(x1Lo>>0) >= byte(x2Lo>>0), byte(x1Lo>>8) >= byte(x2Lo>>8),
  2543  					byte(x1Lo>>16) >= byte(x2Lo>>16), byte(x1Lo>>24) >= byte(x2Lo>>24),
  2544  					byte(x1Lo>>32) >= byte(x2Lo>>32), byte(x1Lo>>40) >= byte(x2Lo>>40),
  2545  					byte(x1Lo>>48) >= byte(x2Lo>>48), byte(x1Lo>>56) >= byte(x2Lo>>56),
  2546  					byte(x1Hi>>0) >= byte(x2Hi>>0), byte(x1Hi>>8) >= byte(x2Hi>>8),
  2547  					byte(x1Hi>>16) >= byte(x2Hi>>16), byte(x1Hi>>24) >= byte(x2Hi>>24),
  2548  					byte(x1Hi>>32) >= byte(x2Hi>>32), byte(x1Hi>>40) >= byte(x2Hi>>40),
  2549  					byte(x1Hi>>48) >= byte(x2Hi>>48), byte(x1Hi>>56) >= byte(x2Hi>>56),
  2550  				}
  2551  			case wazeroir.V128CmpTypeI16x8Eq:
  2552  				result = []bool{
  2553  					uint16(x1Lo>>0) == uint16(x2Lo>>0), uint16(x1Lo>>16) == uint16(x2Lo>>16),
  2554  					uint16(x1Lo>>32) == uint16(x2Lo>>32), uint16(x1Lo>>48) == uint16(x2Lo>>48),
  2555  					uint16(x1Hi>>0) == uint16(x2Hi>>0), uint16(x1Hi>>16) == uint16(x2Hi>>16),
  2556  					uint16(x1Hi>>32) == uint16(x2Hi>>32), uint16(x1Hi>>48) == uint16(x2Hi>>48),
  2557  				}
  2558  			case wazeroir.V128CmpTypeI16x8Ne:
  2559  				result = []bool{
  2560  					uint16(x1Lo>>0) != uint16(x2Lo>>0), uint16(x1Lo>>16) != uint16(x2Lo>>16),
  2561  					uint16(x1Lo>>32) != uint16(x2Lo>>32), uint16(x1Lo>>48) != uint16(x2Lo>>48),
  2562  					uint16(x1Hi>>0) != uint16(x2Hi>>0), uint16(x1Hi>>16) != uint16(x2Hi>>16),
  2563  					uint16(x1Hi>>32) != uint16(x2Hi>>32), uint16(x1Hi>>48) != uint16(x2Hi>>48),
  2564  				}
  2565  			case wazeroir.V128CmpTypeI16x8LtS:
  2566  				result = []bool{
  2567  					int16(x1Lo>>0) < int16(x2Lo>>0), int16(x1Lo>>16) < int16(x2Lo>>16),
  2568  					int16(x1Lo>>32) < int16(x2Lo>>32), int16(x1Lo>>48) < int16(x2Lo>>48),
  2569  					int16(x1Hi>>0) < int16(x2Hi>>0), int16(x1Hi>>16) < int16(x2Hi>>16),
  2570  					int16(x1Hi>>32) < int16(x2Hi>>32), int16(x1Hi>>48) < int16(x2Hi>>48),
  2571  				}
  2572  			case wazeroir.V128CmpTypeI16x8LtU:
  2573  				result = []bool{
  2574  					uint16(x1Lo>>0) < uint16(x2Lo>>0), uint16(x1Lo>>16) < uint16(x2Lo>>16),
  2575  					uint16(x1Lo>>32) < uint16(x2Lo>>32), uint16(x1Lo>>48) < uint16(x2Lo>>48),
  2576  					uint16(x1Hi>>0) < uint16(x2Hi>>0), uint16(x1Hi>>16) < uint16(x2Hi>>16),
  2577  					uint16(x1Hi>>32) < uint16(x2Hi>>32), uint16(x1Hi>>48) < uint16(x2Hi>>48),
  2578  				}
  2579  			case wazeroir.V128CmpTypeI16x8GtS:
  2580  				result = []bool{
  2581  					int16(x1Lo>>0) > int16(x2Lo>>0), int16(x1Lo>>16) > int16(x2Lo>>16),
  2582  					int16(x1Lo>>32) > int16(x2Lo>>32), int16(x1Lo>>48) > int16(x2Lo>>48),
  2583  					int16(x1Hi>>0) > int16(x2Hi>>0), int16(x1Hi>>16) > int16(x2Hi>>16),
  2584  					int16(x1Hi>>32) > int16(x2Hi>>32), int16(x1Hi>>48) > int16(x2Hi>>48),
  2585  				}
  2586  			case wazeroir.V128CmpTypeI16x8GtU:
  2587  				result = []bool{
  2588  					uint16(x1Lo>>0) > uint16(x2Lo>>0), uint16(x1Lo>>16) > uint16(x2Lo>>16),
  2589  					uint16(x1Lo>>32) > uint16(x2Lo>>32), uint16(x1Lo>>48) > uint16(x2Lo>>48),
  2590  					uint16(x1Hi>>0) > uint16(x2Hi>>0), uint16(x1Hi>>16) > uint16(x2Hi>>16),
  2591  					uint16(x1Hi>>32) > uint16(x2Hi>>32), uint16(x1Hi>>48) > uint16(x2Hi>>48),
  2592  				}
  2593  			case wazeroir.V128CmpTypeI16x8LeS:
  2594  				result = []bool{
  2595  					int16(x1Lo>>0) <= int16(x2Lo>>0), int16(x1Lo>>16) <= int16(x2Lo>>16),
  2596  					int16(x1Lo>>32) <= int16(x2Lo>>32), int16(x1Lo>>48) <= int16(x2Lo>>48),
  2597  					int16(x1Hi>>0) <= int16(x2Hi>>0), int16(x1Hi>>16) <= int16(x2Hi>>16),
  2598  					int16(x1Hi>>32) <= int16(x2Hi>>32), int16(x1Hi>>48) <= int16(x2Hi>>48),
  2599  				}
  2600  			case wazeroir.V128CmpTypeI16x8LeU:
  2601  				result = []bool{
  2602  					uint16(x1Lo>>0) <= uint16(x2Lo>>0), uint16(x1Lo>>16) <= uint16(x2Lo>>16),
  2603  					uint16(x1Lo>>32) <= uint16(x2Lo>>32), uint16(x1Lo>>48) <= uint16(x2Lo>>48),
  2604  					uint16(x1Hi>>0) <= uint16(x2Hi>>0), uint16(x1Hi>>16) <= uint16(x2Hi>>16),
  2605  					uint16(x1Hi>>32) <= uint16(x2Hi>>32), uint16(x1Hi>>48) <= uint16(x2Hi>>48),
  2606  				}
  2607  			case wazeroir.V128CmpTypeI16x8GeS:
  2608  				result = []bool{
  2609  					int16(x1Lo>>0) >= int16(x2Lo>>0), int16(x1Lo>>16) >= int16(x2Lo>>16),
  2610  					int16(x1Lo>>32) >= int16(x2Lo>>32), int16(x1Lo>>48) >= int16(x2Lo>>48),
  2611  					int16(x1Hi>>0) >= int16(x2Hi>>0), int16(x1Hi>>16) >= int16(x2Hi>>16),
  2612  					int16(x1Hi>>32) >= int16(x2Hi>>32), int16(x1Hi>>48) >= int16(x2Hi>>48),
  2613  				}
  2614  			case wazeroir.V128CmpTypeI16x8GeU:
  2615  				result = []bool{
  2616  					uint16(x1Lo>>0) >= uint16(x2Lo>>0), uint16(x1Lo>>16) >= uint16(x2Lo>>16),
  2617  					uint16(x1Lo>>32) >= uint16(x2Lo>>32), uint16(x1Lo>>48) >= uint16(x2Lo>>48),
  2618  					uint16(x1Hi>>0) >= uint16(x2Hi>>0), uint16(x1Hi>>16) >= uint16(x2Hi>>16),
  2619  					uint16(x1Hi>>32) >= uint16(x2Hi>>32), uint16(x1Hi>>48) >= uint16(x2Hi>>48),
  2620  				}
  2621  			case wazeroir.V128CmpTypeI32x4Eq:
  2622  				result = []bool{
  2623  					uint32(x1Lo>>0) == uint32(x2Lo>>0), uint32(x1Lo>>32) == uint32(x2Lo>>32),
  2624  					uint32(x1Hi>>0) == uint32(x2Hi>>0), uint32(x1Hi>>32) == uint32(x2Hi>>32),
  2625  				}
  2626  			case wazeroir.V128CmpTypeI32x4Ne:
  2627  				result = []bool{
  2628  					uint32(x1Lo>>0) != uint32(x2Lo>>0), uint32(x1Lo>>32) != uint32(x2Lo>>32),
  2629  					uint32(x1Hi>>0) != uint32(x2Hi>>0), uint32(x1Hi>>32) != uint32(x2Hi>>32),
  2630  				}
  2631  			case wazeroir.V128CmpTypeI32x4LtS:
  2632  				result = []bool{
  2633  					int32(x1Lo>>0) < int32(x2Lo>>0), int32(x1Lo>>32) < int32(x2Lo>>32),
  2634  					int32(x1Hi>>0) < int32(x2Hi>>0), int32(x1Hi>>32) < int32(x2Hi>>32),
  2635  				}
  2636  			case wazeroir.V128CmpTypeI32x4LtU:
  2637  				result = []bool{
  2638  					uint32(x1Lo>>0) < uint32(x2Lo>>0), uint32(x1Lo>>32) < uint32(x2Lo>>32),
  2639  					uint32(x1Hi>>0) < uint32(x2Hi>>0), uint32(x1Hi>>32) < uint32(x2Hi>>32),
  2640  				}
  2641  			case wazeroir.V128CmpTypeI32x4GtS:
  2642  				result = []bool{
  2643  					int32(x1Lo>>0) > int32(x2Lo>>0), int32(x1Lo>>32) > int32(x2Lo>>32),
  2644  					int32(x1Hi>>0) > int32(x2Hi>>0), int32(x1Hi>>32) > int32(x2Hi>>32),
  2645  				}
  2646  			case wazeroir.V128CmpTypeI32x4GtU:
  2647  				result = []bool{
  2648  					uint32(x1Lo>>0) > uint32(x2Lo>>0), uint32(x1Lo>>32) > uint32(x2Lo>>32),
  2649  					uint32(x1Hi>>0) > uint32(x2Hi>>0), uint32(x1Hi>>32) > uint32(x2Hi>>32),
  2650  				}
  2651  			case wazeroir.V128CmpTypeI32x4LeS:
  2652  				result = []bool{
  2653  					int32(x1Lo>>0) <= int32(x2Lo>>0), int32(x1Lo>>32) <= int32(x2Lo>>32),
  2654  					int32(x1Hi>>0) <= int32(x2Hi>>0), int32(x1Hi>>32) <= int32(x2Hi>>32),
  2655  				}
  2656  			case wazeroir.V128CmpTypeI32x4LeU:
  2657  				result = []bool{
  2658  					uint32(x1Lo>>0) <= uint32(x2Lo>>0), uint32(x1Lo>>32) <= uint32(x2Lo>>32),
  2659  					uint32(x1Hi>>0) <= uint32(x2Hi>>0), uint32(x1Hi>>32) <= uint32(x2Hi>>32),
  2660  				}
  2661  			case wazeroir.V128CmpTypeI32x4GeS:
  2662  				result = []bool{
  2663  					int32(x1Lo>>0) >= int32(x2Lo>>0), int32(x1Lo>>32) >= int32(x2Lo>>32),
  2664  					int32(x1Hi>>0) >= int32(x2Hi>>0), int32(x1Hi>>32) >= int32(x2Hi>>32),
  2665  				}
  2666  			case wazeroir.V128CmpTypeI32x4GeU:
  2667  				result = []bool{
  2668  					uint32(x1Lo>>0) >= uint32(x2Lo>>0), uint32(x1Lo>>32) >= uint32(x2Lo>>32),
  2669  					uint32(x1Hi>>0) >= uint32(x2Hi>>0), uint32(x1Hi>>32) >= uint32(x2Hi>>32),
  2670  				}
        			// For 64-bit integer lanes this switch handles only Eq, Ne and the
        			// signed orderings; no unsigned I64x2 comparison case appears here.
  2671  			case wazeroir.V128CmpTypeI64x2Eq:
  2672  				result = []bool{x1Lo == x2Lo, x1Hi == x2Hi}
  2673  			case wazeroir.V128CmpTypeI64x2Ne:
  2674  				result = []bool{x1Lo != x2Lo, x1Hi != x2Hi}
  2675  			case wazeroir.V128CmpTypeI64x2LtS:
  2676  				result = []bool{int64(x1Lo) < int64(x2Lo), int64(x1Hi) < int64(x2Hi)}
  2677  			case wazeroir.V128CmpTypeI64x2GtS:
  2678  				result = []bool{int64(x1Lo) > int64(x2Lo), int64(x1Hi) > int64(x2Hi)}
  2679  			case wazeroir.V128CmpTypeI64x2LeS:
  2680  				result = []bool{int64(x1Lo) <= int64(x2Lo), int64(x1Hi) <= int64(x2Hi)}
  2681  			case wazeroir.V128CmpTypeI64x2GeS:
  2682  				result = []bool{int64(x1Lo) >= int64(x2Lo), int64(x1Hi) >= int64(x2Hi)}
        			// Floating-point lanes are decoded from their bit patterns and compared
        			// with Go's float operators, so Go's NaN rules apply: any comparison
        			// involving a NaN is false, except != which is true.
  2683  			case wazeroir.V128CmpTypeF32x4Eq:
  2684  				result = []bool{
  2685  					math.Float32frombits(uint32(x1Lo>>0)) == math.Float32frombits(uint32(x2Lo>>0)),
  2686  					math.Float32frombits(uint32(x1Lo>>32)) == math.Float32frombits(uint32(x2Lo>>32)),
  2687  					math.Float32frombits(uint32(x1Hi>>0)) == math.Float32frombits(uint32(x2Hi>>0)),
  2688  					math.Float32frombits(uint32(x1Hi>>32)) == math.Float32frombits(uint32(x2Hi>>32)),
  2689  				}
  2690  			case wazeroir.V128CmpTypeF32x4Ne:
  2691  				result = []bool{
  2692  					math.Float32frombits(uint32(x1Lo>>0)) != math.Float32frombits(uint32(x2Lo>>0)),
  2693  					math.Float32frombits(uint32(x1Lo>>32)) != math.Float32frombits(uint32(x2Lo>>32)),
  2694  					math.Float32frombits(uint32(x1Hi>>0)) != math.Float32frombits(uint32(x2Hi>>0)),
  2695  					math.Float32frombits(uint32(x1Hi>>32)) != math.Float32frombits(uint32(x2Hi>>32)),
  2696  				}
  2697  			case wazeroir.V128CmpTypeF32x4Lt:
  2698  				result = []bool{
  2699  					math.Float32frombits(uint32(x1Lo>>0)) < math.Float32frombits(uint32(x2Lo>>0)),
  2700  					math.Float32frombits(uint32(x1Lo>>32)) < math.Float32frombits(uint32(x2Lo>>32)),
  2701  					math.Float32frombits(uint32(x1Hi>>0)) < math.Float32frombits(uint32(x2Hi>>0)),
  2702  					math.Float32frombits(uint32(x1Hi>>32)) < math.Float32frombits(uint32(x2Hi>>32)),
  2703  				}
  2704  			case wazeroir.V128CmpTypeF32x4Gt:
  2705  				result = []bool{
  2706  					math.Float32frombits(uint32(x1Lo>>0)) > math.Float32frombits(uint32(x2Lo>>0)),
  2707  					math.Float32frombits(uint32(x1Lo>>32)) > math.Float32frombits(uint32(x2Lo>>32)),
  2708  					math.Float32frombits(uint32(x1Hi>>0)) > math.Float32frombits(uint32(x2Hi>>0)),
  2709  					math.Float32frombits(uint32(x1Hi>>32)) > math.Float32frombits(uint32(x2Hi>>32)),
  2710  				}
  2711  			case wazeroir.V128CmpTypeF32x4Le:
  2712  				result = []bool{
  2713  					math.Float32frombits(uint32(x1Lo>>0)) <= math.Float32frombits(uint32(x2Lo>>0)),
  2714  					math.Float32frombits(uint32(x1Lo>>32)) <= math.Float32frombits(uint32(x2Lo>>32)),
  2715  					math.Float32frombits(uint32(x1Hi>>0)) <= math.Float32frombits(uint32(x2Hi>>0)),
  2716  					math.Float32frombits(uint32(x1Hi>>32)) <= math.Float32frombits(uint32(x2Hi>>32)),
  2717  				}
  2718  			case wazeroir.V128CmpTypeF32x4Ge:
  2719  				result = []bool{
  2720  					math.Float32frombits(uint32(x1Lo>>0)) >= math.Float32frombits(uint32(x2Lo>>0)),
  2721  					math.Float32frombits(uint32(x1Lo>>32)) >= math.Float32frombits(uint32(x2Lo>>32)),
  2722  					math.Float32frombits(uint32(x1Hi>>0)) >= math.Float32frombits(uint32(x2Hi>>0)),
  2723  					math.Float32frombits(uint32(x1Hi>>32)) >= math.Float32frombits(uint32(x2Hi>>32)),
  2724  				}
  2725  			case wazeroir.V128CmpTypeF64x2Eq:
  2726  				result = []bool{
  2727  					math.Float64frombits(x1Lo) == math.Float64frombits(x2Lo),
  2728  					math.Float64frombits(x1Hi) == math.Float64frombits(x2Hi),
  2729  				}
  2730  			case wazeroir.V128CmpTypeF64x2Ne:
  2731  				result = []bool{
  2732  					math.Float64frombits(x1Lo) != math.Float64frombits(x2Lo),
  2733  					math.Float64frombits(x1Hi) != math.Float64frombits(x2Hi),
  2734  				}
  2735  			case wazeroir.V128CmpTypeF64x2Lt:
  2736  				result = []bool{
  2737  					math.Float64frombits(x1Lo) < math.Float64frombits(x2Lo),
  2738  					math.Float64frombits(x1Hi) < math.Float64frombits(x2Hi),
  2739  				}
  2740  			case wazeroir.V128CmpTypeF64x2Gt:
  2741  				result = []bool{
  2742  					math.Float64frombits(x1Lo) > math.Float64frombits(x2Lo),
  2743  					math.Float64frombits(x1Hi) > math.Float64frombits(x2Hi),
  2744  				}
  2745  			case wazeroir.V128CmpTypeF64x2Le:
  2746  				result = []bool{
  2747  					math.Float64frombits(x1Lo) <= math.Float64frombits(x2Lo),
  2748  					math.Float64frombits(x1Hi) <= math.Float64frombits(x2Hi),
  2749  				}
  2750  			case wazeroir.V128CmpTypeF64x2Ge:
  2751  				result = []bool{
  2752  					math.Float64frombits(x1Lo) >= math.Float64frombits(x2Lo),
  2753  					math.Float64frombits(x1Hi) >= math.Float64frombits(x2Hi),
  2754  				}
  2755  			}
  2756  
        			// Expand every boolean into a full-width lane mask: all bits set where the
        			// comparison held, all bits clear otherwise. The lane width follows from
        			// the number of lanes (16 -> 8 bits, 8 -> 16 bits, 4 -> 32 bits, 2 -> 64
        			// bits). If op.B1 matched no case above, result is nil, laneNum is 0 and
        			// both halves stay zero.
  2757  			var retLo, retHi uint64
  2758  			laneNum := len(result)
  2759  			switch laneNum {
  2760  			case 16:
  2761  				for i, b := range result {
  2762  					if b {
        						// The first half of the lanes lives in retLo, the rest in retHi.
  2763  						if i < 8 {
  2764  							retLo |= 0xff << (i * 8)
  2765  						} else {
  2766  							retHi |= 0xff << ((i - 8) * 8)
  2767  						}
  2768  					}
  2769  				}
  2770  			case 8:
  2771  				for i, b := range result {
  2772  					if b {
  2773  						if i < 4 {
  2774  							retLo |= 0xffff << (i * 16)
  2775  						} else {
  2776  							retHi |= 0xffff << ((i - 4) * 16)
  2777  						}
  2778  					}
  2779  				}
  2780  			case 4:
  2781  				for i, b := range result {
  2782  					if b {
  2783  						if i < 2 {
  2784  							retLo |= 0xffff_ffff << (i * 32)
  2785  						} else {
  2786  							retHi |= 0xffff_ffff << ((i - 2) * 32)
  2787  						}
  2788  					}
  2789  				}
  2790  			case 2:
        				// One lane per half: the whole 64-bit word is either all ones or zero.
  2791  				if result[0] {
  2792  					retLo = ^uint64(0)
  2793  				}
  2794  				if result[1] {
  2795  					retHi = ^uint64(0)
  2796  				}
  2797  			}
  2798  
        			// Push the mask back as (lo, hi) and advance to the next operation.
  2799  			ce.pushValue(retLo)
  2800  			ce.pushValue(retHi)
  2801  			frame.pc++
  2802  		case wazeroir.OperationKindV128AddSat:
        			// Saturating lane-wise addition of two 128-bit vectors. Each vector is
        			// popped as (hi, lo); x2 is popped before x1. op.B1 selects the lane shape
        			// and op.B3 selects signed (true) or unsigned (false) saturation. Only
        			// I8x16 and I16x8 are handled; any other shape leaves retLo/retHi at zero.
  2803  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2804  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2805  
  2806  			var retLo, retHi uint64
  2807  
  2808  			// Lane-wise addition while saturating the overflowing values.
  2809  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-addition
  2810  			switch op.B1 {
  2811  			case wazeroir.ShapeI8x16:
  2812  				for i := 0; i < 16; i++ {
        					// Extract lane i from both operands: lanes 0-7 come from the low
        					// halves, lanes 8-15 from the high halves.
  2813  					var v, w byte
  2814  					if i < 8 {
  2815  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2816  					} else {
  2817  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2818  					}
  2819  
        					// The sum is computed in int64 so that it cannot wrap, then clamped
        					// to the lane's representable range.
        					// NOTE(review): `subbed` holds a sum here; the I16x8 branch below
        					// names the same quantity `added`.
  2820  					var uv uint64
  2821  					if op.B3 { // signed
  2822  						if subbed := int64(int8(v)) + int64(int8(w)); subbed < math.MinInt8 {
  2823  							uv = uint64(byte(0x80))
  2824  						} else if subbed > math.MaxInt8 {
  2825  							uv = uint64(byte(0x7f))
  2826  						} else {
  2827  							uv = uint64(byte(int8(subbed)))
  2828  						}
  2829  					} else {
        						// NOTE(review): v and w are unsigned bytes, so their sum is
        						// never negative and the `< 0` branch cannot be taken.
  2830  						if subbed := int64(v) + int64(w); subbed < 0 {
  2831  							uv = uint64(byte(0))
  2832  						} else if subbed > math.MaxUint8 {
  2833  							uv = uint64(byte(0xff))
  2834  						} else {
  2835  							uv = uint64(byte(subbed))
  2836  						}
  2837  					}
  2838  
  2839  					if i < 8 { // first 8 lanes are on lower 64bits.
  2840  						retLo |= uv << (i * 8)
  2841  					} else {
  2842  						retHi |= uv << ((i - 8) * 8)
  2843  					}
  2844  				}
  2845  			case wazeroir.ShapeI16x8:
  2846  				for i := 0; i < 8; i++ {
        					// Same scheme as above with 16-bit lanes: lanes 0-3 are in the low
        					// halves, lanes 4-7 in the high halves.
  2847  					var v, w uint16
  2848  					if i < 4 {
  2849  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  2850  					} else {
  2851  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  2852  					}
  2853  
  2854  					var uv uint64
  2855  					if op.B3 { // signed
  2856  						if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 {
  2857  							uv = uint64(uint16(0x8000))
  2858  						} else if added > math.MaxInt16 {
  2859  							uv = uint64(uint16(0x7fff))
  2860  						} else {
  2861  							uv = uint64(uint16(int16(added)))
  2862  						}
  2863  					} else {
        						// NOTE(review): the sum of two uint16 values is never negative,
        						// so the `< 0` branch cannot be taken.
  2864  						if added := int64(v) + int64(w); added < 0 {
  2865  							uv = uint64(uint16(0))
  2866  						} else if added > math.MaxUint16 {
  2867  							uv = uint64(uint16(0xffff))
  2868  						} else {
  2869  							uv = uint64(uint16(added))
  2870  						}
  2871  					}
  2872  
  2873  					if i < 4 { // first 4 lanes are on lower 64bits.
  2874  						retLo |= uv << (i * 16)
  2875  					} else {
  2876  						retHi |= uv << ((i - 4) * 16)
  2877  					}
  2878  				}
  2879  			}
  2880  
        			// Push the result as (lo, hi) and advance to the next operation.
  2881  			ce.pushValue(retLo)
  2882  			ce.pushValue(retHi)
  2883  			frame.pc++
  2884  		case wazeroir.OperationKindV128SubSat:
        			// Saturating lane-wise subtraction (x1 - x2) of two 128-bit vectors. Each
        			// vector is popped as (hi, lo); x2 is popped before x1. op.B1 selects the
        			// lane shape and op.B3 selects signed (true) or unsigned (false)
        			// saturation. Only I8x16 and I16x8 are handled; any other shape leaves
        			// retLo/retHi at zero.
  2885  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2886  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2887  
  2888  			var retLo, retHi uint64
  2889  
  2890  			// Lane-wise subtraction while saturating the overflowing values.
  2891  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-subtraction
  2892  			switch op.B1 {
  2893  			case wazeroir.ShapeI8x16:
  2894  				for i := 0; i < 16; i++ {
        					// Extract lane i from both operands: lanes 0-7 come from the low
        					// halves, lanes 8-15 from the high halves.
  2895  					var v, w byte
  2896  					if i < 8 {
  2897  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2898  					} else {
  2899  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2900  					}
  2901  
        					// The difference is computed in int64 so that it cannot wrap, then
        					// clamped to the lane's representable range.
  2902  					var uv uint64
  2903  					if op.B3 { // signed
  2904  						if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 {
  2905  							uv = uint64(byte(0x80))
  2906  						} else if subbed > math.MaxInt8 {
  2907  							uv = uint64(byte(0x7f))
  2908  						} else {
  2909  							uv = uint64(byte(int8(subbed)))
  2910  						}
  2911  					} else {
        						// NOTE(review): the difference of two unsigned bytes is at
        						// most 255, so the `> math.MaxUint8` branch cannot be taken.
  2912  						if subbed := int64(v) - int64(w); subbed < 0 {
  2913  							uv = uint64(byte(0))
  2914  						} else if subbed > math.MaxUint8 {
  2915  							uv = uint64(byte(0xff))
  2916  						} else {
  2917  							uv = uint64(byte(subbed))
  2918  						}
  2919  					}
  2920  
        					// Lanes 0-7 are written to the low half, lanes 8-15 to the high half.
  2921  					if i < 8 {
  2922  						retLo |= uv << (i * 8)
  2923  					} else {
  2924  						retHi |= uv << ((i - 8) * 8)
  2925  					}
  2926  				}
  2927  			case wazeroir.ShapeI16x8:
  2928  				for i := 0; i < 8; i++ {
        					// Same scheme as above with 16-bit lanes: lanes 0-3 are in the low
        					// halves, lanes 4-7 in the high halves.
  2929  					var v, w uint16
  2930  					if i < 4 {
  2931  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  2932  					} else {
  2933  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  2934  					}
  2935  
  2936  					var uv uint64
  2937  					if op.B3 { // signed
  2938  						if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 {
  2939  							uv = uint64(uint16(0x8000))
  2940  						} else if subbed > math.MaxInt16 {
  2941  							uv = uint64(uint16(0x7fff))
  2942  						} else {
  2943  							uv = uint64(uint16(int16(subbed)))
  2944  						}
  2945  					} else {
        						// NOTE(review): the difference of two uint16 values is at most
        						// 65535, so the `> math.MaxUint16` branch cannot be taken.
  2946  						if subbed := int64(v) - int64(w); subbed < 0 {
  2947  							uv = uint64(uint16(0))
  2948  						} else if subbed > math.MaxUint16 {
  2949  							uv = uint64(uint16(0xffff))
  2950  						} else {
  2951  							uv = uint64(uint16(subbed))
  2952  						}
  2953  					}
  2954  
  2955  					if i < 4 {
  2956  						retLo |= uv << (i * 16)
  2957  					} else {
  2958  						retHi |= uv << ((i - 4) * 16)
  2959  					}
  2960  				}
  2961  			}
  2962  
        			// Push the result as (lo, hi) and advance to the next operation.
  2963  			ce.pushValue(retLo)
  2964  			ce.pushValue(retHi)
  2965  			frame.pc++
  2966  		case wazeroir.OperationKindV128Mul:
        			// Lane-wise multiplication of two 128-bit vectors. Each vector is popped
        			// as (hi, lo); x2 is popped before x1. op.B1 selects the lane shape.
        			// Shapes without a case below (no I8x16 case is present) leave
        			// retLo/retHi at zero.
  2967  			x2hi, x2lo := ce.popValue(), ce.popValue()
  2968  			x1hi, x1lo := ce.popValue(), ce.popValue()
  2969  			var retLo, retHi uint64
  2970  			switch op.B1 {
        			// Integer lanes are multiplied in the lane's own unsigned type, so each
        			// product wraps to the lane width before being placed back in position.
  2971  			case wazeroir.ShapeI16x8:
  2972  				retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) |
  2973  					(uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48)
  2974  				retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) |
  2975  					(uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48)
  2976  			case wazeroir.ShapeI32x4:
  2977  				retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32)
  2978  				retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32)
  2979  			case wazeroir.ShapeI64x2:
  2980  				retHi = x1hi * x2hi
  2981  				retLo = x1lo * x2lo
        			// NOTE(review): mulFloat32bits is defined outside this section; it is
        			// presumably the float32 product of two bit patterns returned as bits in a
        			// uint64 - confirm against its definition.
  2982  			case wazeroir.ShapeF32x4:
  2983  				retHi = mulFloat32bits(uint32(x1hi), uint32(x2hi)) | mulFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  2984  				retLo = mulFloat32bits(uint32(x1lo), uint32(x2lo)) | mulFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  2985  			case wazeroir.ShapeF64x2:
  2986  				retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi))
  2987  				retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo))
  2988  			}
        			// Push the result as (lo, hi) and advance to the next operation.
  2989  			ce.pushValue(retLo)
  2990  			ce.pushValue(retHi)
  2991  			frame.pc++
  2992  		case wazeroir.OperationKindV128Div:
        			// Lane-wise floating-point division (x1 / x2) of two 128-bit vectors.
        			// Each vector is popped as (hi, lo); x2 is popped before x1.
  2993  			x2hi, x2lo := ce.popValue(), ce.popValue()
  2994  			x1hi, x1lo := ce.popValue(), ce.popValue()
  2995  			var retLo, retHi uint64
  2996  			if op.B1 == wazeroir.ShapeF64x2 {
  2997  				retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi))
  2998  				retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo))
  2999  			} else {
        				// Every shape other than F64x2 is treated as four 32-bit float lanes.
        				// NOTE(review): divFloat32bits is defined outside this section;
        				// presumably it returns the float32 quotient's bits as a uint64 -
        				// confirm against its definition.
  3000  				retHi = divFloat32bits(uint32(x1hi), uint32(x2hi)) | divFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3001  				retLo = divFloat32bits(uint32(x1lo), uint32(x2lo)) | divFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3002  			}
        			// Push the result as (lo, hi) and advance to the next operation.
  3003  			ce.pushValue(retLo)
  3004  			ce.pushValue(retHi)
  3005  			frame.pc++
  3006  		case wazeroir.OperationKindV128Neg:
        			// Lane-wise negation of one 128-bit vector, popped as (hi, lo) and
        			// rewritten in place. op.B1 selects the lane shape; a shape with no case
        			// below leaves the vector unchanged.
  3007  			hi, lo := ce.popValue(), ce.popValue()
  3008  			switch op.B1 {
        			// Integer lanes: unary minus on the lane's unsigned type computes the
        			// two's-complement negation, wrapping to the lane width.
  3009  			case wazeroir.ShapeI8x16:
  3010  				lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) |
  3011  					(uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) |
  3012  					(uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) |
  3013  					(uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56)
  3014  				hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) |
  3015  					(uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) |
  3016  					(uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) |
  3017  					(uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56)
  3018  			case wazeroir.ShapeI16x8:
  3019  				hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) |
  3020  					(uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48)
  3021  				lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) |
  3022  					(uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48)
  3023  			case wazeroir.ShapeI32x4:
  3024  				hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32)
  3025  				lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32)
  3026  			case wazeroir.ShapeI64x2:
  3027  				hi = -hi
  3028  				lo = -lo
        			// Float lanes: decode the bit pattern, negate the float value, re-encode.
  3029  			case wazeroir.ShapeF32x4:
  3030  				hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) |
  3031  					(uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32)
  3032  				lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) |
  3033  					(uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32)
  3034  			case wazeroir.ShapeF64x2:
  3035  				hi = math.Float64bits(-math.Float64frombits(hi))
  3036  				lo = math.Float64bits(-math.Float64frombits(lo))
  3037  			}
        			// Push the result as (lo, hi) and advance to the next operation.
  3038  			ce.pushValue(lo)
  3039  			ce.pushValue(hi)
  3040  			frame.pc++
  3041  		case wazeroir.OperationKindV128Sqrt:
        			// Lane-wise floating-point square root of one 128-bit vector, popped as
        			// (hi, lo) and rewritten in place.
  3042  			hi, lo := ce.popValue(), ce.popValue()
  3043  			if op.B1 == wazeroir.ShapeF64x2 {
  3044  				hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi)))
  3045  				lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo)))
  3046  			} else {
        				// Every shape other than F64x2 is treated as four 32-bit float lanes.
        				// Each lane is widened to float64 for math.Sqrt and the result is
        				// narrowed back to float32 before being re-encoded.
  3047  				hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) |
  3048  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32)
  3049  				lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) |
  3050  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32)
  3051  			}
        			// Push the result as (lo, hi) and advance to the next operation.
  3052  			ce.pushValue(lo)
  3053  			ce.pushValue(hi)
  3054  			frame.pc++
  3055  		case wazeroir.OperationKindV128Abs:
        			// Lane-wise absolute value of one 128-bit vector, popped as (hi, lo) and
        			// rewritten in place. op.B1 selects the lane shape; a shape with no case
        			// below leaves the vector unchanged.
  3056  			hi, lo := ce.popValue(), ce.popValue()
  3057  			switch op.B1 {
        			// NOTE(review): i8Abs, i16Abs and i32Abs are defined outside this section;
        			// presumably each returns the absolute value of the lane interpreted as
        			// a signed integer - confirm against their definitions.
  3058  			case wazeroir.ShapeI8x16:
  3059  				lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) |
  3060  					(uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) |
  3061  					(uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) |
  3062  					(uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56)
  3063  				hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) |
  3064  					(uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) |
  3065  					(uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) |
  3066  					(uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56)
  3067  			case wazeroir.ShapeI16x8:
  3068  				hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) |
  3069  					(uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48)
  3070  				lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) |
  3071  					(uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48)
  3072  			case wazeroir.ShapeI32x4:
  3073  				hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32)
  3074  				lo = uint64(i32Abs(uint32(lo))) | (uint64(i32Abs(uint32(lo>>32))) << 32)
  3075  			case wazeroir.ShapeI64x2:
        				// Negate only when the lane is negative as a signed 64-bit value. The
        				// negation wraps, so the minimum int64 bit pattern maps to itself.
  3076  				if int64(hi) < 0 {
  3077  					hi = -hi
  3078  				}
  3079  				if int64(lo) < 0 {
  3080  					lo = -lo
  3081  				}
  3082  			case wazeroir.ShapeF32x4:
        				// Clear bits 31 and 63: the sign bits of the two packed float32 lanes.
  3083  				hi = hi &^ (1<<31 | 1<<63)
  3084  				lo = lo &^ (1<<31 | 1<<63)
  3085  			case wazeroir.ShapeF64x2:
        				// Clear bit 63: the sign bit of the float64 lane.
  3086  				hi = hi &^ (1 << 63)
  3087  				lo = lo &^ (1 << 63)
  3088  			}
        			// Push the result as (lo, hi) and advance to the next operation.
  3089  			ce.pushValue(lo)
  3090  			ce.pushValue(hi)
  3091  			frame.pc++
  3092  		case wazeroir.OperationKindV128Popcnt:
        			// Per-byte population count: every one of the 16 byte lanes is replaced by
        			// the number of set bits it contains. hi is popped first, then lo.
  3093  			hi, lo := ce.popValue(), ce.popValue()
  3094  			var retLo, retHi uint64
  3095  			for i := 0; i < 16; i++ {
        				// Bytes 0-7 come from lo, bytes 8-15 from hi.
  3096  				var v byte
  3097  				if i < 8 {
  3098  					v = byte(lo >> (i * 8))
  3099  				} else {
  3100  					v = byte(hi >> ((i - 8) * 8))
  3101  				}
  3102  
  3103  				var cnt uint64
        				// The inner i shadows the outer byte index and walks the 8 bits of v.
  3104  				for i := 0; i < 8; i++ {
  3105  					if (v>>i)&0b1 != 0 {
  3106  						cnt++
  3107  					}
  3108  				}
  3109  
        				// Store the count (at most 8, so it fits the byte) at the same byte
        				// position in the result.
  3110  				if i < 8 {
  3111  					retLo |= cnt << (i * 8)
  3112  				} else {
  3113  					retHi |= cnt << ((i - 8) * 8)
  3114  				}
  3115  			}
  3116  			ce.pushValue(retLo)
  3117  			ce.pushValue(retHi)
  3118  			frame.pc++
  3119  		case wazeroir.OperationKindV128Min:
        			// Lane-wise minimum of two vectors. The second operand (x2) is popped
        			// first, then the first operand (x1); each is a hi/lo pair of uint64.
        			// op.B1 selects the lane shape. For the integer shapes op.B3 chooses
        			// between the *MinS and *MinU helpers (defined elsewhere). A shape with no
        			// case below leaves retLo/retHi at zero.
  3120  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3121  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3122  			var retLo, retHi uint64
  3123  			switch op.B1 {
  3124  			case wazeroir.ShapeI8x16:
        				// Eight byte lanes per 64-bit half; each result is shifted back to the
        				// byte position its inputs were taken from.
  3125  				if op.B3 { // signed
  3126  					retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) |
  3127  						uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3128  						uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3129  						uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3130  					retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) |
  3131  						uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3132  						uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3133  						uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3134  				} else {
  3135  					retLo = uint64(i8MinU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) |
  3136  						uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3137  						uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3138  						uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3139  					retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) |
  3140  						uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3141  						uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3142  						uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3143  				}
  3144  			case wazeroir.ShapeI16x8:
        				// Four 16-bit lanes per 64-bit half.
  3145  				if op.B3 { // signed
  3146  					retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) |
  3147  						uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3148  						uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3149  						uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3150  					retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) |
  3151  						uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3152  						uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3153  						uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3154  				} else {
  3155  					retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) |
  3156  						uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3157  						uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3158  						uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3159  					retHi = uint64(i16MinU(uint16(x1hi), uint16(x2hi))) |
  3160  						uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3161  						uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3162  						uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3163  				}
  3164  			case wazeroir.ShapeI32x4:
        				// Two 32-bit lanes per 64-bit half.
  3165  				if op.B3 { // signed
  3166  					retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) |
  3167  						uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3168  					retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) |
  3169  						uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3170  				} else {
  3171  					retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) |
  3172  						uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3173  					retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) |
  3174  						uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3175  				}
  3176  			case wazeroir.ShapeF32x4:
        				// Float lanes are passed as raw 32-bit patterns to WasmCompatMin32bits
        				// (defined elsewhere); its result is OR-ed/shifted directly into the
        				// uint64 halves.
  3177  				retHi = WasmCompatMin32bits(uint32(x1hi), uint32(x2hi)) |
  3178  					WasmCompatMin32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3179  				retLo = WasmCompatMin32bits(uint32(x1lo), uint32(x2lo)) |
  3180  					WasmCompatMin32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3181  			case wazeroir.ShapeF64x2:
        				// One float64 lane per half, delegated to moremath.WasmCompatMin64.
  3182  				retHi = math.Float64bits(moremath.WasmCompatMin64(
  3183  					math.Float64frombits(x1hi),
  3184  					math.Float64frombits(x2hi),
  3185  				))
  3186  				retLo = math.Float64bits(moremath.WasmCompatMin64(
  3187  					math.Float64frombits(x1lo),
  3188  					math.Float64frombits(x2lo),
  3189  				))
  3190  			}
  3191  			ce.pushValue(retLo)
  3192  			ce.pushValue(retHi)
  3193  			frame.pc++
  3194  		case wazeroir.OperationKindV128Max:
        			// Lane-wise maximum of two vectors. The second operand (x2) is popped
        			// first, then the first operand (x1); each is a hi/lo pair of uint64.
        			// op.B1 selects the lane shape. For the integer shapes op.B3 chooses
        			// between the *MaxS and *MaxU helpers (defined elsewhere). A shape with no
        			// case below leaves retLo/retHi at zero.
  3195  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3196  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3197  			var retLo, retHi uint64
  3198  			switch op.B1 {
  3199  			case wazeroir.ShapeI8x16:
        				// Eight byte lanes per 64-bit half; each result is shifted back to the
        				// byte position its inputs were taken from.
  3200  				if op.B3 { // signed
  3201  					retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) |
  3202  						uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3203  						uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3204  						uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3205  					retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) |
  3206  						uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3207  						uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3208  						uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3209  				} else {
  3210  					retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) |
  3211  						uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3212  						uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3213  						uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3214  					retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) |
  3215  						uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3216  						uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3217  						uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3218  				}
  3219  			case wazeroir.ShapeI16x8:
        				// Four 16-bit lanes per 64-bit half.
  3220  				if op.B3 { // signed
  3221  					retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) |
  3222  						uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3223  						uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3224  						uint64(i16MaxS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3225  					retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) |
  3226  						uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3227  						uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3228  						uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3229  				} else {
  3230  					retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) |
  3231  						uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3232  						uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3233  						uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3234  					retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) |
  3235  						uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3236  						uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3237  						uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3238  				}
  3239  			case wazeroir.ShapeI32x4:
        				// Two 32-bit lanes per 64-bit half.
  3240  				if op.B3 { // signed
  3241  					retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) |
  3242  						uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3243  					retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) |
  3244  						uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3245  				} else {
  3246  					retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) |
  3247  						uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3248  					retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) |
  3249  						uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3250  				}
  3251  			case wazeroir.ShapeF32x4:
        				// Float lanes are passed as raw 32-bit patterns to WasmCompatMax32bits
        				// (defined elsewhere); its result is OR-ed/shifted directly into the
        				// uint64 halves.
  3252  				retHi = WasmCompatMax32bits(uint32(x1hi), uint32(x2hi)) |
  3253  					WasmCompatMax32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3254  				retLo = WasmCompatMax32bits(uint32(x1lo), uint32(x2lo)) |
  3255  					WasmCompatMax32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3256  			case wazeroir.ShapeF64x2:
        				// One float64 lane per half, delegated to moremath.WasmCompatMax64.
  3257  				retHi = math.Float64bits(moremath.WasmCompatMax64(
  3258  					math.Float64frombits(x1hi),
  3259  					math.Float64frombits(x2hi),
  3260  				))
  3261  				retLo = math.Float64bits(moremath.WasmCompatMax64(
  3262  					math.Float64frombits(x1lo),
  3263  					math.Float64frombits(x2lo),
  3264  				))
  3265  			}
  3266  			ce.pushValue(retLo)
  3267  			ce.pushValue(retHi)
  3268  			frame.pc++
  3269  		case wazeroir.OperationKindV128AvgrU:
        			// Lane-wise average of two vectors via the i8RoundingAverage and
        			// i16RoundingAverage helpers (defined elsewhere). x2 is popped first, then
        			// x1. Only ShapeI8x16 and ShapeI16x8 are handled; any other op.B1 leaves
        			// retLo/retHi at zero.
  3270  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3271  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3272  			var retLo, retHi uint64
  3273  			switch op.B1 {
  3274  			case wazeroir.ShapeI8x16:
        				// Eight byte lanes per 64-bit half.
  3275  				retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) |
  3276  					uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3277  					uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3278  					uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3279  				retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) |
  3280  					uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3281  					uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3282  					uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3283  			case wazeroir.ShapeI16x8:
        				// Four 16-bit lanes per 64-bit half.
  3284  				retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) |
  3285  					uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3286  					uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3287  					uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3288  				retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) |
  3289  					uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3290  					uint64(i16RoundingAverage(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3291  					uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3292  			}
  3293  			ce.pushValue(retLo)
  3294  			ce.pushValue(retHi)
  3295  			frame.pc++
  3296  		case wazeroir.OperationKindV128Pmin:
        			// Lane-wise select between two float vectors: for each lane the x2 lane is
        			// taken when flt32/flt64(x2, x1) reports true, otherwise the x1 lane. The
        			// chosen lane's bit pattern is copied unchanged (masking, no arithmetic).
        			// x2 is popped first, then x1. flt32/flt64 are defined elsewhere
        			// (presumably "less than" — confirm at their definition).
  3297  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3298  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3299  			var retLo, retHi uint64
  3300  			if op.B1 == wazeroir.ShapeF32x4 {
        				// Two float32 lanes per 64-bit half: the low lane initializes the
        				// result with '=', the high lane is merged in with '|='.
  3301  				if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) {
  3302  					retLo = x2lo & 0x00000000_ffffffff
  3303  				} else {
  3304  					retLo = x1lo & 0x00000000_ffffffff
  3305  				}
  3306  				if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) {
  3307  					retLo |= x2lo & 0xffffffff_00000000
  3308  				} else {
  3309  					retLo |= x1lo & 0xffffffff_00000000
  3310  				}
  3311  				if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) {
  3312  					retHi = x2hi & 0x00000000_ffffffff
  3313  				} else {
  3314  					retHi = x1hi & 0x00000000_ffffffff
  3315  				}
  3316  				if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) {
  3317  					retHi |= x2hi & 0xffffffff_00000000
  3318  				} else {
  3319  					retHi |= x1hi & 0xffffffff_00000000
  3320  				}
  3321  			} else {
        				// Any other shape: each 64-bit half is treated as one float64 lane.
  3322  				if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) {
  3323  					retLo = x2lo
  3324  				} else {
  3325  					retLo = x1lo
  3326  				}
  3327  				if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) {
  3328  					retHi = x2hi
  3329  				} else {
  3330  					retHi = x1hi
  3331  				}
  3332  			}
  3333  			ce.pushValue(retLo)
  3334  			ce.pushValue(retHi)
  3335  			frame.pc++
  3336  		case wazeroir.OperationKindV128Pmax:
        			// Lane-wise select between two float vectors: for each lane the x2 lane is
        			// taken when flt32/flt64(x1, x2) reports true, otherwise the x1 lane. Note
        			// the argument order is (x1, x2), the reverse of the Pmin case. The chosen
        			// lane's bit pattern is copied unchanged. x2 is popped first, then x1.
        			// flt32/flt64 are defined elsewhere (presumably "less than" — confirm).
  3337  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3338  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3339  			var retLo, retHi uint64
  3340  			if op.B1 == wazeroir.ShapeF32x4 {
        				// Two float32 lanes per 64-bit half: the low lane initializes the
        				// result with '=', the high lane is merged in with '|='.
  3341  				if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) {
  3342  					retLo = x2lo & 0x00000000_ffffffff
  3343  				} else {
  3344  					retLo = x1lo & 0x00000000_ffffffff
  3345  				}
  3346  				if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) {
  3347  					retLo |= x2lo & 0xffffffff_00000000
  3348  				} else {
  3349  					retLo |= x1lo & 0xffffffff_00000000
  3350  				}
  3351  				if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) {
  3352  					retHi = x2hi & 0x00000000_ffffffff
  3353  				} else {
  3354  					retHi = x1hi & 0x00000000_ffffffff
  3355  				}
  3356  				if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) {
  3357  					retHi |= x2hi & 0xffffffff_00000000
  3358  				} else {
  3359  					retHi |= x1hi & 0xffffffff_00000000
  3360  				}
  3361  			} else {
        				// Any other shape: each 64-bit half is treated as one float64 lane.
  3362  				if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) {
  3363  					retLo = x2lo
  3364  				} else {
  3365  					retLo = x1lo
  3366  				}
  3367  				if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) {
  3368  					retHi = x2hi
  3369  				} else {
  3370  					retHi = x1hi
  3371  				}
  3372  			}
  3373  			ce.pushValue(retLo)
  3374  			ce.pushValue(retHi)
  3375  			frame.pc++
  3376  		case wazeroir.OperationKindV128Ceil:
        			// Applies moremath.WasmCompatCeilF32/F64 to every float lane. hi is popped
        			// first, then lo; the result is pushed lo first. ShapeF32x4 treats each
        			// half as two packed float32 lanes; any other shape treats each half as
        			// one float64.
  3377  			hi, lo := ce.popValue(), ce.popValue()
  3378  			if op.B1 == wazeroir.ShapeF32x4 {
  3379  				lo = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo))))) |
  3380  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3381  				hi = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi))))) |
  3382  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3383  			} else {
  3384  				lo = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(lo)))
  3385  				hi = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(hi)))
  3386  			}
  3387  			ce.pushValue(lo)
  3388  			ce.pushValue(hi)
  3389  			frame.pc++
  3390  		case wazeroir.OperationKindV128Floor:
        			// Applies moremath.WasmCompatFloorF32/F64 to every float lane. hi is
        			// popped first, then lo; the result is pushed lo first. ShapeF32x4 treats
        			// each half as two packed float32 lanes; any other shape treats each half
        			// as one float64.
  3391  			hi, lo := ce.popValue(), ce.popValue()
  3392  			if op.B1 == wazeroir.ShapeF32x4 {
  3393  				lo = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo))))) |
  3394  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3395  				hi = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi))))) |
  3396  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3397  			} else {
  3398  				lo = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(lo)))
  3399  				hi = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(hi)))
  3400  			}
  3401  			ce.pushValue(lo)
  3402  			ce.pushValue(hi)
  3403  			frame.pc++
  3404  		case wazeroir.OperationKindV128Trunc:
        			// Applies moremath.WasmCompatTruncF32/F64 to every float lane. hi is
        			// popped first, then lo; the result is pushed lo first. ShapeF32x4 treats
        			// each half as two packed float32 lanes; any other shape treats each half
        			// as one float64.
  3405  			hi, lo := ce.popValue(), ce.popValue()
  3406  			if op.B1 == wazeroir.ShapeF32x4 {
  3407  				lo = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo))))) |
  3408  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3409  				hi = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi))))) |
  3410  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3411  			} else {
  3412  				lo = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(lo)))
  3413  				hi = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(hi)))
  3414  			}
  3415  			ce.pushValue(lo)
  3416  			ce.pushValue(hi)
  3417  			frame.pc++
  3418  		case wazeroir.OperationKindV128Nearest:
        			// Applies moremath.WasmCompatNearestF32/F64 to every float lane. hi is
        			// popped first, then lo; the result is pushed lo first. ShapeF32x4 treats
        			// each half as two packed float32 lanes; any other shape treats each half
        			// as one float64.
  3419  			hi, lo := ce.popValue(), ce.popValue()
  3420  			if op.B1 == wazeroir.ShapeF32x4 {
  3421  				lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) |
  3422  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3423  				hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) |
  3424  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3425  			} else {
  3426  				lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo)))
  3427  				hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi)))
  3428  			}
  3429  			ce.pushValue(lo)
  3430  			ce.pushValue(hi)
  3431  			frame.pc++
  3432  		case wazeroir.OperationKindV128Extend:
        			// Widens the integer lanes of one 64-bit half of the vector to twice their
        			// width, producing a full 128-bit result. op.B3 picks the source half,
        			// op.B2 == 1 requests sign extension (otherwise zero extension), and op.B1
        			// is the source lane shape. A shape with no case below yields zeros.
  3433  			hi, lo := ce.popValue(), ce.popValue()
  3434  			var origin uint64
  3435  			if op.B3 { // use lower 64 bits
  3436  				origin = lo
  3437  			} else {
  3438  				origin = hi
  3439  			}
  3440  
  3441  			signed := op.B2 == 1
  3442  
  3443  			var retHi, retLo uint64
  3444  			switch op.B1 {
  3445  			case wazeroir.ShapeI8x16:
        				// Eight 8-bit lanes become eight 16-bit lanes: results 0-3 go to
        				// retLo, results 4-7 to retHi.
  3446  				for i := 0; i < 8; i++ {
  3447  					v8 := byte(origin >> (i * 8))
  3448  
  3449  					var v16 uint16
  3450  					if signed {
        						// Converting through int8 sign-extends into the 16-bit value.
  3451  						v16 = uint16(int8(v8))
  3452  					} else {
  3453  						v16 = uint16(v8)
  3454  					}
  3455  
  3456  					if i < 4 {
  3457  						retLo |= uint64(v16) << (i * 16)
  3458  					} else {
  3459  						retHi |= uint64(v16) << ((i - 4) * 16)
  3460  					}
  3461  				}
  3462  			case wazeroir.ShapeI16x8:
        				// Four 16-bit lanes become four 32-bit lanes: results 0-1 go to
        				// retLo, results 2-3 to retHi.
  3463  				for i := 0; i < 4; i++ {
  3464  					v16 := uint16(origin >> (i * 16))
  3465  
  3466  					var v32 uint32
  3467  					if signed {
  3468  						v32 = uint32(int16(v16))
  3469  					} else {
  3470  						v32 = uint32(v16)
  3471  					}
  3472  
  3473  					if i < 2 {
  3474  						retLo |= uint64(v32) << (i * 32)
  3475  					} else {
  3476  						retHi |= uint64(v32) << ((i - 2) * 32)
  3477  					}
  3478  				}
  3479  			case wazeroir.ShapeI32x4:
        				// Two 32-bit lanes become two 64-bit lanes, one per result half.
  3480  				v32Lo := uint32(origin)
  3481  				v32Hi := uint32(origin >> 32)
  3482  				if signed {
  3483  					retLo = uint64(int32(v32Lo))
  3484  					retHi = uint64(int32(v32Hi))
  3485  				} else {
  3486  					retLo = uint64(v32Lo)
  3487  					retHi = uint64(v32Hi)
  3488  				}
  3489  			}
  3490  			ce.pushValue(retLo)
  3491  			ce.pushValue(retHi)
  3492  			frame.pc++
  3493  		case wazeroir.OperationKindV128ExtMul:
        			// Extended multiplication: lanes from the same 64-bit half of both
        			// operands are widened to twice their width and multiplied, so the product
        			// is computed at the wider width. op.B3 picks the half, op.B2 == 1 selects
        			// signed widening, and op.B1 is the source lane shape. x2 is popped first,
        			// then x1. A shape with no case below yields zeros.
  3494  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3495  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3496  			var x1, x2 uint64
  3497  			if op.B3 { // use lower 64 bits
  3498  				x1, x2 = x1Lo, x2Lo
  3499  			} else {
  3500  				x1, x2 = x1Hi, x2Hi
  3501  			}
  3502  
  3503  			signed := op.B2 == 1
  3504  
  3505  			var retLo, retHi uint64
  3506  			switch op.B1 {
  3507  			case wazeroir.ShapeI8x16:
        				// Eight 8-bit lane pairs produce eight 16-bit products: products 0-3
        				// go to retLo, products 4-7 to retHi.
  3508  				for i := 0; i < 8; i++ {
  3509  					v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8))
  3510  
  3511  					var v16 uint16
  3512  					if signed {
  3513  						v16 = uint16(int16(int8(v1)) * int16(int8(v2)))
  3514  					} else {
  3515  						v16 = uint16(v1) * uint16(v2)
  3516  					}
  3517  
  3518  					if i < 4 {
  3519  						retLo |= uint64(v16) << (i * 16)
  3520  					} else {
  3521  						retHi |= uint64(v16) << ((i - 4) * 16)
  3522  					}
  3523  				}
  3524  			case wazeroir.ShapeI16x8:
        				// Four 16-bit lane pairs produce four 32-bit products: products 0-1
        				// go to retLo, products 2-3 to retHi.
  3525  				for i := 0; i < 4; i++ {
  3526  					v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16))
  3527  
  3528  					var v32 uint32
  3529  					if signed {
  3530  						v32 = uint32(int32(int16(v1)) * int32(int16(v2)))
  3531  					} else {
  3532  						v32 = uint32(v1) * uint32(v2)
  3533  					}
  3534  
  3535  					if i < 2 {
  3536  						retLo |= uint64(v32) << (i * 32)
  3537  					} else {
  3538  						retHi |= uint64(v32) << ((i - 2) * 32)
  3539  					}
  3540  				}
  3541  			case wazeroir.ShapeI32x4:
        				// Two 32-bit lane pairs produce two 64-bit products, one per half.
  3542  				v1Lo, v2Lo := uint32(x1), uint32(x2)
  3543  				v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32)
  3544  				if signed {
  3545  					retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo)))
  3546  					retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi)))
  3547  				} else {
  3548  					retLo = uint64(v1Lo) * uint64(v2Lo)
  3549  					retHi = uint64(v1Hi) * uint64(v2Hi)
  3550  				}
  3551  			}
  3552  
  3553  			ce.pushValue(retLo)
  3554  			ce.pushValue(retHi)
  3555  			frame.pc++
  3556  		case wazeroir.OperationKindV128Q15mulrSatS:
        			// For each of the eight signed 16-bit lanes, computes
        			// (v*w + 0x4000) >> 15 in int32 and clamps the result to the int16 range.
        			// x2 is popped first, then x1. Lanes 0-3 live in the lo halves, lanes 4-7
        			// in the hi halves.
  3557  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  3558  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  3559  			var retLo, retHi uint64
  3560  			for i := 0; i < 8; i++ {
  3561  				var v, w int16
  3562  				if i < 4 {
  3563  					v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16)))
  3564  				} else {
  3565  					v, w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16)))
  3566  				}
  3567  
  3568  				var uv uint64
  3569  				// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication
        				// Below the int16 minimum -> 0x8000, above the maximum -> 0x7fff,
        				// otherwise the value itself as a 16-bit pattern.
  3570  				if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 {
  3571  					uv = uint64(uint16(0x8000))
  3572  				} else if calc > math.MaxInt16 {
  3573  					uv = uint64(uint16(0x7fff))
  3574  				} else {
  3575  					uv = uint64(uint16(int16(calc)))
  3576  				}
  3577  
  3578  				if i < 4 {
  3579  					retLo |= uv << (i * 16)
  3580  				} else {
  3581  					retHi |= uv << ((i - 4) * 16)
  3582  				}
  3583  			}
  3584  
  3585  			ce.pushValue(retLo)
  3586  			ce.pushValue(retHi)
  3587  			frame.pc++
  3588  		case wazeroir.OperationKindV128ExtAddPairwise:
        			// Adds each pair of adjacent lanes after widening them to twice their
        			// width, halving the lane count. op.B3 selects signed widening and op.B1
        			// is the source lane shape; only ShapeI8x16 and ShapeI16x8 are handled,
        			// any other shape yields zeros. hi is popped first, then lo.
  3589  			hi, lo := ce.popValue(), ce.popValue()
  3590  
  3591  			signed := op.B3
  3592  
  3593  			var retLo, retHi uint64
  3594  			switch op.B1 {
  3595  			case wazeroir.ShapeI8x16:
        				// Result lane i (16 bits) = source bytes 2i and 2i+1. Results 0-3 read
        				// from lo and land in retLo; results 4-7 read from hi and land in retHi.
  3596  				for i := 0; i < 8; i++ {
  3597  					var v1, v2 byte
  3598  					if i < 4 {
  3599  						v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8))
  3600  					} else {
  3601  						v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8))
  3602  					}
  3603  
  3604  					var v16 uint16
  3605  					if signed {
  3606  						v16 = uint16(int16(int8(v1)) + int16(int8(v2)))
  3607  					} else {
  3608  						v16 = uint16(v1) + uint16(v2)
  3609  					}
  3610  
  3611  					if i < 4 {
  3612  						retLo |= uint64(v16) << (i * 16)
  3613  					} else {
  3614  						retHi |= uint64(v16) << ((i - 4) * 16)
  3615  					}
  3616  				}
  3617  			case wazeroir.ShapeI16x8:
        				// Result lane i (32 bits) = source 16-bit lanes 2i and 2i+1. Results
        				// 0-1 read from lo and land in retLo; results 2-3 read from hi and
        				// land in retHi.
  3618  				for i := 0; i < 4; i++ {
  3619  					var v1, v2 uint16
  3620  					if i < 2 {
  3621  						v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16))
  3622  					} else {
  3623  						v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16))
  3624  					}
  3625  
  3626  					var v32 uint32
  3627  					if signed {
  3628  						v32 = uint32(int32(int16(v1)) + int32(int16(v2)))
  3629  					} else {
  3630  						v32 = uint32(v1) + uint32(v2)
  3631  					}
  3632  
  3633  					if i < 2 {
  3634  						retLo |= uint64(v32) << (i * 32)
  3635  					} else {
  3636  						retHi |= uint64(v32) << ((i - 2) * 32)
  3637  					}
  3638  				}
  3639  			}
  3640  			ce.pushValue(retLo)
  3641  			ce.pushValue(retHi)
  3642  			frame.pc++
  3643  		case wazeroir.OperationKindV128FloatPromote:
        			// Converts the two float32 values packed in the second-popped stack slot
        			// into two float64 values. The first-popped slot is discarded. The float64
        			// from bits 0-31 is pushed first, then the one from bits 32-63.
  3644  			_, toPromote := ce.popValue(), ce.popValue()
  3645  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote)))))
  3646  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote >> 32)))))
  3647  			frame.pc++
  3648  		case wazeroir.OperationKindV128FloatDemote:
        			// Converts the two float64 lanes (lo, hi) to float32 and packs them into a
        			// single uint64: float32(lo) in bits 0-31, float32(hi) in bits 32-63. That
        			// value is pushed first, followed by a zero for the other half.
  3649  			hi, lo := ce.popValue(), ce.popValue()
  3650  			ce.pushValue(
  3651  				uint64(math.Float32bits(float32(math.Float64frombits(lo)))) |
  3652  					(uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32),
  3653  			)
  3654  			ce.pushValue(0)
  3655  			frame.pc++
  3656  		case wazeroir.OperationKindV128FConvertFromI:
        			// Converts 32-bit integer lanes to float lanes. op.B3 selects whether the
        			// integers are interpreted as signed; op.B1 is the destination float shape.
        			// hi is popped first, then lo. A shape with no case below yields zeros.
  3657  			hi, lo := ce.popValue(), ce.popValue()
        			// v1..v4 are the four 32-bit lanes, lowest first.
  3658  			v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32)
  3659  			signed := op.B3
  3660  
  3661  			var retLo, retHi uint64
  3662  			switch op.B1 { // Destination shape.
  3663  			case wazeroir.ShapeF32x4: // f32x4 from signed/unsigned i32x4
  3664  				if signed {
  3665  					retLo = uint64(math.Float32bits(float32(int32(v1)))) |
  3666  						(uint64(math.Float32bits(float32(int32(v2)))) << 32)
  3667  					retHi = uint64(math.Float32bits(float32(int32(v3)))) |
  3668  						(uint64(math.Float32bits(float32(int32(v4)))) << 32)
  3669  				} else {
  3670  					retLo = uint64(math.Float32bits(float32(v1))) |
  3671  						(uint64(math.Float32bits(float32(v2))) << 32)
  3672  					retHi = uint64(math.Float32bits(float32(v3))) |
  3673  						(uint64(math.Float32bits(float32(v4))) << 32)
  3674  				}
  3675  			case wazeroir.ShapeF64x2: // f64x2 from signed/unsigned i32x4
        				// Only the two lanes from the lo half (v1, v2) are converted; v3 and
        				// v4 are not used for this shape.
  3676  				if signed {
  3677  					retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2)))
  3678  				} else {
  3679  					retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2))
  3680  				}
  3681  			}
  3682  
  3683  			ce.pushValue(retLo)
  3684  			ce.pushValue(retHi)
  3685  			frame.pc++
  3686  		case wazeroir.OperationKindV128Narrow:
        			// Saturating narrow of two vectors into one with half-width lanes. The
        			// lanes of x1 fill retLo and the lanes of x2 fill retHi. Source lanes are
        			// always read as signed integers; op.B3 decides whether they are clamped
        			// to the signed or the unsigned range of the narrower type. op.B1 is the
        			// source lane shape. x2 is popped first, then x1. A shape with no case
        			// below yields zeros.
  3687  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3688  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3689  			signed := op.B3
  3690  
  3691  			var retLo, retHi uint64
  3692  			switch op.B1 {
  3693  			case wazeroir.ShapeI16x8: // signed/unsigned i16x8 to i8x16
        				// First loop: the eight 16-bit lanes of x1 become the eight bytes of
        				// retLo.
  3694  				for i := 0; i < 8; i++ {
  3695  					var v16 uint16
  3696  					if i < 4 {
  3697  						v16 = uint16(x1Lo >> (i * 16))
  3698  					} else {
  3699  						v16 = uint16(x1Hi >> ((i - 4) * 16))
  3700  					}
  3701  
  3702  					var v byte
  3703  					if signed {
        						// Clamp to [math.MinInt8, math.MaxInt8].
  3704  						if s := int16(v16); s > math.MaxInt8 {
  3705  							v = math.MaxInt8
  3706  						} else if s < math.MinInt8 {
  3707  							s = math.MinInt8
  3708  							v = byte(s)
  3709  						} else {
  3710  							v = byte(v16)
  3711  						}
  3712  					} else {
        						// Clamp to [0, math.MaxUint8]; the source is still compared as
        						// a signed int16, so negative lanes become 0.
  3713  						if s := int16(v16); s > math.MaxUint8 {
  3714  							v = math.MaxUint8
  3715  						} else if s < 0 {
  3716  							v = 0
  3717  						} else {
  3718  							v = byte(v16)
  3719  						}
  3720  					}
  3721  					retLo |= uint64(v) << (i * 8)
  3722  				}
        				// Second loop: same clamping for the lanes of x2, filling retHi.
  3723  				for i := 0; i < 8; i++ {
  3724  					var v16 uint16
  3725  					if i < 4 {
  3726  						v16 = uint16(x2Lo >> (i * 16))
  3727  					} else {
  3728  						v16 = uint16(x2Hi >> ((i - 4) * 16))
  3729  					}
  3730  
  3731  					var v byte
  3732  					if signed {
  3733  						if s := int16(v16); s > math.MaxInt8 {
  3734  							v = math.MaxInt8
  3735  						} else if s < math.MinInt8 {
  3736  							s = math.MinInt8
  3737  							v = byte(s)
  3738  						} else {
  3739  							v = byte(v16)
  3740  						}
  3741  					} else {
  3742  						if s := int16(v16); s > math.MaxUint8 {
  3743  							v = math.MaxUint8
  3744  						} else if s < 0 {
  3745  							v = 0
  3746  						} else {
  3747  							v = byte(v16)
  3748  						}
  3749  					}
  3750  					retHi |= uint64(v) << (i * 8)
  3751  				}
  3752  			case wazeroir.ShapeI32x4: // signed/unsigned i32x4 to i16x8
        				// First loop: the four 32-bit lanes of x1 become the four 16-bit
        				// lanes of retLo.
  3753  				for i := 0; i < 4; i++ {
  3754  					var v32 uint32
  3755  					if i < 2 {
  3756  						v32 = uint32(x1Lo >> (i * 32))
  3757  					} else {
  3758  						v32 = uint32(x1Hi >> ((i - 2) * 32))
  3759  					}
  3760  
  3761  					var v uint16
  3762  					if signed {
        						// Clamp to [math.MinInt16, math.MaxInt16].
  3763  						if s := int32(v32); s > math.MaxInt16 {
  3764  							v = math.MaxInt16
  3765  						} else if s < math.MinInt16 {
  3766  							s = math.MinInt16
  3767  							v = uint16(s)
  3768  						} else {
  3769  							v = uint16(v32)
  3770  						}
  3771  					} else {
        						// Clamp to [0, math.MaxUint16]; the source is still compared as
        						// a signed int32, so negative lanes become 0.
  3772  						if s := int32(v32); s > math.MaxUint16 {
  3773  							v = math.MaxUint16
  3774  						} else if s < 0 {
  3775  							v = 0
  3776  						} else {
  3777  							v = uint16(v32)
  3778  						}
  3779  					}
  3780  					retLo |= uint64(v) << (i * 16)
  3781  				}
  3782  
        				// Second loop: same clamping for the lanes of x2, filling retHi.
  3783  				for i := 0; i < 4; i++ {
  3784  					var v32 uint32
  3785  					if i < 2 {
  3786  						v32 = uint32(x2Lo >> (i * 32))
  3787  					} else {
  3788  						v32 = uint32(x2Hi >> ((i - 2) * 32))
  3789  					}
  3790  
  3791  					var v uint16
  3792  					if signed {
  3793  						if s := int32(v32); s > math.MaxInt16 {
  3794  							v = math.MaxInt16
  3795  						} else if s < math.MinInt16 {
  3796  							s = math.MinInt16
  3797  							v = uint16(s)
  3798  						} else {
  3799  							v = uint16(v32)
  3800  						}
  3801  					} else {
  3802  						if s := int32(v32); s > math.MaxUint16 {
  3803  							v = math.MaxUint16
  3804  						} else if s < 0 {
  3805  							v = 0
  3806  						} else {
  3807  							v = uint16(v32)
  3808  						}
  3809  					}
  3810  					retHi |= uint64(v) << (i * 16)
  3811  				}
  3812  			}
  3813  			ce.pushValue(retLo)
  3814  			ce.pushValue(retHi)
  3815  			frame.pc++
  3816  		case wazeroir.OperationKindV128Dot:
  3817  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3818  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3819  			ce.pushValue(
  3820  				uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) |
  3821  					(uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32),
  3822  			)
  3823  			ce.pushValue(
  3824  				uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) |
  3825  					(uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32),
  3826  			)
  3827  			frame.pc++
  3828  		case wazeroir.OperationKindV128ITruncSatFromF:
  3829  			hi, lo := ce.popValue(), ce.popValue()
  3830  			signed := op.B3
  3831  			var retLo, retHi uint64
  3832  
  3833  			switch op.B1 {
  3834  			case wazeroir.ShapeF32x4: // f32x4 to i32x4
  3835  				for i, f64 := range [4]float64{
  3836  					math.Trunc(float64(math.Float32frombits(uint32(lo)))),
  3837  					math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))),
  3838  					math.Trunc(float64(math.Float32frombits(uint32(hi)))),
  3839  					math.Trunc(float64(math.Float32frombits(uint32(hi >> 32)))),
  3840  				} {
  3841  
  3842  					var v uint32
  3843  					if math.IsNaN(f64) {
  3844  						v = 0
  3845  					} else if signed {
  3846  						if f64 < math.MinInt32 {
  3847  							f64 = math.MinInt32
  3848  						} else if f64 > math.MaxInt32 {
  3849  							f64 = math.MaxInt32
  3850  						}
  3851  						v = uint32(int32(f64))
  3852  					} else {
  3853  						if f64 < 0 {
  3854  							f64 = 0
  3855  						} else if f64 > math.MaxUint32 {
  3856  							f64 = math.MaxUint32
  3857  						}
  3858  						v = uint32(f64)
  3859  					}
  3860  
  3861  					if i < 2 {
  3862  						retLo |= uint64(v) << (i * 32)
  3863  					} else {
  3864  						retHi |= uint64(v) << ((i - 2) * 32)
  3865  					}
  3866  				}
  3867  
  3868  			case wazeroir.ShapeF64x2: // f64x2 to i32x4
  3869  				for i, f := range [2]float64{
  3870  					math.Trunc(math.Float64frombits(lo)),
  3871  					math.Trunc(math.Float64frombits(hi)),
  3872  				} {
  3873  					var v uint32
  3874  					if math.IsNaN(f) {
  3875  						v = 0
  3876  					} else if signed {
  3877  						if f < math.MinInt32 {
  3878  							f = math.MinInt32
  3879  						} else if f > math.MaxInt32 {
  3880  							f = math.MaxInt32
  3881  						}
  3882  						v = uint32(int32(f))
  3883  					} else {
  3884  						if f < 0 {
  3885  							f = 0
  3886  						} else if f > math.MaxUint32 {
  3887  							f = math.MaxUint32
  3888  						}
  3889  						v = uint32(f)
  3890  					}
  3891  
  3892  					retLo |= uint64(v) << (i * 32)
  3893  				}
  3894  			}
  3895  
  3896  			ce.pushValue(retLo)
  3897  			ce.pushValue(retHi)
  3898  			frame.pc++
  3899  		default:
  3900  			frame.pc++
  3901  		}
  3902  	}
  3903  	ce.popFrame()
  3904  }
  3905  
  3906  func WasmCompatMax32bits(v1, v2 uint32) uint64 {
  3907  	return uint64(math.Float32bits(moremath.WasmCompatMax32(
  3908  		math.Float32frombits(v1),
  3909  		math.Float32frombits(v2),
  3910  	)))
  3911  }
  3912  
  3913  func WasmCompatMin32bits(v1, v2 uint32) uint64 {
  3914  	return uint64(math.Float32bits(moremath.WasmCompatMin32(
  3915  		math.Float32frombits(v1),
  3916  		math.Float32frombits(v2),
  3917  	)))
  3918  }
  3919  
  3920  func addFloat32bits(v1, v2 uint32) uint64 {
  3921  	return uint64(math.Float32bits(math.Float32frombits(v1) + math.Float32frombits(v2)))
  3922  }
  3923  
  3924  func subFloat32bits(v1, v2 uint32) uint64 {
  3925  	return uint64(math.Float32bits(math.Float32frombits(v1) - math.Float32frombits(v2)))
  3926  }
  3927  
  3928  func mulFloat32bits(v1, v2 uint32) uint64 {
  3929  	return uint64(math.Float32bits(math.Float32frombits(v1) * math.Float32frombits(v2)))
  3930  }
  3931  
  3932  func divFloat32bits(v1, v2 uint32) uint64 {
  3933  	return uint64(math.Float32bits(math.Float32frombits(v1) / math.Float32frombits(v2)))
  3934  }
  3935  
  3936  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  3937  func flt32(z1, z2 float32) bool {
  3938  	if z1 != z1 || z2 != z2 {
  3939  		return false
  3940  	} else if z1 == z2 {
  3941  		return false
  3942  	} else if math.IsInf(float64(z1), 1) {
  3943  		return false
  3944  	} else if math.IsInf(float64(z1), -1) {
  3945  		return true
  3946  	} else if math.IsInf(float64(z2), 1) {
  3947  		return true
  3948  	} else if math.IsInf(float64(z2), -1) {
  3949  		return false
  3950  	}
  3951  	return z1 < z2
  3952  }
  3953  
  3954  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  3955  func flt64(z1, z2 float64) bool {
  3956  	if z1 != z1 || z2 != z2 {
  3957  		return false
  3958  	} else if z1 == z2 {
  3959  		return false
  3960  	} else if math.IsInf(z1, 1) {
  3961  		return false
  3962  	} else if math.IsInf(z1, -1) {
  3963  		return true
  3964  	} else if math.IsInf(z2, 1) {
  3965  		return true
  3966  	} else if math.IsInf(z2, -1) {
  3967  		return false
  3968  	}
  3969  	return z1 < z2
  3970  }
  3971  
  3972  func i8RoundingAverage(v1, v2 byte) byte {
  3973  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  3974  	return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2)
  3975  }
  3976  
  3977  func i16RoundingAverage(v1, v2 uint16) uint16 {
  3978  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  3979  	return uint16((uint32(v1) + uint32(v2) + 1) / 2)
  3980  }
  3981  
  3982  func i8Abs(v byte) byte {
  3983  	if i := int8(v); i < 0 {
  3984  		return byte(-i)
  3985  	} else {
  3986  		return byte(i)
  3987  	}
  3988  }
  3989  
  3990  func i8MaxU(v1, v2 byte) byte {
  3991  	if v1 < v2 {
  3992  		return v2
  3993  	} else {
  3994  		return v1
  3995  	}
  3996  }
  3997  
  3998  func i8MinU(v1, v2 byte) byte {
  3999  	if v1 > v2 {
  4000  		return v2
  4001  	} else {
  4002  		return v1
  4003  	}
  4004  }
  4005  
  4006  func i8MaxS(v1, v2 byte) byte {
  4007  	if int8(v1) < int8(v2) {
  4008  		return v2
  4009  	} else {
  4010  		return v1
  4011  	}
  4012  }
  4013  
  4014  func i8MinS(v1, v2 byte) byte {
  4015  	if int8(v1) > int8(v2) {
  4016  		return v2
  4017  	} else {
  4018  		return v1
  4019  	}
  4020  }
  4021  
  4022  func i16MaxU(v1, v2 uint16) uint16 {
  4023  	if v1 < v2 {
  4024  		return v2
  4025  	} else {
  4026  		return v1
  4027  	}
  4028  }
  4029  
  4030  func i16MinU(v1, v2 uint16) uint16 {
  4031  	if v1 > v2 {
  4032  		return v2
  4033  	} else {
  4034  		return v1
  4035  	}
  4036  }
  4037  
  4038  func i16MaxS(v1, v2 uint16) uint16 {
  4039  	if int16(v1) < int16(v2) {
  4040  		return v2
  4041  	} else {
  4042  		return v1
  4043  	}
  4044  }
  4045  
  4046  func i16MinS(v1, v2 uint16) uint16 {
  4047  	if int16(v1) > int16(v2) {
  4048  		return v2
  4049  	} else {
  4050  		return v1
  4051  	}
  4052  }
  4053  
  4054  func i32MaxU(v1, v2 uint32) uint32 {
  4055  	if v1 < v2 {
  4056  		return v2
  4057  	} else {
  4058  		return v1
  4059  	}
  4060  }
  4061  
  4062  func i32MinU(v1, v2 uint32) uint32 {
  4063  	if v1 > v2 {
  4064  		return v2
  4065  	} else {
  4066  		return v1
  4067  	}
  4068  }
  4069  
  4070  func i32MaxS(v1, v2 uint32) uint32 {
  4071  	if int32(v1) < int32(v2) {
  4072  		return v2
  4073  	} else {
  4074  		return v1
  4075  	}
  4076  }
  4077  
  4078  func i32MinS(v1, v2 uint32) uint32 {
  4079  	if int32(v1) > int32(v2) {
  4080  		return v2
  4081  	} else {
  4082  		return v1
  4083  	}
  4084  }
  4085  
  4086  func i16Abs(v uint16) uint16 {
  4087  	if i := int16(v); i < 0 {
  4088  		return uint16(-i)
  4089  	} else {
  4090  		return uint16(i)
  4091  	}
  4092  }
  4093  
  4094  func i32Abs(v uint32) uint32 {
  4095  	if i := int32(v); i < 0 {
  4096  		return uint32(-i)
  4097  	} else {
  4098  		return uint32(i)
  4099  	}
  4100  }
  4101  
  4102  func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, m *wasm.ModuleInstance, f *function, fnl experimental.FunctionListener) context.Context {
  4103  	def, typ := f.definition(), f.funcType
  4104  
  4105  	ce.stackIterator.reset(ce.stack, ce.frames, f)
  4106  	fnl.Before(ctx, m, def, ce.peekValues(len(typ.Params)), &ce.stackIterator)
  4107  	ce.stackIterator.clear()
  4108  	ce.callNativeFunc(ctx, m, f)
  4109  	fnl.After(ctx, m, def, ce.peekValues(len(typ.Results)))
  4110  	return ctx
  4111  }
  4112  
  4113  // popMemoryOffset takes a memory offset off the stack for use in load and store instructions.
  4114  // As the top of stack value is 64-bit, this ensures it is in range before returning it.
  4115  func (ce *callEngine) popMemoryOffset(op *wazeroir.UnionOperation) uint32 {
  4116  	// TODO: Document what 'us' is and why we expect to look at value 1.
  4117  	offset := op.U2 + ce.popValue()
  4118  	if offset > math.MaxUint32 {
  4119  		panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4120  	}
  4121  	return uint32(offset)
  4122  }
  4123  
  4124  func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleInstance, f *function) {
  4125  	typ := f.funcType
  4126  	paramLen := typ.ParamNumInUint64
  4127  	resultLen := typ.ResultNumInUint64
  4128  	stackLen := paramLen
  4129  
  4130  	// In the interpreter engine, ce.stack may only have capacity to store
  4131  	// parameters. Grow when there are more results than parameters.
  4132  	if growLen := resultLen - paramLen; growLen > 0 {
  4133  		for i := 0; i < growLen; i++ {
  4134  			ce.stack = append(ce.stack, 0)
  4135  		}
  4136  		stackLen += growLen
  4137  	}
  4138  
  4139  	// Pass the stack elements to the go function.
  4140  	stack := ce.stack[len(ce.stack)-stackLen:]
  4141  	ce.callGoFunc(ctx, m, f, stack)
  4142  
  4143  	// Shrink the stack when there were more parameters than results.
  4144  	if shrinkLen := paramLen - resultLen; shrinkLen > 0 {
  4145  		ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen]
  4146  	}
  4147  }