github.com/tetratelabs/wazero@v1.7.1/internal/engine/interpreter/interpreter.go (about)

     1  package interpreter
     2  
     3  import (
     4  	"context"
     5  	"encoding/binary"
     6  	"errors"
     7  	"fmt"
     8  	"math"
     9  	"math/bits"
    10  	"sync"
    11  	"unsafe"
    12  
    13  	"github.com/tetratelabs/wazero/api"
    14  	"github.com/tetratelabs/wazero/experimental"
    15  	"github.com/tetratelabs/wazero/internal/expctxkeys"
    16  	"github.com/tetratelabs/wazero/internal/filecache"
    17  	"github.com/tetratelabs/wazero/internal/internalapi"
    18  	"github.com/tetratelabs/wazero/internal/moremath"
    19  	"github.com/tetratelabs/wazero/internal/wasm"
    20  	"github.com/tetratelabs/wazero/internal/wasmdebug"
    21  	"github.com/tetratelabs/wazero/internal/wasmruntime"
    22  	"github.com/tetratelabs/wazero/internal/wazeroir"
    23  )
    24  
// callStackCeiling is the maximum WebAssembly call frame stack height. This allows wazero to raise
// wasmruntime.ErrRuntimeStackOverflow instead of overflowing the Go runtime.
//
// The default value should suffice for most use cases. Those wishing to change this can do so via
// `go build -ldflags`.
var callStackCeiling = 2000
    30  
// engine is an interpreter implementation of wasm.Engine.
//
// It caches the lowered functions of every compiled module, keyed by the module ID.
type engine struct {
	enabledFeatures   api.CoreFeatures
	compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mux.
	mux               sync.RWMutex
	// labelAddressResolutionCache is the temporary cache used to map LabelKind -> FrameID -> the index to the body.
	// lowerIR fills it while lowering a function and truncates it again before returning.
	labelAddressResolutionCache [wazeroir.LabelKindNum][]uint64
}
    39  
    40  func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine {
    41  	return &engine{
    42  		enabledFeatures:   enabledFeatures,
    43  		compiledFunctions: map[wasm.ModuleID][]compiledFunction{},
    44  	}
    45  }
    46  
    47  // Close implements the same method as documented on wasm.Engine.
    48  func (e *engine) Close() (err error) {
    49  	return
    50  }
    51  
    52  // CompiledModuleCount implements the same method as documented on wasm.Engine.
    53  func (e *engine) CompiledModuleCount() uint32 {
    54  	return uint32(len(e.compiledFunctions))
    55  }
    56  
    57  // DeleteCompiledModule implements the same method as documented on wasm.Engine.
    58  func (e *engine) DeleteCompiledModule(m *wasm.Module) {
    59  	e.deleteCompiledFunctions(m)
    60  }
    61  
    62  func (e *engine) deleteCompiledFunctions(module *wasm.Module) {
    63  	e.mux.Lock()
    64  	defer e.mux.Unlock()
    65  	delete(e.compiledFunctions, module.ID)
    66  }
    67  
    68  func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) {
    69  	e.mux.Lock()
    70  	defer e.mux.Unlock()
    71  	e.compiledFunctions[module.ID] = fs
    72  }
    73  
    74  func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) {
    75  	e.mux.RLock()
    76  	defer e.mux.RUnlock()
    77  	fs, ok = e.compiledFunctions[module.ID]
    78  	return
    79  }
    80  
// moduleEngine implements wasm.ModuleEngine
type moduleEngine struct {
	// functions are the functions of a module instance, including the imported ones.
	// The index is module instance-scoped: imported functions come first, followed by
	// the functions defined by the module itself (see NewModuleEngine).
	functions []function

	// parentEngine holds *engine from which this module engine is created from.
	parentEngine *engine
}
    90  
    91  // GetGlobalValue implements the same method as documented on wasm.ModuleEngine.
    92  func (e *moduleEngine) GetGlobalValue(wasm.Index) (lo, hi uint64) {
    93  	panic("BUG: GetGlobalValue should never be called on interpreter mode")
    94  }
    95  
    96  // SetGlobalValue implements the same method as documented on wasm.ModuleEngine.
    97  func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) {
    98  	panic("BUG: SetGlobalValue should never be called on interpreter mode")
    99  }
   100  
   101  // OwnsGlobals implements the same method as documented on wasm.ModuleEngine.
   102  func (e *moduleEngine) OwnsGlobals() bool { return false }
   103  
// callEngine holds context per moduleEngine.Call, and shared across all the
// function calls originating from the same moduleEngine.Call execution.
//
// This implements api.Function.
type callEngine struct {
	internalapi.WazeroOnlyType

	// stack contains the operands.
	// Note that all the values are represented as uint64.
	stack []uint64

	// frames are the function call stack. Its height is capped by callStackCeiling in pushFrame.
	frames []*callFrame

	// f is the initial function for this call engine.
	f *function

	// stackIterator is handed to function listeners so that they can walk frames and stack.
	stackIterator stackIterator
}
   124  
   125  func (e *moduleEngine) newCallEngine(compiled *function) *callEngine {
   126  	return &callEngine{f: compiled}
   127  }
   128  
   129  func (ce *callEngine) pushValue(v uint64) {
   130  	ce.stack = append(ce.stack, v)
   131  }
   132  
   133  func (ce *callEngine) pushValues(v []uint64) {
   134  	ce.stack = append(ce.stack, v...)
   135  }
   136  
   137  func (ce *callEngine) popValue() (v uint64) {
   138  	// No need to check stack bound
   139  	// as we can assume that all the operations
   140  	// are valid thanks to validateFunction
   141  	// at module validation phase
   142  	// and wazeroir translation
   143  	// before compilation.
   144  	stackTopIndex := len(ce.stack) - 1
   145  	v = ce.stack[stackTopIndex]
   146  	ce.stack = ce.stack[:stackTopIndex]
   147  	return
   148  }
   149  
   150  func (ce *callEngine) popValues(v []uint64) {
   151  	stackTopIndex := len(ce.stack) - len(v)
   152  	copy(v, ce.stack[stackTopIndex:])
   153  	ce.stack = ce.stack[:stackTopIndex]
   154  }
   155  
   156  // peekValues peeks api.ValueType values from the stack and returns them.
   157  func (ce *callEngine) peekValues(count int) []uint64 {
   158  	if count == 0 {
   159  		return nil
   160  	}
   161  	stackLen := len(ce.stack)
   162  	return ce.stack[stackLen-count : stackLen]
   163  }
   164  
   165  func (ce *callEngine) drop(raw uint64) {
   166  	r := wazeroir.InclusiveRangeFromU64(raw)
   167  	if r.Start == -1 {
   168  		return
   169  	} else if r.Start == 0 {
   170  		ce.stack = ce.stack[:int32(len(ce.stack))-1-r.End]
   171  	} else {
   172  		newStack := ce.stack[:int32(len(ce.stack))-1-r.End]
   173  		newStack = append(newStack, ce.stack[int32(len(ce.stack))-r.Start:]...)
   174  		ce.stack = newStack
   175  	}
   176  }
   177  
   178  func (ce *callEngine) pushFrame(frame *callFrame) {
   179  	if callStackCeiling <= len(ce.frames) {
   180  		panic(wasmruntime.ErrRuntimeStackOverflow)
   181  	}
   182  	ce.frames = append(ce.frames, frame)
   183  }
   184  
   185  func (ce *callEngine) popFrame() (frame *callFrame) {
   186  	// No need to check stack bound as we can assume that all the operations are valid thanks to validateFunction at
   187  	// module validation phase and wazeroir translation before compilation.
   188  	oneLess := len(ce.frames) - 1
   189  	frame = ce.frames[oneLess]
   190  	ce.frames = ce.frames[:oneLess]
   191  	return
   192  }
   193  
// callFrame is one entry of the call stack held in callEngine.frames.
type callFrame struct {
	// pc is the program counter representing the current position in code.body.
	pc uint64
	// f is the compiled function used in this function frame.
	f *function
	// base index in the frame of this function, used to detect the count of
	// values on the stack. It is set to the operand stack height at the time
	// the frame is created.
	base int
}
   203  
// compiledFunction is the module-scoped, instance-independent representation of
// a function, produced by engine.CompileModule.
type compiledFunction struct {
	// source is the module which defines this function.
	source              *wasm.Module
	// body is the lowered wazeroir operations whose branch targets are indexes into body.
	// It is left unset for host functions.
	body                []wazeroir.UnionOperation
	// listener is the optional function listener; nil when there is none.
	listener            experimental.FunctionListener
	// offsetsInWasmBinary is copied from the IR compilation result when it is non-empty,
	// and is indexed by the program counter.
	offsetsInWasmBinary []uint64
	// hostFn is the Go function when this is a host function, otherwise nil.
	hostFn              interface{}
	// ensureTermination is the flag passed to engine.CompileModule.
	ensureTermination   bool
	// index is the function index in the module's function index space, i.e. including imports.
	index               wasm.Index
}
   213  
// function is the module instance-scoped representation of a function:
// a compiledFunction bound to the instance it belongs to.
type function struct {
	// funcType is the signature of this function.
	funcType       *wasm.FunctionType
	// moduleInstance is the instance this function belongs to.
	moduleInstance *wasm.ModuleInstance
	// typeID is compared against the expected type ID on indirect calls.
	typeID         wasm.FunctionTypeID
	// parent is the compiled function which this function instantiates.
	parent         *compiledFunction
}
   220  
   221  // functionFromUintptr resurrects the original *function from the given uintptr
   222  // which comes from either funcref table or OpcodeRefFunc instruction.
   223  func functionFromUintptr(ptr uintptr) *function {
   224  	// Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector.
   225  	//
   226  	// For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr"
   227  	// subroutine wanrs as "checkptr: pointer arithmetic result points to invalid allocation"
   228  	// https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69
   229  	var wrapped *uintptr = &ptr
   230  	return *(**function)(unsafe.Pointer(wrapped))
   231  }
   232  
// snapshot is the state captured by callEngine.Snapshot. Restore panics with
// the snapshot itself, and doRestore puts the captured state back.
type snapshot struct {
	// stack is a copy of the operand stack at the time of the snapshot.
	stack  []uint64
	// frames is a copy of the frame pointer slice at the time of the snapshot.
	frames []*callFrame
	// pc is written to the topmost restored frame by doRestore.
	// NOTE(review): Snapshot does not populate this field, so it stays zero — confirm this is intended.
	pc     uint64

	// ret holds the values passed to Restore, which overwrite the top of the restored stack.
	ret []uint64

	// ce is the call engine which created this snapshot.
	ce *callEngine
}
   242  
   243  // Snapshot implements the same method as documented on experimental.Snapshotter.
   244  func (ce *callEngine) Snapshot() experimental.Snapshot {
   245  	stack := make([]uint64, len(ce.stack))
   246  	copy(stack, ce.stack)
   247  
   248  	frames := make([]*callFrame, len(ce.frames))
   249  	copy(frames, ce.frames)
   250  
   251  	return &snapshot{
   252  		stack:  stack,
   253  		frames: frames,
   254  		ce:     ce,
   255  	}
   256  }
   257  
   258  // Restore implements the same method as documented on experimental.Snapshot.
   259  func (s *snapshot) Restore(ret []uint64) {
   260  	s.ret = ret
   261  	panic(s)
   262  }
   263  
   264  func (s *snapshot) doRestore() {
   265  	ce := s.ce
   266  
   267  	ce.stack = s.stack
   268  	ce.frames = s.frames
   269  	ce.frames[len(ce.frames)-1].pc = s.pc
   270  
   271  	copy(ce.stack[len(ce.stack)-len(s.ret):], s.ret)
   272  }
   273  
   274  // Error implements the same method on error.
   275  func (s *snapshot) Error() string {
   276  	return "unhandled snapshot restore, this generally indicates restore was called from a different " +
   277  		"exported function invocation than snapshot"
   278  }
   279  
// stackIterator implements experimental.StackIterator.
//
// The first call to Next yields the function given to reset; each following
// call pops one frame and yields its function and program counter.
type stackIterator struct {
	// stack is the operand stack view, truncated to the frame base as Next advances.
	stack   []uint64
	// frames are the frames not yet visited; the last element is visited next.
	frames  []*callFrame
	// started reports whether Next has been called since the last reset.
	started bool
	// fn is the function of the current position.
	fn      *function
	// pc is the program counter of the current position.
	pc      uint64
}
   288  
   289  func (si *stackIterator) reset(stack []uint64, frames []*callFrame, f *function) {
   290  	si.fn = f
   291  	si.pc = 0
   292  	si.stack = stack
   293  	si.frames = frames
   294  	si.started = false
   295  }
   296  
   297  func (si *stackIterator) clear() {
   298  	si.stack = nil
   299  	si.frames = nil
   300  	si.started = false
   301  	si.fn = nil
   302  }
   303  
   304  // Next implements the same method as documented on experimental.StackIterator.
   305  func (si *stackIterator) Next() bool {
   306  	if !si.started {
   307  		si.started = true
   308  		return true
   309  	}
   310  
   311  	if len(si.frames) == 0 {
   312  		return false
   313  	}
   314  
   315  	frame := si.frames[len(si.frames)-1]
   316  	si.stack = si.stack[:frame.base]
   317  	si.fn = frame.f
   318  	si.pc = frame.pc
   319  	si.frames = si.frames[:len(si.frames)-1]
   320  	return true
   321  }
   322  
   323  // Function implements the same method as documented on
   324  // experimental.StackIterator.
   325  func (si *stackIterator) Function() experimental.InternalFunction {
   326  	return internalFunction{si.fn}
   327  }
   328  
   329  // ProgramCounter implements the same method as documented on
   330  // experimental.StackIterator.
   331  func (si *stackIterator) ProgramCounter() experimental.ProgramCounter {
   332  	return experimental.ProgramCounter(si.pc)
   333  }
   334  
// internalFunction implements experimental.InternalFunction by wrapping the
// interpreter's *function.
type internalFunction struct{ *function }
   337  
   338  // Definition implements the same method as documented on
   339  // experimental.InternalFunction.
   340  func (f internalFunction) Definition() api.FunctionDefinition {
   341  	return f.definition()
   342  }
   343  
   344  // SourceOffsetForPC implements the same method as documented on
   345  // experimental.InternalFunction.
   346  func (f internalFunction) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 {
   347  	offsetsMap := f.parent.offsetsInWasmBinary
   348  	if uint64(pc) < uint64(len(offsetsMap)) {
   349  		return offsetsMap[pc]
   350  	}
   351  	return 0
   352  }
   353  
// callFrameStackSize is the call frame size handed to wazeroir.NewCompiler.
// The interpreter mode doesn't maintain call frames in the value stack (they live in
// callEngine.frames instead), so pass the zero size to the IR.
const callFrameStackSize = 0
   356  
   357  // CompileModule implements the same method as documented on wasm.Engine.
   358  func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error {
   359  	if _, ok := e.getCompiledFunctions(module); ok { // cache hit!
   360  		return nil
   361  	}
   362  
   363  	funcs := make([]compiledFunction, len(module.FunctionSection))
   364  	irCompiler, err := wazeroir.NewCompiler(e.enabledFeatures, callFrameStackSize, module, ensureTermination)
   365  	if err != nil {
   366  		return err
   367  	}
   368  	imported := module.ImportFunctionCount
   369  	for i := range module.CodeSection {
   370  		var lsn experimental.FunctionListener
   371  		if i < len(listeners) {
   372  			lsn = listeners[i]
   373  		}
   374  
   375  		compiled := &funcs[i]
   376  		// If this is the host function, there's nothing to do as the runtime representation of
   377  		// host function in interpreter is its Go function itself as opposed to Wasm functions,
   378  		// which need to be compiled down to wazeroir.
   379  		if codeSeg := &module.CodeSection[i]; codeSeg.GoFunc != nil {
   380  			compiled.hostFn = codeSeg.GoFunc
   381  		} else {
   382  			ir, err := irCompiler.Next()
   383  			if err != nil {
   384  				return err
   385  			}
   386  			err = e.lowerIR(ir, compiled)
   387  			if err != nil {
   388  				def := module.FunctionDefinition(uint32(i) + module.ImportFunctionCount)
   389  				return fmt.Errorf("failed to lower func[%s] to wazeroir: %w", def.DebugName(), err)
   390  			}
   391  		}
   392  		compiled.source = module
   393  		compiled.ensureTermination = ensureTermination
   394  		compiled.listener = lsn
   395  		compiled.index = imported + uint32(i)
   396  	}
   397  	e.addCompiledFunctions(module, funcs)
   398  	return nil
   399  }
   400  
   401  // NewModuleEngine implements the same method as documented on wasm.Engine.
   402  func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInstance) (wasm.ModuleEngine, error) {
   403  	me := &moduleEngine{
   404  		parentEngine: e,
   405  		functions:    make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)),
   406  	}
   407  
   408  	codes, ok := e.getCompiledFunctions(module)
   409  	if !ok {
   410  		return nil, errors.New("source module must be compiled before instantiation")
   411  	}
   412  
   413  	for i := range codes {
   414  		c := &codes[i]
   415  		offset := i + int(module.ImportFunctionCount)
   416  		typeIndex := module.FunctionSection[i]
   417  		me.functions[offset] = function{
   418  			moduleInstance: instance,
   419  			typeID:         instance.TypeIDs[typeIndex],
   420  			funcType:       &module.TypeSection[typeIndex],
   421  			parent:         c,
   422  		}
   423  	}
   424  	return me, nil
   425  }
   426  
   427  // lowerIR lowers the wazeroir operations to engine friendly struct.
   428  func (e *engine) lowerIR(ir *wazeroir.CompilationResult, ret *compiledFunction) error {
   429  	// Copy the body from the result.
   430  	ret.body = make([]wazeroir.UnionOperation, len(ir.Operations))
   431  	copy(ret.body, ir.Operations)
   432  	// Also copy the offsets if necessary.
   433  	if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 {
   434  		ret.offsetsInWasmBinary = make([]uint64, len(offsets))
   435  		copy(ret.offsetsInWasmBinary, offsets)
   436  	}
   437  
   438  	// First, we iterate all labels, and resolve the address.
   439  	for i := range ret.body {
   440  		op := &ret.body[i]
   441  		switch op.Kind {
   442  		case wazeroir.OperationKindLabel:
   443  			label := wazeroir.Label(op.U1)
   444  			address := uint64(i)
   445  
   446  			kind, fid := label.Kind(), label.FrameID()
   447  			frameToAddresses := e.labelAddressResolutionCache[label.Kind()]
   448  			// Expand the slice if necessary.
   449  			if diff := fid - len(frameToAddresses) + 1; diff > 0 {
   450  				for j := 0; j < diff; j++ {
   451  					frameToAddresses = append(frameToAddresses, 0)
   452  				}
   453  			}
   454  			frameToAddresses[fid] = address
   455  			e.labelAddressResolutionCache[kind] = frameToAddresses
   456  		}
   457  	}
   458  
   459  	// Then resolve the label as the index to the body.
   460  	for i := range ret.body {
   461  		op := &ret.body[i]
   462  		switch op.Kind {
   463  		case wazeroir.OperationKindBr:
   464  			e.setLabelAddress(&op.U1, wazeroir.Label(op.U1))
   465  		case wazeroir.OperationKindBrIf:
   466  			e.setLabelAddress(&op.U1, wazeroir.Label(op.U1))
   467  			e.setLabelAddress(&op.U2, wazeroir.Label(op.U2))
   468  		case wazeroir.OperationKindBrTable:
   469  			for j := 0; j < len(op.Us); j += 2 {
   470  				target := op.Us[j]
   471  				e.setLabelAddress(&op.Us[j], wazeroir.Label(target))
   472  			}
   473  		}
   474  	}
   475  
   476  	// Reuses the slices for the subsequent compilation, so clear the content here.
   477  	for i := range e.labelAddressResolutionCache {
   478  		e.labelAddressResolutionCache[i] = e.labelAddressResolutionCache[i][:0]
   479  	}
   480  	return nil
   481  }
   482  
   483  func (e *engine) setLabelAddress(op *uint64, label wazeroir.Label) {
   484  	if label.IsReturnTarget() {
   485  		// Jmp to the end of the possible binary.
   486  		*op = math.MaxUint64
   487  	} else {
   488  		*op = e.labelAddressResolutionCache[label.Kind()][label.FrameID()]
   489  	}
   490  }
   491  
   492  // ResolveImportedFunction implements wasm.ModuleEngine.
   493  func (e *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {
   494  	imported := importedModuleEngine.(*moduleEngine)
   495  	e.functions[index] = imported.functions[indexInImportedModule]
   496  }
   497  
   498  // ResolveImportedMemory implements wasm.ModuleEngine.
   499  func (e *moduleEngine) ResolveImportedMemory(wasm.ModuleEngine) {}
   500  
   501  // DoneInstantiation implements wasm.ModuleEngine.
   502  func (e *moduleEngine) DoneInstantiation() {}
   503  
   504  // FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine.
   505  func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference {
   506  	return uintptr(unsafe.Pointer(&e.functions[funcIndex]))
   507  }
   508  
   509  // NewFunction implements the same method as documented on wasm.ModuleEngine.
   510  func (e *moduleEngine) NewFunction(index wasm.Index) (ce api.Function) {
   511  	// Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to
   512  	// code on close aren't locked, neither is this read.
   513  	compiled := &e.functions[index]
   514  	return e.newCallEngine(compiled)
   515  }
   516  
   517  // LookupFunction implements the same method as documented on wasm.ModuleEngine.
   518  func (e *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) {
   519  	if tableOffset >= uint32(len(t.References)) {
   520  		panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   521  	}
   522  	rawPtr := t.References[tableOffset]
   523  	if rawPtr == 0 {
   524  		panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   525  	}
   526  
   527  	tf := functionFromUintptr(rawPtr)
   528  	if tf.typeID != typeId {
   529  		panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   530  	}
   531  	return tf.moduleInstance, tf.parent.index
   532  }
   533  
   534  // Definition implements the same method as documented on api.Function.
   535  func (ce *callEngine) Definition() api.FunctionDefinition {
   536  	return ce.f.definition()
   537  }
   538  
   539  func (f *function) definition() api.FunctionDefinition {
   540  	compiled := f.parent
   541  	return compiled.source.FunctionDefinition(compiled.index)
   542  }
   543  
   544  // Call implements the same method as documented on api.Function.
   545  func (ce *callEngine) Call(ctx context.Context, params ...uint64) (results []uint64, err error) {
   546  	ft := ce.f.funcType
   547  	if n := ft.ParamNumInUint64; n != len(params) {
   548  		return nil, fmt.Errorf("expected %d params, but passed %d", n, len(params))
   549  	}
   550  	return ce.call(ctx, params, nil)
   551  }
   552  
   553  // CallWithStack implements the same method as documented on api.Function.
   554  func (ce *callEngine) CallWithStack(ctx context.Context, stack []uint64) error {
   555  	params, results, err := wasm.SplitCallStack(ce.f.funcType, stack)
   556  	if err != nil {
   557  		return err
   558  	}
   559  	_, err = ce.call(ctx, params, results)
   560  	return err
   561  }
   562  
   563  func (ce *callEngine) call(ctx context.Context, params, results []uint64) (_ []uint64, err error) {
   564  	m := ce.f.moduleInstance
   565  	if ce.f.parent.ensureTermination {
   566  		select {
   567  		case <-ctx.Done():
   568  			// If the provided context is already done, close the call context
   569  			// and return the error.
   570  			m.CloseWithCtxErr(ctx)
   571  			return nil, m.FailIfClosed()
   572  		default:
   573  		}
   574  	}
   575  
   576  	if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil {
   577  		ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, ce)
   578  	}
   579  
   580  	defer func() {
   581  		// If the module closed during the call, and the call didn't err for another reason, set an ExitError.
   582  		if err == nil {
   583  			err = m.FailIfClosed()
   584  		}
   585  		// TODO: ^^ Will not fail if the function was imported from a closed module.
   586  
   587  		if v := recover(); v != nil {
   588  			err = ce.recoverOnCall(ctx, m, v)
   589  		}
   590  	}()
   591  
   592  	ce.pushValues(params)
   593  
   594  	if ce.f.parent.ensureTermination {
   595  		done := m.CloseModuleOnCanceledOrTimeout(ctx)
   596  		defer done()
   597  	}
   598  
   599  	ce.callFunction(ctx, m, ce.f)
   600  
   601  	// This returns a safe copy of the results, instead of a slice view. If we
   602  	// returned a re-slice, the caller could accidentally or purposefully
   603  	// corrupt the stack of subsequent calls.
   604  	ft := ce.f.funcType
   605  	if results == nil && ft.ResultNumInUint64 > 0 {
   606  		results = make([]uint64, ft.ResultNumInUint64)
   607  	}
   608  	ce.popValues(results)
   609  	return results, nil
   610  }
   611  
// functionListenerInvocation captures arguments needed to perform function
// listener invocations when unwinding the call stack.
type functionListenerInvocation struct {
	// FunctionListener is the listener of the unwound function; recoverOnCall calls its Abort.
	experimental.FunctionListener
	// def is the definition of the unwound function, passed to Abort.
	def api.FunctionDefinition
}
   618  
   619  // recoverOnCall takes the recovered value `recoverOnCall`, and wraps it
   620  // with the call frame stack traces. Also, reset the state of callEngine
   621  // so that it can be used for the subsequent calls.
   622  func (ce *callEngine) recoverOnCall(ctx context.Context, m *wasm.ModuleInstance, v interface{}) (err error) {
   623  	if s, ok := v.(*snapshot); ok {
   624  		// A snapshot that wasn't handled was created by a different call engine possibly from a nested wasm invocation,
   625  		// let it propagate up to be handled by the caller.
   626  		panic(s)
   627  	}
   628  
   629  	builder := wasmdebug.NewErrorBuilder()
   630  	frameCount := len(ce.frames)
   631  	functionListeners := make([]functionListenerInvocation, 0, 16)
   632  
   633  	if frameCount > wasmdebug.MaxFrames {
   634  		frameCount = wasmdebug.MaxFrames
   635  	}
   636  	for i := 0; i < frameCount; i++ {
   637  		frame := ce.popFrame()
   638  		f := frame.f
   639  		def := f.definition()
   640  		var sources []string
   641  		if parent := frame.f.parent; parent.body != nil && len(parent.offsetsInWasmBinary) > 0 {
   642  			sources = parent.source.DWARFLines.Line(parent.offsetsInWasmBinary[frame.pc])
   643  		}
   644  		builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
   645  		if f.parent.listener != nil {
   646  			functionListeners = append(functionListeners, functionListenerInvocation{
   647  				FunctionListener: f.parent.listener,
   648  				def:              f.definition(),
   649  			})
   650  		}
   651  	}
   652  
   653  	err = builder.FromRecovered(v)
   654  	for i := range functionListeners {
   655  		functionListeners[i].Abort(ctx, m, functionListeners[i].def, err)
   656  	}
   657  
   658  	// Allows the reuse of CallEngine.
   659  	ce.stack, ce.frames = ce.stack[:0], ce.frames[:0]
   660  	return
   661  }
   662  
   663  func (ce *callEngine) callFunction(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   664  	if f.parent.hostFn != nil {
   665  		ce.callGoFuncWithStack(ctx, m, f)
   666  	} else if lsn := f.parent.listener; lsn != nil {
   667  		ce.callNativeFuncWithListener(ctx, m, f, lsn)
   668  	} else {
   669  		ce.callNativeFunc(ctx, m, f)
   670  	}
   671  }
   672  
   673  func (ce *callEngine) callGoFunc(ctx context.Context, m *wasm.ModuleInstance, f *function, stack []uint64) {
   674  	typ := f.funcType
   675  	lsn := f.parent.listener
   676  	if lsn != nil {
   677  		params := stack[:typ.ParamNumInUint64]
   678  		ce.stackIterator.reset(ce.stack, ce.frames, f)
   679  		lsn.Before(ctx, m, f.definition(), params, &ce.stackIterator)
   680  		ce.stackIterator.clear()
   681  	}
   682  	frame := &callFrame{f: f, base: len(ce.stack)}
   683  	ce.pushFrame(frame)
   684  
   685  	fn := f.parent.hostFn
   686  	switch fn := fn.(type) {
   687  	case api.GoModuleFunction:
   688  		fn.Call(ctx, m, stack)
   689  	case api.GoFunction:
   690  		fn.Call(ctx, stack)
   691  	}
   692  
   693  	ce.popFrame()
   694  	if lsn != nil {
   695  		// TODO: This doesn't get the error due to use of panic to propagate them.
   696  		results := stack[:typ.ResultNumInUint64]
   697  		lsn.After(ctx, m, f.definition(), results)
   698  	}
   699  }
   700  
   701  func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   702  	frame := &callFrame{f: f, base: len(ce.stack)}
   703  	moduleInst := f.moduleInstance
   704  	functions := moduleInst.Engine.(*moduleEngine).functions
   705  	memoryInst := moduleInst.MemoryInstance
   706  	globals := moduleInst.Globals
   707  	tables := moduleInst.Tables
   708  	typeIDs := moduleInst.TypeIDs
   709  	dataInstances := moduleInst.DataInstances
   710  	elementInstances := moduleInst.ElementInstances
   711  	ce.pushFrame(frame)
   712  	body := frame.f.parent.body
   713  	bodyLen := uint64(len(body))
   714  	for frame.pc < bodyLen {
   715  		op := &body[frame.pc]
   716  		// TODO: add description of each operation/case
   717  		// on, for example, how many args are used,
   718  		// how the stack is modified, etc.
   719  		switch op.Kind {
   720  		case wazeroir.OperationKindBuiltinFunctionCheckExitCode:
   721  			if err := m.FailIfClosed(); err != nil {
   722  				panic(err)
   723  			}
   724  			frame.pc++
   725  		case wazeroir.OperationKindUnreachable:
   726  			panic(wasmruntime.ErrRuntimeUnreachable)
   727  		case wazeroir.OperationKindBr:
   728  			frame.pc = op.U1
   729  		case wazeroir.OperationKindBrIf:
   730  			if ce.popValue() > 0 {
   731  				ce.drop(op.U3)
   732  				frame.pc = op.U1
   733  			} else {
   734  				frame.pc = op.U2
   735  			}
   736  		case wazeroir.OperationKindBrTable:
   737  			v := ce.popValue()
   738  			defaultAt := uint64(len(op.Us))/2 - 1
   739  			if v > defaultAt {
   740  				v = defaultAt
   741  			}
   742  			v *= 2
   743  			ce.drop(op.Us[v+1])
   744  			frame.pc = op.Us[v]
   745  		case wazeroir.OperationKindCall:
   746  			func() {
   747  				if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil {
   748  					defer func() {
   749  						if r := recover(); r != nil {
   750  							if s, ok := r.(*snapshot); ok && s.ce == ce {
   751  								s.doRestore()
   752  								frame = ce.frames[len(ce.frames)-1]
   753  								body = frame.f.parent.body
   754  								bodyLen = uint64(len(body))
   755  							} else {
   756  								panic(r)
   757  							}
   758  						}
   759  					}()
   760  				}
   761  				ce.callFunction(ctx, f.moduleInstance, &functions[op.U1])
   762  			}()
   763  			frame.pc++
   764  		case wazeroir.OperationKindCallIndirect:
   765  			offset := ce.popValue()
   766  			table := tables[op.U2]
   767  			if offset >= uint64(len(table.References)) {
   768  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   769  			}
   770  			rawPtr := table.References[offset]
   771  			if rawPtr == 0 {
   772  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   773  			}
   774  
   775  			tf := functionFromUintptr(rawPtr)
   776  			if tf.typeID != typeIDs[op.U1] {
   777  				panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   778  			}
   779  
   780  			ce.callFunction(ctx, f.moduleInstance, tf)
   781  			frame.pc++
   782  		case wazeroir.OperationKindDrop:
   783  			ce.drop(op.U1)
   784  			frame.pc++
   785  		case wazeroir.OperationKindSelect:
   786  			c := ce.popValue()
   787  			if op.B3 { // Target is vector.
   788  				x2Hi, x2Lo := ce.popValue(), ce.popValue()
   789  				if c == 0 {
   790  					_, _ = ce.popValue(), ce.popValue() // discard the x1's lo and hi bits.
   791  					ce.pushValue(x2Lo)
   792  					ce.pushValue(x2Hi)
   793  				}
   794  			} else {
   795  				v2 := ce.popValue()
   796  				if c == 0 {
   797  					_ = ce.popValue()
   798  					ce.pushValue(v2)
   799  				}
   800  			}
   801  			frame.pc++
   802  		case wazeroir.OperationKindPick:
   803  			index := len(ce.stack) - 1 - int(op.U1)
   804  			ce.pushValue(ce.stack[index])
   805  			if op.B3 { // V128 value target.
   806  				ce.pushValue(ce.stack[index+1])
   807  			}
   808  			frame.pc++
   809  		case wazeroir.OperationKindSet:
   810  			if op.B3 { // V128 value target.
   811  				lowIndex := len(ce.stack) - 1 - int(op.U1)
   812  				highIndex := lowIndex + 1
   813  				hi, lo := ce.popValue(), ce.popValue()
   814  				ce.stack[lowIndex], ce.stack[highIndex] = lo, hi
   815  			} else {
   816  				index := len(ce.stack) - 1 - int(op.U1)
   817  				ce.stack[index] = ce.popValue()
   818  			}
   819  			frame.pc++
   820  		case wazeroir.OperationKindGlobalGet:
   821  			g := globals[op.U1]
   822  			ce.pushValue(g.Val)
   823  			if g.Type.ValType == wasm.ValueTypeV128 {
   824  				ce.pushValue(g.ValHi)
   825  			}
   826  			frame.pc++
   827  		case wazeroir.OperationKindGlobalSet:
   828  			g := globals[op.U1]
   829  			if g.Type.ValType == wasm.ValueTypeV128 {
   830  				g.ValHi = ce.popValue()
   831  			}
   832  			g.Val = ce.popValue()
   833  			frame.pc++
   834  		case wazeroir.OperationKindLoad:
   835  			offset := ce.popMemoryOffset(op)
   836  			switch wazeroir.UnsignedType(op.B1) {
   837  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
   838  				if val, ok := memoryInst.ReadUint32Le(offset); !ok {
   839  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   840  				} else {
   841  					ce.pushValue(uint64(val))
   842  				}
   843  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
   844  				if val, ok := memoryInst.ReadUint64Le(offset); !ok {
   845  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   846  				} else {
   847  					ce.pushValue(val)
   848  				}
   849  			}
   850  			frame.pc++
   851  		case wazeroir.OperationKindLoad8:
   852  			val, ok := memoryInst.ReadByte(ce.popMemoryOffset(op))
   853  			if !ok {
   854  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   855  			}
   856  
   857  			switch wazeroir.SignedInt(op.B1) {
   858  			case wazeroir.SignedInt32:
   859  				ce.pushValue(uint64(uint32(int8(val))))
   860  			case wazeroir.SignedInt64:
   861  				ce.pushValue(uint64(int8(val)))
   862  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
   863  				ce.pushValue(uint64(val))
   864  			}
   865  			frame.pc++
   866  		case wazeroir.OperationKindLoad16:
   867  
   868  			val, ok := memoryInst.ReadUint16Le(ce.popMemoryOffset(op))
   869  			if !ok {
   870  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   871  			}
   872  
   873  			switch wazeroir.SignedInt(op.B1) {
   874  			case wazeroir.SignedInt32:
   875  				ce.pushValue(uint64(uint32(int16(val))))
   876  			case wazeroir.SignedInt64:
   877  				ce.pushValue(uint64(int16(val)))
   878  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
   879  				ce.pushValue(uint64(val))
   880  			}
   881  			frame.pc++
   882  		case wazeroir.OperationKindLoad32:
   883  			val, ok := memoryInst.ReadUint32Le(ce.popMemoryOffset(op))
   884  			if !ok {
   885  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   886  			}
   887  
   888  			if op.B1 == 1 { // Signed
   889  				ce.pushValue(uint64(int32(val)))
   890  			} else {
   891  				ce.pushValue(uint64(val))
   892  			}
   893  			frame.pc++
   894  		case wazeroir.OperationKindStore:
   895  			val := ce.popValue()
   896  			offset := ce.popMemoryOffset(op)
   897  			switch wazeroir.UnsignedType(op.B1) {
   898  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
   899  				if !memoryInst.WriteUint32Le(offset, uint32(val)) {
   900  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   901  				}
   902  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
   903  				if !memoryInst.WriteUint64Le(offset, val) {
   904  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   905  				}
   906  			}
   907  			frame.pc++
   908  		case wazeroir.OperationKindStore8:
   909  			val := byte(ce.popValue())
   910  			offset := ce.popMemoryOffset(op)
   911  			if !memoryInst.WriteByte(offset, val) {
   912  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   913  			}
   914  			frame.pc++
   915  		case wazeroir.OperationKindStore16:
   916  			val := uint16(ce.popValue())
   917  			offset := ce.popMemoryOffset(op)
   918  			if !memoryInst.WriteUint16Le(offset, val) {
   919  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   920  			}
   921  			frame.pc++
   922  		case wazeroir.OperationKindStore32:
   923  			val := uint32(ce.popValue())
   924  			offset := ce.popMemoryOffset(op)
   925  			if !memoryInst.WriteUint32Le(offset, val) {
   926  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   927  			}
   928  			frame.pc++
   929  		case wazeroir.OperationKindMemorySize:
   930  			ce.pushValue(uint64(memoryInst.Pages()))
   931  			frame.pc++
   932  		case wazeroir.OperationKindMemoryGrow:
   933  			n := ce.popValue()
   934  			if res, ok := memoryInst.Grow(uint32(n)); !ok {
   935  				ce.pushValue(uint64(0xffffffff)) // = -1 in signed 32-bit integer.
   936  			} else {
   937  				ce.pushValue(uint64(res))
   938  			}
   939  			frame.pc++
   940  		case wazeroir.OperationKindConstI32, wazeroir.OperationKindConstI64,
   941  			wazeroir.OperationKindConstF32, wazeroir.OperationKindConstF64:
   942  			ce.pushValue(op.U1)
   943  			frame.pc++
   944  		case wazeroir.OperationKindEq:
   945  			var b bool
   946  			switch wazeroir.UnsignedType(op.B1) {
   947  			case wazeroir.UnsignedTypeI32:
   948  				v2, v1 := ce.popValue(), ce.popValue()
   949  				b = uint32(v1) == uint32(v2)
   950  			case wazeroir.UnsignedTypeI64:
   951  				v2, v1 := ce.popValue(), ce.popValue()
   952  				b = v1 == v2
   953  			case wazeroir.UnsignedTypeF32:
   954  				v2, v1 := ce.popValue(), ce.popValue()
   955  				b = math.Float32frombits(uint32(v2)) == math.Float32frombits(uint32(v1))
   956  			case wazeroir.UnsignedTypeF64:
   957  				v2, v1 := ce.popValue(), ce.popValue()
   958  				b = math.Float64frombits(v2) == math.Float64frombits(v1)
   959  			}
   960  			if b {
   961  				ce.pushValue(1)
   962  			} else {
   963  				ce.pushValue(0)
   964  			}
   965  			frame.pc++
   966  		case wazeroir.OperationKindNe:
   967  			var b bool
   968  			switch wazeroir.UnsignedType(op.B1) {
   969  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeI64:
   970  				v2, v1 := ce.popValue(), ce.popValue()
   971  				b = v1 != v2
   972  			case wazeroir.UnsignedTypeF32:
   973  				v2, v1 := ce.popValue(), ce.popValue()
   974  				b = math.Float32frombits(uint32(v2)) != math.Float32frombits(uint32(v1))
   975  			case wazeroir.UnsignedTypeF64:
   976  				v2, v1 := ce.popValue(), ce.popValue()
   977  				b = math.Float64frombits(v2) != math.Float64frombits(v1)
   978  			}
   979  			if b {
   980  				ce.pushValue(1)
   981  			} else {
   982  				ce.pushValue(0)
   983  			}
   984  			frame.pc++
   985  		case wazeroir.OperationKindEqz:
   986  			if ce.popValue() == 0 {
   987  				ce.pushValue(1)
   988  			} else {
   989  				ce.pushValue(0)
   990  			}
   991  			frame.pc++
   992  		case wazeroir.OperationKindLt:
   993  			v2 := ce.popValue()
   994  			v1 := ce.popValue()
   995  			var b bool
   996  			switch wazeroir.SignedType(op.B1) {
   997  			case wazeroir.SignedTypeInt32:
   998  				b = int32(v1) < int32(v2)
   999  			case wazeroir.SignedTypeInt64:
  1000  				b = int64(v1) < int64(v2)
  1001  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1002  				b = v1 < v2
  1003  			case wazeroir.SignedTypeFloat32:
  1004  				b = math.Float32frombits(uint32(v1)) < math.Float32frombits(uint32(v2))
  1005  			case wazeroir.SignedTypeFloat64:
  1006  				b = math.Float64frombits(v1) < math.Float64frombits(v2)
  1007  			}
  1008  			if b {
  1009  				ce.pushValue(1)
  1010  			} else {
  1011  				ce.pushValue(0)
  1012  			}
  1013  			frame.pc++
  1014  		case wazeroir.OperationKindGt:
  1015  			v2 := ce.popValue()
  1016  			v1 := ce.popValue()
  1017  			var b bool
  1018  			switch wazeroir.SignedType(op.B1) {
  1019  			case wazeroir.SignedTypeInt32:
  1020  				b = int32(v1) > int32(v2)
  1021  			case wazeroir.SignedTypeInt64:
  1022  				b = int64(v1) > int64(v2)
  1023  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1024  				b = v1 > v2
  1025  			case wazeroir.SignedTypeFloat32:
  1026  				b = math.Float32frombits(uint32(v1)) > math.Float32frombits(uint32(v2))
  1027  			case wazeroir.SignedTypeFloat64:
  1028  				b = math.Float64frombits(v1) > math.Float64frombits(v2)
  1029  			}
  1030  			if b {
  1031  				ce.pushValue(1)
  1032  			} else {
  1033  				ce.pushValue(0)
  1034  			}
  1035  			frame.pc++
  1036  		case wazeroir.OperationKindLe:
  1037  			v2 := ce.popValue()
  1038  			v1 := ce.popValue()
  1039  			var b bool
  1040  			switch wazeroir.SignedType(op.B1) {
  1041  			case wazeroir.SignedTypeInt32:
  1042  				b = int32(v1) <= int32(v2)
  1043  			case wazeroir.SignedTypeInt64:
  1044  				b = int64(v1) <= int64(v2)
  1045  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1046  				b = v1 <= v2
  1047  			case wazeroir.SignedTypeFloat32:
  1048  				b = math.Float32frombits(uint32(v1)) <= math.Float32frombits(uint32(v2))
  1049  			case wazeroir.SignedTypeFloat64:
  1050  				b = math.Float64frombits(v1) <= math.Float64frombits(v2)
  1051  			}
  1052  			if b {
  1053  				ce.pushValue(1)
  1054  			} else {
  1055  				ce.pushValue(0)
  1056  			}
  1057  			frame.pc++
  1058  		case wazeroir.OperationKindGe:
  1059  			v2 := ce.popValue()
  1060  			v1 := ce.popValue()
  1061  			var b bool
  1062  			switch wazeroir.SignedType(op.B1) {
  1063  			case wazeroir.SignedTypeInt32:
  1064  				b = int32(v1) >= int32(v2)
  1065  			case wazeroir.SignedTypeInt64:
  1066  				b = int64(v1) >= int64(v2)
  1067  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1068  				b = v1 >= v2
  1069  			case wazeroir.SignedTypeFloat32:
  1070  				b = math.Float32frombits(uint32(v1)) >= math.Float32frombits(uint32(v2))
  1071  			case wazeroir.SignedTypeFloat64:
  1072  				b = math.Float64frombits(v1) >= math.Float64frombits(v2)
  1073  			}
  1074  			if b {
  1075  				ce.pushValue(1)
  1076  			} else {
  1077  				ce.pushValue(0)
  1078  			}
  1079  			frame.pc++
  1080  		case wazeroir.OperationKindAdd:
  1081  			v2 := ce.popValue()
  1082  			v1 := ce.popValue()
  1083  			switch wazeroir.UnsignedType(op.B1) {
  1084  			case wazeroir.UnsignedTypeI32:
  1085  				v := uint32(v1) + uint32(v2)
  1086  				ce.pushValue(uint64(v))
  1087  			case wazeroir.UnsignedTypeI64:
  1088  				ce.pushValue(v1 + v2)
  1089  			case wazeroir.UnsignedTypeF32:
  1090  				ce.pushValue(addFloat32bits(uint32(v1), uint32(v2)))
  1091  			case wazeroir.UnsignedTypeF64:
  1092  				v := math.Float64frombits(v1) + math.Float64frombits(v2)
  1093  				ce.pushValue(math.Float64bits(v))
  1094  			}
  1095  			frame.pc++
  1096  		case wazeroir.OperationKindSub:
  1097  			v2 := ce.popValue()
  1098  			v1 := ce.popValue()
  1099  			switch wazeroir.UnsignedType(op.B1) {
  1100  			case wazeroir.UnsignedTypeI32:
  1101  				ce.pushValue(uint64(uint32(v1) - uint32(v2)))
  1102  			case wazeroir.UnsignedTypeI64:
  1103  				ce.pushValue(v1 - v2)
  1104  			case wazeroir.UnsignedTypeF32:
  1105  				ce.pushValue(subFloat32bits(uint32(v1), uint32(v2)))
  1106  			case wazeroir.UnsignedTypeF64:
  1107  				v := math.Float64frombits(v1) - math.Float64frombits(v2)
  1108  				ce.pushValue(math.Float64bits(v))
  1109  			}
  1110  			frame.pc++
  1111  		case wazeroir.OperationKindMul:
  1112  			v2 := ce.popValue()
  1113  			v1 := ce.popValue()
  1114  			switch wazeroir.UnsignedType(op.B1) {
  1115  			case wazeroir.UnsignedTypeI32:
  1116  				ce.pushValue(uint64(uint32(v1) * uint32(v2)))
  1117  			case wazeroir.UnsignedTypeI64:
  1118  				ce.pushValue(v1 * v2)
  1119  			case wazeroir.UnsignedTypeF32:
  1120  				ce.pushValue(mulFloat32bits(uint32(v1), uint32(v2)))
  1121  			case wazeroir.UnsignedTypeF64:
  1122  				v := math.Float64frombits(v2) * math.Float64frombits(v1)
  1123  				ce.pushValue(math.Float64bits(v))
  1124  			}
  1125  			frame.pc++
  1126  		case wazeroir.OperationKindClz:
  1127  			v := ce.popValue()
  1128  			if op.B1 == 0 {
  1129  				// UnsignedInt32
  1130  				ce.pushValue(uint64(bits.LeadingZeros32(uint32(v))))
  1131  			} else {
  1132  				// UnsignedInt64
  1133  				ce.pushValue(uint64(bits.LeadingZeros64(v)))
  1134  			}
  1135  			frame.pc++
  1136  		case wazeroir.OperationKindCtz:
  1137  			v := ce.popValue()
  1138  			if op.B1 == 0 {
  1139  				// UnsignedInt32
  1140  				ce.pushValue(uint64(bits.TrailingZeros32(uint32(v))))
  1141  			} else {
  1142  				// UnsignedInt64
  1143  				ce.pushValue(uint64(bits.TrailingZeros64(v)))
  1144  			}
  1145  			frame.pc++
  1146  		case wazeroir.OperationKindPopcnt:
  1147  			v := ce.popValue()
  1148  			if op.B1 == 0 {
  1149  				// UnsignedInt32
  1150  				ce.pushValue(uint64(bits.OnesCount32(uint32(v))))
  1151  			} else {
  1152  				// UnsignedInt64
  1153  				ce.pushValue(uint64(bits.OnesCount64(v)))
  1154  			}
  1155  			frame.pc++
  1156  		case wazeroir.OperationKindDiv:
  1157  			// If an integer, check we won't divide by zero.
  1158  			t := wazeroir.SignedType(op.B1)
  1159  			v2, v1 := ce.popValue(), ce.popValue()
  1160  			switch t {
  1161  			case wazeroir.SignedTypeFloat32, wazeroir.SignedTypeFloat64: // not integers
  1162  			default:
  1163  				if v2 == 0 {
  1164  					panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1165  				}
  1166  			}
  1167  
  1168  			switch t {
  1169  			case wazeroir.SignedTypeInt32:
  1170  				d := int32(v2)
  1171  				n := int32(v1)
  1172  				if n == math.MinInt32 && d == -1 {
  1173  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1174  				}
  1175  				ce.pushValue(uint64(uint32(n / d)))
  1176  			case wazeroir.SignedTypeInt64:
  1177  				d := int64(v2)
  1178  				n := int64(v1)
  1179  				if n == math.MinInt64 && d == -1 {
  1180  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1181  				}
  1182  				ce.pushValue(uint64(n / d))
  1183  			case wazeroir.SignedTypeUint32:
  1184  				d := uint32(v2)
  1185  				n := uint32(v1)
  1186  				ce.pushValue(uint64(n / d))
  1187  			case wazeroir.SignedTypeUint64:
  1188  				d := v2
  1189  				n := v1
  1190  				ce.pushValue(n / d)
  1191  			case wazeroir.SignedTypeFloat32:
  1192  				ce.pushValue(divFloat32bits(uint32(v1), uint32(v2)))
  1193  			case wazeroir.SignedTypeFloat64:
  1194  				ce.pushValue(math.Float64bits(math.Float64frombits(v1) / math.Float64frombits(v2)))
  1195  			}
  1196  			frame.pc++
  1197  		case wazeroir.OperationKindRem:
  1198  			v2, v1 := ce.popValue(), ce.popValue()
  1199  			if v2 == 0 {
  1200  				panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1201  			}
  1202  			switch wazeroir.SignedInt(op.B1) {
  1203  			case wazeroir.SignedInt32:
  1204  				d := int32(v2)
  1205  				n := int32(v1)
  1206  				ce.pushValue(uint64(uint32(n % d)))
  1207  			case wazeroir.SignedInt64:
  1208  				d := int64(v2)
  1209  				n := int64(v1)
  1210  				ce.pushValue(uint64(n % d))
  1211  			case wazeroir.SignedUint32:
  1212  				d := uint32(v2)
  1213  				n := uint32(v1)
  1214  				ce.pushValue(uint64(n % d))
  1215  			case wazeroir.SignedUint64:
  1216  				d := v2
  1217  				n := v1
  1218  				ce.pushValue(n % d)
  1219  			}
  1220  			frame.pc++
  1221  		case wazeroir.OperationKindAnd:
  1222  			v2 := ce.popValue()
  1223  			v1 := ce.popValue()
  1224  			if op.B1 == 0 {
  1225  				// UnsignedInt32
  1226  				ce.pushValue(uint64(uint32(v2) & uint32(v1)))
  1227  			} else {
  1228  				// UnsignedInt64
  1229  				ce.pushValue(uint64(v2 & v1))
  1230  			}
  1231  			frame.pc++
  1232  		case wazeroir.OperationKindOr:
  1233  			v2 := ce.popValue()
  1234  			v1 := ce.popValue()
  1235  			if op.B1 == 0 {
  1236  				// UnsignedInt32
  1237  				ce.pushValue(uint64(uint32(v2) | uint32(v1)))
  1238  			} else {
  1239  				// UnsignedInt64
  1240  				ce.pushValue(uint64(v2 | v1))
  1241  			}
  1242  			frame.pc++
  1243  		case wazeroir.OperationKindXor:
  1244  			v2 := ce.popValue()
  1245  			v1 := ce.popValue()
  1246  			if op.B1 == 0 {
  1247  				// UnsignedInt32
  1248  				ce.pushValue(uint64(uint32(v2) ^ uint32(v1)))
  1249  			} else {
  1250  				// UnsignedInt64
  1251  				ce.pushValue(uint64(v2 ^ v1))
  1252  			}
  1253  			frame.pc++
  1254  		case wazeroir.OperationKindShl:
  1255  			v2 := ce.popValue()
  1256  			v1 := ce.popValue()
  1257  			if op.B1 == 0 {
  1258  				// UnsignedInt32
  1259  				ce.pushValue(uint64(uint32(v1) << (uint32(v2) % 32)))
  1260  			} else {
  1261  				// UnsignedInt64
  1262  				ce.pushValue(v1 << (v2 % 64))
  1263  			}
  1264  			frame.pc++
  1265  		case wazeroir.OperationKindShr:
  1266  			v2 := ce.popValue()
  1267  			v1 := ce.popValue()
  1268  			switch wazeroir.SignedInt(op.B1) {
  1269  			case wazeroir.SignedInt32:
  1270  				ce.pushValue(uint64(uint32(int32(v1) >> (uint32(v2) % 32))))
  1271  			case wazeroir.SignedInt64:
  1272  				ce.pushValue(uint64(int64(v1) >> (v2 % 64)))
  1273  			case wazeroir.SignedUint32:
  1274  				ce.pushValue(uint64(uint32(v1) >> (uint32(v2) % 32)))
  1275  			case wazeroir.SignedUint64:
  1276  				ce.pushValue(v1 >> (v2 % 64))
  1277  			}
  1278  			frame.pc++
  1279  		case wazeroir.OperationKindRotl:
  1280  			v2 := ce.popValue()
  1281  			v1 := ce.popValue()
  1282  			if op.B1 == 0 {
  1283  				// UnsignedInt32
  1284  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), int(v2))))
  1285  			} else {
  1286  				// UnsignedInt64
  1287  				ce.pushValue(uint64(bits.RotateLeft64(v1, int(v2))))
  1288  			}
  1289  			frame.pc++
  1290  		case wazeroir.OperationKindRotr:
  1291  			v2 := ce.popValue()
  1292  			v1 := ce.popValue()
  1293  			if op.B1 == 0 {
  1294  				// UnsignedInt32
  1295  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), -int(v2))))
  1296  			} else {
  1297  				// UnsignedInt64
  1298  				ce.pushValue(uint64(bits.RotateLeft64(v1, -int(v2))))
  1299  			}
  1300  			frame.pc++
  1301  		case wazeroir.OperationKindAbs:
  1302  			if op.B1 == 0 {
  1303  				// Float32
  1304  				const mask uint32 = 1 << 31
  1305  				ce.pushValue(uint64(uint32(ce.popValue()) &^ mask))
  1306  			} else {
  1307  				// Float64
  1308  				const mask uint64 = 1 << 63
  1309  				ce.pushValue(ce.popValue() &^ mask)
  1310  			}
  1311  			frame.pc++
  1312  		case wazeroir.OperationKindNeg:
  1313  			if op.B1 == 0 {
  1314  				// Float32
  1315  				v := -math.Float32frombits(uint32(ce.popValue()))
  1316  				ce.pushValue(uint64(math.Float32bits(v)))
  1317  			} else {
  1318  				// Float64
  1319  				v := -math.Float64frombits(ce.popValue())
  1320  				ce.pushValue(math.Float64bits(v))
  1321  			}
  1322  			frame.pc++
  1323  		case wazeroir.OperationKindCeil:
  1324  			if op.B1 == 0 {
  1325  				// Float32
  1326  				v := moremath.WasmCompatCeilF32(math.Float32frombits(uint32(ce.popValue())))
  1327  				ce.pushValue(uint64(math.Float32bits(v)))
  1328  			} else {
  1329  				// Float64
  1330  				v := moremath.WasmCompatCeilF64(math.Float64frombits(ce.popValue()))
  1331  				ce.pushValue(math.Float64bits(v))
  1332  			}
  1333  			frame.pc++
  1334  		case wazeroir.OperationKindFloor:
  1335  			if op.B1 == 0 {
  1336  				// Float32
  1337  				v := moremath.WasmCompatFloorF32(math.Float32frombits(uint32(ce.popValue())))
  1338  				ce.pushValue(uint64(math.Float32bits(v)))
  1339  			} else {
  1340  				// Float64
  1341  				v := moremath.WasmCompatFloorF64(math.Float64frombits(ce.popValue()))
  1342  				ce.pushValue(math.Float64bits(v))
  1343  			}
  1344  			frame.pc++
  1345  		case wazeroir.OperationKindTrunc:
  1346  			if op.B1 == 0 {
  1347  				// Float32
  1348  				v := moremath.WasmCompatTruncF32(math.Float32frombits(uint32(ce.popValue())))
  1349  				ce.pushValue(uint64(math.Float32bits(v)))
  1350  			} else {
  1351  				// Float64
  1352  				v := moremath.WasmCompatTruncF64(math.Float64frombits(ce.popValue()))
  1353  				ce.pushValue(math.Float64bits(v))
  1354  			}
  1355  			frame.pc++
  1356  		case wazeroir.OperationKindNearest:
  1357  			if op.B1 == 0 {
  1358  				// Float32
  1359  				f := math.Float32frombits(uint32(ce.popValue()))
  1360  				ce.pushValue(uint64(math.Float32bits(moremath.WasmCompatNearestF32(f))))
  1361  			} else {
  1362  				// Float64
  1363  				f := math.Float64frombits(ce.popValue())
  1364  				ce.pushValue(math.Float64bits(moremath.WasmCompatNearestF64(f)))
  1365  			}
  1366  			frame.pc++
  1367  		case wazeroir.OperationKindSqrt:
  1368  			if op.B1 == 0 {
  1369  				// Float32
  1370  				v := math.Sqrt(float64(math.Float32frombits(uint32(ce.popValue()))))
  1371  				ce.pushValue(uint64(math.Float32bits(float32(v))))
  1372  			} else {
  1373  				// Float64
  1374  				v := math.Sqrt(math.Float64frombits(ce.popValue()))
  1375  				ce.pushValue(math.Float64bits(v))
  1376  			}
  1377  			frame.pc++
  1378  		case wazeroir.OperationKindMin:
  1379  			if op.B1 == 0 {
  1380  				// Float32
  1381  				ce.pushValue(WasmCompatMin32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1382  			} else {
  1383  				v2 := math.Float64frombits(ce.popValue())
  1384  				v1 := math.Float64frombits(ce.popValue())
  1385  				ce.pushValue(math.Float64bits(moremath.WasmCompatMin64(v1, v2)))
  1386  			}
  1387  			frame.pc++
  1388  		case wazeroir.OperationKindMax:
  1389  			if op.B1 == 0 {
  1390  				ce.pushValue(WasmCompatMax32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1391  			} else {
  1392  				// Float64
  1393  				v2 := math.Float64frombits(ce.popValue())
  1394  				v1 := math.Float64frombits(ce.popValue())
  1395  				ce.pushValue(math.Float64bits(moremath.WasmCompatMax64(v1, v2)))
  1396  			}
  1397  			frame.pc++
  1398  		case wazeroir.OperationKindCopysign:
  1399  			if op.B1 == 0 {
  1400  				// Float32
  1401  				v2 := uint32(ce.popValue())
  1402  				v1 := uint32(ce.popValue())
  1403  				const signbit = 1 << 31
  1404  				ce.pushValue(uint64(v1&^signbit | v2&signbit))
  1405  			} else {
  1406  				// Float64
  1407  				v2 := ce.popValue()
  1408  				v1 := ce.popValue()
  1409  				const signbit = 1 << 63
  1410  				ce.pushValue(v1&^signbit | v2&signbit)
  1411  			}
  1412  			frame.pc++
  1413  		case wazeroir.OperationKindI32WrapFromI64:
  1414  			ce.pushValue(uint64(uint32(ce.popValue())))
  1415  			frame.pc++
  1416  		case wazeroir.OperationKindITruncFromF:
  1417  			if op.B1 == 0 {
  1418  				// Float32
  1419  				switch wazeroir.SignedInt(op.B2) {
  1420  				case wazeroir.SignedInt32:
  1421  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1422  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1423  						if op.B3 {
  1424  							// non-trapping conversion must cast nan to zero.
  1425  							v = 0
  1426  						} else {
  1427  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1428  						}
  1429  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1430  						if op.B3 {
  1431  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1432  							if v < 0 {
  1433  								v = math.MinInt32
  1434  							} else {
  1435  								v = math.MaxInt32
  1436  							}
  1437  						} else {
  1438  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1439  						}
  1440  					}
  1441  					ce.pushValue(uint64(uint32(int32(v))))
  1442  				case wazeroir.SignedInt64:
  1443  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1444  					res := int64(v)
  1445  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1446  						if op.B3 {
  1447  							// non-trapping conversion must cast nan to zero.
  1448  							res = 0
  1449  						} else {
  1450  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1451  						}
  1452  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1453  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1454  						// and that's why we use '>=' not '>' to check overflow.
  1455  						if op.B3 {
  1456  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1457  							if v < 0 {
  1458  								res = math.MinInt64
  1459  							} else {
  1460  								res = math.MaxInt64
  1461  							}
  1462  						} else {
  1463  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1464  						}
  1465  					}
  1466  					ce.pushValue(uint64(res))
  1467  				case wazeroir.SignedUint32:
  1468  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1469  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1470  						if op.B3 {
  1471  							// non-trapping conversion must cast nan to zero.
  1472  							v = 0
  1473  						} else {
  1474  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1475  						}
  1476  					} else if v < 0 || v > math.MaxUint32 {
  1477  						if op.B3 {
  1478  							// non-trapping conversion must "saturate" the value for overflowing source.
  1479  							if v < 0 {
  1480  								v = 0
  1481  							} else {
  1482  								v = math.MaxUint32
  1483  							}
  1484  						} else {
  1485  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1486  						}
  1487  					}
  1488  					ce.pushValue(uint64(uint32(v)))
  1489  				case wazeroir.SignedUint64:
  1490  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1491  					res := uint64(v)
  1492  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1493  						if op.B3 {
  1494  							// non-trapping conversion must cast nan to zero.
  1495  							res = 0
  1496  						} else {
  1497  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1498  						}
  1499  					} else if v < 0 || v >= math.MaxUint64 {
  1500  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1501  						// and that's why we use '>=' not '>' to check overflow.
  1502  						if op.B3 {
  1503  							// non-trapping conversion must "saturate" the value for overflowing source.
  1504  							if v < 0 {
  1505  								res = 0
  1506  							} else {
  1507  								res = math.MaxUint64
  1508  							}
  1509  						} else {
  1510  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1511  						}
  1512  					}
  1513  					ce.pushValue(res)
  1514  				}
  1515  			} else {
  1516  				// Float64
  1517  				switch wazeroir.SignedInt(op.B2) {
  1518  				case wazeroir.SignedInt32:
  1519  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1520  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1521  						if op.B3 {
  1522  							// non-trapping conversion must cast nan to zero.
  1523  							v = 0
  1524  						} else {
  1525  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1526  						}
  1527  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1528  						if op.B3 {
  1529  							// non-trapping conversion must "saturate" the value for overflowing source.
  1530  							if v < 0 {
  1531  								v = math.MinInt32
  1532  							} else {
  1533  								v = math.MaxInt32
  1534  							}
  1535  						} else {
  1536  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1537  						}
  1538  					}
  1539  					ce.pushValue(uint64(uint32(int32(v))))
  1540  				case wazeroir.SignedInt64:
  1541  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1542  					res := int64(v)
  1543  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1544  						if op.B3 {
  1545  							// non-trapping conversion must cast nan to zero.
  1546  							res = 0
  1547  						} else {
  1548  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1549  						}
  1550  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1551  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1552  						// and that's why we use '>=' not '>' to check overflow.
  1553  						if op.B3 {
  1554  							// non-trapping conversion must "saturate" the value for overflowing source.
  1555  							if v < 0 {
  1556  								res = math.MinInt64
  1557  							} else {
  1558  								res = math.MaxInt64
  1559  							}
  1560  						} else {
  1561  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1562  						}
  1563  					}
  1564  					ce.pushValue(uint64(res))
  1565  				case wazeroir.SignedUint32:
  1566  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1567  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1568  						if op.B3 {
  1569  							// non-trapping conversion must cast nan to zero.
  1570  							v = 0
  1571  						} else {
  1572  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1573  						}
  1574  					} else if v < 0 || v > math.MaxUint32 {
  1575  						if op.B3 {
  1576  							// non-trapping conversion must "saturate" the value for overflowing source.
  1577  							if v < 0 {
  1578  								v = 0
  1579  							} else {
  1580  								v = math.MaxUint32
  1581  							}
  1582  						} else {
  1583  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1584  						}
  1585  					}
  1586  					ce.pushValue(uint64(uint32(v)))
  1587  				case wazeroir.SignedUint64:
  1588  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1589  					res := uint64(v)
  1590  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1591  						if op.B3 {
  1592  							// non-trapping conversion must cast nan to zero.
  1593  							res = 0
  1594  						} else {
  1595  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1596  						}
  1597  					} else if v < 0 || v >= math.MaxUint64 {
  1598  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1599  						// and that's why we use '>=' not '>' to check overflow.
  1600  						if op.B3 {
  1601  							// non-trapping conversion must "saturate" the value for overflowing source.
  1602  							if v < 0 {
  1603  								res = 0
  1604  							} else {
  1605  								res = math.MaxUint64
  1606  							}
  1607  						} else {
  1608  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1609  						}
  1610  					}
  1611  					ce.pushValue(res)
  1612  				}
  1613  			}
  1614  			frame.pc++
  1615  		case wazeroir.OperationKindFConvertFromI:
  1616  			switch wazeroir.SignedInt(op.B1) {
  1617  			case wazeroir.SignedInt32:
  1618  				if op.B2 == 0 {
  1619  					// Float32
  1620  					v := float32(int32(ce.popValue()))
  1621  					ce.pushValue(uint64(math.Float32bits(v)))
  1622  				} else {
  1623  					// Float64
  1624  					v := float64(int32(ce.popValue()))
  1625  					ce.pushValue(math.Float64bits(v))
  1626  				}
  1627  			case wazeroir.SignedInt64:
  1628  				if op.B2 == 0 {
  1629  					// Float32
  1630  					v := float32(int64(ce.popValue()))
  1631  					ce.pushValue(uint64(math.Float32bits(v)))
  1632  				} else {
  1633  					// Float64
  1634  					v := float64(int64(ce.popValue()))
  1635  					ce.pushValue(math.Float64bits(v))
  1636  				}
  1637  			case wazeroir.SignedUint32:
  1638  				if op.B2 == 0 {
  1639  					// Float32
  1640  					v := float32(uint32(ce.popValue()))
  1641  					ce.pushValue(uint64(math.Float32bits(v)))
  1642  				} else {
  1643  					// Float64
  1644  					v := float64(uint32(ce.popValue()))
  1645  					ce.pushValue(math.Float64bits(v))
  1646  				}
  1647  			case wazeroir.SignedUint64:
  1648  				if op.B2 == 0 {
  1649  					// Float32
  1650  					v := float32(ce.popValue())
  1651  					ce.pushValue(uint64(math.Float32bits(v)))
  1652  				} else {
  1653  					// Float64
  1654  					v := float64(ce.popValue())
  1655  					ce.pushValue(math.Float64bits(v))
  1656  				}
  1657  			}
  1658  			frame.pc++
  1659  		case wazeroir.OperationKindF32DemoteFromF64:
  1660  			v := float32(math.Float64frombits(ce.popValue()))
  1661  			ce.pushValue(uint64(math.Float32bits(v)))
  1662  			frame.pc++
  1663  		case wazeroir.OperationKindF64PromoteFromF32:
  1664  			v := float64(math.Float32frombits(uint32(ce.popValue())))
  1665  			ce.pushValue(math.Float64bits(v))
  1666  			frame.pc++
  1667  		case wazeroir.OperationKindExtend:
  1668  			if op.B1 == 1 {
  1669  				// Signed.
  1670  				v := int64(int32(ce.popValue()))
  1671  				ce.pushValue(uint64(v))
  1672  			} else {
  1673  				v := uint64(uint32(ce.popValue()))
  1674  				ce.pushValue(v)
  1675  			}
  1676  			frame.pc++
  1677  		case wazeroir.OperationKindSignExtend32From8:
  1678  			v := uint32(int8(ce.popValue()))
  1679  			ce.pushValue(uint64(v))
  1680  			frame.pc++
  1681  		case wazeroir.OperationKindSignExtend32From16:
  1682  			v := uint32(int16(ce.popValue()))
  1683  			ce.pushValue(uint64(v))
  1684  			frame.pc++
  1685  		case wazeroir.OperationKindSignExtend64From8:
  1686  			v := int64(int8(ce.popValue()))
  1687  			ce.pushValue(uint64(v))
  1688  			frame.pc++
  1689  		case wazeroir.OperationKindSignExtend64From16:
  1690  			v := int64(int16(ce.popValue()))
  1691  			ce.pushValue(uint64(v))
  1692  			frame.pc++
  1693  		case wazeroir.OperationKindSignExtend64From32:
  1694  			v := int64(int32(ce.popValue()))
  1695  			ce.pushValue(uint64(v))
  1696  			frame.pc++
  1697  		case wazeroir.OperationKindMemoryInit:
  1698  			dataInstance := dataInstances[op.U1]
  1699  			copySize := ce.popValue()
  1700  			inDataOffset := ce.popValue()
  1701  			inMemoryOffset := ce.popValue()
  1702  			if inDataOffset+copySize > uint64(len(dataInstance)) ||
  1703  				inMemoryOffset+copySize > uint64(len(memoryInst.Buffer)) {
  1704  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1705  			} else if copySize != 0 {
  1706  				copy(memoryInst.Buffer[inMemoryOffset:inMemoryOffset+copySize], dataInstance[inDataOffset:])
  1707  			}
  1708  			frame.pc++
  1709  		case wazeroir.OperationKindDataDrop:
  1710  			dataInstances[op.U1] = nil
  1711  			frame.pc++
  1712  		case wazeroir.OperationKindMemoryCopy:
  1713  			memLen := uint64(len(memoryInst.Buffer))
  1714  			copySize := ce.popValue()
  1715  			sourceOffset := ce.popValue()
  1716  			destinationOffset := ce.popValue()
  1717  			if sourceOffset+copySize > memLen || destinationOffset+copySize > memLen {
  1718  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1719  			} else if copySize != 0 {
  1720  				copy(memoryInst.Buffer[destinationOffset:],
  1721  					memoryInst.Buffer[sourceOffset:sourceOffset+copySize])
  1722  			}
  1723  			frame.pc++
  1724  		case wazeroir.OperationKindMemoryFill:
  1725  			fillSize := ce.popValue()
  1726  			value := byte(ce.popValue())
  1727  			offset := ce.popValue()
  1728  			if fillSize+offset > uint64(len(memoryInst.Buffer)) {
  1729  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1730  			} else if fillSize != 0 {
  1731  				// Uses the copy trick for faster filling buffer.
  1732  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1733  				buf := memoryInst.Buffer[offset : offset+fillSize]
  1734  				buf[0] = value
  1735  				for i := 1; i < len(buf); i *= 2 {
  1736  					copy(buf[i:], buf[:i])
  1737  				}
  1738  			}
  1739  			frame.pc++
  1740  		case wazeroir.OperationKindTableInit:
  1741  			elementInstance := elementInstances[op.U1]
  1742  			copySize := ce.popValue()
  1743  			inElementOffset := ce.popValue()
  1744  			inTableOffset := ce.popValue()
  1745  			table := tables[op.U2]
  1746  			if inElementOffset+copySize > uint64(len(elementInstance)) ||
  1747  				inTableOffset+copySize > uint64(len(table.References)) {
  1748  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1749  			} else if copySize != 0 {
  1750  				copy(table.References[inTableOffset:inTableOffset+copySize], elementInstance[inElementOffset:])
  1751  			}
  1752  			frame.pc++
  1753  		case wazeroir.OperationKindElemDrop:
  1754  			elementInstances[op.U1] = nil
  1755  			frame.pc++
  1756  		case wazeroir.OperationKindTableCopy:
  1757  			srcTable, dstTable := tables[op.U1].References, tables[op.U2].References
  1758  			copySize := ce.popValue()
  1759  			sourceOffset := ce.popValue()
  1760  			destinationOffset := ce.popValue()
  1761  			if sourceOffset+copySize > uint64(len(srcTable)) || destinationOffset+copySize > uint64(len(dstTable)) {
  1762  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1763  			} else if copySize != 0 {
  1764  				copy(dstTable[destinationOffset:], srcTable[sourceOffset:sourceOffset+copySize])
  1765  			}
  1766  			frame.pc++
  1767  		case wazeroir.OperationKindRefFunc:
  1768  			ce.pushValue(uint64(uintptr(unsafe.Pointer(&functions[op.U1]))))
  1769  			frame.pc++
  1770  		case wazeroir.OperationKindTableGet:
  1771  			table := tables[op.U1]
  1772  
  1773  			offset := ce.popValue()
  1774  			if offset >= uint64(len(table.References)) {
  1775  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1776  			}
  1777  
  1778  			ce.pushValue(uint64(table.References[offset]))
  1779  			frame.pc++
  1780  		case wazeroir.OperationKindTableSet:
  1781  			table := tables[op.U1]
  1782  			ref := ce.popValue()
  1783  
  1784  			offset := ce.popValue()
  1785  			if offset >= uint64(len(table.References)) {
  1786  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1787  			}
  1788  
  1789  			table.References[offset] = uintptr(ref) // externrefs are opaque uint64.
  1790  			frame.pc++
  1791  		case wazeroir.OperationKindTableSize:
  1792  			table := tables[op.U1]
  1793  			ce.pushValue(uint64(len(table.References)))
  1794  			frame.pc++
  1795  		case wazeroir.OperationKindTableGrow:
  1796  			table := tables[op.U1]
  1797  			num, ref := ce.popValue(), ce.popValue()
  1798  			ret := table.Grow(uint32(num), uintptr(ref))
  1799  			ce.pushValue(uint64(ret))
  1800  			frame.pc++
  1801  		case wazeroir.OperationKindTableFill:
  1802  			table := tables[op.U1]
  1803  			num := ce.popValue()
  1804  			ref := uintptr(ce.popValue())
  1805  			offset := ce.popValue()
  1806  			if num+offset > uint64(len(table.References)) {
  1807  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1808  			} else if num > 0 {
  1809  				// Uses the copy trick for faster filling the region with the value.
  1810  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1811  				targetRegion := table.References[offset : offset+num]
  1812  				targetRegion[0] = ref
  1813  				for i := 1; i < len(targetRegion); i *= 2 {
  1814  					copy(targetRegion[i:], targetRegion[:i])
  1815  				}
  1816  			}
  1817  			frame.pc++
  1818  		case wazeroir.OperationKindV128Const:
  1819  			lo, hi := op.U1, op.U2
  1820  			ce.pushValue(lo)
  1821  			ce.pushValue(hi)
  1822  			frame.pc++
  1823  		case wazeroir.OperationKindV128Add:
  1824  			yHigh, yLow := ce.popValue(), ce.popValue()
  1825  			xHigh, xLow := ce.popValue(), ce.popValue()
  1826  			switch op.B1 {
  1827  			case wazeroir.ShapeI8x16:
  1828  				ce.pushValue(
  1829  					uint64(uint8(xLow>>8)+uint8(yLow>>8))<<8 | uint64(uint8(xLow)+uint8(yLow)) |
  1830  						uint64(uint8(xLow>>24)+uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)+uint8(yLow>>16))<<16 |
  1831  						uint64(uint8(xLow>>40)+uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)+uint8(yLow>>32))<<32 |
  1832  						uint64(uint8(xLow>>56)+uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)+uint8(yLow>>48))<<48,
  1833  				)
  1834  				ce.pushValue(
  1835  					uint64(uint8(xHigh>>8)+uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)+uint8(yHigh)) |
  1836  						uint64(uint8(xHigh>>24)+uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)+uint8(yHigh>>16))<<16 |
  1837  						uint64(uint8(xHigh>>40)+uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)+uint8(yHigh>>32))<<32 |
  1838  						uint64(uint8(xHigh>>56)+uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)+uint8(yHigh>>48))<<48,
  1839  				)
  1840  			case wazeroir.ShapeI16x8:
  1841  				ce.pushValue(
  1842  					uint64(uint16(xLow>>16+yLow>>16))<<16 | uint64(uint16(xLow)+uint16(yLow)) |
  1843  						uint64(uint16(xLow>>48+yLow>>48))<<48 | uint64(uint16(xLow>>32+yLow>>32))<<32,
  1844  				)
  1845  				ce.pushValue(
  1846  					uint64(uint16(xHigh>>16)+uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)+uint16(yHigh)) |
  1847  						uint64(uint16(xHigh>>48)+uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)+uint16(yHigh>>32))<<32,
  1848  				)
  1849  			case wazeroir.ShapeI32x4:
  1850  				ce.pushValue(uint64(uint32(xLow>>32)+uint32(yLow>>32))<<32 | uint64(uint32(xLow)+uint32(yLow)))
  1851  				ce.pushValue(uint64(uint32(xHigh>>32)+uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)+uint32(yHigh)))
  1852  			case wazeroir.ShapeI64x2:
  1853  				ce.pushValue(xLow + yLow)
  1854  				ce.pushValue(xHigh + yHigh)
  1855  			case wazeroir.ShapeF32x4:
  1856  				ce.pushValue(
  1857  					addFloat32bits(uint32(xLow), uint32(yLow)) | addFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
  1858  				)
  1859  				ce.pushValue(
  1860  					addFloat32bits(uint32(xHigh), uint32(yHigh)) | addFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
  1861  				)
  1862  			case wazeroir.ShapeF64x2:
  1863  				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow)))
  1864  				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh)))
  1865  			}
  1866  			frame.pc++
  1867  		case wazeroir.OperationKindV128Sub:
  1868  			yHigh, yLow := ce.popValue(), ce.popValue()
  1869  			xHigh, xLow := ce.popValue(), ce.popValue()
  1870  			switch op.B1 {
  1871  			case wazeroir.ShapeI8x16:
  1872  				ce.pushValue(
  1873  					uint64(uint8(xLow>>8)-uint8(yLow>>8))<<8 | uint64(uint8(xLow)-uint8(yLow)) |
  1874  						uint64(uint8(xLow>>24)-uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)-uint8(yLow>>16))<<16 |
  1875  						uint64(uint8(xLow>>40)-uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)-uint8(yLow>>32))<<32 |
  1876  						uint64(uint8(xLow>>56)-uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)-uint8(yLow>>48))<<48,
  1877  				)
  1878  				ce.pushValue(
  1879  					uint64(uint8(xHigh>>8)-uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)-uint8(yHigh)) |
  1880  						uint64(uint8(xHigh>>24)-uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)-uint8(yHigh>>16))<<16 |
  1881  						uint64(uint8(xHigh>>40)-uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)-uint8(yHigh>>32))<<32 |
  1882  						uint64(uint8(xHigh>>56)-uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)-uint8(yHigh>>48))<<48,
  1883  				)
  1884  			case wazeroir.ShapeI16x8:
  1885  				ce.pushValue(
  1886  					uint64(uint16(xLow>>16)-uint16(yLow>>16))<<16 | uint64(uint16(xLow)-uint16(yLow)) |
  1887  						uint64(uint16(xLow>>48)-uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)-uint16(yLow>>32))<<32,
  1888  				)
  1889  				ce.pushValue(
  1890  					uint64(uint16(xHigh>>16)-uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)-uint16(yHigh)) |
  1891  						uint64(uint16(xHigh>>48)-uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)-uint16(yHigh>>32))<<32,
  1892  				)
  1893  			case wazeroir.ShapeI32x4:
  1894  				ce.pushValue(uint64(uint32(xLow>>32-yLow>>32))<<32 | uint64(uint32(xLow)-uint32(yLow)))
  1895  				ce.pushValue(uint64(uint32(xHigh>>32-yHigh>>32))<<32 | uint64(uint32(xHigh)-uint32(yHigh)))
  1896  			case wazeroir.ShapeI64x2:
  1897  				ce.pushValue(xLow - yLow)
  1898  				ce.pushValue(xHigh - yHigh)
  1899  			case wazeroir.ShapeF32x4:
  1900  				ce.pushValue(
  1901  					subFloat32bits(uint32(xLow), uint32(yLow)) | subFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
  1902  				)
  1903  				ce.pushValue(
  1904  					subFloat32bits(uint32(xHigh), uint32(yHigh)) | subFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
  1905  				)
  1906  			case wazeroir.ShapeF64x2:
  1907  				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow)))
  1908  				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh)))
  1909  			}
  1910  			frame.pc++
  1911  		case wazeroir.OperationKindV128Load:
  1912  			offset := ce.popMemoryOffset(op)
  1913  			switch op.B1 {
  1914  			case wazeroir.V128LoadType128:
  1915  				lo, ok := memoryInst.ReadUint64Le(offset)
  1916  				if !ok {
  1917  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1918  				}
  1919  				ce.pushValue(lo)
  1920  				hi, ok := memoryInst.ReadUint64Le(offset + 8)
  1921  				if !ok {
  1922  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1923  				}
  1924  				ce.pushValue(hi)
  1925  			case wazeroir.V128LoadType8x8s:
  1926  				data, ok := memoryInst.Read(offset, 8)
  1927  				if !ok {
  1928  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1929  				}
  1930  				ce.pushValue(
  1931  					uint64(uint16(int8(data[3])))<<48 | uint64(uint16(int8(data[2])))<<32 | uint64(uint16(int8(data[1])))<<16 | uint64(uint16(int8(data[0]))),
  1932  				)
  1933  				ce.pushValue(
  1934  					uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))),
  1935  				)
  1936  			case wazeroir.V128LoadType8x8u:
  1937  				data, ok := memoryInst.Read(offset, 8)
  1938  				if !ok {
  1939  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1940  				}
  1941  				ce.pushValue(
  1942  					uint64(data[3])<<48 | uint64(data[2])<<32 | uint64(data[1])<<16 | uint64(data[0]),
  1943  				)
  1944  				ce.pushValue(
  1945  					uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]),
  1946  				)
  1947  			case wazeroir.V128LoadType16x4s:
  1948  				data, ok := memoryInst.Read(offset, 8)
  1949  				if !ok {
  1950  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1951  				}
  1952  				ce.pushValue(
  1953  					uint64(int16(binary.LittleEndian.Uint16(data[2:])))<<32 |
  1954  						uint64(uint32(int16(binary.LittleEndian.Uint16(data)))),
  1955  				)
  1956  				ce.pushValue(
  1957  					uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 |
  1958  						uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))),
  1959  				)
  1960  			case wazeroir.V128LoadType16x4u:
  1961  				data, ok := memoryInst.Read(offset, 8)
  1962  				if !ok {
  1963  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1964  				}
  1965  				ce.pushValue(
  1966  					uint64(binary.LittleEndian.Uint16(data[2:]))<<32 | uint64(binary.LittleEndian.Uint16(data)),
  1967  				)
  1968  				ce.pushValue(
  1969  					uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])),
  1970  				)
  1971  			case wazeroir.V128LoadType32x2s:
  1972  				data, ok := memoryInst.Read(offset, 8)
  1973  				if !ok {
  1974  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1975  				}
  1976  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data))))
  1977  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:]))))
  1978  			case wazeroir.V128LoadType32x2u:
  1979  				data, ok := memoryInst.Read(offset, 8)
  1980  				if !ok {
  1981  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1982  				}
  1983  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data)))
  1984  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:])))
  1985  			case wazeroir.V128LoadType8Splat:
  1986  				v, ok := memoryInst.ReadByte(offset)
  1987  				if !ok {
  1988  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1989  				}
  1990  				v8 := uint64(v)<<56 | uint64(v)<<48 | uint64(v)<<40 | uint64(v)<<32 |
  1991  					uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v)
  1992  				ce.pushValue(v8)
  1993  				ce.pushValue(v8)
  1994  			case wazeroir.V128LoadType16Splat:
  1995  				v, ok := memoryInst.ReadUint16Le(offset)
  1996  				if !ok {
  1997  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1998  				}
  1999  				v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v)
  2000  				ce.pushValue(v4)
  2001  				ce.pushValue(v4)
  2002  			case wazeroir.V128LoadType32Splat:
  2003  				v, ok := memoryInst.ReadUint32Le(offset)
  2004  				if !ok {
  2005  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2006  				}
  2007  				vv := uint64(v)<<32 | uint64(v)
  2008  				ce.pushValue(vv)
  2009  				ce.pushValue(vv)
  2010  			case wazeroir.V128LoadType64Splat:
  2011  				lo, ok := memoryInst.ReadUint64Le(offset)
  2012  				if !ok {
  2013  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2014  				}
  2015  				ce.pushValue(lo)
  2016  				ce.pushValue(lo)
  2017  			case wazeroir.V128LoadType32zero:
  2018  				lo, ok := memoryInst.ReadUint32Le(offset)
  2019  				if !ok {
  2020  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2021  				}
  2022  				ce.pushValue(uint64(lo))
  2023  				ce.pushValue(0)
  2024  			case wazeroir.V128LoadType64zero:
  2025  				lo, ok := memoryInst.ReadUint64Le(offset)
  2026  				if !ok {
  2027  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2028  				}
  2029  				ce.pushValue(lo)
  2030  				ce.pushValue(0)
  2031  			}
  2032  			frame.pc++
  2033  		case wazeroir.OperationKindV128LoadLane:
  2034  			hi, lo := ce.popValue(), ce.popValue()
  2035  			offset := ce.popMemoryOffset(op)
  2036  			switch op.B1 {
  2037  			case 8:
  2038  				b, ok := memoryInst.ReadByte(offset)
  2039  				if !ok {
  2040  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2041  				}
  2042  				if op.B2 < 8 {
  2043  					s := op.B2 << 3
  2044  					lo = (lo & ^(0xff << s)) | uint64(b)<<s
  2045  				} else {
  2046  					s := (op.B2 - 8) << 3
  2047  					hi = (hi & ^(0xff << s)) | uint64(b)<<s
  2048  				}
  2049  			case 16:
  2050  				b, ok := memoryInst.ReadUint16Le(offset)
  2051  				if !ok {
  2052  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2053  				}
  2054  				if op.B2 < 4 {
  2055  					s := op.B2 << 4
  2056  					lo = (lo & ^(0xff_ff << s)) | uint64(b)<<s
  2057  				} else {
  2058  					s := (op.B2 - 4) << 4
  2059  					hi = (hi & ^(0xff_ff << s)) | uint64(b)<<s
  2060  				}
  2061  			case 32:
  2062  				b, ok := memoryInst.ReadUint32Le(offset)
  2063  				if !ok {
  2064  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2065  				}
  2066  				if op.B2 < 2 {
  2067  					s := op.B2 << 5
  2068  					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
  2069  				} else {
  2070  					s := (op.B2 - 2) << 5
  2071  					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
  2072  				}
  2073  			case 64:
  2074  				b, ok := memoryInst.ReadUint64Le(offset)
  2075  				if !ok {
  2076  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2077  				}
  2078  				if op.B2 == 0 {
  2079  					lo = b
  2080  				} else {
  2081  					hi = b
  2082  				}
  2083  			}
  2084  			ce.pushValue(lo)
  2085  			ce.pushValue(hi)
  2086  			frame.pc++
  2087  		case wazeroir.OperationKindV128Store:
  2088  			hi, lo := ce.popValue(), ce.popValue()
  2089  			offset := ce.popMemoryOffset(op)
  2090  			// Write the upper bytes first to trigger an early error if the memory access is out of bounds.
  2091  			// Otherwise, the lower bytes might be written to memory, but the upper bytes might not.
  2092  			if uint64(offset)+8 > math.MaxUint32 {
  2093  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2094  			}
  2095  			if ok := memoryInst.WriteUint64Le(offset+8, hi); !ok {
  2096  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2097  			}
  2098  			if ok := memoryInst.WriteUint64Le(offset, lo); !ok {
  2099  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2100  			}
  2101  			frame.pc++
  2102  		case wazeroir.OperationKindV128StoreLane:
  2103  			hi, lo := ce.popValue(), ce.popValue()
  2104  			offset := ce.popMemoryOffset(op)
  2105  			var ok bool
  2106  			switch op.B1 {
  2107  			case 8:
  2108  				if op.B2 < 8 {
  2109  					ok = memoryInst.WriteByte(offset, byte(lo>>(op.B2*8)))
  2110  				} else {
  2111  					ok = memoryInst.WriteByte(offset, byte(hi>>((op.B2-8)*8)))
  2112  				}
  2113  			case 16:
  2114  				if op.B2 < 4 {
  2115  					ok = memoryInst.WriteUint16Le(offset, uint16(lo>>(op.B2*16)))
  2116  				} else {
  2117  					ok = memoryInst.WriteUint16Le(offset, uint16(hi>>((op.B2-4)*16)))
  2118  				}
  2119  			case 32:
  2120  				if op.B2 < 2 {
  2121  					ok = memoryInst.WriteUint32Le(offset, uint32(lo>>(op.B2*32)))
  2122  				} else {
  2123  					ok = memoryInst.WriteUint32Le(offset, uint32(hi>>((op.B2-2)*32)))
  2124  				}
  2125  			case 64:
  2126  				if op.B2 == 0 {
  2127  					ok = memoryInst.WriteUint64Le(offset, lo)
  2128  				} else {
  2129  					ok = memoryInst.WriteUint64Le(offset, hi)
  2130  				}
  2131  			}
  2132  			if !ok {
  2133  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2134  			}
  2135  			frame.pc++
  2136  		case wazeroir.OperationKindV128ReplaceLane:
  2137  			v := ce.popValue()
  2138  			hi, lo := ce.popValue(), ce.popValue()
  2139  			switch op.B1 {
  2140  			case wazeroir.ShapeI8x16:
  2141  				if op.B2 < 8 {
  2142  					s := op.B2 << 3
  2143  					lo = (lo & ^(0xff << s)) | uint64(byte(v))<<s
  2144  				} else {
  2145  					s := (op.B2 - 8) << 3
  2146  					hi = (hi & ^(0xff << s)) | uint64(byte(v))<<s
  2147  				}
  2148  			case wazeroir.ShapeI16x8:
  2149  				if op.B2 < 4 {
  2150  					s := op.B2 << 4
  2151  					lo = (lo & ^(0xff_ff << s)) | uint64(uint16(v))<<s
  2152  				} else {
  2153  					s := (op.B2 - 4) << 4
  2154  					hi = (hi & ^(0xff_ff << s)) | uint64(uint16(v))<<s
  2155  				}
  2156  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2157  				if op.B2 < 2 {
  2158  					s := op.B2 << 5
  2159  					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
  2160  				} else {
  2161  					s := (op.B2 - 2) << 5
  2162  					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
  2163  				}
  2164  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2165  				if op.B2 == 0 {
  2166  					lo = v
  2167  				} else {
  2168  					hi = v
  2169  				}
  2170  			}
  2171  			ce.pushValue(lo)
  2172  			ce.pushValue(hi)
  2173  			frame.pc++
  2174  		case wazeroir.OperationKindV128ExtractLane:
  2175  			hi, lo := ce.popValue(), ce.popValue()
  2176  			var v uint64
  2177  			switch op.B1 {
  2178  			case wazeroir.ShapeI8x16:
  2179  				var u8 byte
  2180  				if op.B2 < 8 {
  2181  					u8 = byte(lo >> (op.B2 * 8))
  2182  				} else {
  2183  					u8 = byte(hi >> ((op.B2 - 8) * 8))
  2184  				}
  2185  				if op.B3 {
  2186  					// sign-extend.
  2187  					v = uint64(uint32(int8(u8)))
  2188  				} else {
  2189  					v = uint64(u8)
  2190  				}
  2191  			case wazeroir.ShapeI16x8:
  2192  				var u16 uint16
  2193  				if op.B2 < 4 {
  2194  					u16 = uint16(lo >> (op.B2 * 16))
  2195  				} else {
  2196  					u16 = uint16(hi >> ((op.B2 - 4) * 16))
  2197  				}
  2198  				if op.B3 {
  2199  					// sign-extend.
  2200  					v = uint64(uint32(int16(u16)))
  2201  				} else {
  2202  					v = uint64(u16)
  2203  				}
  2204  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2205  				if op.B2 < 2 {
  2206  					v = uint64(uint32(lo >> (op.B2 * 32)))
  2207  				} else {
  2208  					v = uint64(uint32(hi >> ((op.B2 - 2) * 32)))
  2209  				}
  2210  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2211  				if op.B2 == 0 {
  2212  					v = lo
  2213  				} else {
  2214  					v = hi
  2215  				}
  2216  			}
  2217  			ce.pushValue(v)
  2218  			frame.pc++
  2219  		case wazeroir.OperationKindV128Splat:
  2220  			v := ce.popValue()
  2221  			var hi, lo uint64
  2222  			switch op.B1 {
  2223  			case wazeroir.ShapeI8x16:
  2224  				v8 := uint64(byte(v))<<56 | uint64(byte(v))<<48 | uint64(byte(v))<<40 | uint64(byte(v))<<32 |
  2225  					uint64(byte(v))<<24 | uint64(byte(v))<<16 | uint64(byte(v))<<8 | uint64(byte(v))
  2226  				hi, lo = v8, v8
  2227  			case wazeroir.ShapeI16x8:
  2228  				v4 := uint64(uint16(v))<<48 | uint64(uint16(v))<<32 | uint64(uint16(v))<<16 | uint64(uint16(v))
  2229  				hi, lo = v4, v4
  2230  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2231  				v2 := uint64(uint32(v))<<32 | uint64(uint32(v))
  2232  				lo, hi = v2, v2
  2233  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2234  				lo, hi = v, v
  2235  			}
  2236  			ce.pushValue(lo)
  2237  			ce.pushValue(hi)
  2238  			frame.pc++
  2239  		case wazeroir.OperationKindV128Swizzle:
  2240  			idxHi, idxLo := ce.popValue(), ce.popValue()
  2241  			baseHi, baseLo := ce.popValue(), ce.popValue()
  2242  			var newVal [16]byte
  2243  			for i := 0; i < 16; i++ {
  2244  				var id byte
  2245  				if i < 8 {
  2246  					id = byte(idxLo >> (i * 8))
  2247  				} else {
  2248  					id = byte(idxHi >> ((i - 8) * 8))
  2249  				}
  2250  				if id < 8 {
  2251  					newVal[i] = byte(baseLo >> (id * 8))
  2252  				} else if id < 16 {
  2253  					newVal[i] = byte(baseHi >> ((id - 8) * 8))
  2254  				}
  2255  			}
  2256  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2257  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2258  			frame.pc++
  2259  		case wazeroir.OperationKindV128Shuffle:
  2260  			xHi, xLo, yHi, yLo := ce.popValue(), ce.popValue(), ce.popValue(), ce.popValue()
  2261  			var newVal [16]byte
  2262  			for i, l := range op.Us {
  2263  				if l < 8 {
  2264  					newVal[i] = byte(yLo >> (l * 8))
  2265  				} else if l < 16 {
  2266  					newVal[i] = byte(yHi >> ((l - 8) * 8))
  2267  				} else if l < 24 {
  2268  					newVal[i] = byte(xLo >> ((l - 16) * 8))
  2269  				} else if l < 32 {
  2270  					newVal[i] = byte(xHi >> ((l - 24) * 8))
  2271  				}
  2272  			}
  2273  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2274  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2275  			frame.pc++
  2276  		case wazeroir.OperationKindV128AnyTrue:
  2277  			hi, lo := ce.popValue(), ce.popValue()
  2278  			if hi != 0 || lo != 0 {
  2279  				ce.pushValue(1)
  2280  			} else {
  2281  				ce.pushValue(0)
  2282  			}
  2283  			frame.pc++
  2284  		case wazeroir.OperationKindV128AllTrue:
  2285  			hi, lo := ce.popValue(), ce.popValue()
  2286  			var ret bool
  2287  			switch op.B1 {
  2288  			case wazeroir.ShapeI8x16:
  2289  				ret = (uint8(lo) != 0) && (uint8(lo>>8) != 0) && (uint8(lo>>16) != 0) && (uint8(lo>>24) != 0) &&
  2290  					(uint8(lo>>32) != 0) && (uint8(lo>>40) != 0) && (uint8(lo>>48) != 0) && (uint8(lo>>56) != 0) &&
  2291  					(uint8(hi) != 0) && (uint8(hi>>8) != 0) && (uint8(hi>>16) != 0) && (uint8(hi>>24) != 0) &&
  2292  					(uint8(hi>>32) != 0) && (uint8(hi>>40) != 0) && (uint8(hi>>48) != 0) && (uint8(hi>>56) != 0)
  2293  			case wazeroir.ShapeI16x8:
  2294  				ret = (uint16(lo) != 0) && (uint16(lo>>16) != 0) && (uint16(lo>>32) != 0) && (uint16(lo>>48) != 0) &&
  2295  					(uint16(hi) != 0) && (uint16(hi>>16) != 0) && (uint16(hi>>32) != 0) && (uint16(hi>>48) != 0)
  2296  			case wazeroir.ShapeI32x4:
  2297  				ret = (uint32(lo) != 0) && (uint32(lo>>32) != 0) &&
  2298  					(uint32(hi) != 0) && (uint32(hi>>32) != 0)
  2299  			case wazeroir.ShapeI64x2:
  2300  				ret = (lo != 0) &&
  2301  					(hi != 0)
  2302  			}
  2303  			if ret {
  2304  				ce.pushValue(1)
  2305  			} else {
  2306  				ce.pushValue(0)
  2307  			}
  2308  			frame.pc++
  2309  		case wazeroir.OperationKindV128BitMask:
  2310  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitmask-extraction
  2311  			hi, lo := ce.popValue(), ce.popValue()
  2312  			var res uint64
  2313  			switch op.B1 {
  2314  			case wazeroir.ShapeI8x16:
  2315  				for i := 0; i < 8; i++ {
  2316  					if int8(lo>>(i*8)) < 0 {
  2317  						res |= 1 << i
  2318  					}
  2319  				}
  2320  				for i := 0; i < 8; i++ {
  2321  					if int8(hi>>(i*8)) < 0 {
  2322  						res |= 1 << (i + 8)
  2323  					}
  2324  				}
  2325  			case wazeroir.ShapeI16x8:
  2326  				for i := 0; i < 4; i++ {
  2327  					if int16(lo>>(i*16)) < 0 {
  2328  						res |= 1 << i
  2329  					}
  2330  				}
  2331  				for i := 0; i < 4; i++ {
  2332  					if int16(hi>>(i*16)) < 0 {
  2333  						res |= 1 << (i + 4)
  2334  					}
  2335  				}
  2336  			case wazeroir.ShapeI32x4:
  2337  				for i := 0; i < 2; i++ {
  2338  					if int32(lo>>(i*32)) < 0 {
  2339  						res |= 1 << i
  2340  					}
  2341  				}
  2342  				for i := 0; i < 2; i++ {
  2343  					if int32(hi>>(i*32)) < 0 {
  2344  						res |= 1 << (i + 2)
  2345  					}
  2346  				}
  2347  			case wazeroir.ShapeI64x2:
  2348  				if int64(lo) < 0 {
  2349  					res |= 0b01
  2350  				}
  2351  				if int(hi) < 0 {
  2352  					res |= 0b10
  2353  				}
  2354  			}
  2355  			ce.pushValue(res)
  2356  			frame.pc++
  2357  		case wazeroir.OperationKindV128And:
  2358  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2359  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2360  			ce.pushValue(x1Lo & x2Lo)
  2361  			ce.pushValue(x1Hi & x2Hi)
  2362  			frame.pc++
  2363  		case wazeroir.OperationKindV128Not:
  2364  			hi, lo := ce.popValue(), ce.popValue()
  2365  			ce.pushValue(^lo)
  2366  			ce.pushValue(^hi)
  2367  			frame.pc++
  2368  		case wazeroir.OperationKindV128Or:
  2369  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2370  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2371  			ce.pushValue(x1Lo | x2Lo)
  2372  			ce.pushValue(x1Hi | x2Hi)
  2373  			frame.pc++
  2374  		case wazeroir.OperationKindV128Xor:
  2375  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2376  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2377  			ce.pushValue(x1Lo ^ x2Lo)
  2378  			ce.pushValue(x1Hi ^ x2Hi)
  2379  			frame.pc++
  2380  		case wazeroir.OperationKindV128Bitselect:
  2381  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitwise-select
  2382  			cHi, cLo := ce.popValue(), ce.popValue()
  2383  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2384  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2385  			// v128.or(v128.and(v1, c), v128.and(v2, v128.not(c)))
  2386  			ce.pushValue((x1Lo & cLo) | (x2Lo & (^cLo)))
  2387  			ce.pushValue((x1Hi & cHi) | (x2Hi & (^cHi)))
  2388  			frame.pc++
  2389  		case wazeroir.OperationKindV128AndNot:
  2390  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2391  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2392  			ce.pushValue(x1Lo & (^x2Lo))
  2393  			ce.pushValue(x1Hi & (^x2Hi))
  2394  			frame.pc++
  2395  		case wazeroir.OperationKindV128Shl:
        			// Lane-wise left shift of a 128-bit vector. The shift amount is on top of the
        			// stack, followed by the vector's high and then low 64-bit halves.
  2396  			s := ce.popValue()
  2397  			hi, lo := ce.popValue(), ce.popValue()
        			// op.B1 selects the lane shape. There is no default case, so a shape that is not
        			// listed below leaves hi and lo unchanged.
  2398  			switch op.B1 {
  2399  			case wazeroir.ShapeI8x16:
        				// The shift amount is reduced modulo the lane width (8 bits).
  2400  				s = s % 8
        				// Each byte lane is moved down to the low bits, shifted, and truncated to 8 bits
        				// so that bits shifted out of the lane are dropped; it is then moved back to its
        				// original position within the 64-bit half.
  2401  				lo = uint64(uint8(lo<<s)) |
  2402  					uint64(uint8((lo>>8)<<s))<<8 |
  2403  					uint64(uint8((lo>>16)<<s))<<16 |
  2404  					uint64(uint8((lo>>24)<<s))<<24 |
  2405  					uint64(uint8((lo>>32)<<s))<<32 |
  2406  					uint64(uint8((lo>>40)<<s))<<40 |
  2407  					uint64(uint8((lo>>48)<<s))<<48 |
  2408  					uint64(uint8((lo>>56)<<s))<<56
  2409  				hi = uint64(uint8(hi<<s)) |
  2410  					uint64(uint8((hi>>8)<<s))<<8 |
  2411  					uint64(uint8((hi>>16)<<s))<<16 |
  2412  					uint64(uint8((hi>>24)<<s))<<24 |
  2413  					uint64(uint8((hi>>32)<<s))<<32 |
  2414  					uint64(uint8((hi>>40)<<s))<<40 |
  2415  					uint64(uint8((hi>>48)<<s))<<48 |
  2416  					uint64(uint8((hi>>56)<<s))<<56
  2417  			case wazeroir.ShapeI16x8:
        				// Same scheme with four 16-bit lanes per half; shift amount modulo 16.
  2418  				s = s % 16
  2419  				lo = uint64(uint16(lo<<s)) |
  2420  					uint64(uint16((lo>>16)<<s))<<16 |
  2421  					uint64(uint16((lo>>32)<<s))<<32 |
  2422  					uint64(uint16((lo>>48)<<s))<<48
  2423  				hi = uint64(uint16(hi<<s)) |
  2424  					uint64(uint16((hi>>16)<<s))<<16 |
  2425  					uint64(uint16((hi>>32)<<s))<<32 |
  2426  					uint64(uint16((hi>>48)<<s))<<48
  2427  			case wazeroir.ShapeI32x4:
        				// Two 32-bit lanes per half; shift amount modulo 32.
  2428  				s = s % 32
  2429  				lo = uint64(uint32(lo<<s)) | uint64(uint32((lo>>32)<<s))<<32
  2430  				hi = uint64(uint32(hi<<s)) | uint64(uint32((hi>>32)<<s))<<32
  2431  			case wazeroir.ShapeI64x2:
        				// Each half is exactly one 64-bit lane, so it is shifted directly.
  2432  				s = s % 64
  2433  				lo = lo << s
  2434  				hi = hi << s
  2435  			}
        			// Push the shifted vector back: low half first, then high half.
  2436  			ce.pushValue(lo)
  2437  			ce.pushValue(hi)
  2438  			frame.pc++
  2439  		case wazeroir.OperationKindV128Shr:
        			// Lane-wise right shift of a 128-bit vector. The shift amount is on top of the
        			// stack, followed by the vector's high and then low 64-bit halves.
        			// op.B1 selects the lane shape and op.B3 selects a signed shift: in the signed
        			// branches each lane is converted to a signed integer before shifting, so the
        			// sign bit is replicated; otherwise the lane is shifted as an unsigned integer.
  2440  			s := ce.popValue()
  2441  			hi, lo := ce.popValue(), ce.popValue()
        			// There is no default case: an unlisted shape leaves hi and lo unchanged.
  2442  			switch op.B1 {
  2443  			case wazeroir.ShapeI8x16:
        				// The shift amount is reduced modulo the lane width (8 bits).
  2444  				s = s % 8
  2445  				if op.B3 { // signed
        					// Each byte lane is extracted as int8, shifted with sign extension, converted
        					// back to its 8-bit pattern and placed at its original position.
  2446  					lo = uint64(uint8(int8(lo)>>s)) |
  2447  						uint64(uint8(int8(lo>>8)>>s))<<8 |
  2448  						uint64(uint8(int8(lo>>16)>>s))<<16 |
  2449  						uint64(uint8(int8(lo>>24)>>s))<<24 |
  2450  						uint64(uint8(int8(lo>>32)>>s))<<32 |
  2451  						uint64(uint8(int8(lo>>40)>>s))<<40 |
  2452  						uint64(uint8(int8(lo>>48)>>s))<<48 |
  2453  						uint64(uint8(int8(lo>>56)>>s))<<56
  2454  					hi = uint64(uint8(int8(hi)>>s)) |
  2455  						uint64(uint8(int8(hi>>8)>>s))<<8 |
  2456  						uint64(uint8(int8(hi>>16)>>s))<<16 |
  2457  						uint64(uint8(int8(hi>>24)>>s))<<24 |
  2458  						uint64(uint8(int8(hi>>32)>>s))<<32 |
  2459  						uint64(uint8(int8(hi>>40)>>s))<<40 |
  2460  						uint64(uint8(int8(hi>>48)>>s))<<48 |
  2461  						uint64(uint8(int8(hi>>56)>>s))<<56
  2462  				} else {
        					// Unsigned: each byte lane is extracted as uint8 and shifted with zero fill.
  2463  					lo = uint64(uint8(lo)>>s) |
  2464  						uint64(uint8(lo>>8)>>s)<<8 |
  2465  						uint64(uint8(lo>>16)>>s)<<16 |
  2466  						uint64(uint8(lo>>24)>>s)<<24 |
  2467  						uint64(uint8(lo>>32)>>s)<<32 |
  2468  						uint64(uint8(lo>>40)>>s)<<40 |
  2469  						uint64(uint8(lo>>48)>>s)<<48 |
  2470  						uint64(uint8(lo>>56)>>s)<<56
  2471  					hi = uint64(uint8(hi)>>s) |
  2472  						uint64(uint8(hi>>8)>>s)<<8 |
  2473  						uint64(uint8(hi>>16)>>s)<<16 |
  2474  						uint64(uint8(hi>>24)>>s)<<24 |
  2475  						uint64(uint8(hi>>32)>>s)<<32 |
  2476  						uint64(uint8(hi>>40)>>s)<<40 |
  2477  						uint64(uint8(hi>>48)>>s)<<48 |
  2478  						uint64(uint8(hi>>56)>>s)<<56
  2479  				}
  2480  			case wazeroir.ShapeI16x8:
        				// Same scheme with four 16-bit lanes per half; shift amount modulo 16.
  2481  				s = s % 16
  2482  				if op.B3 { // signed
  2483  					lo = uint64(uint16(int16(lo)>>s)) |
  2484  						uint64(uint16(int16(lo>>16)>>s))<<16 |
  2485  						uint64(uint16(int16(lo>>32)>>s))<<32 |
  2486  						uint64(uint16(int16(lo>>48)>>s))<<48
  2487  					hi = uint64(uint16(int16(hi)>>s)) |
  2488  						uint64(uint16(int16(hi>>16)>>s))<<16 |
  2489  						uint64(uint16(int16(hi>>32)>>s))<<32 |
  2490  						uint64(uint16(int16(hi>>48)>>s))<<48
  2491  				} else {
  2492  					lo = uint64(uint16(lo)>>s) |
  2493  						uint64(uint16(lo>>16)>>s)<<16 |
  2494  						uint64(uint16(lo>>32)>>s)<<32 |
  2495  						uint64(uint16(lo>>48)>>s)<<48
  2496  					hi = uint64(uint16(hi)>>s) |
  2497  						uint64(uint16(hi>>16)>>s)<<16 |
  2498  						uint64(uint16(hi>>32)>>s)<<32 |
  2499  						uint64(uint16(hi>>48)>>s)<<48
  2500  				}
  2501  			case wazeroir.ShapeI32x4:
        				// Two 32-bit lanes per half; shift amount modulo 32.
  2502  				s = s % 32
        				// As for the other shapes, op.B3 selects the signed (int32) variant.
  2503  				if op.B3 {
  2504  					lo = uint64(uint32(int32(lo)>>s)) | uint64(uint32(int32(lo>>32)>>s))<<32
  2505  					hi = uint64(uint32(int32(hi)>>s)) | uint64(uint32(int32(hi>>32)>>s))<<32
  2506  				} else {
  2507  					lo = uint64(uint32(lo)>>s) | uint64(uint32(lo>>32)>>s)<<32
  2508  					hi = uint64(uint32(hi)>>s) | uint64(uint32(hi>>32)>>s)<<32
  2509  				}
  2510  			case wazeroir.ShapeI64x2:
        				// Each half is exactly one 64-bit lane, so it is shifted directly.
  2511  				s = s % 64
  2512  				if op.B3 { // signed
  2513  					lo = uint64(int64(lo) >> s)
  2514  					hi = uint64(int64(hi) >> s)
  2515  				} else {
  2516  					lo = lo >> s
  2517  					hi = hi >> s
  2518  				}
  2519  
  2520  			}
        			// Push the shifted vector back: low half first, then high half.
  2521  			ce.pushValue(lo)
  2522  			ce.pushValue(hi)
  2523  			frame.pc++
  2524  		case wazeroir.OperationKindV128Cmp:
        			// Lane-wise comparison of two 128-bit vectors, producing a mask vector.
        			// x2 is on top of the stack and is popped first; each vector is popped high
        			// 64-bit half first, then low half.
  2525  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2526  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
        			// result holds one boolean per lane in lane order: lanes taken from the low half
        			// come first, followed by the lanes taken from the high half.
  2527  			var result []bool
        			// op.B1 selects the comparison type, which fixes both the lane shape and the
        			// predicate. Signed variants reinterpret each lane as a signed integer, unsigned
        			// variants as an unsigned one. If no case matches, result stays nil.
  2528  			switch op.B1 {
  2529  			case wazeroir.V128CmpTypeI8x16Eq:
  2530  				result = []bool{
  2531  					byte(x1Lo>>0) == byte(x2Lo>>0), byte(x1Lo>>8) == byte(x2Lo>>8),
  2532  					byte(x1Lo>>16) == byte(x2Lo>>16), byte(x1Lo>>24) == byte(x2Lo>>24),
  2533  					byte(x1Lo>>32) == byte(x2Lo>>32), byte(x1Lo>>40) == byte(x2Lo>>40),
  2534  					byte(x1Lo>>48) == byte(x2Lo>>48), byte(x1Lo>>56) == byte(x2Lo>>56),
  2535  					byte(x1Hi>>0) == byte(x2Hi>>0), byte(x1Hi>>8) == byte(x2Hi>>8),
  2536  					byte(x1Hi>>16) == byte(x2Hi>>16), byte(x1Hi>>24) == byte(x2Hi>>24),
  2537  					byte(x1Hi>>32) == byte(x2Hi>>32), byte(x1Hi>>40) == byte(x2Hi>>40),
  2538  					byte(x1Hi>>48) == byte(x2Hi>>48), byte(x1Hi>>56) == byte(x2Hi>>56),
  2539  				}
  2540  			case wazeroir.V128CmpTypeI8x16Ne:
  2541  				result = []bool{
  2542  					byte(x1Lo>>0) != byte(x2Lo>>0), byte(x1Lo>>8) != byte(x2Lo>>8),
  2543  					byte(x1Lo>>16) != byte(x2Lo>>16), byte(x1Lo>>24) != byte(x2Lo>>24),
  2544  					byte(x1Lo>>32) != byte(x2Lo>>32), byte(x1Lo>>40) != byte(x2Lo>>40),
  2545  					byte(x1Lo>>48) != byte(x2Lo>>48), byte(x1Lo>>56) != byte(x2Lo>>56),
  2546  					byte(x1Hi>>0) != byte(x2Hi>>0), byte(x1Hi>>8) != byte(x2Hi>>8),
  2547  					byte(x1Hi>>16) != byte(x2Hi>>16), byte(x1Hi>>24) != byte(x2Hi>>24),
  2548  					byte(x1Hi>>32) != byte(x2Hi>>32), byte(x1Hi>>40) != byte(x2Hi>>40),
  2549  					byte(x1Hi>>48) != byte(x2Hi>>48), byte(x1Hi>>56) != byte(x2Hi>>56),
  2550  				}
  2551  			case wazeroir.V128CmpTypeI8x16LtS:
  2552  				result = []bool{
  2553  					int8(x1Lo>>0) < int8(x2Lo>>0), int8(x1Lo>>8) < int8(x2Lo>>8),
  2554  					int8(x1Lo>>16) < int8(x2Lo>>16), int8(x1Lo>>24) < int8(x2Lo>>24),
  2555  					int8(x1Lo>>32) < int8(x2Lo>>32), int8(x1Lo>>40) < int8(x2Lo>>40),
  2556  					int8(x1Lo>>48) < int8(x2Lo>>48), int8(x1Lo>>56) < int8(x2Lo>>56),
  2557  					int8(x1Hi>>0) < int8(x2Hi>>0), int8(x1Hi>>8) < int8(x2Hi>>8),
  2558  					int8(x1Hi>>16) < int8(x2Hi>>16), int8(x1Hi>>24) < int8(x2Hi>>24),
  2559  					int8(x1Hi>>32) < int8(x2Hi>>32), int8(x1Hi>>40) < int8(x2Hi>>40),
  2560  					int8(x1Hi>>48) < int8(x2Hi>>48), int8(x1Hi>>56) < int8(x2Hi>>56),
  2561  				}
  2562  			case wazeroir.V128CmpTypeI8x16LtU:
  2563  				result = []bool{
  2564  					byte(x1Lo>>0) < byte(x2Lo>>0), byte(x1Lo>>8) < byte(x2Lo>>8),
  2565  					byte(x1Lo>>16) < byte(x2Lo>>16), byte(x1Lo>>24) < byte(x2Lo>>24),
  2566  					byte(x1Lo>>32) < byte(x2Lo>>32), byte(x1Lo>>40) < byte(x2Lo>>40),
  2567  					byte(x1Lo>>48) < byte(x2Lo>>48), byte(x1Lo>>56) < byte(x2Lo>>56),
  2568  					byte(x1Hi>>0) < byte(x2Hi>>0), byte(x1Hi>>8) < byte(x2Hi>>8),
  2569  					byte(x1Hi>>16) < byte(x2Hi>>16), byte(x1Hi>>24) < byte(x2Hi>>24),
  2570  					byte(x1Hi>>32) < byte(x2Hi>>32), byte(x1Hi>>40) < byte(x2Hi>>40),
  2571  					byte(x1Hi>>48) < byte(x2Hi>>48), byte(x1Hi>>56) < byte(x2Hi>>56),
  2572  				}
  2573  			case wazeroir.V128CmpTypeI8x16GtS:
  2574  				result = []bool{
  2575  					int8(x1Lo>>0) > int8(x2Lo>>0), int8(x1Lo>>8) > int8(x2Lo>>8),
  2576  					int8(x1Lo>>16) > int8(x2Lo>>16), int8(x1Lo>>24) > int8(x2Lo>>24),
  2577  					int8(x1Lo>>32) > int8(x2Lo>>32), int8(x1Lo>>40) > int8(x2Lo>>40),
  2578  					int8(x1Lo>>48) > int8(x2Lo>>48), int8(x1Lo>>56) > int8(x2Lo>>56),
  2579  					int8(x1Hi>>0) > int8(x2Hi>>0), int8(x1Hi>>8) > int8(x2Hi>>8),
  2580  					int8(x1Hi>>16) > int8(x2Hi>>16), int8(x1Hi>>24) > int8(x2Hi>>24),
  2581  					int8(x1Hi>>32) > int8(x2Hi>>32), int8(x1Hi>>40) > int8(x2Hi>>40),
  2582  					int8(x1Hi>>48) > int8(x2Hi>>48), int8(x1Hi>>56) > int8(x2Hi>>56),
  2583  				}
  2584  			case wazeroir.V128CmpTypeI8x16GtU:
  2585  				result = []bool{
  2586  					byte(x1Lo>>0) > byte(x2Lo>>0), byte(x1Lo>>8) > byte(x2Lo>>8),
  2587  					byte(x1Lo>>16) > byte(x2Lo>>16), byte(x1Lo>>24) > byte(x2Lo>>24),
  2588  					byte(x1Lo>>32) > byte(x2Lo>>32), byte(x1Lo>>40) > byte(x2Lo>>40),
  2589  					byte(x1Lo>>48) > byte(x2Lo>>48), byte(x1Lo>>56) > byte(x2Lo>>56),
  2590  					byte(x1Hi>>0) > byte(x2Hi>>0), byte(x1Hi>>8) > byte(x2Hi>>8),
  2591  					byte(x1Hi>>16) > byte(x2Hi>>16), byte(x1Hi>>24) > byte(x2Hi>>24),
  2592  					byte(x1Hi>>32) > byte(x2Hi>>32), byte(x1Hi>>40) > byte(x2Hi>>40),
  2593  					byte(x1Hi>>48) > byte(x2Hi>>48), byte(x1Hi>>56) > byte(x2Hi>>56),
  2594  				}
  2595  			case wazeroir.V128CmpTypeI8x16LeS:
  2596  				result = []bool{
  2597  					int8(x1Lo>>0) <= int8(x2Lo>>0), int8(x1Lo>>8) <= int8(x2Lo>>8),
  2598  					int8(x1Lo>>16) <= int8(x2Lo>>16), int8(x1Lo>>24) <= int8(x2Lo>>24),
  2599  					int8(x1Lo>>32) <= int8(x2Lo>>32), int8(x1Lo>>40) <= int8(x2Lo>>40),
  2600  					int8(x1Lo>>48) <= int8(x2Lo>>48), int8(x1Lo>>56) <= int8(x2Lo>>56),
  2601  					int8(x1Hi>>0) <= int8(x2Hi>>0), int8(x1Hi>>8) <= int8(x2Hi>>8),
  2602  					int8(x1Hi>>16) <= int8(x2Hi>>16), int8(x1Hi>>24) <= int8(x2Hi>>24),
  2603  					int8(x1Hi>>32) <= int8(x2Hi>>32), int8(x1Hi>>40) <= int8(x2Hi>>40),
  2604  					int8(x1Hi>>48) <= int8(x2Hi>>48), int8(x1Hi>>56) <= int8(x2Hi>>56),
  2605  				}
  2606  			case wazeroir.V128CmpTypeI8x16LeU:
  2607  				result = []bool{
  2608  					byte(x1Lo>>0) <= byte(x2Lo>>0), byte(x1Lo>>8) <= byte(x2Lo>>8),
  2609  					byte(x1Lo>>16) <= byte(x2Lo>>16), byte(x1Lo>>24) <= byte(x2Lo>>24),
  2610  					byte(x1Lo>>32) <= byte(x2Lo>>32), byte(x1Lo>>40) <= byte(x2Lo>>40),
  2611  					byte(x1Lo>>48) <= byte(x2Lo>>48), byte(x1Lo>>56) <= byte(x2Lo>>56),
  2612  					byte(x1Hi>>0) <= byte(x2Hi>>0), byte(x1Hi>>8) <= byte(x2Hi>>8),
  2613  					byte(x1Hi>>16) <= byte(x2Hi>>16), byte(x1Hi>>24) <= byte(x2Hi>>24),
  2614  					byte(x1Hi>>32) <= byte(x2Hi>>32), byte(x1Hi>>40) <= byte(x2Hi>>40),
  2615  					byte(x1Hi>>48) <= byte(x2Hi>>48), byte(x1Hi>>56) <= byte(x2Hi>>56),
  2616  				}
  2617  			case wazeroir.V128CmpTypeI8x16GeS:
  2618  				result = []bool{
  2619  					int8(x1Lo>>0) >= int8(x2Lo>>0), int8(x1Lo>>8) >= int8(x2Lo>>8),
  2620  					int8(x1Lo>>16) >= int8(x2Lo>>16), int8(x1Lo>>24) >= int8(x2Lo>>24),
  2621  					int8(x1Lo>>32) >= int8(x2Lo>>32), int8(x1Lo>>40) >= int8(x2Lo>>40),
  2622  					int8(x1Lo>>48) >= int8(x2Lo>>48), int8(x1Lo>>56) >= int8(x2Lo>>56),
  2623  					int8(x1Hi>>0) >= int8(x2Hi>>0), int8(x1Hi>>8) >= int8(x2Hi>>8),
  2624  					int8(x1Hi>>16) >= int8(x2Hi>>16), int8(x1Hi>>24) >= int8(x2Hi>>24),
  2625  					int8(x1Hi>>32) >= int8(x2Hi>>32), int8(x1Hi>>40) >= int8(x2Hi>>40),
  2626  					int8(x1Hi>>48) >= int8(x2Hi>>48), int8(x1Hi>>56) >= int8(x2Hi>>56),
  2627  				}
  2628  			case wazeroir.V128CmpTypeI8x16GeU:
  2629  				result = []bool{
  2630  					byte(x1Lo>>0) >= byte(x2Lo>>0), byte(x1Lo>>8) >= byte(x2Lo>>8),
  2631  					byte(x1Lo>>16) >= byte(x2Lo>>16), byte(x1Lo>>24) >= byte(x2Lo>>24),
  2632  					byte(x1Lo>>32) >= byte(x2Lo>>32), byte(x1Lo>>40) >= byte(x2Lo>>40),
  2633  					byte(x1Lo>>48) >= byte(x2Lo>>48), byte(x1Lo>>56) >= byte(x2Lo>>56),
  2634  					byte(x1Hi>>0) >= byte(x2Hi>>0), byte(x1Hi>>8) >= byte(x2Hi>>8),
  2635  					byte(x1Hi>>16) >= byte(x2Hi>>16), byte(x1Hi>>24) >= byte(x2Hi>>24),
  2636  					byte(x1Hi>>32) >= byte(x2Hi>>32), byte(x1Hi>>40) >= byte(x2Hi>>40),
  2637  					byte(x1Hi>>48) >= byte(x2Hi>>48), byte(x1Hi>>56) >= byte(x2Hi>>56),
  2638  				}
  2639  			case wazeroir.V128CmpTypeI16x8Eq:
  2640  				result = []bool{
  2641  					uint16(x1Lo>>0) == uint16(x2Lo>>0), uint16(x1Lo>>16) == uint16(x2Lo>>16),
  2642  					uint16(x1Lo>>32) == uint16(x2Lo>>32), uint16(x1Lo>>48) == uint16(x2Lo>>48),
  2643  					uint16(x1Hi>>0) == uint16(x2Hi>>0), uint16(x1Hi>>16) == uint16(x2Hi>>16),
  2644  					uint16(x1Hi>>32) == uint16(x2Hi>>32), uint16(x1Hi>>48) == uint16(x2Hi>>48),
  2645  				}
  2646  			case wazeroir.V128CmpTypeI16x8Ne:
  2647  				result = []bool{
  2648  					uint16(x1Lo>>0) != uint16(x2Lo>>0), uint16(x1Lo>>16) != uint16(x2Lo>>16),
  2649  					uint16(x1Lo>>32) != uint16(x2Lo>>32), uint16(x1Lo>>48) != uint16(x2Lo>>48),
  2650  					uint16(x1Hi>>0) != uint16(x2Hi>>0), uint16(x1Hi>>16) != uint16(x2Hi>>16),
  2651  					uint16(x1Hi>>32) != uint16(x2Hi>>32), uint16(x1Hi>>48) != uint16(x2Hi>>48),
  2652  				}
  2653  			case wazeroir.V128CmpTypeI16x8LtS:
  2654  				result = []bool{
  2655  					int16(x1Lo>>0) < int16(x2Lo>>0), int16(x1Lo>>16) < int16(x2Lo>>16),
  2656  					int16(x1Lo>>32) < int16(x2Lo>>32), int16(x1Lo>>48) < int16(x2Lo>>48),
  2657  					int16(x1Hi>>0) < int16(x2Hi>>0), int16(x1Hi>>16) < int16(x2Hi>>16),
  2658  					int16(x1Hi>>32) < int16(x2Hi>>32), int16(x1Hi>>48) < int16(x2Hi>>48),
  2659  				}
  2660  			case wazeroir.V128CmpTypeI16x8LtU:
  2661  				result = []bool{
  2662  					uint16(x1Lo>>0) < uint16(x2Lo>>0), uint16(x1Lo>>16) < uint16(x2Lo>>16),
  2663  					uint16(x1Lo>>32) < uint16(x2Lo>>32), uint16(x1Lo>>48) < uint16(x2Lo>>48),
  2664  					uint16(x1Hi>>0) < uint16(x2Hi>>0), uint16(x1Hi>>16) < uint16(x2Hi>>16),
  2665  					uint16(x1Hi>>32) < uint16(x2Hi>>32), uint16(x1Hi>>48) < uint16(x2Hi>>48),
  2666  				}
  2667  			case wazeroir.V128CmpTypeI16x8GtS:
  2668  				result = []bool{
  2669  					int16(x1Lo>>0) > int16(x2Lo>>0), int16(x1Lo>>16) > int16(x2Lo>>16),
  2670  					int16(x1Lo>>32) > int16(x2Lo>>32), int16(x1Lo>>48) > int16(x2Lo>>48),
  2671  					int16(x1Hi>>0) > int16(x2Hi>>0), int16(x1Hi>>16) > int16(x2Hi>>16),
  2672  					int16(x1Hi>>32) > int16(x2Hi>>32), int16(x1Hi>>48) > int16(x2Hi>>48),
  2673  				}
  2674  			case wazeroir.V128CmpTypeI16x8GtU:
  2675  				result = []bool{
  2676  					uint16(x1Lo>>0) > uint16(x2Lo>>0), uint16(x1Lo>>16) > uint16(x2Lo>>16),
  2677  					uint16(x1Lo>>32) > uint16(x2Lo>>32), uint16(x1Lo>>48) > uint16(x2Lo>>48),
  2678  					uint16(x1Hi>>0) > uint16(x2Hi>>0), uint16(x1Hi>>16) > uint16(x2Hi>>16),
  2679  					uint16(x1Hi>>32) > uint16(x2Hi>>32), uint16(x1Hi>>48) > uint16(x2Hi>>48),
  2680  				}
  2681  			case wazeroir.V128CmpTypeI16x8LeS:
  2682  				result = []bool{
  2683  					int16(x1Lo>>0) <= int16(x2Lo>>0), int16(x1Lo>>16) <= int16(x2Lo>>16),
  2684  					int16(x1Lo>>32) <= int16(x2Lo>>32), int16(x1Lo>>48) <= int16(x2Lo>>48),
  2685  					int16(x1Hi>>0) <= int16(x2Hi>>0), int16(x1Hi>>16) <= int16(x2Hi>>16),
  2686  					int16(x1Hi>>32) <= int16(x2Hi>>32), int16(x1Hi>>48) <= int16(x2Hi>>48),
  2687  				}
  2688  			case wazeroir.V128CmpTypeI16x8LeU:
  2689  				result = []bool{
  2690  					uint16(x1Lo>>0) <= uint16(x2Lo>>0), uint16(x1Lo>>16) <= uint16(x2Lo>>16),
  2691  					uint16(x1Lo>>32) <= uint16(x2Lo>>32), uint16(x1Lo>>48) <= uint16(x2Lo>>48),
  2692  					uint16(x1Hi>>0) <= uint16(x2Hi>>0), uint16(x1Hi>>16) <= uint16(x2Hi>>16),
  2693  					uint16(x1Hi>>32) <= uint16(x2Hi>>32), uint16(x1Hi>>48) <= uint16(x2Hi>>48),
  2694  				}
  2695  			case wazeroir.V128CmpTypeI16x8GeS:
  2696  				result = []bool{
  2697  					int16(x1Lo>>0) >= int16(x2Lo>>0), int16(x1Lo>>16) >= int16(x2Lo>>16),
  2698  					int16(x1Lo>>32) >= int16(x2Lo>>32), int16(x1Lo>>48) >= int16(x2Lo>>48),
  2699  					int16(x1Hi>>0) >= int16(x2Hi>>0), int16(x1Hi>>16) >= int16(x2Hi>>16),
  2700  					int16(x1Hi>>32) >= int16(x2Hi>>32), int16(x1Hi>>48) >= int16(x2Hi>>48),
  2701  				}
  2702  			case wazeroir.V128CmpTypeI16x8GeU:
  2703  				result = []bool{
  2704  					uint16(x1Lo>>0) >= uint16(x2Lo>>0), uint16(x1Lo>>16) >= uint16(x2Lo>>16),
  2705  					uint16(x1Lo>>32) >= uint16(x2Lo>>32), uint16(x1Lo>>48) >= uint16(x2Lo>>48),
  2706  					uint16(x1Hi>>0) >= uint16(x2Hi>>0), uint16(x1Hi>>16) >= uint16(x2Hi>>16),
  2707  					uint16(x1Hi>>32) >= uint16(x2Hi>>32), uint16(x1Hi>>48) >= uint16(x2Hi>>48),
  2708  				}
  2709  			case wazeroir.V128CmpTypeI32x4Eq:
  2710  				result = []bool{
  2711  					uint32(x1Lo>>0) == uint32(x2Lo>>0), uint32(x1Lo>>32) == uint32(x2Lo>>32),
  2712  					uint32(x1Hi>>0) == uint32(x2Hi>>0), uint32(x1Hi>>32) == uint32(x2Hi>>32),
  2713  				}
  2714  			case wazeroir.V128CmpTypeI32x4Ne:
  2715  				result = []bool{
  2716  					uint32(x1Lo>>0) != uint32(x2Lo>>0), uint32(x1Lo>>32) != uint32(x2Lo>>32),
  2717  					uint32(x1Hi>>0) != uint32(x2Hi>>0), uint32(x1Hi>>32) != uint32(x2Hi>>32),
  2718  				}
  2719  			case wazeroir.V128CmpTypeI32x4LtS:
  2720  				result = []bool{
  2721  					int32(x1Lo>>0) < int32(x2Lo>>0), int32(x1Lo>>32) < int32(x2Lo>>32),
  2722  					int32(x1Hi>>0) < int32(x2Hi>>0), int32(x1Hi>>32) < int32(x2Hi>>32),
  2723  				}
  2724  			case wazeroir.V128CmpTypeI32x4LtU:
  2725  				result = []bool{
  2726  					uint32(x1Lo>>0) < uint32(x2Lo>>0), uint32(x1Lo>>32) < uint32(x2Lo>>32),
  2727  					uint32(x1Hi>>0) < uint32(x2Hi>>0), uint32(x1Hi>>32) < uint32(x2Hi>>32),
  2728  				}
  2729  			case wazeroir.V128CmpTypeI32x4GtS:
  2730  				result = []bool{
  2731  					int32(x1Lo>>0) > int32(x2Lo>>0), int32(x1Lo>>32) > int32(x2Lo>>32),
  2732  					int32(x1Hi>>0) > int32(x2Hi>>0), int32(x1Hi>>32) > int32(x2Hi>>32),
  2733  				}
  2734  			case wazeroir.V128CmpTypeI32x4GtU:
  2735  				result = []bool{
  2736  					uint32(x1Lo>>0) > uint32(x2Lo>>0), uint32(x1Lo>>32) > uint32(x2Lo>>32),
  2737  					uint32(x1Hi>>0) > uint32(x2Hi>>0), uint32(x1Hi>>32) > uint32(x2Hi>>32),
  2738  				}
  2739  			case wazeroir.V128CmpTypeI32x4LeS:
  2740  				result = []bool{
  2741  					int32(x1Lo>>0) <= int32(x2Lo>>0), int32(x1Lo>>32) <= int32(x2Lo>>32),
  2742  					int32(x1Hi>>0) <= int32(x2Hi>>0), int32(x1Hi>>32) <= int32(x2Hi>>32),
  2743  				}
  2744  			case wazeroir.V128CmpTypeI32x4LeU:
  2745  				result = []bool{
  2746  					uint32(x1Lo>>0) <= uint32(x2Lo>>0), uint32(x1Lo>>32) <= uint32(x2Lo>>32),
  2747  					uint32(x1Hi>>0) <= uint32(x2Hi>>0), uint32(x1Hi>>32) <= uint32(x2Hi>>32),
  2748  				}
  2749  			case wazeroir.V128CmpTypeI32x4GeS:
  2750  				result = []bool{
  2751  					int32(x1Lo>>0) >= int32(x2Lo>>0), int32(x1Lo>>32) >= int32(x2Lo>>32),
  2752  					int32(x1Hi>>0) >= int32(x2Hi>>0), int32(x1Hi>>32) >= int32(x2Hi>>32),
  2753  				}
  2754  			case wazeroir.V128CmpTypeI32x4GeU:
  2755  				result = []bool{
  2756  					uint32(x1Lo>>0) >= uint32(x2Lo>>0), uint32(x1Lo>>32) >= uint32(x2Lo>>32),
  2757  					uint32(x1Hi>>0) >= uint32(x2Hi>>0), uint32(x1Hi>>32) >= uint32(x2Hi>>32),
  2758  				}
        			// 64-bit lanes: each half is one lane. Only Eq, Ne and the signed ordered
        			// comparisons are handled in this switch.
  2759  			case wazeroir.V128CmpTypeI64x2Eq:
  2760  				result = []bool{x1Lo == x2Lo, x1Hi == x2Hi}
  2761  			case wazeroir.V128CmpTypeI64x2Ne:
  2762  				result = []bool{x1Lo != x2Lo, x1Hi != x2Hi}
  2763  			case wazeroir.V128CmpTypeI64x2LtS:
  2764  				result = []bool{int64(x1Lo) < int64(x2Lo), int64(x1Hi) < int64(x2Hi)}
  2765  			case wazeroir.V128CmpTypeI64x2GtS:
  2766  				result = []bool{int64(x1Lo) > int64(x2Lo), int64(x1Hi) > int64(x2Hi)}
  2767  			case wazeroir.V128CmpTypeI64x2LeS:
  2768  				result = []bool{int64(x1Lo) <= int64(x2Lo), int64(x1Hi) <= int64(x2Hi)}
  2769  			case wazeroir.V128CmpTypeI64x2GeS:
  2770  				result = []bool{int64(x1Lo) >= int64(x2Lo), int64(x1Hi) >= int64(x2Hi)}
        			// Floating-point lanes: each lane's bit pattern is reinterpreted as a float32 or
        			// float64 and compared with Go's floating-point comparison operators.
  2771  			case wazeroir.V128CmpTypeF32x4Eq:
  2772  				result = []bool{
  2773  					math.Float32frombits(uint32(x1Lo>>0)) == math.Float32frombits(uint32(x2Lo>>0)),
  2774  					math.Float32frombits(uint32(x1Lo>>32)) == math.Float32frombits(uint32(x2Lo>>32)),
  2775  					math.Float32frombits(uint32(x1Hi>>0)) == math.Float32frombits(uint32(x2Hi>>0)),
  2776  					math.Float32frombits(uint32(x1Hi>>32)) == math.Float32frombits(uint32(x2Hi>>32)),
  2777  				}
  2778  			case wazeroir.V128CmpTypeF32x4Ne:
  2779  				result = []bool{
  2780  					math.Float32frombits(uint32(x1Lo>>0)) != math.Float32frombits(uint32(x2Lo>>0)),
  2781  					math.Float32frombits(uint32(x1Lo>>32)) != math.Float32frombits(uint32(x2Lo>>32)),
  2782  					math.Float32frombits(uint32(x1Hi>>0)) != math.Float32frombits(uint32(x2Hi>>0)),
  2783  					math.Float32frombits(uint32(x1Hi>>32)) != math.Float32frombits(uint32(x2Hi>>32)),
  2784  				}
  2785  			case wazeroir.V128CmpTypeF32x4Lt:
  2786  				result = []bool{
  2787  					math.Float32frombits(uint32(x1Lo>>0)) < math.Float32frombits(uint32(x2Lo>>0)),
  2788  					math.Float32frombits(uint32(x1Lo>>32)) < math.Float32frombits(uint32(x2Lo>>32)),
  2789  					math.Float32frombits(uint32(x1Hi>>0)) < math.Float32frombits(uint32(x2Hi>>0)),
  2790  					math.Float32frombits(uint32(x1Hi>>32)) < math.Float32frombits(uint32(x2Hi>>32)),
  2791  				}
  2792  			case wazeroir.V128CmpTypeF32x4Gt:
  2793  				result = []bool{
  2794  					math.Float32frombits(uint32(x1Lo>>0)) > math.Float32frombits(uint32(x2Lo>>0)),
  2795  					math.Float32frombits(uint32(x1Lo>>32)) > math.Float32frombits(uint32(x2Lo>>32)),
  2796  					math.Float32frombits(uint32(x1Hi>>0)) > math.Float32frombits(uint32(x2Hi>>0)),
  2797  					math.Float32frombits(uint32(x1Hi>>32)) > math.Float32frombits(uint32(x2Hi>>32)),
  2798  				}
  2799  			case wazeroir.V128CmpTypeF32x4Le:
  2800  				result = []bool{
  2801  					math.Float32frombits(uint32(x1Lo>>0)) <= math.Float32frombits(uint32(x2Lo>>0)),
  2802  					math.Float32frombits(uint32(x1Lo>>32)) <= math.Float32frombits(uint32(x2Lo>>32)),
  2803  					math.Float32frombits(uint32(x1Hi>>0)) <= math.Float32frombits(uint32(x2Hi>>0)),
  2804  					math.Float32frombits(uint32(x1Hi>>32)) <= math.Float32frombits(uint32(x2Hi>>32)),
  2805  				}
  2806  			case wazeroir.V128CmpTypeF32x4Ge:
  2807  				result = []bool{
  2808  					math.Float32frombits(uint32(x1Lo>>0)) >= math.Float32frombits(uint32(x2Lo>>0)),
  2809  					math.Float32frombits(uint32(x1Lo>>32)) >= math.Float32frombits(uint32(x2Lo>>32)),
  2810  					math.Float32frombits(uint32(x1Hi>>0)) >= math.Float32frombits(uint32(x2Hi>>0)),
  2811  					math.Float32frombits(uint32(x1Hi>>32)) >= math.Float32frombits(uint32(x2Hi>>32)),
  2812  				}
  2813  			case wazeroir.V128CmpTypeF64x2Eq:
  2814  				result = []bool{
  2815  					math.Float64frombits(x1Lo) == math.Float64frombits(x2Lo),
  2816  					math.Float64frombits(x1Hi) == math.Float64frombits(x2Hi),
  2817  				}
  2818  			case wazeroir.V128CmpTypeF64x2Ne:
  2819  				result = []bool{
  2820  					math.Float64frombits(x1Lo) != math.Float64frombits(x2Lo),
  2821  					math.Float64frombits(x1Hi) != math.Float64frombits(x2Hi),
  2822  				}
  2823  			case wazeroir.V128CmpTypeF64x2Lt:
  2824  				result = []bool{
  2825  					math.Float64frombits(x1Lo) < math.Float64frombits(x2Lo),
  2826  					math.Float64frombits(x1Hi) < math.Float64frombits(x2Hi),
  2827  				}
  2828  			case wazeroir.V128CmpTypeF64x2Gt:
  2829  				result = []bool{
  2830  					math.Float64frombits(x1Lo) > math.Float64frombits(x2Lo),
  2831  					math.Float64frombits(x1Hi) > math.Float64frombits(x2Hi),
  2832  				}
  2833  			case wazeroir.V128CmpTypeF64x2Le:
  2834  				result = []bool{
  2835  					math.Float64frombits(x1Lo) <= math.Float64frombits(x2Lo),
  2836  					math.Float64frombits(x1Hi) <= math.Float64frombits(x2Hi),
  2837  				}
  2838  			case wazeroir.V128CmpTypeF64x2Ge:
  2839  				result = []bool{
  2840  					math.Float64frombits(x1Lo) >= math.Float64frombits(x2Lo),
  2841  					math.Float64frombits(x1Hi) >= math.Float64frombits(x2Hi),
  2842  				}
  2843  			}
  2844  
        			// Expand the per-lane booleans into the result mask: a true lane becomes all
        			// ones and a false lane stays all zeros. The lane width is derived from the number
        			// of lanes; the first half of the lanes goes to retLo and the rest to retHi.
        			// A lane count other than 16, 8, 4 or 2 (e.g. a nil result) leaves both halves zero.
  2845  			var retLo, retHi uint64
  2846  			laneNum := len(result)
  2847  			switch laneNum {
  2848  			case 16:
  2849  				for i, b := range result {
  2850  					if b {
  2851  						if i < 8 {
  2852  							retLo |= 0xff << (i * 8)
  2853  						} else {
  2854  							retHi |= 0xff << ((i - 8) * 8)
  2855  						}
  2856  					}
  2857  				}
  2858  			case 8:
  2859  				for i, b := range result {
  2860  					if b {
  2861  						if i < 4 {
  2862  							retLo |= 0xffff << (i * 16)
  2863  						} else {
  2864  							retHi |= 0xffff << ((i - 4) * 16)
  2865  						}
  2866  					}
  2867  				}
  2868  			case 4:
  2869  				for i, b := range result {
  2870  					if b {
  2871  						if i < 2 {
  2872  							retLo |= 0xffff_ffff << (i * 32)
  2873  						} else {
  2874  							retHi |= 0xffff_ffff << ((i - 2) * 32)
  2875  						}
  2876  					}
  2877  				}
  2878  			case 2:
  2879  				if result[0] {
  2880  					retLo = ^uint64(0)
  2881  				}
  2882  				if result[1] {
  2883  					retHi = ^uint64(0)
  2884  				}
  2885  			}
  2886  
        			// Push the mask: low half first, then high half.
  2887  			ce.pushValue(retLo)
  2888  			ce.pushValue(retHi)
  2889  			frame.pc++
  2890  		case wazeroir.OperationKindV128AddSat:
        			// Lane-wise saturating addition of two 128-bit vectors. x2 is on top of the stack
        			// and is popped first; each vector is popped high 64-bit half first, then low half.
        			// op.B1 selects the lane shape and op.B3 selects signed saturation. Only the
        			// I8x16 and I16x8 shapes are handled; any other shape pushes an all-zero result.
  2891  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2892  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2893  
  2894  			var retLo, retHi uint64
  2895  
  2896  			// Lane-wise addition while saturating the overflowing values.
  2897  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-addition
  2898  			switch op.B1 {
  2899  			case wazeroir.ShapeI8x16:
  2900  				for i := 0; i < 16; i++ {
        					// Extract lane i of both operands: lanes 0-7 live in the low halves and
        					// lanes 8-15 in the high halves.
  2901  					var v, w byte
  2902  					if i < 8 {
  2903  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2904  					} else {
  2905  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2906  					}
  2907  
        					// The sum is computed in int64 so it cannot wrap, then clamped to the lane range.
  2908  					var uv uint64
  2909  					if op.B3 { // signed
  2910  						if added := int64(int8(v)) + int64(int8(w)); added < math.MinInt8 {
  2911  							uv = uint64(byte(0x80))
  2912  						} else if added > math.MaxInt8 {
  2913  							uv = uint64(byte(0x7f))
  2914  						} else {
  2915  							uv = uint64(byte(int8(added)))
  2916  						}
  2917  					} else {
        						// The sum of two unsigned lanes is never negative, so the lower clamp below
        						// cannot trigger; only the upper clamp is reachable.
  2918  						if added := int64(v) + int64(w); added < 0 {
  2919  							uv = uint64(byte(0))
  2920  						} else if added > math.MaxUint8 {
  2921  							uv = uint64(byte(0xff))
  2922  						} else {
  2923  							uv = uint64(byte(added))
  2924  						}
  2925  					}
  2926  
  2927  					if i < 8 { // first 8 lanes are on lower 64bits.
  2928  						retLo |= uv << (i * 8)
  2929  					} else {
  2930  						retHi |= uv << ((i - 8) * 8)
  2931  					}
  2932  				}
  2933  			case wazeroir.ShapeI16x8:
  2934  				for i := 0; i < 8; i++ {
        					// Extract lane i of both operands: lanes 0-3 live in the low halves and
        					// lanes 4-7 in the high halves.
  2935  					var v, w uint16
  2936  					if i < 4 {
  2937  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  2938  					} else {
  2939  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  2940  					}
  2941  
  2942  					var uv uint64
  2943  					if op.B3 { // signed
  2944  						if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 {
  2945  							uv = uint64(uint16(0x8000))
  2946  						} else if added > math.MaxInt16 {
  2947  							uv = uint64(uint16(0x7fff))
  2948  						} else {
  2949  							uv = uint64(uint16(int16(added)))
  2950  						}
  2951  					} else {
        						// As above, the lower clamp cannot trigger for an unsigned sum.
  2952  						if added := int64(v) + int64(w); added < 0 {
  2953  							uv = uint64(uint16(0))
  2954  						} else if added > math.MaxUint16 {
  2955  							uv = uint64(uint16(0xffff))
  2956  						} else {
  2957  							uv = uint64(uint16(added))
  2958  						}
  2959  					}
  2960  
  2961  					if i < 4 { // first 4 lanes are on lower 64bits.
  2962  						retLo |= uv << (i * 16)
  2963  					} else {
  2964  						retHi |= uv << ((i - 4) * 16)
  2965  					}
  2966  				}
  2967  			}
  2968  
        			// Push the result: low half first, then high half.
  2969  			ce.pushValue(retLo)
  2970  			ce.pushValue(retHi)
  2971  			frame.pc++
  2972  		case wazeroir.OperationKindV128SubSat:
        			// Lane-wise saturating subtraction (x1 - x2) of two 128-bit vectors. x2 is on top
        			// of the stack and is popped first; each vector is popped high 64-bit half first,
        			// then low half. op.B1 selects the lane shape and op.B3 selects signed saturation.
        			// Only the I8x16 and I16x8 shapes are handled; any other shape pushes an all-zero
        			// result.
  2973  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2974  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2975  
  2976  			var retLo, retHi uint64
  2977  
  2978  			// Lane-wise subtraction while saturating the overflowing values.
  2979  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-subtraction
  2980  			switch op.B1 {
  2981  			case wazeroir.ShapeI8x16:
  2982  				for i := 0; i < 16; i++ {
        					// Extract lane i of both operands: lanes 0-7 live in the low halves and
        					// lanes 8-15 in the high halves.
  2983  					var v, w byte
  2984  					if i < 8 {
  2985  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2986  					} else {
  2987  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2988  					}
  2989  
        					// The difference is computed in int64 so it cannot wrap, then clamped to the
        					// lane range.
  2990  					var uv uint64
  2991  					if op.B3 { // signed
  2992  						if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 {
  2993  							uv = uint64(byte(0x80))
  2994  						} else if subbed > math.MaxInt8 {
  2995  							uv = uint64(byte(0x7f))
  2996  						} else {
  2997  							uv = uint64(byte(int8(subbed)))
  2998  						}
  2999  					} else {
        						// The difference of two unsigned lanes never exceeds the lane maximum, so
        						// the upper clamp below cannot trigger; only the clamp to zero is reachable.
  3000  						if subbed := int64(v) - int64(w); subbed < 0 {
  3001  							uv = uint64(byte(0))
  3002  						} else if subbed > math.MaxUint8 {
  3003  							uv = uint64(byte(0xff))
  3004  						} else {
  3005  							uv = uint64(byte(subbed))
  3006  						}
  3007  					}
  3008  
        					// Place the lane back: the first 8 lanes go to the low 64 bits.
  3009  					if i < 8 {
  3010  						retLo |= uv << (i * 8)
  3011  					} else {
  3012  						retHi |= uv << ((i - 8) * 8)
  3013  					}
  3014  				}
  3015  			case wazeroir.ShapeI16x8:
  3016  				for i := 0; i < 8; i++ {
        					// Extract lane i of both operands: lanes 0-3 live in the low halves and
        					// lanes 4-7 in the high halves.
  3017  					var v, w uint16
  3018  					if i < 4 {
  3019  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  3020  					} else {
  3021  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  3022  					}
  3023  
  3024  					var uv uint64
  3025  					if op.B3 { // signed
  3026  						if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 {
  3027  							uv = uint64(uint16(0x8000))
  3028  						} else if subbed > math.MaxInt16 {
  3029  							uv = uint64(uint16(0x7fff))
  3030  						} else {
  3031  							uv = uint64(uint16(int16(subbed)))
  3032  						}
  3033  					} else {
        						// As above, the upper clamp cannot trigger for an unsigned difference.
  3034  						if subbed := int64(v) - int64(w); subbed < 0 {
  3035  							uv = uint64(uint16(0))
  3036  						} else if subbed > math.MaxUint16 {
  3037  							uv = uint64(uint16(0xffff))
  3038  						} else {
  3039  							uv = uint64(uint16(subbed))
  3040  						}
  3041  					}
  3042  
        					// Place the lane back: the first 4 lanes go to the low 64 bits.
  3043  					if i < 4 {
  3044  						retLo |= uv << (i * 16)
  3045  					} else {
  3046  						retHi |= uv << ((i - 4) * 16)
  3047  					}
  3048  				}
  3049  			}
  3050  
        			// Push the result: low half first, then high half.
  3051  			ce.pushValue(retLo)
  3052  			ce.pushValue(retHi)
  3053  			frame.pc++
  3054  		case wazeroir.OperationKindV128Mul:
        			// Lane-wise multiplication of two 128-bit vectors. x2 is on top of the stack and
        			// is popped first; each vector is popped high 64-bit half first, then low half.
  3055  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3056  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3057  			var retLo, retHi uint64
        			// op.B1 selects the lane shape. There is no default case, so a shape not listed
        			// below (I8x16 is not listed) pushes an all-zero result.
  3058  			switch op.B1 {
  3059  			case wazeroir.ShapeI16x8:
        				// Each product is computed in uint16, so it wraps at the lane width before
        				// being widened and placed back at the lane's position.
  3060  				retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) |
  3061  					(uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48)
  3062  				retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) |
  3063  					(uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48)
  3064  			case wazeroir.ShapeI32x4:
        				// Same scheme with two 32-bit lanes per half; products wrap at 32 bits.
  3065  				retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32)
  3066  				retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32)
  3067  			case wazeroir.ShapeI64x2:
        				// Each half is one 64-bit lane; uint64 multiplication wraps at 64 bits.
  3068  				retHi = x1hi * x2hi
  3069  				retLo = x1lo * x2lo
  3070  			case wazeroir.ShapeF32x4:
        				// mulFloat32bits is defined elsewhere in this file; presumably it multiplies two
        				// float32 bit patterns and returns the product's bits (confirm against its
        				// definition). Its result is used here as the 32-bit lane value.
  3071  				retHi = mulFloat32bits(uint32(x1hi), uint32(x2hi)) | mulFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3072  				retLo = mulFloat32bits(uint32(x1lo), uint32(x2lo)) | mulFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3073  			case wazeroir.ShapeF64x2:
        				// Each half is reinterpreted as a float64, multiplied, and converted back to bits.
  3074  				retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi))
  3075  				retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo))
  3076  			}
        			// Push the result: low half first, then high half.
  3077  			ce.pushValue(retLo)
  3078  			ce.pushValue(retHi)
  3079  			frame.pc++
  3080  		case wazeroir.OperationKindV128Div:
        			// Lane-wise floating-point division (x1 / x2) of two 128-bit vectors. x2 is on top
        			// of the stack and is popped first; each vector is popped high 64-bit half first,
        			// then low half.
  3081  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3082  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3083  			var retLo, retHi uint64
        			// Only ShapeF64x2 is tested explicitly; every other value of op.B1 takes the
        			// 32-bit-lane path below.
  3084  			if op.B1 == wazeroir.ShapeF64x2 {
        				// Each half is reinterpreted as a float64, divided, and converted back to bits.
  3085  				retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi))
  3086  				retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo))
  3087  			} else {
        				// divFloat32bits is defined elsewhere in this file; presumably it divides two
        				// float32 bit patterns and returns the quotient's bits (confirm against its
        				// definition). Its result is used here as the 32-bit lane value.
  3088  				retHi = divFloat32bits(uint32(x1hi), uint32(x2hi)) | divFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3089  				retLo = divFloat32bits(uint32(x1lo), uint32(x2lo)) | divFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3090  			}
        			// Push the result: low half first, then high half.
  3091  			ce.pushValue(retLo)
  3092  			ce.pushValue(retHi)
  3093  			frame.pc++
  3094  		case wazeroir.OperationKindV128Neg: // Lane-wise negation of one v128; op.B1 selects the lane shape.
  3095  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3096  			switch op.B1 {
  3097  			case wazeroir.ShapeI8x16: // Unsigned negation wraps modulo 256, i.e. two's-complement negation per byte lane.
  3098  				lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) |
  3099  					(uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) |
  3100  					(uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) |
  3101  					(uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56)
  3102  				hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) |
  3103  					(uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) |
  3104  					(uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) |
  3105  					(uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56)
  3106  			case wazeroir.ShapeI16x8: // Four 16-bit lanes per half, each negated with wrap-around.
  3107  				hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) |
  3108  					(uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48)
  3109  				lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) |
  3110  					(uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48)
  3111  			case wazeroir.ShapeI32x4: // Two 32-bit lanes per half.
  3112  				hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32)
  3113  				lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32)
  3114  			case wazeroir.ShapeI64x2: // Each half is a single 64-bit lane.
  3115  				hi = -hi
  3116  				lo = -lo
  3117  			case wazeroir.ShapeF32x4: // Each 32-bit lane is reinterpreted as float32, negated, and packed back.
  3118  				hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) |
  3119  					(uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32)
  3120  				lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) |
  3121  					(uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32)
  3122  			case wazeroir.ShapeF64x2: // Each half is reinterpreted as float64 and negated.
  3123  				hi = math.Float64bits(-math.Float64frombits(hi))
  3124  				lo = math.Float64bits(-math.Float64frombits(lo))
  3125  			}
  3126  			ce.pushValue(lo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3127  			ce.pushValue(hi)
  3128  			frame.pc++
  3129  		case wazeroir.OperationKindV128Sqrt: // Lane-wise square root of one v128 of floats.
  3130  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3131  			if op.B1 == wazeroir.ShapeF64x2 { // One float64 lane per 64-bit half.
  3132  				hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi)))
  3133  				lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo)))
  3134  			} else { // Every other shape: two float32 lanes per half, widened to float64 for math.Sqrt and narrowed back.
  3135  				hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) |
  3136  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32)
  3137  				lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) |
  3138  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32)
  3139  			}
  3140  			ce.pushValue(lo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3141  			ce.pushValue(hi)
  3142  			frame.pc++
  3143  		case wazeroir.OperationKindV128Abs: // Lane-wise absolute value of one v128; op.B1 selects the lane shape.
  3144  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3145  			switch op.B1 {
  3146  			case wazeroir.ShapeI8x16: // Eight byte lanes per half, each passed through the i8Abs helper.
  3147  				lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) |
  3148  					(uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) |
  3149  					(uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) |
  3150  					(uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56)
  3151  				hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) |
  3152  					(uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) |
  3153  					(uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) |
  3154  					(uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56)
  3155  			case wazeroir.ShapeI16x8: // Four 16-bit lanes per half via i16Abs.
  3156  				hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) |
  3157  					(uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48)
  3158  				lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) |
  3159  					(uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48)
  3160  			case wazeroir.ShapeI32x4: // Two 32-bit lanes per half via i32Abs.
  3161  				hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32)
  3162  				lo = uint64(i32Abs(uint32(lo))) | (uint64(i32Abs(uint32(lo>>32))) << 32)
  3163  			case wazeroir.ShapeI64x2: // Each half is one 64-bit lane, negated only when negative as int64.
  3164  				if int64(hi) < 0 {
  3165  					hi = -hi // Wraps, so the minimum int64 bit pattern maps to itself.
  3166  				}
  3167  				if int64(lo) < 0 {
  3168  					lo = -lo
  3169  				}
  3170  			case wazeroir.ShapeF32x4: // Clears bits 31 and 63, the sign bits of the two float32 lanes in each half.
  3171  				hi = hi &^ (1<<31 | 1<<63)
  3172  				lo = lo &^ (1<<31 | 1<<63)
  3173  			case wazeroir.ShapeF64x2: // Clears bit 63, the sign bit of the float64 lane in each half.
  3174  				hi = hi &^ (1 << 63)
  3175  				lo = lo &^ (1 << 63)
  3176  			}
  3177  			ce.pushValue(lo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3178  			ce.pushValue(hi)
  3179  			frame.pc++
  3180  		case wazeroir.OperationKindV128Popcnt: // Treats the v128 as 16 byte lanes and replaces each lane with its count of set bits.
  3181  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3182  			var retLo, retHi uint64
  3183  			for i := 0; i < 16; i++ {
  3184  				var v byte
  3185  				if i < 8 { // Lanes 0-7 live in the lower half, lanes 8-15 in the upper half.
  3186  					v = byte(lo >> (i * 8))
  3187  				} else {
  3188  					v = byte(hi >> ((i - 8) * 8))
  3189  				}
  3190  
  3191  				// bits.OnesCount8 replaces a hand-written eight-step bit loop that shadowed the outer i.
  3192  				cnt := uint64(bits.OnesCount8(v))
  3197  
  3198  				if i < 8 { // The count (at most 8) is written back to the same lane position.
  3199  					retLo |= cnt << (i * 8)
  3200  				} else {
  3201  					retHi |= cnt << ((i - 8) * 8)
  3202  				}
  3203  			}
  3204  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3205  			ce.pushValue(retHi)
  3206  			frame.pc++
  3207  		case wazeroir.OperationKindV128Min: // Lane-wise minimum of two v128 operands; op.B1 is the lane shape, op.B3 the signedness for integer shapes.
  3208  			x2hi, x2lo := ce.popValue(), ce.popValue() // Right operand; the first pop yields the upper 64 bits, the second the lower.
  3209  			x1hi, x1lo := ce.popValue(), ce.popValue() // Left operand, popped after x2.
  3210  			var retLo, retHi uint64 // Stay zero for any shape not listed in the switch below.
  3211  			switch op.B1 {
  3212  			case wazeroir.ShapeI8x16: // Eight byte lanes per half, one helper call per lane.
  3213  				if op.B3 { // signed
  3214  					retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) |
  3215  						uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3216  						uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3217  						uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3218  					retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) |
  3219  						uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3220  						uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3221  						uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3222  				} else { // unsigned
  3223  					retLo = uint64(i8MinU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) |
  3224  						uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3225  						uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3226  						uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3227  					retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) |
  3228  						uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3229  						uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3230  						uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3231  				}
  3232  			case wazeroir.ShapeI16x8: // Four 16-bit lanes per half.
  3233  				if op.B3 { // signed
  3234  					retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) |
  3235  						uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3236  						uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3237  						uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3238  					retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) |
  3239  						uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3240  						uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3241  						uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3242  				} else { // unsigned
  3243  					retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) |
  3244  						uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3245  						uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3246  						uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3247  					retHi = uint64(i16MinU(uint16(x1hi), uint16(x2hi))) |
  3248  						uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3249  						uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3250  						uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3251  				}
  3252  			case wazeroir.ShapeI32x4: // Two 32-bit lanes per half.
  3253  				if op.B3 { // signed
  3254  					retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) |
  3255  						uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3256  					retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) |
  3257  						uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3258  				} else { // unsigned
  3259  					retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) |
  3260  						uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3261  					retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) |
  3262  						uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3263  				}
  3264  			case wazeroir.ShapeF32x4: // Two float32 lanes per half, delegated to WasmCompatMin32bits on the raw bits.
  3265  				retHi = WasmCompatMin32bits(uint32(x1hi), uint32(x2hi)) |
  3266  					WasmCompatMin32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3267  				retLo = WasmCompatMin32bits(uint32(x1lo), uint32(x2lo)) |
  3268  					WasmCompatMin32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3269  			case wazeroir.ShapeF64x2: // One float64 lane per half, delegated to moremath.WasmCompatMin64.
  3270  				retHi = math.Float64bits(moremath.WasmCompatMin64(
  3271  					math.Float64frombits(x1hi),
  3272  					math.Float64frombits(x2hi),
  3273  				))
  3274  				retLo = math.Float64bits(moremath.WasmCompatMin64(
  3275  					math.Float64frombits(x1lo),
  3276  					math.Float64frombits(x2lo),
  3277  				))
  3278  			}
  3279  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3280  			ce.pushValue(retHi)
  3281  			frame.pc++
  3282  		case wazeroir.OperationKindV128Max: // Lane-wise maximum of two v128 operands; op.B1 is the lane shape, op.B3 the signedness for integer shapes.
  3283  			x2hi, x2lo := ce.popValue(), ce.popValue() // Right operand; the first pop yields the upper 64 bits, the second the lower.
  3284  			x1hi, x1lo := ce.popValue(), ce.popValue() // Left operand, popped after x2.
  3285  			var retLo, retHi uint64 // Stay zero for any shape not listed in the switch below.
  3286  			switch op.B1 {
  3287  			case wazeroir.ShapeI8x16: // Eight byte lanes per half, one helper call per lane.
  3288  				if op.B3 { // signed
  3289  					retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) |
  3290  						uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3291  						uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3292  						uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3293  					retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) |
  3294  						uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3295  						uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3296  						uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3297  				} else { // unsigned
  3298  					retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) |
  3299  						uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3300  						uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3301  						uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3302  					retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) |
  3303  						uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3304  						uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3305  						uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3306  				}
  3307  			case wazeroir.ShapeI16x8: // Four 16-bit lanes per half.
  3308  				if op.B3 { // signed
  3309  					retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) |
  3310  						uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3311  						uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3312  						uint64(i16MaxS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3313  					retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) |
  3314  						uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3315  						uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3316  						uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3317  				} else { // unsigned
  3318  					retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) |
  3319  						uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3320  						uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3321  						uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3322  					retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) |
  3323  						uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3324  						uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3325  						uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3326  				}
  3327  			case wazeroir.ShapeI32x4: // Two 32-bit lanes per half.
  3328  				if op.B3 { // signed
  3329  					retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) |
  3330  						uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3331  					retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) |
  3332  						uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3333  				} else { // unsigned
  3334  					retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) |
  3335  						uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3336  					retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) |
  3337  						uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3338  				}
  3339  			case wazeroir.ShapeF32x4: // Two float32 lanes per half, delegated to WasmCompatMax32bits on the raw bits.
  3340  				retHi = WasmCompatMax32bits(uint32(x1hi), uint32(x2hi)) |
  3341  					WasmCompatMax32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3342  				retLo = WasmCompatMax32bits(uint32(x1lo), uint32(x2lo)) |
  3343  					WasmCompatMax32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3344  			case wazeroir.ShapeF64x2: // One float64 lane per half, delegated to moremath.WasmCompatMax64.
  3345  				retHi = math.Float64bits(moremath.WasmCompatMax64(
  3346  					math.Float64frombits(x1hi),
  3347  					math.Float64frombits(x2hi),
  3348  				))
  3349  				retLo = math.Float64bits(moremath.WasmCompatMax64(
  3350  					math.Float64frombits(x1lo),
  3351  					math.Float64frombits(x2lo),
  3352  				))
  3353  			}
  3354  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3355  			ce.pushValue(retHi)
  3356  			frame.pc++
  3357  		case wazeroir.OperationKindV128AvgrU: // Lane-wise rounding average of two v128 operands, computed by the i8/i16RoundingAverage helpers.
  3358  			x2hi, x2lo := ce.popValue(), ce.popValue() // Right operand; the first pop yields the upper 64 bits, the second the lower.
  3359  			x1hi, x1lo := ce.popValue(), ce.popValue() // Left operand, popped after x2.
  3360  			var retLo, retHi uint64 // Stay zero for any shape other than the two handled below.
  3361  			switch op.B1 {
  3362  			case wazeroir.ShapeI8x16: // Eight byte lanes per half.
  3363  				retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) |
  3364  					uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3365  					uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3366  					uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3367  				retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) |
  3368  					uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3369  					uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3370  					uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3371  			case wazeroir.ShapeI16x8: // Four 16-bit lanes per half.
  3372  				retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) |
  3373  					uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3374  					uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3375  					uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3376  				retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) |
  3377  					uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3378  					uint64(i16RoundingAverage(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3379  					uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3380  			}
  3381  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3382  			ce.pushValue(retHi)
  3383  			frame.pc++
  3384  		case wazeroir.OperationKindV128Pmin: // Per lane: takes x2's bits when flt(x2, x1) holds, otherwise x1's bits. NOTE(review): flt32/flt64 are presumably "less than" - confirm at their definitions.
  3385  			x2hi, x2lo := ce.popValue(), ce.popValue() // Right operand; the first pop yields the upper 64 bits, the second the lower.
  3386  			x1hi, x1lo := ce.popValue(), ce.popValue() // Left operand, popped after x2.
  3387  			var retLo, retHi uint64
  3388  			if op.B1 == wazeroir.ShapeF32x4 { // Two float32 lanes per half, selected independently with 32-bit masks.
  3389  				if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) {
  3390  					retLo = x2lo & 0x00000000_ffffffff
  3391  				} else {
  3392  					retLo = x1lo & 0x00000000_ffffffff
  3393  				}
  3394  				if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) {
  3395  					retLo |= x2lo & 0xffffffff_00000000
  3396  				} else {
  3397  					retLo |= x1lo & 0xffffffff_00000000
  3398  				}
  3399  				if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) {
  3400  					retHi = x2hi & 0x00000000_ffffffff
  3401  				} else {
  3402  					retHi = x1hi & 0x00000000_ffffffff
  3403  				}
  3404  				if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) {
  3405  					retHi |= x2hi & 0xffffffff_00000000
  3406  				} else {
  3407  					retHi |= x1hi & 0xffffffff_00000000
  3408  				}
  3409  			} else { // Every other shape is handled as one float64 lane per half.
  3410  				if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) {
  3411  					retLo = x2lo // The chosen operand's bits are copied unchanged.
  3412  				} else {
  3413  					retLo = x1lo
  3414  				}
  3415  				if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) {
  3416  					retHi = x2hi
  3417  				} else {
  3418  					retHi = x1hi
  3419  				}
  3420  			}
  3421  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3422  			ce.pushValue(retHi)
  3423  			frame.pc++
  3424  		case wazeroir.OperationKindV128Pmax: // Per lane: takes x2's bits when flt(x1, x2) holds, otherwise x1's bits. NOTE(review): flt32/flt64 are presumably "less than" - confirm at their definitions.
  3425  			x2hi, x2lo := ce.popValue(), ce.popValue() // Right operand; the first pop yields the upper 64 bits, the second the lower.
  3426  			x1hi, x1lo := ce.popValue(), ce.popValue() // Left operand, popped after x2.
  3427  			var retLo, retHi uint64
  3428  			if op.B1 == wazeroir.ShapeF32x4 { // Two float32 lanes per half, selected independently with 32-bit masks.
  3429  				if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) {
  3430  					retLo = x2lo & 0x00000000_ffffffff
  3431  				} else {
  3432  					retLo = x1lo & 0x00000000_ffffffff
  3433  				}
  3434  				if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) {
  3435  					retLo |= x2lo & 0xffffffff_00000000
  3436  				} else {
  3437  					retLo |= x1lo & 0xffffffff_00000000
  3438  				}
  3439  				if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) {
  3440  					retHi = x2hi & 0x00000000_ffffffff
  3441  				} else {
  3442  					retHi = x1hi & 0x00000000_ffffffff
  3443  				}
  3444  				if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) {
  3445  					retHi |= x2hi & 0xffffffff_00000000
  3446  				} else {
  3447  					retHi |= x1hi & 0xffffffff_00000000
  3448  				}
  3449  			} else { // Every other shape is handled as one float64 lane per half.
  3450  				if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) {
  3451  					retLo = x2lo // The chosen operand's bits are copied unchanged.
  3452  				} else {
  3453  					retLo = x1lo
  3454  				}
  3455  				if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) {
  3456  					retHi = x2hi
  3457  				} else {
  3458  					retHi = x1hi
  3459  				}
  3460  			}
  3461  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3462  			ce.pushValue(retHi)
  3463  			frame.pc++
  3464  		case wazeroir.OperationKindV128Ceil: // Lane-wise ceiling of one v128 of floats via the moremath.WasmCompatCeil helpers.
  3465  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3466  			if op.B1 == wazeroir.ShapeF32x4 { // Two float32 lanes per half.
  3467  				lo = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo))))) |
  3468  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3469  				hi = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi))))) |
  3470  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3471  			} else { // Every other shape is handled as one float64 lane per half.
  3472  				lo = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(lo)))
  3473  				hi = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(hi)))
  3474  			}
  3475  			ce.pushValue(lo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3476  			ce.pushValue(hi)
  3477  			frame.pc++
  3478  		case wazeroir.OperationKindV128Floor: // Lane-wise floor of one v128 of floats via the moremath.WasmCompatFloor helpers.
  3479  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3480  			if op.B1 == wazeroir.ShapeF32x4 { // Two float32 lanes per half.
  3481  				lo = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo))))) |
  3482  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3483  				hi = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi))))) |
  3484  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3485  			} else { // Every other shape is handled as one float64 lane per half.
  3486  				lo = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(lo)))
  3487  				hi = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(hi)))
  3488  			}
  3489  			ce.pushValue(lo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3490  			ce.pushValue(hi)
  3491  			frame.pc++
  3492  		case wazeroir.OperationKindV128Trunc: // Lane-wise truncation of one v128 of floats via the moremath.WasmCompatTrunc helpers.
  3493  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3494  			if op.B1 == wazeroir.ShapeF32x4 { // Two float32 lanes per half.
  3495  				lo = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo))))) |
  3496  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3497  				hi = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi))))) |
  3498  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3499  			} else { // Every other shape is handled as one float64 lane per half.
  3500  				lo = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(lo)))
  3501  				hi = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(hi)))
  3502  			}
  3503  			ce.pushValue(lo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3504  			ce.pushValue(hi)
  3505  			frame.pc++
  3506  		case wazeroir.OperationKindV128Nearest: // Lane-wise round-to-nearest of one v128 of floats via the moremath.WasmCompatNearest helpers.
  3507  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3508  			if op.B1 == wazeroir.ShapeF32x4 { // Two float32 lanes per half.
  3509  				lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) |
  3510  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3511  				hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) |
  3512  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3513  			} else { // Every other shape is handled as one float64 lane per half.
  3514  				lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo)))
  3515  				hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi)))
  3516  			}
  3517  			ce.pushValue(lo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3518  			ce.pushValue(hi)
  3519  			frame.pc++
  3520  		case wazeroir.OperationKindV128Extend: // Widens every lane in one 64-bit half of a v128 to twice its width, filling the whole 128-bit result.
  3521  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3522  			var origin uint64 // The 64-bit half whose lanes get widened.
  3523  			if op.B3 { // use lower 64 bits
  3524  				origin = lo
  3525  			} else {
  3526  				origin = hi
  3527  			}
  3528  
  3529  			signed := op.B2 == 1 // Sign-extend when set, zero-extend otherwise.
  3530  
  3531  			var retHi, retLo uint64 // Stay zero for any shape not listed in the switch below.
  3532  			switch op.B1 { // op.B1 is the source lane shape.
  3533  			case wazeroir.ShapeI8x16: // Eight source bytes become eight 16-bit lanes.
  3534  				for i := 0; i < 8; i++ {
  3535  					v8 := byte(origin >> (i * 8))
  3536  
  3537  					var v16 uint16
  3538  					if signed {
  3539  						v16 = uint16(int8(v8)) // Going through int8 sign-extends.
  3540  					} else {
  3541  						v16 = uint16(v8)
  3542  					}
  3543  
  3544  					if i < 4 { // Results 0-3 land in the lower half, 4-7 in the upper half.
  3545  						retLo |= uint64(v16) << (i * 16)
  3546  					} else {
  3547  						retHi |= uint64(v16) << ((i - 4) * 16)
  3548  					}
  3549  				}
  3550  			case wazeroir.ShapeI16x8: // Four source 16-bit lanes become four 32-bit lanes.
  3551  				for i := 0; i < 4; i++ {
  3552  					v16 := uint16(origin >> (i * 16))
  3553  
  3554  					var v32 uint32
  3555  					if signed {
  3556  						v32 = uint32(int16(v16)) // Going through int16 sign-extends.
  3557  					} else {
  3558  						v32 = uint32(v16)
  3559  					}
  3560  
  3561  					if i < 2 { // Results 0-1 land in the lower half, 2-3 in the upper half.
  3562  						retLo |= uint64(v32) << (i * 32)
  3563  					} else {
  3564  						retHi |= uint64(v32) << ((i - 2) * 32)
  3565  					}
  3566  				}
  3567  			case wazeroir.ShapeI32x4: // Two source 32-bit lanes become the two 64-bit halves directly.
  3568  				v32Lo := uint32(origin)
  3569  				v32Hi := uint32(origin >> 32)
  3570  				if signed {
  3571  					retLo = uint64(int32(v32Lo)) // Going through int32 sign-extends.
  3572  					retHi = uint64(int32(v32Hi))
  3573  				} else {
  3574  					retLo = uint64(v32Lo)
  3575  					retHi = uint64(v32Hi)
  3576  				}
  3577  			}
  3578  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3579  			ce.pushValue(retHi)
  3580  			frame.pc++
  3581  		case wazeroir.OperationKindV128ExtMul: // Multiplies matching lanes from one 64-bit half of each operand, producing double-width products.
  3582  			x2Hi, x2Lo := ce.popValue(), ce.popValue() // Right operand; the first pop yields the upper 64 bits, the second the lower.
  3583  			x1Hi, x1Lo := ce.popValue(), ce.popValue() // Left operand, popped after x2.
  3584  			var x1, x2 uint64 // The selected 64-bit half of each operand.
  3585  			if op.B3 { // use lower 64 bits
  3586  				x1, x2 = x1Lo, x2Lo
  3587  			} else {
  3588  				x1, x2 = x1Hi, x2Hi
  3589  			}
  3590  
  3591  			signed := op.B2 == 1 // Lanes are sign-extended before multiplying when set, zero-extended otherwise.
  3592  
  3593  			var retLo, retHi uint64 // Stay zero for any shape not listed in the switch below.
  3594  			switch op.B1 { // op.B1 is the source lane shape.
  3595  			case wazeroir.ShapeI8x16: // Eight byte pairs yield eight 16-bit products.
  3596  				for i := 0; i < 8; i++ {
  3597  					v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8))
  3598  
  3599  					var v16 uint16
  3600  					if signed {
  3601  						v16 = uint16(int16(int8(v1)) * int16(int8(v2))) // Widened first, so the full product is kept.
  3602  					} else {
  3603  						v16 = uint16(v1) * uint16(v2)
  3604  					}
  3605  
  3606  					if i < 4 { // Products 0-3 land in the lower half, 4-7 in the upper half.
  3607  						retLo |= uint64(v16) << (i * 16)
  3608  					} else {
  3609  						retHi |= uint64(v16) << ((i - 4) * 16)
  3610  					}
  3611  				}
  3612  			case wazeroir.ShapeI16x8: // Four 16-bit pairs yield four 32-bit products.
  3613  				for i := 0; i < 4; i++ {
  3614  					v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16))
  3615  
  3616  					var v32 uint32
  3617  					if signed {
  3618  						v32 = uint32(int32(int16(v1)) * int32(int16(v2)))
  3619  					} else {
  3620  						v32 = uint32(v1) * uint32(v2)
  3621  					}
  3622  
  3623  					if i < 2 { // Products 0-1 land in the lower half, 2-3 in the upper half.
  3624  						retLo |= uint64(v32) << (i * 32)
  3625  					} else {
  3626  						retHi |= uint64(v32) << ((i - 2) * 32)
  3627  					}
  3628  				}
  3629  			case wazeroir.ShapeI32x4: // Two 32-bit pairs yield the two 64-bit halves directly.
  3630  				v1Lo, v2Lo := uint32(x1), uint32(x2)
  3631  				v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32)
  3632  				if signed {
  3633  					retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo)))
  3634  					retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi)))
  3635  				} else {
  3636  					retLo = uint64(v1Lo) * uint64(v2Lo)
  3637  					retHi = uint64(v1Hi) * uint64(v2Hi)
  3638  				}
  3639  			}
  3640  
  3641  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3642  			ce.pushValue(retHi)
  3643  			frame.pc++
  3644  		case wazeroir.OperationKindV128Q15mulrSatS: // For each of eight signed 16-bit lanes: (v*w + 0x4000) >> 15, saturated to the int16 range.
  3645  			x2hi, x2Lo := ce.popValue(), ce.popValue() // Right operand; the first pop yields the upper 64 bits, the second the lower.
  3646  			x1hi, x1Lo := ce.popValue(), ce.popValue() // Left operand, popped after x2.
  3647  			var retLo, retHi uint64
  3648  			for i := 0; i < 8; i++ {
  3649  				var v, w int16
  3650  				if i < 4 { // Lanes 0-3 come from the lower halves, lanes 4-7 from the upper halves.
  3651  					v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16)))
  3652  				} else {
  3653  					v, w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16)))
  3654  				}
  3655  
  3656  				var uv uint64 // Bit pattern of the saturated 16-bit result for this lane.
  3657  				// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication
  3658  				if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 { // Computed in int32 so the product and rounding term do not overflow.
  3659  					uv = uint64(uint16(0x8000)) // Bit pattern of math.MinInt16.
  3660  				} else if calc > math.MaxInt16 {
  3661  					uv = uint64(uint16(0x7fff)) // math.MaxInt16.
  3662  				} else {
  3663  					uv = uint64(uint16(int16(calc)))
  3664  				}
  3665  
  3666  				if i < 4 {
  3667  					retLo |= uv << (i * 16)
  3668  				} else {
  3669  					retHi |= uv << ((i - 4) * 16)
  3670  				}
  3671  			}
  3672  
  3673  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3674  			ce.pushValue(retHi)
  3675  			frame.pc++
  3676  		case wazeroir.OperationKindV128ExtAddPairwise: // Adds each adjacent pair of lanes of one v128 into a single double-width lane.
  3677  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3678  
  3679  			signed := op.B3 // Lanes are sign-extended before adding when set, zero-extended otherwise.
  3680  
  3681  			var retLo, retHi uint64 // Stay zero for any shape other than the two handled below.
  3682  			switch op.B1 { // op.B1 is the source lane shape.
  3683  			case wazeroir.ShapeI8x16: // Sixteen bytes become eight 16-bit sums.
  3684  				for i := 0; i < 8; i++ {
  3685  					var v1, v2 byte
  3686  					if i < 4 { // Pair i is source lanes 2i and 2i+1; pairs 0-3 come from lo, 4-7 from hi.
  3687  						v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8))
  3688  					} else {
  3689  						v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8))
  3690  					}
  3691  
  3692  					var v16 uint16
  3693  					if signed {
  3694  						v16 = uint16(int16(int8(v1)) + int16(int8(v2))) // Widened first, so the sum cannot overflow.
  3695  					} else {
  3696  						v16 = uint16(v1) + uint16(v2)
  3697  					}
  3698  
  3699  					if i < 4 {
  3700  						retLo |= uint64(v16) << (i * 16)
  3701  					} else {
  3702  						retHi |= uint64(v16) << ((i - 4) * 16)
  3703  					}
  3704  				}
  3705  			case wazeroir.ShapeI16x8: // Eight 16-bit lanes become four 32-bit sums.
  3706  				for i := 0; i < 4; i++ {
  3707  					var v1, v2 uint16
  3708  					if i < 2 { // Pairs 0-1 come from lo, pairs 2-3 from hi.
  3709  						v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16))
  3710  					} else {
  3711  						v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16))
  3712  					}
  3713  
  3714  					var v32 uint32
  3715  					if signed {
  3716  						v32 = uint32(int32(int16(v1)) + int32(int16(v2)))
  3717  					} else {
  3718  						v32 = uint32(v1) + uint32(v2)
  3719  					}
  3720  
  3721  					if i < 2 {
  3722  						retLo |= uint64(v32) << (i * 32)
  3723  					} else {
  3724  						retHi |= uint64(v32) << ((i - 2) * 32)
  3725  					}
  3726  				}
  3727  			}
  3728  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3729  			ce.pushValue(retHi)
  3730  			frame.pc++
  3731  		case wazeroir.OperationKindV128FloatPromote: // Converts the two float32 lanes in the lower 64 bits of a v128 into two float64 lanes.
  3732  			_, toPromote := ce.popValue(), ce.popValue() // The first pop (upper 64 bits) is discarded; only the lower 64 bits are used.
  3733  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote))))) // Lower result half, from the low float32 lane.
  3734  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote >> 32))))) // Upper result half, from the high float32 lane.
  3735  			frame.pc++
  3736  		case wazeroir.OperationKindV128FloatDemote: // Converts the two float64 lanes of a v128 into two float32 lanes packed in the lower 64 bits.
  3737  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3738  			ce.pushValue( // Lower result half: lo's float32 in bits 0-31, hi's float32 in bits 32-63.
  3739  				uint64(math.Float32bits(float32(math.Float64frombits(lo)))) |
  3740  					(uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32),
  3741  			)
  3742  			ce.pushValue(0) // Upper result half is always zero.
  3743  			frame.pc++
  3744  		case wazeroir.OperationKindV128FConvertFromI: // Converts 32-bit integer lanes of a v128 to floating-point lanes; op.B1 is the destination shape.
  3745  			hi, lo := ce.popValue(), ce.popValue() // The first pop yields the upper 64 bits, the second the lower.
  3746  			v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32) // The four 32-bit source lanes, lowest first.
  3747  			signed := op.B3 // Source lanes are read as int32 when set, as uint32 otherwise.
  3748  
  3749  			var retLo, retHi uint64 // Stay zero for any destination shape not listed below.
  3750  			switch op.B1 { // Destination shape.
  3751  			case wazeroir.ShapeF32x4: // f32x4 from signed/unsigned i32x4
  3752  				if signed {
  3753  					retLo = uint64(math.Float32bits(float32(int32(v1)))) |
  3754  						(uint64(math.Float32bits(float32(int32(v2)))) << 32)
  3755  					retHi = uint64(math.Float32bits(float32(int32(v3)))) |
  3756  						(uint64(math.Float32bits(float32(int32(v4)))) << 32)
  3757  				} else {
  3758  					retLo = uint64(math.Float32bits(float32(v1))) |
  3759  						(uint64(math.Float32bits(float32(v2))) << 32)
  3760  					retHi = uint64(math.Float32bits(float32(v3))) |
  3761  						(uint64(math.Float32bits(float32(v4))) << 32)
  3762  				}
  3763  			case wazeroir.ShapeF64x2: // f64x2 from signed/unsigned i32x4
  3764  				if signed { // Only v1 and v2, the lanes from the lower 64 bits, are converted.
  3765  					retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2)))
  3766  				} else {
  3767  					retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2))
  3768  				}
  3769  			}
  3770  
  3771  			ce.pushValue(retLo) // Lower half is pushed first, mirroring the hi-then-lo pop order above.
  3772  			ce.pushValue(retHi)
  3773  			frame.pc++
  3774  		case wazeroir.OperationKindV128Narrow:
  3775  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3776  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3777  			signed := op.B3
  3778  
  3779  			var retLo, retHi uint64
  3780  			switch op.B1 {
  3781  			case wazeroir.ShapeI16x8: // signed/unsigned i16x8 to i8x16
  3782  				for i := 0; i < 8; i++ {
  3783  					var v16 uint16
  3784  					if i < 4 {
  3785  						v16 = uint16(x1Lo >> (i * 16))
  3786  					} else {
  3787  						v16 = uint16(x1Hi >> ((i - 4) * 16))
  3788  					}
  3789  
  3790  					var v byte
  3791  					if signed {
  3792  						if s := int16(v16); s > math.MaxInt8 {
  3793  							v = math.MaxInt8
  3794  						} else if s < math.MinInt8 {
  3795  							s = math.MinInt8
  3796  							v = byte(s)
  3797  						} else {
  3798  							v = byte(v16)
  3799  						}
  3800  					} else {
  3801  						if s := int16(v16); s > math.MaxUint8 {
  3802  							v = math.MaxUint8
  3803  						} else if s < 0 {
  3804  							v = 0
  3805  						} else {
  3806  							v = byte(v16)
  3807  						}
  3808  					}
  3809  					retLo |= uint64(v) << (i * 8)
  3810  				}
  3811  				for i := 0; i < 8; i++ {
  3812  					var v16 uint16
  3813  					if i < 4 {
  3814  						v16 = uint16(x2Lo >> (i * 16))
  3815  					} else {
  3816  						v16 = uint16(x2Hi >> ((i - 4) * 16))
  3817  					}
  3818  
  3819  					var v byte
  3820  					if signed {
  3821  						if s := int16(v16); s > math.MaxInt8 {
  3822  							v = math.MaxInt8
  3823  						} else if s < math.MinInt8 {
  3824  							s = math.MinInt8
  3825  							v = byte(s)
  3826  						} else {
  3827  							v = byte(v16)
  3828  						}
  3829  					} else {
  3830  						if s := int16(v16); s > math.MaxUint8 {
  3831  							v = math.MaxUint8
  3832  						} else if s < 0 {
  3833  							v = 0
  3834  						} else {
  3835  							v = byte(v16)
  3836  						}
  3837  					}
  3838  					retHi |= uint64(v) << (i * 8)
  3839  				}
  3840  			case wazeroir.ShapeI32x4: // signed/unsigned i32x4 to i16x8
  3841  				for i := 0; i < 4; i++ {
  3842  					var v32 uint32
  3843  					if i < 2 {
  3844  						v32 = uint32(x1Lo >> (i * 32))
  3845  					} else {
  3846  						v32 = uint32(x1Hi >> ((i - 2) * 32))
  3847  					}
  3848  
  3849  					var v uint16
  3850  					if signed {
  3851  						if s := int32(v32); s > math.MaxInt16 {
  3852  							v = math.MaxInt16
  3853  						} else if s < math.MinInt16 {
  3854  							s = math.MinInt16
  3855  							v = uint16(s)
  3856  						} else {
  3857  							v = uint16(v32)
  3858  						}
  3859  					} else {
  3860  						if s := int32(v32); s > math.MaxUint16 {
  3861  							v = math.MaxUint16
  3862  						} else if s < 0 {
  3863  							v = 0
  3864  						} else {
  3865  							v = uint16(v32)
  3866  						}
  3867  					}
  3868  					retLo |= uint64(v) << (i * 16)
  3869  				}
  3870  
  3871  				for i := 0; i < 4; i++ {
  3872  					var v32 uint32
  3873  					if i < 2 {
  3874  						v32 = uint32(x2Lo >> (i * 32))
  3875  					} else {
  3876  						v32 = uint32(x2Hi >> ((i - 2) * 32))
  3877  					}
  3878  
  3879  					var v uint16
  3880  					if signed {
  3881  						if s := int32(v32); s > math.MaxInt16 {
  3882  							v = math.MaxInt16
  3883  						} else if s < math.MinInt16 {
  3884  							s = math.MinInt16
  3885  							v = uint16(s)
  3886  						} else {
  3887  							v = uint16(v32)
  3888  						}
  3889  					} else {
  3890  						if s := int32(v32); s > math.MaxUint16 {
  3891  							v = math.MaxUint16
  3892  						} else if s < 0 {
  3893  							v = 0
  3894  						} else {
  3895  							v = uint16(v32)
  3896  						}
  3897  					}
  3898  					retHi |= uint64(v) << (i * 16)
  3899  				}
  3900  			}
  3901  			ce.pushValue(retLo)
  3902  			ce.pushValue(retHi)
  3903  			frame.pc++
  3904  		case wazeroir.OperationKindV128Dot:
  3905  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3906  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3907  			ce.pushValue(
  3908  				uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) |
  3909  					(uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32),
  3910  			)
  3911  			ce.pushValue(
  3912  				uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) |
  3913  					(uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32),
  3914  			)
  3915  			frame.pc++
  3916  		case wazeroir.OperationKindV128ITruncSatFromF:
  3917  			hi, lo := ce.popValue(), ce.popValue()
  3918  			signed := op.B3
  3919  			var retLo, retHi uint64
  3920  
  3921  			switch op.B1 {
  3922  			case wazeroir.ShapeF32x4: // f32x4 to i32x4
  3923  				for i, f64 := range [4]float64{
  3924  					math.Trunc(float64(math.Float32frombits(uint32(lo)))),
  3925  					math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))),
  3926  					math.Trunc(float64(math.Float32frombits(uint32(hi)))),
  3927  					math.Trunc(float64(math.Float32frombits(uint32(hi >> 32)))),
  3928  				} {
  3929  
  3930  					var v uint32
  3931  					if math.IsNaN(f64) {
  3932  						v = 0
  3933  					} else if signed {
  3934  						if f64 < math.MinInt32 {
  3935  							f64 = math.MinInt32
  3936  						} else if f64 > math.MaxInt32 {
  3937  							f64 = math.MaxInt32
  3938  						}
  3939  						v = uint32(int32(f64))
  3940  					} else {
  3941  						if f64 < 0 {
  3942  							f64 = 0
  3943  						} else if f64 > math.MaxUint32 {
  3944  							f64 = math.MaxUint32
  3945  						}
  3946  						v = uint32(f64)
  3947  					}
  3948  
  3949  					if i < 2 {
  3950  						retLo |= uint64(v) << (i * 32)
  3951  					} else {
  3952  						retHi |= uint64(v) << ((i - 2) * 32)
  3953  					}
  3954  				}
  3955  
  3956  			case wazeroir.ShapeF64x2: // f64x2 to i32x4
  3957  				for i, f := range [2]float64{
  3958  					math.Trunc(math.Float64frombits(lo)),
  3959  					math.Trunc(math.Float64frombits(hi)),
  3960  				} {
  3961  					var v uint32
  3962  					if math.IsNaN(f) {
  3963  						v = 0
  3964  					} else if signed {
  3965  						if f < math.MinInt32 {
  3966  							f = math.MinInt32
  3967  						} else if f > math.MaxInt32 {
  3968  							f = math.MaxInt32
  3969  						}
  3970  						v = uint32(int32(f))
  3971  					} else {
  3972  						if f < 0 {
  3973  							f = 0
  3974  						} else if f > math.MaxUint32 {
  3975  							f = math.MaxUint32
  3976  						}
  3977  						v = uint32(f)
  3978  					}
  3979  
  3980  					retLo |= uint64(v) << (i * 32)
  3981  				}
  3982  			}
  3983  
  3984  			ce.pushValue(retLo)
  3985  			ce.pushValue(retHi)
  3986  			frame.pc++
  3987  		case wazeroir.OperationKindAtomicMemoryWait:
  3988  			timeout := int64(ce.popValue())
  3989  			exp := ce.popValue()
  3990  			offset := ce.popMemoryOffset(op)
  3991  			// Runtime instead of validation error because the spec intends to allow binaries to include
  3992  			// such instructions as long as they are not executed.
  3993  			if !memoryInst.Shared {
  3994  				panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
  3995  			}
  3996  
  3997  			switch wazeroir.UnsignedType(op.B1) {
  3998  			case wazeroir.UnsignedTypeI32:
  3999  				if offset%4 != 0 {
  4000  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4001  				}
  4002  				if int(offset) > len(memoryInst.Buffer)-4 {
  4003  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4004  				}
  4005  				ce.pushValue(memoryInst.Wait32(offset, uint32(exp), timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 {
  4006  					mem.Mux.Lock()
  4007  					defer mem.Mux.Unlock()
  4008  					value, _ := mem.ReadUint32Le(offset)
  4009  					return value
  4010  				}))
  4011  			case wazeroir.UnsignedTypeI64:
  4012  				if offset%8 != 0 {
  4013  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4014  				}
  4015  				if int(offset) > len(memoryInst.Buffer)-8 {
  4016  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4017  				}
  4018  				ce.pushValue(memoryInst.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 {
  4019  					mem.Mux.Lock()
  4020  					defer mem.Mux.Unlock()
  4021  					value, _ := mem.ReadUint64Le(offset)
  4022  					return value
  4023  				}))
  4024  			}
  4025  			frame.pc++
  4026  		case wazeroir.OperationKindAtomicMemoryNotify:
  4027  			count := ce.popValue()
  4028  			offset := ce.popMemoryOffset(op)
  4029  			if offset%4 != 0 {
  4030  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4031  			}
  4032  			// Just a bounds check
  4033  			if offset >= memoryInst.Size() {
  4034  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4035  			}
  4036  			res := memoryInst.Notify(offset, uint32(count))
  4037  			ce.pushValue(uint64(res))
  4038  			frame.pc++
  4039  		case wazeroir.OperationKindAtomicFence:
  4040  			// Memory not required for fence only
  4041  			if memoryInst != nil {
  4042  				// An empty critical section can be used as a synchronization primitive, which is what
  4043  				// fence is. Probably, there are no spectests or defined behavior to confirm this yet.
  4044  				memoryInst.Mux.Lock()
  4045  				memoryInst.Mux.Unlock() //nolint:staticcheck
  4046  			}
  4047  			frame.pc++
  4048  		case wazeroir.OperationKindAtomicLoad:
  4049  			offset := ce.popMemoryOffset(op)
  4050  			switch wazeroir.UnsignedType(op.B1) {
  4051  			case wazeroir.UnsignedTypeI32:
  4052  				if offset%4 != 0 {
  4053  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4054  				}
  4055  				memoryInst.Mux.Lock()
  4056  				val, ok := memoryInst.ReadUint32Le(offset)
  4057  				memoryInst.Mux.Unlock()
  4058  				if !ok {
  4059  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4060  				}
  4061  				ce.pushValue(uint64(val))
  4062  			case wazeroir.UnsignedTypeI64:
  4063  				if offset%8 != 0 {
  4064  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4065  				}
  4066  				memoryInst.Mux.Lock()
  4067  				val, ok := memoryInst.ReadUint64Le(offset)
  4068  				memoryInst.Mux.Unlock()
  4069  				if !ok {
  4070  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4071  				}
  4072  				ce.pushValue(val)
  4073  			}
  4074  			frame.pc++
  4075  		case wazeroir.OperationKindAtomicLoad8:
  4076  			offset := ce.popMemoryOffset(op)
  4077  			memoryInst.Mux.Lock()
  4078  			val, ok := memoryInst.ReadByte(offset)
  4079  			memoryInst.Mux.Unlock()
  4080  			if !ok {
  4081  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4082  			}
  4083  			ce.pushValue(uint64(val))
  4084  			frame.pc++
  4085  		case wazeroir.OperationKindAtomicLoad16:
  4086  			offset := ce.popMemoryOffset(op)
  4087  			if offset%2 != 0 {
  4088  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4089  			}
  4090  			memoryInst.Mux.Lock()
  4091  			val, ok := memoryInst.ReadUint16Le(offset)
  4092  			memoryInst.Mux.Unlock()
  4093  			if !ok {
  4094  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4095  			}
  4096  			ce.pushValue(uint64(val))
  4097  			frame.pc++
  4098  		case wazeroir.OperationKindAtomicStore:
  4099  			val := ce.popValue()
  4100  			offset := ce.popMemoryOffset(op)
  4101  			switch wazeroir.UnsignedType(op.B1) {
  4102  			case wazeroir.UnsignedTypeI32:
  4103  				if offset%4 != 0 {
  4104  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4105  				}
  4106  				memoryInst.Mux.Lock()
  4107  				ok := memoryInst.WriteUint32Le(offset, uint32(val))
  4108  				memoryInst.Mux.Unlock()
  4109  				if !ok {
  4110  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4111  				}
  4112  			case wazeroir.UnsignedTypeI64:
  4113  				if offset%8 != 0 {
  4114  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4115  				}
  4116  				memoryInst.Mux.Lock()
  4117  				ok := memoryInst.WriteUint64Le(offset, val)
  4118  				memoryInst.Mux.Unlock()
  4119  				if !ok {
  4120  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4121  				}
  4122  			}
  4123  			frame.pc++
  4124  		case wazeroir.OperationKindAtomicStore8:
  4125  			val := byte(ce.popValue())
  4126  			offset := ce.popMemoryOffset(op)
  4127  			memoryInst.Mux.Lock()
  4128  			ok := memoryInst.WriteByte(offset, val)
  4129  			memoryInst.Mux.Unlock()
  4130  			if !ok {
  4131  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4132  			}
  4133  			frame.pc++
  4134  		case wazeroir.OperationKindAtomicStore16:
  4135  			val := uint16(ce.popValue())
  4136  			offset := ce.popMemoryOffset(op)
  4137  			if offset%2 != 0 {
  4138  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4139  			}
  4140  			memoryInst.Mux.Lock()
  4141  			ok := memoryInst.WriteUint16Le(offset, val)
  4142  			memoryInst.Mux.Unlock()
  4143  			if !ok {
  4144  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4145  			}
  4146  			frame.pc++
  4147  		case wazeroir.OperationKindAtomicRMW:
  4148  			val := ce.popValue()
  4149  			offset := ce.popMemoryOffset(op)
  4150  			switch wazeroir.UnsignedType(op.B1) {
  4151  			case wazeroir.UnsignedTypeI32:
  4152  				if offset%4 != 0 {
  4153  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4154  				}
  4155  				memoryInst.Mux.Lock()
  4156  				old, ok := memoryInst.ReadUint32Le(offset)
  4157  				if !ok {
  4158  					memoryInst.Mux.Unlock()
  4159  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4160  				}
  4161  				var newVal uint32
  4162  				switch wazeroir.AtomicArithmeticOp(op.B2) {
  4163  				case wazeroir.AtomicArithmeticOpAdd:
  4164  					newVal = old + uint32(val)
  4165  				case wazeroir.AtomicArithmeticOpSub:
  4166  					newVal = old - uint32(val)
  4167  				case wazeroir.AtomicArithmeticOpAnd:
  4168  					newVal = old & uint32(val)
  4169  				case wazeroir.AtomicArithmeticOpOr:
  4170  					newVal = old | uint32(val)
  4171  				case wazeroir.AtomicArithmeticOpXor:
  4172  					newVal = old ^ uint32(val)
  4173  				case wazeroir.AtomicArithmeticOpNop:
  4174  					newVal = uint32(val)
  4175  				}
  4176  				memoryInst.WriteUint32Le(offset, newVal)
  4177  				memoryInst.Mux.Unlock()
  4178  				ce.pushValue(uint64(old))
  4179  			case wazeroir.UnsignedTypeI64:
  4180  				if offset%8 != 0 {
  4181  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4182  				}
  4183  				memoryInst.Mux.Lock()
  4184  				old, ok := memoryInst.ReadUint64Le(offset)
  4185  				if !ok {
  4186  					memoryInst.Mux.Unlock()
  4187  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4188  				}
  4189  				var newVal uint64
  4190  				switch wazeroir.AtomicArithmeticOp(op.B2) {
  4191  				case wazeroir.AtomicArithmeticOpAdd:
  4192  					newVal = old + val
  4193  				case wazeroir.AtomicArithmeticOpSub:
  4194  					newVal = old - val
  4195  				case wazeroir.AtomicArithmeticOpAnd:
  4196  					newVal = old & val
  4197  				case wazeroir.AtomicArithmeticOpOr:
  4198  					newVal = old | val
  4199  				case wazeroir.AtomicArithmeticOpXor:
  4200  					newVal = old ^ val
  4201  				case wazeroir.AtomicArithmeticOpNop:
  4202  					newVal = val
  4203  				}
  4204  				memoryInst.WriteUint64Le(offset, newVal)
  4205  				memoryInst.Mux.Unlock()
  4206  				ce.pushValue(old)
  4207  			}
  4208  			frame.pc++
  4209  		case wazeroir.OperationKindAtomicRMW8:
  4210  			val := ce.popValue()
  4211  			offset := ce.popMemoryOffset(op)
  4212  			memoryInst.Mux.Lock()
  4213  			old, ok := memoryInst.ReadByte(offset)
  4214  			if !ok {
  4215  				memoryInst.Mux.Unlock()
  4216  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4217  			}
  4218  			arg := byte(val)
  4219  			var newVal byte
  4220  			switch wazeroir.AtomicArithmeticOp(op.B2) {
  4221  			case wazeroir.AtomicArithmeticOpAdd:
  4222  				newVal = old + arg
  4223  			case wazeroir.AtomicArithmeticOpSub:
  4224  				newVal = old - arg
  4225  			case wazeroir.AtomicArithmeticOpAnd:
  4226  				newVal = old & arg
  4227  			case wazeroir.AtomicArithmeticOpOr:
  4228  				newVal = old | arg
  4229  			case wazeroir.AtomicArithmeticOpXor:
  4230  				newVal = old ^ arg
  4231  			case wazeroir.AtomicArithmeticOpNop:
  4232  				newVal = arg
  4233  			}
  4234  			memoryInst.WriteByte(offset, newVal)
  4235  			memoryInst.Mux.Unlock()
  4236  			ce.pushValue(uint64(old))
  4237  			frame.pc++
  4238  		case wazeroir.OperationKindAtomicRMW16:
  4239  			val := ce.popValue()
  4240  			offset := ce.popMemoryOffset(op)
  4241  			if offset%2 != 0 {
  4242  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4243  			}
  4244  			memoryInst.Mux.Lock()
  4245  			old, ok := memoryInst.ReadUint16Le(offset)
  4246  			if !ok {
  4247  				memoryInst.Mux.Unlock()
  4248  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4249  			}
  4250  			arg := uint16(val)
  4251  			var newVal uint16
  4252  			switch wazeroir.AtomicArithmeticOp(op.B2) {
  4253  			case wazeroir.AtomicArithmeticOpAdd:
  4254  				newVal = old + arg
  4255  			case wazeroir.AtomicArithmeticOpSub:
  4256  				newVal = old - arg
  4257  			case wazeroir.AtomicArithmeticOpAnd:
  4258  				newVal = old & arg
  4259  			case wazeroir.AtomicArithmeticOpOr:
  4260  				newVal = old | arg
  4261  			case wazeroir.AtomicArithmeticOpXor:
  4262  				newVal = old ^ arg
  4263  			case wazeroir.AtomicArithmeticOpNop:
  4264  				newVal = arg
  4265  			}
  4266  			memoryInst.WriteUint16Le(offset, newVal)
  4267  			memoryInst.Mux.Unlock()
  4268  			ce.pushValue(uint64(old))
  4269  			frame.pc++
  4270  		case wazeroir.OperationKindAtomicRMWCmpxchg:
  4271  			rep := ce.popValue()
  4272  			exp := ce.popValue()
  4273  			offset := ce.popMemoryOffset(op)
  4274  			switch wazeroir.UnsignedType(op.B1) {
  4275  			case wazeroir.UnsignedTypeI32:
  4276  				if offset%4 != 0 {
  4277  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4278  				}
  4279  				memoryInst.Mux.Lock()
  4280  				old, ok := memoryInst.ReadUint32Le(offset)
  4281  				if !ok {
  4282  					memoryInst.Mux.Unlock()
  4283  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4284  				}
  4285  				if old == uint32(exp) {
  4286  					memoryInst.WriteUint32Le(offset, uint32(rep))
  4287  				}
  4288  				memoryInst.Mux.Unlock()
  4289  				ce.pushValue(uint64(old))
  4290  			case wazeroir.UnsignedTypeI64:
  4291  				if offset%8 != 0 {
  4292  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4293  				}
  4294  				memoryInst.Mux.Lock()
  4295  				old, ok := memoryInst.ReadUint64Le(offset)
  4296  				if !ok {
  4297  					memoryInst.Mux.Unlock()
  4298  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4299  				}
  4300  				if old == exp {
  4301  					memoryInst.WriteUint64Le(offset, rep)
  4302  				}
  4303  				memoryInst.Mux.Unlock()
  4304  				ce.pushValue(old)
  4305  			}
  4306  			frame.pc++
  4307  		case wazeroir.OperationKindAtomicRMW8Cmpxchg:
  4308  			rep := byte(ce.popValue())
  4309  			exp := byte(ce.popValue())
  4310  			offset := ce.popMemoryOffset(op)
  4311  			memoryInst.Mux.Lock()
  4312  			old, ok := memoryInst.ReadByte(offset)
  4313  			if !ok {
  4314  				memoryInst.Mux.Unlock()
  4315  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4316  			}
  4317  			if old == exp {
  4318  				memoryInst.WriteByte(offset, rep)
  4319  			}
  4320  			memoryInst.Mux.Unlock()
  4321  			ce.pushValue(uint64(old))
  4322  			frame.pc++
  4323  		case wazeroir.OperationKindAtomicRMW16Cmpxchg:
  4324  			rep := uint16(ce.popValue())
  4325  			exp := uint16(ce.popValue())
  4326  			offset := ce.popMemoryOffset(op)
  4327  			if offset%2 != 0 {
  4328  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4329  			}
  4330  			memoryInst.Mux.Lock()
  4331  			old, ok := memoryInst.ReadUint16Le(offset)
  4332  			if !ok {
  4333  				memoryInst.Mux.Unlock()
  4334  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4335  			}
  4336  			if old == exp {
  4337  				memoryInst.WriteUint16Le(offset, rep)
  4338  			}
  4339  			memoryInst.Mux.Unlock()
  4340  			ce.pushValue(uint64(old))
  4341  			frame.pc++
  4342  		default:
  4343  			frame.pc++
  4344  		}
  4345  	}
  4346  	ce.popFrame()
  4347  }
  4348  
  4349  func WasmCompatMax32bits(v1, v2 uint32) uint64 {
  4350  	return uint64(math.Float32bits(moremath.WasmCompatMax32(
  4351  		math.Float32frombits(v1),
  4352  		math.Float32frombits(v2),
  4353  	)))
  4354  }
  4355  
  4356  func WasmCompatMin32bits(v1, v2 uint32) uint64 {
  4357  	return uint64(math.Float32bits(moremath.WasmCompatMin32(
  4358  		math.Float32frombits(v1),
  4359  		math.Float32frombits(v2),
  4360  	)))
  4361  }
  4362  
  4363  func addFloat32bits(v1, v2 uint32) uint64 {
  4364  	return uint64(math.Float32bits(math.Float32frombits(v1) + math.Float32frombits(v2)))
  4365  }
  4366  
  4367  func subFloat32bits(v1, v2 uint32) uint64 {
  4368  	return uint64(math.Float32bits(math.Float32frombits(v1) - math.Float32frombits(v2)))
  4369  }
  4370  
  4371  func mulFloat32bits(v1, v2 uint32) uint64 {
  4372  	return uint64(math.Float32bits(math.Float32frombits(v1) * math.Float32frombits(v2)))
  4373  }
  4374  
  4375  func divFloat32bits(v1, v2 uint32) uint64 {
  4376  	return uint64(math.Float32bits(math.Float32frombits(v1) / math.Float32frombits(v2)))
  4377  }
  4378  
  4379  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  4380  func flt32(z1, z2 float32) bool {
  4381  	if z1 != z1 || z2 != z2 {
  4382  		return false
  4383  	} else if z1 == z2 {
  4384  		return false
  4385  	} else if math.IsInf(float64(z1), 1) {
  4386  		return false
  4387  	} else if math.IsInf(float64(z1), -1) {
  4388  		return true
  4389  	} else if math.IsInf(float64(z2), 1) {
  4390  		return true
  4391  	} else if math.IsInf(float64(z2), -1) {
  4392  		return false
  4393  	}
  4394  	return z1 < z2
  4395  }
  4396  
  4397  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  4398  func flt64(z1, z2 float64) bool {
  4399  	if z1 != z1 || z2 != z2 {
  4400  		return false
  4401  	} else if z1 == z2 {
  4402  		return false
  4403  	} else if math.IsInf(z1, 1) {
  4404  		return false
  4405  	} else if math.IsInf(z1, -1) {
  4406  		return true
  4407  	} else if math.IsInf(z2, 1) {
  4408  		return true
  4409  	} else if math.IsInf(z2, -1) {
  4410  		return false
  4411  	}
  4412  	return z1 < z2
  4413  }
  4414  
  4415  func i8RoundingAverage(v1, v2 byte) byte {
  4416  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  4417  	return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2)
  4418  }
  4419  
  4420  func i16RoundingAverage(v1, v2 uint16) uint16 {
  4421  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  4422  	return uint16((uint32(v1) + uint32(v2) + 1) / 2)
  4423  }
  4424  
  4425  func i8Abs(v byte) byte {
  4426  	if i := int8(v); i < 0 {
  4427  		return byte(-i)
  4428  	} else {
  4429  		return byte(i)
  4430  	}
  4431  }
  4432  
  4433  func i8MaxU(v1, v2 byte) byte {
  4434  	if v1 < v2 {
  4435  		return v2
  4436  	} else {
  4437  		return v1
  4438  	}
  4439  }
  4440  
  4441  func i8MinU(v1, v2 byte) byte {
  4442  	if v1 > v2 {
  4443  		return v2
  4444  	} else {
  4445  		return v1
  4446  	}
  4447  }
  4448  
  4449  func i8MaxS(v1, v2 byte) byte {
  4450  	if int8(v1) < int8(v2) {
  4451  		return v2
  4452  	} else {
  4453  		return v1
  4454  	}
  4455  }
  4456  
  4457  func i8MinS(v1, v2 byte) byte {
  4458  	if int8(v1) > int8(v2) {
  4459  		return v2
  4460  	} else {
  4461  		return v1
  4462  	}
  4463  }
  4464  
  4465  func i16MaxU(v1, v2 uint16) uint16 {
  4466  	if v1 < v2 {
  4467  		return v2
  4468  	} else {
  4469  		return v1
  4470  	}
  4471  }
  4472  
  4473  func i16MinU(v1, v2 uint16) uint16 {
  4474  	if v1 > v2 {
  4475  		return v2
  4476  	} else {
  4477  		return v1
  4478  	}
  4479  }
  4480  
  4481  func i16MaxS(v1, v2 uint16) uint16 {
  4482  	if int16(v1) < int16(v2) {
  4483  		return v2
  4484  	} else {
  4485  		return v1
  4486  	}
  4487  }
  4488  
  4489  func i16MinS(v1, v2 uint16) uint16 {
  4490  	if int16(v1) > int16(v2) {
  4491  		return v2
  4492  	} else {
  4493  		return v1
  4494  	}
  4495  }
  4496  
  4497  func i32MaxU(v1, v2 uint32) uint32 {
  4498  	if v1 < v2 {
  4499  		return v2
  4500  	} else {
  4501  		return v1
  4502  	}
  4503  }
  4504  
  4505  func i32MinU(v1, v2 uint32) uint32 {
  4506  	if v1 > v2 {
  4507  		return v2
  4508  	} else {
  4509  		return v1
  4510  	}
  4511  }
  4512  
  4513  func i32MaxS(v1, v2 uint32) uint32 {
  4514  	if int32(v1) < int32(v2) {
  4515  		return v2
  4516  	} else {
  4517  		return v1
  4518  	}
  4519  }
  4520  
  4521  func i32MinS(v1, v2 uint32) uint32 {
  4522  	if int32(v1) > int32(v2) {
  4523  		return v2
  4524  	} else {
  4525  		return v1
  4526  	}
  4527  }
  4528  
  4529  func i16Abs(v uint16) uint16 {
  4530  	if i := int16(v); i < 0 {
  4531  		return uint16(-i)
  4532  	} else {
  4533  		return uint16(i)
  4534  	}
  4535  }
  4536  
  4537  func i32Abs(v uint32) uint32 {
  4538  	if i := int32(v); i < 0 {
  4539  		return uint32(-i)
  4540  	} else {
  4541  		return uint32(i)
  4542  	}
  4543  }
  4544  
  4545  func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, m *wasm.ModuleInstance, f *function, fnl experimental.FunctionListener) context.Context {
  4546  	def, typ := f.definition(), f.funcType
  4547  
  4548  	ce.stackIterator.reset(ce.stack, ce.frames, f)
  4549  	fnl.Before(ctx, m, def, ce.peekValues(typ.ParamNumInUint64), &ce.stackIterator)
  4550  	ce.stackIterator.clear()
  4551  	ce.callNativeFunc(ctx, m, f)
  4552  	fnl.After(ctx, m, def, ce.peekValues(typ.ResultNumInUint64))
  4553  	return ctx
  4554  }
  4555  
  4556  // popMemoryOffset takes a memory offset off the stack for use in load and store instructions.
  4557  // As the top of stack value is 64-bit, this ensures it is in range before returning it.
  4558  func (ce *callEngine) popMemoryOffset(op *wazeroir.UnionOperation) uint32 {
  4559  	offset := op.U2 + ce.popValue()
  4560  	if offset > math.MaxUint32 {
  4561  		panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4562  	}
  4563  	return uint32(offset)
  4564  }
  4565  
  4566  func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleInstance, f *function) {
  4567  	typ := f.funcType
  4568  	paramLen := typ.ParamNumInUint64
  4569  	resultLen := typ.ResultNumInUint64
  4570  	stackLen := paramLen
  4571  
  4572  	// In the interpreter engine, ce.stack may only have capacity to store
  4573  	// parameters. Grow when there are more results than parameters.
  4574  	if growLen := resultLen - paramLen; growLen > 0 {
  4575  		for i := 0; i < growLen; i++ {
  4576  			ce.stack = append(ce.stack, 0)
  4577  		}
  4578  		stackLen += growLen
  4579  	}
  4580  
  4581  	// Pass the stack elements to the go function.
  4582  	stack := ce.stack[len(ce.stack)-stackLen:]
  4583  	ce.callGoFunc(ctx, m, f, stack)
  4584  
  4585  	// Shrink the stack when there were more parameters than results.
  4586  	if shrinkLen := paramLen - resultLen; shrinkLen > 0 {
  4587  		ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen]
  4588  	}
  4589  }