github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/interpreter/interpreter.go (about)

     1  package interpreter
     2  
     3  import (
     4  	"context"
     5  	"encoding/binary"
     6  	"errors"
     7  	"fmt"
     8  	"math"
     9  	"math/bits"
    10  	"sync"
    11  	"unsafe"
    12  
    13  	"github.com/wasilibs/wazerox/api"
    14  	"github.com/wasilibs/wazerox/experimental"
    15  	"github.com/wasilibs/wazerox/internal/filecache"
    16  	"github.com/wasilibs/wazerox/internal/internalapi"
    17  	"github.com/wasilibs/wazerox/internal/moremath"
    18  	"github.com/wasilibs/wazerox/internal/wasm"
    19  	"github.com/wasilibs/wazerox/internal/wasmdebug"
    20  	"github.com/wasilibs/wazerox/internal/wasmruntime"
    21  	"github.com/wasilibs/wazerox/internal/wazeroir"
    22  )
    23  
// callStackCeiling is the maximum WebAssembly call frame stack height. Once this many frames are
// live, pushFrame panics with wasmruntime.ErrRuntimeStackOverflow instead of overflowing the Go
// runtime.
//
// The default value should suffice for most use cases. Those wishing to change this can do so via
// `go build -ldflags`.
// NOTE(review): the linker's -X flag only sets string variables; confirm the override works for this int.
var callStackCeiling = 2000
    29  
// engine is an interpreter implementation of wasm.Engine.
type engine struct {
	// enabledFeatures is handed to the wazeroir compiler in CompileModule.
	enabledFeatures   api.CoreFeatures
	compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mux.
	mux               sync.RWMutex
	// labelAddressResolutionCache is the temporary cache used to map LabelKind -> FrameID -> the index to the body.
	// lowerIR fills it while resolving labels and truncates every slice to length zero before returning.
	labelAddressResolutionCache [wazeroir.LabelKindNum][]uint64
}
    38  
    39  func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine {
    40  	return &engine{
    41  		enabledFeatures:   enabledFeatures,
    42  		compiledFunctions: map[wasm.ModuleID][]compiledFunction{},
    43  	}
    44  }
    45  
    46  // Close implements the same method as documented on wasm.Engine.
    47  func (e *engine) Close() (err error) {
    48  	return
    49  }
    50  
    51  // CompiledModuleCount implements the same method as documented on wasm.Engine.
    52  func (e *engine) CompiledModuleCount() uint32 {
    53  	return uint32(len(e.compiledFunctions))
    54  }
    55  
    56  // DeleteCompiledModule implements the same method as documented on wasm.Engine.
    57  func (e *engine) DeleteCompiledModule(m *wasm.Module) {
    58  	e.deleteCompiledFunctions(m)
    59  }
    60  
    61  func (e *engine) deleteCompiledFunctions(module *wasm.Module) {
    62  	e.mux.Lock()
    63  	defer e.mux.Unlock()
    64  	delete(e.compiledFunctions, module.ID)
    65  }
    66  
    67  func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) {
    68  	e.mux.Lock()
    69  	defer e.mux.Unlock()
    70  	e.compiledFunctions[module.ID] = fs
    71  }
    72  
    73  func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) {
    74  	e.mux.RLock()
    75  	defer e.mux.RUnlock()
    76  	fs, ok = e.compiledFunctions[module.ID]
    77  	return
    78  }
    79  
// moduleEngine implements wasm.ModuleEngine
type moduleEngine struct {
	// functions are the functions of a module instance, imported functions first.
	// The index is module instance-scoped.
	functions []function

	// parentEngine holds the *engine from which this module engine was created.
	parentEngine *engine
}
    89  
// callEngine holds the execution state (operand stack and call frames) for
// invocations of one function, and is shared across all the nested function
// calls originating from the same Call / CallWithStack execution.
//
// This implements api.Function.
type callEngine struct {
	internalapi.WazeroOnlyType

	// stack contains the operands.
	// Note that all the values are represented as uint64.
	stack []uint64

	// frames are the function call stack.
	frames []*callFrame

	// f is the initial function for this call engine.
	f *function

	// stackIterator is handed to Listeners so they can walk frames and stack.
	stackIterator stackIterator
}
   110  
   111  func (e *moduleEngine) newCallEngine(compiled *function) *callEngine {
   112  	return &callEngine{f: compiled}
   113  }
   114  
   115  func (ce *callEngine) pushValue(v uint64) {
   116  	ce.stack = append(ce.stack, v)
   117  }
   118  
   119  func (ce *callEngine) pushValues(v []uint64) {
   120  	ce.stack = append(ce.stack, v...)
   121  }
   122  
   123  func (ce *callEngine) popValue() (v uint64) {
   124  	// No need to check stack bound
   125  	// as we can assume that all the operations
   126  	// are valid thanks to validateFunction
   127  	// at module validation phase
   128  	// and wazeroir translation
   129  	// before compilation.
   130  	stackTopIndex := len(ce.stack) - 1
   131  	v = ce.stack[stackTopIndex]
   132  	ce.stack = ce.stack[:stackTopIndex]
   133  	return
   134  }
   135  
   136  func (ce *callEngine) popValues(v []uint64) {
   137  	stackTopIndex := len(ce.stack) - len(v)
   138  	copy(v, ce.stack[stackTopIndex:])
   139  	ce.stack = ce.stack[:stackTopIndex]
   140  }
   141  
   142  // peekValues peeks api.ValueType values from the stack and returns them.
   143  func (ce *callEngine) peekValues(count int) []uint64 {
   144  	if count == 0 {
   145  		return nil
   146  	}
   147  	stackLen := len(ce.stack)
   148  	return ce.stack[stackLen-count : stackLen]
   149  }
   150  
   151  func (ce *callEngine) drop(raw uint64) {
   152  	r := wazeroir.InclusiveRangeFromU64(raw)
   153  	if r.Start == -1 {
   154  		return
   155  	} else if r.Start == 0 {
   156  		ce.stack = ce.stack[:int32(len(ce.stack))-1-r.End]
   157  	} else {
   158  		newStack := ce.stack[:int32(len(ce.stack))-1-r.End]
   159  		newStack = append(newStack, ce.stack[int32(len(ce.stack))-r.Start:]...)
   160  		ce.stack = newStack
   161  	}
   162  }
   163  
   164  func (ce *callEngine) pushFrame(frame *callFrame) {
   165  	if callStackCeiling <= len(ce.frames) {
   166  		panic(wasmruntime.ErrRuntimeStackOverflow)
   167  	}
   168  	ce.frames = append(ce.frames, frame)
   169  }
   170  
   171  func (ce *callEngine) popFrame() (frame *callFrame) {
   172  	// No need to check stack bound as we can assume that all the operations are valid thanks to validateFunction at
   173  	// module validation phase and wazeroir translation before compilation.
   174  	oneLess := len(ce.frames) - 1
   175  	frame = ce.frames[oneLess]
   176  	ce.frames = ce.frames[:oneLess]
   177  	return
   178  }
   179  
// callFrame is one entry of callEngine.frames: the state of a single function
// activation.
type callFrame struct {
	// pc is the program counter representing the current position in code.body.
	pc uint64
	// f is the compiled function used in this function frame.
	f *function
	// base index in the frame of this function, used to detect the count of
	// values on the stack. It is set to len(ce.stack) when the frame is created.
	base int
}
   189  
// compiledFunction is the engine-level, instance-independent representation of
// one function of a module, produced by engine.CompileModule.
//
//   - source: the module the function was compiled from.
//   - body: the wazeroir operations with branch targets resolved to indexes into
//     this slice (see lowerIR); left unset for host functions.
//   - listener: the optional listener registered for this function.
//   - offsetsInWasmBinary: copied from the wazeroir compilation result when it is
//     non-empty; indexed by program counter.
//   - hostFn: the Go function for host functions, nil for Wasm functions.
//   - ensureTermination: the flag passed to CompileModule; call consults it to
//     decide whether to watch the context for cancellation.
//   - index: the function index in the module's function index space, i.e. the
//     import count plus the position in the code section.
type compiledFunction struct {
	source              *wasm.Module
	body                []wazeroir.UnionOperation
	listener            experimental.FunctionListener
	offsetsInWasmBinary []uint64
	hostFn              interface{}
	ensureTermination   bool
	index               wasm.Index
}
   199  
// function is a compiledFunction bound to a module instance. NewModuleEngine
// creates one per compiled function.
//
//   - funcType: the function's signature, pointing into the module's type section.
//   - moduleInstance: the instance the function belongs to.
//   - typeID: the instance-assigned ID of funcType, compared on indirect calls.
//   - parent: the compiledFunction holding the body or host function.
type function struct {
	funcType       *wasm.FunctionType
	moduleInstance *wasm.ModuleInstance
	typeID         wasm.FunctionTypeID
	parent         *compiledFunction
}
   206  
// snapshot is the state captured by callEngine.Snapshot. It is also used as the
// panic value that carries a restore request up to the engine (see Restore).
type snapshot struct {
	// stack and frames are copies of the call engine's slices taken at
	// snapshot time.
	stack  []uint64
	frames []*callFrame
	// pc is written to the innermost restored frame by doRestore.
	pc     uint64

	// ret holds the values passed to Restore; doRestore copies them over the
	// top of the restored stack.
	ret []uint64

	// ce is the call engine the snapshot was taken from.
	ce *callEngine
}
   216  
   217  // Snapshot implements the same method as documented on experimental.Snapshotter.
   218  func (ce *callEngine) Snapshot() experimental.Snapshot {
   219  	stack := make([]uint64, len(ce.stack))
   220  	copy(stack, ce.stack)
   221  
   222  	frames := make([]*callFrame, len(ce.frames))
   223  	copy(frames, ce.frames)
   224  
   225  	return &snapshot{
   226  		stack:  stack,
   227  		frames: frames,
   228  		ce:     ce,
   229  	}
   230  }
   231  
// Restore implements the same method as documented on experimental.Snapshot.
//
// It records ret on the snapshot and then panics with the snapshot itself. The
// panic is how the request travels up the Go stack: the call-operation handler
// of the owning call engine recovers it and invokes doRestore, so this method
// never returns normally.
func (s *snapshot) Restore(ret []uint64) {
	s.ret = ret
	panic(s)
}
   237  
   238  func (s *snapshot) doRestore() {
   239  	ce := s.ce
   240  
   241  	ce.stack = s.stack
   242  	ce.frames = s.frames
   243  	ce.frames[len(ce.frames)-1].pc = s.pc
   244  
   245  	copy(ce.stack[len(ce.stack)-len(s.ret):], s.ret)
   246  }
   247  
   248  // Error implements the same method on error.
   249  func (s *snapshot) Error() string {
   250  	return "unhandled snapshot restore, this generally indicates restore was called from a different " +
   251  		"exported function invocation than snapshot"
   252  }
   253  
   254  // functionFromUintptr resurrects the original *function from the given uintptr
   255  // which comes from either funcref table or OpcodeRefFunc instruction.
   256  func functionFromUintptr(ptr uintptr) *function {
   257  	// Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector.
   258  	//
   259  	// For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr"
   260  	// subroutine wanrs as "checkptr: pointer arithmetic result points to invalid allocation"
   261  	// https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69
   262  	var wrapped *uintptr = &ptr
   263  	return *(**function)(unsafe.Pointer(wrapped))
   264  }
   265  
// stackIterator implements experimental.StackIterator.
//
//   - stack, frames: the views of the call engine's operand stack and frames
//     that remain to be walked; Next shrinks both as it advances.
//   - started: false until the first call to Next.
//   - fn, pc: the function and program counter of the current position.
type stackIterator struct {
	stack   []uint64
	frames  []*callFrame
	started bool
	fn      *function
	pc      uint64
}
   274  
   275  func (si *stackIterator) reset(stack []uint64, frames []*callFrame, f *function) {
   276  	si.fn = f
   277  	si.pc = 0
   278  	si.stack = stack
   279  	si.frames = frames
   280  	si.started = false
   281  }
   282  
   283  func (si *stackIterator) clear() {
   284  	si.stack = nil
   285  	si.frames = nil
   286  	si.started = false
   287  	si.fn = nil
   288  }
   289  
   290  // Next implements the same method as documented on experimental.StackIterator.
   291  func (si *stackIterator) Next() bool {
   292  	if !si.started {
   293  		si.started = true
   294  		return true
   295  	}
   296  
   297  	if len(si.frames) == 0 {
   298  		return false
   299  	}
   300  
   301  	frame := si.frames[len(si.frames)-1]
   302  	si.stack = si.stack[:frame.base]
   303  	si.fn = frame.f
   304  	si.pc = frame.pc
   305  	si.frames = si.frames[:len(si.frames)-1]
   306  	return true
   307  }
   308  
   309  // Function implements the same method as documented on
   310  // experimental.StackIterator.
   311  func (si *stackIterator) Function() experimental.InternalFunction {
   312  	return internalFunction{si.fn}
   313  }
   314  
   315  // ProgramCounter implements the same method as documented on
   316  // experimental.StackIterator.
   317  func (si *stackIterator) ProgramCounter() experimental.ProgramCounter {
   318  	return experimental.ProgramCounter(si.pc)
   319  }
   320  
// internalFunction implements experimental.InternalFunction by wrapping a
// *function.
type internalFunction struct{ *function }
   323  
   324  // Definition implements the same method as documented on
   325  // experimental.InternalFunction.
   326  func (f internalFunction) Definition() api.FunctionDefinition {
   327  	return f.definition()
   328  }
   329  
   330  // SourceOffsetForPC implements the same method as documented on
   331  // experimental.InternalFunction.
   332  func (f internalFunction) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 {
   333  	offsetsMap := f.parent.offsetsInWasmBinary
   334  	if uint64(pc) < uint64(len(offsetsMap)) {
   335  		return offsetsMap[pc]
   336  	}
   337  	return 0
   338  }
   339  
// callFrameStackSize is the call frame size passed to wazeroir.NewCompiler.
// The interpreter doesn't maintain call frames in the value stack (they live in
// callEngine.frames), so the zero size is passed to the IR.
const callFrameStackSize = 0
   342  
   343  // CompileModule implements the same method as documented on wasm.Engine.
   344  func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error {
   345  	if _, ok := e.getCompiledFunctions(module); ok { // cache hit!
   346  		return nil
   347  	}
   348  
   349  	funcs := make([]compiledFunction, len(module.FunctionSection))
   350  	irCompiler, err := wazeroir.NewCompiler(e.enabledFeatures, callFrameStackSize, module, ensureTermination)
   351  	if err != nil {
   352  		return err
   353  	}
   354  	imported := module.ImportFunctionCount
   355  	for i := range module.CodeSection {
   356  		var lsn experimental.FunctionListener
   357  		if i < len(listeners) {
   358  			lsn = listeners[i]
   359  		}
   360  
   361  		compiled := &funcs[i]
   362  		// If this is the host function, there's nothing to do as the runtime representation of
   363  		// host function in interpreter is its Go function itself as opposed to Wasm functions,
   364  		// which need to be compiled down to wazeroir.
   365  		if codeSeg := &module.CodeSection[i]; codeSeg.GoFunc != nil {
   366  			compiled.hostFn = codeSeg.GoFunc
   367  		} else {
   368  			ir, err := irCompiler.Next()
   369  			if err != nil {
   370  				return err
   371  			}
   372  			err = e.lowerIR(ir, compiled)
   373  			if err != nil {
   374  				def := module.FunctionDefinition(uint32(i) + module.ImportFunctionCount)
   375  				return fmt.Errorf("failed to lower func[%s] to wazeroir: %w", def.DebugName(), err)
   376  			}
   377  		}
   378  		compiled.source = module
   379  		compiled.ensureTermination = ensureTermination
   380  		compiled.listener = lsn
   381  		compiled.index = imported + uint32(i)
   382  	}
   383  	e.addCompiledFunctions(module, funcs)
   384  	return nil
   385  }
   386  
   387  // NewModuleEngine implements the same method as documented on wasm.Engine.
   388  func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInstance) (wasm.ModuleEngine, error) {
   389  	me := &moduleEngine{
   390  		parentEngine: e,
   391  		functions:    make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)),
   392  	}
   393  
   394  	codes, ok := e.getCompiledFunctions(module)
   395  	if !ok {
   396  		return nil, errors.New("source module must be compiled before instantiation")
   397  	}
   398  
   399  	for i := range codes {
   400  		c := &codes[i]
   401  		offset := i + int(module.ImportFunctionCount)
   402  		typeIndex := module.FunctionSection[i]
   403  		me.functions[offset] = function{
   404  			moduleInstance: instance,
   405  			typeID:         instance.TypeIDs[typeIndex],
   406  			funcType:       &module.TypeSection[typeIndex],
   407  			parent:         c,
   408  		}
   409  	}
   410  	return me, nil
   411  }
   412  
   413  // lowerIR lowers the wazeroir operations to engine friendly struct.
   414  func (e *engine) lowerIR(ir *wazeroir.CompilationResult, ret *compiledFunction) error {
   415  	// Copy the body from the result.
   416  	ret.body = make([]wazeroir.UnionOperation, len(ir.Operations))
   417  	copy(ret.body, ir.Operations)
   418  	// Also copy the offsets if necessary.
   419  	if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 {
   420  		ret.offsetsInWasmBinary = make([]uint64, len(offsets))
   421  		copy(ret.offsetsInWasmBinary, offsets)
   422  	}
   423  
   424  	// First, we iterate all labels, and resolve the address.
   425  	for i := range ret.body {
   426  		op := &ret.body[i]
   427  		switch op.Kind {
   428  		case wazeroir.OperationKindLabel:
   429  			label := wazeroir.Label(op.U1)
   430  			address := uint64(i)
   431  
   432  			kind, fid := label.Kind(), label.FrameID()
   433  			frameToAddresses := e.labelAddressResolutionCache[label.Kind()]
   434  			// Expand the slice if necessary.
   435  			if diff := fid - len(frameToAddresses) + 1; diff > 0 {
   436  				for j := 0; j < diff; j++ {
   437  					frameToAddresses = append(frameToAddresses, 0)
   438  				}
   439  			}
   440  			frameToAddresses[fid] = address
   441  			e.labelAddressResolutionCache[kind] = frameToAddresses
   442  		}
   443  	}
   444  
   445  	// Then resolve the label as the index to the body.
   446  	for i := range ret.body {
   447  		op := &ret.body[i]
   448  		switch op.Kind {
   449  		case wazeroir.OperationKindBr:
   450  			e.setLabelAddress(&op.U1, wazeroir.Label(op.U1))
   451  		case wazeroir.OperationKindBrIf:
   452  			e.setLabelAddress(&op.U1, wazeroir.Label(op.U1))
   453  			e.setLabelAddress(&op.U2, wazeroir.Label(op.U2))
   454  		case wazeroir.OperationKindBrTable:
   455  			for j := 0; j < len(op.Us); j += 2 {
   456  				target := op.Us[j]
   457  				e.setLabelAddress(&op.Us[j], wazeroir.Label(target))
   458  			}
   459  		}
   460  	}
   461  
   462  	// Reuses the slices for the subsequent compilation, so clear the content here.
   463  	for i := range e.labelAddressResolutionCache {
   464  		e.labelAddressResolutionCache[i] = e.labelAddressResolutionCache[i][:0]
   465  	}
   466  	return nil
   467  }
   468  
   469  func (e *engine) setLabelAddress(op *uint64, label wazeroir.Label) {
   470  	if label.IsReturnTarget() {
   471  		// Jmp to the end of the possible binary.
   472  		*op = math.MaxUint64
   473  	} else {
   474  		*op = e.labelAddressResolutionCache[label.Kind()][label.FrameID()]
   475  	}
   476  }
   477  
   478  // ResolveImportedFunction implements wasm.ModuleEngine.
   479  func (e *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {
   480  	imported := importedModuleEngine.(*moduleEngine)
   481  	e.functions[index] = imported.functions[indexInImportedModule]
   482  }
   483  
// ResolveImportedMemory implements wasm.ModuleEngine.
//
// It is a no-op for the interpreter.
func (e *moduleEngine) ResolveImportedMemory(wasm.ModuleEngine) {}
   486  
// DoneInstantiation implements wasm.ModuleEngine.
//
// It is a no-op for the interpreter.
func (e *moduleEngine) DoneInstantiation() {}
   489  
   490  // FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine.
   491  func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference {
   492  	return uintptr(unsafe.Pointer(&e.functions[funcIndex]))
   493  }
   494  
   495  // NewFunction implements the same method as documented on wasm.ModuleEngine.
   496  func (e *moduleEngine) NewFunction(index wasm.Index) (ce api.Function) {
   497  	// Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to
   498  	// code on close aren't locked, neither is this read.
   499  	compiled := &e.functions[index]
   500  	return e.newCallEngine(compiled)
   501  }
   502  
   503  // LookupFunction implements the same method as documented on wasm.ModuleEngine.
   504  func (e *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) {
   505  	if tableOffset >= uint32(len(t.References)) {
   506  		panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   507  	}
   508  	rawPtr := t.References[tableOffset]
   509  	if rawPtr == 0 {
   510  		panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   511  	}
   512  
   513  	tf := functionFromUintptr(rawPtr)
   514  	if tf.typeID != typeId {
   515  		panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   516  	}
   517  	return tf.moduleInstance, tf.parent.index
   518  }
   519  
   520  // Definition implements the same method as documented on api.Function.
   521  func (ce *callEngine) Definition() api.FunctionDefinition {
   522  	return ce.f.definition()
   523  }
   524  
   525  func (f *function) definition() api.FunctionDefinition {
   526  	compiled := f.parent
   527  	return compiled.source.FunctionDefinition(compiled.index)
   528  }
   529  
   530  // Call implements the same method as documented on api.Function.
   531  func (ce *callEngine) Call(ctx context.Context, params ...uint64) (results []uint64, err error) {
   532  	ft := ce.f.funcType
   533  	if n := ft.ParamNumInUint64; n != len(params) {
   534  		return nil, fmt.Errorf("expected %d params, but passed %d", n, len(params))
   535  	}
   536  	return ce.call(ctx, params, nil)
   537  }
   538  
   539  // CallWithStack implements the same method as documented on api.Function.
   540  func (ce *callEngine) CallWithStack(ctx context.Context, stack []uint64) error {
   541  	params, results, err := wasm.SplitCallStack(ce.f.funcType, stack)
   542  	if err != nil {
   543  		return err
   544  	}
   545  	_, err = ce.call(ctx, params, results)
   546  	return err
   547  }
   548  
// call is the shared implementation behind Call and CallWithStack. It pushes
// params onto the value stack, runs the entry function, and pops the results.
//
// results may be nil, in which case a slice of ResultNumInUint64 elements is
// allocated when the function has results; otherwise the results are written
// into the provided slice. Either way the slice is returned.
//
// A panic raised while the function runs is recovered and converted into the
// returned error by recoverOnCall (which re-panics for *snapshot values).
func (ce *callEngine) call(ctx context.Context, params, results []uint64) (_ []uint64, err error) {
	m := ce.f.moduleInstance
	if ce.f.parent.ensureTermination {
		select {
		case <-ctx.Done():
			// If the provided context is already done, close the call context
			// and return the error.
			m.CloseWithCtxErr(ctx)
			return nil, m.FailIfClosed()
		default:
		}
	}

	// Expose this call engine as the snapshotter only when the caller opted in
	// through the context.
	if ctx.Value(experimental.EnableSnapshotterKey{}) != nil {
		ctx = context.WithValue(ctx, experimental.SnapshotterKey{}, ce)
	}

	defer func() {
		// If the module closed during the call, and the call didn't err for another reason, set an ExitError.
		if err == nil {
			err = m.FailIfClosed()
		}
		// TODO: ^^ Will not fail if the function was imported from a closed module.

		// A recovered panic takes precedence over the error set above.
		if v := recover(); v != nil {
			err = ce.recoverOnCall(ctx, m, v)
		}
	}()

	ce.pushValues(params)

	if ce.f.parent.ensureTermination {
		// done is deferred after the recovering defer above, so it runs first
		// when this function unwinds.
		done := m.CloseModuleOnCanceledOrTimeout(ctx)
		defer done()
	}

	ce.callFunction(ctx, m, ce.f)

	// This returns a safe copy of the results, instead of a slice view. If we
	// returned a re-slice, the caller could accidentally or purposefully
	// corrupt the stack of subsequent calls.
	ft := ce.f.funcType
	if results == nil && ft.ResultNumInUint64 > 0 {
		results = make([]uint64, ft.ResultNumInUint64)
	}
	ce.popValues(results)
	return results, nil
}
   597  
// functionListenerInvocation captures arguments needed to perform function
// listener invocations when unwinding the call stack: the listener itself
// (embedded) and the definition of the function it was registered for.
type functionListenerInvocation struct {
	experimental.FunctionListener
	def api.FunctionDefinition
}
   604  
   605  // recoverOnCall takes the recovered value `recoverOnCall`, and wraps it
   606  // with the call frame stack traces. Also, reset the state of callEngine
   607  // so that it can be used for the subsequent calls.
   608  func (ce *callEngine) recoverOnCall(ctx context.Context, m *wasm.ModuleInstance, v interface{}) (err error) {
   609  	if s, ok := v.(*snapshot); ok {
   610  		// A snapshot that wasn't handled was created by a different call engine possibly from a nested wasm invocation,
   611  		// let it propagate up to be handled by the caller.
   612  		panic(s)
   613  	}
   614  
   615  	builder := wasmdebug.NewErrorBuilder()
   616  	frameCount := len(ce.frames)
   617  	functionListeners := make([]functionListenerInvocation, 0, 16)
   618  
   619  	for i := 0; i < frameCount; i++ {
   620  		frame := ce.popFrame()
   621  		f := frame.f
   622  		def := f.definition()
   623  		var sources []string
   624  		if parent := frame.f.parent; parent.body != nil && len(parent.offsetsInWasmBinary) > 0 {
   625  			sources = parent.source.DWARFLines.Line(parent.offsetsInWasmBinary[frame.pc])
   626  		}
   627  		builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
   628  		if f.parent.listener != nil {
   629  			functionListeners = append(functionListeners, functionListenerInvocation{
   630  				FunctionListener: f.parent.listener,
   631  				def:              f.definition(),
   632  			})
   633  		}
   634  	}
   635  
   636  	err = builder.FromRecovered(v)
   637  	for i := range functionListeners {
   638  		functionListeners[i].Abort(ctx, m, functionListeners[i].def, err)
   639  	}
   640  
   641  	// Allows the reuse of CallEngine.
   642  	ce.stack, ce.frames = ce.stack[:0], ce.frames[:0]
   643  	return
   644  }
   645  
   646  func (ce *callEngine) callFunction(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   647  	if f.parent.hostFn != nil {
   648  		ce.callGoFuncWithStack(ctx, m, f)
   649  	} else if lsn := f.parent.listener; lsn != nil {
   650  		ce.callNativeFuncWithListener(ctx, m, f, lsn)
   651  	} else {
   652  		ce.callNativeFunc(ctx, m, f)
   653  	}
   654  }
   655  
   656  func (ce *callEngine) callGoFunc(ctx context.Context, m *wasm.ModuleInstance, f *function, stack []uint64) {
   657  	typ := f.funcType
   658  	lsn := f.parent.listener
   659  	if lsn != nil {
   660  		params := stack[:typ.ParamNumInUint64]
   661  		ce.stackIterator.reset(ce.stack, ce.frames, f)
   662  		lsn.Before(ctx, m, f.definition(), params, &ce.stackIterator)
   663  		ce.stackIterator.clear()
   664  	}
   665  	frame := &callFrame{f: f, base: len(ce.stack)}
   666  	ce.pushFrame(frame)
   667  
   668  	fn := f.parent.hostFn
   669  	switch fn := fn.(type) {
   670  	case api.GoModuleFunction:
   671  		fn.Call(ctx, m, stack)
   672  	case api.GoFunction:
   673  		fn.Call(ctx, stack)
   674  	}
   675  
   676  	ce.popFrame()
   677  	if lsn != nil {
   678  		// TODO: This doesn't get the error due to use of panic to propagate them.
   679  		results := stack[:typ.ResultNumInUint64]
   680  		lsn.After(ctx, m, f.definition(), results)
   681  	}
   682  }
   683  
   684  func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   685  	frame := &callFrame{f: f, base: len(ce.stack)}
   686  	moduleInst := f.moduleInstance
   687  	functions := moduleInst.Engine.(*moduleEngine).functions
   688  	memoryInst := moduleInst.MemoryInstance
   689  	globals := moduleInst.Globals
   690  	tables := moduleInst.Tables
   691  	typeIDs := moduleInst.TypeIDs
   692  	dataInstances := moduleInst.DataInstances
   693  	elementInstances := moduleInst.ElementInstances
   694  	ce.pushFrame(frame)
   695  	body := frame.f.parent.body
   696  	bodyLen := uint64(len(body))
   697  	for frame.pc < bodyLen {
   698  		op := &body[frame.pc]
   699  		// TODO: add description of each operation/case
   700  		// on, for example, how many args are used,
   701  		// how the stack is modified, etc.
   702  		switch op.Kind {
   703  		case wazeroir.OperationKindBuiltinFunctionCheckExitCode:
   704  			if err := m.FailIfClosed(); err != nil {
   705  				panic(err)
   706  			}
   707  			frame.pc++
   708  		case wazeroir.OperationKindUnreachable:
   709  			panic(wasmruntime.ErrRuntimeUnreachable)
   710  		case wazeroir.OperationKindBr:
   711  			frame.pc = op.U1
   712  		case wazeroir.OperationKindBrIf:
   713  			if ce.popValue() > 0 {
   714  				ce.drop(op.U3)
   715  				frame.pc = op.U1
   716  			} else {
   717  				frame.pc = op.U2
   718  			}
   719  		case wazeroir.OperationKindBrTable:
   720  			v := ce.popValue()
   721  			defaultAt := uint64(len(op.Us))/2 - 1
   722  			if v > defaultAt {
   723  				v = defaultAt
   724  			}
   725  			v *= 2
   726  			ce.drop(op.Us[v+1])
   727  			frame.pc = op.Us[v]
   728  		case wazeroir.OperationKindCall:
   729  			func() {
   730  				defer func() {
   731  					if r := recover(); r != nil {
   732  						if s, ok := r.(*snapshot); ok {
   733  							if s.ce == ce {
   734  								s.doRestore()
   735  								frame = ce.frames[len(ce.frames)-1]
   736  								body = frame.f.parent.body
   737  								bodyLen = uint64(len(body))
   738  							} else {
   739  								panic(r)
   740  							}
   741  						} else {
   742  							panic(r)
   743  						}
   744  					}
   745  				}()
   746  				ce.callFunction(ctx, f.moduleInstance, &functions[op.U1])
   747  			}()
   748  			frame.pc++
   749  		case wazeroir.OperationKindCallIndirect:
   750  			offset := ce.popValue()
   751  			table := tables[op.U2]
   752  			if offset >= uint64(len(table.References)) {
   753  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   754  			}
   755  			rawPtr := table.References[offset]
   756  			if rawPtr == 0 {
   757  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   758  			}
   759  
   760  			tf := functionFromUintptr(rawPtr)
   761  			if tf.typeID != typeIDs[op.U1] {
   762  				panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   763  			}
   764  
   765  			ce.callFunction(ctx, f.moduleInstance, tf)
   766  			frame.pc++
   767  		case wazeroir.OperationKindDrop:
   768  			ce.drop(op.U1)
   769  			frame.pc++
   770  		case wazeroir.OperationKindSelect:
   771  			c := ce.popValue()
   772  			if op.B3 { // Target is vector.
   773  				x2Hi, x2Lo := ce.popValue(), ce.popValue()
   774  				if c == 0 {
   775  					_, _ = ce.popValue(), ce.popValue() // discard the x1's lo and hi bits.
   776  					ce.pushValue(x2Lo)
   777  					ce.pushValue(x2Hi)
   778  				}
   779  			} else {
   780  				v2 := ce.popValue()
   781  				if c == 0 {
   782  					_ = ce.popValue()
   783  					ce.pushValue(v2)
   784  				}
   785  			}
   786  			frame.pc++
   787  		case wazeroir.OperationKindPick:
   788  			index := len(ce.stack) - 1 - int(op.U1)
   789  			ce.pushValue(ce.stack[index])
   790  			if op.B3 { // V128 value target.
   791  				ce.pushValue(ce.stack[index+1])
   792  			}
   793  			frame.pc++
   794  		case wazeroir.OperationKindSet:
   795  			if op.B3 { // V128 value target.
   796  				lowIndex := len(ce.stack) - 1 - int(op.U1)
   797  				highIndex := lowIndex + 1
   798  				hi, lo := ce.popValue(), ce.popValue()
   799  				ce.stack[lowIndex], ce.stack[highIndex] = lo, hi
   800  			} else {
   801  				index := len(ce.stack) - 1 - int(op.U1)
   802  				ce.stack[index] = ce.popValue()
   803  			}
   804  			frame.pc++
   805  		case wazeroir.OperationKindGlobalGet:
   806  			g := globals[op.U1]
   807  			ce.pushValue(g.Val)
   808  			if g.Type.ValType == wasm.ValueTypeV128 {
   809  				ce.pushValue(g.ValHi)
   810  			}
   811  			frame.pc++
   812  		case wazeroir.OperationKindGlobalSet:
   813  			g := globals[op.U1]
   814  			if g.Type.ValType == wasm.ValueTypeV128 {
   815  				g.ValHi = ce.popValue()
   816  			}
   817  			g.Val = ce.popValue()
   818  			frame.pc++
   819  		case wazeroir.OperationKindLoad:
   820  			offset := ce.popMemoryOffset(op)
   821  			switch wazeroir.UnsignedType(op.B1) {
   822  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
   823  				if val, ok := memoryInst.ReadUint32Le(offset); !ok {
   824  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   825  				} else {
   826  					ce.pushValue(uint64(val))
   827  				}
   828  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
   829  				if val, ok := memoryInst.ReadUint64Le(offset); !ok {
   830  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   831  				} else {
   832  					ce.pushValue(val)
   833  				}
   834  			}
   835  			frame.pc++
   836  		case wazeroir.OperationKindLoad8:
   837  			val, ok := memoryInst.ReadByte(ce.popMemoryOffset(op))
   838  			if !ok {
   839  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   840  			}
   841  
   842  			switch wazeroir.SignedInt(op.B1) {
   843  			case wazeroir.SignedInt32:
   844  				ce.pushValue(uint64(uint32(int8(val))))
   845  			case wazeroir.SignedInt64:
   846  				ce.pushValue(uint64(int8(val)))
   847  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
   848  				ce.pushValue(uint64(val))
   849  			}
   850  			frame.pc++
   851  		case wazeroir.OperationKindLoad16:
   852  
   853  			val, ok := memoryInst.ReadUint16Le(ce.popMemoryOffset(op))
   854  			if !ok {
   855  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   856  			}
   857  
   858  			switch wazeroir.SignedInt(op.B1) {
   859  			case wazeroir.SignedInt32:
   860  				ce.pushValue(uint64(uint32(int16(val))))
   861  			case wazeroir.SignedInt64:
   862  				ce.pushValue(uint64(int16(val)))
   863  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
   864  				ce.pushValue(uint64(val))
   865  			}
   866  			frame.pc++
   867  		case wazeroir.OperationKindLoad32:
   868  			val, ok := memoryInst.ReadUint32Le(ce.popMemoryOffset(op))
   869  			if !ok {
   870  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   871  			}
   872  
   873  			if op.B1 == 1 { // Signed
   874  				ce.pushValue(uint64(int32(val)))
   875  			} else {
   876  				ce.pushValue(uint64(val))
   877  			}
   878  			frame.pc++
   879  		case wazeroir.OperationKindStore:
   880  			val := ce.popValue()
   881  			offset := ce.popMemoryOffset(op)
   882  			switch wazeroir.UnsignedType(op.B1) {
   883  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
   884  				if !memoryInst.WriteUint32Le(offset, uint32(val)) {
   885  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   886  				}
   887  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
   888  				if !memoryInst.WriteUint64Le(offset, val) {
   889  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   890  				}
   891  			}
   892  			frame.pc++
   893  		case wazeroir.OperationKindStore8:
   894  			val := byte(ce.popValue())
   895  			offset := ce.popMemoryOffset(op)
   896  			if !memoryInst.WriteByte(offset, val) {
   897  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   898  			}
   899  			frame.pc++
   900  		case wazeroir.OperationKindStore16:
   901  			val := uint16(ce.popValue())
   902  			offset := ce.popMemoryOffset(op)
   903  			if !memoryInst.WriteUint16Le(offset, val) {
   904  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   905  			}
   906  			frame.pc++
   907  		case wazeroir.OperationKindStore32:
   908  			val := uint32(ce.popValue())
   909  			offset := ce.popMemoryOffset(op)
   910  			if !memoryInst.WriteUint32Le(offset, val) {
   911  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   912  			}
   913  			frame.pc++
   914  		case wazeroir.OperationKindMemorySize:
   915  			ce.pushValue(uint64(memoryInst.PageSize()))
   916  			frame.pc++
   917  		case wazeroir.OperationKindMemoryGrow:
   918  			n := ce.popValue()
   919  			if res, ok := memoryInst.Grow(uint32(n)); !ok {
   920  				ce.pushValue(uint64(0xffffffff)) // = -1 in signed 32-bit integer.
   921  			} else {
   922  				ce.pushValue(uint64(res))
   923  			}
   924  			frame.pc++
   925  		case wazeroir.OperationKindConstI32, wazeroir.OperationKindConstI64,
   926  			wazeroir.OperationKindConstF32, wazeroir.OperationKindConstF64:
   927  			ce.pushValue(op.U1)
   928  			frame.pc++
   929  		case wazeroir.OperationKindEq:
   930  			var b bool
   931  			switch wazeroir.UnsignedType(op.B1) {
   932  			case wazeroir.UnsignedTypeI32:
   933  				v2, v1 := ce.popValue(), ce.popValue()
   934  				b = uint32(v1) == uint32(v2)
   935  			case wazeroir.UnsignedTypeI64:
   936  				v2, v1 := ce.popValue(), ce.popValue()
   937  				b = v1 == v2
   938  			case wazeroir.UnsignedTypeF32:
   939  				v2, v1 := ce.popValue(), ce.popValue()
   940  				b = math.Float32frombits(uint32(v2)) == math.Float32frombits(uint32(v1))
   941  			case wazeroir.UnsignedTypeF64:
   942  				v2, v1 := ce.popValue(), ce.popValue()
   943  				b = math.Float64frombits(v2) == math.Float64frombits(v1)
   944  			}
   945  			if b {
   946  				ce.pushValue(1)
   947  			} else {
   948  				ce.pushValue(0)
   949  			}
   950  			frame.pc++
   951  		case wazeroir.OperationKindNe:
   952  			var b bool
   953  			switch wazeroir.UnsignedType(op.B1) {
   954  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeI64:
   955  				v2, v1 := ce.popValue(), ce.popValue()
   956  				b = v1 != v2
   957  			case wazeroir.UnsignedTypeF32:
   958  				v2, v1 := ce.popValue(), ce.popValue()
   959  				b = math.Float32frombits(uint32(v2)) != math.Float32frombits(uint32(v1))
   960  			case wazeroir.UnsignedTypeF64:
   961  				v2, v1 := ce.popValue(), ce.popValue()
   962  				b = math.Float64frombits(v2) != math.Float64frombits(v1)
   963  			}
   964  			if b {
   965  				ce.pushValue(1)
   966  			} else {
   967  				ce.pushValue(0)
   968  			}
   969  			frame.pc++
   970  		case wazeroir.OperationKindEqz:
   971  			if ce.popValue() == 0 {
   972  				ce.pushValue(1)
   973  			} else {
   974  				ce.pushValue(0)
   975  			}
   976  			frame.pc++
   977  		case wazeroir.OperationKindLt:
   978  			v2 := ce.popValue()
   979  			v1 := ce.popValue()
   980  			var b bool
   981  			switch wazeroir.SignedType(op.B1) {
   982  			case wazeroir.SignedTypeInt32:
   983  				b = int32(v1) < int32(v2)
   984  			case wazeroir.SignedTypeInt64:
   985  				b = int64(v1) < int64(v2)
   986  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
   987  				b = v1 < v2
   988  			case wazeroir.SignedTypeFloat32:
   989  				b = math.Float32frombits(uint32(v1)) < math.Float32frombits(uint32(v2))
   990  			case wazeroir.SignedTypeFloat64:
   991  				b = math.Float64frombits(v1) < math.Float64frombits(v2)
   992  			}
   993  			if b {
   994  				ce.pushValue(1)
   995  			} else {
   996  				ce.pushValue(0)
   997  			}
   998  			frame.pc++
   999  		case wazeroir.OperationKindGt:
  1000  			v2 := ce.popValue()
  1001  			v1 := ce.popValue()
  1002  			var b bool
  1003  			switch wazeroir.SignedType(op.B1) {
  1004  			case wazeroir.SignedTypeInt32:
  1005  				b = int32(v1) > int32(v2)
  1006  			case wazeroir.SignedTypeInt64:
  1007  				b = int64(v1) > int64(v2)
  1008  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1009  				b = v1 > v2
  1010  			case wazeroir.SignedTypeFloat32:
  1011  				b = math.Float32frombits(uint32(v1)) > math.Float32frombits(uint32(v2))
  1012  			case wazeroir.SignedTypeFloat64:
  1013  				b = math.Float64frombits(v1) > math.Float64frombits(v2)
  1014  			}
  1015  			if b {
  1016  				ce.pushValue(1)
  1017  			} else {
  1018  				ce.pushValue(0)
  1019  			}
  1020  			frame.pc++
  1021  		case wazeroir.OperationKindLe:
  1022  			v2 := ce.popValue()
  1023  			v1 := ce.popValue()
  1024  			var b bool
  1025  			switch wazeroir.SignedType(op.B1) {
  1026  			case wazeroir.SignedTypeInt32:
  1027  				b = int32(v1) <= int32(v2)
  1028  			case wazeroir.SignedTypeInt64:
  1029  				b = int64(v1) <= int64(v2)
  1030  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1031  				b = v1 <= v2
  1032  			case wazeroir.SignedTypeFloat32:
  1033  				b = math.Float32frombits(uint32(v1)) <= math.Float32frombits(uint32(v2))
  1034  			case wazeroir.SignedTypeFloat64:
  1035  				b = math.Float64frombits(v1) <= math.Float64frombits(v2)
  1036  			}
  1037  			if b {
  1038  				ce.pushValue(1)
  1039  			} else {
  1040  				ce.pushValue(0)
  1041  			}
  1042  			frame.pc++
  1043  		case wazeroir.OperationKindGe:
  1044  			v2 := ce.popValue()
  1045  			v1 := ce.popValue()
  1046  			var b bool
  1047  			switch wazeroir.SignedType(op.B1) {
  1048  			case wazeroir.SignedTypeInt32:
  1049  				b = int32(v1) >= int32(v2)
  1050  			case wazeroir.SignedTypeInt64:
  1051  				b = int64(v1) >= int64(v2)
  1052  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1053  				b = v1 >= v2
  1054  			case wazeroir.SignedTypeFloat32:
  1055  				b = math.Float32frombits(uint32(v1)) >= math.Float32frombits(uint32(v2))
  1056  			case wazeroir.SignedTypeFloat64:
  1057  				b = math.Float64frombits(v1) >= math.Float64frombits(v2)
  1058  			}
  1059  			if b {
  1060  				ce.pushValue(1)
  1061  			} else {
  1062  				ce.pushValue(0)
  1063  			}
  1064  			frame.pc++
  1065  		case wazeroir.OperationKindAdd:
  1066  			v2 := ce.popValue()
  1067  			v1 := ce.popValue()
  1068  			switch wazeroir.UnsignedType(op.B1) {
  1069  			case wazeroir.UnsignedTypeI32:
  1070  				v := uint32(v1) + uint32(v2)
  1071  				ce.pushValue(uint64(v))
  1072  			case wazeroir.UnsignedTypeI64:
  1073  				ce.pushValue(v1 + v2)
  1074  			case wazeroir.UnsignedTypeF32:
  1075  				ce.pushValue(addFloat32bits(uint32(v1), uint32(v2)))
  1076  			case wazeroir.UnsignedTypeF64:
  1077  				v := math.Float64frombits(v1) + math.Float64frombits(v2)
  1078  				ce.pushValue(math.Float64bits(v))
  1079  			}
  1080  			frame.pc++
  1081  		case wazeroir.OperationKindSub:
  1082  			v2 := ce.popValue()
  1083  			v1 := ce.popValue()
  1084  			switch wazeroir.UnsignedType(op.B1) {
  1085  			case wazeroir.UnsignedTypeI32:
  1086  				ce.pushValue(uint64(uint32(v1) - uint32(v2)))
  1087  			case wazeroir.UnsignedTypeI64:
  1088  				ce.pushValue(v1 - v2)
  1089  			case wazeroir.UnsignedTypeF32:
  1090  				ce.pushValue(subFloat32bits(uint32(v1), uint32(v2)))
  1091  			case wazeroir.UnsignedTypeF64:
  1092  				v := math.Float64frombits(v1) - math.Float64frombits(v2)
  1093  				ce.pushValue(math.Float64bits(v))
  1094  			}
  1095  			frame.pc++
  1096  		case wazeroir.OperationKindMul:
  1097  			v2 := ce.popValue()
  1098  			v1 := ce.popValue()
  1099  			switch wazeroir.UnsignedType(op.B1) {
  1100  			case wazeroir.UnsignedTypeI32:
  1101  				ce.pushValue(uint64(uint32(v1) * uint32(v2)))
  1102  			case wazeroir.UnsignedTypeI64:
  1103  				ce.pushValue(v1 * v2)
  1104  			case wazeroir.UnsignedTypeF32:
  1105  				ce.pushValue(mulFloat32bits(uint32(v1), uint32(v2)))
  1106  			case wazeroir.UnsignedTypeF64:
  1107  				v := math.Float64frombits(v2) * math.Float64frombits(v1)
  1108  				ce.pushValue(math.Float64bits(v))
  1109  			}
  1110  			frame.pc++
  1111  		case wazeroir.OperationKindClz:
  1112  			v := ce.popValue()
  1113  			if op.B1 == 0 {
  1114  				// UnsignedInt32
  1115  				ce.pushValue(uint64(bits.LeadingZeros32(uint32(v))))
  1116  			} else {
  1117  				// UnsignedInt64
  1118  				ce.pushValue(uint64(bits.LeadingZeros64(v)))
  1119  			}
  1120  			frame.pc++
  1121  		case wazeroir.OperationKindCtz:
  1122  			v := ce.popValue()
  1123  			if op.B1 == 0 {
  1124  				// UnsignedInt32
  1125  				ce.pushValue(uint64(bits.TrailingZeros32(uint32(v))))
  1126  			} else {
  1127  				// UnsignedInt64
  1128  				ce.pushValue(uint64(bits.TrailingZeros64(v)))
  1129  			}
  1130  			frame.pc++
  1131  		case wazeroir.OperationKindPopcnt:
  1132  			v := ce.popValue()
  1133  			if op.B1 == 0 {
  1134  				// UnsignedInt32
  1135  				ce.pushValue(uint64(bits.OnesCount32(uint32(v))))
  1136  			} else {
  1137  				// UnsignedInt64
  1138  				ce.pushValue(uint64(bits.OnesCount64(v)))
  1139  			}
  1140  			frame.pc++
  1141  		case wazeroir.OperationKindDiv:
  1142  			// If an integer, check we won't divide by zero.
  1143  			t := wazeroir.SignedType(op.B1)
  1144  			v2, v1 := ce.popValue(), ce.popValue()
  1145  			switch t {
  1146  			case wazeroir.SignedTypeFloat32, wazeroir.SignedTypeFloat64: // not integers
  1147  			default:
  1148  				if v2 == 0 {
  1149  					panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1150  				}
  1151  			}
  1152  
  1153  			switch t {
  1154  			case wazeroir.SignedTypeInt32:
  1155  				d := int32(v2)
  1156  				n := int32(v1)
  1157  				if n == math.MinInt32 && d == -1 {
  1158  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1159  				}
  1160  				ce.pushValue(uint64(uint32(n / d)))
  1161  			case wazeroir.SignedTypeInt64:
  1162  				d := int64(v2)
  1163  				n := int64(v1)
  1164  				if n == math.MinInt64 && d == -1 {
  1165  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1166  				}
  1167  				ce.pushValue(uint64(n / d))
  1168  			case wazeroir.SignedTypeUint32:
  1169  				d := uint32(v2)
  1170  				n := uint32(v1)
  1171  				ce.pushValue(uint64(n / d))
  1172  			case wazeroir.SignedTypeUint64:
  1173  				d := v2
  1174  				n := v1
  1175  				ce.pushValue(n / d)
  1176  			case wazeroir.SignedTypeFloat32:
  1177  				ce.pushValue(divFloat32bits(uint32(v1), uint32(v2)))
  1178  			case wazeroir.SignedTypeFloat64:
  1179  				ce.pushValue(math.Float64bits(math.Float64frombits(v1) / math.Float64frombits(v2)))
  1180  			}
  1181  			frame.pc++
  1182  		case wazeroir.OperationKindRem:
  1183  			v2, v1 := ce.popValue(), ce.popValue()
  1184  			if v2 == 0 {
  1185  				panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1186  			}
  1187  			switch wazeroir.SignedInt(op.B1) {
  1188  			case wazeroir.SignedInt32:
  1189  				d := int32(v2)
  1190  				n := int32(v1)
  1191  				ce.pushValue(uint64(uint32(n % d)))
  1192  			case wazeroir.SignedInt64:
  1193  				d := int64(v2)
  1194  				n := int64(v1)
  1195  				ce.pushValue(uint64(n % d))
  1196  			case wazeroir.SignedUint32:
  1197  				d := uint32(v2)
  1198  				n := uint32(v1)
  1199  				ce.pushValue(uint64(n % d))
  1200  			case wazeroir.SignedUint64:
  1201  				d := v2
  1202  				n := v1
  1203  				ce.pushValue(n % d)
  1204  			}
  1205  			frame.pc++
  1206  		case wazeroir.OperationKindAnd:
  1207  			v2 := ce.popValue()
  1208  			v1 := ce.popValue()
  1209  			if op.B1 == 0 {
  1210  				// UnsignedInt32
  1211  				ce.pushValue(uint64(uint32(v2) & uint32(v1)))
  1212  			} else {
  1213  				// UnsignedInt64
  1214  				ce.pushValue(uint64(v2 & v1))
  1215  			}
  1216  			frame.pc++
  1217  		case wazeroir.OperationKindOr:
  1218  			v2 := ce.popValue()
  1219  			v1 := ce.popValue()
  1220  			if op.B1 == 0 {
  1221  				// UnsignedInt32
  1222  				ce.pushValue(uint64(uint32(v2) | uint32(v1)))
  1223  			} else {
  1224  				// UnsignedInt64
  1225  				ce.pushValue(uint64(v2 | v1))
  1226  			}
  1227  			frame.pc++
  1228  		case wazeroir.OperationKindXor:
  1229  			v2 := ce.popValue()
  1230  			v1 := ce.popValue()
  1231  			if op.B1 == 0 {
  1232  				// UnsignedInt32
  1233  				ce.pushValue(uint64(uint32(v2) ^ uint32(v1)))
  1234  			} else {
  1235  				// UnsignedInt64
  1236  				ce.pushValue(uint64(v2 ^ v1))
  1237  			}
  1238  			frame.pc++
  1239  		case wazeroir.OperationKindShl:
  1240  			v2 := ce.popValue()
  1241  			v1 := ce.popValue()
  1242  			if op.B1 == 0 {
  1243  				// UnsignedInt32
  1244  				ce.pushValue(uint64(uint32(v1) << (uint32(v2) % 32)))
  1245  			} else {
  1246  				// UnsignedInt64
  1247  				ce.pushValue(v1 << (v2 % 64))
  1248  			}
  1249  			frame.pc++
  1250  		case wazeroir.OperationKindShr:
  1251  			v2 := ce.popValue()
  1252  			v1 := ce.popValue()
  1253  			switch wazeroir.SignedInt(op.B1) {
  1254  			case wazeroir.SignedInt32:
  1255  				ce.pushValue(uint64(uint32(int32(v1) >> (uint32(v2) % 32))))
  1256  			case wazeroir.SignedInt64:
  1257  				ce.pushValue(uint64(int64(v1) >> (v2 % 64)))
  1258  			case wazeroir.SignedUint32:
  1259  				ce.pushValue(uint64(uint32(v1) >> (uint32(v2) % 32)))
  1260  			case wazeroir.SignedUint64:
  1261  				ce.pushValue(v1 >> (v2 % 64))
  1262  			}
  1263  			frame.pc++
  1264  		case wazeroir.OperationKindRotl:
  1265  			v2 := ce.popValue()
  1266  			v1 := ce.popValue()
  1267  			if op.B1 == 0 {
  1268  				// UnsignedInt32
  1269  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), int(v2))))
  1270  			} else {
  1271  				// UnsignedInt64
  1272  				ce.pushValue(uint64(bits.RotateLeft64(v1, int(v2))))
  1273  			}
  1274  			frame.pc++
  1275  		case wazeroir.OperationKindRotr:
  1276  			v2 := ce.popValue()
  1277  			v1 := ce.popValue()
  1278  			if op.B1 == 0 {
  1279  				// UnsignedInt32
  1280  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), -int(v2))))
  1281  			} else {
  1282  				// UnsignedInt64
  1283  				ce.pushValue(uint64(bits.RotateLeft64(v1, -int(v2))))
  1284  			}
  1285  			frame.pc++
  1286  		case wazeroir.OperationKindAbs:
  1287  			if op.B1 == 0 {
  1288  				// Float32
  1289  				const mask uint32 = 1 << 31
  1290  				ce.pushValue(uint64(uint32(ce.popValue()) &^ mask))
  1291  			} else {
  1292  				// Float64
  1293  				const mask uint64 = 1 << 63
  1294  				ce.pushValue(ce.popValue() &^ mask)
  1295  			}
  1296  			frame.pc++
  1297  		case wazeroir.OperationKindNeg:
  1298  			if op.B1 == 0 {
  1299  				// Float32
  1300  				v := -math.Float32frombits(uint32(ce.popValue()))
  1301  				ce.pushValue(uint64(math.Float32bits(v)))
  1302  			} else {
  1303  				// Float64
  1304  				v := -math.Float64frombits(ce.popValue())
  1305  				ce.pushValue(math.Float64bits(v))
  1306  			}
  1307  			frame.pc++
  1308  		case wazeroir.OperationKindCeil:
  1309  			if op.B1 == 0 {
  1310  				// Float32
  1311  				v := moremath.WasmCompatCeilF32(math.Float32frombits(uint32(ce.popValue())))
  1312  				ce.pushValue(uint64(math.Float32bits(v)))
  1313  			} else {
  1314  				// Float64
  1315  				v := moremath.WasmCompatCeilF64(math.Float64frombits(ce.popValue()))
  1316  				ce.pushValue(math.Float64bits(v))
  1317  			}
  1318  			frame.pc++
  1319  		case wazeroir.OperationKindFloor:
  1320  			if op.B1 == 0 {
  1321  				// Float32
  1322  				v := moremath.WasmCompatFloorF32(math.Float32frombits(uint32(ce.popValue())))
  1323  				ce.pushValue(uint64(math.Float32bits(v)))
  1324  			} else {
  1325  				// Float64
  1326  				v := moremath.WasmCompatFloorF64(math.Float64frombits(ce.popValue()))
  1327  				ce.pushValue(math.Float64bits(v))
  1328  			}
  1329  			frame.pc++
  1330  		case wazeroir.OperationKindTrunc:
  1331  			if op.B1 == 0 {
  1332  				// Float32
  1333  				v := moremath.WasmCompatTruncF32(math.Float32frombits(uint32(ce.popValue())))
  1334  				ce.pushValue(uint64(math.Float32bits(v)))
  1335  			} else {
  1336  				// Float64
  1337  				v := moremath.WasmCompatTruncF64(math.Float64frombits(ce.popValue()))
  1338  				ce.pushValue(math.Float64bits(v))
  1339  			}
  1340  			frame.pc++
  1341  		case wazeroir.OperationKindNearest:
  1342  			if op.B1 == 0 {
  1343  				// Float32
  1344  				f := math.Float32frombits(uint32(ce.popValue()))
  1345  				ce.pushValue(uint64(math.Float32bits(moremath.WasmCompatNearestF32(f))))
  1346  			} else {
  1347  				// Float64
  1348  				f := math.Float64frombits(ce.popValue())
  1349  				ce.pushValue(math.Float64bits(moremath.WasmCompatNearestF64(f)))
  1350  			}
  1351  			frame.pc++
  1352  		case wazeroir.OperationKindSqrt:
  1353  			if op.B1 == 0 {
  1354  				// Float32
  1355  				v := math.Sqrt(float64(math.Float32frombits(uint32(ce.popValue()))))
  1356  				ce.pushValue(uint64(math.Float32bits(float32(v))))
  1357  			} else {
  1358  				// Float64
  1359  				v := math.Sqrt(math.Float64frombits(ce.popValue()))
  1360  				ce.pushValue(math.Float64bits(v))
  1361  			}
  1362  			frame.pc++
  1363  		case wazeroir.OperationKindMin:
  1364  			if op.B1 == 0 {
  1365  				// Float32
  1366  				ce.pushValue(WasmCompatMin32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1367  			} else {
  1368  				v2 := math.Float64frombits(ce.popValue())
  1369  				v1 := math.Float64frombits(ce.popValue())
  1370  				ce.pushValue(math.Float64bits(moremath.WasmCompatMin64(v1, v2)))
  1371  			}
  1372  			frame.pc++
  1373  		case wazeroir.OperationKindMax:
  1374  			if op.B1 == 0 {
  1375  				ce.pushValue(WasmCompatMax32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1376  			} else {
  1377  				// Float64
  1378  				v2 := math.Float64frombits(ce.popValue())
  1379  				v1 := math.Float64frombits(ce.popValue())
  1380  				ce.pushValue(math.Float64bits(moremath.WasmCompatMax64(v1, v2)))
  1381  			}
  1382  			frame.pc++
  1383  		case wazeroir.OperationKindCopysign:
  1384  			if op.B1 == 0 {
  1385  				// Float32
  1386  				v2 := uint32(ce.popValue())
  1387  				v1 := uint32(ce.popValue())
  1388  				const signbit = 1 << 31
  1389  				ce.pushValue(uint64(v1&^signbit | v2&signbit))
  1390  			} else {
  1391  				// Float64
  1392  				v2 := ce.popValue()
  1393  				v1 := ce.popValue()
  1394  				const signbit = 1 << 63
  1395  				ce.pushValue(v1&^signbit | v2&signbit)
  1396  			}
  1397  			frame.pc++
  1398  		case wazeroir.OperationKindI32WrapFromI64:
  1399  			ce.pushValue(uint64(uint32(ce.popValue())))
  1400  			frame.pc++
  1401  		case wazeroir.OperationKindITruncFromF:
  1402  			if op.B1 == 0 {
  1403  				// Float32
  1404  				switch wazeroir.SignedInt(op.B2) {
  1405  				case wazeroir.SignedInt32:
  1406  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1407  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1408  						if op.B3 {
  1409  							// non-trapping conversion must cast nan to zero.
  1410  							v = 0
  1411  						} else {
  1412  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1413  						}
  1414  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1415  						if op.B3 {
  1416  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1417  							if v < 0 {
  1418  								v = math.MinInt32
  1419  							} else {
  1420  								v = math.MaxInt32
  1421  							}
  1422  						} else {
  1423  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1424  						}
  1425  					}
  1426  					ce.pushValue(uint64(uint32(int32(v))))
  1427  				case wazeroir.SignedInt64:
  1428  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1429  					res := int64(v)
  1430  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1431  						if op.B3 {
  1432  							// non-trapping conversion must cast nan to zero.
  1433  							res = 0
  1434  						} else {
  1435  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1436  						}
  1437  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1438  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1439  						// and that's why we use '>=' not '>' to check overflow.
  1440  						if op.B3 {
  1441  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1442  							if v < 0 {
  1443  								res = math.MinInt64
  1444  							} else {
  1445  								res = math.MaxInt64
  1446  							}
  1447  						} else {
  1448  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1449  						}
  1450  					}
  1451  					ce.pushValue(uint64(res))
  1452  				case wazeroir.SignedUint32:
  1453  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1454  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1455  						if op.B3 {
  1456  							// non-trapping conversion must cast nan to zero.
  1457  							v = 0
  1458  						} else {
  1459  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1460  						}
  1461  					} else if v < 0 || v > math.MaxUint32 {
  1462  						if op.B3 {
  1463  							// non-trapping conversion must "saturate" the value for overflowing source.
  1464  							if v < 0 {
  1465  								v = 0
  1466  							} else {
  1467  								v = math.MaxUint32
  1468  							}
  1469  						} else {
  1470  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1471  						}
  1472  					}
  1473  					ce.pushValue(uint64(uint32(v)))
  1474  				case wazeroir.SignedUint64:
  1475  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1476  					res := uint64(v)
  1477  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1478  						if op.B3 {
  1479  							// non-trapping conversion must cast nan to zero.
  1480  							res = 0
  1481  						} else {
  1482  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1483  						}
  1484  					} else if v < 0 || v >= math.MaxUint64 {
  1485  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1486  						// and that's why we use '>=' not '>' to check overflow.
  1487  						if op.B3 {
  1488  							// non-trapping conversion must "saturate" the value for overflowing source.
  1489  							if v < 0 {
  1490  								res = 0
  1491  							} else {
  1492  								res = math.MaxUint64
  1493  							}
  1494  						} else {
  1495  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1496  						}
  1497  					}
  1498  					ce.pushValue(res)
  1499  				}
  1500  			} else {
  1501  				// Float64
  1502  				switch wazeroir.SignedInt(op.B2) {
  1503  				case wazeroir.SignedInt32:
  1504  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1505  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1506  						if op.B3 {
  1507  							// non-trapping conversion must cast nan to zero.
  1508  							v = 0
  1509  						} else {
  1510  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1511  						}
  1512  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1513  						if op.B3 {
  1514  							// non-trapping conversion must "saturate" the value for overflowing source.
  1515  							if v < 0 {
  1516  								v = math.MinInt32
  1517  							} else {
  1518  								v = math.MaxInt32
  1519  							}
  1520  						} else {
  1521  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1522  						}
  1523  					}
  1524  					ce.pushValue(uint64(uint32(int32(v))))
  1525  				case wazeroir.SignedInt64:
  1526  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1527  					res := int64(v)
  1528  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1529  						if op.B3 {
  1530  							// non-trapping conversion must cast nan to zero.
  1531  							res = 0
  1532  						} else {
  1533  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1534  						}
  1535  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1536  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1537  						// and that's why we use '>=' not '>' to check overflow.
  1538  						if op.B3 {
  1539  							// non-trapping conversion must "saturate" the value for overflowing source.
  1540  							if v < 0 {
  1541  								res = math.MinInt64
  1542  							} else {
  1543  								res = math.MaxInt64
  1544  							}
  1545  						} else {
  1546  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1547  						}
  1548  					}
  1549  					ce.pushValue(uint64(res))
  1550  				case wazeroir.SignedUint32:
  1551  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1552  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1553  						if op.B3 {
  1554  							// non-trapping conversion must cast nan to zero.
  1555  							v = 0
  1556  						} else {
  1557  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1558  						}
  1559  					} else if v < 0 || v > math.MaxUint32 {
  1560  						if op.B3 {
  1561  							// non-trapping conversion must "saturate" the value for overflowing source.
  1562  							if v < 0 {
  1563  								v = 0
  1564  							} else {
  1565  								v = math.MaxUint32
  1566  							}
  1567  						} else {
  1568  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1569  						}
  1570  					}
  1571  					ce.pushValue(uint64(uint32(v)))
  1572  				case wazeroir.SignedUint64:
  1573  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1574  					res := uint64(v)
  1575  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1576  						if op.B3 {
  1577  							// non-trapping conversion must cast nan to zero.
  1578  							res = 0
  1579  						} else {
  1580  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1581  						}
  1582  					} else if v < 0 || v >= math.MaxUint64 {
  1583  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1584  						// and that's why we use '>=' not '>' to check overflow.
  1585  						if op.B3 {
  1586  							// non-trapping conversion must "saturate" the value for overflowing source.
  1587  							if v < 0 {
  1588  								res = 0
  1589  							} else {
  1590  								res = math.MaxUint64
  1591  							}
  1592  						} else {
  1593  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1594  						}
  1595  					}
  1596  					ce.pushValue(res)
  1597  				}
  1598  			}
  1599  			frame.pc++
  1600  		case wazeroir.OperationKindFConvertFromI:
  1601  			switch wazeroir.SignedInt(op.B1) {
  1602  			case wazeroir.SignedInt32:
  1603  				if op.B2 == 0 {
  1604  					// Float32
  1605  					v := float32(int32(ce.popValue()))
  1606  					ce.pushValue(uint64(math.Float32bits(v)))
  1607  				} else {
  1608  					// Float64
  1609  					v := float64(int32(ce.popValue()))
  1610  					ce.pushValue(math.Float64bits(v))
  1611  				}
  1612  			case wazeroir.SignedInt64:
  1613  				if op.B2 == 0 {
  1614  					// Float32
  1615  					v := float32(int64(ce.popValue()))
  1616  					ce.pushValue(uint64(math.Float32bits(v)))
  1617  				} else {
  1618  					// Float64
  1619  					v := float64(int64(ce.popValue()))
  1620  					ce.pushValue(math.Float64bits(v))
  1621  				}
  1622  			case wazeroir.SignedUint32:
  1623  				if op.B2 == 0 {
  1624  					// Float32
  1625  					v := float32(uint32(ce.popValue()))
  1626  					ce.pushValue(uint64(math.Float32bits(v)))
  1627  				} else {
  1628  					// Float64
  1629  					v := float64(uint32(ce.popValue()))
  1630  					ce.pushValue(math.Float64bits(v))
  1631  				}
  1632  			case wazeroir.SignedUint64:
  1633  				if op.B2 == 0 {
  1634  					// Float32
  1635  					v := float32(ce.popValue())
  1636  					ce.pushValue(uint64(math.Float32bits(v)))
  1637  				} else {
  1638  					// Float64
  1639  					v := float64(ce.popValue())
  1640  					ce.pushValue(math.Float64bits(v))
  1641  				}
  1642  			}
  1643  			frame.pc++
  1644  		case wazeroir.OperationKindF32DemoteFromF64:
  1645  			v := float32(math.Float64frombits(ce.popValue()))
  1646  			ce.pushValue(uint64(math.Float32bits(v)))
  1647  			frame.pc++
  1648  		case wazeroir.OperationKindF64PromoteFromF32:
  1649  			v := float64(math.Float32frombits(uint32(ce.popValue())))
  1650  			ce.pushValue(math.Float64bits(v))
  1651  			frame.pc++
  1652  		case wazeroir.OperationKindExtend:
  1653  			if op.B1 == 1 {
  1654  				// Signed.
  1655  				v := int64(int32(ce.popValue()))
  1656  				ce.pushValue(uint64(v))
  1657  			} else {
  1658  				v := uint64(uint32(ce.popValue()))
  1659  				ce.pushValue(v)
  1660  			}
  1661  			frame.pc++
  1662  		case wazeroir.OperationKindSignExtend32From8:
  1663  			v := uint32(int8(ce.popValue()))
  1664  			ce.pushValue(uint64(v))
  1665  			frame.pc++
  1666  		case wazeroir.OperationKindSignExtend32From16:
  1667  			v := uint32(int16(ce.popValue()))
  1668  			ce.pushValue(uint64(v))
  1669  			frame.pc++
  1670  		case wazeroir.OperationKindSignExtend64From8:
  1671  			v := int64(int8(ce.popValue()))
  1672  			ce.pushValue(uint64(v))
  1673  			frame.pc++
  1674  		case wazeroir.OperationKindSignExtend64From16:
  1675  			v := int64(int16(ce.popValue()))
  1676  			ce.pushValue(uint64(v))
  1677  			frame.pc++
  1678  		case wazeroir.OperationKindSignExtend64From32:
  1679  			v := int64(int32(ce.popValue()))
  1680  			ce.pushValue(uint64(v))
  1681  			frame.pc++
  1682  		case wazeroir.OperationKindMemoryInit:
  1683  			dataInstance := dataInstances[op.U1]
  1684  			copySize := ce.popValue()
  1685  			inDataOffset := ce.popValue()
  1686  			inMemoryOffset := ce.popValue()
  1687  			if inDataOffset+copySize > uint64(len(dataInstance)) ||
  1688  				inMemoryOffset+copySize > uint64(len(memoryInst.Buffer)) {
  1689  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1690  			} else if copySize != 0 {
  1691  				copy(memoryInst.Buffer[inMemoryOffset:inMemoryOffset+copySize], dataInstance[inDataOffset:])
  1692  			}
  1693  			frame.pc++
		case wazeroir.OperationKindDataDrop:
			// Drop the data instance at index op.U1 by clearing its slot; a later MemoryInit on this
			// index then observes a zero-length instance.
			dataInstances[op.U1] = nil
			frame.pc++
  1697  		case wazeroir.OperationKindMemoryCopy:
  1698  			memLen := uint64(len(memoryInst.Buffer))
  1699  			copySize := ce.popValue()
  1700  			sourceOffset := ce.popValue()
  1701  			destinationOffset := ce.popValue()
  1702  			if sourceOffset+copySize > memLen || destinationOffset+copySize > memLen {
  1703  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1704  			} else if copySize != 0 {
  1705  				copy(memoryInst.Buffer[destinationOffset:],
  1706  					memoryInst.Buffer[sourceOffset:sourceOffset+copySize])
  1707  			}
  1708  			frame.pc++
  1709  		case wazeroir.OperationKindMemoryFill:
  1710  			fillSize := ce.popValue()
  1711  			value := byte(ce.popValue())
  1712  			offset := ce.popValue()
  1713  			if fillSize+offset > uint64(len(memoryInst.Buffer)) {
  1714  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1715  			} else if fillSize != 0 {
  1716  				// Uses the copy trick for faster filling buffer.
  1717  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1718  				buf := memoryInst.Buffer[offset : offset+fillSize]
  1719  				buf[0] = value
  1720  				for i := 1; i < len(buf); i *= 2 {
  1721  					copy(buf[i:], buf[:i])
  1722  				}
  1723  			}
  1724  			frame.pc++
  1725  		case wazeroir.OperationKindTableInit:
  1726  			elementInstance := elementInstances[op.U1]
  1727  			copySize := ce.popValue()
  1728  			inElementOffset := ce.popValue()
  1729  			inTableOffset := ce.popValue()
  1730  			table := tables[op.U2]
  1731  			if inElementOffset+copySize > uint64(len(elementInstance)) ||
  1732  				inTableOffset+copySize > uint64(len(table.References)) {
  1733  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1734  			} else if copySize != 0 {
  1735  				copy(table.References[inTableOffset:inTableOffset+copySize], elementInstance[inElementOffset:])
  1736  			}
  1737  			frame.pc++
		case wazeroir.OperationKindElemDrop:
			// Drop the element instance at index op.U1 by clearing its slot; a later TableInit on this
			// index then observes a zero-length instance.
			elementInstances[op.U1] = nil
			frame.pc++
  1741  		case wazeroir.OperationKindTableCopy:
  1742  			srcTable, dstTable := tables[op.U1].References, tables[op.U2].References
  1743  			copySize := ce.popValue()
  1744  			sourceOffset := ce.popValue()
  1745  			destinationOffset := ce.popValue()
  1746  			if sourceOffset+copySize > uint64(len(srcTable)) || destinationOffset+copySize > uint64(len(dstTable)) {
  1747  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1748  			} else if copySize != 0 {
  1749  				copy(dstTable[destinationOffset:], srcTable[sourceOffset:sourceOffset+copySize])
  1750  			}
  1751  			frame.pc++
		case wazeroir.OperationKindRefFunc:
			// Push the address of functions[op.U1], widened to 64 bits, as the reference value.
			ce.pushValue(uint64(uintptr(unsafe.Pointer(&functions[op.U1]))))
			frame.pc++
  1755  		case wazeroir.OperationKindTableGet:
  1756  			table := tables[op.U1]
  1757  
  1758  			offset := ce.popValue()
  1759  			if offset >= uint64(len(table.References)) {
  1760  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1761  			}
  1762  
  1763  			ce.pushValue(uint64(table.References[offset]))
  1764  			frame.pc++
  1765  		case wazeroir.OperationKindTableSet:
  1766  			table := tables[op.U1]
  1767  			ref := ce.popValue()
  1768  
  1769  			offset := ce.popValue()
  1770  			if offset >= uint64(len(table.References)) {
  1771  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1772  			}
  1773  
  1774  			table.References[offset] = uintptr(ref) // externrefs are opaque uint64.
  1775  			frame.pc++
  1776  		case wazeroir.OperationKindTableSize:
  1777  			table := tables[op.U1]
  1778  			ce.pushValue(uint64(len(table.References)))
  1779  			frame.pc++
  1780  		case wazeroir.OperationKindTableGrow:
  1781  			table := tables[op.U1]
  1782  			num, ref := ce.popValue(), ce.popValue()
  1783  			ret := table.Grow(uint32(num), uintptr(ref))
  1784  			ce.pushValue(uint64(ret))
  1785  			frame.pc++
  1786  		case wazeroir.OperationKindTableFill:
  1787  			table := tables[op.U1]
  1788  			num := ce.popValue()
  1789  			ref := uintptr(ce.popValue())
  1790  			offset := ce.popValue()
  1791  			if num+offset > uint64(len(table.References)) {
  1792  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1793  			} else if num > 0 {
  1794  				// Uses the copy trick for faster filling the region with the value.
  1795  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1796  				targetRegion := table.References[offset : offset+num]
  1797  				targetRegion[0] = ref
  1798  				for i := 1; i < len(targetRegion); i *= 2 {
  1799  					copy(targetRegion[i:], targetRegion[:i])
  1800  				}
  1801  			}
  1802  			frame.pc++
  1803  		case wazeroir.OperationKindV128Const:
  1804  			lo, hi := op.U1, op.U2
  1805  			ce.pushValue(lo)
  1806  			ce.pushValue(hi)
  1807  			frame.pc++
		case wazeroir.OperationKindV128Add:
			// Lane-wise addition of two 128-bit vectors. Each vector occupies two stack slots and is
			// popped high half first; y is the operand on top of the stack. op.B1 selects the lane
			// shape. The result is pushed low half first, then high half.
			yHigh, yLow := ce.popValue(), ce.popValue()
			xHigh, xLow := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Each lane is added as a uint8 so it wraps on its own without carrying into its neighbor.
				ce.pushValue(
					uint64(uint8(xLow>>8)+uint8(yLow>>8))<<8 | uint64(uint8(xLow)+uint8(yLow)) |
						uint64(uint8(xLow>>24)+uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)+uint8(yLow>>16))<<16 |
						uint64(uint8(xLow>>40)+uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)+uint8(yLow>>32))<<32 |
						uint64(uint8(xLow>>56)+uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)+uint8(yLow>>48))<<48,
				)
				ce.pushValue(
					uint64(uint8(xHigh>>8)+uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)+uint8(yHigh)) |
						uint64(uint8(xHigh>>24)+uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)+uint8(yHigh>>16))<<16 |
						uint64(uint8(xHigh>>40)+uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)+uint8(yHigh>>32))<<32 |
						uint64(uint8(xHigh>>56)+uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)+uint8(yHigh>>48))<<48,
				)
			case wazeroir.ShapeI16x8:
				// In Go `>>` binds tighter than `+`, so xLow>>16+yLow>>16 is (xLow>>16)+(yLow>>16); the
				// uint16 conversion then keeps only the lane's 16 bits, which equals adding the two
				// truncated lanes as done for the high half below.
				ce.pushValue(
					uint64(uint16(xLow>>16+yLow>>16))<<16 | uint64(uint16(xLow)+uint16(yLow)) |
						uint64(uint16(xLow>>48+yLow>>48))<<48 | uint64(uint16(xLow>>32+yLow>>32))<<32,
				)
				ce.pushValue(
					uint64(uint16(xHigh>>16)+uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)+uint16(yHigh)) |
						uint64(uint16(xHigh>>48)+uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)+uint16(yHigh>>32))<<32,
				)
			case wazeroir.ShapeI32x4:
				ce.pushValue(uint64(uint32(xLow>>32)+uint32(yLow>>32))<<32 | uint64(uint32(xLow)+uint32(yLow)))
				ce.pushValue(uint64(uint32(xHigh>>32)+uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)+uint32(yHigh)))
			case wazeroir.ShapeI64x2:
				// One lane per half: plain wrapping 64-bit addition.
				ce.pushValue(xLow + yLow)
				ce.pushValue(xHigh + yHigh)
			case wazeroir.ShapeF32x4:
				// Two f32 lanes per half, each added through addFloat32bits on the raw bit patterns.
				ce.pushValue(
					addFloat32bits(uint32(xLow), uint32(yLow)) | addFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
				)
				ce.pushValue(
					addFloat32bits(uint32(xHigh), uint32(yHigh)) | addFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
				)
			case wazeroir.ShapeF64x2:
				// One f64 lane per half.
				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow)))
				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh)))
			}
			frame.pc++
		case wazeroir.OperationKindV128Sub:
			// Lane-wise subtraction (x - y) of two 128-bit vectors. Each vector occupies two stack
			// slots and is popped high half first; y is the operand on top of the stack. op.B1 selects
			// the lane shape. The result is pushed low half first, then high half.
			yHigh, yLow := ce.popValue(), ce.popValue()
			xHigh, xLow := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Each lane is subtracted as a uint8 so it wraps on its own without borrowing from its neighbor.
				ce.pushValue(
					uint64(uint8(xLow>>8)-uint8(yLow>>8))<<8 | uint64(uint8(xLow)-uint8(yLow)) |
						uint64(uint8(xLow>>24)-uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)-uint8(yLow>>16))<<16 |
						uint64(uint8(xLow>>40)-uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)-uint8(yLow>>32))<<32 |
						uint64(uint8(xLow>>56)-uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)-uint8(yLow>>48))<<48,
				)
				ce.pushValue(
					uint64(uint8(xHigh>>8)-uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)-uint8(yHigh)) |
						uint64(uint8(xHigh>>24)-uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)-uint8(yHigh>>16))<<16 |
						uint64(uint8(xHigh>>40)-uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)-uint8(yHigh>>32))<<32 |
						uint64(uint8(xHigh>>56)-uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)-uint8(yHigh>>48))<<48,
				)
			case wazeroir.ShapeI16x8:
				ce.pushValue(
					uint64(uint16(xLow>>16)-uint16(yLow>>16))<<16 | uint64(uint16(xLow)-uint16(yLow)) |
						uint64(uint16(xLow>>48)-uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)-uint16(yLow>>32))<<32,
				)
				ce.pushValue(
					uint64(uint16(xHigh>>16)-uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)-uint16(yHigh)) |
						uint64(uint16(xHigh>>48)-uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)-uint16(yHigh>>32))<<32,
				)
			case wazeroir.ShapeI32x4:
				// In Go `>>` binds tighter than `-`, so xLow>>32-yLow>>32 is (xLow>>32)-(yLow>>32); the
				// uint32 conversion then keeps only the lane's 32 bits of the wrapping difference.
				ce.pushValue(uint64(uint32(xLow>>32-yLow>>32))<<32 | uint64(uint32(xLow)-uint32(yLow)))
				ce.pushValue(uint64(uint32(xHigh>>32-yHigh>>32))<<32 | uint64(uint32(xHigh)-uint32(yHigh)))
			case wazeroir.ShapeI64x2:
				// One lane per half: plain wrapping 64-bit subtraction.
				ce.pushValue(xLow - yLow)
				ce.pushValue(xHigh - yHigh)
			case wazeroir.ShapeF32x4:
				// Two f32 lanes per half, each subtracted through subFloat32bits on the raw bit patterns.
				ce.pushValue(
					subFloat32bits(uint32(xLow), uint32(yLow)) | subFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
				)
				ce.pushValue(
					subFloat32bits(uint32(xHigh), uint32(yHigh)) | subFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
				)
			case wazeroir.ShapeF64x2:
				// One f64 lane per half.
				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow)))
				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh)))
			}
			frame.pc++
  1896  		case wazeroir.OperationKindV128Load:
  1897  			offset := ce.popMemoryOffset(op)
  1898  			switch op.B1 {
  1899  			case wazeroir.V128LoadType128:
  1900  				lo, ok := memoryInst.ReadUint64Le(offset)
  1901  				if !ok {
  1902  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1903  				}
  1904  				ce.pushValue(lo)
  1905  				hi, ok := memoryInst.ReadUint64Le(offset + 8)
  1906  				if !ok {
  1907  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1908  				}
  1909  				ce.pushValue(hi)
  1910  			case wazeroir.V128LoadType8x8s:
  1911  				data, ok := memoryInst.Read(offset, 8)
  1912  				if !ok {
  1913  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1914  				}
  1915  				ce.pushValue(
  1916  					uint64(uint16(int8(data[3])))<<48 | uint64(uint16(int8(data[2])))<<32 | uint64(uint16(int8(data[1])))<<16 | uint64(uint16(int8(data[0]))),
  1917  				)
  1918  				ce.pushValue(
  1919  					uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))),
  1920  				)
  1921  			case wazeroir.V128LoadType8x8u:
  1922  				data, ok := memoryInst.Read(offset, 8)
  1923  				if !ok {
  1924  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1925  				}
  1926  				ce.pushValue(
  1927  					uint64(data[3])<<48 | uint64(data[2])<<32 | uint64(data[1])<<16 | uint64(data[0]),
  1928  				)
  1929  				ce.pushValue(
  1930  					uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]),
  1931  				)
  1932  			case wazeroir.V128LoadType16x4s:
  1933  				data, ok := memoryInst.Read(offset, 8)
  1934  				if !ok {
  1935  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1936  				}
  1937  				ce.pushValue(
  1938  					uint64(int16(binary.LittleEndian.Uint16(data[2:])))<<32 |
  1939  						uint64(uint32(int16(binary.LittleEndian.Uint16(data)))),
  1940  				)
  1941  				ce.pushValue(
  1942  					uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 |
  1943  						uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))),
  1944  				)
  1945  			case wazeroir.V128LoadType16x4u:
  1946  				data, ok := memoryInst.Read(offset, 8)
  1947  				if !ok {
  1948  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1949  				}
  1950  				ce.pushValue(
  1951  					uint64(binary.LittleEndian.Uint16(data[2:]))<<32 | uint64(binary.LittleEndian.Uint16(data)),
  1952  				)
  1953  				ce.pushValue(
  1954  					uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])),
  1955  				)
  1956  			case wazeroir.V128LoadType32x2s:
  1957  				data, ok := memoryInst.Read(offset, 8)
  1958  				if !ok {
  1959  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1960  				}
  1961  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data))))
  1962  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:]))))
  1963  			case wazeroir.V128LoadType32x2u:
  1964  				data, ok := memoryInst.Read(offset, 8)
  1965  				if !ok {
  1966  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1967  				}
  1968  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data)))
  1969  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:])))
  1970  			case wazeroir.V128LoadType8Splat:
  1971  				v, ok := memoryInst.ReadByte(offset)
  1972  				if !ok {
  1973  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1974  				}
  1975  				v8 := uint64(v)<<56 | uint64(v)<<48 | uint64(v)<<40 | uint64(v)<<32 |
  1976  					uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v)
  1977  				ce.pushValue(v8)
  1978  				ce.pushValue(v8)
  1979  			case wazeroir.V128LoadType16Splat:
  1980  				v, ok := memoryInst.ReadUint16Le(offset)
  1981  				if !ok {
  1982  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1983  				}
  1984  				v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v)
  1985  				ce.pushValue(v4)
  1986  				ce.pushValue(v4)
  1987  			case wazeroir.V128LoadType32Splat:
  1988  				v, ok := memoryInst.ReadUint32Le(offset)
  1989  				if !ok {
  1990  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1991  				}
  1992  				vv := uint64(v)<<32 | uint64(v)
  1993  				ce.pushValue(vv)
  1994  				ce.pushValue(vv)
  1995  			case wazeroir.V128LoadType64Splat:
  1996  				lo, ok := memoryInst.ReadUint64Le(offset)
  1997  				if !ok {
  1998  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1999  				}
  2000  				ce.pushValue(lo)
  2001  				ce.pushValue(lo)
  2002  			case wazeroir.V128LoadType32zero:
  2003  				lo, ok := memoryInst.ReadUint32Le(offset)
  2004  				if !ok {
  2005  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2006  				}
  2007  				ce.pushValue(uint64(lo))
  2008  				ce.pushValue(0)
  2009  			case wazeroir.V128LoadType64zero:
  2010  				lo, ok := memoryInst.ReadUint64Le(offset)
  2011  				if !ok {
  2012  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2013  				}
  2014  				ce.pushValue(lo)
  2015  				ce.pushValue(0)
  2016  			}
  2017  			frame.pc++
		case wazeroir.OperationKindV128LoadLane:
			// Replaces one lane of a 128-bit vector with a value loaded from memory. The vector is
			// popped first (high half, then low half), followed by the memory offset. op.B1 is the lane
			// width in bits and op.B2 the lane index; the updated vector is pushed low half first.
			hi, lo := ce.popValue(), ce.popValue()
			offset := ce.popMemoryOffset(op)
			switch op.B1 {
			case 8:
				b, ok := memoryInst.ReadByte(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// Lanes 0-7 live in the low half and 8-15 in the high half; s is the lane's bit position
				// within its half. The target lane is cleared with the mask and the loaded byte OR-ed in.
				if op.B2 < 8 {
					s := op.B2 << 3
					lo = (lo & ^(0xff << s)) | uint64(b)<<s
				} else {
					s := (op.B2 - 8) << 3
					hi = (hi & ^(0xff << s)) | uint64(b)<<s
				}
			case 16:
				b, ok := memoryInst.ReadUint16Le(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// Lanes 0-3 live in the low half and 4-7 in the high half.
				if op.B2 < 4 {
					s := op.B2 << 4
					lo = (lo & ^(0xff_ff << s)) | uint64(b)<<s
				} else {
					s := (op.B2 - 4) << 4
					hi = (hi & ^(0xff_ff << s)) | uint64(b)<<s
				}
			case 32:
				b, ok := memoryInst.ReadUint32Le(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// Lanes 0-1 live in the low half and 2-3 in the high half.
				if op.B2 < 2 {
					s := op.B2 << 5
					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
				} else {
					s := (op.B2 - 2) << 5
					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
				}
			case 64:
				b, ok := memoryInst.ReadUint64Le(offset)
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				// A 64-bit lane is an entire half: lane 0 is the low half, anything else the high half.
				if op.B2 == 0 {
					lo = b
				} else {
					hi = b
				}
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
  2072  		case wazeroir.OperationKindV128Store:
  2073  			hi, lo := ce.popValue(), ce.popValue()
  2074  			offset := ce.popMemoryOffset(op)
  2075  			if ok := memoryInst.WriteUint64Le(offset, lo); !ok {
  2076  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2077  			}
  2078  			if ok := memoryInst.WriteUint64Le(offset+8, hi); !ok {
  2079  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2080  			}
  2081  			frame.pc++
  2082  		case wazeroir.OperationKindV128StoreLane:
  2083  			hi, lo := ce.popValue(), ce.popValue()
  2084  			offset := ce.popMemoryOffset(op)
  2085  			var ok bool
  2086  			switch op.B1 {
  2087  			case 8:
  2088  				if op.B2 < 8 {
  2089  					ok = memoryInst.WriteByte(offset, byte(lo>>(op.B2*8)))
  2090  				} else {
  2091  					ok = memoryInst.WriteByte(offset, byte(hi>>((op.B2-8)*8)))
  2092  				}
  2093  			case 16:
  2094  				if op.B2 < 4 {
  2095  					ok = memoryInst.WriteUint16Le(offset, uint16(lo>>(op.B2*16)))
  2096  				} else {
  2097  					ok = memoryInst.WriteUint16Le(offset, uint16(hi>>((op.B2-4)*16)))
  2098  				}
  2099  			case 32:
  2100  				if op.B2 < 2 {
  2101  					ok = memoryInst.WriteUint32Le(offset, uint32(lo>>(op.B2*32)))
  2102  				} else {
  2103  					ok = memoryInst.WriteUint32Le(offset, uint32(hi>>((op.B2-2)*32)))
  2104  				}
  2105  			case 64:
  2106  				if op.B2 == 0 {
  2107  					ok = memoryInst.WriteUint64Le(offset, lo)
  2108  				} else {
  2109  					ok = memoryInst.WriteUint64Le(offset, hi)
  2110  				}
  2111  			}
  2112  			if !ok {
  2113  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2114  			}
  2115  			frame.pc++
		case wazeroir.OperationKindV128ReplaceLane:
			// Replaces one lane of a 128-bit vector with a scalar. The scalar is popped first, then
			// the vector (high half, then low half). op.B1 is the lane shape and op.B2 the lane index;
			// the updated vector is pushed low half first.
			v := ce.popValue()
			hi, lo := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				// Lanes 0-7 live in the low half and 8-15 in the high half; s is the lane's bit position
				// within its half. The target lane is cleared with the mask and the truncated scalar OR-ed in.
				if op.B2 < 8 {
					s := op.B2 << 3
					lo = (lo & ^(0xff << s)) | uint64(byte(v))<<s
				} else {
					s := (op.B2 - 8) << 3
					hi = (hi & ^(0xff << s)) | uint64(byte(v))<<s
				}
			case wazeroir.ShapeI16x8:
				// Lanes 0-3 live in the low half and 4-7 in the high half.
				if op.B2 < 4 {
					s := op.B2 << 4
					lo = (lo & ^(0xff_ff << s)) | uint64(uint16(v))<<s
				} else {
					s := (op.B2 - 4) << 4
					hi = (hi & ^(0xff_ff << s)) | uint64(uint16(v))<<s
				}
			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
				// Lanes 0-1 live in the low half and 2-3 in the high half; floats are handled as raw bits.
				if op.B2 < 2 {
					s := op.B2 << 5
					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
				} else {
					s := (op.B2 - 2) << 5
					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
				}
			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
				// A 64-bit lane is an entire half: lane 0 is the low half, anything else the high half.
				if op.B2 == 0 {
					lo = v
				} else {
					hi = v
				}
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
  2154  		case wazeroir.OperationKindV128ExtractLane:
  2155  			hi, lo := ce.popValue(), ce.popValue()
  2156  			var v uint64
  2157  			switch op.B1 {
  2158  			case wazeroir.ShapeI8x16:
  2159  				var u8 byte
  2160  				if op.B2 < 8 {
  2161  					u8 = byte(lo >> (op.B2 * 8))
  2162  				} else {
  2163  					u8 = byte(hi >> ((op.B2 - 8) * 8))
  2164  				}
  2165  				if op.B3 {
  2166  					// sign-extend.
  2167  					v = uint64(uint32(int8(u8)))
  2168  				} else {
  2169  					v = uint64(u8)
  2170  				}
  2171  			case wazeroir.ShapeI16x8:
  2172  				var u16 uint16
  2173  				if op.B2 < 4 {
  2174  					u16 = uint16(lo >> (op.B2 * 16))
  2175  				} else {
  2176  					u16 = uint16(hi >> ((op.B2 - 4) * 16))
  2177  				}
  2178  				if op.B3 {
  2179  					// sign-extend.
  2180  					v = uint64(uint32(int16(u16)))
  2181  				} else {
  2182  					v = uint64(u16)
  2183  				}
  2184  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2185  				if op.B2 < 2 {
  2186  					v = uint64(uint32(lo >> (op.B2 * 32)))
  2187  				} else {
  2188  					v = uint64(uint32(hi >> ((op.B2 - 2) * 32)))
  2189  				}
  2190  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2191  				if op.B2 == 0 {
  2192  					v = lo
  2193  				} else {
  2194  					v = hi
  2195  				}
  2196  			}
  2197  			ce.pushValue(v)
  2198  			frame.pc++
  2199  		case wazeroir.OperationKindV128Splat:
  2200  			v := ce.popValue()
  2201  			var hi, lo uint64
  2202  			switch op.B1 {
  2203  			case wazeroir.ShapeI8x16:
  2204  				v8 := uint64(byte(v))<<56 | uint64(byte(v))<<48 | uint64(byte(v))<<40 | uint64(byte(v))<<32 |
  2205  					uint64(byte(v))<<24 | uint64(byte(v))<<16 | uint64(byte(v))<<8 | uint64(byte(v))
  2206  				hi, lo = v8, v8
  2207  			case wazeroir.ShapeI16x8:
  2208  				v4 := uint64(uint16(v))<<48 | uint64(uint16(v))<<32 | uint64(uint16(v))<<16 | uint64(uint16(v))
  2209  				hi, lo = v4, v4
  2210  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2211  				v2 := uint64(uint32(v))<<32 | uint64(uint32(v))
  2212  				lo, hi = v2, v2
  2213  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2214  				lo, hi = v, v
  2215  			}
  2216  			ce.pushValue(lo)
  2217  			ce.pushValue(hi)
  2218  			frame.pc++
  2219  		case wazeroir.OperationKindV128Swizzle:
  2220  			idxHi, idxLo := ce.popValue(), ce.popValue()
  2221  			baseHi, baseLo := ce.popValue(), ce.popValue()
  2222  			var newVal [16]byte
  2223  			for i := 0; i < 16; i++ {
  2224  				var id byte
  2225  				if i < 8 {
  2226  					id = byte(idxLo >> (i * 8))
  2227  				} else {
  2228  					id = byte(idxHi >> ((i - 8) * 8))
  2229  				}
  2230  				if id < 8 {
  2231  					newVal[i] = byte(baseLo >> (id * 8))
  2232  				} else if id < 16 {
  2233  					newVal[i] = byte(baseHi >> ((id - 8) * 8))
  2234  				}
  2235  			}
  2236  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2237  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2238  			frame.pc++
  2239  		case wazeroir.OperationKindV128Shuffle:
  2240  			xHi, xLo, yHi, yLo := ce.popValue(), ce.popValue(), ce.popValue(), ce.popValue()
  2241  			var newVal [16]byte
  2242  			for i, l := range op.Us {
  2243  				if l < 8 {
  2244  					newVal[i] = byte(yLo >> (l * 8))
  2245  				} else if l < 16 {
  2246  					newVal[i] = byte(yHi >> ((l - 8) * 8))
  2247  				} else if l < 24 {
  2248  					newVal[i] = byte(xLo >> ((l - 16) * 8))
  2249  				} else if l < 32 {
  2250  					newVal[i] = byte(xHi >> ((l - 24) * 8))
  2251  				}
  2252  			}
  2253  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2254  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2255  			frame.pc++
  2256  		case wazeroir.OperationKindV128AnyTrue:
  2257  			hi, lo := ce.popValue(), ce.popValue()
  2258  			if hi != 0 || lo != 0 {
  2259  				ce.pushValue(1)
  2260  			} else {
  2261  				ce.pushValue(0)
  2262  			}
  2263  			frame.pc++
  2264  		case wazeroir.OperationKindV128AllTrue:
  2265  			hi, lo := ce.popValue(), ce.popValue()
  2266  			var ret bool
  2267  			switch op.B1 {
  2268  			case wazeroir.ShapeI8x16:
  2269  				ret = (uint8(lo) != 0) && (uint8(lo>>8) != 0) && (uint8(lo>>16) != 0) && (uint8(lo>>24) != 0) &&
  2270  					(uint8(lo>>32) != 0) && (uint8(lo>>40) != 0) && (uint8(lo>>48) != 0) && (uint8(lo>>56) != 0) &&
  2271  					(uint8(hi) != 0) && (uint8(hi>>8) != 0) && (uint8(hi>>16) != 0) && (uint8(hi>>24) != 0) &&
  2272  					(uint8(hi>>32) != 0) && (uint8(hi>>40) != 0) && (uint8(hi>>48) != 0) && (uint8(hi>>56) != 0)
  2273  			case wazeroir.ShapeI16x8:
  2274  				ret = (uint16(lo) != 0) && (uint16(lo>>16) != 0) && (uint16(lo>>32) != 0) && (uint16(lo>>48) != 0) &&
  2275  					(uint16(hi) != 0) && (uint16(hi>>16) != 0) && (uint16(hi>>32) != 0) && (uint16(hi>>48) != 0)
  2276  			case wazeroir.ShapeI32x4:
  2277  				ret = (uint32(lo) != 0) && (uint32(lo>>32) != 0) &&
  2278  					(uint32(hi) != 0) && (uint32(hi>>32) != 0)
  2279  			case wazeroir.ShapeI64x2:
  2280  				ret = (lo != 0) &&
  2281  					(hi != 0)
  2282  			}
  2283  			if ret {
  2284  				ce.pushValue(1)
  2285  			} else {
  2286  				ce.pushValue(0)
  2287  			}
  2288  			frame.pc++
  2289  		case wazeroir.OperationKindV128BitMask:
  2290  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitmask-extraction
  2291  			hi, lo := ce.popValue(), ce.popValue()
  2292  			var res uint64
  2293  			switch op.B1 {
  2294  			case wazeroir.ShapeI8x16:
  2295  				for i := 0; i < 8; i++ {
  2296  					if int8(lo>>(i*8)) < 0 {
  2297  						res |= 1 << i
  2298  					}
  2299  				}
  2300  				for i := 0; i < 8; i++ {
  2301  					if int8(hi>>(i*8)) < 0 {
  2302  						res |= 1 << (i + 8)
  2303  					}
  2304  				}
  2305  			case wazeroir.ShapeI16x8:
  2306  				for i := 0; i < 4; i++ {
  2307  					if int16(lo>>(i*16)) < 0 {
  2308  						res |= 1 << i
  2309  					}
  2310  				}
  2311  				for i := 0; i < 4; i++ {
  2312  					if int16(hi>>(i*16)) < 0 {
  2313  						res |= 1 << (i + 4)
  2314  					}
  2315  				}
  2316  			case wazeroir.ShapeI32x4:
  2317  				for i := 0; i < 2; i++ {
  2318  					if int32(lo>>(i*32)) < 0 {
  2319  						res |= 1 << i
  2320  					}
  2321  				}
  2322  				for i := 0; i < 2; i++ {
  2323  					if int32(hi>>(i*32)) < 0 {
  2324  						res |= 1 << (i + 2)
  2325  					}
  2326  				}
  2327  			case wazeroir.ShapeI64x2:
  2328  				if int64(lo) < 0 {
  2329  					res |= 0b01
  2330  				}
  2331  				if int(hi) < 0 {
  2332  					res |= 0b10
  2333  				}
  2334  			}
  2335  			ce.pushValue(res)
  2336  			frame.pc++
  2337  		case wazeroir.OperationKindV128And:
  2338  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2339  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2340  			ce.pushValue(x1Lo & x2Lo)
  2341  			ce.pushValue(x1Hi & x2Hi)
  2342  			frame.pc++
  2343  		case wazeroir.OperationKindV128Not:
  2344  			hi, lo := ce.popValue(), ce.popValue()
  2345  			ce.pushValue(^lo)
  2346  			ce.pushValue(^hi)
  2347  			frame.pc++
  2348  		case wazeroir.OperationKindV128Or:
  2349  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2350  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2351  			ce.pushValue(x1Lo | x2Lo)
  2352  			ce.pushValue(x1Hi | x2Hi)
  2353  			frame.pc++
  2354  		case wazeroir.OperationKindV128Xor:
  2355  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2356  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2357  			ce.pushValue(x1Lo ^ x2Lo)
  2358  			ce.pushValue(x1Hi ^ x2Hi)
  2359  			frame.pc++
  2360  		case wazeroir.OperationKindV128Bitselect:
  2361  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitwise-select
  2362  			cHi, cLo := ce.popValue(), ce.popValue()
  2363  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2364  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2365  			// v128.or(v128.and(v1, c), v128.and(v2, v128.not(c)))
  2366  			ce.pushValue((x1Lo & cLo) | (x2Lo & (^cLo)))
  2367  			ce.pushValue((x1Hi & cHi) | (x2Hi & (^cHi)))
  2368  			frame.pc++
  2369  		case wazeroir.OperationKindV128AndNot:
  2370  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2371  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2372  			ce.pushValue(x1Lo & (^x2Lo))
  2373  			ce.pushValue(x1Hi & (^x2Hi))
  2374  			frame.pc++
		case wazeroir.OperationKindV128Shl:
			// Lane-wise left shift of a 128-bit vector. The shift amount is popped first, then the
			// vector (high half, then low half). op.B1 selects the lane shape, and the shift amount is
			// reduced modulo the lane width in bits. The result is pushed low half first.
			s := ce.popValue()
			hi, lo := ce.popValue(), ce.popValue()
			switch op.B1 {
			case wazeroir.ShapeI8x16:
				s = s % 8
				// Each lane is moved down to bit 0, shifted, truncated to the lane width so overflowing
				// bits are dropped, then moved back to its position.
				lo = uint64(uint8(lo<<s)) |
					uint64(uint8((lo>>8)<<s))<<8 |
					uint64(uint8((lo>>16)<<s))<<16 |
					uint64(uint8((lo>>24)<<s))<<24 |
					uint64(uint8((lo>>32)<<s))<<32 |
					uint64(uint8((lo>>40)<<s))<<40 |
					uint64(uint8((lo>>48)<<s))<<48 |
					uint64(uint8((lo>>56)<<s))<<56
				hi = uint64(uint8(hi<<s)) |
					uint64(uint8((hi>>8)<<s))<<8 |
					uint64(uint8((hi>>16)<<s))<<16 |
					uint64(uint8((hi>>24)<<s))<<24 |
					uint64(uint8((hi>>32)<<s))<<32 |
					uint64(uint8((hi>>40)<<s))<<40 |
					uint64(uint8((hi>>48)<<s))<<48 |
					uint64(uint8((hi>>56)<<s))<<56
			case wazeroir.ShapeI16x8:
				s = s % 16
				lo = uint64(uint16(lo<<s)) |
					uint64(uint16((lo>>16)<<s))<<16 |
					uint64(uint16((lo>>32)<<s))<<32 |
					uint64(uint16((lo>>48)<<s))<<48
				hi = uint64(uint16(hi<<s)) |
					uint64(uint16((hi>>16)<<s))<<16 |
					uint64(uint16((hi>>32)<<s))<<32 |
					uint64(uint16((hi>>48)<<s))<<48
			case wazeroir.ShapeI32x4:
				s = s % 32
				lo = uint64(uint32(lo<<s)) | uint64(uint32((lo>>32)<<s))<<32
				hi = uint64(uint32(hi<<s)) | uint64(uint32((hi>>32)<<s))<<32
			case wazeroir.ShapeI64x2:
				// One lane per half: a plain 64-bit shift.
				s = s % 64
				lo = lo << s
				hi = hi << s
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
  2419  		case wazeroir.OperationKindV128Shr:
  2420  			s := ce.popValue()
  2421  			hi, lo := ce.popValue(), ce.popValue()
  2422  			switch op.B1 {
  2423  			case wazeroir.ShapeI8x16:
  2424  				s = s % 8
  2425  				if op.B3 { // signed
  2426  					lo = uint64(uint8(int8(lo)>>s)) |
  2427  						uint64(uint8(int8(lo>>8)>>s))<<8 |
  2428  						uint64(uint8(int8(lo>>16)>>s))<<16 |
  2429  						uint64(uint8(int8(lo>>24)>>s))<<24 |
  2430  						uint64(uint8(int8(lo>>32)>>s))<<32 |
  2431  						uint64(uint8(int8(lo>>40)>>s))<<40 |
  2432  						uint64(uint8(int8(lo>>48)>>s))<<48 |
  2433  						uint64(uint8(int8(lo>>56)>>s))<<56
  2434  					hi = uint64(uint8(int8(hi)>>s)) |
  2435  						uint64(uint8(int8(hi>>8)>>s))<<8 |
  2436  						uint64(uint8(int8(hi>>16)>>s))<<16 |
  2437  						uint64(uint8(int8(hi>>24)>>s))<<24 |
  2438  						uint64(uint8(int8(hi>>32)>>s))<<32 |
  2439  						uint64(uint8(int8(hi>>40)>>s))<<40 |
  2440  						uint64(uint8(int8(hi>>48)>>s))<<48 |
  2441  						uint64(uint8(int8(hi>>56)>>s))<<56
  2442  				} else {
  2443  					lo = uint64(uint8(lo)>>s) |
  2444  						uint64(uint8(lo>>8)>>s)<<8 |
  2445  						uint64(uint8(lo>>16)>>s)<<16 |
  2446  						uint64(uint8(lo>>24)>>s)<<24 |
  2447  						uint64(uint8(lo>>32)>>s)<<32 |
  2448  						uint64(uint8(lo>>40)>>s)<<40 |
  2449  						uint64(uint8(lo>>48)>>s)<<48 |
  2450  						uint64(uint8(lo>>56)>>s)<<56
  2451  					hi = uint64(uint8(hi)>>s) |
  2452  						uint64(uint8(hi>>8)>>s)<<8 |
  2453  						uint64(uint8(hi>>16)>>s)<<16 |
  2454  						uint64(uint8(hi>>24)>>s)<<24 |
  2455  						uint64(uint8(hi>>32)>>s)<<32 |
  2456  						uint64(uint8(hi>>40)>>s)<<40 |
  2457  						uint64(uint8(hi>>48)>>s)<<48 |
  2458  						uint64(uint8(hi>>56)>>s)<<56
  2459  				}
  2460  			case wazeroir.ShapeI16x8:
  2461  				s = s % 16
  2462  				if op.B3 { // signed
  2463  					lo = uint64(uint16(int16(lo)>>s)) |
  2464  						uint64(uint16(int16(lo>>16)>>s))<<16 |
  2465  						uint64(uint16(int16(lo>>32)>>s))<<32 |
  2466  						uint64(uint16(int16(lo>>48)>>s))<<48
  2467  					hi = uint64(uint16(int16(hi)>>s)) |
  2468  						uint64(uint16(int16(hi>>16)>>s))<<16 |
  2469  						uint64(uint16(int16(hi>>32)>>s))<<32 |
  2470  						uint64(uint16(int16(hi>>48)>>s))<<48
  2471  				} else {
  2472  					lo = uint64(uint16(lo)>>s) |
  2473  						uint64(uint16(lo>>16)>>s)<<16 |
  2474  						uint64(uint16(lo>>32)>>s)<<32 |
  2475  						uint64(uint16(lo>>48)>>s)<<48
  2476  					hi = uint64(uint16(hi)>>s) |
  2477  						uint64(uint16(hi>>16)>>s)<<16 |
  2478  						uint64(uint16(hi>>32)>>s)<<32 |
  2479  						uint64(uint16(hi>>48)>>s)<<48
  2480  				}
  2481  			case wazeroir.ShapeI32x4:
  2482  				s = s % 32
  2483  				if op.B3 {
  2484  					lo = uint64(uint32(int32(lo)>>s)) | uint64(uint32(int32(lo>>32)>>s))<<32
  2485  					hi = uint64(uint32(int32(hi)>>s)) | uint64(uint32(int32(hi>>32)>>s))<<32
  2486  				} else {
  2487  					lo = uint64(uint32(lo)>>s) | uint64(uint32(lo>>32)>>s)<<32
  2488  					hi = uint64(uint32(hi)>>s) | uint64(uint32(hi>>32)>>s)<<32
  2489  				}
  2490  			case wazeroir.ShapeI64x2:
  2491  				s = s % 64
  2492  				if op.B3 { // signed
  2493  					lo = uint64(int64(lo) >> s)
  2494  					hi = uint64(int64(hi) >> s)
  2495  				} else {
  2496  					lo = lo >> s
  2497  					hi = hi >> s
  2498  				}
  2499  
  2500  			}
  2501  			ce.pushValue(lo)
  2502  			ce.pushValue(hi)
  2503  			frame.pc++
  2504  		case wazeroir.OperationKindV128Cmp:
  2505  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2506  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2507  			var result []bool
  2508  			switch op.B1 {
  2509  			case wazeroir.V128CmpTypeI8x16Eq:
  2510  				result = []bool{
  2511  					byte(x1Lo>>0) == byte(x2Lo>>0), byte(x1Lo>>8) == byte(x2Lo>>8),
  2512  					byte(x1Lo>>16) == byte(x2Lo>>16), byte(x1Lo>>24) == byte(x2Lo>>24),
  2513  					byte(x1Lo>>32) == byte(x2Lo>>32), byte(x1Lo>>40) == byte(x2Lo>>40),
  2514  					byte(x1Lo>>48) == byte(x2Lo>>48), byte(x1Lo>>56) == byte(x2Lo>>56),
  2515  					byte(x1Hi>>0) == byte(x2Hi>>0), byte(x1Hi>>8) == byte(x2Hi>>8),
  2516  					byte(x1Hi>>16) == byte(x2Hi>>16), byte(x1Hi>>24) == byte(x2Hi>>24),
  2517  					byte(x1Hi>>32) == byte(x2Hi>>32), byte(x1Hi>>40) == byte(x2Hi>>40),
  2518  					byte(x1Hi>>48) == byte(x2Hi>>48), byte(x1Hi>>56) == byte(x2Hi>>56),
  2519  				}
  2520  			case wazeroir.V128CmpTypeI8x16Ne:
  2521  				result = []bool{
  2522  					byte(x1Lo>>0) != byte(x2Lo>>0), byte(x1Lo>>8) != byte(x2Lo>>8),
  2523  					byte(x1Lo>>16) != byte(x2Lo>>16), byte(x1Lo>>24) != byte(x2Lo>>24),
  2524  					byte(x1Lo>>32) != byte(x2Lo>>32), byte(x1Lo>>40) != byte(x2Lo>>40),
  2525  					byte(x1Lo>>48) != byte(x2Lo>>48), byte(x1Lo>>56) != byte(x2Lo>>56),
  2526  					byte(x1Hi>>0) != byte(x2Hi>>0), byte(x1Hi>>8) != byte(x2Hi>>8),
  2527  					byte(x1Hi>>16) != byte(x2Hi>>16), byte(x1Hi>>24) != byte(x2Hi>>24),
  2528  					byte(x1Hi>>32) != byte(x2Hi>>32), byte(x1Hi>>40) != byte(x2Hi>>40),
  2529  					byte(x1Hi>>48) != byte(x2Hi>>48), byte(x1Hi>>56) != byte(x2Hi>>56),
  2530  				}
  2531  			case wazeroir.V128CmpTypeI8x16LtS:
  2532  				result = []bool{
  2533  					int8(x1Lo>>0) < int8(x2Lo>>0), int8(x1Lo>>8) < int8(x2Lo>>8),
  2534  					int8(x1Lo>>16) < int8(x2Lo>>16), int8(x1Lo>>24) < int8(x2Lo>>24),
  2535  					int8(x1Lo>>32) < int8(x2Lo>>32), int8(x1Lo>>40) < int8(x2Lo>>40),
  2536  					int8(x1Lo>>48) < int8(x2Lo>>48), int8(x1Lo>>56) < int8(x2Lo>>56),
  2537  					int8(x1Hi>>0) < int8(x2Hi>>0), int8(x1Hi>>8) < int8(x2Hi>>8),
  2538  					int8(x1Hi>>16) < int8(x2Hi>>16), int8(x1Hi>>24) < int8(x2Hi>>24),
  2539  					int8(x1Hi>>32) < int8(x2Hi>>32), int8(x1Hi>>40) < int8(x2Hi>>40),
  2540  					int8(x1Hi>>48) < int8(x2Hi>>48), int8(x1Hi>>56) < int8(x2Hi>>56),
  2541  				}
  2542  			case wazeroir.V128CmpTypeI8x16LtU:
  2543  				result = []bool{
  2544  					byte(x1Lo>>0) < byte(x2Lo>>0), byte(x1Lo>>8) < byte(x2Lo>>8),
  2545  					byte(x1Lo>>16) < byte(x2Lo>>16), byte(x1Lo>>24) < byte(x2Lo>>24),
  2546  					byte(x1Lo>>32) < byte(x2Lo>>32), byte(x1Lo>>40) < byte(x2Lo>>40),
  2547  					byte(x1Lo>>48) < byte(x2Lo>>48), byte(x1Lo>>56) < byte(x2Lo>>56),
  2548  					byte(x1Hi>>0) < byte(x2Hi>>0), byte(x1Hi>>8) < byte(x2Hi>>8),
  2549  					byte(x1Hi>>16) < byte(x2Hi>>16), byte(x1Hi>>24) < byte(x2Hi>>24),
  2550  					byte(x1Hi>>32) < byte(x2Hi>>32), byte(x1Hi>>40) < byte(x2Hi>>40),
  2551  					byte(x1Hi>>48) < byte(x2Hi>>48), byte(x1Hi>>56) < byte(x2Hi>>56),
  2552  				}
  2553  			case wazeroir.V128CmpTypeI8x16GtS:
  2554  				result = []bool{
  2555  					int8(x1Lo>>0) > int8(x2Lo>>0), int8(x1Lo>>8) > int8(x2Lo>>8),
  2556  					int8(x1Lo>>16) > int8(x2Lo>>16), int8(x1Lo>>24) > int8(x2Lo>>24),
  2557  					int8(x1Lo>>32) > int8(x2Lo>>32), int8(x1Lo>>40) > int8(x2Lo>>40),
  2558  					int8(x1Lo>>48) > int8(x2Lo>>48), int8(x1Lo>>56) > int8(x2Lo>>56),
  2559  					int8(x1Hi>>0) > int8(x2Hi>>0), int8(x1Hi>>8) > int8(x2Hi>>8),
  2560  					int8(x1Hi>>16) > int8(x2Hi>>16), int8(x1Hi>>24) > int8(x2Hi>>24),
  2561  					int8(x1Hi>>32) > int8(x2Hi>>32), int8(x1Hi>>40) > int8(x2Hi>>40),
  2562  					int8(x1Hi>>48) > int8(x2Hi>>48), int8(x1Hi>>56) > int8(x2Hi>>56),
  2563  				}
  2564  			case wazeroir.V128CmpTypeI8x16GtU:
  2565  				result = []bool{
  2566  					byte(x1Lo>>0) > byte(x2Lo>>0), byte(x1Lo>>8) > byte(x2Lo>>8),
  2567  					byte(x1Lo>>16) > byte(x2Lo>>16), byte(x1Lo>>24) > byte(x2Lo>>24),
  2568  					byte(x1Lo>>32) > byte(x2Lo>>32), byte(x1Lo>>40) > byte(x2Lo>>40),
  2569  					byte(x1Lo>>48) > byte(x2Lo>>48), byte(x1Lo>>56) > byte(x2Lo>>56),
  2570  					byte(x1Hi>>0) > byte(x2Hi>>0), byte(x1Hi>>8) > byte(x2Hi>>8),
  2571  					byte(x1Hi>>16) > byte(x2Hi>>16), byte(x1Hi>>24) > byte(x2Hi>>24),
  2572  					byte(x1Hi>>32) > byte(x2Hi>>32), byte(x1Hi>>40) > byte(x2Hi>>40),
  2573  					byte(x1Hi>>48) > byte(x2Hi>>48), byte(x1Hi>>56) > byte(x2Hi>>56),
  2574  				}
  2575  			case wazeroir.V128CmpTypeI8x16LeS:
  2576  				result = []bool{
  2577  					int8(x1Lo>>0) <= int8(x2Lo>>0), int8(x1Lo>>8) <= int8(x2Lo>>8),
  2578  					int8(x1Lo>>16) <= int8(x2Lo>>16), int8(x1Lo>>24) <= int8(x2Lo>>24),
  2579  					int8(x1Lo>>32) <= int8(x2Lo>>32), int8(x1Lo>>40) <= int8(x2Lo>>40),
  2580  					int8(x1Lo>>48) <= int8(x2Lo>>48), int8(x1Lo>>56) <= int8(x2Lo>>56),
  2581  					int8(x1Hi>>0) <= int8(x2Hi>>0), int8(x1Hi>>8) <= int8(x2Hi>>8),
  2582  					int8(x1Hi>>16) <= int8(x2Hi>>16), int8(x1Hi>>24) <= int8(x2Hi>>24),
  2583  					int8(x1Hi>>32) <= int8(x2Hi>>32), int8(x1Hi>>40) <= int8(x2Hi>>40),
  2584  					int8(x1Hi>>48) <= int8(x2Hi>>48), int8(x1Hi>>56) <= int8(x2Hi>>56),
  2585  				}
  2586  			case wazeroir.V128CmpTypeI8x16LeU:
  2587  				result = []bool{
  2588  					byte(x1Lo>>0) <= byte(x2Lo>>0), byte(x1Lo>>8) <= byte(x2Lo>>8),
  2589  					byte(x1Lo>>16) <= byte(x2Lo>>16), byte(x1Lo>>24) <= byte(x2Lo>>24),
  2590  					byte(x1Lo>>32) <= byte(x2Lo>>32), byte(x1Lo>>40) <= byte(x2Lo>>40),
  2591  					byte(x1Lo>>48) <= byte(x2Lo>>48), byte(x1Lo>>56) <= byte(x2Lo>>56),
  2592  					byte(x1Hi>>0) <= byte(x2Hi>>0), byte(x1Hi>>8) <= byte(x2Hi>>8),
  2593  					byte(x1Hi>>16) <= byte(x2Hi>>16), byte(x1Hi>>24) <= byte(x2Hi>>24),
  2594  					byte(x1Hi>>32) <= byte(x2Hi>>32), byte(x1Hi>>40) <= byte(x2Hi>>40),
  2595  					byte(x1Hi>>48) <= byte(x2Hi>>48), byte(x1Hi>>56) <= byte(x2Hi>>56),
  2596  				}
  2597  			case wazeroir.V128CmpTypeI8x16GeS:
  2598  				result = []bool{
  2599  					int8(x1Lo>>0) >= int8(x2Lo>>0), int8(x1Lo>>8) >= int8(x2Lo>>8),
  2600  					int8(x1Lo>>16) >= int8(x2Lo>>16), int8(x1Lo>>24) >= int8(x2Lo>>24),
  2601  					int8(x1Lo>>32) >= int8(x2Lo>>32), int8(x1Lo>>40) >= int8(x2Lo>>40),
  2602  					int8(x1Lo>>48) >= int8(x2Lo>>48), int8(x1Lo>>56) >= int8(x2Lo>>56),
  2603  					int8(x1Hi>>0) >= int8(x2Hi>>0), int8(x1Hi>>8) >= int8(x2Hi>>8),
  2604  					int8(x1Hi>>16) >= int8(x2Hi>>16), int8(x1Hi>>24) >= int8(x2Hi>>24),
  2605  					int8(x1Hi>>32) >= int8(x2Hi>>32), int8(x1Hi>>40) >= int8(x2Hi>>40),
  2606  					int8(x1Hi>>48) >= int8(x2Hi>>48), int8(x1Hi>>56) >= int8(x2Hi>>56),
  2607  				}
  2608  			case wazeroir.V128CmpTypeI8x16GeU:
  2609  				result = []bool{
  2610  					byte(x1Lo>>0) >= byte(x2Lo>>0), byte(x1Lo>>8) >= byte(x2Lo>>8),
  2611  					byte(x1Lo>>16) >= byte(x2Lo>>16), byte(x1Lo>>24) >= byte(x2Lo>>24),
  2612  					byte(x1Lo>>32) >= byte(x2Lo>>32), byte(x1Lo>>40) >= byte(x2Lo>>40),
  2613  					byte(x1Lo>>48) >= byte(x2Lo>>48), byte(x1Lo>>56) >= byte(x2Lo>>56),
  2614  					byte(x1Hi>>0) >= byte(x2Hi>>0), byte(x1Hi>>8) >= byte(x2Hi>>8),
  2615  					byte(x1Hi>>16) >= byte(x2Hi>>16), byte(x1Hi>>24) >= byte(x2Hi>>24),
  2616  					byte(x1Hi>>32) >= byte(x2Hi>>32), byte(x1Hi>>40) >= byte(x2Hi>>40),
  2617  					byte(x1Hi>>48) >= byte(x2Hi>>48), byte(x1Hi>>56) >= byte(x2Hi>>56),
  2618  				}
  2619  			case wazeroir.V128CmpTypeI16x8Eq:
  2620  				result = []bool{
  2621  					uint16(x1Lo>>0) == uint16(x2Lo>>0), uint16(x1Lo>>16) == uint16(x2Lo>>16),
  2622  					uint16(x1Lo>>32) == uint16(x2Lo>>32), uint16(x1Lo>>48) == uint16(x2Lo>>48),
  2623  					uint16(x1Hi>>0) == uint16(x2Hi>>0), uint16(x1Hi>>16) == uint16(x2Hi>>16),
  2624  					uint16(x1Hi>>32) == uint16(x2Hi>>32), uint16(x1Hi>>48) == uint16(x2Hi>>48),
  2625  				}
  2626  			case wazeroir.V128CmpTypeI16x8Ne:
  2627  				result = []bool{
  2628  					uint16(x1Lo>>0) != uint16(x2Lo>>0), uint16(x1Lo>>16) != uint16(x2Lo>>16),
  2629  					uint16(x1Lo>>32) != uint16(x2Lo>>32), uint16(x1Lo>>48) != uint16(x2Lo>>48),
  2630  					uint16(x1Hi>>0) != uint16(x2Hi>>0), uint16(x1Hi>>16) != uint16(x2Hi>>16),
  2631  					uint16(x1Hi>>32) != uint16(x2Hi>>32), uint16(x1Hi>>48) != uint16(x2Hi>>48),
  2632  				}
  2633  			case wazeroir.V128CmpTypeI16x8LtS:
  2634  				result = []bool{
  2635  					int16(x1Lo>>0) < int16(x2Lo>>0), int16(x1Lo>>16) < int16(x2Lo>>16),
  2636  					int16(x1Lo>>32) < int16(x2Lo>>32), int16(x1Lo>>48) < int16(x2Lo>>48),
  2637  					int16(x1Hi>>0) < int16(x2Hi>>0), int16(x1Hi>>16) < int16(x2Hi>>16),
  2638  					int16(x1Hi>>32) < int16(x2Hi>>32), int16(x1Hi>>48) < int16(x2Hi>>48),
  2639  				}
  2640  			case wazeroir.V128CmpTypeI16x8LtU:
  2641  				result = []bool{
  2642  					uint16(x1Lo>>0) < uint16(x2Lo>>0), uint16(x1Lo>>16) < uint16(x2Lo>>16),
  2643  					uint16(x1Lo>>32) < uint16(x2Lo>>32), uint16(x1Lo>>48) < uint16(x2Lo>>48),
  2644  					uint16(x1Hi>>0) < uint16(x2Hi>>0), uint16(x1Hi>>16) < uint16(x2Hi>>16),
  2645  					uint16(x1Hi>>32) < uint16(x2Hi>>32), uint16(x1Hi>>48) < uint16(x2Hi>>48),
  2646  				}
  2647  			case wazeroir.V128CmpTypeI16x8GtS:
  2648  				result = []bool{
  2649  					int16(x1Lo>>0) > int16(x2Lo>>0), int16(x1Lo>>16) > int16(x2Lo>>16),
  2650  					int16(x1Lo>>32) > int16(x2Lo>>32), int16(x1Lo>>48) > int16(x2Lo>>48),
  2651  					int16(x1Hi>>0) > int16(x2Hi>>0), int16(x1Hi>>16) > int16(x2Hi>>16),
  2652  					int16(x1Hi>>32) > int16(x2Hi>>32), int16(x1Hi>>48) > int16(x2Hi>>48),
  2653  				}
  2654  			case wazeroir.V128CmpTypeI16x8GtU:
  2655  				result = []bool{
  2656  					uint16(x1Lo>>0) > uint16(x2Lo>>0), uint16(x1Lo>>16) > uint16(x2Lo>>16),
  2657  					uint16(x1Lo>>32) > uint16(x2Lo>>32), uint16(x1Lo>>48) > uint16(x2Lo>>48),
  2658  					uint16(x1Hi>>0) > uint16(x2Hi>>0), uint16(x1Hi>>16) > uint16(x2Hi>>16),
  2659  					uint16(x1Hi>>32) > uint16(x2Hi>>32), uint16(x1Hi>>48) > uint16(x2Hi>>48),
  2660  				}
  2661  			case wazeroir.V128CmpTypeI16x8LeS:
  2662  				result = []bool{
  2663  					int16(x1Lo>>0) <= int16(x2Lo>>0), int16(x1Lo>>16) <= int16(x2Lo>>16),
  2664  					int16(x1Lo>>32) <= int16(x2Lo>>32), int16(x1Lo>>48) <= int16(x2Lo>>48),
  2665  					int16(x1Hi>>0) <= int16(x2Hi>>0), int16(x1Hi>>16) <= int16(x2Hi>>16),
  2666  					int16(x1Hi>>32) <= int16(x2Hi>>32), int16(x1Hi>>48) <= int16(x2Hi>>48),
  2667  				}
  2668  			case wazeroir.V128CmpTypeI16x8LeU:
  2669  				result = []bool{
  2670  					uint16(x1Lo>>0) <= uint16(x2Lo>>0), uint16(x1Lo>>16) <= uint16(x2Lo>>16),
  2671  					uint16(x1Lo>>32) <= uint16(x2Lo>>32), uint16(x1Lo>>48) <= uint16(x2Lo>>48),
  2672  					uint16(x1Hi>>0) <= uint16(x2Hi>>0), uint16(x1Hi>>16) <= uint16(x2Hi>>16),
  2673  					uint16(x1Hi>>32) <= uint16(x2Hi>>32), uint16(x1Hi>>48) <= uint16(x2Hi>>48),
  2674  				}
  2675  			case wazeroir.V128CmpTypeI16x8GeS:
  2676  				result = []bool{
  2677  					int16(x1Lo>>0) >= int16(x2Lo>>0), int16(x1Lo>>16) >= int16(x2Lo>>16),
  2678  					int16(x1Lo>>32) >= int16(x2Lo>>32), int16(x1Lo>>48) >= int16(x2Lo>>48),
  2679  					int16(x1Hi>>0) >= int16(x2Hi>>0), int16(x1Hi>>16) >= int16(x2Hi>>16),
  2680  					int16(x1Hi>>32) >= int16(x2Hi>>32), int16(x1Hi>>48) >= int16(x2Hi>>48),
  2681  				}
  2682  			case wazeroir.V128CmpTypeI16x8GeU:
  2683  				result = []bool{
  2684  					uint16(x1Lo>>0) >= uint16(x2Lo>>0), uint16(x1Lo>>16) >= uint16(x2Lo>>16),
  2685  					uint16(x1Lo>>32) >= uint16(x2Lo>>32), uint16(x1Lo>>48) >= uint16(x2Lo>>48),
  2686  					uint16(x1Hi>>0) >= uint16(x2Hi>>0), uint16(x1Hi>>16) >= uint16(x2Hi>>16),
  2687  					uint16(x1Hi>>32) >= uint16(x2Hi>>32), uint16(x1Hi>>48) >= uint16(x2Hi>>48),
  2688  				}
  2689  			case wazeroir.V128CmpTypeI32x4Eq:
  2690  				result = []bool{
  2691  					uint32(x1Lo>>0) == uint32(x2Lo>>0), uint32(x1Lo>>32) == uint32(x2Lo>>32),
  2692  					uint32(x1Hi>>0) == uint32(x2Hi>>0), uint32(x1Hi>>32) == uint32(x2Hi>>32),
  2693  				}
  2694  			case wazeroir.V128CmpTypeI32x4Ne:
  2695  				result = []bool{
  2696  					uint32(x1Lo>>0) != uint32(x2Lo>>0), uint32(x1Lo>>32) != uint32(x2Lo>>32),
  2697  					uint32(x1Hi>>0) != uint32(x2Hi>>0), uint32(x1Hi>>32) != uint32(x2Hi>>32),
  2698  				}
  2699  			case wazeroir.V128CmpTypeI32x4LtS:
  2700  				result = []bool{
  2701  					int32(x1Lo>>0) < int32(x2Lo>>0), int32(x1Lo>>32) < int32(x2Lo>>32),
  2702  					int32(x1Hi>>0) < int32(x2Hi>>0), int32(x1Hi>>32) < int32(x2Hi>>32),
  2703  				}
  2704  			case wazeroir.V128CmpTypeI32x4LtU:
  2705  				result = []bool{
  2706  					uint32(x1Lo>>0) < uint32(x2Lo>>0), uint32(x1Lo>>32) < uint32(x2Lo>>32),
  2707  					uint32(x1Hi>>0) < uint32(x2Hi>>0), uint32(x1Hi>>32) < uint32(x2Hi>>32),
  2708  				}
  2709  			case wazeroir.V128CmpTypeI32x4GtS:
  2710  				result = []bool{
  2711  					int32(x1Lo>>0) > int32(x2Lo>>0), int32(x1Lo>>32) > int32(x2Lo>>32),
  2712  					int32(x1Hi>>0) > int32(x2Hi>>0), int32(x1Hi>>32) > int32(x2Hi>>32),
  2713  				}
  2714  			case wazeroir.V128CmpTypeI32x4GtU:
  2715  				result = []bool{
  2716  					uint32(x1Lo>>0) > uint32(x2Lo>>0), uint32(x1Lo>>32) > uint32(x2Lo>>32),
  2717  					uint32(x1Hi>>0) > uint32(x2Hi>>0), uint32(x1Hi>>32) > uint32(x2Hi>>32),
  2718  				}
  2719  			case wazeroir.V128CmpTypeI32x4LeS:
  2720  				result = []bool{
  2721  					int32(x1Lo>>0) <= int32(x2Lo>>0), int32(x1Lo>>32) <= int32(x2Lo>>32),
  2722  					int32(x1Hi>>0) <= int32(x2Hi>>0), int32(x1Hi>>32) <= int32(x2Hi>>32),
  2723  				}
  2724  			case wazeroir.V128CmpTypeI32x4LeU:
  2725  				result = []bool{
  2726  					uint32(x1Lo>>0) <= uint32(x2Lo>>0), uint32(x1Lo>>32) <= uint32(x2Lo>>32),
  2727  					uint32(x1Hi>>0) <= uint32(x2Hi>>0), uint32(x1Hi>>32) <= uint32(x2Hi>>32),
  2728  				}
  2729  			case wazeroir.V128CmpTypeI32x4GeS:
  2730  				result = []bool{
  2731  					int32(x1Lo>>0) >= int32(x2Lo>>0), int32(x1Lo>>32) >= int32(x2Lo>>32),
  2732  					int32(x1Hi>>0) >= int32(x2Hi>>0), int32(x1Hi>>32) >= int32(x2Hi>>32),
  2733  				}
  2734  			case wazeroir.V128CmpTypeI32x4GeU:
  2735  				result = []bool{
  2736  					uint32(x1Lo>>0) >= uint32(x2Lo>>0), uint32(x1Lo>>32) >= uint32(x2Lo>>32),
  2737  					uint32(x1Hi>>0) >= uint32(x2Hi>>0), uint32(x1Hi>>32) >= uint32(x2Hi>>32),
  2738  				}
  2739  			case wazeroir.V128CmpTypeI64x2Eq:
  2740  				result = []bool{x1Lo == x2Lo, x1Hi == x2Hi}
  2741  			case wazeroir.V128CmpTypeI64x2Ne:
  2742  				result = []bool{x1Lo != x2Lo, x1Hi != x2Hi}
  2743  			case wazeroir.V128CmpTypeI64x2LtS:
  2744  				result = []bool{int64(x1Lo) < int64(x2Lo), int64(x1Hi) < int64(x2Hi)}
  2745  			case wazeroir.V128CmpTypeI64x2GtS:
  2746  				result = []bool{int64(x1Lo) > int64(x2Lo), int64(x1Hi) > int64(x2Hi)}
  2747  			case wazeroir.V128CmpTypeI64x2LeS:
  2748  				result = []bool{int64(x1Lo) <= int64(x2Lo), int64(x1Hi) <= int64(x2Hi)}
  2749  			case wazeroir.V128CmpTypeI64x2GeS:
  2750  				result = []bool{int64(x1Lo) >= int64(x2Lo), int64(x1Hi) >= int64(x2Hi)}
  2751  			case wazeroir.V128CmpTypeF32x4Eq:
  2752  				result = []bool{
  2753  					math.Float32frombits(uint32(x1Lo>>0)) == math.Float32frombits(uint32(x2Lo>>0)),
  2754  					math.Float32frombits(uint32(x1Lo>>32)) == math.Float32frombits(uint32(x2Lo>>32)),
  2755  					math.Float32frombits(uint32(x1Hi>>0)) == math.Float32frombits(uint32(x2Hi>>0)),
  2756  					math.Float32frombits(uint32(x1Hi>>32)) == math.Float32frombits(uint32(x2Hi>>32)),
  2757  				}
  2758  			case wazeroir.V128CmpTypeF32x4Ne:
  2759  				result = []bool{
  2760  					math.Float32frombits(uint32(x1Lo>>0)) != math.Float32frombits(uint32(x2Lo>>0)),
  2761  					math.Float32frombits(uint32(x1Lo>>32)) != math.Float32frombits(uint32(x2Lo>>32)),
  2762  					math.Float32frombits(uint32(x1Hi>>0)) != math.Float32frombits(uint32(x2Hi>>0)),
  2763  					math.Float32frombits(uint32(x1Hi>>32)) != math.Float32frombits(uint32(x2Hi>>32)),
  2764  				}
  2765  			case wazeroir.V128CmpTypeF32x4Lt:
  2766  				result = []bool{
  2767  					math.Float32frombits(uint32(x1Lo>>0)) < math.Float32frombits(uint32(x2Lo>>0)),
  2768  					math.Float32frombits(uint32(x1Lo>>32)) < math.Float32frombits(uint32(x2Lo>>32)),
  2769  					math.Float32frombits(uint32(x1Hi>>0)) < math.Float32frombits(uint32(x2Hi>>0)),
  2770  					math.Float32frombits(uint32(x1Hi>>32)) < math.Float32frombits(uint32(x2Hi>>32)),
  2771  				}
  2772  			case wazeroir.V128CmpTypeF32x4Gt:
  2773  				result = []bool{
  2774  					math.Float32frombits(uint32(x1Lo>>0)) > math.Float32frombits(uint32(x2Lo>>0)),
  2775  					math.Float32frombits(uint32(x1Lo>>32)) > math.Float32frombits(uint32(x2Lo>>32)),
  2776  					math.Float32frombits(uint32(x1Hi>>0)) > math.Float32frombits(uint32(x2Hi>>0)),
  2777  					math.Float32frombits(uint32(x1Hi>>32)) > math.Float32frombits(uint32(x2Hi>>32)),
  2778  				}
  2779  			case wazeroir.V128CmpTypeF32x4Le:
  2780  				result = []bool{
  2781  					math.Float32frombits(uint32(x1Lo>>0)) <= math.Float32frombits(uint32(x2Lo>>0)),
  2782  					math.Float32frombits(uint32(x1Lo>>32)) <= math.Float32frombits(uint32(x2Lo>>32)),
  2783  					math.Float32frombits(uint32(x1Hi>>0)) <= math.Float32frombits(uint32(x2Hi>>0)),
  2784  					math.Float32frombits(uint32(x1Hi>>32)) <= math.Float32frombits(uint32(x2Hi>>32)),
  2785  				}
  2786  			case wazeroir.V128CmpTypeF32x4Ge:
  2787  				result = []bool{
  2788  					math.Float32frombits(uint32(x1Lo>>0)) >= math.Float32frombits(uint32(x2Lo>>0)),
  2789  					math.Float32frombits(uint32(x1Lo>>32)) >= math.Float32frombits(uint32(x2Lo>>32)),
  2790  					math.Float32frombits(uint32(x1Hi>>0)) >= math.Float32frombits(uint32(x2Hi>>0)),
  2791  					math.Float32frombits(uint32(x1Hi>>32)) >= math.Float32frombits(uint32(x2Hi>>32)),
  2792  				}
  2793  			case wazeroir.V128CmpTypeF64x2Eq:
  2794  				result = []bool{
  2795  					math.Float64frombits(x1Lo) == math.Float64frombits(x2Lo),
  2796  					math.Float64frombits(x1Hi) == math.Float64frombits(x2Hi),
  2797  				}
  2798  			case wazeroir.V128CmpTypeF64x2Ne:
  2799  				result = []bool{
  2800  					math.Float64frombits(x1Lo) != math.Float64frombits(x2Lo),
  2801  					math.Float64frombits(x1Hi) != math.Float64frombits(x2Hi),
  2802  				}
  2803  			case wazeroir.V128CmpTypeF64x2Lt:
  2804  				result = []bool{
  2805  					math.Float64frombits(x1Lo) < math.Float64frombits(x2Lo),
  2806  					math.Float64frombits(x1Hi) < math.Float64frombits(x2Hi),
  2807  				}
  2808  			case wazeroir.V128CmpTypeF64x2Gt:
  2809  				result = []bool{
  2810  					math.Float64frombits(x1Lo) > math.Float64frombits(x2Lo),
  2811  					math.Float64frombits(x1Hi) > math.Float64frombits(x2Hi),
  2812  				}
  2813  			case wazeroir.V128CmpTypeF64x2Le:
  2814  				result = []bool{
  2815  					math.Float64frombits(x1Lo) <= math.Float64frombits(x2Lo),
  2816  					math.Float64frombits(x1Hi) <= math.Float64frombits(x2Hi),
  2817  				}
  2818  			case wazeroir.V128CmpTypeF64x2Ge:
  2819  				result = []bool{
  2820  					math.Float64frombits(x1Lo) >= math.Float64frombits(x2Lo),
  2821  					math.Float64frombits(x1Hi) >= math.Float64frombits(x2Hi),
  2822  				}
  2823  			}
  2824  
  2825  			var retLo, retHi uint64
  2826  			laneNum := len(result)
  2827  			switch laneNum {
  2828  			case 16:
  2829  				for i, b := range result {
  2830  					if b {
  2831  						if i < 8 {
  2832  							retLo |= 0xff << (i * 8)
  2833  						} else {
  2834  							retHi |= 0xff << ((i - 8) * 8)
  2835  						}
  2836  					}
  2837  				}
  2838  			case 8:
  2839  				for i, b := range result {
  2840  					if b {
  2841  						if i < 4 {
  2842  							retLo |= 0xffff << (i * 16)
  2843  						} else {
  2844  							retHi |= 0xffff << ((i - 4) * 16)
  2845  						}
  2846  					}
  2847  				}
  2848  			case 4:
  2849  				for i, b := range result {
  2850  					if b {
  2851  						if i < 2 {
  2852  							retLo |= 0xffff_ffff << (i * 32)
  2853  						} else {
  2854  							retHi |= 0xffff_ffff << ((i - 2) * 32)
  2855  						}
  2856  					}
  2857  				}
  2858  			case 2:
  2859  				if result[0] {
  2860  					retLo = ^uint64(0)
  2861  				}
  2862  				if result[1] {
  2863  					retHi = ^uint64(0)
  2864  				}
  2865  			}
  2866  
  2867  			ce.pushValue(retLo)
  2868  			ce.pushValue(retHi)
  2869  			frame.pc++
  2870  		case wazeroir.OperationKindV128AddSat:
  2871  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2872  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2873  
  2874  			var retLo, retHi uint64
  2875  
  2876  			// Lane-wise addition while saturating the overflowing values.
  2877  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-addition
  2878  			switch op.B1 {
  2879  			case wazeroir.ShapeI8x16:
  2880  				for i := 0; i < 16; i++ {
  2881  					var v, w byte
  2882  					if i < 8 {
  2883  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2884  					} else {
  2885  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2886  					}
  2887  
  2888  					var uv uint64
  2889  					if op.B3 { // signed
  2890  						if subbed := int64(int8(v)) + int64(int8(w)); subbed < math.MinInt8 {
  2891  							uv = uint64(byte(0x80))
  2892  						} else if subbed > math.MaxInt8 {
  2893  							uv = uint64(byte(0x7f))
  2894  						} else {
  2895  							uv = uint64(byte(int8(subbed)))
  2896  						}
  2897  					} else {
  2898  						if subbed := int64(v) + int64(w); subbed < 0 {
  2899  							uv = uint64(byte(0))
  2900  						} else if subbed > math.MaxUint8 {
  2901  							uv = uint64(byte(0xff))
  2902  						} else {
  2903  							uv = uint64(byte(subbed))
  2904  						}
  2905  					}
  2906  
  2907  					if i < 8 { // first 8 lanes are on lower 64bits.
  2908  						retLo |= uv << (i * 8)
  2909  					} else {
  2910  						retHi |= uv << ((i - 8) * 8)
  2911  					}
  2912  				}
  2913  			case wazeroir.ShapeI16x8:
  2914  				for i := 0; i < 8; i++ {
  2915  					var v, w uint16
  2916  					if i < 4 {
  2917  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  2918  					} else {
  2919  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  2920  					}
  2921  
  2922  					var uv uint64
  2923  					if op.B3 { // signed
  2924  						if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 {
  2925  							uv = uint64(uint16(0x8000))
  2926  						} else if added > math.MaxInt16 {
  2927  							uv = uint64(uint16(0x7fff))
  2928  						} else {
  2929  							uv = uint64(uint16(int16(added)))
  2930  						}
  2931  					} else {
  2932  						if added := int64(v) + int64(w); added < 0 {
  2933  							uv = uint64(uint16(0))
  2934  						} else if added > math.MaxUint16 {
  2935  							uv = uint64(uint16(0xffff))
  2936  						} else {
  2937  							uv = uint64(uint16(added))
  2938  						}
  2939  					}
  2940  
  2941  					if i < 4 { // first 4 lanes are on lower 64bits.
  2942  						retLo |= uv << (i * 16)
  2943  					} else {
  2944  						retHi |= uv << ((i - 4) * 16)
  2945  					}
  2946  				}
  2947  			}
  2948  
  2949  			ce.pushValue(retLo)
  2950  			ce.pushValue(retHi)
  2951  			frame.pc++
  2952  		case wazeroir.OperationKindV128SubSat:
  2953  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2954  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2955  
  2956  			var retLo, retHi uint64
  2957  
  2958  			// Lane-wise subtraction while saturating the overflowing values.
  2959  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-subtraction
  2960  			switch op.B1 {
  2961  			case wazeroir.ShapeI8x16:
  2962  				for i := 0; i < 16; i++ {
  2963  					var v, w byte
  2964  					if i < 8 {
  2965  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2966  					} else {
  2967  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2968  					}
  2969  
  2970  					var uv uint64
  2971  					if op.B3 { // signed
  2972  						if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 {
  2973  							uv = uint64(byte(0x80))
  2974  						} else if subbed > math.MaxInt8 {
  2975  							uv = uint64(byte(0x7f))
  2976  						} else {
  2977  							uv = uint64(byte(int8(subbed)))
  2978  						}
  2979  					} else {
  2980  						if subbed := int64(v) - int64(w); subbed < 0 {
  2981  							uv = uint64(byte(0))
  2982  						} else if subbed > math.MaxUint8 {
  2983  							uv = uint64(byte(0xff))
  2984  						} else {
  2985  							uv = uint64(byte(subbed))
  2986  						}
  2987  					}
  2988  
  2989  					if i < 8 {
  2990  						retLo |= uv << (i * 8)
  2991  					} else {
  2992  						retHi |= uv << ((i - 8) * 8)
  2993  					}
  2994  				}
  2995  			case wazeroir.ShapeI16x8:
  2996  				for i := 0; i < 8; i++ {
  2997  					var v, w uint16
  2998  					if i < 4 {
  2999  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  3000  					} else {
  3001  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  3002  					}
  3003  
  3004  					var uv uint64
  3005  					if op.B3 { // signed
  3006  						if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 {
  3007  							uv = uint64(uint16(0x8000))
  3008  						} else if subbed > math.MaxInt16 {
  3009  							uv = uint64(uint16(0x7fff))
  3010  						} else {
  3011  							uv = uint64(uint16(int16(subbed)))
  3012  						}
  3013  					} else {
  3014  						if subbed := int64(v) - int64(w); subbed < 0 {
  3015  							uv = uint64(uint16(0))
  3016  						} else if subbed > math.MaxUint16 {
  3017  							uv = uint64(uint16(0xffff))
  3018  						} else {
  3019  							uv = uint64(uint16(subbed))
  3020  						}
  3021  					}
  3022  
  3023  					if i < 4 {
  3024  						retLo |= uv << (i * 16)
  3025  					} else {
  3026  						retHi |= uv << ((i - 4) * 16)
  3027  					}
  3028  				}
  3029  			}
  3030  
  3031  			ce.pushValue(retLo)
  3032  			ce.pushValue(retHi)
  3033  			frame.pc++
  3034  		case wazeroir.OperationKindV128Mul:
  3035  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3036  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3037  			var retLo, retHi uint64
  3038  			switch op.B1 {
  3039  			case wazeroir.ShapeI16x8:
  3040  				retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) |
  3041  					(uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48)
  3042  				retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) |
  3043  					(uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48)
  3044  			case wazeroir.ShapeI32x4:
  3045  				retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32)
  3046  				retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32)
  3047  			case wazeroir.ShapeI64x2:
  3048  				retHi = x1hi * x2hi
  3049  				retLo = x1lo * x2lo
  3050  			case wazeroir.ShapeF32x4:
  3051  				retHi = mulFloat32bits(uint32(x1hi), uint32(x2hi)) | mulFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3052  				retLo = mulFloat32bits(uint32(x1lo), uint32(x2lo)) | mulFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3053  			case wazeroir.ShapeF64x2:
  3054  				retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi))
  3055  				retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo))
  3056  			}
  3057  			ce.pushValue(retLo)
  3058  			ce.pushValue(retHi)
  3059  			frame.pc++
  3060  		case wazeroir.OperationKindV128Div:
  3061  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3062  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3063  			var retLo, retHi uint64
  3064  			if op.B1 == wazeroir.ShapeF64x2 {
  3065  				retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi))
  3066  				retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo))
  3067  			} else {
  3068  				retHi = divFloat32bits(uint32(x1hi), uint32(x2hi)) | divFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3069  				retLo = divFloat32bits(uint32(x1lo), uint32(x2lo)) | divFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3070  			}
  3071  			ce.pushValue(retLo)
  3072  			ce.pushValue(retHi)
  3073  			frame.pc++
  3074  		case wazeroir.OperationKindV128Neg:
  3075  			hi, lo := ce.popValue(), ce.popValue()
  3076  			switch op.B1 {
  3077  			case wazeroir.ShapeI8x16:
  3078  				lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) |
  3079  					(uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) |
  3080  					(uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) |
  3081  					(uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56)
  3082  				hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) |
  3083  					(uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) |
  3084  					(uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) |
  3085  					(uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56)
  3086  			case wazeroir.ShapeI16x8:
  3087  				hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) |
  3088  					(uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48)
  3089  				lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) |
  3090  					(uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48)
  3091  			case wazeroir.ShapeI32x4:
  3092  				hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32)
  3093  				lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32)
  3094  			case wazeroir.ShapeI64x2:
  3095  				hi = -hi
  3096  				lo = -lo
  3097  			case wazeroir.ShapeF32x4:
  3098  				hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) |
  3099  					(uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32)
  3100  				lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) |
  3101  					(uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32)
  3102  			case wazeroir.ShapeF64x2:
  3103  				hi = math.Float64bits(-math.Float64frombits(hi))
  3104  				lo = math.Float64bits(-math.Float64frombits(lo))
  3105  			}
  3106  			ce.pushValue(lo)
  3107  			ce.pushValue(hi)
  3108  			frame.pc++
  3109  		case wazeroir.OperationKindV128Sqrt:
  3110  			hi, lo := ce.popValue(), ce.popValue()
  3111  			if op.B1 == wazeroir.ShapeF64x2 {
  3112  				hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi)))
  3113  				lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo)))
  3114  			} else {
  3115  				hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) |
  3116  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32)
  3117  				lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) |
  3118  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32)
  3119  			}
  3120  			ce.pushValue(lo)
  3121  			ce.pushValue(hi)
  3122  			frame.pc++
  3123  		case wazeroir.OperationKindV128Abs:
  3124  			hi, lo := ce.popValue(), ce.popValue()
  3125  			switch op.B1 {
  3126  			case wazeroir.ShapeI8x16:
  3127  				lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) |
  3128  					(uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) |
  3129  					(uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) |
  3130  					(uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56)
  3131  				hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) |
  3132  					(uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) |
  3133  					(uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) |
  3134  					(uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56)
  3135  			case wazeroir.ShapeI16x8:
  3136  				hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) |
  3137  					(uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48)
  3138  				lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) |
  3139  					(uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48)
  3140  			case wazeroir.ShapeI32x4:
  3141  				hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32)
  3142  				lo = uint64(i32Abs(uint32(lo))) | (uint64(i32Abs(uint32(lo>>32))) << 32)
  3143  			case wazeroir.ShapeI64x2:
  3144  				if int64(hi) < 0 {
  3145  					hi = -hi
  3146  				}
  3147  				if int64(lo) < 0 {
  3148  					lo = -lo
  3149  				}
  3150  			case wazeroir.ShapeF32x4:
  3151  				hi = hi &^ (1<<31 | 1<<63)
  3152  				lo = lo &^ (1<<31 | 1<<63)
  3153  			case wazeroir.ShapeF64x2:
  3154  				hi = hi &^ (1 << 63)
  3155  				lo = lo &^ (1 << 63)
  3156  			}
  3157  			ce.pushValue(lo)
  3158  			ce.pushValue(hi)
  3159  			frame.pc++
  3160  		case wazeroir.OperationKindV128Popcnt:
  3161  			hi, lo := ce.popValue(), ce.popValue()
  3162  			var retLo, retHi uint64
  3163  			for i := 0; i < 16; i++ {
  3164  				var v byte
  3165  				if i < 8 {
  3166  					v = byte(lo >> (i * 8))
  3167  				} else {
  3168  					v = byte(hi >> ((i - 8) * 8))
  3169  				}
  3170  
  3171  				var cnt uint64
  3172  				for i := 0; i < 8; i++ {
  3173  					if (v>>i)&0b1 != 0 {
  3174  						cnt++
  3175  					}
  3176  				}
  3177  
  3178  				if i < 8 {
  3179  					retLo |= cnt << (i * 8)
  3180  				} else {
  3181  					retHi |= cnt << ((i - 8) * 8)
  3182  				}
  3183  			}
  3184  			ce.pushValue(retLo)
  3185  			ce.pushValue(retHi)
  3186  			frame.pc++
  3187  		case wazeroir.OperationKindV128Min:
  3188  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3189  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3190  			var retLo, retHi uint64
  3191  			switch op.B1 {
  3192  			case wazeroir.ShapeI8x16:
  3193  				if op.B3 { // signed
  3194  					retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) |
  3195  						uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3196  						uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3197  						uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3198  					retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) |
  3199  						uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3200  						uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3201  						uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3202  				} else {
  3203  					retLo = uint64(i8MinU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) |
  3204  						uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3205  						uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3206  						uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3207  					retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) |
  3208  						uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3209  						uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3210  						uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3211  				}
  3212  			case wazeroir.ShapeI16x8:
  3213  				if op.B3 { // signed
  3214  					retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) |
  3215  						uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3216  						uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3217  						uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3218  					retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) |
  3219  						uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3220  						uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3221  						uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3222  				} else {
  3223  					retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) |
  3224  						uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3225  						uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3226  						uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3227  					retHi = uint64(i16MinU(uint16(x1hi), uint16(x2hi))) |
  3228  						uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3229  						uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3230  						uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3231  				}
  3232  			case wazeroir.ShapeI32x4:
  3233  				if op.B3 { // signed
  3234  					retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) |
  3235  						uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3236  					retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) |
  3237  						uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3238  				} else {
  3239  					retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) |
  3240  						uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3241  					retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) |
  3242  						uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3243  				}
  3244  			case wazeroir.ShapeF32x4:
  3245  				retHi = WasmCompatMin32bits(uint32(x1hi), uint32(x2hi)) |
  3246  					WasmCompatMin32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3247  				retLo = WasmCompatMin32bits(uint32(x1lo), uint32(x2lo)) |
  3248  					WasmCompatMin32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3249  			case wazeroir.ShapeF64x2:
  3250  				retHi = math.Float64bits(moremath.WasmCompatMin64(
  3251  					math.Float64frombits(x1hi),
  3252  					math.Float64frombits(x2hi),
  3253  				))
  3254  				retLo = math.Float64bits(moremath.WasmCompatMin64(
  3255  					math.Float64frombits(x1lo),
  3256  					math.Float64frombits(x2lo),
  3257  				))
  3258  			}
  3259  			ce.pushValue(retLo)
  3260  			ce.pushValue(retHi)
  3261  			frame.pc++
  3262  		case wazeroir.OperationKindV128Max:
  3263  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3264  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3265  			var retLo, retHi uint64
  3266  			switch op.B1 {
  3267  			case wazeroir.ShapeI8x16:
  3268  				if op.B3 { // signed
  3269  					retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) |
  3270  						uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3271  						uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3272  						uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3273  					retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) |
  3274  						uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3275  						uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3276  						uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3277  				} else {
  3278  					retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) |
  3279  						uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3280  						uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3281  						uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3282  					retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) |
  3283  						uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3284  						uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3285  						uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3286  				}
  3287  			case wazeroir.ShapeI16x8:
  3288  				if op.B3 { // signed
  3289  					retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) |
  3290  						uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3291  						uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3292  						uint64(i16MaxS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3293  					retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) |
  3294  						uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3295  						uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3296  						uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3297  				} else {
  3298  					retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) |
  3299  						uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3300  						uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3301  						uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3302  					retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) |
  3303  						uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3304  						uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3305  						uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3306  				}
  3307  			case wazeroir.ShapeI32x4:
  3308  				if op.B3 { // signed
  3309  					retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) |
  3310  						uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3311  					retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) |
  3312  						uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3313  				} else {
  3314  					retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) |
  3315  						uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3316  					retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) |
  3317  						uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3318  				}
  3319  			case wazeroir.ShapeF32x4:
  3320  				retHi = WasmCompatMax32bits(uint32(x1hi), uint32(x2hi)) |
  3321  					WasmCompatMax32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3322  				retLo = WasmCompatMax32bits(uint32(x1lo), uint32(x2lo)) |
  3323  					WasmCompatMax32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3324  			case wazeroir.ShapeF64x2:
  3325  				retHi = math.Float64bits(moremath.WasmCompatMax64(
  3326  					math.Float64frombits(x1hi),
  3327  					math.Float64frombits(x2hi),
  3328  				))
  3329  				retLo = math.Float64bits(moremath.WasmCompatMax64(
  3330  					math.Float64frombits(x1lo),
  3331  					math.Float64frombits(x2lo),
  3332  				))
  3333  			}
  3334  			ce.pushValue(retLo)
  3335  			ce.pushValue(retHi)
  3336  			frame.pc++
  3337  		case wazeroir.OperationKindV128AvgrU:
  3338  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3339  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3340  			var retLo, retHi uint64
  3341  			switch op.B1 {
  3342  			case wazeroir.ShapeI8x16:
  3343  				retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) |
  3344  					uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3345  					uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3346  					uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3347  				retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) |
  3348  					uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3349  					uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3350  					uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3351  			case wazeroir.ShapeI16x8:
  3352  				retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) |
  3353  					uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3354  					uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3355  					uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3356  				retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) |
  3357  					uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3358  					uint64(i16RoundingAverage(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3359  					uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3360  			}
  3361  			ce.pushValue(retLo)
  3362  			ce.pushValue(retHi)
  3363  			frame.pc++
  3364  		case wazeroir.OperationKindV128Pmin:
  3365  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3366  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3367  			var retLo, retHi uint64
  3368  			if op.B1 == wazeroir.ShapeF32x4 {
  3369  				if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) {
  3370  					retLo = x2lo & 0x00000000_ffffffff
  3371  				} else {
  3372  					retLo = x1lo & 0x00000000_ffffffff
  3373  				}
  3374  				if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) {
  3375  					retLo |= x2lo & 0xffffffff_00000000
  3376  				} else {
  3377  					retLo |= x1lo & 0xffffffff_00000000
  3378  				}
  3379  				if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) {
  3380  					retHi = x2hi & 0x00000000_ffffffff
  3381  				} else {
  3382  					retHi = x1hi & 0x00000000_ffffffff
  3383  				}
  3384  				if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) {
  3385  					retHi |= x2hi & 0xffffffff_00000000
  3386  				} else {
  3387  					retHi |= x1hi & 0xffffffff_00000000
  3388  				}
  3389  			} else {
  3390  				if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) {
  3391  					retLo = x2lo
  3392  				} else {
  3393  					retLo = x1lo
  3394  				}
  3395  				if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) {
  3396  					retHi = x2hi
  3397  				} else {
  3398  					retHi = x1hi
  3399  				}
  3400  			}
  3401  			ce.pushValue(retLo)
  3402  			ce.pushValue(retHi)
  3403  			frame.pc++
  3404  		case wazeroir.OperationKindV128Pmax:
  3405  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3406  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3407  			var retLo, retHi uint64
  3408  			if op.B1 == wazeroir.ShapeF32x4 {
  3409  				if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) {
  3410  					retLo = x2lo & 0x00000000_ffffffff
  3411  				} else {
  3412  					retLo = x1lo & 0x00000000_ffffffff
  3413  				}
  3414  				if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) {
  3415  					retLo |= x2lo & 0xffffffff_00000000
  3416  				} else {
  3417  					retLo |= x1lo & 0xffffffff_00000000
  3418  				}
  3419  				if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) {
  3420  					retHi = x2hi & 0x00000000_ffffffff
  3421  				} else {
  3422  					retHi = x1hi & 0x00000000_ffffffff
  3423  				}
  3424  				if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) {
  3425  					retHi |= x2hi & 0xffffffff_00000000
  3426  				} else {
  3427  					retHi |= x1hi & 0xffffffff_00000000
  3428  				}
  3429  			} else {
  3430  				if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) {
  3431  					retLo = x2lo
  3432  				} else {
  3433  					retLo = x1lo
  3434  				}
  3435  				if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) {
  3436  					retHi = x2hi
  3437  				} else {
  3438  					retHi = x1hi
  3439  				}
  3440  			}
  3441  			ce.pushValue(retLo)
  3442  			ce.pushValue(retHi)
  3443  			frame.pc++
  3444  		case wazeroir.OperationKindV128Ceil:
  3445  			hi, lo := ce.popValue(), ce.popValue()
  3446  			if op.B1 == wazeroir.ShapeF32x4 {
  3447  				lo = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo))))) |
  3448  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3449  				hi = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi))))) |
  3450  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3451  			} else {
  3452  				lo = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(lo)))
  3453  				hi = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(hi)))
  3454  			}
  3455  			ce.pushValue(lo)
  3456  			ce.pushValue(hi)
  3457  			frame.pc++
  3458  		case wazeroir.OperationKindV128Floor:
  3459  			hi, lo := ce.popValue(), ce.popValue()
  3460  			if op.B1 == wazeroir.ShapeF32x4 {
  3461  				lo = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo))))) |
  3462  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3463  				hi = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi))))) |
  3464  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3465  			} else {
  3466  				lo = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(lo)))
  3467  				hi = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(hi)))
  3468  			}
  3469  			ce.pushValue(lo)
  3470  			ce.pushValue(hi)
  3471  			frame.pc++
  3472  		case wazeroir.OperationKindV128Trunc:
  3473  			hi, lo := ce.popValue(), ce.popValue()
  3474  			if op.B1 == wazeroir.ShapeF32x4 {
  3475  				lo = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo))))) |
  3476  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3477  				hi = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi))))) |
  3478  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3479  			} else {
  3480  				lo = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(lo)))
  3481  				hi = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(hi)))
  3482  			}
  3483  			ce.pushValue(lo)
  3484  			ce.pushValue(hi)
  3485  			frame.pc++
  3486  		case wazeroir.OperationKindV128Nearest:
  3487  			hi, lo := ce.popValue(), ce.popValue()
  3488  			if op.B1 == wazeroir.ShapeF32x4 {
  3489  				lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) |
  3490  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3491  				hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) |
  3492  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3493  			} else {
  3494  				lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo)))
  3495  				hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi)))
  3496  			}
  3497  			ce.pushValue(lo)
  3498  			ce.pushValue(hi)
  3499  			frame.pc++
  3500  		case wazeroir.OperationKindV128Extend:
  3501  			hi, lo := ce.popValue(), ce.popValue()
  3502  			var origin uint64
  3503  			if op.B3 { // use lower 64 bits
  3504  				origin = lo
  3505  			} else {
  3506  				origin = hi
  3507  			}
  3508  
  3509  			signed := op.B2 == 1
  3510  
  3511  			var retHi, retLo uint64
  3512  			switch op.B1 {
  3513  			case wazeroir.ShapeI8x16:
  3514  				for i := 0; i < 8; i++ {
  3515  					v8 := byte(origin >> (i * 8))
  3516  
  3517  					var v16 uint16
  3518  					if signed {
  3519  						v16 = uint16(int8(v8))
  3520  					} else {
  3521  						v16 = uint16(v8)
  3522  					}
  3523  
  3524  					if i < 4 {
  3525  						retLo |= uint64(v16) << (i * 16)
  3526  					} else {
  3527  						retHi |= uint64(v16) << ((i - 4) * 16)
  3528  					}
  3529  				}
  3530  			case wazeroir.ShapeI16x8:
  3531  				for i := 0; i < 4; i++ {
  3532  					v16 := uint16(origin >> (i * 16))
  3533  
  3534  					var v32 uint32
  3535  					if signed {
  3536  						v32 = uint32(int16(v16))
  3537  					} else {
  3538  						v32 = uint32(v16)
  3539  					}
  3540  
  3541  					if i < 2 {
  3542  						retLo |= uint64(v32) << (i * 32)
  3543  					} else {
  3544  						retHi |= uint64(v32) << ((i - 2) * 32)
  3545  					}
  3546  				}
  3547  			case wazeroir.ShapeI32x4:
  3548  				v32Lo := uint32(origin)
  3549  				v32Hi := uint32(origin >> 32)
  3550  				if signed {
  3551  					retLo = uint64(int32(v32Lo))
  3552  					retHi = uint64(int32(v32Hi))
  3553  				} else {
  3554  					retLo = uint64(v32Lo)
  3555  					retHi = uint64(v32Hi)
  3556  				}
  3557  			}
  3558  			ce.pushValue(retLo)
  3559  			ce.pushValue(retHi)
  3560  			frame.pc++
  3561  		case wazeroir.OperationKindV128ExtMul:
  3562  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3563  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3564  			var x1, x2 uint64
  3565  			if op.B3 { // use lower 64 bits
  3566  				x1, x2 = x1Lo, x2Lo
  3567  			} else {
  3568  				x1, x2 = x1Hi, x2Hi
  3569  			}
  3570  
  3571  			signed := op.B2 == 1
  3572  
  3573  			var retLo, retHi uint64
  3574  			switch op.B1 {
  3575  			case wazeroir.ShapeI8x16:
  3576  				for i := 0; i < 8; i++ {
  3577  					v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8))
  3578  
  3579  					var v16 uint16
  3580  					if signed {
  3581  						v16 = uint16(int16(int8(v1)) * int16(int8(v2)))
  3582  					} else {
  3583  						v16 = uint16(v1) * uint16(v2)
  3584  					}
  3585  
  3586  					if i < 4 {
  3587  						retLo |= uint64(v16) << (i * 16)
  3588  					} else {
  3589  						retHi |= uint64(v16) << ((i - 4) * 16)
  3590  					}
  3591  				}
  3592  			case wazeroir.ShapeI16x8:
  3593  				for i := 0; i < 4; i++ {
  3594  					v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16))
  3595  
  3596  					var v32 uint32
  3597  					if signed {
  3598  						v32 = uint32(int32(int16(v1)) * int32(int16(v2)))
  3599  					} else {
  3600  						v32 = uint32(v1) * uint32(v2)
  3601  					}
  3602  
  3603  					if i < 2 {
  3604  						retLo |= uint64(v32) << (i * 32)
  3605  					} else {
  3606  						retHi |= uint64(v32) << ((i - 2) * 32)
  3607  					}
  3608  				}
  3609  			case wazeroir.ShapeI32x4:
  3610  				v1Lo, v2Lo := uint32(x1), uint32(x2)
  3611  				v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32)
  3612  				if signed {
  3613  					retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo)))
  3614  					retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi)))
  3615  				} else {
  3616  					retLo = uint64(v1Lo) * uint64(v2Lo)
  3617  					retHi = uint64(v1Hi) * uint64(v2Hi)
  3618  				}
  3619  			}
  3620  
  3621  			ce.pushValue(retLo)
  3622  			ce.pushValue(retHi)
  3623  			frame.pc++
  3624  		case wazeroir.OperationKindV128Q15mulrSatS:
  3625  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  3626  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  3627  			var retLo, retHi uint64
  3628  			for i := 0; i < 8; i++ {
  3629  				var v, w int16
  3630  				if i < 4 {
  3631  					v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16)))
  3632  				} else {
  3633  					v, w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16)))
  3634  				}
  3635  
  3636  				var uv uint64
  3637  				// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication
  3638  				if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 {
  3639  					uv = uint64(uint16(0x8000))
  3640  				} else if calc > math.MaxInt16 {
  3641  					uv = uint64(uint16(0x7fff))
  3642  				} else {
  3643  					uv = uint64(uint16(int16(calc)))
  3644  				}
  3645  
  3646  				if i < 4 {
  3647  					retLo |= uv << (i * 16)
  3648  				} else {
  3649  					retHi |= uv << ((i - 4) * 16)
  3650  				}
  3651  			}
  3652  
  3653  			ce.pushValue(retLo)
  3654  			ce.pushValue(retHi)
  3655  			frame.pc++
  3656  		case wazeroir.OperationKindV128ExtAddPairwise:
  3657  			hi, lo := ce.popValue(), ce.popValue()
  3658  
  3659  			signed := op.B3
  3660  
  3661  			var retLo, retHi uint64
  3662  			switch op.B1 {
  3663  			case wazeroir.ShapeI8x16:
  3664  				for i := 0; i < 8; i++ {
  3665  					var v1, v2 byte
  3666  					if i < 4 {
  3667  						v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8))
  3668  					} else {
  3669  						v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8))
  3670  					}
  3671  
  3672  					var v16 uint16
  3673  					if signed {
  3674  						v16 = uint16(int16(int8(v1)) + int16(int8(v2)))
  3675  					} else {
  3676  						v16 = uint16(v1) + uint16(v2)
  3677  					}
  3678  
  3679  					if i < 4 {
  3680  						retLo |= uint64(v16) << (i * 16)
  3681  					} else {
  3682  						retHi |= uint64(v16) << ((i - 4) * 16)
  3683  					}
  3684  				}
  3685  			case wazeroir.ShapeI16x8:
  3686  				for i := 0; i < 4; i++ {
  3687  					var v1, v2 uint16
  3688  					if i < 2 {
  3689  						v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16))
  3690  					} else {
  3691  						v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16))
  3692  					}
  3693  
  3694  					var v32 uint32
  3695  					if signed {
  3696  						v32 = uint32(int32(int16(v1)) + int32(int16(v2)))
  3697  					} else {
  3698  						v32 = uint32(v1) + uint32(v2)
  3699  					}
  3700  
  3701  					if i < 2 {
  3702  						retLo |= uint64(v32) << (i * 32)
  3703  					} else {
  3704  						retHi |= uint64(v32) << ((i - 2) * 32)
  3705  					}
  3706  				}
  3707  			}
  3708  			ce.pushValue(retLo)
  3709  			ce.pushValue(retHi)
  3710  			frame.pc++
  3711  		case wazeroir.OperationKindV128FloatPromote:
  3712  			_, toPromote := ce.popValue(), ce.popValue()
  3713  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote)))))
  3714  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote >> 32)))))
  3715  			frame.pc++
  3716  		case wazeroir.OperationKindV128FloatDemote:
  3717  			hi, lo := ce.popValue(), ce.popValue()
  3718  			ce.pushValue(
  3719  				uint64(math.Float32bits(float32(math.Float64frombits(lo)))) |
  3720  					(uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32),
  3721  			)
  3722  			ce.pushValue(0)
  3723  			frame.pc++
  3724  		case wazeroir.OperationKindV128FConvertFromI:
  3725  			hi, lo := ce.popValue(), ce.popValue()
  3726  			v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32)
  3727  			signed := op.B3
  3728  
  3729  			var retLo, retHi uint64
  3730  			switch op.B1 { // Destination shape.
  3731  			case wazeroir.ShapeF32x4: // f32x4 from signed/unsigned i32x4
  3732  				if signed {
  3733  					retLo = uint64(math.Float32bits(float32(int32(v1)))) |
  3734  						(uint64(math.Float32bits(float32(int32(v2)))) << 32)
  3735  					retHi = uint64(math.Float32bits(float32(int32(v3)))) |
  3736  						(uint64(math.Float32bits(float32(int32(v4)))) << 32)
  3737  				} else {
  3738  					retLo = uint64(math.Float32bits(float32(v1))) |
  3739  						(uint64(math.Float32bits(float32(v2))) << 32)
  3740  					retHi = uint64(math.Float32bits(float32(v3))) |
  3741  						(uint64(math.Float32bits(float32(v4))) << 32)
  3742  				}
  3743  			case wazeroir.ShapeF64x2: // f64x2 from signed/unsigned i32x4
  3744  				if signed {
  3745  					retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2)))
  3746  				} else {
  3747  					retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2))
  3748  				}
  3749  			}
  3750  
  3751  			ce.pushValue(retLo)
  3752  			ce.pushValue(retHi)
  3753  			frame.pc++
  3754  		case wazeroir.OperationKindV128Narrow:
  3755  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3756  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3757  			signed := op.B3
  3758  
  3759  			var retLo, retHi uint64
  3760  			switch op.B1 {
  3761  			case wazeroir.ShapeI16x8: // signed/unsigned i16x8 to i8x16
  3762  				for i := 0; i < 8; i++ {
  3763  					var v16 uint16
  3764  					if i < 4 {
  3765  						v16 = uint16(x1Lo >> (i * 16))
  3766  					} else {
  3767  						v16 = uint16(x1Hi >> ((i - 4) * 16))
  3768  					}
  3769  
  3770  					var v byte
  3771  					if signed {
  3772  						if s := int16(v16); s > math.MaxInt8 {
  3773  							v = math.MaxInt8
  3774  						} else if s < math.MinInt8 {
  3775  							s = math.MinInt8
  3776  							v = byte(s)
  3777  						} else {
  3778  							v = byte(v16)
  3779  						}
  3780  					} else {
  3781  						if s := int16(v16); s > math.MaxUint8 {
  3782  							v = math.MaxUint8
  3783  						} else if s < 0 {
  3784  							v = 0
  3785  						} else {
  3786  							v = byte(v16)
  3787  						}
  3788  					}
  3789  					retLo |= uint64(v) << (i * 8)
  3790  				}
  3791  				for i := 0; i < 8; i++ {
  3792  					var v16 uint16
  3793  					if i < 4 {
  3794  						v16 = uint16(x2Lo >> (i * 16))
  3795  					} else {
  3796  						v16 = uint16(x2Hi >> ((i - 4) * 16))
  3797  					}
  3798  
  3799  					var v byte
  3800  					if signed {
  3801  						if s := int16(v16); s > math.MaxInt8 {
  3802  							v = math.MaxInt8
  3803  						} else if s < math.MinInt8 {
  3804  							s = math.MinInt8
  3805  							v = byte(s)
  3806  						} else {
  3807  							v = byte(v16)
  3808  						}
  3809  					} else {
  3810  						if s := int16(v16); s > math.MaxUint8 {
  3811  							v = math.MaxUint8
  3812  						} else if s < 0 {
  3813  							v = 0
  3814  						} else {
  3815  							v = byte(v16)
  3816  						}
  3817  					}
  3818  					retHi |= uint64(v) << (i * 8)
  3819  				}
  3820  			case wazeroir.ShapeI32x4: // signed/unsigned i32x4 to i16x8
  3821  				for i := 0; i < 4; i++ {
  3822  					var v32 uint32
  3823  					if i < 2 {
  3824  						v32 = uint32(x1Lo >> (i * 32))
  3825  					} else {
  3826  						v32 = uint32(x1Hi >> ((i - 2) * 32))
  3827  					}
  3828  
  3829  					var v uint16
  3830  					if signed {
  3831  						if s := int32(v32); s > math.MaxInt16 {
  3832  							v = math.MaxInt16
  3833  						} else if s < math.MinInt16 {
  3834  							s = math.MinInt16
  3835  							v = uint16(s)
  3836  						} else {
  3837  							v = uint16(v32)
  3838  						}
  3839  					} else {
  3840  						if s := int32(v32); s > math.MaxUint16 {
  3841  							v = math.MaxUint16
  3842  						} else if s < 0 {
  3843  							v = 0
  3844  						} else {
  3845  							v = uint16(v32)
  3846  						}
  3847  					}
  3848  					retLo |= uint64(v) << (i * 16)
  3849  				}
  3850  
  3851  				for i := 0; i < 4; i++ {
  3852  					var v32 uint32
  3853  					if i < 2 {
  3854  						v32 = uint32(x2Lo >> (i * 32))
  3855  					} else {
  3856  						v32 = uint32(x2Hi >> ((i - 2) * 32))
  3857  					}
  3858  
  3859  					var v uint16
  3860  					if signed {
  3861  						if s := int32(v32); s > math.MaxInt16 {
  3862  							v = math.MaxInt16
  3863  						} else if s < math.MinInt16 {
  3864  							s = math.MinInt16
  3865  							v = uint16(s)
  3866  						} else {
  3867  							v = uint16(v32)
  3868  						}
  3869  					} else {
  3870  						if s := int32(v32); s > math.MaxUint16 {
  3871  							v = math.MaxUint16
  3872  						} else if s < 0 {
  3873  							v = 0
  3874  						} else {
  3875  							v = uint16(v32)
  3876  						}
  3877  					}
  3878  					retHi |= uint64(v) << (i * 16)
  3879  				}
  3880  			}
  3881  			ce.pushValue(retLo)
  3882  			ce.pushValue(retHi)
  3883  			frame.pc++
  3884  		case wazeroir.OperationKindV128Dot:
  3885  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3886  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3887  			ce.pushValue(
  3888  				uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) |
  3889  					(uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32),
  3890  			)
  3891  			ce.pushValue(
  3892  				uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) |
  3893  					(uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32),
  3894  			)
  3895  			frame.pc++
  3896  		case wazeroir.OperationKindV128ITruncSatFromF:
  3897  			hi, lo := ce.popValue(), ce.popValue()
  3898  			signed := op.B3
  3899  			var retLo, retHi uint64
  3900  
  3901  			switch op.B1 {
  3902  			case wazeroir.ShapeF32x4: // f32x4 to i32x4
  3903  				for i, f64 := range [4]float64{
  3904  					math.Trunc(float64(math.Float32frombits(uint32(lo)))),
  3905  					math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))),
  3906  					math.Trunc(float64(math.Float32frombits(uint32(hi)))),
  3907  					math.Trunc(float64(math.Float32frombits(uint32(hi >> 32)))),
  3908  				} {
  3909  
  3910  					var v uint32
  3911  					if math.IsNaN(f64) {
  3912  						v = 0
  3913  					} else if signed {
  3914  						if f64 < math.MinInt32 {
  3915  							f64 = math.MinInt32
  3916  						} else if f64 > math.MaxInt32 {
  3917  							f64 = math.MaxInt32
  3918  						}
  3919  						v = uint32(int32(f64))
  3920  					} else {
  3921  						if f64 < 0 {
  3922  							f64 = 0
  3923  						} else if f64 > math.MaxUint32 {
  3924  							f64 = math.MaxUint32
  3925  						}
  3926  						v = uint32(f64)
  3927  					}
  3928  
  3929  					if i < 2 {
  3930  						retLo |= uint64(v) << (i * 32)
  3931  					} else {
  3932  						retHi |= uint64(v) << ((i - 2) * 32)
  3933  					}
  3934  				}
  3935  
  3936  			case wazeroir.ShapeF64x2: // f64x2 to i32x4
  3937  				for i, f := range [2]float64{
  3938  					math.Trunc(math.Float64frombits(lo)),
  3939  					math.Trunc(math.Float64frombits(hi)),
  3940  				} {
  3941  					var v uint32
  3942  					if math.IsNaN(f) {
  3943  						v = 0
  3944  					} else if signed {
  3945  						if f < math.MinInt32 {
  3946  							f = math.MinInt32
  3947  						} else if f > math.MaxInt32 {
  3948  							f = math.MaxInt32
  3949  						}
  3950  						v = uint32(int32(f))
  3951  					} else {
  3952  						if f < 0 {
  3953  							f = 0
  3954  						} else if f > math.MaxUint32 {
  3955  							f = math.MaxUint32
  3956  						}
  3957  						v = uint32(f)
  3958  					}
  3959  
  3960  					retLo |= uint64(v) << (i * 32)
  3961  				}
  3962  			}
  3963  
  3964  			ce.pushValue(retLo)
  3965  			ce.pushValue(retHi)
  3966  			frame.pc++
  3967  		case wazeroir.OperationKindAtomicMemoryWait:
  3968  			timeout := int64(ce.popValue())
  3969  			exp := ce.popValue()
  3970  			offset := ce.popMemoryOffset(op)
  3971  			if !memoryInst.Shared {
  3972  				panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
  3973  			}
  3974  			if int(offset) >= len(memoryInst.Buffer) {
  3975  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  3976  			}
  3977  
  3978  			switch wazeroir.UnsignedType(op.B1) {
  3979  			case wazeroir.UnsignedTypeI32:
  3980  				if offset%4 != 0 {
  3981  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  3982  				}
  3983  				ce.pushValue(memoryInst.Wait32(offset, uint32(exp), timeout))
  3984  			case wazeroir.UnsignedTypeI64:
  3985  				if offset%8 != 0 {
  3986  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  3987  				}
  3988  				ce.pushValue(memoryInst.Wait64(offset, exp, timeout))
  3989  			}
  3990  			frame.pc++
  3991  		case wazeroir.OperationKindAtomicMemoryNotify:
  3992  			count := ce.popValue()
  3993  			offset := ce.popMemoryOffset(op)
  3994  			if offset%4 != 0 {
  3995  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  3996  			}
  3997  			// Just a bounds check
  3998  			if offset >= memoryInst.Size() {
  3999  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4000  			}
  4001  			res := memoryInst.Notify(offset, uint32(count))
  4002  			ce.pushValue(uint64(res))
  4003  			frame.pc++
  4004  		case wazeroir.OperationKindAtomicFence:
  4005  			// Memory not required for fence only
  4006  			if memoryInst != nil {
  4007  				// An empty critical section can be used as a synchronization primitive, which is what
  4008  				// fence is. Probably, there are no spectests or defined behavior to confirm this yet.
  4009  				memoryInst.Mux.Lock()
  4010  				memoryInst.Mux.Unlock() //nolint:staticcheck
  4011  			}
  4012  			frame.pc++
  4013  		case wazeroir.OperationKindAtomicLoad:
  4014  			offset := ce.popMemoryOffset(op)
  4015  			switch wazeroir.UnsignedType(op.B1) {
  4016  			case wazeroir.UnsignedTypeI32:
  4017  				if offset%4 != 0 {
  4018  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4019  				}
  4020  				memoryInst.Mux.Lock()
  4021  				val, ok := memoryInst.ReadUint32Le(offset)
  4022  				memoryInst.Mux.Unlock()
  4023  				if !ok {
  4024  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4025  				}
  4026  				ce.pushValue(uint64(val))
  4027  			case wazeroir.UnsignedTypeI64:
  4028  				if offset%8 != 0 {
  4029  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4030  				}
  4031  				memoryInst.Mux.Lock()
  4032  				val, ok := memoryInst.ReadUint64Le(offset)
  4033  				memoryInst.Mux.Unlock()
  4034  				if !ok {
  4035  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4036  				}
  4037  				ce.pushValue(val)
  4038  			}
  4039  			frame.pc++
  4040  		case wazeroir.OperationKindAtomicLoad8:
  4041  			offset := ce.popMemoryOffset(op)
  4042  			memoryInst.Mux.Lock()
  4043  			val, ok := memoryInst.ReadByte(offset)
  4044  			memoryInst.Mux.Unlock()
  4045  			if !ok {
  4046  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4047  			}
  4048  			ce.pushValue(uint64(val))
  4049  			frame.pc++
  4050  		case wazeroir.OperationKindAtomicLoad16:
  4051  			offset := ce.popMemoryOffset(op)
  4052  			if offset%2 != 0 {
  4053  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4054  			}
  4055  			memoryInst.Mux.Lock()
  4056  			val, ok := memoryInst.ReadUint16Le(offset)
  4057  			memoryInst.Mux.Unlock()
  4058  			if !ok {
  4059  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4060  			}
  4061  			ce.pushValue(uint64(val))
  4062  			frame.pc++
  4063  		case wazeroir.OperationKindAtomicStore:
  4064  			val := ce.popValue()
  4065  			offset := ce.popMemoryOffset(op)
  4066  			switch wazeroir.UnsignedType(op.B1) {
  4067  			case wazeroir.UnsignedTypeI32:
  4068  				if offset%4 != 0 {
  4069  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4070  				}
  4071  				memoryInst.Mux.Lock()
  4072  				ok := memoryInst.WriteUint32Le(offset, uint32(val))
  4073  				memoryInst.Mux.Unlock()
  4074  				if !ok {
  4075  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4076  				}
  4077  			case wazeroir.UnsignedTypeI64:
  4078  				if offset%8 != 0 {
  4079  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4080  				}
  4081  				memoryInst.Mux.Lock()
  4082  				ok := memoryInst.WriteUint64Le(offset, val)
  4083  				memoryInst.Mux.Unlock()
  4084  				if !ok {
  4085  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4086  				}
  4087  			}
  4088  			frame.pc++
  4089  		case wazeroir.OperationKindAtomicStore8:
  4090  			val := byte(ce.popValue())
  4091  			offset := ce.popMemoryOffset(op)
  4092  			memoryInst.Mux.Lock()
  4093  			ok := memoryInst.WriteByte(offset, val)
  4094  			memoryInst.Mux.Unlock()
  4095  			if !ok {
  4096  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4097  			}
  4098  			frame.pc++
  4099  		case wazeroir.OperationKindAtomicStore16:
  4100  			val := uint16(ce.popValue())
  4101  			offset := ce.popMemoryOffset(op)
  4102  			if offset%2 != 0 {
  4103  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4104  			}
  4105  			memoryInst.Mux.Lock()
  4106  			ok := memoryInst.WriteUint16Le(offset, val)
  4107  			memoryInst.Mux.Unlock()
  4108  			if !ok {
  4109  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4110  			}
  4111  			frame.pc++
  4112  		case wazeroir.OperationKindAtomicRMW:
  4113  			val := ce.popValue()
  4114  			offset := ce.popMemoryOffset(op)
  4115  			switch wazeroir.UnsignedType(op.B1) {
  4116  			case wazeroir.UnsignedTypeI32:
  4117  				if offset%4 != 0 {
  4118  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4119  				}
  4120  				memoryInst.Mux.Lock()
  4121  				old, ok := memoryInst.ReadUint32Le(offset)
  4122  				if !ok {
  4123  					memoryInst.Mux.Unlock()
  4124  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4125  				}
  4126  				var newVal uint32
  4127  				switch wazeroir.AtomicArithmeticOp(op.B2) {
  4128  				case wazeroir.AtomicArithmeticOpAdd:
  4129  					newVal = old + uint32(val)
  4130  				case wazeroir.AtomicArithmeticOpSub:
  4131  					newVal = old - uint32(val)
  4132  				case wazeroir.AtomicArithmeticOpAnd:
  4133  					newVal = old & uint32(val)
  4134  				case wazeroir.AtomicArithmeticOpOr:
  4135  					newVal = old | uint32(val)
  4136  				case wazeroir.AtomicArithmeticOpXor:
  4137  					newVal = old ^ uint32(val)
  4138  				case wazeroir.AtomicArithmeticOpNop:
  4139  					newVal = uint32(val)
  4140  				}
  4141  				memoryInst.WriteUint32Le(offset, newVal)
  4142  				memoryInst.Mux.Unlock()
  4143  				ce.pushValue(uint64(old))
  4144  			case wazeroir.UnsignedTypeI64:
  4145  				if offset%8 != 0 {
  4146  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4147  				}
  4148  				memoryInst.Mux.Lock()
  4149  				old, ok := memoryInst.ReadUint64Le(offset)
  4150  				if !ok {
  4151  					memoryInst.Mux.Unlock()
  4152  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4153  				}
  4154  				var newVal uint64
  4155  				switch wazeroir.AtomicArithmeticOp(op.B2) {
  4156  				case wazeroir.AtomicArithmeticOpAdd:
  4157  					newVal = old + val
  4158  				case wazeroir.AtomicArithmeticOpSub:
  4159  					newVal = old - val
  4160  				case wazeroir.AtomicArithmeticOpAnd:
  4161  					newVal = old & val
  4162  				case wazeroir.AtomicArithmeticOpOr:
  4163  					newVal = old | val
  4164  				case wazeroir.AtomicArithmeticOpXor:
  4165  					newVal = old ^ val
  4166  				case wazeroir.AtomicArithmeticOpNop:
  4167  					newVal = val
  4168  				}
  4169  				memoryInst.WriteUint64Le(offset, newVal)
  4170  				memoryInst.Mux.Unlock()
  4171  				ce.pushValue(old)
  4172  			}
  4173  			frame.pc++
  4174  		case wazeroir.OperationKindAtomicRMW8:
  4175  			val := ce.popValue()
  4176  			offset := ce.popMemoryOffset(op)
  4177  			memoryInst.Mux.Lock()
  4178  			old, ok := memoryInst.ReadByte(offset)
  4179  			if !ok {
  4180  				memoryInst.Mux.Unlock()
  4181  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4182  			}
  4183  			arg := byte(val)
  4184  			var newVal byte
  4185  			switch wazeroir.AtomicArithmeticOp(op.B2) {
  4186  			case wazeroir.AtomicArithmeticOpAdd:
  4187  				newVal = old + arg
  4188  			case wazeroir.AtomicArithmeticOpSub:
  4189  				newVal = old - arg
  4190  			case wazeroir.AtomicArithmeticOpAnd:
  4191  				newVal = old & arg
  4192  			case wazeroir.AtomicArithmeticOpOr:
  4193  				newVal = old | arg
  4194  			case wazeroir.AtomicArithmeticOpXor:
  4195  				newVal = old ^ arg
  4196  			case wazeroir.AtomicArithmeticOpNop:
  4197  				newVal = arg
  4198  			}
  4199  			memoryInst.WriteByte(offset, newVal)
  4200  			memoryInst.Mux.Unlock()
  4201  			ce.pushValue(uint64(old))
  4202  			frame.pc++
  4203  		case wazeroir.OperationKindAtomicRMW16:
  4204  			val := ce.popValue()
  4205  			offset := ce.popMemoryOffset(op)
  4206  			if offset%2 != 0 {
  4207  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4208  			}
  4209  			memoryInst.Mux.Lock()
  4210  			old, ok := memoryInst.ReadUint16Le(offset)
  4211  			if !ok {
  4212  				memoryInst.Mux.Unlock()
  4213  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4214  			}
  4215  			arg := uint16(val)
  4216  			var newVal uint16
  4217  			switch wazeroir.AtomicArithmeticOp(op.B2) {
  4218  			case wazeroir.AtomicArithmeticOpAdd:
  4219  				newVal = old + arg
  4220  			case wazeroir.AtomicArithmeticOpSub:
  4221  				newVal = old - arg
  4222  			case wazeroir.AtomicArithmeticOpAnd:
  4223  				newVal = old & arg
  4224  			case wazeroir.AtomicArithmeticOpOr:
  4225  				newVal = old | arg
  4226  			case wazeroir.AtomicArithmeticOpXor:
  4227  				newVal = old ^ arg
  4228  			case wazeroir.AtomicArithmeticOpNop:
  4229  				newVal = arg
  4230  			}
  4231  			memoryInst.WriteUint16Le(offset, newVal)
  4232  			memoryInst.Mux.Unlock()
  4233  			ce.pushValue(uint64(old))
  4234  			frame.pc++
  4235  		case wazeroir.OperationKindAtomicRMWCmpxchg:
  4236  			rep := ce.popValue()
  4237  			exp := ce.popValue()
  4238  			offset := ce.popMemoryOffset(op)
  4239  			switch wazeroir.UnsignedType(op.B1) {
  4240  			case wazeroir.UnsignedTypeI32:
  4241  				if offset%4 != 0 {
  4242  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4243  				}
  4244  				memoryInst.Mux.Lock()
  4245  				old, ok := memoryInst.ReadUint32Le(offset)
  4246  				if !ok {
  4247  					memoryInst.Mux.Unlock()
  4248  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4249  				}
  4250  				if old == uint32(exp) {
  4251  					memoryInst.WriteUint32Le(offset, uint32(rep))
  4252  				}
  4253  				memoryInst.Mux.Unlock()
  4254  				ce.pushValue(uint64(old))
  4255  			case wazeroir.UnsignedTypeI64:
  4256  				if offset%8 != 0 {
  4257  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4258  				}
  4259  				memoryInst.Mux.Lock()
  4260  				old, ok := memoryInst.ReadUint64Le(offset)
  4261  				if !ok {
  4262  					memoryInst.Mux.Unlock()
  4263  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4264  				}
  4265  				if old == exp {
  4266  					memoryInst.WriteUint64Le(offset, rep)
  4267  				}
  4268  				memoryInst.Mux.Unlock()
  4269  				ce.pushValue(old)
  4270  			}
  4271  			frame.pc++
  4272  		case wazeroir.OperationKindAtomicRMW8Cmpxchg:
  4273  			rep := byte(ce.popValue())
  4274  			exp := byte(ce.popValue())
  4275  			offset := ce.popMemoryOffset(op)
  4276  			memoryInst.Mux.Lock()
  4277  			old, ok := memoryInst.ReadByte(offset)
  4278  			if !ok {
  4279  				memoryInst.Mux.Unlock()
  4280  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4281  			}
  4282  			if old == exp {
  4283  				memoryInst.WriteByte(offset, rep)
  4284  			}
  4285  			memoryInst.Mux.Unlock()
  4286  			ce.pushValue(uint64(old))
  4287  			frame.pc++
  4288  		case wazeroir.OperationKindAtomicRMW16Cmpxchg:
  4289  			rep := uint16(ce.popValue())
  4290  			exp := uint16(ce.popValue())
  4291  			offset := ce.popMemoryOffset(op)
  4292  			if offset%2 != 0 {
  4293  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4294  			}
  4295  			memoryInst.Mux.Lock()
  4296  			old, ok := memoryInst.ReadUint16Le(offset)
  4297  			if !ok {
  4298  				memoryInst.Mux.Unlock()
  4299  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4300  			}
  4301  			if old == exp {
  4302  				memoryInst.WriteUint16Le(offset, rep)
  4303  			}
  4304  			memoryInst.Mux.Unlock()
  4305  			ce.pushValue(uint64(old))
  4306  			frame.pc++
  4307  		default:
  4308  			frame.pc++
  4309  		}
  4310  	}
  4311  	ce.popFrame()
  4312  }
  4313  
  4314  func WasmCompatMax32bits(v1, v2 uint32) uint64 {
  4315  	return uint64(math.Float32bits(moremath.WasmCompatMax32(
  4316  		math.Float32frombits(v1),
  4317  		math.Float32frombits(v2),
  4318  	)))
  4319  }
  4320  
  4321  func WasmCompatMin32bits(v1, v2 uint32) uint64 {
  4322  	return uint64(math.Float32bits(moremath.WasmCompatMin32(
  4323  		math.Float32frombits(v1),
  4324  		math.Float32frombits(v2),
  4325  	)))
  4326  }
  4327  
  4328  func addFloat32bits(v1, v2 uint32) uint64 {
  4329  	return uint64(math.Float32bits(math.Float32frombits(v1) + math.Float32frombits(v2)))
  4330  }
  4331  
  4332  func subFloat32bits(v1, v2 uint32) uint64 {
  4333  	return uint64(math.Float32bits(math.Float32frombits(v1) - math.Float32frombits(v2)))
  4334  }
  4335  
  4336  func mulFloat32bits(v1, v2 uint32) uint64 {
  4337  	return uint64(math.Float32bits(math.Float32frombits(v1) * math.Float32frombits(v2)))
  4338  }
  4339  
  4340  func divFloat32bits(v1, v2 uint32) uint64 {
  4341  	return uint64(math.Float32bits(math.Float32frombits(v1) / math.Float32frombits(v2)))
  4342  }
  4343  
  4344  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  4345  func flt32(z1, z2 float32) bool {
  4346  	if z1 != z1 || z2 != z2 {
  4347  		return false
  4348  	} else if z1 == z2 {
  4349  		return false
  4350  	} else if math.IsInf(float64(z1), 1) {
  4351  		return false
  4352  	} else if math.IsInf(float64(z1), -1) {
  4353  		return true
  4354  	} else if math.IsInf(float64(z2), 1) {
  4355  		return true
  4356  	} else if math.IsInf(float64(z2), -1) {
  4357  		return false
  4358  	}
  4359  	return z1 < z2
  4360  }
  4361  
  4362  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  4363  func flt64(z1, z2 float64) bool {
  4364  	if z1 != z1 || z2 != z2 {
  4365  		return false
  4366  	} else if z1 == z2 {
  4367  		return false
  4368  	} else if math.IsInf(z1, 1) {
  4369  		return false
  4370  	} else if math.IsInf(z1, -1) {
  4371  		return true
  4372  	} else if math.IsInf(z2, 1) {
  4373  		return true
  4374  	} else if math.IsInf(z2, -1) {
  4375  		return false
  4376  	}
  4377  	return z1 < z2
  4378  }
  4379  
  4380  func i8RoundingAverage(v1, v2 byte) byte {
  4381  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  4382  	return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2)
  4383  }
  4384  
  4385  func i16RoundingAverage(v1, v2 uint16) uint16 {
  4386  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  4387  	return uint16((uint32(v1) + uint32(v2) + 1) / 2)
  4388  }
  4389  
  4390  func i8Abs(v byte) byte {
  4391  	if i := int8(v); i < 0 {
  4392  		return byte(-i)
  4393  	} else {
  4394  		return byte(i)
  4395  	}
  4396  }
  4397  
  4398  func i8MaxU(v1, v2 byte) byte {
  4399  	if v1 < v2 {
  4400  		return v2
  4401  	} else {
  4402  		return v1
  4403  	}
  4404  }
  4405  
  4406  func i8MinU(v1, v2 byte) byte {
  4407  	if v1 > v2 {
  4408  		return v2
  4409  	} else {
  4410  		return v1
  4411  	}
  4412  }
  4413  
  4414  func i8MaxS(v1, v2 byte) byte {
  4415  	if int8(v1) < int8(v2) {
  4416  		return v2
  4417  	} else {
  4418  		return v1
  4419  	}
  4420  }
  4421  
  4422  func i8MinS(v1, v2 byte) byte {
  4423  	if int8(v1) > int8(v2) {
  4424  		return v2
  4425  	} else {
  4426  		return v1
  4427  	}
  4428  }
  4429  
  4430  func i16MaxU(v1, v2 uint16) uint16 {
  4431  	if v1 < v2 {
  4432  		return v2
  4433  	} else {
  4434  		return v1
  4435  	}
  4436  }
  4437  
  4438  func i16MinU(v1, v2 uint16) uint16 {
  4439  	if v1 > v2 {
  4440  		return v2
  4441  	} else {
  4442  		return v1
  4443  	}
  4444  }
  4445  
  4446  func i16MaxS(v1, v2 uint16) uint16 {
  4447  	if int16(v1) < int16(v2) {
  4448  		return v2
  4449  	} else {
  4450  		return v1
  4451  	}
  4452  }
  4453  
  4454  func i16MinS(v1, v2 uint16) uint16 {
  4455  	if int16(v1) > int16(v2) {
  4456  		return v2
  4457  	} else {
  4458  		return v1
  4459  	}
  4460  }
  4461  
  4462  func i32MaxU(v1, v2 uint32) uint32 {
  4463  	if v1 < v2 {
  4464  		return v2
  4465  	} else {
  4466  		return v1
  4467  	}
  4468  }
  4469  
  4470  func i32MinU(v1, v2 uint32) uint32 {
  4471  	if v1 > v2 {
  4472  		return v2
  4473  	} else {
  4474  		return v1
  4475  	}
  4476  }
  4477  
  4478  func i32MaxS(v1, v2 uint32) uint32 {
  4479  	if int32(v1) < int32(v2) {
  4480  		return v2
  4481  	} else {
  4482  		return v1
  4483  	}
  4484  }
  4485  
  4486  func i32MinS(v1, v2 uint32) uint32 {
  4487  	if int32(v1) > int32(v2) {
  4488  		return v2
  4489  	} else {
  4490  		return v1
  4491  	}
  4492  }
  4493  
  4494  func i16Abs(v uint16) uint16 {
  4495  	if i := int16(v); i < 0 {
  4496  		return uint16(-i)
  4497  	} else {
  4498  		return uint16(i)
  4499  	}
  4500  }
  4501  
  4502  func i32Abs(v uint32) uint32 {
  4503  	if i := int32(v); i < 0 {
  4504  		return uint32(-i)
  4505  	} else {
  4506  		return uint32(i)
  4507  	}
  4508  }
  4509  
  4510  func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, m *wasm.ModuleInstance, f *function, fnl experimental.FunctionListener) context.Context {
  4511  	def, typ := f.definition(), f.funcType
  4512  
  4513  	ce.stackIterator.reset(ce.stack, ce.frames, f)
  4514  	fnl.Before(ctx, m, def, ce.peekValues(typ.ParamNumInUint64), &ce.stackIterator)
  4515  	ce.stackIterator.clear()
  4516  	ce.callNativeFunc(ctx, m, f)
  4517  	fnl.After(ctx, m, def, ce.peekValues(typ.ResultNumInUint64))
  4518  	return ctx
  4519  }
  4520  
  4521  // popMemoryOffset takes a memory offset off the stack for use in load and store instructions.
  4522  // As the top of stack value is 64-bit, this ensures it is in range before returning it.
  4523  func (ce *callEngine) popMemoryOffset(op *wazeroir.UnionOperation) uint32 {
  4524  	offset := op.U2 + ce.popValue()
  4525  	if offset > math.MaxUint32 {
  4526  		panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4527  	}
  4528  	return uint32(offset)
  4529  }
  4530  
  4531  func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleInstance, f *function) {
  4532  	typ := f.funcType
  4533  	paramLen := typ.ParamNumInUint64
  4534  	resultLen := typ.ResultNumInUint64
  4535  	stackLen := paramLen
  4536  
  4537  	// In the interpreter engine, ce.stack may only have capacity to store
  4538  	// parameters. Grow when there are more results than parameters.
  4539  	if growLen := resultLen - paramLen; growLen > 0 {
  4540  		for i := 0; i < growLen; i++ {
  4541  			ce.stack = append(ce.stack, 0)
  4542  		}
  4543  		stackLen += growLen
  4544  	}
  4545  
  4546  	// Pass the stack elements to the go function.
  4547  	stack := ce.stack[len(ce.stack)-stackLen:]
  4548  	ce.callGoFunc(ctx, m, f, stack)
  4549  
  4550  	// Shrink the stack when there were more parameters than results.
  4551  	if shrinkLen := paramLen - resultLen; shrinkLen > 0 {
  4552  		ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen]
  4553  	}
  4554  }