github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/interpreter/interpreter.go (about)

     1  package interpreter
     2  
     3  import (
     4  	"context"
     5  	"encoding/binary"
     6  	"errors"
     7  	"fmt"
     8  	"math"
     9  	"math/bits"
    10  	"sync"
    11  	"unsafe"
    12  
    13  	"github.com/tetratelabs/wazero/api"
    14  	"github.com/tetratelabs/wazero/experimental"
    15  	"github.com/tetratelabs/wazero/internal/expctxkeys"
    16  	"github.com/tetratelabs/wazero/internal/filecache"
    17  	"github.com/tetratelabs/wazero/internal/internalapi"
    18  	"github.com/tetratelabs/wazero/internal/moremath"
    19  	"github.com/tetratelabs/wazero/internal/wasm"
    20  	"github.com/tetratelabs/wazero/internal/wasmdebug"
    21  	"github.com/tetratelabs/wazero/internal/wasmruntime"
    22  )
    23  
// callStackCeiling is the maximum WebAssembly call frame stack height. Once this many frames are
// live, pushFrame panics with wasmruntime.ErrRuntimeStackOverflow instead of overflowing the Go runtime.
//
// The default value should suffice for most use cases. Those wishing to change this can via `go build -ldflags`.
var callStackCeiling = 2000
    29  
// engine is an interpreter implementation of wasm.Engine.
//
// It caches the compiled functions of each module keyed by wasm.ModuleID, so
// that CompileModule returns early for a module that is already in the cache.
type engine struct {
	enabledFeatures   api.CoreFeatures
	compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mux.
	mux               sync.RWMutex
}
    36  
    37  func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine {
    38  	return &engine{
    39  		enabledFeatures:   enabledFeatures,
    40  		compiledFunctions: map[wasm.ModuleID][]compiledFunction{},
    41  	}
    42  }
    43  
    44  // Close implements the same method as documented on wasm.Engine.
    45  func (e *engine) Close() (err error) {
    46  	return
    47  }
    48  
    49  // CompiledModuleCount implements the same method as documented on wasm.Engine.
    50  func (e *engine) CompiledModuleCount() uint32 {
    51  	return uint32(len(e.compiledFunctions))
    52  }
    53  
// DeleteCompiledModule implements the same method as documented on wasm.Engine.
//
// It removes the cache entry for m, if any, by delegating to
// deleteCompiledFunctions.
func (e *engine) DeleteCompiledModule(m *wasm.Module) {
	e.deleteCompiledFunctions(m)
}
    58  
    59  func (e *engine) deleteCompiledFunctions(module *wasm.Module) {
    60  	e.mux.Lock()
    61  	defer e.mux.Unlock()
    62  	delete(e.compiledFunctions, module.ID)
    63  }
    64  
    65  func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) {
    66  	e.mux.Lock()
    67  	defer e.mux.Unlock()
    68  	e.compiledFunctions[module.ID] = fs
    69  }
    70  
    71  func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) {
    72  	e.mux.RLock()
    73  	defer e.mux.RUnlock()
    74  	fs, ok = e.compiledFunctions[module.ID]
    75  	return
    76  }
    77  
// moduleEngine implements wasm.ModuleEngine
type moduleEngine struct {
	// functions are the functions of a module instance: the imported ones
	// first, followed by the ones defined in the module itself.
	// The index is module instance-scoped.
	functions []function

	// parentEngine holds the *engine from which this module engine was created.
	parentEngine *engine
}
    87  
// GetGlobalValue implements the same method as documented on wasm.ModuleEngine.
//
// It always panics: this engine reports OwnsGlobals() == false, so reaching
// this method is treated as a bug.
func (e *moduleEngine) GetGlobalValue(wasm.Index) (lo, hi uint64) {
	panic("BUG: GetGlobalValue should never be called on interpreter mode")
}
    92  
// SetGlobalValue implements the same method as documented on wasm.ModuleEngine.
//
// It always panics: this engine reports OwnsGlobals() == false, so reaching
// this method is treated as a bug.
func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) {
	panic("BUG: SetGlobalValue should never be called on interpreter mode")
}
    97  
// OwnsGlobals implements the same method as documented on wasm.ModuleEngine.
// It always returns false for the interpreter.
func (e *moduleEngine) OwnsGlobals() bool { return false }
   100  
// callEngine holds context per moduleEngine.Call, and shared across all the
// function calls originating from the same moduleEngine.Call execution.
//
// This implements api.Function.
type callEngine struct {
	internalapi.WazeroOnlyType

	// stack contains the operands.
	// Note that all the values are represented as uint64.
	stack []uint64

	// frames are the function call stack.
	frames []*callFrame

	// f is the initial function for this call engine.
	f *function

	// stackIterator is handed to function listeners so that they can walk
	// the frames and the operand stack.
	stackIterator stackIterator
}
   121  
   122  func (e *moduleEngine) newCallEngine(compiled *function) *callEngine {
   123  	return &callEngine{f: compiled}
   124  }
   125  
// pushValue pushes v onto the top of the operand stack.
func (ce *callEngine) pushValue(v uint64) {
	ce.stack = append(ce.stack, v)
}
   129  
// pushValues pushes every element of v onto the operand stack in order, so
// that the last element of v ends up on top.
func (ce *callEngine) pushValues(v []uint64) {
	ce.stack = append(ce.stack, v...)
}
   133  
   134  func (ce *callEngine) popValue() (v uint64) {
   135  	// No need to check stack bound
   136  	// as we can assume that all the operations
   137  	// are valid thanks to validateFunction
   138  	// at module validation phase
   139  	// and interpreterir translation
   140  	// before compilation.
   141  	stackTopIndex := len(ce.stack) - 1
   142  	v = ce.stack[stackTopIndex]
   143  	ce.stack = ce.stack[:stackTopIndex]
   144  	return
   145  }
   146  
   147  func (ce *callEngine) popValues(v []uint64) {
   148  	stackTopIndex := len(ce.stack) - len(v)
   149  	copy(v, ce.stack[stackTopIndex:])
   150  	ce.stack = ce.stack[:stackTopIndex]
   151  }
   152  
   153  // peekValues peeks api.ValueType values from the stack and returns them.
   154  func (ce *callEngine) peekValues(count int) []uint64 {
   155  	if count == 0 {
   156  		return nil
   157  	}
   158  	stackLen := len(ce.stack)
   159  	return ce.stack[stackLen-count : stackLen]
   160  }
   161  
   162  func (ce *callEngine) drop(raw uint64) {
   163  	r := inclusiveRangeFromU64(raw)
   164  	if r.Start == -1 {
   165  		return
   166  	} else if r.Start == 0 {
   167  		ce.stack = ce.stack[:int32(len(ce.stack))-1-r.End]
   168  	} else {
   169  		newStack := ce.stack[:int32(len(ce.stack))-1-r.End]
   170  		newStack = append(newStack, ce.stack[int32(len(ce.stack))-r.Start:]...)
   171  		ce.stack = newStack
   172  	}
   173  }
   174  
   175  func (ce *callEngine) pushFrame(frame *callFrame) {
   176  	if callStackCeiling <= len(ce.frames) {
   177  		panic(wasmruntime.ErrRuntimeStackOverflow)
   178  	}
   179  	ce.frames = append(ce.frames, frame)
   180  }
   181  
   182  func (ce *callEngine) popFrame() (frame *callFrame) {
   183  	// No need to check stack bound as we can assume that all the operations are valid thanks to validateFunction at
   184  	// module validation phase and interpreterir translation before compilation.
   185  	oneLess := len(ce.frames) - 1
   186  	frame = ce.frames[oneLess]
   187  	ce.frames = ce.frames[:oneLess]
   188  	return
   189  }
   190  
// callFrame is one entry of callEngine.frames: the execution state of a
// single function invocation.
type callFrame struct {
	// pc is the program counter representing the current position in f.parent.body.
	pc uint64
	// f is the compiled function used in this function frame.
	f *function
	// base is the operand stack height (len of callEngine.stack) recorded when
	// the frame was created. stackIterator.Next truncates its stack view to it.
	base int
}
   200  
// compiledFunction is the per-module compilation result for one entry of the
// module's code section, as produced by engine.CompileModule.
//
//   - source is the module the function was compiled from.
//   - body holds the lowered operations; CompileModule leaves it unset for
//     host functions.
//   - listener is the function listener registered for this function, if any.
//   - offsetsInWasmBinary is copied from the IR's source offsets when present,
//     and is indexed by program counter.
//   - hostFn is the Go function taken from the code section for host
//     functions, and nil otherwise.
//   - ensureTermination records the flag passed to CompileModule.
//   - index is the function's index in the module's function index space
//     (imported function count plus position in the code section).
type compiledFunction struct {
	source              *wasm.Module
	body                []unionOperation
	listener            experimental.FunctionListener
	offsetsInWasmBinary []uint64
	hostFn              interface{}
	ensureTermination   bool
	index               wasm.Index
}
   210  
// function is a compiledFunction bound to a module instance, as created by
// engine.NewModuleEngine.
//
//   - funcType points at the function's signature in the module's type section.
//   - moduleInstance is the instance the function belongs to.
//   - typeID is the instance's ID for the function's type; indirect calls
//     compare it against the expected type.
//   - parent is the compilation result shared by all instances of the module.
type function struct {
	funcType       *wasm.FunctionType
	moduleInstance *wasm.ModuleInstance
	typeID         wasm.FunctionTypeID
	parent         *compiledFunction
}
   217  
   218  // functionFromUintptr resurrects the original *function from the given uintptr
   219  // which comes from either funcref table or OpcodeRefFunc instruction.
   220  func functionFromUintptr(ptr uintptr) *function {
   221  	// Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector.
   222  	//
   223  	// For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr"
   224  	// subroutine wanrs as "checkptr: pointer arithmetic result points to invalid allocation"
   225  	// https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69
   226  	var wrapped *uintptr = &ptr
   227  	return *(**function)(unsafe.Pointer(wrapped))
   228  }
   229  
// snapshot is the value returned by callEngine.Snapshot. It carries copies of
// the operand stack and of the frame slice taken at snapshot time.
type snapshot struct {
	// stack and frames are the copies made by callEngine.Snapshot.
	stack  []uint64
	frames []*callFrame
	// pc is written to the top frame by doRestore. Snapshot does not set it.
	pc uint64

	// ret holds the values passed to Restore; doRestore copies them onto the
	// top of the restored stack.
	ret []uint64

	// ce is the call engine the snapshot was taken from and is restored into.
	ce *callEngine
}
   239  
   240  // Snapshot implements the same method as documented on experimental.Snapshotter.
   241  func (ce *callEngine) Snapshot() experimental.Snapshot {
   242  	stack := make([]uint64, len(ce.stack))
   243  	copy(stack, ce.stack)
   244  
   245  	frames := make([]*callFrame, len(ce.frames))
   246  	copy(frames, ce.frames)
   247  
   248  	return &snapshot{
   249  		stack:  stack,
   250  		frames: frames,
   251  		ce:     ce,
   252  	}
   253  }
   254  
// Restore implements the same method as documented on experimental.Snapshot.
//
// It records ret and then panics with the snapshot itself. The panic is
// recovered at a call site of the owning call engine, which performs the
// actual restoration via doRestore.
func (s *snapshot) Restore(ret []uint64) {
	s.ret = ret
	panic(s)
}
   260  
   261  func (s *snapshot) doRestore() {
   262  	ce := s.ce
   263  
   264  	ce.stack = s.stack
   265  	ce.frames = s.frames
   266  	ce.frames[len(ce.frames)-1].pc = s.pc
   267  
   268  	copy(ce.stack[len(ce.stack)-len(s.ret):], s.ret)
   269  }
   270  
   271  // Error implements the same method on error.
   272  func (s *snapshot) Error() string {
   273  	return "unhandled snapshot restore, this generally indicates restore was called from a different " +
   274  		"exported function invocation than snapshot"
   275  }
   276  
// stackIterator implements experimental.StackIterator.
//
// After reset, the first call to Next positions the iterator on the function
// passed to reset; each further call pops one entry from frames and exposes
// that frame's function and program counter, with stack truncated to the
// frame's base.
type stackIterator struct {
	stack   []uint64
	frames  []*callFrame
	started bool
	fn      *function
	pc      uint64
}
   285  
   286  func (si *stackIterator) reset(stack []uint64, frames []*callFrame, f *function) {
   287  	si.fn = f
   288  	si.pc = 0
   289  	si.stack = stack
   290  	si.frames = frames
   291  	si.started = false
   292  }
   293  
   294  func (si *stackIterator) clear() {
   295  	si.stack = nil
   296  	si.frames = nil
   297  	si.started = false
   298  	si.fn = nil
   299  }
   300  
   301  // Next implements the same method as documented on experimental.StackIterator.
   302  func (si *stackIterator) Next() bool {
   303  	if !si.started {
   304  		si.started = true
   305  		return true
   306  	}
   307  
   308  	if len(si.frames) == 0 {
   309  		return false
   310  	}
   311  
   312  	frame := si.frames[len(si.frames)-1]
   313  	si.stack = si.stack[:frame.base]
   314  	si.fn = frame.f
   315  	si.pc = frame.pc
   316  	si.frames = si.frames[:len(si.frames)-1]
   317  	return true
   318  }
   319  
   320  // Function implements the same method as documented on
   321  // experimental.StackIterator.
   322  func (si *stackIterator) Function() experimental.InternalFunction {
   323  	return internalFunction{si.fn}
   324  }
   325  
// ProgramCounter implements the same method as documented on
// experimental.StackIterator. It returns the program counter of the entry the
// iterator is currently positioned on.
func (si *stackIterator) ProgramCounter() experimental.ProgramCounter {
	return experimental.ProgramCounter(si.pc)
}
   331  
// internalFunction implements experimental.InternalFunction by embedding the
// *function it describes.
type internalFunction struct{ *function }
   334  
   335  // Definition implements the same method as documented on
   336  // experimental.InternalFunction.
   337  func (f internalFunction) Definition() api.FunctionDefinition {
   338  	return f.definition()
   339  }
   340  
   341  // SourceOffsetForPC implements the same method as documented on
   342  // experimental.InternalFunction.
   343  func (f internalFunction) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 {
   344  	offsetsMap := f.parent.offsetsInWasmBinary
   345  	if uint64(pc) < uint64(len(offsetsMap)) {
   346  		return offsetsMap[pc]
   347  	}
   348  	return 0
   349  }
   350  
// callFrameStackSize is passed to newCompiler by CompileModule. The interpreter
// doesn't maintain call frames in the operand stack, so the size is zero.
const callFrameStackSize = 0
   353  
   354  // CompileModule implements the same method as documented on wasm.Engine.
   355  func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error {
   356  	if _, ok := e.getCompiledFunctions(module); ok { // cache hit!
   357  		return nil
   358  	}
   359  
   360  	funcs := make([]compiledFunction, len(module.FunctionSection))
   361  	irCompiler, err := newCompiler(e.enabledFeatures, callFrameStackSize, module, ensureTermination)
   362  	if err != nil {
   363  		return err
   364  	}
   365  	imported := module.ImportFunctionCount
   366  	for i := range module.CodeSection {
   367  		var lsn experimental.FunctionListener
   368  		if i < len(listeners) {
   369  			lsn = listeners[i]
   370  		}
   371  
   372  		compiled := &funcs[i]
   373  		// If this is the host function, there's nothing to do as the runtime representation of
   374  		// host function in interpreter is its Go function itself as opposed to Wasm functions,
   375  		// which need to be compiled down to
   376  		if codeSeg := &module.CodeSection[i]; codeSeg.GoFunc != nil {
   377  			compiled.hostFn = codeSeg.GoFunc
   378  		} else {
   379  			ir, err := irCompiler.Next()
   380  			if err != nil {
   381  				return err
   382  			}
   383  			err = e.lowerIR(ir, compiled)
   384  			if err != nil {
   385  				def := module.FunctionDefinition(uint32(i) + module.ImportFunctionCount)
   386  				return fmt.Errorf("failed to lower func[%s] to interpreterir: %w", def.DebugName(), err)
   387  			}
   388  		}
   389  		compiled.source = module
   390  		compiled.ensureTermination = ensureTermination
   391  		compiled.listener = lsn
   392  		compiled.index = imported + uint32(i)
   393  	}
   394  	e.addCompiledFunctions(module, funcs)
   395  	return nil
   396  }
   397  
   398  // NewModuleEngine implements the same method as documented on wasm.Engine.
   399  func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInstance) (wasm.ModuleEngine, error) {
   400  	me := &moduleEngine{
   401  		parentEngine: e,
   402  		functions:    make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)),
   403  	}
   404  
   405  	codes, ok := e.getCompiledFunctions(module)
   406  	if !ok {
   407  		return nil, errors.New("source module must be compiled before instantiation")
   408  	}
   409  
   410  	for i := range codes {
   411  		c := &codes[i]
   412  		offset := i + int(module.ImportFunctionCount)
   413  		typeIndex := module.FunctionSection[i]
   414  		me.functions[offset] = function{
   415  			moduleInstance: instance,
   416  			typeID:         instance.TypeIDs[typeIndex],
   417  			funcType:       &module.TypeSection[typeIndex],
   418  			parent:         c,
   419  		}
   420  	}
   421  	return me, nil
   422  }
   423  
   424  // lowerIR lowers the interpreterir operations to engine friendly struct.
   425  func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error {
   426  	// Copy the body from the result.
   427  	ret.body = make([]unionOperation, len(ir.Operations))
   428  	copy(ret.body, ir.Operations)
   429  	// Also copy the offsets if necessary.
   430  	if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 {
   431  		ret.offsetsInWasmBinary = make([]uint64, len(offsets))
   432  		copy(ret.offsetsInWasmBinary, offsets)
   433  	}
   434  
   435  	labelAddressResolutions := [labelKindNum][]uint64{}
   436  
   437  	// First, we iterate all labels, and resolve the address.
   438  	for i := range ret.body {
   439  		op := &ret.body[i]
   440  		switch op.Kind {
   441  		case operationKindLabel:
   442  			label := label(op.U1)
   443  			address := uint64(i)
   444  
   445  			kind, fid := label.Kind(), label.FrameID()
   446  			frameToAddresses := labelAddressResolutions[label.Kind()]
   447  			// Expand the slice if necessary.
   448  			if diff := fid - len(frameToAddresses) + 1; diff > 0 {
   449  				for j := 0; j < diff; j++ {
   450  					frameToAddresses = append(frameToAddresses, 0)
   451  				}
   452  			}
   453  			frameToAddresses[fid] = address
   454  			labelAddressResolutions[kind] = frameToAddresses
   455  		}
   456  	}
   457  
   458  	// Then resolve the label as the index to the body.
   459  	for i := range ret.body {
   460  		op := &ret.body[i]
   461  		switch op.Kind {
   462  		case operationKindBr:
   463  			e.setLabelAddress(&op.U1, label(op.U1), labelAddressResolutions)
   464  		case operationKindBrIf:
   465  			e.setLabelAddress(&op.U1, label(op.U1), labelAddressResolutions)
   466  			e.setLabelAddress(&op.U2, label(op.U2), labelAddressResolutions)
   467  		case operationKindBrTable:
   468  			for j := 0; j < len(op.Us); j += 2 {
   469  				target := op.Us[j]
   470  				e.setLabelAddress(&op.Us[j], label(target), labelAddressResolutions)
   471  			}
   472  		}
   473  	}
   474  	return nil
   475  }
   476  
   477  func (e *engine) setLabelAddress(op *uint64, label label, labelAddressResolutions [labelKindNum][]uint64) {
   478  	if label.IsReturnTarget() {
   479  		// Jmp to the end of the possible binary.
   480  		*op = math.MaxUint64
   481  	} else {
   482  		*op = labelAddressResolutions[label.Kind()][label.FrameID()]
   483  	}
   484  }
   485  
   486  // ResolveImportedFunction implements wasm.ModuleEngine.
   487  func (e *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {
   488  	imported := importedModuleEngine.(*moduleEngine)
   489  	e.functions[index] = imported.functions[indexInImportedModule]
   490  }
   491  
// ResolveImportedMemory implements wasm.ModuleEngine.
// It is a no-op for the interpreter.
func (e *moduleEngine) ResolveImportedMemory(wasm.ModuleEngine) {}
   494  
// DoneInstantiation implements wasm.ModuleEngine.
// It is a no-op for the interpreter.
func (e *moduleEngine) DoneInstantiation() {}
   497  
   498  // FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine.
   499  func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference {
   500  	return uintptr(unsafe.Pointer(&e.functions[funcIndex]))
   501  }
   502  
   503  // NewFunction implements the same method as documented on wasm.ModuleEngine.
   504  func (e *moduleEngine) NewFunction(index wasm.Index) (ce api.Function) {
   505  	// Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to
   506  	// code on close aren't locked, neither is this read.
   507  	compiled := &e.functions[index]
   508  	return e.newCallEngine(compiled)
   509  }
   510  
   511  // LookupFunction implements the same method as documented on wasm.ModuleEngine.
   512  func (e *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) {
   513  	if tableOffset >= uint32(len(t.References)) {
   514  		panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   515  	}
   516  	rawPtr := t.References[tableOffset]
   517  	if rawPtr == 0 {
   518  		panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   519  	}
   520  
   521  	tf := functionFromUintptr(rawPtr)
   522  	if tf.typeID != typeId {
   523  		panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   524  	}
   525  	return tf.moduleInstance, tf.parent.index
   526  }
   527  
   528  // Definition implements the same method as documented on api.Function.
   529  func (ce *callEngine) Definition() api.FunctionDefinition {
   530  	return ce.f.definition()
   531  }
   532  
   533  func (f *function) definition() api.FunctionDefinition {
   534  	compiled := f.parent
   535  	return compiled.source.FunctionDefinition(compiled.index)
   536  }
   537  
   538  // Call implements the same method as documented on api.Function.
   539  func (ce *callEngine) Call(ctx context.Context, params ...uint64) (results []uint64, err error) {
   540  	ft := ce.f.funcType
   541  	if n := ft.ParamNumInUint64; n != len(params) {
   542  		return nil, fmt.Errorf("expected %d params, but passed %d", n, len(params))
   543  	}
   544  	return ce.call(ctx, params, nil)
   545  }
   546  
   547  // CallWithStack implements the same method as documented on api.Function.
   548  func (ce *callEngine) CallWithStack(ctx context.Context, stack []uint64) error {
   549  	params, results, err := wasm.SplitCallStack(ce.f.funcType, stack)
   550  	if err != nil {
   551  		return err
   552  	}
   553  	_, err = ce.call(ctx, params, results)
   554  	return err
   555  }
   556  
// call runs the initial function of this call engine with params and returns
// its results.
//
// results may be nil, as when invoked via Call, in which case a slice is
// allocated when the function has results. Otherwise, as when invoked via
// CallWithStack, the results are copied into the given slice.
//
// A panic raised during execution is recovered and converted into the
// returned error by recoverOnCall.
func (ce *callEngine) call(ctx context.Context, params, results []uint64) (_ []uint64, err error) {
	m := ce.f.moduleInstance
	if ce.f.parent.ensureTermination {
		select {
		case <-ctx.Done():
			// If the provided context is already done, close the call context
			// and return the error.
			m.CloseWithCtxErr(ctx)
			return nil, m.FailIfClosed()
		default:
		}
	}

	// When snapshotting is enabled on the context, expose this call engine
	// under SnapshotterKey.
	if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil {
		ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, ce)
	}

	// This is registered before the `done` defer below, so it runs after it.
	defer func() {
		// If the module closed during the call, and the call didn't err for another reason, set an ExitError.
		if err == nil {
			err = m.FailIfClosed()
		}
		// TODO: ^^ Will not fail if the function was imported from a closed module.

		// A recovered panic takes precedence over the error set above.
		if v := recover(); v != nil {
			err = ce.recoverOnCall(ctx, m, v)
		}
	}()

	ce.pushValues(params)

	if ce.f.parent.ensureTermination {
		done := m.CloseModuleOnCanceledOrTimeout(ctx)
		defer done()
	}

	ce.callFunction(ctx, m, ce.f)

	// This returns a safe copy of the results, instead of a slice view. If we
	// returned a re-slice, the caller could accidentally or purposefully
	// corrupt the stack of subsequent calls.
	ft := ce.f.funcType
	if results == nil && ft.ResultNumInUint64 > 0 {
		results = make([]uint64, ft.ResultNumInUint64)
	}
	ce.popValues(results)
	return results, nil
}
   605  
// functionListenerInvocation captures arguments needed to perform function
// listener invocations when unwinding the call stack: the listener itself
// (embedded) and the definition of the function it is attached to.
type functionListenerInvocation struct {
	experimental.FunctionListener
	def api.FunctionDefinition
}
   612  
   613  // recoverOnCall takes the recovered value `recoverOnCall`, and wraps it
   614  // with the call frame stack traces. Also, reset the state of callEngine
   615  // so that it can be used for the subsequent calls.
   616  func (ce *callEngine) recoverOnCall(ctx context.Context, m *wasm.ModuleInstance, v interface{}) (err error) {
   617  	if s, ok := v.(*snapshot); ok {
   618  		// A snapshot that wasn't handled was created by a different call engine possibly from a nested wasm invocation,
   619  		// let it propagate up to be handled by the caller.
   620  		panic(s)
   621  	}
   622  
   623  	builder := wasmdebug.NewErrorBuilder()
   624  	frameCount := len(ce.frames)
   625  	functionListeners := make([]functionListenerInvocation, 0, 16)
   626  
   627  	if frameCount > wasmdebug.MaxFrames {
   628  		frameCount = wasmdebug.MaxFrames
   629  	}
   630  	for i := 0; i < frameCount; i++ {
   631  		frame := ce.popFrame()
   632  		f := frame.f
   633  		def := f.definition()
   634  		var sources []string
   635  		if parent := frame.f.parent; parent.body != nil && len(parent.offsetsInWasmBinary) > 0 {
   636  			sources = parent.source.DWARFLines.Line(parent.offsetsInWasmBinary[frame.pc])
   637  		}
   638  		builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
   639  		if f.parent.listener != nil {
   640  			functionListeners = append(functionListeners, functionListenerInvocation{
   641  				FunctionListener: f.parent.listener,
   642  				def:              f.definition(),
   643  			})
   644  		}
   645  	}
   646  
   647  	err = builder.FromRecovered(v)
   648  	for i := range functionListeners {
   649  		functionListeners[i].Abort(ctx, m, functionListeners[i].def, err)
   650  	}
   651  
   652  	// Allows the reuse of CallEngine.
   653  	ce.stack, ce.frames = ce.stack[:0], ce.frames[:0]
   654  	return
   655  }
   656  
   657  func (ce *callEngine) callFunction(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   658  	if f.parent.hostFn != nil {
   659  		ce.callGoFuncWithStack(ctx, m, f)
   660  	} else if lsn := f.parent.listener; lsn != nil {
   661  		ce.callNativeFuncWithListener(ctx, m, f, lsn)
   662  	} else {
   663  		ce.callNativeFunc(ctx, m, f)
   664  	}
   665  }
   666  
   667  func (ce *callEngine) callGoFunc(ctx context.Context, m *wasm.ModuleInstance, f *function, stack []uint64) {
   668  	typ := f.funcType
   669  	lsn := f.parent.listener
   670  	if lsn != nil {
   671  		params := stack[:typ.ParamNumInUint64]
   672  		ce.stackIterator.reset(ce.stack, ce.frames, f)
   673  		lsn.Before(ctx, m, f.definition(), params, &ce.stackIterator)
   674  		ce.stackIterator.clear()
   675  	}
   676  	frame := &callFrame{f: f, base: len(ce.stack)}
   677  	ce.pushFrame(frame)
   678  
   679  	fn := f.parent.hostFn
   680  	switch fn := fn.(type) {
   681  	case api.GoModuleFunction:
   682  		fn.Call(ctx, m, stack)
   683  	case api.GoFunction:
   684  		fn.Call(ctx, stack)
   685  	}
   686  
   687  	ce.popFrame()
   688  	if lsn != nil {
   689  		// TODO: This doesn't get the error due to use of panic to propagate them.
   690  		results := stack[:typ.ResultNumInUint64]
   691  		lsn.After(ctx, m, f.definition(), results)
   692  	}
   693  }
   694  
   695  func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance, f *function) {
   696  	frame := &callFrame{f: f, base: len(ce.stack)}
   697  	moduleInst := f.moduleInstance
   698  	functions := moduleInst.Engine.(*moduleEngine).functions
   699  	memoryInst := moduleInst.MemoryInstance
   700  	globals := moduleInst.Globals
   701  	tables := moduleInst.Tables
   702  	typeIDs := moduleInst.TypeIDs
   703  	dataInstances := moduleInst.DataInstances
   704  	elementInstances := moduleInst.ElementInstances
   705  	ce.pushFrame(frame)
   706  	body := frame.f.parent.body
   707  	bodyLen := uint64(len(body))
   708  	for frame.pc < bodyLen {
   709  		op := &body[frame.pc]
   710  		// TODO: add description of each operation/case
   711  		// on, for example, how many args are used,
   712  		// how the stack is modified, etc.
   713  		switch op.Kind {
   714  		case operationKindBuiltinFunctionCheckExitCode:
   715  			if err := m.FailIfClosed(); err != nil {
   716  				panic(err)
   717  			}
   718  			frame.pc++
   719  		case operationKindUnreachable:
   720  			panic(wasmruntime.ErrRuntimeUnreachable)
   721  		case operationKindBr:
   722  			frame.pc = op.U1
   723  		case operationKindBrIf:
   724  			if ce.popValue() > 0 {
   725  				ce.drop(op.U3)
   726  				frame.pc = op.U1
   727  			} else {
   728  				frame.pc = op.U2
   729  			}
   730  		case operationKindBrTable:
   731  			v := ce.popValue()
   732  			defaultAt := uint64(len(op.Us))/2 - 1
   733  			if v > defaultAt {
   734  				v = defaultAt
   735  			}
   736  			v *= 2
   737  			ce.drop(op.Us[v+1])
   738  			frame.pc = op.Us[v]
   739  		case operationKindCall:
   740  			func() {
   741  				if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil {
   742  					defer func() {
   743  						if r := recover(); r != nil {
   744  							if s, ok := r.(*snapshot); ok && s.ce == ce {
   745  								s.doRestore()
   746  								frame = ce.frames[len(ce.frames)-1]
   747  								body = frame.f.parent.body
   748  								bodyLen = uint64(len(body))
   749  							} else {
   750  								panic(r)
   751  							}
   752  						}
   753  					}()
   754  				}
   755  				ce.callFunction(ctx, f.moduleInstance, &functions[op.U1])
   756  			}()
   757  			frame.pc++
   758  		case operationKindCallIndirect:
   759  			offset := ce.popValue()
   760  			table := tables[op.U2]
   761  			if offset >= uint64(len(table.References)) {
   762  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   763  			}
   764  			rawPtr := table.References[offset]
   765  			if rawPtr == 0 {
   766  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   767  			}
   768  
   769  			tf := functionFromUintptr(rawPtr)
   770  			if tf.typeID != typeIDs[op.U1] {
   771  				panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   772  			}
   773  
   774  			ce.callFunction(ctx, f.moduleInstance, tf)
   775  			frame.pc++
   776  		case operationKindDrop:
   777  			ce.drop(op.U1)
   778  			frame.pc++
   779  		case operationKindSelect:
   780  			c := ce.popValue()
   781  			if op.B3 { // Target is vector.
   782  				x2Hi, x2Lo := ce.popValue(), ce.popValue()
   783  				if c == 0 {
   784  					_, _ = ce.popValue(), ce.popValue() // discard the x1's lo and hi bits.
   785  					ce.pushValue(x2Lo)
   786  					ce.pushValue(x2Hi)
   787  				}
   788  			} else {
   789  				v2 := ce.popValue()
   790  				if c == 0 {
   791  					_ = ce.popValue()
   792  					ce.pushValue(v2)
   793  				}
   794  			}
   795  			frame.pc++
   796  		case operationKindPick:
   797  			index := len(ce.stack) - 1 - int(op.U1)
   798  			ce.pushValue(ce.stack[index])
   799  			if op.B3 { // V128 value target.
   800  				ce.pushValue(ce.stack[index+1])
   801  			}
   802  			frame.pc++
   803  		case operationKindSet:
   804  			if op.B3 { // V128 value target.
   805  				lowIndex := len(ce.stack) - 1 - int(op.U1)
   806  				highIndex := lowIndex + 1
   807  				hi, lo := ce.popValue(), ce.popValue()
   808  				ce.stack[lowIndex], ce.stack[highIndex] = lo, hi
   809  			} else {
   810  				index := len(ce.stack) - 1 - int(op.U1)
   811  				ce.stack[index] = ce.popValue()
   812  			}
   813  			frame.pc++
   814  		case operationKindGlobalGet:
   815  			g := globals[op.U1]
   816  			ce.pushValue(g.Val)
   817  			if g.Type.ValType == wasm.ValueTypeV128 {
   818  				ce.pushValue(g.ValHi)
   819  			}
   820  			frame.pc++
   821  		case operationKindGlobalSet:
   822  			g := globals[op.U1]
   823  			if g.Type.ValType == wasm.ValueTypeV128 {
   824  				g.ValHi = ce.popValue()
   825  			}
   826  			g.Val = ce.popValue()
   827  			frame.pc++
   828  		case operationKindLoad:
   829  			offset := ce.popMemoryOffset(op)
   830  			switch unsignedType(op.B1) {
   831  			case unsignedTypeI32, unsignedTypeF32:
   832  				if val, ok := memoryInst.ReadUint32Le(offset); !ok {
   833  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   834  				} else {
   835  					ce.pushValue(uint64(val))
   836  				}
   837  			case unsignedTypeI64, unsignedTypeF64:
   838  				if val, ok := memoryInst.ReadUint64Le(offset); !ok {
   839  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   840  				} else {
   841  					ce.pushValue(val)
   842  				}
   843  			}
   844  			frame.pc++
   845  		case operationKindLoad8:
   846  			val, ok := memoryInst.ReadByte(ce.popMemoryOffset(op))
   847  			if !ok {
   848  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   849  			}
   850  
   851  			switch signedInt(op.B1) {
   852  			case signedInt32:
   853  				ce.pushValue(uint64(uint32(int8(val))))
   854  			case signedInt64:
   855  				ce.pushValue(uint64(int8(val)))
   856  			case signedUint32, signedUint64:
   857  				ce.pushValue(uint64(val))
   858  			}
   859  			frame.pc++
   860  		case operationKindLoad16:
   861  
   862  			val, ok := memoryInst.ReadUint16Le(ce.popMemoryOffset(op))
   863  			if !ok {
   864  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   865  			}
   866  
   867  			switch signedInt(op.B1) {
   868  			case signedInt32:
   869  				ce.pushValue(uint64(uint32(int16(val))))
   870  			case signedInt64:
   871  				ce.pushValue(uint64(int16(val)))
   872  			case signedUint32, signedUint64:
   873  				ce.pushValue(uint64(val))
   874  			}
   875  			frame.pc++
   876  		case operationKindLoad32:
   877  			val, ok := memoryInst.ReadUint32Le(ce.popMemoryOffset(op))
   878  			if !ok {
   879  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   880  			}
   881  
   882  			if op.B1 == 1 { // Signed
   883  				ce.pushValue(uint64(int32(val)))
   884  			} else {
   885  				ce.pushValue(uint64(val))
   886  			}
   887  			frame.pc++
   888  		case operationKindStore:
   889  			val := ce.popValue()
   890  			offset := ce.popMemoryOffset(op)
   891  			switch unsignedType(op.B1) {
   892  			case unsignedTypeI32, unsignedTypeF32:
   893  				if !memoryInst.WriteUint32Le(offset, uint32(val)) {
   894  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   895  				}
   896  			case unsignedTypeI64, unsignedTypeF64:
   897  				if !memoryInst.WriteUint64Le(offset, val) {
   898  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   899  				}
   900  			}
   901  			frame.pc++
   902  		case operationKindStore8:
   903  			val := byte(ce.popValue())
   904  			offset := ce.popMemoryOffset(op)
   905  			if !memoryInst.WriteByte(offset, val) {
   906  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   907  			}
   908  			frame.pc++
   909  		case operationKindStore16:
   910  			val := uint16(ce.popValue())
   911  			offset := ce.popMemoryOffset(op)
   912  			if !memoryInst.WriteUint16Le(offset, val) {
   913  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   914  			}
   915  			frame.pc++
   916  		case operationKindStore32:
   917  			val := uint32(ce.popValue())
   918  			offset := ce.popMemoryOffset(op)
   919  			if !memoryInst.WriteUint32Le(offset, val) {
   920  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
   921  			}
   922  			frame.pc++
   923  		case operationKindMemorySize:
   924  			ce.pushValue(uint64(memoryInst.Pages()))
   925  			frame.pc++
   926  		case operationKindMemoryGrow:
   927  			n := ce.popValue()
   928  			if res, ok := memoryInst.Grow(uint32(n)); !ok {
   929  				ce.pushValue(uint64(0xffffffff)) // = -1 in signed 32-bit integer.
   930  			} else {
   931  				ce.pushValue(uint64(res))
   932  			}
   933  			frame.pc++
   934  		case operationKindConstI32, operationKindConstI64,
   935  			operationKindConstF32, operationKindConstF64:
   936  			ce.pushValue(op.U1)
   937  			frame.pc++
   938  		case operationKindEq:
   939  			var b bool
   940  			switch unsignedType(op.B1) {
   941  			case unsignedTypeI32:
   942  				v2, v1 := ce.popValue(), ce.popValue()
   943  				b = uint32(v1) == uint32(v2)
   944  			case unsignedTypeI64:
   945  				v2, v1 := ce.popValue(), ce.popValue()
   946  				b = v1 == v2
   947  			case unsignedTypeF32:
   948  				v2, v1 := ce.popValue(), ce.popValue()
   949  				b = math.Float32frombits(uint32(v2)) == math.Float32frombits(uint32(v1))
   950  			case unsignedTypeF64:
   951  				v2, v1 := ce.popValue(), ce.popValue()
   952  				b = math.Float64frombits(v2) == math.Float64frombits(v1)
   953  			}
   954  			if b {
   955  				ce.pushValue(1)
   956  			} else {
   957  				ce.pushValue(0)
   958  			}
   959  			frame.pc++
   960  		case operationKindNe:
   961  			var b bool
   962  			switch unsignedType(op.B1) {
   963  			case unsignedTypeI32, unsignedTypeI64:
   964  				v2, v1 := ce.popValue(), ce.popValue()
   965  				b = v1 != v2
   966  			case unsignedTypeF32:
   967  				v2, v1 := ce.popValue(), ce.popValue()
   968  				b = math.Float32frombits(uint32(v2)) != math.Float32frombits(uint32(v1))
   969  			case unsignedTypeF64:
   970  				v2, v1 := ce.popValue(), ce.popValue()
   971  				b = math.Float64frombits(v2) != math.Float64frombits(v1)
   972  			}
   973  			if b {
   974  				ce.pushValue(1)
   975  			} else {
   976  				ce.pushValue(0)
   977  			}
   978  			frame.pc++
   979  		case operationKindEqz:
   980  			if ce.popValue() == 0 {
   981  				ce.pushValue(1)
   982  			} else {
   983  				ce.pushValue(0)
   984  			}
   985  			frame.pc++
   986  		case operationKindLt:
   987  			v2 := ce.popValue()
   988  			v1 := ce.popValue()
   989  			var b bool
   990  			switch signedType(op.B1) {
   991  			case signedTypeInt32:
   992  				b = int32(v1) < int32(v2)
   993  			case signedTypeInt64:
   994  				b = int64(v1) < int64(v2)
   995  			case signedTypeUint32, signedTypeUint64:
   996  				b = v1 < v2
   997  			case signedTypeFloat32:
   998  				b = math.Float32frombits(uint32(v1)) < math.Float32frombits(uint32(v2))
   999  			case signedTypeFloat64:
  1000  				b = math.Float64frombits(v1) < math.Float64frombits(v2)
  1001  			}
  1002  			if b {
  1003  				ce.pushValue(1)
  1004  			} else {
  1005  				ce.pushValue(0)
  1006  			}
  1007  			frame.pc++
  1008  		case operationKindGt:
  1009  			v2 := ce.popValue()
  1010  			v1 := ce.popValue()
  1011  			var b bool
  1012  			switch signedType(op.B1) {
  1013  			case signedTypeInt32:
  1014  				b = int32(v1) > int32(v2)
  1015  			case signedTypeInt64:
  1016  				b = int64(v1) > int64(v2)
  1017  			case signedTypeUint32, signedTypeUint64:
  1018  				b = v1 > v2
  1019  			case signedTypeFloat32:
  1020  				b = math.Float32frombits(uint32(v1)) > math.Float32frombits(uint32(v2))
  1021  			case signedTypeFloat64:
  1022  				b = math.Float64frombits(v1) > math.Float64frombits(v2)
  1023  			}
  1024  			if b {
  1025  				ce.pushValue(1)
  1026  			} else {
  1027  				ce.pushValue(0)
  1028  			}
  1029  			frame.pc++
  1030  		case operationKindLe:
  1031  			v2 := ce.popValue()
  1032  			v1 := ce.popValue()
  1033  			var b bool
  1034  			switch signedType(op.B1) {
  1035  			case signedTypeInt32:
  1036  				b = int32(v1) <= int32(v2)
  1037  			case signedTypeInt64:
  1038  				b = int64(v1) <= int64(v2)
  1039  			case signedTypeUint32, signedTypeUint64:
  1040  				b = v1 <= v2
  1041  			case signedTypeFloat32:
  1042  				b = math.Float32frombits(uint32(v1)) <= math.Float32frombits(uint32(v2))
  1043  			case signedTypeFloat64:
  1044  				b = math.Float64frombits(v1) <= math.Float64frombits(v2)
  1045  			}
  1046  			if b {
  1047  				ce.pushValue(1)
  1048  			} else {
  1049  				ce.pushValue(0)
  1050  			}
  1051  			frame.pc++
  1052  		case operationKindGe:
  1053  			v2 := ce.popValue()
  1054  			v1 := ce.popValue()
  1055  			var b bool
  1056  			switch signedType(op.B1) {
  1057  			case signedTypeInt32:
  1058  				b = int32(v1) >= int32(v2)
  1059  			case signedTypeInt64:
  1060  				b = int64(v1) >= int64(v2)
  1061  			case signedTypeUint32, signedTypeUint64:
  1062  				b = v1 >= v2
  1063  			case signedTypeFloat32:
  1064  				b = math.Float32frombits(uint32(v1)) >= math.Float32frombits(uint32(v2))
  1065  			case signedTypeFloat64:
  1066  				b = math.Float64frombits(v1) >= math.Float64frombits(v2)
  1067  			}
  1068  			if b {
  1069  				ce.pushValue(1)
  1070  			} else {
  1071  				ce.pushValue(0)
  1072  			}
  1073  			frame.pc++
  1074  		case operationKindAdd:
  1075  			v2 := ce.popValue()
  1076  			v1 := ce.popValue()
  1077  			switch unsignedType(op.B1) {
  1078  			case unsignedTypeI32:
  1079  				v := uint32(v1) + uint32(v2)
  1080  				ce.pushValue(uint64(v))
  1081  			case unsignedTypeI64:
  1082  				ce.pushValue(v1 + v2)
  1083  			case unsignedTypeF32:
  1084  				ce.pushValue(addFloat32bits(uint32(v1), uint32(v2)))
  1085  			case unsignedTypeF64:
  1086  				v := math.Float64frombits(v1) + math.Float64frombits(v2)
  1087  				ce.pushValue(math.Float64bits(v))
  1088  			}
  1089  			frame.pc++
  1090  		case operationKindSub:
  1091  			v2 := ce.popValue()
  1092  			v1 := ce.popValue()
  1093  			switch unsignedType(op.B1) {
  1094  			case unsignedTypeI32:
  1095  				ce.pushValue(uint64(uint32(v1) - uint32(v2)))
  1096  			case unsignedTypeI64:
  1097  				ce.pushValue(v1 - v2)
  1098  			case unsignedTypeF32:
  1099  				ce.pushValue(subFloat32bits(uint32(v1), uint32(v2)))
  1100  			case unsignedTypeF64:
  1101  				v := math.Float64frombits(v1) - math.Float64frombits(v2)
  1102  				ce.pushValue(math.Float64bits(v))
  1103  			}
  1104  			frame.pc++
  1105  		case operationKindMul:
  1106  			v2 := ce.popValue()
  1107  			v1 := ce.popValue()
  1108  			switch unsignedType(op.B1) {
  1109  			case unsignedTypeI32:
  1110  				ce.pushValue(uint64(uint32(v1) * uint32(v2)))
  1111  			case unsignedTypeI64:
  1112  				ce.pushValue(v1 * v2)
  1113  			case unsignedTypeF32:
  1114  				ce.pushValue(mulFloat32bits(uint32(v1), uint32(v2)))
  1115  			case unsignedTypeF64:
  1116  				v := math.Float64frombits(v2) * math.Float64frombits(v1)
  1117  				ce.pushValue(math.Float64bits(v))
  1118  			}
  1119  			frame.pc++
  1120  		case operationKindClz:
  1121  			v := ce.popValue()
  1122  			if op.B1 == 0 {
  1123  				// unsignedInt32
  1124  				ce.pushValue(uint64(bits.LeadingZeros32(uint32(v))))
  1125  			} else {
  1126  				// unsignedInt64
  1127  				ce.pushValue(uint64(bits.LeadingZeros64(v)))
  1128  			}
  1129  			frame.pc++
  1130  		case operationKindCtz:
  1131  			v := ce.popValue()
  1132  			if op.B1 == 0 {
  1133  				// unsignedInt32
  1134  				ce.pushValue(uint64(bits.TrailingZeros32(uint32(v))))
  1135  			} else {
  1136  				// unsignedInt64
  1137  				ce.pushValue(uint64(bits.TrailingZeros64(v)))
  1138  			}
  1139  			frame.pc++
  1140  		case operationKindPopcnt:
  1141  			v := ce.popValue()
  1142  			if op.B1 == 0 {
  1143  				// unsignedInt32
  1144  				ce.pushValue(uint64(bits.OnesCount32(uint32(v))))
  1145  			} else {
  1146  				// unsignedInt64
  1147  				ce.pushValue(uint64(bits.OnesCount64(v)))
  1148  			}
  1149  			frame.pc++
  1150  		case operationKindDiv:
  1151  			// If an integer, check we won't divide by zero.
  1152  			t := signedType(op.B1)
  1153  			v2, v1 := ce.popValue(), ce.popValue()
  1154  			switch t {
  1155  			case signedTypeFloat32, signedTypeFloat64: // not integers
  1156  			default:
  1157  				if v2 == 0 {
  1158  					panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1159  				}
  1160  			}
  1161  
  1162  			switch t {
  1163  			case signedTypeInt32:
  1164  				d := int32(v2)
  1165  				n := int32(v1)
  1166  				if n == math.MinInt32 && d == -1 {
  1167  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1168  				}
  1169  				ce.pushValue(uint64(uint32(n / d)))
  1170  			case signedTypeInt64:
  1171  				d := int64(v2)
  1172  				n := int64(v1)
  1173  				if n == math.MinInt64 && d == -1 {
  1174  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1175  				}
  1176  				ce.pushValue(uint64(n / d))
  1177  			case signedTypeUint32:
  1178  				d := uint32(v2)
  1179  				n := uint32(v1)
  1180  				ce.pushValue(uint64(n / d))
  1181  			case signedTypeUint64:
  1182  				d := v2
  1183  				n := v1
  1184  				ce.pushValue(n / d)
  1185  			case signedTypeFloat32:
  1186  				ce.pushValue(divFloat32bits(uint32(v1), uint32(v2)))
  1187  			case signedTypeFloat64:
  1188  				ce.pushValue(math.Float64bits(math.Float64frombits(v1) / math.Float64frombits(v2)))
  1189  			}
  1190  			frame.pc++
  1191  		case operationKindRem:
  1192  			v2, v1 := ce.popValue(), ce.popValue()
  1193  			if v2 == 0 {
  1194  				panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1195  			}
  1196  			switch signedInt(op.B1) {
  1197  			case signedInt32:
  1198  				d := int32(v2)
  1199  				n := int32(v1)
  1200  				ce.pushValue(uint64(uint32(n % d)))
  1201  			case signedInt64:
  1202  				d := int64(v2)
  1203  				n := int64(v1)
  1204  				ce.pushValue(uint64(n % d))
  1205  			case signedUint32:
  1206  				d := uint32(v2)
  1207  				n := uint32(v1)
  1208  				ce.pushValue(uint64(n % d))
  1209  			case signedUint64:
  1210  				d := v2
  1211  				n := v1
  1212  				ce.pushValue(n % d)
  1213  			}
  1214  			frame.pc++
  1215  		case operationKindAnd:
  1216  			v2 := ce.popValue()
  1217  			v1 := ce.popValue()
  1218  			if op.B1 == 0 {
  1219  				// unsignedInt32
  1220  				ce.pushValue(uint64(uint32(v2) & uint32(v1)))
  1221  			} else {
  1222  				// unsignedInt64
  1223  				ce.pushValue(uint64(v2 & v1))
  1224  			}
  1225  			frame.pc++
  1226  		case operationKindOr:
  1227  			v2 := ce.popValue()
  1228  			v1 := ce.popValue()
  1229  			if op.B1 == 0 {
  1230  				// unsignedInt32
  1231  				ce.pushValue(uint64(uint32(v2) | uint32(v1)))
  1232  			} else {
  1233  				// unsignedInt64
  1234  				ce.pushValue(uint64(v2 | v1))
  1235  			}
  1236  			frame.pc++
  1237  		case operationKindXor:
  1238  			v2 := ce.popValue()
  1239  			v1 := ce.popValue()
  1240  			if op.B1 == 0 {
  1241  				// unsignedInt32
  1242  				ce.pushValue(uint64(uint32(v2) ^ uint32(v1)))
  1243  			} else {
  1244  				// unsignedInt64
  1245  				ce.pushValue(uint64(v2 ^ v1))
  1246  			}
  1247  			frame.pc++
  1248  		case operationKindShl:
  1249  			v2 := ce.popValue()
  1250  			v1 := ce.popValue()
  1251  			if op.B1 == 0 {
  1252  				// unsignedInt32
  1253  				ce.pushValue(uint64(uint32(v1) << (uint32(v2) % 32)))
  1254  			} else {
  1255  				// unsignedInt64
  1256  				ce.pushValue(v1 << (v2 % 64))
  1257  			}
  1258  			frame.pc++
  1259  		case operationKindShr:
  1260  			v2 := ce.popValue()
  1261  			v1 := ce.popValue()
  1262  			switch signedInt(op.B1) {
  1263  			case signedInt32:
  1264  				ce.pushValue(uint64(uint32(int32(v1) >> (uint32(v2) % 32))))
  1265  			case signedInt64:
  1266  				ce.pushValue(uint64(int64(v1) >> (v2 % 64)))
  1267  			case signedUint32:
  1268  				ce.pushValue(uint64(uint32(v1) >> (uint32(v2) % 32)))
  1269  			case signedUint64:
  1270  				ce.pushValue(v1 >> (v2 % 64))
  1271  			}
  1272  			frame.pc++
  1273  		case operationKindRotl:
  1274  			v2 := ce.popValue()
  1275  			v1 := ce.popValue()
  1276  			if op.B1 == 0 {
  1277  				// unsignedInt32
  1278  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), int(v2))))
  1279  			} else {
  1280  				// unsignedInt64
  1281  				ce.pushValue(uint64(bits.RotateLeft64(v1, int(v2))))
  1282  			}
  1283  			frame.pc++
  1284  		case operationKindRotr:
  1285  			v2 := ce.popValue()
  1286  			v1 := ce.popValue()
  1287  			if op.B1 == 0 {
  1288  				// unsignedInt32
  1289  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), -int(v2))))
  1290  			} else {
  1291  				// unsignedInt64
  1292  				ce.pushValue(uint64(bits.RotateLeft64(v1, -int(v2))))
  1293  			}
  1294  			frame.pc++
  1295  		case operationKindAbs:
  1296  			if op.B1 == 0 {
  1297  				// float32
  1298  				const mask uint32 = 1 << 31
  1299  				ce.pushValue(uint64(uint32(ce.popValue()) &^ mask))
  1300  			} else {
  1301  				// float64
  1302  				const mask uint64 = 1 << 63
  1303  				ce.pushValue(ce.popValue() &^ mask)
  1304  			}
  1305  			frame.pc++
  1306  		case operationKindNeg:
  1307  			if op.B1 == 0 {
  1308  				// float32
  1309  				v := -math.Float32frombits(uint32(ce.popValue()))
  1310  				ce.pushValue(uint64(math.Float32bits(v)))
  1311  			} else {
  1312  				// float64
  1313  				v := -math.Float64frombits(ce.popValue())
  1314  				ce.pushValue(math.Float64bits(v))
  1315  			}
  1316  			frame.pc++
  1317  		case operationKindCeil:
  1318  			if op.B1 == 0 {
  1319  				// float32
  1320  				v := moremath.WasmCompatCeilF32(math.Float32frombits(uint32(ce.popValue())))
  1321  				ce.pushValue(uint64(math.Float32bits(v)))
  1322  			} else {
  1323  				// float64
  1324  				v := moremath.WasmCompatCeilF64(math.Float64frombits(ce.popValue()))
  1325  				ce.pushValue(math.Float64bits(v))
  1326  			}
  1327  			frame.pc++
  1328  		case operationKindFloor:
  1329  			if op.B1 == 0 {
  1330  				// float32
  1331  				v := moremath.WasmCompatFloorF32(math.Float32frombits(uint32(ce.popValue())))
  1332  				ce.pushValue(uint64(math.Float32bits(v)))
  1333  			} else {
  1334  				// float64
  1335  				v := moremath.WasmCompatFloorF64(math.Float64frombits(ce.popValue()))
  1336  				ce.pushValue(math.Float64bits(v))
  1337  			}
  1338  			frame.pc++
  1339  		case operationKindTrunc:
  1340  			if op.B1 == 0 {
  1341  				// float32
  1342  				v := moremath.WasmCompatTruncF32(math.Float32frombits(uint32(ce.popValue())))
  1343  				ce.pushValue(uint64(math.Float32bits(v)))
  1344  			} else {
  1345  				// float64
  1346  				v := moremath.WasmCompatTruncF64(math.Float64frombits(ce.popValue()))
  1347  				ce.pushValue(math.Float64bits(v))
  1348  			}
  1349  			frame.pc++
  1350  		case operationKindNearest:
  1351  			if op.B1 == 0 {
  1352  				// float32
  1353  				f := math.Float32frombits(uint32(ce.popValue()))
  1354  				ce.pushValue(uint64(math.Float32bits(moremath.WasmCompatNearestF32(f))))
  1355  			} else {
  1356  				// float64
  1357  				f := math.Float64frombits(ce.popValue())
  1358  				ce.pushValue(math.Float64bits(moremath.WasmCompatNearestF64(f)))
  1359  			}
  1360  			frame.pc++
  1361  		case operationKindSqrt:
  1362  			if op.B1 == 0 {
  1363  				// float32
  1364  				v := math.Sqrt(float64(math.Float32frombits(uint32(ce.popValue()))))
  1365  				ce.pushValue(uint64(math.Float32bits(float32(v))))
  1366  			} else {
  1367  				// float64
  1368  				v := math.Sqrt(math.Float64frombits(ce.popValue()))
  1369  				ce.pushValue(math.Float64bits(v))
  1370  			}
  1371  			frame.pc++
  1372  		case operationKindMin:
  1373  			if op.B1 == 0 {
  1374  				// float32
  1375  				ce.pushValue(wasmCompatMin32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1376  			} else {
  1377  				v2 := math.Float64frombits(ce.popValue())
  1378  				v1 := math.Float64frombits(ce.popValue())
  1379  				ce.pushValue(math.Float64bits(moremath.WasmCompatMin64(v1, v2)))
  1380  			}
  1381  			frame.pc++
  1382  		case operationKindMax:
  1383  			if op.B1 == 0 {
  1384  				ce.pushValue(wasmCompatMax32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1385  			} else {
  1386  				// float64
  1387  				v2 := math.Float64frombits(ce.popValue())
  1388  				v1 := math.Float64frombits(ce.popValue())
  1389  				ce.pushValue(math.Float64bits(moremath.WasmCompatMax64(v1, v2)))
  1390  			}
  1391  			frame.pc++
  1392  		case operationKindCopysign:
  1393  			if op.B1 == 0 {
  1394  				// float32
  1395  				v2 := uint32(ce.popValue())
  1396  				v1 := uint32(ce.popValue())
  1397  				const signbit = 1 << 31
  1398  				ce.pushValue(uint64(v1&^signbit | v2&signbit))
  1399  			} else {
  1400  				// float64
  1401  				v2 := ce.popValue()
  1402  				v1 := ce.popValue()
  1403  				const signbit = 1 << 63
  1404  				ce.pushValue(v1&^signbit | v2&signbit)
  1405  			}
  1406  			frame.pc++
  1407  		case operationKindI32WrapFromI64:
  1408  			ce.pushValue(uint64(uint32(ce.popValue())))
  1409  			frame.pc++
  1410  		case operationKindITruncFromF:
  1411  			if op.B1 == 0 {
  1412  				// float32
  1413  				switch signedInt(op.B2) {
  1414  				case signedInt32:
  1415  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1416  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1417  						if op.B3 {
  1418  							// non-trapping conversion must cast nan to zero.
  1419  							v = 0
  1420  						} else {
  1421  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1422  						}
  1423  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1424  						if op.B3 {
  1425  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1426  							if v < 0 {
  1427  								v = math.MinInt32
  1428  							} else {
  1429  								v = math.MaxInt32
  1430  							}
  1431  						} else {
  1432  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1433  						}
  1434  					}
  1435  					ce.pushValue(uint64(uint32(int32(v))))
  1436  				case signedInt64:
  1437  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1438  					res := int64(v)
  1439  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1440  						if op.B3 {
  1441  							// non-trapping conversion must cast nan to zero.
  1442  							res = 0
  1443  						} else {
  1444  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1445  						}
  1446  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1447  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1448  						// and that's why we use '>=' not '>' to check overflow.
  1449  						if op.B3 {
  1450  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1451  							if v < 0 {
  1452  								res = math.MinInt64
  1453  							} else {
  1454  								res = math.MaxInt64
  1455  							}
  1456  						} else {
  1457  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1458  						}
  1459  					}
  1460  					ce.pushValue(uint64(res))
  1461  				case signedUint32:
  1462  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1463  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1464  						if op.B3 {
  1465  							// non-trapping conversion must cast nan to zero.
  1466  							v = 0
  1467  						} else {
  1468  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1469  						}
  1470  					} else if v < 0 || v > math.MaxUint32 {
  1471  						if op.B3 {
  1472  							// non-trapping conversion must "saturate" the value for overflowing source.
  1473  							if v < 0 {
  1474  								v = 0
  1475  							} else {
  1476  								v = math.MaxUint32
  1477  							}
  1478  						} else {
  1479  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1480  						}
  1481  					}
  1482  					ce.pushValue(uint64(uint32(v)))
  1483  				case signedUint64:
  1484  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1485  					res := uint64(v)
  1486  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1487  						if op.B3 {
  1488  							// non-trapping conversion must cast nan to zero.
  1489  							res = 0
  1490  						} else {
  1491  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1492  						}
  1493  					} else if v < 0 || v >= math.MaxUint64 {
  1494  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1495  						// and that's why we use '>=' not '>' to check overflow.
  1496  						if op.B3 {
  1497  							// non-trapping conversion must "saturate" the value for overflowing source.
  1498  							if v < 0 {
  1499  								res = 0
  1500  							} else {
  1501  								res = math.MaxUint64
  1502  							}
  1503  						} else {
  1504  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1505  						}
  1506  					}
  1507  					ce.pushValue(res)
  1508  				}
  1509  			} else {
  1510  				// float64
  1511  				switch signedInt(op.B2) {
  1512  				case signedInt32:
  1513  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1514  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1515  						if op.B3 {
  1516  							// non-trapping conversion must cast nan to zero.
  1517  							v = 0
  1518  						} else {
  1519  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1520  						}
  1521  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1522  						if op.B3 {
  1523  							// non-trapping conversion must "saturate" the value for overflowing source.
  1524  							if v < 0 {
  1525  								v = math.MinInt32
  1526  							} else {
  1527  								v = math.MaxInt32
  1528  							}
  1529  						} else {
  1530  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1531  						}
  1532  					}
  1533  					ce.pushValue(uint64(uint32(int32(v))))
  1534  				case signedInt64:
  1535  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1536  					res := int64(v)
  1537  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1538  						if op.B3 {
  1539  							// non-trapping conversion must cast nan to zero.
  1540  							res = 0
  1541  						} else {
  1542  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1543  						}
  1544  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1545  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1546  						// and that's why we use '>=' not '>' to check overflow.
  1547  						if op.B3 {
  1548  							// non-trapping conversion must "saturate" the value for overflowing source.
  1549  							if v < 0 {
  1550  								res = math.MinInt64
  1551  							} else {
  1552  								res = math.MaxInt64
  1553  							}
  1554  						} else {
  1555  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1556  						}
  1557  					}
  1558  					ce.pushValue(uint64(res))
  1559  				case signedUint32:
  1560  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1561  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1562  						if op.B3 {
  1563  							// non-trapping conversion must cast nan to zero.
  1564  							v = 0
  1565  						} else {
  1566  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1567  						}
  1568  					} else if v < 0 || v > math.MaxUint32 {
  1569  						if op.B3 {
  1570  							// non-trapping conversion must "saturate" the value for overflowing source.
  1571  							if v < 0 {
  1572  								v = 0
  1573  							} else {
  1574  								v = math.MaxUint32
  1575  							}
  1576  						} else {
  1577  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1578  						}
  1579  					}
  1580  					ce.pushValue(uint64(uint32(v)))
  1581  				case signedUint64:
  1582  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1583  					res := uint64(v)
  1584  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1585  						if op.B3 {
  1586  							// non-trapping conversion must cast nan to zero.
  1587  							res = 0
  1588  						} else {
  1589  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1590  						}
  1591  					} else if v < 0 || v >= math.MaxUint64 {
  1592  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1593  						// and that's why we use '>=' not '>' to check overflow.
  1594  						if op.B3 {
  1595  							// non-trapping conversion must "saturate" the value for overflowing source.
  1596  							if v < 0 {
  1597  								res = 0
  1598  							} else {
  1599  								res = math.MaxUint64
  1600  							}
  1601  						} else {
  1602  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1603  						}
  1604  					}
  1605  					ce.pushValue(res)
  1606  				}
  1607  			}
  1608  			frame.pc++
  1609  		case operationKindFConvertFromI:
  1610  			switch signedInt(op.B1) {
  1611  			case signedInt32:
  1612  				if op.B2 == 0 {
  1613  					// float32
  1614  					v := float32(int32(ce.popValue()))
  1615  					ce.pushValue(uint64(math.Float32bits(v)))
  1616  				} else {
  1617  					// float64
  1618  					v := float64(int32(ce.popValue()))
  1619  					ce.pushValue(math.Float64bits(v))
  1620  				}
  1621  			case signedInt64:
  1622  				if op.B2 == 0 {
  1623  					// float32
  1624  					v := float32(int64(ce.popValue()))
  1625  					ce.pushValue(uint64(math.Float32bits(v)))
  1626  				} else {
  1627  					// float64
  1628  					v := float64(int64(ce.popValue()))
  1629  					ce.pushValue(math.Float64bits(v))
  1630  				}
  1631  			case signedUint32:
  1632  				if op.B2 == 0 {
  1633  					// float32
  1634  					v := float32(uint32(ce.popValue()))
  1635  					ce.pushValue(uint64(math.Float32bits(v)))
  1636  				} else {
  1637  					// float64
  1638  					v := float64(uint32(ce.popValue()))
  1639  					ce.pushValue(math.Float64bits(v))
  1640  				}
  1641  			case signedUint64:
  1642  				if op.B2 == 0 {
  1643  					// float32
  1644  					v := float32(ce.popValue())
  1645  					ce.pushValue(uint64(math.Float32bits(v)))
  1646  				} else {
  1647  					// float64
  1648  					v := float64(ce.popValue())
  1649  					ce.pushValue(math.Float64bits(v))
  1650  				}
  1651  			}
  1652  			frame.pc++
  1653  		case operationKindF32DemoteFromF64:
  1654  			v := float32(math.Float64frombits(ce.popValue()))
  1655  			ce.pushValue(uint64(math.Float32bits(v)))
  1656  			frame.pc++
  1657  		case operationKindF64PromoteFromF32:
  1658  			v := float64(math.Float32frombits(uint32(ce.popValue())))
  1659  			ce.pushValue(math.Float64bits(v))
  1660  			frame.pc++
  1661  		case operationKindExtend:
  1662  			if op.B1 == 1 {
  1663  				// Signed.
  1664  				v := int64(int32(ce.popValue()))
  1665  				ce.pushValue(uint64(v))
  1666  			} else {
  1667  				v := uint64(uint32(ce.popValue()))
  1668  				ce.pushValue(v)
  1669  			}
  1670  			frame.pc++
  1671  		case operationKindSignExtend32From8:
  1672  			v := uint32(int8(ce.popValue()))
  1673  			ce.pushValue(uint64(v))
  1674  			frame.pc++
  1675  		case operationKindSignExtend32From16:
  1676  			v := uint32(int16(ce.popValue()))
  1677  			ce.pushValue(uint64(v))
  1678  			frame.pc++
  1679  		case operationKindSignExtend64From8:
  1680  			v := int64(int8(ce.popValue()))
  1681  			ce.pushValue(uint64(v))
  1682  			frame.pc++
  1683  		case operationKindSignExtend64From16:
  1684  			v := int64(int16(ce.popValue()))
  1685  			ce.pushValue(uint64(v))
  1686  			frame.pc++
  1687  		case operationKindSignExtend64From32:
  1688  			v := int64(int32(ce.popValue()))
  1689  			ce.pushValue(uint64(v))
  1690  			frame.pc++
  1691  		case operationKindMemoryInit:
  1692  			dataInstance := dataInstances[op.U1]
  1693  			copySize := ce.popValue()
  1694  			inDataOffset := ce.popValue()
  1695  			inMemoryOffset := ce.popValue()
  1696  			if inDataOffset+copySize > uint64(len(dataInstance)) ||
  1697  				inMemoryOffset+copySize > uint64(len(memoryInst.Buffer)) {
  1698  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1699  			} else if copySize != 0 {
  1700  				copy(memoryInst.Buffer[inMemoryOffset:inMemoryOffset+copySize], dataInstance[inDataOffset:])
  1701  			}
  1702  			frame.pc++
  1703  		case operationKindDataDrop:
  1704  			dataInstances[op.U1] = nil
  1705  			frame.pc++
  1706  		case operationKindMemoryCopy:
  1707  			memLen := uint64(len(memoryInst.Buffer))
  1708  			copySize := ce.popValue()
  1709  			sourceOffset := ce.popValue()
  1710  			destinationOffset := ce.popValue()
  1711  			if sourceOffset+copySize > memLen || destinationOffset+copySize > memLen {
  1712  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1713  			} else if copySize != 0 {
  1714  				copy(memoryInst.Buffer[destinationOffset:],
  1715  					memoryInst.Buffer[sourceOffset:sourceOffset+copySize])
  1716  			}
  1717  			frame.pc++
  1718  		case operationKindMemoryFill:
  1719  			fillSize := ce.popValue()
  1720  			value := byte(ce.popValue())
  1721  			offset := ce.popValue()
  1722  			if fillSize+offset > uint64(len(memoryInst.Buffer)) {
  1723  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1724  			} else if fillSize != 0 {
  1725  				// Uses the copy trick for faster filling buffer.
  1726  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1727  				buf := memoryInst.Buffer[offset : offset+fillSize]
  1728  				buf[0] = value
  1729  				for i := 1; i < len(buf); i *= 2 {
  1730  					copy(buf[i:], buf[:i])
  1731  				}
  1732  			}
  1733  			frame.pc++
  1734  		case operationKindTableInit:
  1735  			elementInstance := elementInstances[op.U1]
  1736  			copySize := ce.popValue()
  1737  			inElementOffset := ce.popValue()
  1738  			inTableOffset := ce.popValue()
  1739  			table := tables[op.U2]
  1740  			if inElementOffset+copySize > uint64(len(elementInstance)) ||
  1741  				inTableOffset+copySize > uint64(len(table.References)) {
  1742  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1743  			} else if copySize != 0 {
  1744  				copy(table.References[inTableOffset:inTableOffset+copySize], elementInstance[inElementOffset:])
  1745  			}
  1746  			frame.pc++
  1747  		case operationKindElemDrop:
  1748  			elementInstances[op.U1] = nil
  1749  			frame.pc++
  1750  		case operationKindTableCopy:
  1751  			srcTable, dstTable := tables[op.U1].References, tables[op.U2].References
  1752  			copySize := ce.popValue()
  1753  			sourceOffset := ce.popValue()
  1754  			destinationOffset := ce.popValue()
  1755  			if sourceOffset+copySize > uint64(len(srcTable)) || destinationOffset+copySize > uint64(len(dstTable)) {
  1756  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1757  			} else if copySize != 0 {
  1758  				copy(dstTable[destinationOffset:], srcTable[sourceOffset:sourceOffset+copySize])
  1759  			}
  1760  			frame.pc++
  1761  		case operationKindRefFunc:
  1762  			ce.pushValue(uint64(uintptr(unsafe.Pointer(&functions[op.U1]))))
  1763  			frame.pc++
  1764  		case operationKindTableGet:
  1765  			table := tables[op.U1]
  1766  
  1767  			offset := ce.popValue()
  1768  			if offset >= uint64(len(table.References)) {
  1769  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1770  			}
  1771  
  1772  			ce.pushValue(uint64(table.References[offset]))
  1773  			frame.pc++
  1774  		case operationKindTableSet:
  1775  			table := tables[op.U1]
  1776  			ref := ce.popValue()
  1777  
  1778  			offset := ce.popValue()
  1779  			if offset >= uint64(len(table.References)) {
  1780  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1781  			}
  1782  
  1783  			table.References[offset] = uintptr(ref) // externrefs are opaque uint64.
  1784  			frame.pc++
  1785  		case operationKindTableSize:
  1786  			table := tables[op.U1]
  1787  			ce.pushValue(uint64(len(table.References)))
  1788  			frame.pc++
  1789  		case operationKindTableGrow:
  1790  			table := tables[op.U1]
  1791  			num, ref := ce.popValue(), ce.popValue()
  1792  			ret := table.Grow(uint32(num), uintptr(ref))
  1793  			ce.pushValue(uint64(ret))
  1794  			frame.pc++
  1795  		case operationKindTableFill:
  1796  			table := tables[op.U1]
  1797  			num := ce.popValue()
  1798  			ref := uintptr(ce.popValue())
  1799  			offset := ce.popValue()
  1800  			if num+offset > uint64(len(table.References)) {
  1801  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1802  			} else if num > 0 {
  1803  				// Uses the copy trick for faster filling the region with the value.
  1804  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1805  				targetRegion := table.References[offset : offset+num]
  1806  				targetRegion[0] = ref
  1807  				for i := 1; i < len(targetRegion); i *= 2 {
  1808  					copy(targetRegion[i:], targetRegion[:i])
  1809  				}
  1810  			}
  1811  			frame.pc++
  1812  		case operationKindV128Const:
  1813  			lo, hi := op.U1, op.U2
  1814  			ce.pushValue(lo)
  1815  			ce.pushValue(hi)
  1816  			frame.pc++
  1817  		case operationKindV128Add:
  1818  			yHigh, yLow := ce.popValue(), ce.popValue()
  1819  			xHigh, xLow := ce.popValue(), ce.popValue()
  1820  			switch op.B1 {
  1821  			case shapeI8x16:
  1822  				ce.pushValue(
  1823  					uint64(uint8(xLow>>8)+uint8(yLow>>8))<<8 | uint64(uint8(xLow)+uint8(yLow)) |
  1824  						uint64(uint8(xLow>>24)+uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)+uint8(yLow>>16))<<16 |
  1825  						uint64(uint8(xLow>>40)+uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)+uint8(yLow>>32))<<32 |
  1826  						uint64(uint8(xLow>>56)+uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)+uint8(yLow>>48))<<48,
  1827  				)
  1828  				ce.pushValue(
  1829  					uint64(uint8(xHigh>>8)+uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)+uint8(yHigh)) |
  1830  						uint64(uint8(xHigh>>24)+uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)+uint8(yHigh>>16))<<16 |
  1831  						uint64(uint8(xHigh>>40)+uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)+uint8(yHigh>>32))<<32 |
  1832  						uint64(uint8(xHigh>>56)+uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)+uint8(yHigh>>48))<<48,
  1833  				)
  1834  			case shapeI16x8:
  1835  				ce.pushValue(
  1836  					uint64(uint16(xLow>>16+yLow>>16))<<16 | uint64(uint16(xLow)+uint16(yLow)) |
  1837  						uint64(uint16(xLow>>48+yLow>>48))<<48 | uint64(uint16(xLow>>32+yLow>>32))<<32,
  1838  				)
  1839  				ce.pushValue(
  1840  					uint64(uint16(xHigh>>16)+uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)+uint16(yHigh)) |
  1841  						uint64(uint16(xHigh>>48)+uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)+uint16(yHigh>>32))<<32,
  1842  				)
  1843  			case shapeI32x4:
  1844  				ce.pushValue(uint64(uint32(xLow>>32)+uint32(yLow>>32))<<32 | uint64(uint32(xLow)+uint32(yLow)))
  1845  				ce.pushValue(uint64(uint32(xHigh>>32)+uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)+uint32(yHigh)))
  1846  			case shapeI64x2:
  1847  				ce.pushValue(xLow + yLow)
  1848  				ce.pushValue(xHigh + yHigh)
  1849  			case shapeF32x4:
  1850  				ce.pushValue(
  1851  					addFloat32bits(uint32(xLow), uint32(yLow)) | addFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
  1852  				)
  1853  				ce.pushValue(
  1854  					addFloat32bits(uint32(xHigh), uint32(yHigh)) | addFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
  1855  				)
  1856  			case shapeF64x2:
  1857  				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow)))
  1858  				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh)))
  1859  			}
  1860  			frame.pc++
  1861  		case operationKindV128Sub:
  1862  			yHigh, yLow := ce.popValue(), ce.popValue()
  1863  			xHigh, xLow := ce.popValue(), ce.popValue()
  1864  			switch op.B1 {
  1865  			case shapeI8x16:
  1866  				ce.pushValue(
  1867  					uint64(uint8(xLow>>8)-uint8(yLow>>8))<<8 | uint64(uint8(xLow)-uint8(yLow)) |
  1868  						uint64(uint8(xLow>>24)-uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)-uint8(yLow>>16))<<16 |
  1869  						uint64(uint8(xLow>>40)-uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)-uint8(yLow>>32))<<32 |
  1870  						uint64(uint8(xLow>>56)-uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)-uint8(yLow>>48))<<48,
  1871  				)
  1872  				ce.pushValue(
  1873  					uint64(uint8(xHigh>>8)-uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)-uint8(yHigh)) |
  1874  						uint64(uint8(xHigh>>24)-uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)-uint8(yHigh>>16))<<16 |
  1875  						uint64(uint8(xHigh>>40)-uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)-uint8(yHigh>>32))<<32 |
  1876  						uint64(uint8(xHigh>>56)-uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)-uint8(yHigh>>48))<<48,
  1877  				)
  1878  			case shapeI16x8:
  1879  				ce.pushValue(
  1880  					uint64(uint16(xLow>>16)-uint16(yLow>>16))<<16 | uint64(uint16(xLow)-uint16(yLow)) |
  1881  						uint64(uint16(xLow>>48)-uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)-uint16(yLow>>32))<<32,
  1882  				)
  1883  				ce.pushValue(
  1884  					uint64(uint16(xHigh>>16)-uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)-uint16(yHigh)) |
  1885  						uint64(uint16(xHigh>>48)-uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)-uint16(yHigh>>32))<<32,
  1886  				)
  1887  			case shapeI32x4:
  1888  				ce.pushValue(uint64(uint32(xLow>>32-yLow>>32))<<32 | uint64(uint32(xLow)-uint32(yLow)))
  1889  				ce.pushValue(uint64(uint32(xHigh>>32-yHigh>>32))<<32 | uint64(uint32(xHigh)-uint32(yHigh)))
  1890  			case shapeI64x2:
  1891  				ce.pushValue(xLow - yLow)
  1892  				ce.pushValue(xHigh - yHigh)
  1893  			case shapeF32x4:
  1894  				ce.pushValue(
  1895  					subFloat32bits(uint32(xLow), uint32(yLow)) | subFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
  1896  				)
  1897  				ce.pushValue(
  1898  					subFloat32bits(uint32(xHigh), uint32(yHigh)) | subFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
  1899  				)
  1900  			case shapeF64x2:
  1901  				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow)))
  1902  				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh)))
  1903  			}
  1904  			frame.pc++
  1905  		case operationKindV128Load:
  1906  			offset := ce.popMemoryOffset(op)
  1907  			switch op.B1 {
  1908  			case v128LoadType128: // Full 128-bit load: two 64-bit little-endian reads, pushed low half first, then high half.
  1909  				lo, ok := memoryInst.ReadUint64Le(offset)
  1910  				if !ok {
  1911  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1912  				}
  1913  				ce.pushValue(lo)
  1914  				hi, ok := memoryInst.ReadUint64Le(offset + 8) // NOTE(review): offset+8 can wrap if offset is a uint32 (as the V128Store guard implies); checked on the next line.
  1915  				if uint64(offset)+8 > math.MaxUint32 || !ok { // Same overflow guard as operationKindV128Store: a wrapped address must trap rather than read from the start of memory.
  1916  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1917  				}
  1918  				ce.pushValue(hi)
  1919  			case v128LoadType8x8s:
  1920  				data, ok := memoryInst.Read(offset, 8)
  1921  				if !ok {
  1922  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1923  				}
  1924  				ce.pushValue(
  1925  					uint64(uint16(int8(data[3])))<<48 | uint64(uint16(int8(data[2])))<<32 | uint64(uint16(int8(data[1])))<<16 | uint64(uint16(int8(data[0]))),
  1926  				)
  1927  				ce.pushValue(
  1928  					uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))),
  1929  				)
  1930  			case v128LoadType8x8u:
  1931  				data, ok := memoryInst.Read(offset, 8)
  1932  				if !ok {
  1933  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1934  				}
  1935  				ce.pushValue(
  1936  					uint64(data[3])<<48 | uint64(data[2])<<32 | uint64(data[1])<<16 | uint64(data[0]),
  1937  				)
  1938  				ce.pushValue(
  1939  					uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]),
  1940  				)
  1941  			case v128LoadType16x4s:
  1942  				data, ok := memoryInst.Read(offset, 8)
  1943  				if !ok {
  1944  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1945  				}
  1946  				ce.pushValue(
  1947  					uint64(int16(binary.LittleEndian.Uint16(data[2:])))<<32 |
  1948  						uint64(uint32(int16(binary.LittleEndian.Uint16(data)))),
  1949  				)
  1950  				ce.pushValue(
  1951  					uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 |
  1952  						uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))),
  1953  				)
  1954  			case v128LoadType16x4u:
  1955  				data, ok := memoryInst.Read(offset, 8)
  1956  				if !ok {
  1957  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1958  				}
  1959  				ce.pushValue(
  1960  					uint64(binary.LittleEndian.Uint16(data[2:]))<<32 | uint64(binary.LittleEndian.Uint16(data)),
  1961  				)
  1962  				ce.pushValue(
  1963  					uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])),
  1964  				)
  1965  			case v128LoadType32x2s:
  1966  				data, ok := memoryInst.Read(offset, 8)
  1967  				if !ok {
  1968  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1969  				}
  1970  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data))))
  1971  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:]))))
  1972  			case v128LoadType32x2u:
  1973  				data, ok := memoryInst.Read(offset, 8)
  1974  				if !ok {
  1975  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1976  				}
  1977  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data)))
  1978  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:])))
  1979  			case v128LoadType8Splat:
  1980  				v, ok := memoryInst.ReadByte(offset)
  1981  				if !ok {
  1982  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1983  				}
  1984  				v8 := uint64(v)<<56 | uint64(v)<<48 | uint64(v)<<40 | uint64(v)<<32 |
  1985  					uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v)
  1986  				ce.pushValue(v8)
  1987  				ce.pushValue(v8)
  1988  			case v128LoadType16Splat:
  1989  				v, ok := memoryInst.ReadUint16Le(offset)
  1990  				if !ok {
  1991  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1992  				}
  1993  				v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v)
  1994  				ce.pushValue(v4)
  1995  				ce.pushValue(v4)
  1996  			case v128LoadType32Splat:
  1997  				v, ok := memoryInst.ReadUint32Le(offset)
  1998  				if !ok {
  1999  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2000  				}
  2001  				vv := uint64(v)<<32 | uint64(v)
  2002  				ce.pushValue(vv)
  2003  				ce.pushValue(vv)
  2004  			case v128LoadType64Splat:
  2005  				lo, ok := memoryInst.ReadUint64Le(offset)
  2006  				if !ok {
  2007  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2008  				}
  2009  				ce.pushValue(lo)
  2010  				ce.pushValue(lo)
  2011  			case v128LoadType32zero:
  2012  				lo, ok := memoryInst.ReadUint32Le(offset)
  2013  				if !ok {
  2014  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2015  				}
  2016  				ce.pushValue(uint64(lo))
  2017  				ce.pushValue(0)
  2018  			case v128LoadType64zero:
  2019  				lo, ok := memoryInst.ReadUint64Le(offset)
  2020  				if !ok {
  2021  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2022  				}
  2023  				ce.pushValue(lo)
  2024  				ce.pushValue(0)
  2025  			}
  2026  			frame.pc++
  2027  		case operationKindV128LoadLane:
  2028  			hi, lo := ce.popValue(), ce.popValue()
  2029  			offset := ce.popMemoryOffset(op)
  2030  			switch op.B1 {
  2031  			case 8:
  2032  				b, ok := memoryInst.ReadByte(offset)
  2033  				if !ok {
  2034  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2035  				}
  2036  				if op.B2 < 8 {
  2037  					s := op.B2 << 3
  2038  					lo = (lo & ^(0xff << s)) | uint64(b)<<s
  2039  				} else {
  2040  					s := (op.B2 - 8) << 3
  2041  					hi = (hi & ^(0xff << s)) | uint64(b)<<s
  2042  				}
  2043  			case 16:
  2044  				b, ok := memoryInst.ReadUint16Le(offset)
  2045  				if !ok {
  2046  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2047  				}
  2048  				if op.B2 < 4 {
  2049  					s := op.B2 << 4
  2050  					lo = (lo & ^(0xff_ff << s)) | uint64(b)<<s
  2051  				} else {
  2052  					s := (op.B2 - 4) << 4
  2053  					hi = (hi & ^(0xff_ff << s)) | uint64(b)<<s
  2054  				}
  2055  			case 32:
  2056  				b, ok := memoryInst.ReadUint32Le(offset)
  2057  				if !ok {
  2058  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2059  				}
  2060  				if op.B2 < 2 {
  2061  					s := op.B2 << 5
  2062  					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
  2063  				} else {
  2064  					s := (op.B2 - 2) << 5
  2065  					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
  2066  				}
  2067  			case 64:
  2068  				b, ok := memoryInst.ReadUint64Le(offset)
  2069  				if !ok {
  2070  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2071  				}
  2072  				if op.B2 == 0 {
  2073  					lo = b
  2074  				} else {
  2075  					hi = b
  2076  				}
  2077  			}
  2078  			ce.pushValue(lo)
  2079  			ce.pushValue(hi)
  2080  			frame.pc++
  2081  		case operationKindV128Store:
  2082  			hi, lo := ce.popValue(), ce.popValue()
  2083  			offset := ce.popMemoryOffset(op)
  2084  			// Write the upper bytes first to trigger an early error if the memory access is out of bounds.
  2085  			// Otherwise, the lower bytes might be written to memory, but the upper bytes might not.
  2086  			if uint64(offset)+8 > math.MaxUint32 {
  2087  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2088  			}
  2089  			if ok := memoryInst.WriteUint64Le(offset+8, hi); !ok {
  2090  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2091  			}
  2092  			if ok := memoryInst.WriteUint64Le(offset, lo); !ok {
  2093  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2094  			}
  2095  			frame.pc++
  2096  		case operationKindV128StoreLane:
  2097  			hi, lo := ce.popValue(), ce.popValue()
  2098  			offset := ce.popMemoryOffset(op)
  2099  			var ok bool
  2100  			switch op.B1 {
  2101  			case 8:
  2102  				if op.B2 < 8 {
  2103  					ok = memoryInst.WriteByte(offset, byte(lo>>(op.B2*8)))
  2104  				} else {
  2105  					ok = memoryInst.WriteByte(offset, byte(hi>>((op.B2-8)*8)))
  2106  				}
  2107  			case 16:
  2108  				if op.B2 < 4 {
  2109  					ok = memoryInst.WriteUint16Le(offset, uint16(lo>>(op.B2*16)))
  2110  				} else {
  2111  					ok = memoryInst.WriteUint16Le(offset, uint16(hi>>((op.B2-4)*16)))
  2112  				}
  2113  			case 32:
  2114  				if op.B2 < 2 {
  2115  					ok = memoryInst.WriteUint32Le(offset, uint32(lo>>(op.B2*32)))
  2116  				} else {
  2117  					ok = memoryInst.WriteUint32Le(offset, uint32(hi>>((op.B2-2)*32)))
  2118  				}
  2119  			case 64:
  2120  				if op.B2 == 0 {
  2121  					ok = memoryInst.WriteUint64Le(offset, lo)
  2122  				} else {
  2123  					ok = memoryInst.WriteUint64Le(offset, hi)
  2124  				}
  2125  			}
  2126  			if !ok {
  2127  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2128  			}
  2129  			frame.pc++
  2130  		case operationKindV128ReplaceLane:
  2131  			v := ce.popValue()
  2132  			hi, lo := ce.popValue(), ce.popValue()
  2133  			switch op.B1 {
  2134  			case shapeI8x16:
  2135  				if op.B2 < 8 {
  2136  					s := op.B2 << 3
  2137  					lo = (lo & ^(0xff << s)) | uint64(byte(v))<<s
  2138  				} else {
  2139  					s := (op.B2 - 8) << 3
  2140  					hi = (hi & ^(0xff << s)) | uint64(byte(v))<<s
  2141  				}
  2142  			case shapeI16x8:
  2143  				if op.B2 < 4 {
  2144  					s := op.B2 << 4
  2145  					lo = (lo & ^(0xff_ff << s)) | uint64(uint16(v))<<s
  2146  				} else {
  2147  					s := (op.B2 - 4) << 4
  2148  					hi = (hi & ^(0xff_ff << s)) | uint64(uint16(v))<<s
  2149  				}
  2150  			case shapeI32x4, shapeF32x4:
  2151  				if op.B2 < 2 {
  2152  					s := op.B2 << 5
  2153  					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
  2154  				} else {
  2155  					s := (op.B2 - 2) << 5
  2156  					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
  2157  				}
  2158  			case shapeI64x2, shapeF64x2:
  2159  				if op.B2 == 0 {
  2160  					lo = v
  2161  				} else {
  2162  					hi = v
  2163  				}
  2164  			}
  2165  			ce.pushValue(lo)
  2166  			ce.pushValue(hi)
  2167  			frame.pc++
  2168  		case operationKindV128ExtractLane:
  2169  			hi, lo := ce.popValue(), ce.popValue()
  2170  			var v uint64
  2171  			switch op.B1 {
  2172  			case shapeI8x16:
  2173  				var u8 byte
  2174  				if op.B2 < 8 {
  2175  					u8 = byte(lo >> (op.B2 * 8))
  2176  				} else {
  2177  					u8 = byte(hi >> ((op.B2 - 8) * 8))
  2178  				}
  2179  				if op.B3 {
  2180  					// sign-extend.
  2181  					v = uint64(uint32(int8(u8)))
  2182  				} else {
  2183  					v = uint64(u8)
  2184  				}
  2185  			case shapeI16x8:
  2186  				var u16 uint16
  2187  				if op.B2 < 4 {
  2188  					u16 = uint16(lo >> (op.B2 * 16))
  2189  				} else {
  2190  					u16 = uint16(hi >> ((op.B2 - 4) * 16))
  2191  				}
  2192  				if op.B3 {
  2193  					// sign-extend.
  2194  					v = uint64(uint32(int16(u16)))
  2195  				} else {
  2196  					v = uint64(u16)
  2197  				}
  2198  			case shapeI32x4, shapeF32x4:
  2199  				if op.B2 < 2 {
  2200  					v = uint64(uint32(lo >> (op.B2 * 32)))
  2201  				} else {
  2202  					v = uint64(uint32(hi >> ((op.B2 - 2) * 32)))
  2203  				}
  2204  			case shapeI64x2, shapeF64x2:
  2205  				if op.B2 == 0 {
  2206  					v = lo
  2207  				} else {
  2208  					v = hi
  2209  				}
  2210  			}
  2211  			ce.pushValue(v)
  2212  			frame.pc++
  2213  		case operationKindV128Splat:
  2214  			v := ce.popValue()
  2215  			var hi, lo uint64
  2216  			switch op.B1 {
  2217  			case shapeI8x16:
  2218  				v8 := uint64(byte(v))<<56 | uint64(byte(v))<<48 | uint64(byte(v))<<40 | uint64(byte(v))<<32 |
  2219  					uint64(byte(v))<<24 | uint64(byte(v))<<16 | uint64(byte(v))<<8 | uint64(byte(v))
  2220  				hi, lo = v8, v8
  2221  			case shapeI16x8:
  2222  				v4 := uint64(uint16(v))<<48 | uint64(uint16(v))<<32 | uint64(uint16(v))<<16 | uint64(uint16(v))
  2223  				hi, lo = v4, v4
  2224  			case shapeI32x4, shapeF32x4:
  2225  				v2 := uint64(uint32(v))<<32 | uint64(uint32(v))
  2226  				lo, hi = v2, v2
  2227  			case shapeI64x2, shapeF64x2:
  2228  				lo, hi = v, v
  2229  			}
  2230  			ce.pushValue(lo)
  2231  			ce.pushValue(hi)
  2232  			frame.pc++
  2233  		case operationKindV128Swizzle:
  2234  			idxHi, idxLo := ce.popValue(), ce.popValue()
  2235  			baseHi, baseLo := ce.popValue(), ce.popValue()
  2236  			var newVal [16]byte
  2237  			for i := 0; i < 16; i++ {
  2238  				var id byte
  2239  				if i < 8 {
  2240  					id = byte(idxLo >> (i * 8))
  2241  				} else {
  2242  					id = byte(idxHi >> ((i - 8) * 8))
  2243  				}
  2244  				if id < 8 {
  2245  					newVal[i] = byte(baseLo >> (id * 8))
  2246  				} else if id < 16 {
  2247  					newVal[i] = byte(baseHi >> ((id - 8) * 8))
  2248  				}
  2249  			}
  2250  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2251  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2252  			frame.pc++
  2253  		case operationKindV128Shuffle:
  2254  			xHi, xLo, yHi, yLo := ce.popValue(), ce.popValue(), ce.popValue(), ce.popValue()
  2255  			var newVal [16]byte
  2256  			for i, l := range op.Us {
  2257  				if l < 8 {
  2258  					newVal[i] = byte(yLo >> (l * 8))
  2259  				} else if l < 16 {
  2260  					newVal[i] = byte(yHi >> ((l - 8) * 8))
  2261  				} else if l < 24 {
  2262  					newVal[i] = byte(xLo >> ((l - 16) * 8))
  2263  				} else if l < 32 {
  2264  					newVal[i] = byte(xHi >> ((l - 24) * 8))
  2265  				}
  2266  			}
  2267  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2268  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2269  			frame.pc++
  2270  		case operationKindV128AnyTrue:
  2271  			hi, lo := ce.popValue(), ce.popValue()
  2272  			if hi != 0 || lo != 0 {
  2273  				ce.pushValue(1)
  2274  			} else {
  2275  				ce.pushValue(0)
  2276  			}
  2277  			frame.pc++
  2278  		case operationKindV128AllTrue:
  2279  			hi, lo := ce.popValue(), ce.popValue()
  2280  			var ret bool
  2281  			switch op.B1 {
  2282  			case shapeI8x16:
  2283  				ret = (uint8(lo) != 0) && (uint8(lo>>8) != 0) && (uint8(lo>>16) != 0) && (uint8(lo>>24) != 0) &&
  2284  					(uint8(lo>>32) != 0) && (uint8(lo>>40) != 0) && (uint8(lo>>48) != 0) && (uint8(lo>>56) != 0) &&
  2285  					(uint8(hi) != 0) && (uint8(hi>>8) != 0) && (uint8(hi>>16) != 0) && (uint8(hi>>24) != 0) &&
  2286  					(uint8(hi>>32) != 0) && (uint8(hi>>40) != 0) && (uint8(hi>>48) != 0) && (uint8(hi>>56) != 0)
  2287  			case shapeI16x8:
  2288  				ret = (uint16(lo) != 0) && (uint16(lo>>16) != 0) && (uint16(lo>>32) != 0) && (uint16(lo>>48) != 0) &&
  2289  					(uint16(hi) != 0) && (uint16(hi>>16) != 0) && (uint16(hi>>32) != 0) && (uint16(hi>>48) != 0)
  2290  			case shapeI32x4:
  2291  				ret = (uint32(lo) != 0) && (uint32(lo>>32) != 0) &&
  2292  					(uint32(hi) != 0) && (uint32(hi>>32) != 0)
  2293  			case shapeI64x2:
  2294  				ret = (lo != 0) &&
  2295  					(hi != 0)
  2296  			}
  2297  			if ret {
  2298  				ce.pushValue(1)
  2299  			} else {
  2300  				ce.pushValue(0)
  2301  			}
  2302  			frame.pc++
  2303  		case operationKindV128BitMask:
  2304  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitmask-extraction
  2305  			hi, lo := ce.popValue(), ce.popValue()
  2306  			var res uint64
  2307  			switch op.B1 {
  2308  			case shapeI8x16:
  2309  				for i := 0; i < 8; i++ {
  2310  					if int8(lo>>(i*8)) < 0 {
  2311  						res |= 1 << i
  2312  					}
  2313  				}
  2314  				for i := 0; i < 8; i++ {
  2315  					if int8(hi>>(i*8)) < 0 {
  2316  						res |= 1 << (i + 8)
  2317  					}
  2318  				}
  2319  			case shapeI16x8:
  2320  				for i := 0; i < 4; i++ {
  2321  					if int16(lo>>(i*16)) < 0 {
  2322  						res |= 1 << i
  2323  					}
  2324  				}
  2325  				for i := 0; i < 4; i++ {
  2326  					if int16(hi>>(i*16)) < 0 {
  2327  						res |= 1 << (i + 4)
  2328  					}
  2329  				}
  2330  			case shapeI32x4:
  2331  				for i := 0; i < 2; i++ {
  2332  					if int32(lo>>(i*32)) < 0 {
  2333  						res |= 1 << i
  2334  					}
  2335  				}
  2336  				for i := 0; i < 2; i++ {
  2337  					if int32(hi>>(i*32)) < 0 {
  2338  						res |= 1 << (i + 2)
  2339  					}
  2340  				}
  2341  			case shapeI64x2: // Two 64-bit lanes: bit 0 of res is set when lo is negative, bit 1 when hi is negative.
  2342  				if int64(lo) < 0 {
  2343  					res |= 0b01
  2344  				}
  2345  				if int64(hi) < 0 { // int64, not int: int is only 32 bits wide on 32-bit targets and would test bit 31 instead of the lane's sign bit 63.
  2346  					res |= 0b10
  2347  				}
  2348  			}
  2349  			ce.pushValue(res)
  2350  			frame.pc++
  2351  		case operationKindV128And:
  2352  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2353  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2354  			ce.pushValue(x1Lo & x2Lo)
  2355  			ce.pushValue(x1Hi & x2Hi)
  2356  			frame.pc++
  2357  		case operationKindV128Not:
  2358  			hi, lo := ce.popValue(), ce.popValue()
  2359  			ce.pushValue(^lo)
  2360  			ce.pushValue(^hi)
  2361  			frame.pc++
  2362  		case operationKindV128Or:
  2363  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2364  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2365  			ce.pushValue(x1Lo | x2Lo)
  2366  			ce.pushValue(x1Hi | x2Hi)
  2367  			frame.pc++
  2368  		case operationKindV128Xor:
  2369  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2370  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2371  			ce.pushValue(x1Lo ^ x2Lo)
  2372  			ce.pushValue(x1Hi ^ x2Hi)
  2373  			frame.pc++
  2374  		case operationKindV128Bitselect:
  2375  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitwise-select
  2376  			cHi, cLo := ce.popValue(), ce.popValue()
  2377  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2378  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2379  			// v128.or(v128.and(v1, c), v128.and(v2, v128.not(c)))
  2380  			ce.pushValue((x1Lo & cLo) | (x2Lo & (^cLo)))
  2381  			ce.pushValue((x1Hi & cHi) | (x2Hi & (^cHi)))
  2382  			frame.pc++
  2383  		case operationKindV128AndNot:
  2384  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2385  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2386  			ce.pushValue(x1Lo & (^x2Lo))
  2387  			ce.pushValue(x1Hi & (^x2Hi))
  2388  			frame.pc++
  2389  		case operationKindV128Shl:
  2390  			s := ce.popValue()
  2391  			hi, lo := ce.popValue(), ce.popValue()
  2392  			switch op.B1 {
  2393  			case shapeI8x16:
  2394  				s = s % 8
  2395  				lo = uint64(uint8(lo<<s)) |
  2396  					uint64(uint8((lo>>8)<<s))<<8 |
  2397  					uint64(uint8((lo>>16)<<s))<<16 |
  2398  					uint64(uint8((lo>>24)<<s))<<24 |
  2399  					uint64(uint8((lo>>32)<<s))<<32 |
  2400  					uint64(uint8((lo>>40)<<s))<<40 |
  2401  					uint64(uint8((lo>>48)<<s))<<48 |
  2402  					uint64(uint8((lo>>56)<<s))<<56
  2403  				hi = uint64(uint8(hi<<s)) |
  2404  					uint64(uint8((hi>>8)<<s))<<8 |
  2405  					uint64(uint8((hi>>16)<<s))<<16 |
  2406  					uint64(uint8((hi>>24)<<s))<<24 |
  2407  					uint64(uint8((hi>>32)<<s))<<32 |
  2408  					uint64(uint8((hi>>40)<<s))<<40 |
  2409  					uint64(uint8((hi>>48)<<s))<<48 |
  2410  					uint64(uint8((hi>>56)<<s))<<56
  2411  			case shapeI16x8:
  2412  				s = s % 16
  2413  				lo = uint64(uint16(lo<<s)) |
  2414  					uint64(uint16((lo>>16)<<s))<<16 |
  2415  					uint64(uint16((lo>>32)<<s))<<32 |
  2416  					uint64(uint16((lo>>48)<<s))<<48
  2417  				hi = uint64(uint16(hi<<s)) |
  2418  					uint64(uint16((hi>>16)<<s))<<16 |
  2419  					uint64(uint16((hi>>32)<<s))<<32 |
  2420  					uint64(uint16((hi>>48)<<s))<<48
  2421  			case shapeI32x4:
  2422  				s = s % 32
  2423  				lo = uint64(uint32(lo<<s)) | uint64(uint32((lo>>32)<<s))<<32
  2424  				hi = uint64(uint32(hi<<s)) | uint64(uint32((hi>>32)<<s))<<32
  2425  			case shapeI64x2:
  2426  				s = s % 64
  2427  				lo = lo << s
  2428  				hi = hi << s
  2429  			}
  2430  			ce.pushValue(lo)
  2431  			ce.pushValue(hi)
  2432  			frame.pc++
  2433  		case operationKindV128Shr:
  2434  			s := ce.popValue()
  2435  			hi, lo := ce.popValue(), ce.popValue()
  2436  			switch op.B1 {
  2437  			case shapeI8x16:
  2438  				s = s % 8
  2439  				if op.B3 { // signed
  2440  					lo = uint64(uint8(int8(lo)>>s)) |
  2441  						uint64(uint8(int8(lo>>8)>>s))<<8 |
  2442  						uint64(uint8(int8(lo>>16)>>s))<<16 |
  2443  						uint64(uint8(int8(lo>>24)>>s))<<24 |
  2444  						uint64(uint8(int8(lo>>32)>>s))<<32 |
  2445  						uint64(uint8(int8(lo>>40)>>s))<<40 |
  2446  						uint64(uint8(int8(lo>>48)>>s))<<48 |
  2447  						uint64(uint8(int8(lo>>56)>>s))<<56
  2448  					hi = uint64(uint8(int8(hi)>>s)) |
  2449  						uint64(uint8(int8(hi>>8)>>s))<<8 |
  2450  						uint64(uint8(int8(hi>>16)>>s))<<16 |
  2451  						uint64(uint8(int8(hi>>24)>>s))<<24 |
  2452  						uint64(uint8(int8(hi>>32)>>s))<<32 |
  2453  						uint64(uint8(int8(hi>>40)>>s))<<40 |
  2454  						uint64(uint8(int8(hi>>48)>>s))<<48 |
  2455  						uint64(uint8(int8(hi>>56)>>s))<<56
  2456  				} else {
  2457  					lo = uint64(uint8(lo)>>s) |
  2458  						uint64(uint8(lo>>8)>>s)<<8 |
  2459  						uint64(uint8(lo>>16)>>s)<<16 |
  2460  						uint64(uint8(lo>>24)>>s)<<24 |
  2461  						uint64(uint8(lo>>32)>>s)<<32 |
  2462  						uint64(uint8(lo>>40)>>s)<<40 |
  2463  						uint64(uint8(lo>>48)>>s)<<48 |
  2464  						uint64(uint8(lo>>56)>>s)<<56
  2465  					hi = uint64(uint8(hi)>>s) |
  2466  						uint64(uint8(hi>>8)>>s)<<8 |
  2467  						uint64(uint8(hi>>16)>>s)<<16 |
  2468  						uint64(uint8(hi>>24)>>s)<<24 |
  2469  						uint64(uint8(hi>>32)>>s)<<32 |
  2470  						uint64(uint8(hi>>40)>>s)<<40 |
  2471  						uint64(uint8(hi>>48)>>s)<<48 |
  2472  						uint64(uint8(hi>>56)>>s)<<56
  2473  				}
  2474  			case shapeI16x8:
  2475  				s = s % 16
  2476  				if op.B3 { // signed
  2477  					lo = uint64(uint16(int16(lo)>>s)) |
  2478  						uint64(uint16(int16(lo>>16)>>s))<<16 |
  2479  						uint64(uint16(int16(lo>>32)>>s))<<32 |
  2480  						uint64(uint16(int16(lo>>48)>>s))<<48
  2481  					hi = uint64(uint16(int16(hi)>>s)) |
  2482  						uint64(uint16(int16(hi>>16)>>s))<<16 |
  2483  						uint64(uint16(int16(hi>>32)>>s))<<32 |
  2484  						uint64(uint16(int16(hi>>48)>>s))<<48
  2485  				} else {
  2486  					lo = uint64(uint16(lo)>>s) |
  2487  						uint64(uint16(lo>>16)>>s)<<16 |
  2488  						uint64(uint16(lo>>32)>>s)<<32 |
  2489  						uint64(uint16(lo>>48)>>s)<<48
  2490  					hi = uint64(uint16(hi)>>s) |
  2491  						uint64(uint16(hi>>16)>>s)<<16 |
  2492  						uint64(uint16(hi>>32)>>s)<<32 |
  2493  						uint64(uint16(hi>>48)>>s)<<48
  2494  				}
  2495  			case shapeI32x4:
  2496  				s = s % 32
  2497  				if op.B3 {
  2498  					lo = uint64(uint32(int32(lo)>>s)) | uint64(uint32(int32(lo>>32)>>s))<<32
  2499  					hi = uint64(uint32(int32(hi)>>s)) | uint64(uint32(int32(hi>>32)>>s))<<32
  2500  				} else {
  2501  					lo = uint64(uint32(lo)>>s) | uint64(uint32(lo>>32)>>s)<<32
  2502  					hi = uint64(uint32(hi)>>s) | uint64(uint32(hi>>32)>>s)<<32
  2503  				}
  2504  			case shapeI64x2:
  2505  				s = s % 64
  2506  				if op.B3 { // signed
  2507  					lo = uint64(int64(lo) >> s)
  2508  					hi = uint64(int64(hi) >> s)
  2509  				} else {
  2510  					lo = lo >> s
  2511  					hi = hi >> s
  2512  				}
  2513  
  2514  			}
  2515  			ce.pushValue(lo)
  2516  			ce.pushValue(hi)
  2517  			frame.pc++
  2518  		case operationKindV128Cmp:
  2519  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2520  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2521  			var result []bool
  2522  			switch op.B1 {
  2523  			case v128CmpTypeI8x16Eq:
  2524  				result = []bool{
  2525  					byte(x1Lo>>0) == byte(x2Lo>>0), byte(x1Lo>>8) == byte(x2Lo>>8),
  2526  					byte(x1Lo>>16) == byte(x2Lo>>16), byte(x1Lo>>24) == byte(x2Lo>>24),
  2527  					byte(x1Lo>>32) == byte(x2Lo>>32), byte(x1Lo>>40) == byte(x2Lo>>40),
  2528  					byte(x1Lo>>48) == byte(x2Lo>>48), byte(x1Lo>>56) == byte(x2Lo>>56),
  2529  					byte(x1Hi>>0) == byte(x2Hi>>0), byte(x1Hi>>8) == byte(x2Hi>>8),
  2530  					byte(x1Hi>>16) == byte(x2Hi>>16), byte(x1Hi>>24) == byte(x2Hi>>24),
  2531  					byte(x1Hi>>32) == byte(x2Hi>>32), byte(x1Hi>>40) == byte(x2Hi>>40),
  2532  					byte(x1Hi>>48) == byte(x2Hi>>48), byte(x1Hi>>56) == byte(x2Hi>>56),
  2533  				}
  2534  			case v128CmpTypeI8x16Ne:
  2535  				result = []bool{
  2536  					byte(x1Lo>>0) != byte(x2Lo>>0), byte(x1Lo>>8) != byte(x2Lo>>8),
  2537  					byte(x1Lo>>16) != byte(x2Lo>>16), byte(x1Lo>>24) != byte(x2Lo>>24),
  2538  					byte(x1Lo>>32) != byte(x2Lo>>32), byte(x1Lo>>40) != byte(x2Lo>>40),
  2539  					byte(x1Lo>>48) != byte(x2Lo>>48), byte(x1Lo>>56) != byte(x2Lo>>56),
  2540  					byte(x1Hi>>0) != byte(x2Hi>>0), byte(x1Hi>>8) != byte(x2Hi>>8),
  2541  					byte(x1Hi>>16) != byte(x2Hi>>16), byte(x1Hi>>24) != byte(x2Hi>>24),
  2542  					byte(x1Hi>>32) != byte(x2Hi>>32), byte(x1Hi>>40) != byte(x2Hi>>40),
  2543  					byte(x1Hi>>48) != byte(x2Hi>>48), byte(x1Hi>>56) != byte(x2Hi>>56),
  2544  				}
  2545  			case v128CmpTypeI8x16LtS:
  2546  				result = []bool{
  2547  					int8(x1Lo>>0) < int8(x2Lo>>0), int8(x1Lo>>8) < int8(x2Lo>>8),
  2548  					int8(x1Lo>>16) < int8(x2Lo>>16), int8(x1Lo>>24) < int8(x2Lo>>24),
  2549  					int8(x1Lo>>32) < int8(x2Lo>>32), int8(x1Lo>>40) < int8(x2Lo>>40),
  2550  					int8(x1Lo>>48) < int8(x2Lo>>48), int8(x1Lo>>56) < int8(x2Lo>>56),
  2551  					int8(x1Hi>>0) < int8(x2Hi>>0), int8(x1Hi>>8) < int8(x2Hi>>8),
  2552  					int8(x1Hi>>16) < int8(x2Hi>>16), int8(x1Hi>>24) < int8(x2Hi>>24),
  2553  					int8(x1Hi>>32) < int8(x2Hi>>32), int8(x1Hi>>40) < int8(x2Hi>>40),
  2554  					int8(x1Hi>>48) < int8(x2Hi>>48), int8(x1Hi>>56) < int8(x2Hi>>56),
  2555  				}
  2556  			case v128CmpTypeI8x16LtU:
  2557  				result = []bool{
  2558  					byte(x1Lo>>0) < byte(x2Lo>>0), byte(x1Lo>>8) < byte(x2Lo>>8),
  2559  					byte(x1Lo>>16) < byte(x2Lo>>16), byte(x1Lo>>24) < byte(x2Lo>>24),
  2560  					byte(x1Lo>>32) < byte(x2Lo>>32), byte(x1Lo>>40) < byte(x2Lo>>40),
  2561  					byte(x1Lo>>48) < byte(x2Lo>>48), byte(x1Lo>>56) < byte(x2Lo>>56),
  2562  					byte(x1Hi>>0) < byte(x2Hi>>0), byte(x1Hi>>8) < byte(x2Hi>>8),
  2563  					byte(x1Hi>>16) < byte(x2Hi>>16), byte(x1Hi>>24) < byte(x2Hi>>24),
  2564  					byte(x1Hi>>32) < byte(x2Hi>>32), byte(x1Hi>>40) < byte(x2Hi>>40),
  2565  					byte(x1Hi>>48) < byte(x2Hi>>48), byte(x1Hi>>56) < byte(x2Hi>>56),
  2566  				}
  2567  			case v128CmpTypeI8x16GtS:
  2568  				result = []bool{
  2569  					int8(x1Lo>>0) > int8(x2Lo>>0), int8(x1Lo>>8) > int8(x2Lo>>8),
  2570  					int8(x1Lo>>16) > int8(x2Lo>>16), int8(x1Lo>>24) > int8(x2Lo>>24),
  2571  					int8(x1Lo>>32) > int8(x2Lo>>32), int8(x1Lo>>40) > int8(x2Lo>>40),
  2572  					int8(x1Lo>>48) > int8(x2Lo>>48), int8(x1Lo>>56) > int8(x2Lo>>56),
  2573  					int8(x1Hi>>0) > int8(x2Hi>>0), int8(x1Hi>>8) > int8(x2Hi>>8),
  2574  					int8(x1Hi>>16) > int8(x2Hi>>16), int8(x1Hi>>24) > int8(x2Hi>>24),
  2575  					int8(x1Hi>>32) > int8(x2Hi>>32), int8(x1Hi>>40) > int8(x2Hi>>40),
  2576  					int8(x1Hi>>48) > int8(x2Hi>>48), int8(x1Hi>>56) > int8(x2Hi>>56),
  2577  				}
  2578  			case v128CmpTypeI8x16GtU:
  2579  				result = []bool{
  2580  					byte(x1Lo>>0) > byte(x2Lo>>0), byte(x1Lo>>8) > byte(x2Lo>>8),
  2581  					byte(x1Lo>>16) > byte(x2Lo>>16), byte(x1Lo>>24) > byte(x2Lo>>24),
  2582  					byte(x1Lo>>32) > byte(x2Lo>>32), byte(x1Lo>>40) > byte(x2Lo>>40),
  2583  					byte(x1Lo>>48) > byte(x2Lo>>48), byte(x1Lo>>56) > byte(x2Lo>>56),
  2584  					byte(x1Hi>>0) > byte(x2Hi>>0), byte(x1Hi>>8) > byte(x2Hi>>8),
  2585  					byte(x1Hi>>16) > byte(x2Hi>>16), byte(x1Hi>>24) > byte(x2Hi>>24),
  2586  					byte(x1Hi>>32) > byte(x2Hi>>32), byte(x1Hi>>40) > byte(x2Hi>>40),
  2587  					byte(x1Hi>>48) > byte(x2Hi>>48), byte(x1Hi>>56) > byte(x2Hi>>56),
  2588  				}
  2589  			case v128CmpTypeI8x16LeS:
  2590  				result = []bool{
  2591  					int8(x1Lo>>0) <= int8(x2Lo>>0), int8(x1Lo>>8) <= int8(x2Lo>>8),
  2592  					int8(x1Lo>>16) <= int8(x2Lo>>16), int8(x1Lo>>24) <= int8(x2Lo>>24),
  2593  					int8(x1Lo>>32) <= int8(x2Lo>>32), int8(x1Lo>>40) <= int8(x2Lo>>40),
  2594  					int8(x1Lo>>48) <= int8(x2Lo>>48), int8(x1Lo>>56) <= int8(x2Lo>>56),
  2595  					int8(x1Hi>>0) <= int8(x2Hi>>0), int8(x1Hi>>8) <= int8(x2Hi>>8),
  2596  					int8(x1Hi>>16) <= int8(x2Hi>>16), int8(x1Hi>>24) <= int8(x2Hi>>24),
  2597  					int8(x1Hi>>32) <= int8(x2Hi>>32), int8(x1Hi>>40) <= int8(x2Hi>>40),
  2598  					int8(x1Hi>>48) <= int8(x2Hi>>48), int8(x1Hi>>56) <= int8(x2Hi>>56),
  2599  				}
  2600  			case v128CmpTypeI8x16LeU:
  2601  				result = []bool{
  2602  					byte(x1Lo>>0) <= byte(x2Lo>>0), byte(x1Lo>>8) <= byte(x2Lo>>8),
  2603  					byte(x1Lo>>16) <= byte(x2Lo>>16), byte(x1Lo>>24) <= byte(x2Lo>>24),
  2604  					byte(x1Lo>>32) <= byte(x2Lo>>32), byte(x1Lo>>40) <= byte(x2Lo>>40),
  2605  					byte(x1Lo>>48) <= byte(x2Lo>>48), byte(x1Lo>>56) <= byte(x2Lo>>56),
  2606  					byte(x1Hi>>0) <= byte(x2Hi>>0), byte(x1Hi>>8) <= byte(x2Hi>>8),
  2607  					byte(x1Hi>>16) <= byte(x2Hi>>16), byte(x1Hi>>24) <= byte(x2Hi>>24),
  2608  					byte(x1Hi>>32) <= byte(x2Hi>>32), byte(x1Hi>>40) <= byte(x2Hi>>40),
  2609  					byte(x1Hi>>48) <= byte(x2Hi>>48), byte(x1Hi>>56) <= byte(x2Hi>>56),
  2610  				}
  2611  			case v128CmpTypeI8x16GeS:
  2612  				result = []bool{
  2613  					int8(x1Lo>>0) >= int8(x2Lo>>0), int8(x1Lo>>8) >= int8(x2Lo>>8),
  2614  					int8(x1Lo>>16) >= int8(x2Lo>>16), int8(x1Lo>>24) >= int8(x2Lo>>24),
  2615  					int8(x1Lo>>32) >= int8(x2Lo>>32), int8(x1Lo>>40) >= int8(x2Lo>>40),
  2616  					int8(x1Lo>>48) >= int8(x2Lo>>48), int8(x1Lo>>56) >= int8(x2Lo>>56),
  2617  					int8(x1Hi>>0) >= int8(x2Hi>>0), int8(x1Hi>>8) >= int8(x2Hi>>8),
  2618  					int8(x1Hi>>16) >= int8(x2Hi>>16), int8(x1Hi>>24) >= int8(x2Hi>>24),
  2619  					int8(x1Hi>>32) >= int8(x2Hi>>32), int8(x1Hi>>40) >= int8(x2Hi>>40),
  2620  					int8(x1Hi>>48) >= int8(x2Hi>>48), int8(x1Hi>>56) >= int8(x2Hi>>56),
  2621  				}
  2622  			case v128CmpTypeI8x16GeU:
  2623  				result = []bool{
  2624  					byte(x1Lo>>0) >= byte(x2Lo>>0), byte(x1Lo>>8) >= byte(x2Lo>>8),
  2625  					byte(x1Lo>>16) >= byte(x2Lo>>16), byte(x1Lo>>24) >= byte(x2Lo>>24),
  2626  					byte(x1Lo>>32) >= byte(x2Lo>>32), byte(x1Lo>>40) >= byte(x2Lo>>40),
  2627  					byte(x1Lo>>48) >= byte(x2Lo>>48), byte(x1Lo>>56) >= byte(x2Lo>>56),
  2628  					byte(x1Hi>>0) >= byte(x2Hi>>0), byte(x1Hi>>8) >= byte(x2Hi>>8),
  2629  					byte(x1Hi>>16) >= byte(x2Hi>>16), byte(x1Hi>>24) >= byte(x2Hi>>24),
  2630  					byte(x1Hi>>32) >= byte(x2Hi>>32), byte(x1Hi>>40) >= byte(x2Hi>>40),
  2631  					byte(x1Hi>>48) >= byte(x2Hi>>48), byte(x1Hi>>56) >= byte(x2Hi>>56),
  2632  				}
  2633  			case v128CmpTypeI16x8Eq:
  2634  				result = []bool{
  2635  					uint16(x1Lo>>0) == uint16(x2Lo>>0), uint16(x1Lo>>16) == uint16(x2Lo>>16),
  2636  					uint16(x1Lo>>32) == uint16(x2Lo>>32), uint16(x1Lo>>48) == uint16(x2Lo>>48),
  2637  					uint16(x1Hi>>0) == uint16(x2Hi>>0), uint16(x1Hi>>16) == uint16(x2Hi>>16),
  2638  					uint16(x1Hi>>32) == uint16(x2Hi>>32), uint16(x1Hi>>48) == uint16(x2Hi>>48),
  2639  				}
  2640  			case v128CmpTypeI16x8Ne:
  2641  				result = []bool{
  2642  					uint16(x1Lo>>0) != uint16(x2Lo>>0), uint16(x1Lo>>16) != uint16(x2Lo>>16),
  2643  					uint16(x1Lo>>32) != uint16(x2Lo>>32), uint16(x1Lo>>48) != uint16(x2Lo>>48),
  2644  					uint16(x1Hi>>0) != uint16(x2Hi>>0), uint16(x1Hi>>16) != uint16(x2Hi>>16),
  2645  					uint16(x1Hi>>32) != uint16(x2Hi>>32), uint16(x1Hi>>48) != uint16(x2Hi>>48),
  2646  				}
  2647  			case v128CmpTypeI16x8LtS:
  2648  				result = []bool{
  2649  					int16(x1Lo>>0) < int16(x2Lo>>0), int16(x1Lo>>16) < int16(x2Lo>>16),
  2650  					int16(x1Lo>>32) < int16(x2Lo>>32), int16(x1Lo>>48) < int16(x2Lo>>48),
  2651  					int16(x1Hi>>0) < int16(x2Hi>>0), int16(x1Hi>>16) < int16(x2Hi>>16),
  2652  					int16(x1Hi>>32) < int16(x2Hi>>32), int16(x1Hi>>48) < int16(x2Hi>>48),
  2653  				}
  2654  			case v128CmpTypeI16x8LtU:
  2655  				result = []bool{
  2656  					uint16(x1Lo>>0) < uint16(x2Lo>>0), uint16(x1Lo>>16) < uint16(x2Lo>>16),
  2657  					uint16(x1Lo>>32) < uint16(x2Lo>>32), uint16(x1Lo>>48) < uint16(x2Lo>>48),
  2658  					uint16(x1Hi>>0) < uint16(x2Hi>>0), uint16(x1Hi>>16) < uint16(x2Hi>>16),
  2659  					uint16(x1Hi>>32) < uint16(x2Hi>>32), uint16(x1Hi>>48) < uint16(x2Hi>>48),
  2660  				}
  2661  			case v128CmpTypeI16x8GtS:
  2662  				result = []bool{
  2663  					int16(x1Lo>>0) > int16(x2Lo>>0), int16(x1Lo>>16) > int16(x2Lo>>16),
  2664  					int16(x1Lo>>32) > int16(x2Lo>>32), int16(x1Lo>>48) > int16(x2Lo>>48),
  2665  					int16(x1Hi>>0) > int16(x2Hi>>0), int16(x1Hi>>16) > int16(x2Hi>>16),
  2666  					int16(x1Hi>>32) > int16(x2Hi>>32), int16(x1Hi>>48) > int16(x2Hi>>48),
  2667  				}
  2668  			case v128CmpTypeI16x8GtU:
  2669  				result = []bool{
  2670  					uint16(x1Lo>>0) > uint16(x2Lo>>0), uint16(x1Lo>>16) > uint16(x2Lo>>16),
  2671  					uint16(x1Lo>>32) > uint16(x2Lo>>32), uint16(x1Lo>>48) > uint16(x2Lo>>48),
  2672  					uint16(x1Hi>>0) > uint16(x2Hi>>0), uint16(x1Hi>>16) > uint16(x2Hi>>16),
  2673  					uint16(x1Hi>>32) > uint16(x2Hi>>32), uint16(x1Hi>>48) > uint16(x2Hi>>48),
  2674  				}
  2675  			case v128CmpTypeI16x8LeS:
  2676  				result = []bool{
  2677  					int16(x1Lo>>0) <= int16(x2Lo>>0), int16(x1Lo>>16) <= int16(x2Lo>>16),
  2678  					int16(x1Lo>>32) <= int16(x2Lo>>32), int16(x1Lo>>48) <= int16(x2Lo>>48),
  2679  					int16(x1Hi>>0) <= int16(x2Hi>>0), int16(x1Hi>>16) <= int16(x2Hi>>16),
  2680  					int16(x1Hi>>32) <= int16(x2Hi>>32), int16(x1Hi>>48) <= int16(x2Hi>>48),
  2681  				}
  2682  			case v128CmpTypeI16x8LeU:
  2683  				result = []bool{
  2684  					uint16(x1Lo>>0) <= uint16(x2Lo>>0), uint16(x1Lo>>16) <= uint16(x2Lo>>16),
  2685  					uint16(x1Lo>>32) <= uint16(x2Lo>>32), uint16(x1Lo>>48) <= uint16(x2Lo>>48),
  2686  					uint16(x1Hi>>0) <= uint16(x2Hi>>0), uint16(x1Hi>>16) <= uint16(x2Hi>>16),
  2687  					uint16(x1Hi>>32) <= uint16(x2Hi>>32), uint16(x1Hi>>48) <= uint16(x2Hi>>48),
  2688  				}
  2689  			case v128CmpTypeI16x8GeS:
  2690  				result = []bool{
  2691  					int16(x1Lo>>0) >= int16(x2Lo>>0), int16(x1Lo>>16) >= int16(x2Lo>>16),
  2692  					int16(x1Lo>>32) >= int16(x2Lo>>32), int16(x1Lo>>48) >= int16(x2Lo>>48),
  2693  					int16(x1Hi>>0) >= int16(x2Hi>>0), int16(x1Hi>>16) >= int16(x2Hi>>16),
  2694  					int16(x1Hi>>32) >= int16(x2Hi>>32), int16(x1Hi>>48) >= int16(x2Hi>>48),
  2695  				}
  2696  			case v128CmpTypeI16x8GeU:
  2697  				result = []bool{
  2698  					uint16(x1Lo>>0) >= uint16(x2Lo>>0), uint16(x1Lo>>16) >= uint16(x2Lo>>16),
  2699  					uint16(x1Lo>>32) >= uint16(x2Lo>>32), uint16(x1Lo>>48) >= uint16(x2Lo>>48),
  2700  					uint16(x1Hi>>0) >= uint16(x2Hi>>0), uint16(x1Hi>>16) >= uint16(x2Hi>>16),
  2701  					uint16(x1Hi>>32) >= uint16(x2Hi>>32), uint16(x1Hi>>48) >= uint16(x2Hi>>48),
  2702  				}
  2703  			case v128CmpTypeI32x4Eq:
  2704  				result = []bool{
  2705  					uint32(x1Lo>>0) == uint32(x2Lo>>0), uint32(x1Lo>>32) == uint32(x2Lo>>32),
  2706  					uint32(x1Hi>>0) == uint32(x2Hi>>0), uint32(x1Hi>>32) == uint32(x2Hi>>32),
  2707  				}
  2708  			case v128CmpTypeI32x4Ne:
  2709  				result = []bool{
  2710  					uint32(x1Lo>>0) != uint32(x2Lo>>0), uint32(x1Lo>>32) != uint32(x2Lo>>32),
  2711  					uint32(x1Hi>>0) != uint32(x2Hi>>0), uint32(x1Hi>>32) != uint32(x2Hi>>32),
  2712  				}
  2713  			case v128CmpTypeI32x4LtS:
  2714  				result = []bool{
  2715  					int32(x1Lo>>0) < int32(x2Lo>>0), int32(x1Lo>>32) < int32(x2Lo>>32),
  2716  					int32(x1Hi>>0) < int32(x2Hi>>0), int32(x1Hi>>32) < int32(x2Hi>>32),
  2717  				}
  2718  			case v128CmpTypeI32x4LtU:
  2719  				result = []bool{
  2720  					uint32(x1Lo>>0) < uint32(x2Lo>>0), uint32(x1Lo>>32) < uint32(x2Lo>>32),
  2721  					uint32(x1Hi>>0) < uint32(x2Hi>>0), uint32(x1Hi>>32) < uint32(x2Hi>>32),
  2722  				}
  2723  			case v128CmpTypeI32x4GtS:
  2724  				result = []bool{
  2725  					int32(x1Lo>>0) > int32(x2Lo>>0), int32(x1Lo>>32) > int32(x2Lo>>32),
  2726  					int32(x1Hi>>0) > int32(x2Hi>>0), int32(x1Hi>>32) > int32(x2Hi>>32),
  2727  				}
  2728  			case v128CmpTypeI32x4GtU:
  2729  				result = []bool{
  2730  					uint32(x1Lo>>0) > uint32(x2Lo>>0), uint32(x1Lo>>32) > uint32(x2Lo>>32),
  2731  					uint32(x1Hi>>0) > uint32(x2Hi>>0), uint32(x1Hi>>32) > uint32(x2Hi>>32),
  2732  				}
  2733  			case v128CmpTypeI32x4LeS:
  2734  				result = []bool{
  2735  					int32(x1Lo>>0) <= int32(x2Lo>>0), int32(x1Lo>>32) <= int32(x2Lo>>32),
  2736  					int32(x1Hi>>0) <= int32(x2Hi>>0), int32(x1Hi>>32) <= int32(x2Hi>>32),
  2737  				}
  2738  			case v128CmpTypeI32x4LeU:
  2739  				result = []bool{
  2740  					uint32(x1Lo>>0) <= uint32(x2Lo>>0), uint32(x1Lo>>32) <= uint32(x2Lo>>32),
  2741  					uint32(x1Hi>>0) <= uint32(x2Hi>>0), uint32(x1Hi>>32) <= uint32(x2Hi>>32),
  2742  				}
  2743  			case v128CmpTypeI32x4GeS:
  2744  				result = []bool{
  2745  					int32(x1Lo>>0) >= int32(x2Lo>>0), int32(x1Lo>>32) >= int32(x2Lo>>32),
  2746  					int32(x1Hi>>0) >= int32(x2Hi>>0), int32(x1Hi>>32) >= int32(x2Hi>>32),
  2747  				}
  2748  			case v128CmpTypeI32x4GeU:
  2749  				result = []bool{
  2750  					uint32(x1Lo>>0) >= uint32(x2Lo>>0), uint32(x1Lo>>32) >= uint32(x2Lo>>32),
  2751  					uint32(x1Hi>>0) >= uint32(x2Hi>>0), uint32(x1Hi>>32) >= uint32(x2Hi>>32),
  2752  				}
  2753  			case v128CmpTypeI64x2Eq:
  2754  				result = []bool{x1Lo == x2Lo, x1Hi == x2Hi}
  2755  			case v128CmpTypeI64x2Ne:
  2756  				result = []bool{x1Lo != x2Lo, x1Hi != x2Hi}
  2757  			case v128CmpTypeI64x2LtS:
  2758  				result = []bool{int64(x1Lo) < int64(x2Lo), int64(x1Hi) < int64(x2Hi)}
  2759  			case v128CmpTypeI64x2GtS:
  2760  				result = []bool{int64(x1Lo) > int64(x2Lo), int64(x1Hi) > int64(x2Hi)}
  2761  			case v128CmpTypeI64x2LeS:
  2762  				result = []bool{int64(x1Lo) <= int64(x2Lo), int64(x1Hi) <= int64(x2Hi)}
  2763  			case v128CmpTypeI64x2GeS:
  2764  				result = []bool{int64(x1Lo) >= int64(x2Lo), int64(x1Hi) >= int64(x2Hi)}
  2765  			case v128CmpTypeF32x4Eq:
  2766  				result = []bool{
  2767  					math.Float32frombits(uint32(x1Lo>>0)) == math.Float32frombits(uint32(x2Lo>>0)),
  2768  					math.Float32frombits(uint32(x1Lo>>32)) == math.Float32frombits(uint32(x2Lo>>32)),
  2769  					math.Float32frombits(uint32(x1Hi>>0)) == math.Float32frombits(uint32(x2Hi>>0)),
  2770  					math.Float32frombits(uint32(x1Hi>>32)) == math.Float32frombits(uint32(x2Hi>>32)),
  2771  				}
  2772  			case v128CmpTypeF32x4Ne:
  2773  				result = []bool{
  2774  					math.Float32frombits(uint32(x1Lo>>0)) != math.Float32frombits(uint32(x2Lo>>0)),
  2775  					math.Float32frombits(uint32(x1Lo>>32)) != math.Float32frombits(uint32(x2Lo>>32)),
  2776  					math.Float32frombits(uint32(x1Hi>>0)) != math.Float32frombits(uint32(x2Hi>>0)),
  2777  					math.Float32frombits(uint32(x1Hi>>32)) != math.Float32frombits(uint32(x2Hi>>32)),
  2778  				}
  2779  			case v128CmpTypeF32x4Lt:
  2780  				result = []bool{
  2781  					math.Float32frombits(uint32(x1Lo>>0)) < math.Float32frombits(uint32(x2Lo>>0)),
  2782  					math.Float32frombits(uint32(x1Lo>>32)) < math.Float32frombits(uint32(x2Lo>>32)),
  2783  					math.Float32frombits(uint32(x1Hi>>0)) < math.Float32frombits(uint32(x2Hi>>0)),
  2784  					math.Float32frombits(uint32(x1Hi>>32)) < math.Float32frombits(uint32(x2Hi>>32)),
  2785  				}
  2786  			case v128CmpTypeF32x4Gt:
  2787  				result = []bool{
  2788  					math.Float32frombits(uint32(x1Lo>>0)) > math.Float32frombits(uint32(x2Lo>>0)),
  2789  					math.Float32frombits(uint32(x1Lo>>32)) > math.Float32frombits(uint32(x2Lo>>32)),
  2790  					math.Float32frombits(uint32(x1Hi>>0)) > math.Float32frombits(uint32(x2Hi>>0)),
  2791  					math.Float32frombits(uint32(x1Hi>>32)) > math.Float32frombits(uint32(x2Hi>>32)),
  2792  				}
  2793  			case v128CmpTypeF32x4Le:
  2794  				result = []bool{
  2795  					math.Float32frombits(uint32(x1Lo>>0)) <= math.Float32frombits(uint32(x2Lo>>0)),
  2796  					math.Float32frombits(uint32(x1Lo>>32)) <= math.Float32frombits(uint32(x2Lo>>32)),
  2797  					math.Float32frombits(uint32(x1Hi>>0)) <= math.Float32frombits(uint32(x2Hi>>0)),
  2798  					math.Float32frombits(uint32(x1Hi>>32)) <= math.Float32frombits(uint32(x2Hi>>32)),
  2799  				}
  2800  			case v128CmpTypeF32x4Ge:
  2801  				result = []bool{
  2802  					math.Float32frombits(uint32(x1Lo>>0)) >= math.Float32frombits(uint32(x2Lo>>0)),
  2803  					math.Float32frombits(uint32(x1Lo>>32)) >= math.Float32frombits(uint32(x2Lo>>32)),
  2804  					math.Float32frombits(uint32(x1Hi>>0)) >= math.Float32frombits(uint32(x2Hi>>0)),
  2805  					math.Float32frombits(uint32(x1Hi>>32)) >= math.Float32frombits(uint32(x2Hi>>32)),
  2806  				}
  2807  			case v128CmpTypeF64x2Eq:
  2808  				result = []bool{
  2809  					math.Float64frombits(x1Lo) == math.Float64frombits(x2Lo),
  2810  					math.Float64frombits(x1Hi) == math.Float64frombits(x2Hi),
  2811  				}
  2812  			case v128CmpTypeF64x2Ne:
  2813  				result = []bool{
  2814  					math.Float64frombits(x1Lo) != math.Float64frombits(x2Lo),
  2815  					math.Float64frombits(x1Hi) != math.Float64frombits(x2Hi),
  2816  				}
  2817  			case v128CmpTypeF64x2Lt:
  2818  				result = []bool{
  2819  					math.Float64frombits(x1Lo) < math.Float64frombits(x2Lo),
  2820  					math.Float64frombits(x1Hi) < math.Float64frombits(x2Hi),
  2821  				}
  2822  			case v128CmpTypeF64x2Gt:
  2823  				result = []bool{
  2824  					math.Float64frombits(x1Lo) > math.Float64frombits(x2Lo),
  2825  					math.Float64frombits(x1Hi) > math.Float64frombits(x2Hi),
  2826  				}
  2827  			case v128CmpTypeF64x2Le:
  2828  				result = []bool{
  2829  					math.Float64frombits(x1Lo) <= math.Float64frombits(x2Lo),
  2830  					math.Float64frombits(x1Hi) <= math.Float64frombits(x2Hi),
  2831  				}
  2832  			case v128CmpTypeF64x2Ge:
  2833  				result = []bool{
  2834  					math.Float64frombits(x1Lo) >= math.Float64frombits(x2Lo),
  2835  					math.Float64frombits(x1Hi) >= math.Float64frombits(x2Hi),
  2836  				}
  2837  			}
  2838  
  2839  			var retLo, retHi uint64
  2840  			laneNum := len(result)
  2841  			switch laneNum {
  2842  			case 16:
  2843  				for i, b := range result {
  2844  					if b {
  2845  						if i < 8 {
  2846  							retLo |= 0xff << (i * 8)
  2847  						} else {
  2848  							retHi |= 0xff << ((i - 8) * 8)
  2849  						}
  2850  					}
  2851  				}
  2852  			case 8:
  2853  				for i, b := range result {
  2854  					if b {
  2855  						if i < 4 {
  2856  							retLo |= 0xffff << (i * 16)
  2857  						} else {
  2858  							retHi |= 0xffff << ((i - 4) * 16)
  2859  						}
  2860  					}
  2861  				}
  2862  			case 4:
  2863  				for i, b := range result {
  2864  					if b {
  2865  						if i < 2 {
  2866  							retLo |= 0xffff_ffff << (i * 32)
  2867  						} else {
  2868  							retHi |= 0xffff_ffff << ((i - 2) * 32)
  2869  						}
  2870  					}
  2871  				}
  2872  			case 2:
  2873  				if result[0] {
  2874  					retLo = ^uint64(0)
  2875  				}
  2876  				if result[1] {
  2877  					retHi = ^uint64(0)
  2878  				}
  2879  			}
  2880  
  2881  			ce.pushValue(retLo)
  2882  			ce.pushValue(retHi)
  2883  			frame.pc++
  2884  		case operationKindV128AddSat:
  2885  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2886  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2887  
  2888  			var retLo, retHi uint64
  2889  
  2890  			// Lane-wise addition while saturating the overflowing values.
  2891  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-addition
  2892  			switch op.B1 {
  2893  			case shapeI8x16:
  2894  				for i := 0; i < 16; i++ {
  2895  					var v, w byte
  2896  					if i < 8 {
  2897  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2898  					} else {
  2899  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2900  					}
  2901  
  2902  					var uv uint64
  2903  					if op.B3 { // signed
  2904  						if subbed := int64(int8(v)) + int64(int8(w)); subbed < math.MinInt8 {
  2905  							uv = uint64(byte(0x80))
  2906  						} else if subbed > math.MaxInt8 {
  2907  							uv = uint64(byte(0x7f))
  2908  						} else {
  2909  							uv = uint64(byte(int8(subbed)))
  2910  						}
  2911  					} else {
  2912  						if subbed := int64(v) + int64(w); subbed < 0 {
  2913  							uv = uint64(byte(0))
  2914  						} else if subbed > math.MaxUint8 {
  2915  							uv = uint64(byte(0xff))
  2916  						} else {
  2917  							uv = uint64(byte(subbed))
  2918  						}
  2919  					}
  2920  
  2921  					if i < 8 { // first 8 lanes are on lower 64bits.
  2922  						retLo |= uv << (i * 8)
  2923  					} else {
  2924  						retHi |= uv << ((i - 8) * 8)
  2925  					}
  2926  				}
  2927  			case shapeI16x8:
  2928  				for i := 0; i < 8; i++ {
  2929  					var v, w uint16
  2930  					if i < 4 {
  2931  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  2932  					} else {
  2933  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  2934  					}
  2935  
  2936  					var uv uint64
  2937  					if op.B3 { // signed
  2938  						if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 {
  2939  							uv = uint64(uint16(0x8000))
  2940  						} else if added > math.MaxInt16 {
  2941  							uv = uint64(uint16(0x7fff))
  2942  						} else {
  2943  							uv = uint64(uint16(int16(added)))
  2944  						}
  2945  					} else {
  2946  						if added := int64(v) + int64(w); added < 0 {
  2947  							uv = uint64(uint16(0))
  2948  						} else if added > math.MaxUint16 {
  2949  							uv = uint64(uint16(0xffff))
  2950  						} else {
  2951  							uv = uint64(uint16(added))
  2952  						}
  2953  					}
  2954  
  2955  					if i < 4 { // first 4 lanes are on lower 64bits.
  2956  						retLo |= uv << (i * 16)
  2957  					} else {
  2958  						retHi |= uv << ((i - 4) * 16)
  2959  					}
  2960  				}
  2961  			}
  2962  
  2963  			ce.pushValue(retLo)
  2964  			ce.pushValue(retHi)
  2965  			frame.pc++
  2966  		case operationKindV128SubSat:
  2967  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  2968  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  2969  
  2970  			var retLo, retHi uint64
  2971  
  2972  			// Lane-wise subtraction while saturating the overflowing values.
  2973  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-subtraction
  2974  			switch op.B1 {
  2975  			case shapeI8x16:
  2976  				for i := 0; i < 16; i++ {
  2977  					var v, w byte
  2978  					if i < 8 {
  2979  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  2980  					} else {
  2981  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  2982  					}
  2983  
  2984  					var uv uint64
  2985  					if op.B3 { // signed
  2986  						if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 {
  2987  							uv = uint64(byte(0x80))
  2988  						} else if subbed > math.MaxInt8 {
  2989  							uv = uint64(byte(0x7f))
  2990  						} else {
  2991  							uv = uint64(byte(int8(subbed)))
  2992  						}
  2993  					} else {
  2994  						if subbed := int64(v) - int64(w); subbed < 0 {
  2995  							uv = uint64(byte(0))
  2996  						} else if subbed > math.MaxUint8 {
  2997  							uv = uint64(byte(0xff))
  2998  						} else {
  2999  							uv = uint64(byte(subbed))
  3000  						}
  3001  					}
  3002  
  3003  					if i < 8 {
  3004  						retLo |= uv << (i * 8)
  3005  					} else {
  3006  						retHi |= uv << ((i - 8) * 8)
  3007  					}
  3008  				}
  3009  			case shapeI16x8:
  3010  				for i := 0; i < 8; i++ {
  3011  					var v, w uint16
  3012  					if i < 4 {
  3013  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  3014  					} else {
  3015  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  3016  					}
  3017  
  3018  					var uv uint64
  3019  					if op.B3 { // signed
  3020  						if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 {
  3021  							uv = uint64(uint16(0x8000))
  3022  						} else if subbed > math.MaxInt16 {
  3023  							uv = uint64(uint16(0x7fff))
  3024  						} else {
  3025  							uv = uint64(uint16(int16(subbed)))
  3026  						}
  3027  					} else {
  3028  						if subbed := int64(v) - int64(w); subbed < 0 {
  3029  							uv = uint64(uint16(0))
  3030  						} else if subbed > math.MaxUint16 {
  3031  							uv = uint64(uint16(0xffff))
  3032  						} else {
  3033  							uv = uint64(uint16(subbed))
  3034  						}
  3035  					}
  3036  
  3037  					if i < 4 {
  3038  						retLo |= uv << (i * 16)
  3039  					} else {
  3040  						retHi |= uv << ((i - 4) * 16)
  3041  					}
  3042  				}
  3043  			}
  3044  
  3045  			ce.pushValue(retLo)
  3046  			ce.pushValue(retHi)
  3047  			frame.pc++
  3048  		case operationKindV128Mul:
  3049  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3050  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3051  			var retLo, retHi uint64
  3052  			switch op.B1 {
  3053  			case shapeI16x8:
  3054  				retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) |
  3055  					(uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48)
  3056  				retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) |
  3057  					(uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48)
  3058  			case shapeI32x4:
  3059  				retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32)
  3060  				retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32)
  3061  			case shapeI64x2:
  3062  				retHi = x1hi * x2hi
  3063  				retLo = x1lo * x2lo
  3064  			case shapeF32x4:
  3065  				retHi = mulFloat32bits(uint32(x1hi), uint32(x2hi)) | mulFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3066  				retLo = mulFloat32bits(uint32(x1lo), uint32(x2lo)) | mulFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3067  			case shapeF64x2:
  3068  				retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi))
  3069  				retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo))
  3070  			}
  3071  			ce.pushValue(retLo)
  3072  			ce.pushValue(retHi)
  3073  			frame.pc++
  3074  		case operationKindV128Div:
  3075  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3076  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3077  			var retLo, retHi uint64
  3078  			if op.B1 == shapeF64x2 {
  3079  				retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi))
  3080  				retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo))
  3081  			} else {
  3082  				retHi = divFloat32bits(uint32(x1hi), uint32(x2hi)) | divFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3083  				retLo = divFloat32bits(uint32(x1lo), uint32(x2lo)) | divFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3084  			}
  3085  			ce.pushValue(retLo)
  3086  			ce.pushValue(retHi)
  3087  			frame.pc++
  3088  		case operationKindV128Neg:
  3089  			hi, lo := ce.popValue(), ce.popValue()
  3090  			switch op.B1 {
  3091  			case shapeI8x16:
  3092  				lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) |
  3093  					(uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) |
  3094  					(uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) |
  3095  					(uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56)
  3096  				hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) |
  3097  					(uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) |
  3098  					(uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) |
  3099  					(uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56)
  3100  			case shapeI16x8:
  3101  				hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) |
  3102  					(uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48)
  3103  				lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) |
  3104  					(uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48)
  3105  			case shapeI32x4:
  3106  				hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32)
  3107  				lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32)
  3108  			case shapeI64x2:
  3109  				hi = -hi
  3110  				lo = -lo
  3111  			case shapeF32x4:
  3112  				hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) |
  3113  					(uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32)
  3114  				lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) |
  3115  					(uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32)
  3116  			case shapeF64x2:
  3117  				hi = math.Float64bits(-math.Float64frombits(hi))
  3118  				lo = math.Float64bits(-math.Float64frombits(lo))
  3119  			}
  3120  			ce.pushValue(lo)
  3121  			ce.pushValue(hi)
  3122  			frame.pc++
  3123  		case operationKindV128Sqrt:
  3124  			hi, lo := ce.popValue(), ce.popValue()
  3125  			if op.B1 == shapeF64x2 {
  3126  				hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi)))
  3127  				lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo)))
  3128  			} else {
  3129  				hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) |
  3130  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32)
  3131  				lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) |
  3132  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32)
  3133  			}
  3134  			ce.pushValue(lo)
  3135  			ce.pushValue(hi)
  3136  			frame.pc++
  3137  		case operationKindV128Abs:
  3138  			hi, lo := ce.popValue(), ce.popValue()
  3139  			switch op.B1 {
  3140  			case shapeI8x16:
  3141  				lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) |
  3142  					(uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) |
  3143  					(uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) |
  3144  					(uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56)
  3145  				hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) |
  3146  					(uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) |
  3147  					(uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) |
  3148  					(uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56)
  3149  			case shapeI16x8:
  3150  				hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) |
  3151  					(uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48)
  3152  				lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) |
  3153  					(uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48)
  3154  			case shapeI32x4:
  3155  				hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32)
  3156  				lo = uint64(i32Abs(uint32(lo))) | (uint64(i32Abs(uint32(lo>>32))) << 32)
  3157  			case shapeI64x2:
  3158  				if int64(hi) < 0 {
  3159  					hi = -hi
  3160  				}
  3161  				if int64(lo) < 0 {
  3162  					lo = -lo
  3163  				}
  3164  			case shapeF32x4:
  3165  				hi = hi &^ (1<<31 | 1<<63)
  3166  				lo = lo &^ (1<<31 | 1<<63)
  3167  			case shapeF64x2:
  3168  				hi = hi &^ (1 << 63)
  3169  				lo = lo &^ (1 << 63)
  3170  			}
  3171  			ce.pushValue(lo)
  3172  			ce.pushValue(hi)
  3173  			frame.pc++
  3174  		case operationKindV128Popcnt:
  3175  			hi, lo := ce.popValue(), ce.popValue()
  3176  			var retLo, retHi uint64
  3177  			for i := 0; i < 16; i++ {
  3178  				var v byte
  3179  				if i < 8 {
  3180  					v = byte(lo >> (i * 8))
  3181  				} else {
  3182  					v = byte(hi >> ((i - 8) * 8))
  3183  				}
  3184  
  3185  				var cnt uint64
  3186  				for i := 0; i < 8; i++ {
  3187  					if (v>>i)&0b1 != 0 {
  3188  						cnt++
  3189  					}
  3190  				}
  3191  
  3192  				if i < 8 {
  3193  					retLo |= cnt << (i * 8)
  3194  				} else {
  3195  					retHi |= cnt << ((i - 8) * 8)
  3196  				}
  3197  			}
  3198  			ce.pushValue(retLo)
  3199  			ce.pushValue(retHi)
  3200  			frame.pc++
  3201  		case operationKindV128Min:
  3202  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3203  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3204  			var retLo, retHi uint64
  3205  			switch op.B1 {
  3206  			case shapeI8x16:
  3207  				if op.B3 { // signed
  3208  					retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) |
  3209  						uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3210  						uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3211  						uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3212  					retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) |
  3213  						uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3214  						uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3215  						uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3216  				} else {
  3217  					retLo = uint64(i8MinU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) |
  3218  						uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3219  						uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3220  						uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3221  					retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) |
  3222  						uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3223  						uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3224  						uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3225  				}
  3226  			case shapeI16x8:
  3227  				if op.B3 { // signed
  3228  					retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) |
  3229  						uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3230  						uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3231  						uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3232  					retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) |
  3233  						uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3234  						uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3235  						uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3236  				} else {
  3237  					retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) |
  3238  						uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3239  						uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3240  						uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3241  					retHi = uint64(i16MinU(uint16(x1hi), uint16(x2hi))) |
  3242  						uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3243  						uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3244  						uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3245  				}
  3246  			case shapeI32x4:
  3247  				if op.B3 { // signed
  3248  					retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) |
  3249  						uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3250  					retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) |
  3251  						uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3252  				} else {
  3253  					retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) |
  3254  						uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3255  					retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) |
  3256  						uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3257  				}
  3258  			case shapeF32x4:
  3259  				retHi = wasmCompatMin32bits(uint32(x1hi), uint32(x2hi)) |
  3260  					wasmCompatMin32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3261  				retLo = wasmCompatMin32bits(uint32(x1lo), uint32(x2lo)) |
  3262  					wasmCompatMin32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3263  			case shapeF64x2:
  3264  				retHi = math.Float64bits(moremath.WasmCompatMin64(
  3265  					math.Float64frombits(x1hi),
  3266  					math.Float64frombits(x2hi),
  3267  				))
  3268  				retLo = math.Float64bits(moremath.WasmCompatMin64(
  3269  					math.Float64frombits(x1lo),
  3270  					math.Float64frombits(x2lo),
  3271  				))
  3272  			}
  3273  			ce.pushValue(retLo)
  3274  			ce.pushValue(retHi)
  3275  			frame.pc++
  3276  		case operationKindV128Max:
  3277  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3278  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3279  			var retLo, retHi uint64
  3280  			switch op.B1 {
  3281  			case shapeI8x16:
  3282  				if op.B3 { // signed
  3283  					retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) |
  3284  						uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3285  						uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3286  						uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3287  					retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) |
  3288  						uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3289  						uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3290  						uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3291  				} else {
  3292  					retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) |
  3293  						uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3294  						uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3295  						uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3296  					retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) |
  3297  						uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3298  						uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3299  						uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3300  				}
  3301  			case shapeI16x8:
  3302  				if op.B3 { // signed
  3303  					retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) |
  3304  						uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3305  						uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3306  						uint64(i16MaxS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3307  					retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) |
  3308  						uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3309  						uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3310  						uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3311  				} else {
  3312  					retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) |
  3313  						uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3314  						uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3315  						uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3316  					retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) |
  3317  						uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3318  						uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3319  						uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3320  				}
  3321  			case shapeI32x4:
  3322  				if op.B3 { // signed
  3323  					retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) |
  3324  						uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3325  					retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) |
  3326  						uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3327  				} else {
  3328  					retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) |
  3329  						uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3330  					retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) |
  3331  						uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3332  				}
  3333  			case shapeF32x4:
  3334  				retHi = wasmCompatMax32bits(uint32(x1hi), uint32(x2hi)) |
  3335  					wasmCompatMax32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3336  				retLo = wasmCompatMax32bits(uint32(x1lo), uint32(x2lo)) |
  3337  					wasmCompatMax32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3338  			case shapeF64x2:
  3339  				retHi = math.Float64bits(moremath.WasmCompatMax64(
  3340  					math.Float64frombits(x1hi),
  3341  					math.Float64frombits(x2hi),
  3342  				))
  3343  				retLo = math.Float64bits(moremath.WasmCompatMax64(
  3344  					math.Float64frombits(x1lo),
  3345  					math.Float64frombits(x2lo),
  3346  				))
  3347  			}
  3348  			ce.pushValue(retLo)
  3349  			ce.pushValue(retHi)
  3350  			frame.pc++
  3351  		case operationKindV128AvgrU:
  3352  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3353  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3354  			var retLo, retHi uint64
  3355  			switch op.B1 {
  3356  			case shapeI8x16:
  3357  				retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) |
  3358  					uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3359  					uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3360  					uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3361  				retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) |
  3362  					uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3363  					uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3364  					uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3365  			case shapeI16x8:
  3366  				retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) |
  3367  					uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3368  					uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3369  					uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3370  				retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) |
  3371  					uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3372  					uint64(i16RoundingAverage(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3373  					uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3374  			}
  3375  			ce.pushValue(retLo)
  3376  			ce.pushValue(retHi)
  3377  			frame.pc++
  3378  		case operationKindV128Pmin:
  3379  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3380  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3381  			var retLo, retHi uint64
  3382  			if op.B1 == shapeF32x4 {
  3383  				if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) {
  3384  					retLo = x2lo & 0x00000000_ffffffff
  3385  				} else {
  3386  					retLo = x1lo & 0x00000000_ffffffff
  3387  				}
  3388  				if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) {
  3389  					retLo |= x2lo & 0xffffffff_00000000
  3390  				} else {
  3391  					retLo |= x1lo & 0xffffffff_00000000
  3392  				}
  3393  				if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) {
  3394  					retHi = x2hi & 0x00000000_ffffffff
  3395  				} else {
  3396  					retHi = x1hi & 0x00000000_ffffffff
  3397  				}
  3398  				if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) {
  3399  					retHi |= x2hi & 0xffffffff_00000000
  3400  				} else {
  3401  					retHi |= x1hi & 0xffffffff_00000000
  3402  				}
  3403  			} else {
  3404  				if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) {
  3405  					retLo = x2lo
  3406  				} else {
  3407  					retLo = x1lo
  3408  				}
  3409  				if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) {
  3410  					retHi = x2hi
  3411  				} else {
  3412  					retHi = x1hi
  3413  				}
  3414  			}
  3415  			ce.pushValue(retLo)
  3416  			ce.pushValue(retHi)
  3417  			frame.pc++
  3418  		case operationKindV128Pmax:
  3419  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3420  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3421  			var retLo, retHi uint64
  3422  			if op.B1 == shapeF32x4 {
  3423  				if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) {
  3424  					retLo = x2lo & 0x00000000_ffffffff
  3425  				} else {
  3426  					retLo = x1lo & 0x00000000_ffffffff
  3427  				}
  3428  				if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) {
  3429  					retLo |= x2lo & 0xffffffff_00000000
  3430  				} else {
  3431  					retLo |= x1lo & 0xffffffff_00000000
  3432  				}
  3433  				if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) {
  3434  					retHi = x2hi & 0x00000000_ffffffff
  3435  				} else {
  3436  					retHi = x1hi & 0x00000000_ffffffff
  3437  				}
  3438  				if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) {
  3439  					retHi |= x2hi & 0xffffffff_00000000
  3440  				} else {
  3441  					retHi |= x1hi & 0xffffffff_00000000
  3442  				}
  3443  			} else {
  3444  				if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) {
  3445  					retLo = x2lo
  3446  				} else {
  3447  					retLo = x1lo
  3448  				}
  3449  				if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) {
  3450  					retHi = x2hi
  3451  				} else {
  3452  					retHi = x1hi
  3453  				}
  3454  			}
  3455  			ce.pushValue(retLo)
  3456  			ce.pushValue(retHi)
  3457  			frame.pc++
  3458  		case operationKindV128Ceil:
  3459  			hi, lo := ce.popValue(), ce.popValue()
  3460  			if op.B1 == shapeF32x4 {
  3461  				lo = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo))))) |
  3462  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3463  				hi = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi))))) |
  3464  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3465  			} else {
  3466  				lo = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(lo)))
  3467  				hi = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(hi)))
  3468  			}
  3469  			ce.pushValue(lo)
  3470  			ce.pushValue(hi)
  3471  			frame.pc++
  3472  		case operationKindV128Floor:
  3473  			hi, lo := ce.popValue(), ce.popValue()
  3474  			if op.B1 == shapeF32x4 {
  3475  				lo = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo))))) |
  3476  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3477  				hi = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi))))) |
  3478  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3479  			} else {
  3480  				lo = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(lo)))
  3481  				hi = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(hi)))
  3482  			}
  3483  			ce.pushValue(lo)
  3484  			ce.pushValue(hi)
  3485  			frame.pc++
  3486  		case operationKindV128Trunc:
  3487  			hi, lo := ce.popValue(), ce.popValue()
  3488  			if op.B1 == shapeF32x4 {
  3489  				lo = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo))))) |
  3490  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3491  				hi = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi))))) |
  3492  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3493  			} else {
  3494  				lo = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(lo)))
  3495  				hi = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(hi)))
  3496  			}
  3497  			ce.pushValue(lo)
  3498  			ce.pushValue(hi)
  3499  			frame.pc++
  3500  		case operationKindV128Nearest:
  3501  			hi, lo := ce.popValue(), ce.popValue()
  3502  			if op.B1 == shapeF32x4 {
  3503  				lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) |
  3504  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3505  				hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) |
  3506  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3507  			} else {
  3508  				lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo)))
  3509  				hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi)))
  3510  			}
  3511  			ce.pushValue(lo)
  3512  			ce.pushValue(hi)
  3513  			frame.pc++
  3514  		case operationKindV128Extend:
  3515  			hi, lo := ce.popValue(), ce.popValue()
  3516  			var origin uint64
  3517  			if op.B3 { // use lower 64 bits
  3518  				origin = lo
  3519  			} else {
  3520  				origin = hi
  3521  			}
  3522  
  3523  			signed := op.B2 == 1
  3524  
  3525  			var retHi, retLo uint64
  3526  			switch op.B1 {
  3527  			case shapeI8x16:
  3528  				for i := 0; i < 8; i++ {
  3529  					v8 := byte(origin >> (i * 8))
  3530  
  3531  					var v16 uint16
  3532  					if signed {
  3533  						v16 = uint16(int8(v8))
  3534  					} else {
  3535  						v16 = uint16(v8)
  3536  					}
  3537  
  3538  					if i < 4 {
  3539  						retLo |= uint64(v16) << (i * 16)
  3540  					} else {
  3541  						retHi |= uint64(v16) << ((i - 4) * 16)
  3542  					}
  3543  				}
  3544  			case shapeI16x8:
  3545  				for i := 0; i < 4; i++ {
  3546  					v16 := uint16(origin >> (i * 16))
  3547  
  3548  					var v32 uint32
  3549  					if signed {
  3550  						v32 = uint32(int16(v16))
  3551  					} else {
  3552  						v32 = uint32(v16)
  3553  					}
  3554  
  3555  					if i < 2 {
  3556  						retLo |= uint64(v32) << (i * 32)
  3557  					} else {
  3558  						retHi |= uint64(v32) << ((i - 2) * 32)
  3559  					}
  3560  				}
  3561  			case shapeI32x4:
  3562  				v32Lo := uint32(origin)
  3563  				v32Hi := uint32(origin >> 32)
  3564  				if signed {
  3565  					retLo = uint64(int32(v32Lo))
  3566  					retHi = uint64(int32(v32Hi))
  3567  				} else {
  3568  					retLo = uint64(v32Lo)
  3569  					retHi = uint64(v32Hi)
  3570  				}
  3571  			}
  3572  			ce.pushValue(retLo)
  3573  			ce.pushValue(retHi)
  3574  			frame.pc++
  3575  		case operationKindV128ExtMul:
  3576  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3577  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3578  			var x1, x2 uint64
  3579  			if op.B3 { // use lower 64 bits
  3580  				x1, x2 = x1Lo, x2Lo
  3581  			} else {
  3582  				x1, x2 = x1Hi, x2Hi
  3583  			}
  3584  
  3585  			signed := op.B2 == 1
  3586  
  3587  			var retLo, retHi uint64
  3588  			switch op.B1 {
  3589  			case shapeI8x16:
  3590  				for i := 0; i < 8; i++ {
  3591  					v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8))
  3592  
  3593  					var v16 uint16
  3594  					if signed {
  3595  						v16 = uint16(int16(int8(v1)) * int16(int8(v2)))
  3596  					} else {
  3597  						v16 = uint16(v1) * uint16(v2)
  3598  					}
  3599  
  3600  					if i < 4 {
  3601  						retLo |= uint64(v16) << (i * 16)
  3602  					} else {
  3603  						retHi |= uint64(v16) << ((i - 4) * 16)
  3604  					}
  3605  				}
  3606  			case shapeI16x8:
  3607  				for i := 0; i < 4; i++ {
  3608  					v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16))
  3609  
  3610  					var v32 uint32
  3611  					if signed {
  3612  						v32 = uint32(int32(int16(v1)) * int32(int16(v2)))
  3613  					} else {
  3614  						v32 = uint32(v1) * uint32(v2)
  3615  					}
  3616  
  3617  					if i < 2 {
  3618  						retLo |= uint64(v32) << (i * 32)
  3619  					} else {
  3620  						retHi |= uint64(v32) << ((i - 2) * 32)
  3621  					}
  3622  				}
  3623  			case shapeI32x4:
  3624  				v1Lo, v2Lo := uint32(x1), uint32(x2)
  3625  				v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32)
  3626  				if signed {
  3627  					retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo)))
  3628  					retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi)))
  3629  				} else {
  3630  					retLo = uint64(v1Lo) * uint64(v2Lo)
  3631  					retHi = uint64(v1Hi) * uint64(v2Hi)
  3632  				}
  3633  			}
  3634  
  3635  			ce.pushValue(retLo)
  3636  			ce.pushValue(retHi)
  3637  			frame.pc++
  3638  		case operationKindV128Q15mulrSatS:
  3639  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  3640  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  3641  			var retLo, retHi uint64
  3642  			for i := 0; i < 8; i++ {
  3643  				var v, w int16
  3644  				if i < 4 {
  3645  					v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16)))
  3646  				} else {
  3647  					v, w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16)))
  3648  				}
  3649  
  3650  				var uv uint64
  3651  				// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication
  3652  				if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 {
  3653  					uv = uint64(uint16(0x8000))
  3654  				} else if calc > math.MaxInt16 {
  3655  					uv = uint64(uint16(0x7fff))
  3656  				} else {
  3657  					uv = uint64(uint16(int16(calc)))
  3658  				}
  3659  
  3660  				if i < 4 {
  3661  					retLo |= uv << (i * 16)
  3662  				} else {
  3663  					retHi |= uv << ((i - 4) * 16)
  3664  				}
  3665  			}
  3666  
  3667  			ce.pushValue(retLo)
  3668  			ce.pushValue(retHi)
  3669  			frame.pc++
  3670  		case operationKindV128ExtAddPairwise:
  3671  			hi, lo := ce.popValue(), ce.popValue()
  3672  
  3673  			signed := op.B3
  3674  
  3675  			var retLo, retHi uint64
  3676  			switch op.B1 {
  3677  			case shapeI8x16:
  3678  				for i := 0; i < 8; i++ {
  3679  					var v1, v2 byte
  3680  					if i < 4 {
  3681  						v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8))
  3682  					} else {
  3683  						v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8))
  3684  					}
  3685  
  3686  					var v16 uint16
  3687  					if signed {
  3688  						v16 = uint16(int16(int8(v1)) + int16(int8(v2)))
  3689  					} else {
  3690  						v16 = uint16(v1) + uint16(v2)
  3691  					}
  3692  
  3693  					if i < 4 {
  3694  						retLo |= uint64(v16) << (i * 16)
  3695  					} else {
  3696  						retHi |= uint64(v16) << ((i - 4) * 16)
  3697  					}
  3698  				}
  3699  			case shapeI16x8:
  3700  				for i := 0; i < 4; i++ {
  3701  					var v1, v2 uint16
  3702  					if i < 2 {
  3703  						v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16))
  3704  					} else {
  3705  						v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16))
  3706  					}
  3707  
  3708  					var v32 uint32
  3709  					if signed {
  3710  						v32 = uint32(int32(int16(v1)) + int32(int16(v2)))
  3711  					} else {
  3712  						v32 = uint32(v1) + uint32(v2)
  3713  					}
  3714  
  3715  					if i < 2 {
  3716  						retLo |= uint64(v32) << (i * 32)
  3717  					} else {
  3718  						retHi |= uint64(v32) << ((i - 2) * 32)
  3719  					}
  3720  				}
  3721  			}
  3722  			ce.pushValue(retLo)
  3723  			ce.pushValue(retHi)
  3724  			frame.pc++
  3725  		case operationKindV128FloatPromote:
  3726  			_, toPromote := ce.popValue(), ce.popValue()
  3727  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote)))))
  3728  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote >> 32)))))
  3729  			frame.pc++
  3730  		case operationKindV128FloatDemote:
  3731  			hi, lo := ce.popValue(), ce.popValue()
  3732  			ce.pushValue(
  3733  				uint64(math.Float32bits(float32(math.Float64frombits(lo)))) |
  3734  					(uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32),
  3735  			)
  3736  			ce.pushValue(0)
  3737  			frame.pc++
  3738  		case operationKindV128FConvertFromI:
  3739  			hi, lo := ce.popValue(), ce.popValue()
  3740  			v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32)
  3741  			signed := op.B3
  3742  
  3743  			var retLo, retHi uint64
  3744  			switch op.B1 { // Destination shape.
  3745  			case shapeF32x4: // f32x4 from signed/unsigned i32x4
  3746  				if signed {
  3747  					retLo = uint64(math.Float32bits(float32(int32(v1)))) |
  3748  						(uint64(math.Float32bits(float32(int32(v2)))) << 32)
  3749  					retHi = uint64(math.Float32bits(float32(int32(v3)))) |
  3750  						(uint64(math.Float32bits(float32(int32(v4)))) << 32)
  3751  				} else {
  3752  					retLo = uint64(math.Float32bits(float32(v1))) |
  3753  						(uint64(math.Float32bits(float32(v2))) << 32)
  3754  					retHi = uint64(math.Float32bits(float32(v3))) |
  3755  						(uint64(math.Float32bits(float32(v4))) << 32)
  3756  				}
  3757  			case shapeF64x2: // f64x2 from signed/unsigned i32x4
  3758  				if signed {
  3759  					retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2)))
  3760  				} else {
  3761  					retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2))
  3762  				}
  3763  			}
  3764  
  3765  			ce.pushValue(retLo)
  3766  			ce.pushValue(retHi)
  3767  			frame.pc++
  3768  		case operationKindV128Narrow:
  3769  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3770  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3771  			signed := op.B3
  3772  
  3773  			var retLo, retHi uint64
  3774  			switch op.B1 {
  3775  			case shapeI16x8: // signed/unsigned i16x8 to i8x16
  3776  				for i := 0; i < 8; i++ {
  3777  					var v16 uint16
  3778  					if i < 4 {
  3779  						v16 = uint16(x1Lo >> (i * 16))
  3780  					} else {
  3781  						v16 = uint16(x1Hi >> ((i - 4) * 16))
  3782  					}
  3783  
  3784  					var v byte
  3785  					if signed {
  3786  						if s := int16(v16); s > math.MaxInt8 {
  3787  							v = math.MaxInt8
  3788  						} else if s < math.MinInt8 {
  3789  							s = math.MinInt8
  3790  							v = byte(s)
  3791  						} else {
  3792  							v = byte(v16)
  3793  						}
  3794  					} else {
  3795  						if s := int16(v16); s > math.MaxUint8 {
  3796  							v = math.MaxUint8
  3797  						} else if s < 0 {
  3798  							v = 0
  3799  						} else {
  3800  							v = byte(v16)
  3801  						}
  3802  					}
  3803  					retLo |= uint64(v) << (i * 8)
  3804  				}
  3805  				for i := 0; i < 8; i++ {
  3806  					var v16 uint16
  3807  					if i < 4 {
  3808  						v16 = uint16(x2Lo >> (i * 16))
  3809  					} else {
  3810  						v16 = uint16(x2Hi >> ((i - 4) * 16))
  3811  					}
  3812  
  3813  					var v byte
  3814  					if signed {
  3815  						if s := int16(v16); s > math.MaxInt8 {
  3816  							v = math.MaxInt8
  3817  						} else if s < math.MinInt8 {
  3818  							s = math.MinInt8
  3819  							v = byte(s)
  3820  						} else {
  3821  							v = byte(v16)
  3822  						}
  3823  					} else {
  3824  						if s := int16(v16); s > math.MaxUint8 {
  3825  							v = math.MaxUint8
  3826  						} else if s < 0 {
  3827  							v = 0
  3828  						} else {
  3829  							v = byte(v16)
  3830  						}
  3831  					}
  3832  					retHi |= uint64(v) << (i * 8)
  3833  				}
  3834  			case shapeI32x4: // signed/unsigned i32x4 to i16x8
  3835  				for i := 0; i < 4; i++ {
  3836  					var v32 uint32
  3837  					if i < 2 {
  3838  						v32 = uint32(x1Lo >> (i * 32))
  3839  					} else {
  3840  						v32 = uint32(x1Hi >> ((i - 2) * 32))
  3841  					}
  3842  
  3843  					var v uint16
  3844  					if signed {
  3845  						if s := int32(v32); s > math.MaxInt16 {
  3846  							v = math.MaxInt16
  3847  						} else if s < math.MinInt16 {
  3848  							s = math.MinInt16
  3849  							v = uint16(s)
  3850  						} else {
  3851  							v = uint16(v32)
  3852  						}
  3853  					} else {
  3854  						if s := int32(v32); s > math.MaxUint16 {
  3855  							v = math.MaxUint16
  3856  						} else if s < 0 {
  3857  							v = 0
  3858  						} else {
  3859  							v = uint16(v32)
  3860  						}
  3861  					}
  3862  					retLo |= uint64(v) << (i * 16)
  3863  				}
  3864  
  3865  				for i := 0; i < 4; i++ {
  3866  					var v32 uint32
  3867  					if i < 2 {
  3868  						v32 = uint32(x2Lo >> (i * 32))
  3869  					} else {
  3870  						v32 = uint32(x2Hi >> ((i - 2) * 32))
  3871  					}
  3872  
  3873  					var v uint16
  3874  					if signed {
  3875  						if s := int32(v32); s > math.MaxInt16 {
  3876  							v = math.MaxInt16
  3877  						} else if s < math.MinInt16 {
  3878  							s = math.MinInt16
  3879  							v = uint16(s)
  3880  						} else {
  3881  							v = uint16(v32)
  3882  						}
  3883  					} else {
  3884  						if s := int32(v32); s > math.MaxUint16 {
  3885  							v = math.MaxUint16
  3886  						} else if s < 0 {
  3887  							v = 0
  3888  						} else {
  3889  							v = uint16(v32)
  3890  						}
  3891  					}
  3892  					retHi |= uint64(v) << (i * 16)
  3893  				}
  3894  			}
  3895  			ce.pushValue(retLo)
  3896  			ce.pushValue(retHi)
  3897  			frame.pc++
  3898  		case operationKindV128Dot:
  3899  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3900  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3901  			ce.pushValue(
  3902  				uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) |
  3903  					(uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32),
  3904  			)
  3905  			ce.pushValue(
  3906  				uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) |
  3907  					(uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32),
  3908  			)
  3909  			frame.pc++
  3910  		case operationKindV128ITruncSatFromF:
  3911  			hi, lo := ce.popValue(), ce.popValue()
  3912  			signed := op.B3
  3913  			var retLo, retHi uint64
  3914  
  3915  			switch op.B1 {
  3916  			case shapeF32x4: // f32x4 to i32x4
  3917  				for i, f64 := range [4]float64{
  3918  					math.Trunc(float64(math.Float32frombits(uint32(lo)))),
  3919  					math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))),
  3920  					math.Trunc(float64(math.Float32frombits(uint32(hi)))),
  3921  					math.Trunc(float64(math.Float32frombits(uint32(hi >> 32)))),
  3922  				} {
  3923  
  3924  					var v uint32
  3925  					if math.IsNaN(f64) {
  3926  						v = 0
  3927  					} else if signed {
  3928  						if f64 < math.MinInt32 {
  3929  							f64 = math.MinInt32
  3930  						} else if f64 > math.MaxInt32 {
  3931  							f64 = math.MaxInt32
  3932  						}
  3933  						v = uint32(int32(f64))
  3934  					} else {
  3935  						if f64 < 0 {
  3936  							f64 = 0
  3937  						} else if f64 > math.MaxUint32 {
  3938  							f64 = math.MaxUint32
  3939  						}
  3940  						v = uint32(f64)
  3941  					}
  3942  
  3943  					if i < 2 {
  3944  						retLo |= uint64(v) << (i * 32)
  3945  					} else {
  3946  						retHi |= uint64(v) << ((i - 2) * 32)
  3947  					}
  3948  				}
  3949  
  3950  			case shapeF64x2: // f64x2 to i32x4
  3951  				for i, f := range [2]float64{
  3952  					math.Trunc(math.Float64frombits(lo)),
  3953  					math.Trunc(math.Float64frombits(hi)),
  3954  				} {
  3955  					var v uint32
  3956  					if math.IsNaN(f) {
  3957  						v = 0
  3958  					} else if signed {
  3959  						if f < math.MinInt32 {
  3960  							f = math.MinInt32
  3961  						} else if f > math.MaxInt32 {
  3962  							f = math.MaxInt32
  3963  						}
  3964  						v = uint32(int32(f))
  3965  					} else {
  3966  						if f < 0 {
  3967  							f = 0
  3968  						} else if f > math.MaxUint32 {
  3969  							f = math.MaxUint32
  3970  						}
  3971  						v = uint32(f)
  3972  					}
  3973  
  3974  					retLo |= uint64(v) << (i * 32)
  3975  				}
  3976  			}
  3977  
  3978  			ce.pushValue(retLo)
  3979  			ce.pushValue(retHi)
  3980  			frame.pc++
  3981  		case operationKindAtomicMemoryWait:
  3982  			timeout := int64(ce.popValue())
  3983  			exp := ce.popValue()
  3984  			offset := ce.popMemoryOffset(op)
  3985  			// Runtime instead of validation error because the spec intends to allow binaries to include
  3986  			// such instructions as long as they are not executed.
  3987  			if !memoryInst.Shared {
  3988  				panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
  3989  			}
  3990  
  3991  			switch unsignedType(op.B1) {
  3992  			case unsignedTypeI32:
  3993  				if offset%4 != 0 {
  3994  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  3995  				}
  3996  				if int(offset) > len(memoryInst.Buffer)-4 {
  3997  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  3998  				}
  3999  				ce.pushValue(memoryInst.Wait32(offset, uint32(exp), timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 {
  4000  					mem.Mux.Lock()
  4001  					defer mem.Mux.Unlock()
  4002  					value, _ := mem.ReadUint32Le(offset)
  4003  					return value
  4004  				}))
  4005  			case unsignedTypeI64:
  4006  				if offset%8 != 0 {
  4007  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4008  				}
  4009  				if int(offset) > len(memoryInst.Buffer)-8 {
  4010  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4011  				}
  4012  				ce.pushValue(memoryInst.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 {
  4013  					mem.Mux.Lock()
  4014  					defer mem.Mux.Unlock()
  4015  					value, _ := mem.ReadUint64Le(offset)
  4016  					return value
  4017  				}))
  4018  			}
  4019  			frame.pc++
  4020  		case operationKindAtomicMemoryNotify:
  4021  			count := ce.popValue()
  4022  			offset := ce.popMemoryOffset(op)
  4023  			if offset%4 != 0 {
  4024  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4025  			}
  4026  			// Just a bounds check
  4027  			if offset >= memoryInst.Size() {
  4028  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4029  			}
  4030  			res := memoryInst.Notify(offset, uint32(count))
  4031  			ce.pushValue(uint64(res))
  4032  			frame.pc++
  4033  		case operationKindAtomicFence:
  4034  			// Memory not required for fence only
  4035  			if memoryInst != nil {
  4036  				// An empty critical section can be used as a synchronization primitive, which is what
  4037  				// fence is. Probably, there are no spectests or defined behavior to confirm this yet.
  4038  				memoryInst.Mux.Lock()
  4039  				memoryInst.Mux.Unlock() //nolint:staticcheck
  4040  			}
  4041  			frame.pc++
  4042  		case operationKindAtomicLoad:
  4043  			offset := ce.popMemoryOffset(op)
  4044  			switch unsignedType(op.B1) {
  4045  			case unsignedTypeI32:
  4046  				if offset%4 != 0 {
  4047  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4048  				}
  4049  				memoryInst.Mux.Lock()
  4050  				val, ok := memoryInst.ReadUint32Le(offset)
  4051  				memoryInst.Mux.Unlock()
  4052  				if !ok {
  4053  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4054  				}
  4055  				ce.pushValue(uint64(val))
  4056  			case unsignedTypeI64:
  4057  				if offset%8 != 0 {
  4058  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4059  				}
  4060  				memoryInst.Mux.Lock()
  4061  				val, ok := memoryInst.ReadUint64Le(offset)
  4062  				memoryInst.Mux.Unlock()
  4063  				if !ok {
  4064  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4065  				}
  4066  				ce.pushValue(val)
  4067  			}
  4068  			frame.pc++
  4069  		case operationKindAtomicLoad8:
  4070  			offset := ce.popMemoryOffset(op)
  4071  			memoryInst.Mux.Lock()
  4072  			val, ok := memoryInst.ReadByte(offset)
  4073  			memoryInst.Mux.Unlock()
  4074  			if !ok {
  4075  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4076  			}
  4077  			ce.pushValue(uint64(val))
  4078  			frame.pc++
  4079  		case operationKindAtomicLoad16:
  4080  			offset := ce.popMemoryOffset(op)
  4081  			if offset%2 != 0 {
  4082  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4083  			}
  4084  			memoryInst.Mux.Lock()
  4085  			val, ok := memoryInst.ReadUint16Le(offset)
  4086  			memoryInst.Mux.Unlock()
  4087  			if !ok {
  4088  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4089  			}
  4090  			ce.pushValue(uint64(val))
  4091  			frame.pc++
  4092  		case operationKindAtomicStore:
  4093  			val := ce.popValue()
  4094  			offset := ce.popMemoryOffset(op)
  4095  			switch unsignedType(op.B1) {
  4096  			case unsignedTypeI32:
  4097  				if offset%4 != 0 {
  4098  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4099  				}
  4100  				memoryInst.Mux.Lock()
  4101  				ok := memoryInst.WriteUint32Le(offset, uint32(val))
  4102  				memoryInst.Mux.Unlock()
  4103  				if !ok {
  4104  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4105  				}
  4106  			case unsignedTypeI64:
  4107  				if offset%8 != 0 {
  4108  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4109  				}
  4110  				memoryInst.Mux.Lock()
  4111  				ok := memoryInst.WriteUint64Le(offset, val)
  4112  				memoryInst.Mux.Unlock()
  4113  				if !ok {
  4114  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4115  				}
  4116  			}
  4117  			frame.pc++
  4118  		case operationKindAtomicStore8:
  4119  			val := byte(ce.popValue())
  4120  			offset := ce.popMemoryOffset(op)
  4121  			memoryInst.Mux.Lock()
  4122  			ok := memoryInst.WriteByte(offset, val)
  4123  			memoryInst.Mux.Unlock()
  4124  			if !ok {
  4125  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4126  			}
  4127  			frame.pc++
  4128  		case operationKindAtomicStore16:
  4129  			val := uint16(ce.popValue())
  4130  			offset := ce.popMemoryOffset(op)
  4131  			if offset%2 != 0 {
  4132  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4133  			}
  4134  			memoryInst.Mux.Lock()
  4135  			ok := memoryInst.WriteUint16Le(offset, val)
  4136  			memoryInst.Mux.Unlock()
  4137  			if !ok {
  4138  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4139  			}
  4140  			frame.pc++
  4141  		case operationKindAtomicRMW:
  4142  			val := ce.popValue()
  4143  			offset := ce.popMemoryOffset(op)
  4144  			switch unsignedType(op.B1) {
  4145  			case unsignedTypeI32:
  4146  				if offset%4 != 0 {
  4147  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4148  				}
  4149  				memoryInst.Mux.Lock()
  4150  				old, ok := memoryInst.ReadUint32Le(offset)
  4151  				if !ok {
  4152  					memoryInst.Mux.Unlock()
  4153  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4154  				}
  4155  				var newVal uint32
  4156  				switch atomicArithmeticOp(op.B2) {
  4157  				case atomicArithmeticOpAdd:
  4158  					newVal = old + uint32(val)
  4159  				case atomicArithmeticOpSub:
  4160  					newVal = old - uint32(val)
  4161  				case atomicArithmeticOpAnd:
  4162  					newVal = old & uint32(val)
  4163  				case atomicArithmeticOpOr:
  4164  					newVal = old | uint32(val)
  4165  				case atomicArithmeticOpXor:
  4166  					newVal = old ^ uint32(val)
  4167  				case atomicArithmeticOpNop:
  4168  					newVal = uint32(val)
  4169  				}
  4170  				memoryInst.WriteUint32Le(offset, newVal)
  4171  				memoryInst.Mux.Unlock()
  4172  				ce.pushValue(uint64(old))
  4173  			case unsignedTypeI64:
  4174  				if offset%8 != 0 {
  4175  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4176  				}
  4177  				memoryInst.Mux.Lock()
  4178  				old, ok := memoryInst.ReadUint64Le(offset)
  4179  				if !ok {
  4180  					memoryInst.Mux.Unlock()
  4181  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4182  				}
  4183  				var newVal uint64
  4184  				switch atomicArithmeticOp(op.B2) {
  4185  				case atomicArithmeticOpAdd:
  4186  					newVal = old + val
  4187  				case atomicArithmeticOpSub:
  4188  					newVal = old - val
  4189  				case atomicArithmeticOpAnd:
  4190  					newVal = old & val
  4191  				case atomicArithmeticOpOr:
  4192  					newVal = old | val
  4193  				case atomicArithmeticOpXor:
  4194  					newVal = old ^ val
  4195  				case atomicArithmeticOpNop:
  4196  					newVal = val
  4197  				}
  4198  				memoryInst.WriteUint64Le(offset, newVal)
  4199  				memoryInst.Mux.Unlock()
  4200  				ce.pushValue(old)
  4201  			}
  4202  			frame.pc++
  4203  		case operationKindAtomicRMW8:
  4204  			val := ce.popValue()
  4205  			offset := ce.popMemoryOffset(op)
  4206  			memoryInst.Mux.Lock()
  4207  			old, ok := memoryInst.ReadByte(offset)
  4208  			if !ok {
  4209  				memoryInst.Mux.Unlock()
  4210  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4211  			}
  4212  			arg := byte(val)
  4213  			var newVal byte
  4214  			switch atomicArithmeticOp(op.B2) {
  4215  			case atomicArithmeticOpAdd:
  4216  				newVal = old + arg
  4217  			case atomicArithmeticOpSub:
  4218  				newVal = old - arg
  4219  			case atomicArithmeticOpAnd:
  4220  				newVal = old & arg
  4221  			case atomicArithmeticOpOr:
  4222  				newVal = old | arg
  4223  			case atomicArithmeticOpXor:
  4224  				newVal = old ^ arg
  4225  			case atomicArithmeticOpNop:
  4226  				newVal = arg
  4227  			}
  4228  			memoryInst.WriteByte(offset, newVal)
  4229  			memoryInst.Mux.Unlock()
  4230  			ce.pushValue(uint64(old))
  4231  			frame.pc++
  4232  		case operationKindAtomicRMW16:
  4233  			val := ce.popValue()
  4234  			offset := ce.popMemoryOffset(op)
  4235  			if offset%2 != 0 {
  4236  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4237  			}
  4238  			memoryInst.Mux.Lock()
  4239  			old, ok := memoryInst.ReadUint16Le(offset)
  4240  			if !ok {
  4241  				memoryInst.Mux.Unlock()
  4242  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4243  			}
  4244  			arg := uint16(val)
  4245  			var newVal uint16
  4246  			switch atomicArithmeticOp(op.B2) {
  4247  			case atomicArithmeticOpAdd:
  4248  				newVal = old + arg
  4249  			case atomicArithmeticOpSub:
  4250  				newVal = old - arg
  4251  			case atomicArithmeticOpAnd:
  4252  				newVal = old & arg
  4253  			case atomicArithmeticOpOr:
  4254  				newVal = old | arg
  4255  			case atomicArithmeticOpXor:
  4256  				newVal = old ^ arg
  4257  			case atomicArithmeticOpNop:
  4258  				newVal = arg
  4259  			}
  4260  			memoryInst.WriteUint16Le(offset, newVal)
  4261  			memoryInst.Mux.Unlock()
  4262  			ce.pushValue(uint64(old))
  4263  			frame.pc++
  4264  		case operationKindAtomicRMWCmpxchg:
  4265  			rep := ce.popValue()
  4266  			exp := ce.popValue()
  4267  			offset := ce.popMemoryOffset(op)
  4268  			switch unsignedType(op.B1) {
  4269  			case unsignedTypeI32:
  4270  				if offset%4 != 0 {
  4271  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4272  				}
  4273  				memoryInst.Mux.Lock()
  4274  				old, ok := memoryInst.ReadUint32Le(offset)
  4275  				if !ok {
  4276  					memoryInst.Mux.Unlock()
  4277  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4278  				}
  4279  				if old == uint32(exp) {
  4280  					memoryInst.WriteUint32Le(offset, uint32(rep))
  4281  				}
  4282  				memoryInst.Mux.Unlock()
  4283  				ce.pushValue(uint64(old))
  4284  			case unsignedTypeI64:
  4285  				if offset%8 != 0 {
  4286  					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4287  				}
  4288  				memoryInst.Mux.Lock()
  4289  				old, ok := memoryInst.ReadUint64Le(offset)
  4290  				if !ok {
  4291  					memoryInst.Mux.Unlock()
  4292  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4293  				}
  4294  				if old == exp {
  4295  					memoryInst.WriteUint64Le(offset, rep)
  4296  				}
  4297  				memoryInst.Mux.Unlock()
  4298  				ce.pushValue(old)
  4299  			}
  4300  			frame.pc++
  4301  		case operationKindAtomicRMW8Cmpxchg:
  4302  			rep := byte(ce.popValue())
  4303  			exp := byte(ce.popValue())
  4304  			offset := ce.popMemoryOffset(op)
  4305  			memoryInst.Mux.Lock()
  4306  			old, ok := memoryInst.ReadByte(offset)
  4307  			if !ok {
  4308  				memoryInst.Mux.Unlock()
  4309  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4310  			}
  4311  			if old == exp {
  4312  				memoryInst.WriteByte(offset, rep)
  4313  			}
  4314  			memoryInst.Mux.Unlock()
  4315  			ce.pushValue(uint64(old))
  4316  			frame.pc++
  4317  		case operationKindAtomicRMW16Cmpxchg:
  4318  			rep := uint16(ce.popValue())
  4319  			exp := uint16(ce.popValue())
  4320  			offset := ce.popMemoryOffset(op)
  4321  			if offset%2 != 0 {
  4322  				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
  4323  			}
  4324  			memoryInst.Mux.Lock()
  4325  			old, ok := memoryInst.ReadUint16Le(offset)
  4326  			if !ok {
  4327  				memoryInst.Mux.Unlock()
  4328  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4329  			}
  4330  			if old == exp {
  4331  				memoryInst.WriteUint16Le(offset, rep)
  4332  			}
  4333  			memoryInst.Mux.Unlock()
  4334  			ce.pushValue(uint64(old))
  4335  			frame.pc++
  4336  		default:
  4337  			frame.pc++
  4338  		}
  4339  	}
  4340  	ce.popFrame()
  4341  }
  4342  
  4343  func wasmCompatMax32bits(v1, v2 uint32) uint64 {
  4344  	return uint64(math.Float32bits(moremath.WasmCompatMax32(
  4345  		math.Float32frombits(v1),
  4346  		math.Float32frombits(v2),
  4347  	)))
  4348  }
  4349  
  4350  func wasmCompatMin32bits(v1, v2 uint32) uint64 {
  4351  	return uint64(math.Float32bits(moremath.WasmCompatMin32(
  4352  		math.Float32frombits(v1),
  4353  		math.Float32frombits(v2),
  4354  	)))
  4355  }
  4356  
  4357  func addFloat32bits(v1, v2 uint32) uint64 {
  4358  	return uint64(math.Float32bits(math.Float32frombits(v1) + math.Float32frombits(v2)))
  4359  }
  4360  
  4361  func subFloat32bits(v1, v2 uint32) uint64 {
  4362  	return uint64(math.Float32bits(math.Float32frombits(v1) - math.Float32frombits(v2)))
  4363  }
  4364  
  4365  func mulFloat32bits(v1, v2 uint32) uint64 {
  4366  	return uint64(math.Float32bits(math.Float32frombits(v1) * math.Float32frombits(v2)))
  4367  }
  4368  
  4369  func divFloat32bits(v1, v2 uint32) uint64 {
  4370  	return uint64(math.Float32bits(math.Float32frombits(v1) / math.Float32frombits(v2)))
  4371  }
  4372  
  4373  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  4374  func flt32(z1, z2 float32) bool {
  4375  	if z1 != z1 || z2 != z2 {
  4376  		return false
  4377  	} else if z1 == z2 {
  4378  		return false
  4379  	} else if math.IsInf(float64(z1), 1) {
  4380  		return false
  4381  	} else if math.IsInf(float64(z1), -1) {
  4382  		return true
  4383  	} else if math.IsInf(float64(z2), 1) {
  4384  		return true
  4385  	} else if math.IsInf(float64(z2), -1) {
  4386  		return false
  4387  	}
  4388  	return z1 < z2
  4389  }
  4390  
  4391  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  4392  func flt64(z1, z2 float64) bool {
  4393  	if z1 != z1 || z2 != z2 {
  4394  		return false
  4395  	} else if z1 == z2 {
  4396  		return false
  4397  	} else if math.IsInf(z1, 1) {
  4398  		return false
  4399  	} else if math.IsInf(z1, -1) {
  4400  		return true
  4401  	} else if math.IsInf(z2, 1) {
  4402  		return true
  4403  	} else if math.IsInf(z2, -1) {
  4404  		return false
  4405  	}
  4406  	return z1 < z2
  4407  }
  4408  
  4409  func i8RoundingAverage(v1, v2 byte) byte {
  4410  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  4411  	return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2)
  4412  }
  4413  
  4414  func i16RoundingAverage(v1, v2 uint16) uint16 {
  4415  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  4416  	return uint16((uint32(v1) + uint32(v2) + 1) / 2)
  4417  }
  4418  
  4419  func i8Abs(v byte) byte {
  4420  	if i := int8(v); i < 0 {
  4421  		return byte(-i)
  4422  	} else {
  4423  		return byte(i)
  4424  	}
  4425  }
  4426  
  4427  func i8MaxU(v1, v2 byte) byte {
  4428  	if v1 < v2 {
  4429  		return v2
  4430  	} else {
  4431  		return v1
  4432  	}
  4433  }
  4434  
  4435  func i8MinU(v1, v2 byte) byte {
  4436  	if v1 > v2 {
  4437  		return v2
  4438  	} else {
  4439  		return v1
  4440  	}
  4441  }
  4442  
  4443  func i8MaxS(v1, v2 byte) byte {
  4444  	if int8(v1) < int8(v2) {
  4445  		return v2
  4446  	} else {
  4447  		return v1
  4448  	}
  4449  }
  4450  
  4451  func i8MinS(v1, v2 byte) byte {
  4452  	if int8(v1) > int8(v2) {
  4453  		return v2
  4454  	} else {
  4455  		return v1
  4456  	}
  4457  }
  4458  
  4459  func i16MaxU(v1, v2 uint16) uint16 {
  4460  	if v1 < v2 {
  4461  		return v2
  4462  	} else {
  4463  		return v1
  4464  	}
  4465  }
  4466  
  4467  func i16MinU(v1, v2 uint16) uint16 {
  4468  	if v1 > v2 {
  4469  		return v2
  4470  	} else {
  4471  		return v1
  4472  	}
  4473  }
  4474  
  4475  func i16MaxS(v1, v2 uint16) uint16 {
  4476  	if int16(v1) < int16(v2) {
  4477  		return v2
  4478  	} else {
  4479  		return v1
  4480  	}
  4481  }
  4482  
  4483  func i16MinS(v1, v2 uint16) uint16 {
  4484  	if int16(v1) > int16(v2) {
  4485  		return v2
  4486  	} else {
  4487  		return v1
  4488  	}
  4489  }
  4490  
  4491  func i32MaxU(v1, v2 uint32) uint32 {
  4492  	if v1 < v2 {
  4493  		return v2
  4494  	} else {
  4495  		return v1
  4496  	}
  4497  }
  4498  
  4499  func i32MinU(v1, v2 uint32) uint32 {
  4500  	if v1 > v2 {
  4501  		return v2
  4502  	} else {
  4503  		return v1
  4504  	}
  4505  }
  4506  
  4507  func i32MaxS(v1, v2 uint32) uint32 {
  4508  	if int32(v1) < int32(v2) {
  4509  		return v2
  4510  	} else {
  4511  		return v1
  4512  	}
  4513  }
  4514  
  4515  func i32MinS(v1, v2 uint32) uint32 {
  4516  	if int32(v1) > int32(v2) {
  4517  		return v2
  4518  	} else {
  4519  		return v1
  4520  	}
  4521  }
  4522  
  4523  func i16Abs(v uint16) uint16 {
  4524  	if i := int16(v); i < 0 {
  4525  		return uint16(-i)
  4526  	} else {
  4527  		return uint16(i)
  4528  	}
  4529  }
  4530  
  4531  func i32Abs(v uint32) uint32 {
  4532  	if i := int32(v); i < 0 {
  4533  		return uint32(-i)
  4534  	} else {
  4535  		return uint32(i)
  4536  	}
  4537  }
  4538  
  4539  func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, m *wasm.ModuleInstance, f *function, fnl experimental.FunctionListener) context.Context {
  4540  	def, typ := f.definition(), f.funcType
  4541  
  4542  	ce.stackIterator.reset(ce.stack, ce.frames, f)
  4543  	fnl.Before(ctx, m, def, ce.peekValues(typ.ParamNumInUint64), &ce.stackIterator)
  4544  	ce.stackIterator.clear()
  4545  	ce.callNativeFunc(ctx, m, f)
  4546  	fnl.After(ctx, m, def, ce.peekValues(typ.ResultNumInUint64))
  4547  	return ctx
  4548  }
  4549  
  4550  // popMemoryOffset takes a memory offset off the stack for use in load and store instructions.
  4551  // As the top of stack value is 64-bit, this ensures it is in range before returning it.
  4552  func (ce *callEngine) popMemoryOffset(op *unionOperation) uint32 {
  4553  	offset := op.U2 + ce.popValue()
  4554  	if offset > math.MaxUint32 {
  4555  		panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4556  	}
  4557  	return uint32(offset)
  4558  }
  4559  
  4560  func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleInstance, f *function) {
  4561  	typ := f.funcType
  4562  	paramLen := typ.ParamNumInUint64
  4563  	resultLen := typ.ResultNumInUint64
  4564  	stackLen := paramLen
  4565  
  4566  	// In the interpreter engine, ce.stack may only have capacity to store
  4567  	// parameters. Grow when there are more results than parameters.
  4568  	if growLen := resultLen - paramLen; growLen > 0 {
  4569  		for i := 0; i < growLen; i++ {
  4570  			ce.stack = append(ce.stack, 0)
  4571  		}
  4572  		stackLen += growLen
  4573  	}
  4574  
  4575  	// Pass the stack elements to the go function.
  4576  	stack := ce.stack[len(ce.stack)-stackLen:]
  4577  	ce.callGoFunc(ctx, m, f, stack)
  4578  
  4579  	// Shrink the stack when there were more parameters than results.
  4580  	if shrinkLen := paramLen - resultLen; shrinkLen > 0 {
  4581  		ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen]
  4582  	}
  4583  }