wa-lang.org/wazero@v1.0.2/internal/engine/interpreter/interpreter.go (about)

     1  package interpreter
     2  
     3  import (
     4  	"context"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"math"
     8  	"math/bits"
     9  	"strings"
    10  	"sync"
    11  	"unsafe"
    12  
    13  	"wa-lang.org/wazero/api"
    14  	"wa-lang.org/wazero/experimental"
    15  	"wa-lang.org/wazero/internal/moremath"
    16  	"wa-lang.org/wazero/internal/wasm"
    17  	"wa-lang.org/wazero/internal/wasmdebug"
    18  	"wa-lang.org/wazero/internal/wasmruntime"
    19  	"wa-lang.org/wazero/internal/wazeroir"
    20  )
    21  
// callStackCeiling is the maximum WebAssembly call frame stack height. When it is reached, pushFrame panics with
// wasmruntime.ErrRuntimeStackOverflow instead of letting the Go runtime stack overflow.
//
// The default value should suffice for most use cases. Those wishing to change it can do so via `go build -ldflags`.
// NOTE(review): the linker's -X flag only sets string variables, so confirm this still works for an int.
var callStackCeiling = 2000
    27  
// engine is an interpreter implementation of wasm.Engine.
//
// It caches the lowered functions of every compiled module, so that CompileModule
// returns early on a cache hit and NewModuleEngine can instantiate from the cache.
type engine struct {
	enabledFeatures api.CoreFeatures          // passed to wazeroir.CompileFunctions.
	codes           map[wasm.ModuleID][]*code // compiled functions keyed by wasm.Module.ID; guarded by mux.
	mux             sync.RWMutex              // protects codes.
}
    34  
    35  func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures) wasm.Engine {
    36  	return &engine{
    37  		enabledFeatures: enabledFeatures,
    38  		codes:           map[wasm.ModuleID][]*code{},
    39  	}
    40  }
    41  
    42  // CompiledModuleCount implements the same method as documented on wasm.Engine.
    43  func (e *engine) CompiledModuleCount() uint32 {
    44  	return uint32(len(e.codes))
    45  }
    46  
// DeleteCompiledModule implements the same method as documented on wasm.Engine.
//
// It removes the compiled functions cached for m, delegating to deleteCodes.
func (e *engine) DeleteCompiledModule(m *wasm.Module) {
	e.deleteCodes(m)
}
    51  
    52  func (e *engine) deleteCodes(module *wasm.Module) {
    53  	e.mux.Lock()
    54  	defer e.mux.Unlock()
    55  	delete(e.codes, module.ID)
    56  }
    57  
    58  func (e *engine) addCodes(module *wasm.Module, fs []*code) {
    59  	e.mux.Lock()
    60  	defer e.mux.Unlock()
    61  	e.codes[module.ID] = fs
    62  }
    63  
    64  func (e *engine) getCodes(module *wasm.Module) (fs []*code, ok bool) {
    65  	e.mux.RLock()
    66  	defer e.mux.RUnlock()
    67  	fs, ok = e.codes[module.ID]
    68  	return
    69  }
    70  
// moduleEngine implements wasm.ModuleEngine
type moduleEngine struct {
	// name is the name the module was instantiated with used for error handling.
	name string

	// functions are the compiled functions in a module instance. NewModuleEngine
	// appends the imported functions first, followed by the module's own functions.
	// The index is module instance-scoped.
	functions []*function

	// parentEngine holds the *engine this module engine was created from, and
	// importedFunctionCount is the number of imported functions given at creation.
	parentEngine          *engine
	importedFunctionCount uint32
}
    84  
// callEngine holds the execution state (operand stack and call frames) of a call,
// and is shared across all the function calls originating from the same Call execution.
type callEngine struct {
	// stack contains the operands.
	// Note that all the values are represented as uint64.
	stack []uint64

	// frames are the function call stack. pushFrame bounds its height by callStackCeiling.
	frames []*callFrame

	// compiled is the initial function for this call engine, i.e. the one invoked by Call.
	compiled *function
	// source is the FunctionInstance from which compiled is created from.
	source *wasm.FunctionInstance
}
   100  
   101  func (e *moduleEngine) newCallEngine(source *wasm.FunctionInstance, compiled *function) *callEngine {
   102  	return &callEngine{source: source, compiled: compiled}
   103  }
   104  
// pushValue pushes v onto the top of the operand stack.
func (ce *callEngine) pushValue(v uint64) {
	ce.stack = append(ce.stack, v)
}
   108  
   109  func (ce *callEngine) popValue() (v uint64) {
   110  	// No need to check stack bound
   111  	// as we can assume that all the operations
   112  	// are valid thanks to validateFunction
   113  	// at module validation phase
   114  	// and wazeroir translation
   115  	// before compilation.
   116  	stackTopIndex := len(ce.stack) - 1
   117  	v = ce.stack[stackTopIndex]
   118  	ce.stack = ce.stack[:stackTopIndex]
   119  	return
   120  }
   121  
   122  // peekValues peeks api.ValueType values from the stack and returns them.
   123  func (ce *callEngine) peekValues(count int) []uint64 {
   124  	if count == 0 {
   125  		return nil
   126  	}
   127  	stackLen := len(ce.stack)
   128  	return ce.stack[stackLen-count : stackLen]
   129  }
   130  
   131  func (ce *callEngine) drop(r *wazeroir.InclusiveRange) {
   132  	// No need to check stack bound
   133  	// as we can assume that all the operations
   134  	// are valid thanks to validateFunction
   135  	// at module validation phase
   136  	// and wazeroir translation
   137  	// before compilation.
   138  	if r == nil {
   139  		return
   140  	} else if r.Start == 0 {
   141  		ce.stack = ce.stack[:len(ce.stack)-1-r.End]
   142  	} else {
   143  		newStack := ce.stack[:len(ce.stack)-1-r.End]
   144  		newStack = append(newStack, ce.stack[len(ce.stack)-r.Start:]...)
   145  		ce.stack = newStack
   146  	}
   147  }
   148  
   149  func (ce *callEngine) pushFrame(frame *callFrame) {
   150  	if callStackCeiling <= len(ce.frames) {
   151  		panic(wasmruntime.ErrRuntimeStackOverflow)
   152  	}
   153  	ce.frames = append(ce.frames, frame)
   154  }
   155  
   156  func (ce *callEngine) popFrame() (frame *callFrame) {
   157  	// No need to check stack bound as we can assume that all the operations are valid thanks to validateFunction at
   158  	// module validation phase and wazeroir translation before compilation.
   159  	oneLess := len(ce.frames) - 1
   160  	frame = ce.frames[oneLess]
   161  	ce.frames = ce.frames[:oneLess]
   162  	return
   163  }
   164  
// callFrame is a single entry of callEngine.frames, describing one function invocation.
type callFrame struct {
	// pc is the program counter representing the current position in code.body.
	pc uint64
	// f is the compiled function used in this function frame.
	f *function
}
   171  
// code is the per-module compilation result of one function, as cached by engine.
// CompileModule sets hostFn for host (Go) functions and body for all other functions.
type code struct {
	body     []*interpreterOp              // operations produced by engine.lowerIR.
	listener experimental.FunctionListener // listener given to CompileModule for this function, if any.
	hostFn   interface{}                   // the Go function (ir.GoFunc) when this is a host function.
}
   177  
// function is a code bound to a specific wasm.FunctionInstance. It is created
// by code.instantiate, which copies body and hostFn from the parent code.
type function struct {
	source *wasm.FunctionInstance // the function instance this was instantiated for.
	body   []*interpreterOp       // same slice as parent.body.
	hostFn interface{}            // same value as parent.hostFn.
	parent *code                  // the code this function was instantiated from.
}
   184  
   185  // functionFromUintptr resurrects the original *function from the given uintptr
   186  // which comes from either funcref table or OpcodeRefFunc instruction.
   187  func functionFromUintptr(ptr uintptr) *function {
   188  	// Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector.
   189  	//
   190  	// For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr"
   191  	// subroutine wanrs as "checkptr: pointer arithmetic result points to invalid allocation"
   192  	// https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69
   193  	var wrapped *uintptr = &ptr
   194  	return *(**function)(unsafe.Pointer(wrapped))
   195  }
   196  
   197  func (c *code) instantiate(f *wasm.FunctionInstance) *function {
   198  	return &function{
   199  		source: f,
   200  		body:   c.body,
   201  		hostFn: c.hostFn,
   202  		parent: c,
   203  	}
   204  }
   205  
// interpreterOp is the compilation (engine.lowerIR) result of a wazeroir.Operation.
//
// Not all operations result in an interpreterOp, e.g. wazeroir.OperationI32ReinterpretFromF32, and some operations are
// more complex than others, e.g. wazeroir.OperationBrTable.
//
// Note: This is a form of union type as it can store fields needed for any operation. Hence, most fields are opaque and
// only relevant when in context of its kind. See engine.lowerIR for what each kind stores in which field.
type interpreterOp struct {
	// kind determines how to interpret the other fields in this struct.
	kind   wazeroir.OperationKind
	b1, b2 byte                       // small immediates, e.g. a value type, shape or lane index.
	b3     bool                       // boolean immediate, e.g. signedness.
	us     []uint64                   // integer immediates, e.g. branch addresses, indexes or constants.
	rs     []*wazeroir.InclusiveRange // stack ranges to drop, used by branch and drop operations.
}
   221  
// callFrameStackSize is the call frame size passed to wazeroir.CompileFunctions. The interpreter mode doesn't
// maintain call frames in the stack, so it passes the zero size to the IR.
const callFrameStackSize = 0
   224  
   225  // CompileModule implements the same method as documented on wasm.Engine.
   226  func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener) error {
   227  	if _, ok := e.getCodes(module); ok { // cache hit!
   228  		return nil
   229  	}
   230  
   231  	funcs := make([]*code, len(module.FunctionSection))
   232  	irs, err := wazeroir.CompileFunctions(ctx, e.enabledFeatures, callFrameStackSize, module)
   233  	if err != nil {
   234  		return err
   235  	}
   236  	for i, ir := range irs {
   237  		var lsn experimental.FunctionListener
   238  		if i < len(listeners) {
   239  			lsn = listeners[i]
   240  		}
   241  
   242  		// If this is the host function, there's nothing to do as the runtime representation of
   243  		// host function in interpreter is its Go function itself as opposed to Wasm functions,
   244  		// which need to be compiled down to wazeroir.
   245  		if ir.GoFunc != nil {
   246  			funcs[i] = &code{hostFn: ir.GoFunc, listener: lsn}
   247  			continue
   248  		} else {
   249  			compiled, err := e.lowerIR(ir)
   250  			if err != nil {
   251  				def := module.FunctionDefinitionSection[uint32(i)+module.ImportFuncCount()]
   252  				return fmt.Errorf("failed to lower func[%s] to wazeroir: %w", def.DebugName(), err)
   253  			}
   254  			compiled.listener = lsn
   255  			funcs[i] = compiled
   256  		}
   257  
   258  	}
   259  	e.addCodes(module, funcs)
   260  	return nil
   261  }
   262  
   263  // NewModuleEngine implements the same method as documented on wasm.Engine.
   264  func (e *engine) NewModuleEngine(name string, module *wasm.Module, importedFunctions, moduleFunctions []*wasm.FunctionInstance) (wasm.ModuleEngine, error) {
   265  	imported := uint32(len(importedFunctions))
   266  	me := &moduleEngine{
   267  		name:                  name,
   268  		parentEngine:          e,
   269  		importedFunctionCount: imported,
   270  	}
   271  
   272  	for _, f := range importedFunctions {
   273  		cf := f.Module.Engine.(*moduleEngine).functions[f.Idx]
   274  		me.functions = append(me.functions, cf)
   275  	}
   276  
   277  	codes, ok := e.getCodes(module)
   278  	if !ok {
   279  		return nil, fmt.Errorf("source module for %s must be compiled before instantiation", name)
   280  	}
   281  
   282  	for i, c := range codes {
   283  		f := moduleFunctions[i]
   284  		insntantiatedcode := c.instantiate(f)
   285  		me.functions = append(me.functions, insntantiatedcode)
   286  	}
   287  	return me, nil
   288  }
   289  
   290  // lowerIR lowers the wazeroir operations to engine friendly struct.
   291  func (e *engine) lowerIR(ir *wazeroir.CompilationResult) (*code, error) {
   292  	ops := ir.Operations
   293  	ret := &code{}
   294  	labelAddress := map[string]uint64{}
   295  	onLabelAddressResolved := map[string][]func(addr uint64){}
   296  	for _, original := range ops {
   297  		op := &interpreterOp{kind: original.Kind()}
   298  		switch o := original.(type) {
   299  		case *wazeroir.OperationUnreachable:
   300  		case *wazeroir.OperationLabel:
   301  			labelKey := o.Label.String()
   302  			address := uint64(len(ret.body))
   303  			labelAddress[labelKey] = address
   304  			for _, cb := range onLabelAddressResolved[labelKey] {
   305  				cb(address)
   306  			}
   307  			delete(onLabelAddressResolved, labelKey)
   308  			// We just ignore the label operation
   309  			// as we translate branch operations to the direct address jmp.
   310  			continue
   311  		case *wazeroir.OperationBr:
   312  			op.us = make([]uint64, 1)
   313  			if o.Target.IsReturnTarget() {
   314  				// Jmp to the end of the possible binary.
   315  				op.us[0] = math.MaxUint64
   316  			} else {
   317  				labelKey := o.Target.String()
   318  				addr, ok := labelAddress[labelKey]
   319  				if !ok {
   320  					// If this is the forward jump (e.g. to the continuation of if, etc.),
   321  					// the target is not emitted yet, so resolve the address later.
   322  					onLabelAddressResolved[labelKey] = append(onLabelAddressResolved[labelKey],
   323  						func(addr uint64) {
   324  							op.us[0] = addr
   325  						},
   326  					)
   327  				} else {
   328  					op.us[0] = addr
   329  				}
   330  			}
   331  		case *wazeroir.OperationBrIf:
   332  			op.rs = make([]*wazeroir.InclusiveRange, 2)
   333  			op.us = make([]uint64, 2)
   334  			for i, target := range []*wazeroir.BranchTargetDrop{o.Then, o.Else} {
   335  				op.rs[i] = target.ToDrop
   336  				if target.Target.IsReturnTarget() {
   337  					// Jmp to the end of the possible binary.
   338  					op.us[i] = math.MaxUint64
   339  				} else {
   340  					labelKey := target.Target.String()
   341  					addr, ok := labelAddress[labelKey]
   342  					if !ok {
   343  						i := i
   344  						// If this is the forward jump (e.g. to the continuation of if, etc.),
   345  						// the target is not emitted yet, so resolve the address later.
   346  						onLabelAddressResolved[labelKey] = append(onLabelAddressResolved[labelKey],
   347  							func(addr uint64) {
   348  								op.us[i] = addr
   349  							},
   350  						)
   351  					} else {
   352  						op.us[i] = addr
   353  					}
   354  				}
   355  			}
   356  		case *wazeroir.OperationBrTable:
   357  			targets := append([]*wazeroir.BranchTargetDrop{o.Default}, o.Targets...)
   358  			op.rs = make([]*wazeroir.InclusiveRange, len(targets))
   359  			op.us = make([]uint64, len(targets))
   360  			for i, target := range targets {
   361  				op.rs[i] = target.ToDrop
   362  				if target.Target.IsReturnTarget() {
   363  					// Jmp to the end of the possible binary.
   364  					op.us[i] = math.MaxUint64
   365  				} else {
   366  					labelKey := target.Target.String()
   367  					addr, ok := labelAddress[labelKey]
   368  					if !ok {
   369  						i := i // pin index for later resolution
   370  						// If this is the forward jump (e.g. to the continuation of if, etc.),
   371  						// the target is not emitted yet, so resolve the address later.
   372  						onLabelAddressResolved[labelKey] = append(onLabelAddressResolved[labelKey],
   373  							func(addr uint64) {
   374  								op.us[i] = addr
   375  							},
   376  						)
   377  					} else {
   378  						op.us[i] = addr
   379  					}
   380  				}
   381  			}
   382  		case *wazeroir.OperationCall:
   383  			op.us = make([]uint64, 1)
   384  			op.us = []uint64{uint64(o.FunctionIndex)}
   385  		case *wazeroir.OperationCallIndirect:
   386  			op.us = make([]uint64, 2)
   387  			op.us[0] = uint64(o.TypeIndex)
   388  			op.us[1] = uint64(o.TableIndex)
   389  		case *wazeroir.OperationDrop:
   390  			op.rs = make([]*wazeroir.InclusiveRange, 1)
   391  			op.rs[0] = o.Depth
   392  		case *wazeroir.OperationSelect:
   393  			op.b3 = o.IsTargetVector
   394  		case *wazeroir.OperationPick:
   395  			op.us = make([]uint64, 1)
   396  			op.us[0] = uint64(o.Depth)
   397  			op.b3 = o.IsTargetVector
   398  		case *wazeroir.OperationSet:
   399  			op.us = make([]uint64, 1)
   400  			op.us[0] = uint64(o.Depth)
   401  			op.b3 = o.IsTargetVector
   402  		case *wazeroir.OperationGlobalGet:
   403  			op.us = make([]uint64, 1)
   404  			op.us[0] = uint64(o.Index)
   405  		case *wazeroir.OperationGlobalSet:
   406  			op.us = make([]uint64, 1)
   407  			op.us[0] = uint64(o.Index)
   408  		case *wazeroir.OperationLoad:
   409  			op.b1 = byte(o.Type)
   410  			op.us = make([]uint64, 2)
   411  			op.us[0] = uint64(o.Arg.Alignment)
   412  			op.us[1] = uint64(o.Arg.Offset)
   413  		case *wazeroir.OperationLoad8:
   414  			op.b1 = byte(o.Type)
   415  			op.us = make([]uint64, 2)
   416  			op.us[0] = uint64(o.Arg.Alignment)
   417  			op.us[1] = uint64(o.Arg.Offset)
   418  		case *wazeroir.OperationLoad16:
   419  			op.b1 = byte(o.Type)
   420  			op.us = make([]uint64, 2)
   421  			op.us[0] = uint64(o.Arg.Alignment)
   422  			op.us[1] = uint64(o.Arg.Offset)
   423  		case *wazeroir.OperationLoad32:
   424  			if o.Signed {
   425  				op.b1 = 1
   426  			}
   427  			op.us = make([]uint64, 2)
   428  			op.us[0] = uint64(o.Arg.Alignment)
   429  			op.us[1] = uint64(o.Arg.Offset)
   430  		case *wazeroir.OperationStore:
   431  			op.b1 = byte(o.Type)
   432  			op.us = make([]uint64, 2)
   433  			op.us[0] = uint64(o.Arg.Alignment)
   434  			op.us[1] = uint64(o.Arg.Offset)
   435  		case *wazeroir.OperationStore8:
   436  			op.us = make([]uint64, 2)
   437  			op.us[0] = uint64(o.Arg.Alignment)
   438  			op.us[1] = uint64(o.Arg.Offset)
   439  		case *wazeroir.OperationStore16:
   440  			op.us = make([]uint64, 2)
   441  			op.us[0] = uint64(o.Arg.Alignment)
   442  			op.us[1] = uint64(o.Arg.Offset)
   443  		case *wazeroir.OperationStore32:
   444  			op.us = make([]uint64, 2)
   445  			op.us[0] = uint64(o.Arg.Alignment)
   446  			op.us[1] = uint64(o.Arg.Offset)
   447  		case *wazeroir.OperationMemorySize:
   448  		case *wazeroir.OperationMemoryGrow:
   449  		case *wazeroir.OperationConstI32:
   450  			op.us = make([]uint64, 1)
   451  			op.us[0] = uint64(o.Value)
   452  		case *wazeroir.OperationConstI64:
   453  			op.us = make([]uint64, 1)
   454  			op.us[0] = o.Value
   455  		case *wazeroir.OperationConstF32:
   456  			op.us = make([]uint64, 1)
   457  			op.us[0] = uint64(math.Float32bits(o.Value))
   458  		case *wazeroir.OperationConstF64:
   459  			op.us = make([]uint64, 1)
   460  			op.us[0] = math.Float64bits(o.Value)
   461  		case *wazeroir.OperationEq:
   462  			op.b1 = byte(o.Type)
   463  		case *wazeroir.OperationNe:
   464  			op.b1 = byte(o.Type)
   465  		case *wazeroir.OperationEqz:
   466  			op.b1 = byte(o.Type)
   467  		case *wazeroir.OperationLt:
   468  			op.b1 = byte(o.Type)
   469  		case *wazeroir.OperationGt:
   470  			op.b1 = byte(o.Type)
   471  		case *wazeroir.OperationLe:
   472  			op.b1 = byte(o.Type)
   473  		case *wazeroir.OperationGe:
   474  			op.b1 = byte(o.Type)
   475  		case *wazeroir.OperationAdd:
   476  			op.b1 = byte(o.Type)
   477  		case *wazeroir.OperationSub:
   478  			op.b1 = byte(o.Type)
   479  		case *wazeroir.OperationMul:
   480  			op.b1 = byte(o.Type)
   481  		case *wazeroir.OperationClz:
   482  			op.b1 = byte(o.Type)
   483  		case *wazeroir.OperationCtz:
   484  			op.b1 = byte(o.Type)
   485  		case *wazeroir.OperationPopcnt:
   486  			op.b1 = byte(o.Type)
   487  		case *wazeroir.OperationDiv:
   488  			op.b1 = byte(o.Type)
   489  		case *wazeroir.OperationRem:
   490  			op.b1 = byte(o.Type)
   491  		case *wazeroir.OperationAnd:
   492  			op.b1 = byte(o.Type)
   493  		case *wazeroir.OperationOr:
   494  			op.b1 = byte(o.Type)
   495  		case *wazeroir.OperationXor:
   496  			op.b1 = byte(o.Type)
   497  		case *wazeroir.OperationShl:
   498  			op.b1 = byte(o.Type)
   499  		case *wazeroir.OperationShr:
   500  			op.b1 = byte(o.Type)
   501  		case *wazeroir.OperationRotl:
   502  			op.b1 = byte(o.Type)
   503  		case *wazeroir.OperationRotr:
   504  			op.b1 = byte(o.Type)
   505  		case *wazeroir.OperationAbs:
   506  			op.b1 = byte(o.Type)
   507  		case *wazeroir.OperationNeg:
   508  			op.b1 = byte(o.Type)
   509  		case *wazeroir.OperationCeil:
   510  			op.b1 = byte(o.Type)
   511  		case *wazeroir.OperationFloor:
   512  			op.b1 = byte(o.Type)
   513  		case *wazeroir.OperationTrunc:
   514  			op.b1 = byte(o.Type)
   515  		case *wazeroir.OperationNearest:
   516  			op.b1 = byte(o.Type)
   517  		case *wazeroir.OperationSqrt:
   518  			op.b1 = byte(o.Type)
   519  		case *wazeroir.OperationMin:
   520  			op.b1 = byte(o.Type)
   521  		case *wazeroir.OperationMax:
   522  			op.b1 = byte(o.Type)
   523  		case *wazeroir.OperationCopysign:
   524  			op.b1 = byte(o.Type)
   525  		case *wazeroir.OperationI32WrapFromI64:
   526  		case *wazeroir.OperationITruncFromF:
   527  			op.b1 = byte(o.InputType)
   528  			op.b2 = byte(o.OutputType)
   529  			op.b3 = o.NonTrapping
   530  		case *wazeroir.OperationFConvertFromI:
   531  			op.b1 = byte(o.InputType)
   532  			op.b2 = byte(o.OutputType)
   533  		case *wazeroir.OperationF32DemoteFromF64:
   534  		case *wazeroir.OperationF64PromoteFromF32:
   535  		case *wazeroir.OperationI32ReinterpretFromF32,
   536  			*wazeroir.OperationI64ReinterpretFromF64,
   537  			*wazeroir.OperationF32ReinterpretFromI32,
   538  			*wazeroir.OperationF64ReinterpretFromI64:
   539  			// Reinterpret ops are essentially nop for engine mode
   540  			// because we treat all values as uint64, and Reinterpret* is only used at module
   541  			// validation phase where we check type soundness of all the operations.
   542  			// So just eliminate the ops.
   543  			continue
   544  		case *wazeroir.OperationExtend:
   545  			if o.Signed {
   546  				op.b1 = 1
   547  			}
   548  		case *wazeroir.OperationSignExtend32From8, *wazeroir.OperationSignExtend32From16, *wazeroir.OperationSignExtend64From8,
   549  			*wazeroir.OperationSignExtend64From16, *wazeroir.OperationSignExtend64From32:
   550  		case *wazeroir.OperationMemoryInit:
   551  			op.us = make([]uint64, 1)
   552  			op.us[0] = uint64(o.DataIndex)
   553  		case *wazeroir.OperationDataDrop:
   554  			op.us = make([]uint64, 1)
   555  			op.us[0] = uint64(o.DataIndex)
   556  		case *wazeroir.OperationMemoryCopy:
   557  		case *wazeroir.OperationMemoryFill:
   558  		case *wazeroir.OperationTableInit:
   559  			op.us = make([]uint64, 2)
   560  			op.us[0] = uint64(o.ElemIndex)
   561  			op.us[1] = uint64(o.TableIndex)
   562  		case *wazeroir.OperationElemDrop:
   563  			op.us = make([]uint64, 1)
   564  			op.us[0] = uint64(o.ElemIndex)
   565  		case *wazeroir.OperationTableCopy:
   566  			op.us = make([]uint64, 2)
   567  			op.us[0] = uint64(o.SrcTableIndex)
   568  			op.us[1] = uint64(o.DstTableIndex)
   569  		case *wazeroir.OperationRefFunc:
   570  			op.us = make([]uint64, 1)
   571  			op.us[0] = uint64(o.FunctionIndex)
   572  		case *wazeroir.OperationTableGet:
   573  			op.us = make([]uint64, 1)
   574  			op.us[0] = uint64(o.TableIndex)
   575  		case *wazeroir.OperationTableSet:
   576  			op.us = make([]uint64, 1)
   577  			op.us[0] = uint64(o.TableIndex)
   578  		case *wazeroir.OperationTableSize:
   579  			op.us = make([]uint64, 1)
   580  			op.us[0] = uint64(o.TableIndex)
   581  		case *wazeroir.OperationTableGrow:
   582  			op.us = make([]uint64, 1)
   583  			op.us[0] = uint64(o.TableIndex)
   584  		case *wazeroir.OperationTableFill:
   585  			op.us = make([]uint64, 1)
   586  			op.us[0] = uint64(o.TableIndex)
   587  		case *wazeroir.OperationV128Const:
   588  			op.us = make([]uint64, 2)
   589  			op.us[0] = o.Lo
   590  			op.us[1] = o.Hi
   591  		case *wazeroir.OperationV128Add:
   592  			op.b1 = o.Shape
   593  		case *wazeroir.OperationV128Sub:
   594  			op.b1 = o.Shape
   595  		case *wazeroir.OperationV128Load:
   596  			op.b1 = o.Type
   597  			op.us = make([]uint64, 2)
   598  			op.us[0] = uint64(o.Arg.Alignment)
   599  			op.us[1] = uint64(o.Arg.Offset)
   600  		case *wazeroir.OperationV128LoadLane:
   601  			op.b1 = o.LaneSize
   602  			op.b2 = o.LaneIndex
   603  			op.us = make([]uint64, 2)
   604  			op.us[0] = uint64(o.Arg.Alignment)
   605  			op.us[1] = uint64(o.Arg.Offset)
   606  		case *wazeroir.OperationV128Store:
   607  			op.us = make([]uint64, 2)
   608  			op.us[0] = uint64(o.Arg.Alignment)
   609  			op.us[1] = uint64(o.Arg.Offset)
   610  		case *wazeroir.OperationV128StoreLane:
   611  			op.b1 = o.LaneSize
   612  			op.b2 = o.LaneIndex
   613  			op.us = make([]uint64, 2)
   614  			op.us[0] = uint64(o.Arg.Alignment)
   615  			op.us[1] = uint64(o.Arg.Offset)
   616  		case *wazeroir.OperationV128ExtractLane:
   617  			op.b1 = o.Shape
   618  			op.b2 = o.LaneIndex
   619  			op.b3 = o.Signed
   620  		case *wazeroir.OperationV128ReplaceLane:
   621  			op.b1 = o.Shape
   622  			op.b2 = o.LaneIndex
   623  		case *wazeroir.OperationV128Splat:
   624  			op.b1 = o.Shape
   625  		case *wazeroir.OperationV128Shuffle:
   626  			op.us = make([]uint64, 16)
   627  			for i, l := range o.Lanes {
   628  				op.us[i] = uint64(l)
   629  			}
   630  		case *wazeroir.OperationV128Swizzle:
   631  		case *wazeroir.OperationV128AnyTrue:
   632  		case *wazeroir.OperationV128AllTrue:
   633  			op.b1 = o.Shape
   634  		case *wazeroir.OperationV128BitMask:
   635  			op.b1 = o.Shape
   636  		case *wazeroir.OperationV128And:
   637  		case *wazeroir.OperationV128Not:
   638  		case *wazeroir.OperationV128Or:
   639  		case *wazeroir.OperationV128Xor:
   640  		case *wazeroir.OperationV128Bitselect:
   641  		case *wazeroir.OperationV128AndNot:
   642  		case *wazeroir.OperationV128Shr:
   643  			op.b1 = o.Shape
   644  			op.b3 = o.Signed
   645  		case *wazeroir.OperationV128Shl:
   646  			op.b1 = o.Shape
   647  		case *wazeroir.OperationV128Cmp:
   648  			op.b1 = o.Type
   649  		case *wazeroir.OperationV128AddSat:
   650  			op.b1 = o.Shape
   651  			op.b3 = o.Signed
   652  		case *wazeroir.OperationV128SubSat:
   653  			op.b1 = o.Shape
   654  			op.b3 = o.Signed
   655  		case *wazeroir.OperationV128Mul:
   656  			op.b1 = o.Shape
   657  		case *wazeroir.OperationV128Div:
   658  			op.b1 = o.Shape
   659  		case *wazeroir.OperationV128Neg:
   660  			op.b1 = o.Shape
   661  		case *wazeroir.OperationV128Sqrt:
   662  			op.b1 = o.Shape
   663  		case *wazeroir.OperationV128Abs:
   664  			op.b1 = o.Shape
   665  		case *wazeroir.OperationV128Popcnt:
   666  		case *wazeroir.OperationV128Min:
   667  			op.b1 = o.Shape
   668  			op.b3 = o.Signed
   669  		case *wazeroir.OperationV128Max:
   670  			op.b1 = o.Shape
   671  			op.b3 = o.Signed
   672  		case *wazeroir.OperationV128AvgrU:
   673  			op.b1 = o.Shape
   674  		case *wazeroir.OperationV128Pmin:
   675  			op.b1 = o.Shape
   676  		case *wazeroir.OperationV128Pmax:
   677  			op.b1 = o.Shape
   678  		case *wazeroir.OperationV128Ceil:
   679  			op.b1 = o.Shape
   680  		case *wazeroir.OperationV128Floor:
   681  			op.b1 = o.Shape
   682  		case *wazeroir.OperationV128Trunc:
   683  			op.b1 = o.Shape
   684  		case *wazeroir.OperationV128Nearest:
   685  			op.b1 = o.Shape
   686  		case *wazeroir.OperationV128Extend:
   687  			op.b1 = o.OriginShape
   688  			if o.Signed {
   689  				op.b2 = 1
   690  			}
   691  			op.b3 = o.UseLow
   692  		case *wazeroir.OperationV128ExtMul:
   693  			op.b1 = o.OriginShape
   694  			if o.Signed {
   695  				op.b2 = 1
   696  			}
   697  			op.b3 = o.UseLow
   698  		case *wazeroir.OperationV128Q15mulrSatS:
   699  		case *wazeroir.OperationV128ExtAddPairwise:
   700  			op.b1 = o.OriginShape
   701  			op.b3 = o.Signed
   702  		case *wazeroir.OperationV128FloatPromote:
   703  		case *wazeroir.OperationV128FloatDemote:
   704  		case *wazeroir.OperationV128FConvertFromI:
   705  			op.b1 = o.DestinationShape
   706  			op.b3 = o.Signed
   707  		case *wazeroir.OperationV128Dot:
   708  		case *wazeroir.OperationV128Narrow:
   709  			op.b1 = o.OriginShape
   710  			op.b3 = o.Signed
   711  		case *wazeroir.OperationV128ITruncSatFromF:
   712  			op.b1 = o.OriginShape
   713  			op.b3 = o.Signed
   714  		default:
   715  			panic(fmt.Errorf("BUG: unimplemented operation %s", op.kind.String()))
   716  		}
   717  		ret.body = append(ret.body, op)
   718  	}
   719  
   720  	if len(onLabelAddressResolved) > 0 {
   721  		keys := make([]string, 0, len(onLabelAddressResolved))
   722  		for key := range onLabelAddressResolved {
   723  			keys = append(keys, key)
   724  		}
   725  		return nil, fmt.Errorf("labels are not defined: %s", strings.Join(keys, ","))
   726  	}
   727  	return ret, nil
   728  }
   729  
// Name implements the same method as documented on wasm.ModuleEngine.
//
// It returns the name this module engine was created with.
func (e *moduleEngine) Name() string {
	return e.name
}
   734  
   735  // CreateFuncElementInstance implements the same method as documented on wasm.ModuleEngine.
   736  func (e *moduleEngine) CreateFuncElementInstance(indexes []*wasm.Index) *wasm.ElementInstance {
   737  	refs := make([]wasm.Reference, len(indexes))
   738  	for i, index := range indexes {
   739  		if index != nil {
   740  			refs[i] = uintptr(unsafe.Pointer(e.functions[*index]))
   741  		}
   742  	}
   743  	return &wasm.ElementInstance{
   744  		References: refs,
   745  		Type:       wasm.RefTypeFuncref,
   746  	}
   747  }
   748  
   749  // InitializeFuncrefGlobals implements the same method as documented on wasm.ModuleEngine.
   750  func (e *moduleEngine) InitializeFuncrefGlobals(globals []*wasm.GlobalInstance) {
   751  	for _, g := range globals {
   752  		if g.Type.ValType == wasm.ValueTypeFuncref {
   753  			if int64(g.Val) == wasm.GlobalInstanceNullFuncRefValue {
   754  				g.Val = 0 // Null funcref is expressed as zero.
   755  			} else {
   756  				// Lowers the stored function index into the interpreter specific function's opaque pointer.
   757  				g.Val = uint64(uintptr(unsafe.Pointer(e.functions[g.Val])))
   758  			}
   759  		}
   760  	}
   761  }
   762  
   763  // FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine.
   764  func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference {
   765  	return uintptr(unsafe.Pointer(e.functions[funcIndex]))
   766  }
   767  
   768  // NewCallEngine implements the same method as documented on wasm.ModuleEngine.
   769  func (e *moduleEngine) NewCallEngine(callCtx *wasm.CallContext, f *wasm.FunctionInstance) (ce wasm.CallEngine, err error) {
   770  	// Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to
   771  	// code on close aren't locked, neither is this read.
   772  	compiled := e.functions[f.Idx]
   773  	if compiled == nil { // Lazy check the cause as it could be because the module was already closed.
   774  		if err = callCtx.FailIfClosed(); err == nil {
   775  			panic(fmt.Errorf("BUG: %s.func[%d] was nil before close", e.name, f.Idx))
   776  		}
   777  		return
   778  	}
   779  	return e.newCallEngine(f, compiled), nil
   780  }
   781  
   782  // LookupFunction implements the same method as documented on wasm.ModuleEngine.
   783  func (e *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (idx wasm.Index, err error) {
   784  	if tableOffset >= uint32(len(t.References)) {
   785  		err = wasmruntime.ErrRuntimeInvalidTableAccess
   786  		return
   787  	}
   788  	rawPtr := t.References[tableOffset]
   789  	if rawPtr == 0 {
   790  		err = wasmruntime.ErrRuntimeInvalidTableAccess
   791  		return
   792  	}
   793  
   794  	tf := functionFromUintptr(rawPtr)
   795  	if tf.source.TypeID != typeId {
   796  		err = wasmruntime.ErrRuntimeIndirectCallTypeMismatch
   797  		return
   798  	}
   799  	idx = tf.source.Idx
   800  
   801  	return
   802  }
   803  
// Call implements the same method as documented on wasm.CallEngine.
//
// It invokes the function this call engine was created for (ce.compiled) with params.
func (ce *callEngine) Call(ctx context.Context, m *wasm.CallContext, params []uint64) (results []uint64, err error) {
	return ce.call(ctx, m, ce.compiled, params)
}
   808  
   809  func (ce *callEngine) call(ctx context.Context, m *wasm.CallContext, tf *function, params []uint64) (results []uint64, err error) {
   810  	ft := tf.source.Type
   811  	paramSignature := ft.ParamNumInUint64
   812  	paramCount := len(params)
   813  	if paramSignature != paramCount {
   814  		return nil, fmt.Errorf("expected %d params, but passed %d", paramSignature, paramCount)
   815  	}
   816  
   817  	defer func() {
   818  		// If the module closed during the call, and the call didn't err for another reason, set an ExitError.
   819  		if err == nil {
   820  			err = m.FailIfClosed()
   821  		}
   822  		// TODO: ^^ Will not fail if the function was imported from a closed module.
   823  
   824  		if v := recover(); v != nil {
   825  			err = ce.recoverOnCall(v)
   826  		}
   827  	}()
   828  
   829  	for _, param := range params {
   830  		ce.pushValue(param)
   831  	}
   832  
   833  	ce.callFunction(ctx, m, tf)
   834  
   835  	// This returns a safe copy of the results, instead of a slice view. If we
   836  	// returned a re-slice, the caller could accidentally or purposefully
   837  	// corrupt the stack of subsequent calls.
   838  	results = wasm.PopValues(ft.ResultNumInUint64, ce.popValue)
   839  	return
   840  }
   841  
   842  // recoverOnCall takes the recovered value `recoverOnCall`, and wraps it
   843  // with the call frame stack traces. Also, reset the state of callEngine
   844  // so that it can be used for the subsequent calls.
   845  func (ce *callEngine) recoverOnCall(v interface{}) (err error) {
   846  	builder := wasmdebug.NewErrorBuilder()
   847  	frameCount := len(ce.frames)
   848  	for i := 0; i < frameCount; i++ {
   849  		frame := ce.popFrame()
   850  		def := frame.f.source.Definition
   851  		builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes())
   852  	}
   853  	err = builder.FromRecovered(v)
   854  
   855  	// Allows the reuse of CallEngine.
   856  	ce.stack, ce.frames = ce.stack[:0], ce.frames[:0]
   857  	return
   858  }
   859  
   860  func (ce *callEngine) callFunction(ctx context.Context, callCtx *wasm.CallContext, f *function) {
   861  	if f.hostFn != nil {
   862  		ce.callGoFuncWithStack(ctx, callCtx, f)
   863  	} else if lsn := f.parent.listener; lsn != nil {
   864  		ce.callNativeFuncWithListener(ctx, callCtx, f, lsn)
   865  	} else {
   866  		ce.callNativeFunc(ctx, callCtx, f)
   867  	}
   868  }
   869  
   870  func (ce *callEngine) callGoFunc(ctx context.Context, callCtx *wasm.CallContext, f *function, stack []uint64) {
   871  	lsn := f.parent.listener
   872  	if lsn != nil {
   873  		params := stack[:f.source.Type.ParamNumInUint64]
   874  		ctx = lsn.Before(ctx, f.source.Definition, params)
   875  	}
   876  	frame := &callFrame{f: f}
   877  	ce.pushFrame(frame)
   878  
   879  	fn := f.source.GoFunc
   880  	switch fn := fn.(type) {
   881  	case api.GoModuleFunction:
   882  		fn.Call(ctx, callCtx.WithMemory(ce.callerMemory()), stack)
   883  	case api.GoFunction:
   884  		fn.Call(ctx, stack)
   885  	}
   886  
   887  	ce.popFrame()
   888  	if lsn != nil {
   889  		// TODO: This doesn't get the error due to use of panic to propagate them.
   890  		results := stack[:f.source.Type.ResultNumInUint64]
   891  		lsn.After(ctx, f.source.Definition, nil, results)
   892  	}
   893  }
   894  
   895  func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallContext, f *function) {
   896  	frame := &callFrame{f: f}
   897  	moduleInst := f.source.Module
   898  	functions := moduleInst.Engine.(*moduleEngine).functions
   899  	var memoryInst *wasm.MemoryInstance
   900  	if f.source.IsHostFunction {
   901  		memoryInst = ce.callerMemory()
   902  	} else {
   903  		memoryInst = moduleInst.Memory
   904  	}
   905  	globals := moduleInst.Globals
   906  	tables := moduleInst.Tables
   907  	typeIDs := f.source.Module.TypeIDs
   908  	dataInstances := f.source.Module.DataInstances
   909  	elementInstances := f.source.Module.ElementInstances
   910  	ce.pushFrame(frame)
   911  	bodyLen := uint64(len(frame.f.body))
   912  	for frame.pc < bodyLen {
   913  		op := frame.f.body[frame.pc]
   914  		// TODO: add description of each operation/case
   915  		// on, for example, how many args are used,
   916  		// how the stack is modified, etc.
   917  		switch op.kind {
   918  		case wazeroir.OperationKindUnreachable:
   919  			panic(wasmruntime.ErrRuntimeUnreachable)
   920  		case wazeroir.OperationKindBr:
   921  			frame.pc = op.us[0]
   922  		case wazeroir.OperationKindBrIf:
   923  			if ce.popValue() > 0 {
   924  				ce.drop(op.rs[0])
   925  				frame.pc = op.us[0]
   926  			} else {
   927  				ce.drop(op.rs[1])
   928  				frame.pc = op.us[1]
   929  			}
   930  		case wazeroir.OperationKindBrTable:
   931  			if v := uint64(ce.popValue()); v < uint64(len(op.us)-1) {
   932  				ce.drop(op.rs[v+1])
   933  				frame.pc = op.us[v+1]
   934  			} else {
   935  				// Default branch.
   936  				ce.drop(op.rs[0])
   937  				frame.pc = op.us[0]
   938  			}
   939  		case wazeroir.OperationKindCall:
   940  			ce.callFunction(ctx, callCtx, functions[op.us[0]])
   941  			frame.pc++
   942  		case wazeroir.OperationKindCallIndirect:
   943  			offset := ce.popValue()
   944  			table := tables[op.us[1]]
   945  			if offset >= uint64(len(table.References)) {
   946  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   947  			}
   948  			rawPtr := table.References[offset]
   949  			if rawPtr == 0 {
   950  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
   951  			}
   952  
   953  			tf := functionFromUintptr(rawPtr)
   954  			if tf.source.TypeID != typeIDs[op.us[0]] {
   955  				panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
   956  			}
   957  
   958  			ce.callFunction(ctx, callCtx, tf)
   959  			frame.pc++
   960  		case wazeroir.OperationKindDrop:
   961  			ce.drop(op.rs[0])
   962  			frame.pc++
   963  		case wazeroir.OperationKindSelect:
   964  			c := ce.popValue()
   965  			if op.b3 { // Target is vector.
   966  				x2Hi, x2Lo := ce.popValue(), ce.popValue()
   967  				if c == 0 {
   968  					_, _ = ce.popValue(), ce.popValue() // discard the x1's lo and hi bits.
   969  					ce.pushValue(x2Lo)
   970  					ce.pushValue(x2Hi)
   971  				}
   972  			} else {
   973  				v2 := ce.popValue()
   974  				if c == 0 {
   975  					_ = ce.popValue()
   976  					ce.pushValue(v2)
   977  				}
   978  			}
   979  			frame.pc++
   980  		case wazeroir.OperationKindPick:
   981  			index := len(ce.stack) - 1 - int(op.us[0])
   982  			ce.pushValue(ce.stack[index])
   983  			if op.b3 { // V128 value target.
   984  				ce.pushValue(ce.stack[index+1])
   985  			}
   986  			frame.pc++
   987  		case wazeroir.OperationKindSet:
   988  			if op.b3 { // V128 value target.
   989  				lowIndex := len(ce.stack) - 1 - int(op.us[0])
   990  				highIndex := lowIndex + 1
   991  				hi, lo := ce.popValue(), ce.popValue()
   992  				ce.stack[lowIndex], ce.stack[highIndex] = lo, hi
   993  			} else {
   994  				index := len(ce.stack) - 1 - int(op.us[0])
   995  				ce.stack[index] = ce.popValue()
   996  			}
   997  			frame.pc++
   998  		case wazeroir.OperationKindGlobalGet:
   999  			g := globals[op.us[0]]
  1000  			ce.pushValue(g.Val)
  1001  			if g.Type.ValType == wasm.ValueTypeV128 {
  1002  				ce.pushValue(g.ValHi)
  1003  			}
  1004  			frame.pc++
  1005  		case wazeroir.OperationKindGlobalSet:
  1006  			g := globals[op.us[0]]
  1007  			if g.Type.ValType == wasm.ValueTypeV128 {
  1008  				g.ValHi = ce.popValue()
  1009  			}
  1010  			g.Val = ce.popValue()
  1011  			frame.pc++
  1012  		case wazeroir.OperationKindLoad:
  1013  			offset := ce.popMemoryOffset(op)
  1014  			switch wazeroir.UnsignedType(op.b1) {
  1015  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
  1016  				if val, ok := memoryInst.ReadUint32Le(ctx, offset); !ok {
  1017  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1018  				} else {
  1019  					ce.pushValue(uint64(val))
  1020  				}
  1021  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
  1022  				if val, ok := memoryInst.ReadUint64Le(ctx, offset); !ok {
  1023  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1024  				} else {
  1025  					ce.pushValue(val)
  1026  				}
  1027  			}
  1028  			frame.pc++
  1029  		case wazeroir.OperationKindLoad8:
  1030  			val, ok := memoryInst.ReadByte(ctx, ce.popMemoryOffset(op))
  1031  			if !ok {
  1032  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1033  			}
  1034  
  1035  			switch wazeroir.SignedInt(op.b1) {
  1036  			case wazeroir.SignedInt32:
  1037  				ce.pushValue(uint64(uint32(int8(val))))
  1038  			case wazeroir.SignedInt64:
  1039  				ce.pushValue(uint64(int8(val)))
  1040  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
  1041  				ce.pushValue(uint64(val))
  1042  			}
  1043  			frame.pc++
  1044  		case wazeroir.OperationKindLoad16:
  1045  
  1046  			val, ok := memoryInst.ReadUint16Le(ctx, ce.popMemoryOffset(op))
  1047  			if !ok {
  1048  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1049  			}
  1050  
  1051  			switch wazeroir.SignedInt(op.b1) {
  1052  			case wazeroir.SignedInt32:
  1053  				ce.pushValue(uint64(uint32(int16(val))))
  1054  			case wazeroir.SignedInt64:
  1055  				ce.pushValue(uint64(int16(val)))
  1056  			case wazeroir.SignedUint32, wazeroir.SignedUint64:
  1057  				ce.pushValue(uint64(val))
  1058  			}
  1059  			frame.pc++
  1060  		case wazeroir.OperationKindLoad32:
  1061  			val, ok := memoryInst.ReadUint32Le(ctx, ce.popMemoryOffset(op))
  1062  			if !ok {
  1063  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1064  			}
  1065  
  1066  			if op.b1 == 1 { // Signed
  1067  				ce.pushValue(uint64(int32(val)))
  1068  			} else {
  1069  				ce.pushValue(uint64(val))
  1070  			}
  1071  			frame.pc++
  1072  		case wazeroir.OperationKindStore:
  1073  			val := ce.popValue()
  1074  			offset := ce.popMemoryOffset(op)
  1075  			switch wazeroir.UnsignedType(op.b1) {
  1076  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
  1077  				if !memoryInst.WriteUint32Le(ctx, offset, uint32(val)) {
  1078  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1079  				}
  1080  			case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
  1081  				if !memoryInst.WriteUint64Le(ctx, offset, val) {
  1082  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1083  				}
  1084  			}
  1085  			frame.pc++
  1086  		case wazeroir.OperationKindStore8:
  1087  			val := byte(ce.popValue())
  1088  			offset := ce.popMemoryOffset(op)
  1089  			if !memoryInst.WriteByte(ctx, offset, val) {
  1090  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1091  			}
  1092  			frame.pc++
  1093  		case wazeroir.OperationKindStore16:
  1094  			val := uint16(ce.popValue())
  1095  			offset := ce.popMemoryOffset(op)
  1096  			if !memoryInst.WriteUint16Le(ctx, offset, val) {
  1097  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1098  			}
  1099  			frame.pc++
  1100  		case wazeroir.OperationKindStore32:
  1101  			val := uint32(ce.popValue())
  1102  			offset := ce.popMemoryOffset(op)
  1103  			if !memoryInst.WriteUint32Le(ctx, offset, val) {
  1104  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1105  			}
  1106  			frame.pc++
  1107  		case wazeroir.OperationKindMemorySize:
  1108  			ce.pushValue(uint64(memoryInst.PageSize(ctx)))
  1109  			frame.pc++
  1110  		case wazeroir.OperationKindMemoryGrow:
  1111  			n := ce.popValue()
  1112  			if res, ok := memoryInst.Grow(ctx, uint32(n)); !ok {
  1113  				ce.pushValue(uint64(0xffffffff)) // = -1 in signed 32-bit integer.
  1114  			} else {
  1115  				ce.pushValue(uint64(res))
  1116  			}
  1117  			frame.pc++
  1118  		case wazeroir.OperationKindConstI32, wazeroir.OperationKindConstI64,
  1119  			wazeroir.OperationKindConstF32, wazeroir.OperationKindConstF64:
  1120  			ce.pushValue(op.us[0])
  1121  			frame.pc++
  1122  		case wazeroir.OperationKindEq:
  1123  			var b bool
  1124  			switch wazeroir.UnsignedType(op.b1) {
  1125  			case wazeroir.UnsignedTypeI32:
  1126  				v2, v1 := ce.popValue(), ce.popValue()
  1127  				b = uint32(v1) == uint32(v2)
  1128  			case wazeroir.UnsignedTypeI64:
  1129  				v2, v1 := ce.popValue(), ce.popValue()
  1130  				b = v1 == v2
  1131  			case wazeroir.UnsignedTypeF32:
  1132  				v2, v1 := ce.popValue(), ce.popValue()
  1133  				b = math.Float32frombits(uint32(v2)) == math.Float32frombits(uint32(v1))
  1134  			case wazeroir.UnsignedTypeF64:
  1135  				v2, v1 := ce.popValue(), ce.popValue()
  1136  				b = math.Float64frombits(v2) == math.Float64frombits(v1)
  1137  			}
  1138  			if b {
  1139  				ce.pushValue(1)
  1140  			} else {
  1141  				ce.pushValue(0)
  1142  			}
  1143  			frame.pc++
  1144  		case wazeroir.OperationKindNe:
  1145  			var b bool
  1146  			switch wazeroir.UnsignedType(op.b1) {
  1147  			case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeI64:
  1148  				v2, v1 := ce.popValue(), ce.popValue()
  1149  				b = v1 != v2
  1150  			case wazeroir.UnsignedTypeF32:
  1151  				v2, v1 := ce.popValue(), ce.popValue()
  1152  				b = math.Float32frombits(uint32(v2)) != math.Float32frombits(uint32(v1))
  1153  			case wazeroir.UnsignedTypeF64:
  1154  				v2, v1 := ce.popValue(), ce.popValue()
  1155  				b = math.Float64frombits(v2) != math.Float64frombits(v1)
  1156  			}
  1157  			if b {
  1158  				ce.pushValue(1)
  1159  			} else {
  1160  				ce.pushValue(0)
  1161  			}
  1162  			frame.pc++
  1163  		case wazeroir.OperationKindEqz:
  1164  			if ce.popValue() == 0 {
  1165  				ce.pushValue(1)
  1166  			} else {
  1167  				ce.pushValue(0)
  1168  			}
  1169  			frame.pc++
  1170  		case wazeroir.OperationKindLt:
  1171  			v2 := ce.popValue()
  1172  			v1 := ce.popValue()
  1173  			var b bool
  1174  			switch wazeroir.SignedType(op.b1) {
  1175  			case wazeroir.SignedTypeInt32:
  1176  				b = int32(v1) < int32(v2)
  1177  			case wazeroir.SignedTypeInt64:
  1178  				b = int64(v1) < int64(v2)
  1179  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1180  				b = v1 < v2
  1181  			case wazeroir.SignedTypeFloat32:
  1182  				b = math.Float32frombits(uint32(v1)) < math.Float32frombits(uint32(v2))
  1183  			case wazeroir.SignedTypeFloat64:
  1184  				b = math.Float64frombits(v1) < math.Float64frombits(v2)
  1185  			}
  1186  			if b {
  1187  				ce.pushValue(1)
  1188  			} else {
  1189  				ce.pushValue(0)
  1190  			}
  1191  			frame.pc++
  1192  		case wazeroir.OperationKindGt:
  1193  			v2 := ce.popValue()
  1194  			v1 := ce.popValue()
  1195  			var b bool
  1196  			switch wazeroir.SignedType(op.b1) {
  1197  			case wazeroir.SignedTypeInt32:
  1198  				b = int32(v1) > int32(v2)
  1199  			case wazeroir.SignedTypeInt64:
  1200  				b = int64(v1) > int64(v2)
  1201  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1202  				b = v1 > v2
  1203  			case wazeroir.SignedTypeFloat32:
  1204  				b = math.Float32frombits(uint32(v1)) > math.Float32frombits(uint32(v2))
  1205  			case wazeroir.SignedTypeFloat64:
  1206  				b = math.Float64frombits(v1) > math.Float64frombits(v2)
  1207  			}
  1208  			if b {
  1209  				ce.pushValue(1)
  1210  			} else {
  1211  				ce.pushValue(0)
  1212  			}
  1213  			frame.pc++
  1214  		case wazeroir.OperationKindLe:
  1215  			v2 := ce.popValue()
  1216  			v1 := ce.popValue()
  1217  			var b bool
  1218  			switch wazeroir.SignedType(op.b1) {
  1219  			case wazeroir.SignedTypeInt32:
  1220  				b = int32(v1) <= int32(v2)
  1221  			case wazeroir.SignedTypeInt64:
  1222  				b = int64(v1) <= int64(v2)
  1223  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1224  				b = v1 <= v2
  1225  			case wazeroir.SignedTypeFloat32:
  1226  				b = math.Float32frombits(uint32(v1)) <= math.Float32frombits(uint32(v2))
  1227  			case wazeroir.SignedTypeFloat64:
  1228  				b = math.Float64frombits(v1) <= math.Float64frombits(v2)
  1229  			}
  1230  			if b {
  1231  				ce.pushValue(1)
  1232  			} else {
  1233  				ce.pushValue(0)
  1234  			}
  1235  			frame.pc++
  1236  		case wazeroir.OperationKindGe:
  1237  			v2 := ce.popValue()
  1238  			v1 := ce.popValue()
  1239  			var b bool
  1240  			switch wazeroir.SignedType(op.b1) {
  1241  			case wazeroir.SignedTypeInt32:
  1242  				b = int32(v1) >= int32(v2)
  1243  			case wazeroir.SignedTypeInt64:
  1244  				b = int64(v1) >= int64(v2)
  1245  			case wazeroir.SignedTypeUint32, wazeroir.SignedTypeUint64:
  1246  				b = v1 >= v2
  1247  			case wazeroir.SignedTypeFloat32:
  1248  				b = math.Float32frombits(uint32(v1)) >= math.Float32frombits(uint32(v2))
  1249  			case wazeroir.SignedTypeFloat64:
  1250  				b = math.Float64frombits(v1) >= math.Float64frombits(v2)
  1251  			}
  1252  			if b {
  1253  				ce.pushValue(1)
  1254  			} else {
  1255  				ce.pushValue(0)
  1256  			}
  1257  			frame.pc++
  1258  		case wazeroir.OperationKindAdd:
  1259  			v2 := ce.popValue()
  1260  			v1 := ce.popValue()
  1261  			switch wazeroir.UnsignedType(op.b1) {
  1262  			case wazeroir.UnsignedTypeI32:
  1263  				v := uint32(v1) + uint32(v2)
  1264  				ce.pushValue(uint64(v))
  1265  			case wazeroir.UnsignedTypeI64:
  1266  				ce.pushValue(v1 + v2)
  1267  			case wazeroir.UnsignedTypeF32:
  1268  				ce.pushValue(addFloat32bits(uint32(v1), uint32(v2)))
  1269  			case wazeroir.UnsignedTypeF64:
  1270  				v := math.Float64frombits(v1) + math.Float64frombits(v2)
  1271  				ce.pushValue(math.Float64bits(v))
  1272  			}
  1273  			frame.pc++
  1274  		case wazeroir.OperationKindSub:
  1275  			v2 := ce.popValue()
  1276  			v1 := ce.popValue()
  1277  			switch wazeroir.UnsignedType(op.b1) {
  1278  			case wazeroir.UnsignedTypeI32:
  1279  				ce.pushValue(uint64(uint32(v1) - uint32(v2)))
  1280  			case wazeroir.UnsignedTypeI64:
  1281  				ce.pushValue(v1 - v2)
  1282  			case wazeroir.UnsignedTypeF32:
  1283  				ce.pushValue(subFloat32bits(uint32(v1), uint32(v2)))
  1284  			case wazeroir.UnsignedTypeF64:
  1285  				v := math.Float64frombits(v1) - math.Float64frombits(v2)
  1286  				ce.pushValue(math.Float64bits(v))
  1287  			}
  1288  			frame.pc++
  1289  		case wazeroir.OperationKindMul:
  1290  			v2 := ce.popValue()
  1291  			v1 := ce.popValue()
  1292  			switch wazeroir.UnsignedType(op.b1) {
  1293  			case wazeroir.UnsignedTypeI32:
  1294  				ce.pushValue(uint64(uint32(v1) * uint32(v2)))
  1295  			case wazeroir.UnsignedTypeI64:
  1296  				ce.pushValue(v1 * v2)
  1297  			case wazeroir.UnsignedTypeF32:
  1298  				ce.pushValue(mulFloat32bits(uint32(v1), uint32(v2)))
  1299  			case wazeroir.UnsignedTypeF64:
  1300  				v := math.Float64frombits(v2) * math.Float64frombits(v1)
  1301  				ce.pushValue(math.Float64bits(v))
  1302  			}
  1303  			frame.pc++
  1304  		case wazeroir.OperationKindClz:
  1305  			v := ce.popValue()
  1306  			if op.b1 == 0 {
  1307  				// UnsignedInt32
  1308  				ce.pushValue(uint64(bits.LeadingZeros32(uint32(v))))
  1309  			} else {
  1310  				// UnsignedInt64
  1311  				ce.pushValue(uint64(bits.LeadingZeros64(v)))
  1312  			}
  1313  			frame.pc++
  1314  		case wazeroir.OperationKindCtz:
  1315  			v := ce.popValue()
  1316  			if op.b1 == 0 {
  1317  				// UnsignedInt32
  1318  				ce.pushValue(uint64(bits.TrailingZeros32(uint32(v))))
  1319  			} else {
  1320  				// UnsignedInt64
  1321  				ce.pushValue(uint64(bits.TrailingZeros64(v)))
  1322  			}
  1323  			frame.pc++
  1324  		case wazeroir.OperationKindPopcnt:
  1325  			v := ce.popValue()
  1326  			if op.b1 == 0 {
  1327  				// UnsignedInt32
  1328  				ce.pushValue(uint64(bits.OnesCount32(uint32(v))))
  1329  			} else {
  1330  				// UnsignedInt64
  1331  				ce.pushValue(uint64(bits.OnesCount64(v)))
  1332  			}
  1333  			frame.pc++
  1334  		case wazeroir.OperationKindDiv:
  1335  			// If an integer, check we won't divide by zero.
  1336  			t := wazeroir.SignedType(op.b1)
  1337  			v2, v1 := ce.popValue(), ce.popValue()
  1338  			switch t {
  1339  			case wazeroir.SignedTypeFloat32, wazeroir.SignedTypeFloat64: // not integers
  1340  			default:
  1341  				if v2 == 0 {
  1342  					panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1343  				}
  1344  			}
  1345  
  1346  			switch t {
  1347  			case wazeroir.SignedTypeInt32:
  1348  				d := int32(v2)
  1349  				n := int32(v1)
  1350  				if n == math.MinInt32 && d == -1 {
  1351  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1352  				}
  1353  				ce.pushValue(uint64(uint32(n / d)))
  1354  			case wazeroir.SignedTypeInt64:
  1355  				d := int64(v2)
  1356  				n := int64(v1)
  1357  				if n == math.MinInt64 && d == -1 {
  1358  					panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1359  				}
  1360  				ce.pushValue(uint64(n / d))
  1361  			case wazeroir.SignedTypeUint32:
  1362  				d := uint32(v2)
  1363  				n := uint32(v1)
  1364  				ce.pushValue(uint64(n / d))
  1365  			case wazeroir.SignedTypeUint64:
  1366  				d := v2
  1367  				n := v1
  1368  				ce.pushValue(n / d)
  1369  			case wazeroir.SignedTypeFloat32:
  1370  				ce.pushValue(divFloat32bits(uint32(v1), uint32(v2)))
  1371  			case wazeroir.SignedTypeFloat64:
  1372  				ce.pushValue(math.Float64bits(math.Float64frombits(v1) / math.Float64frombits(v2)))
  1373  			}
  1374  			frame.pc++
  1375  		case wazeroir.OperationKindRem:
  1376  			v2, v1 := ce.popValue(), ce.popValue()
  1377  			if v2 == 0 {
  1378  				panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
  1379  			}
  1380  			switch wazeroir.SignedInt(op.b1) {
  1381  			case wazeroir.SignedInt32:
  1382  				d := int32(v2)
  1383  				n := int32(v1)
  1384  				ce.pushValue(uint64(uint32(n % d)))
  1385  			case wazeroir.SignedInt64:
  1386  				d := int64(v2)
  1387  				n := int64(v1)
  1388  				ce.pushValue(uint64(n % d))
  1389  			case wazeroir.SignedUint32:
  1390  				d := uint32(v2)
  1391  				n := uint32(v1)
  1392  				ce.pushValue(uint64(n % d))
  1393  			case wazeroir.SignedUint64:
  1394  				d := v2
  1395  				n := v1
  1396  				ce.pushValue(n % d)
  1397  			}
  1398  			frame.pc++
  1399  		case wazeroir.OperationKindAnd:
  1400  			v2 := ce.popValue()
  1401  			v1 := ce.popValue()
  1402  			if op.b1 == 0 {
  1403  				// UnsignedInt32
  1404  				ce.pushValue(uint64(uint32(v2) & uint32(v1)))
  1405  			} else {
  1406  				// UnsignedInt64
  1407  				ce.pushValue(uint64(v2 & v1))
  1408  			}
  1409  			frame.pc++
  1410  		case wazeroir.OperationKindOr:
  1411  			v2 := ce.popValue()
  1412  			v1 := ce.popValue()
  1413  			if op.b1 == 0 {
  1414  				// UnsignedInt32
  1415  				ce.pushValue(uint64(uint32(v2) | uint32(v1)))
  1416  			} else {
  1417  				// UnsignedInt64
  1418  				ce.pushValue(uint64(v2 | v1))
  1419  			}
  1420  			frame.pc++
  1421  		case wazeroir.OperationKindXor:
  1422  			v2 := ce.popValue()
  1423  			v1 := ce.popValue()
  1424  			if op.b1 == 0 {
  1425  				// UnsignedInt32
  1426  				ce.pushValue(uint64(uint32(v2) ^ uint32(v1)))
  1427  			} else {
  1428  				// UnsignedInt64
  1429  				ce.pushValue(uint64(v2 ^ v1))
  1430  			}
  1431  			frame.pc++
  1432  		case wazeroir.OperationKindShl:
  1433  			v2 := ce.popValue()
  1434  			v1 := ce.popValue()
  1435  			if op.b1 == 0 {
  1436  				// UnsignedInt32
  1437  				ce.pushValue(uint64(uint32(v1) << (uint32(v2) % 32)))
  1438  			} else {
  1439  				// UnsignedInt64
  1440  				ce.pushValue(v1 << (v2 % 64))
  1441  			}
  1442  			frame.pc++
  1443  		case wazeroir.OperationKindShr:
  1444  			v2 := ce.popValue()
  1445  			v1 := ce.popValue()
  1446  			switch wazeroir.SignedInt(op.b1) {
  1447  			case wazeroir.SignedInt32:
  1448  				ce.pushValue(uint64(uint32(int32(v1) >> (uint32(v2) % 32))))
  1449  			case wazeroir.SignedInt64:
  1450  				ce.pushValue(uint64(int64(v1) >> (v2 % 64)))
  1451  			case wazeroir.SignedUint32:
  1452  				ce.pushValue(uint64(uint32(v1) >> (uint32(v2) % 32)))
  1453  			case wazeroir.SignedUint64:
  1454  				ce.pushValue(v1 >> (v2 % 64))
  1455  			}
  1456  			frame.pc++
  1457  		case wazeroir.OperationKindRotl:
  1458  			v2 := ce.popValue()
  1459  			v1 := ce.popValue()
  1460  			if op.b1 == 0 {
  1461  				// UnsignedInt32
  1462  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), int(v2))))
  1463  			} else {
  1464  				// UnsignedInt64
  1465  				ce.pushValue(uint64(bits.RotateLeft64(v1, int(v2))))
  1466  			}
  1467  			frame.pc++
  1468  		case wazeroir.OperationKindRotr:
  1469  			v2 := ce.popValue()
  1470  			v1 := ce.popValue()
  1471  			if op.b1 == 0 {
  1472  				// UnsignedInt32
  1473  				ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), -int(v2))))
  1474  			} else {
  1475  				// UnsignedInt64
  1476  				ce.pushValue(uint64(bits.RotateLeft64(v1, -int(v2))))
  1477  			}
  1478  			frame.pc++
  1479  		case wazeroir.OperationKindAbs:
  1480  			if op.b1 == 0 {
  1481  				// Float32
  1482  				const mask uint32 = 1 << 31
  1483  				ce.pushValue(uint64(uint32(ce.popValue()) &^ mask))
  1484  			} else {
  1485  				// Float64
  1486  				const mask uint64 = 1 << 63
  1487  				ce.pushValue(ce.popValue() &^ mask)
  1488  			}
  1489  			frame.pc++
  1490  		case wazeroir.OperationKindNeg:
  1491  			if op.b1 == 0 {
  1492  				// Float32
  1493  				v := -math.Float32frombits(uint32(ce.popValue()))
  1494  				ce.pushValue(uint64(math.Float32bits(v)))
  1495  			} else {
  1496  				// Float64
  1497  				v := -math.Float64frombits(ce.popValue())
  1498  				ce.pushValue(math.Float64bits(v))
  1499  			}
  1500  			frame.pc++
  1501  		case wazeroir.OperationKindCeil:
  1502  			if op.b1 == 0 {
  1503  				// Float32
  1504  				v := moremath.WasmCompatCeilF32(math.Float32frombits(uint32(ce.popValue())))
  1505  				ce.pushValue(uint64(math.Float32bits(v)))
  1506  			} else {
  1507  				// Float64
  1508  				v := moremath.WasmCompatCeilF64(math.Float64frombits(ce.popValue()))
  1509  				ce.pushValue(math.Float64bits(v))
  1510  			}
  1511  			frame.pc++
  1512  		case wazeroir.OperationKindFloor:
  1513  			if op.b1 == 0 {
  1514  				// Float32
  1515  				v := moremath.WasmCompatFloorF32(math.Float32frombits(uint32(ce.popValue())))
  1516  				ce.pushValue(uint64(math.Float32bits(v)))
  1517  			} else {
  1518  				// Float64
  1519  				v := moremath.WasmCompatFloorF64(math.Float64frombits(ce.popValue()))
  1520  				ce.pushValue(math.Float64bits(v))
  1521  			}
  1522  			frame.pc++
  1523  		case wazeroir.OperationKindTrunc:
  1524  			if op.b1 == 0 {
  1525  				// Float32
  1526  				v := moremath.WasmCompatTruncF32(math.Float32frombits(uint32(ce.popValue())))
  1527  				ce.pushValue(uint64(math.Float32bits(v)))
  1528  			} else {
  1529  				// Float64
  1530  				v := moremath.WasmCompatTruncF64(math.Float64frombits(ce.popValue()))
  1531  				ce.pushValue(math.Float64bits(v))
  1532  			}
  1533  			frame.pc++
  1534  		case wazeroir.OperationKindNearest:
  1535  			if op.b1 == 0 {
  1536  				// Float32
  1537  				f := math.Float32frombits(uint32(ce.popValue()))
  1538  				ce.pushValue(uint64(math.Float32bits(moremath.WasmCompatNearestF32(f))))
  1539  			} else {
  1540  				// Float64
  1541  				f := math.Float64frombits(ce.popValue())
  1542  				ce.pushValue(math.Float64bits(moremath.WasmCompatNearestF64(f)))
  1543  			}
  1544  			frame.pc++
  1545  		case wazeroir.OperationKindSqrt:
  1546  			if op.b1 == 0 {
  1547  				// Float32
  1548  				v := math.Sqrt(float64(math.Float32frombits(uint32(ce.popValue()))))
  1549  				ce.pushValue(uint64(math.Float32bits(float32(v))))
  1550  			} else {
  1551  				// Float64
  1552  				v := math.Sqrt(math.Float64frombits(ce.popValue()))
  1553  				ce.pushValue(math.Float64bits(v))
  1554  			}
  1555  			frame.pc++
  1556  		case wazeroir.OperationKindMin:
  1557  			if op.b1 == 0 {
  1558  				// Float32
  1559  				ce.pushValue(WasmCompatMin32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1560  			} else {
  1561  				v2 := math.Float64frombits(ce.popValue())
  1562  				v1 := math.Float64frombits(ce.popValue())
  1563  				ce.pushValue(math.Float64bits(moremath.WasmCompatMin64(v1, v2)))
  1564  			}
  1565  			frame.pc++
  1566  		case wazeroir.OperationKindMax:
  1567  			if op.b1 == 0 {
  1568  				ce.pushValue(WasmCompatMax32bits(uint32(ce.popValue()), uint32(ce.popValue())))
  1569  			} else {
  1570  				// Float64
  1571  				v2 := math.Float64frombits(ce.popValue())
  1572  				v1 := math.Float64frombits(ce.popValue())
  1573  				ce.pushValue(math.Float64bits(moremath.WasmCompatMax64(v1, v2)))
  1574  			}
  1575  			frame.pc++
  1576  		case wazeroir.OperationKindCopysign:
  1577  			if op.b1 == 0 {
  1578  				// Float32
  1579  				v2 := uint32(ce.popValue())
  1580  				v1 := uint32(ce.popValue())
  1581  				const signbit = 1 << 31
  1582  				ce.pushValue(uint64(v1&^signbit | v2&signbit))
  1583  			} else {
  1584  				// Float64
  1585  				v2 := ce.popValue()
  1586  				v1 := ce.popValue()
  1587  				const signbit = 1 << 63
  1588  				ce.pushValue(v1&^signbit | v2&signbit)
  1589  			}
  1590  			frame.pc++
  1591  		case wazeroir.OperationKindI32WrapFromI64:
  1592  			ce.pushValue(uint64(uint32(ce.popValue())))
  1593  			frame.pc++
  1594  		case wazeroir.OperationKindITruncFromF:
  1595  			if op.b1 == 0 {
  1596  				// Float32
  1597  				switch wazeroir.SignedInt(op.b2) {
  1598  				case wazeroir.SignedInt32:
  1599  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1600  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1601  						if op.b3 {
  1602  							// non-trapping conversion must cast nan to zero.
  1603  							v = 0
  1604  						} else {
  1605  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1606  						}
  1607  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1608  						if op.b3 {
  1609  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1610  							if v < 0 {
  1611  								v = math.MinInt32
  1612  							} else {
  1613  								v = math.MaxInt32
  1614  							}
  1615  						} else {
  1616  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1617  						}
  1618  					}
  1619  					ce.pushValue(uint64(uint32(int32(v))))
  1620  				case wazeroir.SignedInt64:
  1621  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1622  					res := int64(v)
  1623  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1624  						if op.b3 {
  1625  							// non-trapping conversion must cast nan to zero.
  1626  							res = 0
  1627  						} else {
  1628  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1629  						}
  1630  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1631  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1632  						// and that's why we use '>=' not '>' to check overflow.
  1633  						if op.b3 {
  1634  							// non-trapping conversion must "saturate" the value for overflowing sources.
  1635  							if v < 0 {
  1636  								res = math.MinInt64
  1637  							} else {
  1638  								res = math.MaxInt64
  1639  							}
  1640  						} else {
  1641  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1642  						}
  1643  					}
  1644  					ce.pushValue(uint64(res))
  1645  				case wazeroir.SignedUint32:
  1646  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1647  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1648  						if op.b3 {
  1649  							// non-trapping conversion must cast nan to zero.
  1650  							v = 0
  1651  						} else {
  1652  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1653  						}
  1654  					} else if v < 0 || v > math.MaxUint32 {
  1655  						if op.b3 {
  1656  							// non-trapping conversion must "saturate" the value for overflowing source.
  1657  							if v < 0 {
  1658  								v = 0
  1659  							} else {
  1660  								v = math.MaxUint32
  1661  							}
  1662  						} else {
  1663  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1664  						}
  1665  					}
  1666  					ce.pushValue(uint64(uint32(v)))
  1667  				case wazeroir.SignedUint64:
  1668  					v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
  1669  					res := uint64(v)
  1670  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1671  						if op.b3 {
  1672  							// non-trapping conversion must cast nan to zero.
  1673  							res = 0
  1674  						} else {
  1675  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1676  						}
  1677  					} else if v < 0 || v >= math.MaxUint64 {
  1678  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1679  						// and that's why we use '>=' not '>' to check overflow.
  1680  						if op.b3 {
  1681  							// non-trapping conversion must "saturate" the value for overflowing source.
  1682  							if v < 0 {
  1683  								res = 0
  1684  							} else {
  1685  								res = math.MaxUint64
  1686  							}
  1687  						} else {
  1688  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1689  						}
  1690  					}
  1691  					ce.pushValue(res)
  1692  				}
  1693  			} else {
  1694  				// Float64
  1695  				switch wazeroir.SignedInt(op.b2) {
  1696  				case wazeroir.SignedInt32:
  1697  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1698  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1699  						if op.b3 {
  1700  							// non-trapping conversion must cast nan to zero.
  1701  							v = 0
  1702  						} else {
  1703  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1704  						}
  1705  					} else if v < math.MinInt32 || v > math.MaxInt32 {
  1706  						if op.b3 {
  1707  							// non-trapping conversion must "saturate" the value for overflowing source.
  1708  							if v < 0 {
  1709  								v = math.MinInt32
  1710  							} else {
  1711  								v = math.MaxInt32
  1712  							}
  1713  						} else {
  1714  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1715  						}
  1716  					}
  1717  					ce.pushValue(uint64(uint32(int32(v))))
  1718  				case wazeroir.SignedInt64:
  1719  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1720  					res := int64(v)
  1721  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1722  						if op.b3 {
  1723  							// non-trapping conversion must cast nan to zero.
  1724  							res = 0
  1725  						} else {
  1726  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1727  						}
  1728  					} else if v < math.MinInt64 || v >= math.MaxInt64 {
  1729  						// Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
  1730  						// and that's why we use '>=' not '>' to check overflow.
  1731  						if op.b3 {
  1732  							// non-trapping conversion must "saturate" the value for overflowing source.
  1733  							if v < 0 {
  1734  								res = math.MinInt64
  1735  							} else {
  1736  								res = math.MaxInt64
  1737  							}
  1738  						} else {
  1739  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1740  						}
  1741  					}
  1742  					ce.pushValue(uint64(res))
  1743  				case wazeroir.SignedUint32:
  1744  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1745  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1746  						if op.b3 {
  1747  							// non-trapping conversion must cast nan to zero.
  1748  							v = 0
  1749  						} else {
  1750  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1751  						}
  1752  					} else if v < 0 || v > math.MaxUint32 {
  1753  						if op.b3 {
  1754  							// non-trapping conversion must "saturate" the value for overflowing source.
  1755  							if v < 0 {
  1756  								v = 0
  1757  							} else {
  1758  								v = math.MaxUint32
  1759  							}
  1760  						} else {
  1761  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1762  						}
  1763  					}
  1764  					ce.pushValue(uint64(uint32(v)))
  1765  				case wazeroir.SignedUint64:
  1766  					v := math.Trunc(math.Float64frombits(ce.popValue()))
  1767  					res := uint64(v)
  1768  					if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN
  1769  						if op.b3 {
  1770  							// non-trapping conversion must cast nan to zero.
  1771  							res = 0
  1772  						} else {
  1773  							panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
  1774  						}
  1775  					} else if v < 0 || v >= math.MaxUint64 {
  1776  						// Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
  1777  						// and that's why we use '>=' not '>' to check overflow.
  1778  						if op.b3 {
  1779  							// non-trapping conversion must "saturate" the value for overflowing source.
  1780  							if v < 0 {
  1781  								res = 0
  1782  							} else {
  1783  								res = math.MaxUint64
  1784  							}
  1785  						} else {
  1786  							panic(wasmruntime.ErrRuntimeIntegerOverflow)
  1787  						}
  1788  					}
  1789  					ce.pushValue(res)
  1790  				}
  1791  			}
  1792  			frame.pc++
  1793  		case wazeroir.OperationKindFConvertFromI:
  1794  			switch wazeroir.SignedInt(op.b1) {
  1795  			case wazeroir.SignedInt32:
  1796  				if op.b2 == 0 {
  1797  					// Float32
  1798  					v := float32(int32(ce.popValue()))
  1799  					ce.pushValue(uint64(math.Float32bits(v)))
  1800  				} else {
  1801  					// Float64
  1802  					v := float64(int32(ce.popValue()))
  1803  					ce.pushValue(math.Float64bits(v))
  1804  				}
  1805  			case wazeroir.SignedInt64:
  1806  				if op.b2 == 0 {
  1807  					// Float32
  1808  					v := float32(int64(ce.popValue()))
  1809  					ce.pushValue(uint64(math.Float32bits(v)))
  1810  				} else {
  1811  					// Float64
  1812  					v := float64(int64(ce.popValue()))
  1813  					ce.pushValue(math.Float64bits(v))
  1814  				}
  1815  			case wazeroir.SignedUint32:
  1816  				if op.b2 == 0 {
  1817  					// Float32
  1818  					v := float32(uint32(ce.popValue()))
  1819  					ce.pushValue(uint64(math.Float32bits(v)))
  1820  				} else {
  1821  					// Float64
  1822  					v := float64(uint32(ce.popValue()))
  1823  					ce.pushValue(math.Float64bits(v))
  1824  				}
  1825  			case wazeroir.SignedUint64:
  1826  				if op.b2 == 0 {
  1827  					// Float32
  1828  					v := float32(ce.popValue())
  1829  					ce.pushValue(uint64(math.Float32bits(v)))
  1830  				} else {
  1831  					// Float64
  1832  					v := float64(ce.popValue())
  1833  					ce.pushValue(math.Float64bits(v))
  1834  				}
  1835  			}
  1836  			frame.pc++
  1837  		case wazeroir.OperationKindF32DemoteFromF64:
  1838  			v := float32(math.Float64frombits(ce.popValue()))
  1839  			ce.pushValue(uint64(math.Float32bits(v)))
  1840  			frame.pc++
  1841  		case wazeroir.OperationKindF64PromoteFromF32:
  1842  			v := float64(math.Float32frombits(uint32(ce.popValue())))
  1843  			ce.pushValue(math.Float64bits(v))
  1844  			frame.pc++
  1845  		case wazeroir.OperationKindExtend:
  1846  			if op.b1 == 1 {
  1847  				// Signed.
  1848  				v := int64(int32(ce.popValue()))
  1849  				ce.pushValue(uint64(v))
  1850  			} else {
  1851  				v := uint64(uint32(ce.popValue()))
  1852  				ce.pushValue(v)
  1853  			}
  1854  			frame.pc++
  1855  		case wazeroir.OperationKindSignExtend32From8:
  1856  			v := uint32(int8(ce.popValue()))
  1857  			ce.pushValue(uint64(v))
  1858  			frame.pc++
  1859  		case wazeroir.OperationKindSignExtend32From16:
  1860  			v := uint32(int16(ce.popValue()))
  1861  			ce.pushValue(uint64(v))
  1862  			frame.pc++
  1863  		case wazeroir.OperationKindSignExtend64From8:
  1864  			v := int64(int8(ce.popValue()))
  1865  			ce.pushValue(uint64(v))
  1866  			frame.pc++
  1867  		case wazeroir.OperationKindSignExtend64From16:
  1868  			v := int64(int16(ce.popValue()))
  1869  			ce.pushValue(uint64(v))
  1870  			frame.pc++
  1871  		case wazeroir.OperationKindSignExtend64From32:
  1872  			v := int64(int32(ce.popValue()))
  1873  			ce.pushValue(uint64(v))
  1874  			frame.pc++
  1875  		case wazeroir.OperationKindMemoryInit:
  1876  			dataInstance := dataInstances[op.us[0]]
  1877  			copySize := ce.popValue()
  1878  			inDataOffset := ce.popValue()
  1879  			inMemoryOffset := ce.popValue()
  1880  			if inDataOffset+copySize > uint64(len(dataInstance)) ||
  1881  				inMemoryOffset+copySize > uint64(len(memoryInst.Buffer)) {
  1882  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1883  			} else if copySize != 0 {
  1884  				copy(memoryInst.Buffer[inMemoryOffset:inMemoryOffset+copySize], dataInstance[inDataOffset:])
  1885  			}
  1886  			frame.pc++
  1887  		case wazeroir.OperationKindDataDrop:
  1888  			dataInstances[op.us[0]] = nil
  1889  			frame.pc++
  1890  		case wazeroir.OperationKindMemoryCopy:
  1891  			memLen := uint64(len(memoryInst.Buffer))
  1892  			copySize := ce.popValue()
  1893  			sourceOffset := ce.popValue()
  1894  			destinationOffset := ce.popValue()
  1895  			if sourceOffset+copySize > memLen || destinationOffset+copySize > memLen {
  1896  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1897  			} else if copySize != 0 {
  1898  				copy(memoryInst.Buffer[destinationOffset:],
  1899  					memoryInst.Buffer[sourceOffset:sourceOffset+copySize])
  1900  			}
  1901  			frame.pc++
  1902  		case wazeroir.OperationKindMemoryFill:
  1903  			fillSize := ce.popValue()
  1904  			value := byte(ce.popValue())
  1905  			offset := ce.popValue()
  1906  			if fillSize+offset > uint64(len(memoryInst.Buffer)) {
  1907  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  1908  			} else if fillSize != 0 {
  1909  				// Uses the copy trick for faster filling buffer.
  1910  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1911  				buf := memoryInst.Buffer[offset : offset+fillSize]
  1912  				buf[0] = value
  1913  				for i := 1; i < len(buf); i *= 2 {
  1914  					copy(buf[i:], buf[:i])
  1915  				}
  1916  			}
  1917  			frame.pc++
  1918  		case wazeroir.OperationKindTableInit:
  1919  			elementInstance := elementInstances[op.us[0]]
  1920  			copySize := ce.popValue()
  1921  			inElementOffset := ce.popValue()
  1922  			inTableOffset := ce.popValue()
  1923  			table := tables[op.us[1]]
  1924  			if inElementOffset+copySize > uint64(len(elementInstance.References)) ||
  1925  				inTableOffset+copySize > uint64(len(table.References)) {
  1926  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1927  			} else if copySize != 0 {
  1928  				copy(table.References[inTableOffset:inTableOffset+copySize], elementInstance.References[inElementOffset:])
  1929  			}
  1930  			frame.pc++
  1931  		case wazeroir.OperationKindElemDrop:
  1932  			elementInstances[op.us[0]].References = nil
  1933  			frame.pc++
  1934  		case wazeroir.OperationKindTableCopy:
  1935  			srcTable, dstTable := tables[op.us[0]].References, tables[op.us[1]].References
  1936  			copySize := ce.popValue()
  1937  			sourceOffset := ce.popValue()
  1938  			destinationOffset := ce.popValue()
  1939  			if sourceOffset+copySize > uint64(len(srcTable)) || destinationOffset+copySize > uint64(len(dstTable)) {
  1940  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1941  			} else if copySize != 0 {
  1942  				copy(dstTable[destinationOffset:], srcTable[sourceOffset:sourceOffset+copySize])
  1943  			}
  1944  			frame.pc++
  1945  		case wazeroir.OperationKindRefFunc:
  1946  			ce.pushValue(uint64(uintptr(unsafe.Pointer(functions[op.us[0]]))))
  1947  			frame.pc++
  1948  		case wazeroir.OperationKindTableGet:
  1949  			table := tables[op.us[0]]
  1950  
  1951  			offset := ce.popValue()
  1952  			if offset >= uint64(len(table.References)) {
  1953  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1954  			}
  1955  
  1956  			ce.pushValue(uint64(table.References[offset]))
  1957  			frame.pc++
  1958  		case wazeroir.OperationKindTableSet:
  1959  			table := tables[op.us[0]]
  1960  			ref := ce.popValue()
  1961  
  1962  			offset := ce.popValue()
  1963  			if offset >= uint64(len(table.References)) {
  1964  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1965  			}
  1966  
  1967  			table.References[offset] = uintptr(ref) // externrefs are opaque uint64.
  1968  			frame.pc++
  1969  		case wazeroir.OperationKindTableSize:
  1970  			table := tables[op.us[0]]
  1971  			ce.pushValue(uint64(len(table.References)))
  1972  			frame.pc++
  1973  		case wazeroir.OperationKindTableGrow:
  1974  			table := tables[op.us[0]]
  1975  			num, ref := ce.popValue(), ce.popValue()
  1976  			ret := table.Grow(ctx, uint32(num), uintptr(ref))
  1977  			ce.pushValue(uint64(ret))
  1978  			frame.pc++
  1979  		case wazeroir.OperationKindTableFill:
  1980  			table := tables[op.us[0]]
  1981  			num := ce.popValue()
  1982  			ref := uintptr(ce.popValue())
  1983  			offset := ce.popValue()
  1984  			if num+offset > uint64(len(table.References)) {
  1985  				panic(wasmruntime.ErrRuntimeInvalidTableAccess)
  1986  			} else if num > 0 {
  1987  				// Uses the copy trick for faster filling the region with the value.
  1988  				// https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
  1989  				targetRegion := table.References[offset : offset+num]
  1990  				targetRegion[0] = ref
  1991  				for i := 1; i < len(targetRegion); i *= 2 {
  1992  					copy(targetRegion[i:], targetRegion[:i])
  1993  				}
  1994  			}
  1995  			frame.pc++
  1996  		case wazeroir.OperationKindV128Const:
  1997  			lo, hi := op.us[0], op.us[1]
  1998  			ce.pushValue(lo)
  1999  			ce.pushValue(hi)
  2000  			frame.pc++
  2001  		case wazeroir.OperationKindV128Add:
  2002  			yHigh, yLow := ce.popValue(), ce.popValue()
  2003  			xHigh, xLow := ce.popValue(), ce.popValue()
  2004  			switch op.b1 {
  2005  			case wazeroir.ShapeI8x16:
  2006  				ce.pushValue(
  2007  					uint64(uint8(xLow>>8)+uint8(yLow>>8))<<8 | uint64(uint8(xLow)+uint8(yLow)) |
  2008  						uint64(uint8(xLow>>24)+uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)+uint8(yLow>>16))<<16 |
  2009  						uint64(uint8(xLow>>40)+uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)+uint8(yLow>>32))<<32 |
  2010  						uint64(uint8(xLow>>56)+uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)+uint8(yLow>>48))<<48,
  2011  				)
  2012  				ce.pushValue(
  2013  					uint64(uint8(xHigh>>8)+uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)+uint8(yHigh)) |
  2014  						uint64(uint8(xHigh>>24)+uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)+uint8(yHigh>>16))<<16 |
  2015  						uint64(uint8(xHigh>>40)+uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)+uint8(yHigh>>32))<<32 |
  2016  						uint64(uint8(xHigh>>56)+uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)+uint8(yHigh>>48))<<48,
  2017  				)
  2018  			case wazeroir.ShapeI16x8:
  2019  				ce.pushValue(
  2020  					uint64(uint16(xLow>>16+yLow>>16))<<16 | uint64(uint16(xLow)+uint16(yLow)) |
  2021  						uint64(uint16(xLow>>48+yLow>>48))<<48 | uint64(uint16(xLow>>32+yLow>>32))<<32,
  2022  				)
  2023  				ce.pushValue(
  2024  					uint64(uint16(xHigh>>16)+uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)+uint16(yHigh)) |
  2025  						uint64(uint16(xHigh>>48)+uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)+uint16(yHigh>>32))<<32,
  2026  				)
  2027  			case wazeroir.ShapeI32x4:
  2028  				ce.pushValue(uint64(uint32(xLow>>32)+uint32(yLow>>32))<<32 | uint64(uint32(xLow)+uint32(yLow)))
  2029  				ce.pushValue(uint64(uint32(xHigh>>32)+uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)+uint32(yHigh)))
  2030  			case wazeroir.ShapeI64x2:
  2031  				ce.pushValue(xLow + yLow)
  2032  				ce.pushValue(xHigh + yHigh)
  2033  			case wazeroir.ShapeF32x4:
  2034  				ce.pushValue(
  2035  					addFloat32bits(uint32(xLow), uint32(yLow)) | addFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
  2036  				)
  2037  				ce.pushValue(
  2038  					addFloat32bits(uint32(xHigh), uint32(yHigh)) | addFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
  2039  				)
  2040  			case wazeroir.ShapeF64x2:
  2041  				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow)))
  2042  				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh)))
  2043  			}
  2044  			frame.pc++
  2045  		case wazeroir.OperationKindV128Sub:
  2046  			yHigh, yLow := ce.popValue(), ce.popValue()
  2047  			xHigh, xLow := ce.popValue(), ce.popValue()
  2048  			switch op.b1 {
  2049  			case wazeroir.ShapeI8x16:
  2050  				ce.pushValue(
  2051  					uint64(uint8(xLow>>8)-uint8(yLow>>8))<<8 | uint64(uint8(xLow)-uint8(yLow)) |
  2052  						uint64(uint8(xLow>>24)-uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)-uint8(yLow>>16))<<16 |
  2053  						uint64(uint8(xLow>>40)-uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)-uint8(yLow>>32))<<32 |
  2054  						uint64(uint8(xLow>>56)-uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)-uint8(yLow>>48))<<48,
  2055  				)
  2056  				ce.pushValue(
  2057  					uint64(uint8(xHigh>>8)-uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)-uint8(yHigh)) |
  2058  						uint64(uint8(xHigh>>24)-uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)-uint8(yHigh>>16))<<16 |
  2059  						uint64(uint8(xHigh>>40)-uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)-uint8(yHigh>>32))<<32 |
  2060  						uint64(uint8(xHigh>>56)-uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)-uint8(yHigh>>48))<<48,
  2061  				)
  2062  			case wazeroir.ShapeI16x8:
  2063  				ce.pushValue(
  2064  					uint64(uint16(xLow>>16)-uint16(yLow>>16))<<16 | uint64(uint16(xLow)-uint16(yLow)) |
  2065  						uint64(uint16(xLow>>48)-uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)-uint16(yLow>>32))<<32,
  2066  				)
  2067  				ce.pushValue(
  2068  					uint64(uint16(xHigh>>16)-uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)-uint16(yHigh)) |
  2069  						uint64(uint16(xHigh>>48)-uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)-uint16(yHigh>>32))<<32,
  2070  				)
  2071  			case wazeroir.ShapeI32x4:
  2072  				ce.pushValue(uint64(uint32(xLow>>32-yLow>>32))<<32 | uint64(uint32(xLow)-uint32(yLow)))
  2073  				ce.pushValue(uint64(uint32(xHigh>>32-yHigh>>32))<<32 | uint64(uint32(xHigh)-uint32(yHigh)))
  2074  			case wazeroir.ShapeI64x2:
  2075  				ce.pushValue(xLow - yLow)
  2076  				ce.pushValue(xHigh - yHigh)
  2077  			case wazeroir.ShapeF32x4:
  2078  				ce.pushValue(
  2079  					subFloat32bits(uint32(xLow), uint32(yLow)) | subFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
  2080  				)
  2081  				ce.pushValue(
  2082  					subFloat32bits(uint32(xHigh), uint32(yHigh)) | subFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
  2083  				)
  2084  			case wazeroir.ShapeF64x2:
  2085  				ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow)))
  2086  				ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh)))
  2087  			}
  2088  			frame.pc++
  2089  		case wazeroir.OperationKindV128Load:
  2090  			offset := ce.popMemoryOffset(op)
  2091  			switch op.b1 {
  2092  			case wazeroir.V128LoadType128:
  2093  				lo, ok := memoryInst.ReadUint64Le(ctx, offset)
  2094  				if !ok {
  2095  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2096  				}
  2097  				ce.pushValue(lo)
  2098  				hi, ok := memoryInst.ReadUint64Le(ctx, offset+8)
  2099  				if !ok {
  2100  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2101  				}
  2102  				ce.pushValue(hi)
  2103  			case wazeroir.V128LoadType8x8s:
  2104  				data, ok := memoryInst.Read(ctx, offset, 8)
  2105  				if !ok {
  2106  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2107  				}
  2108  				ce.pushValue(
  2109  					uint64(uint16(int8(data[3])))<<48 | uint64(uint16(int8(data[2])))<<32 | uint64(uint16(int8(data[1])))<<16 | uint64(uint16(int8(data[0]))),
  2110  				)
  2111  				ce.pushValue(
  2112  					uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))),
  2113  				)
  2114  			case wazeroir.V128LoadType8x8u:
  2115  				data, ok := memoryInst.Read(ctx, offset, 8)
  2116  				if !ok {
  2117  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2118  				}
  2119  				ce.pushValue(
  2120  					uint64(data[3])<<48 | uint64(data[2])<<32 | uint64(data[1])<<16 | uint64(data[0]),
  2121  				)
  2122  				ce.pushValue(
  2123  					uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]),
  2124  				)
  2125  			case wazeroir.V128LoadType16x4s:
  2126  				data, ok := memoryInst.Read(ctx, offset, 8)
  2127  				if !ok {
  2128  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2129  				}
  2130  				ce.pushValue(
  2131  					uint64(int16(binary.LittleEndian.Uint16(data[2:])))<<32 |
  2132  						uint64(uint32(int16(binary.LittleEndian.Uint16(data)))),
  2133  				)
  2134  				ce.pushValue(
  2135  					uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 |
  2136  						uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))),
  2137  				)
  2138  			case wazeroir.V128LoadType16x4u:
  2139  				data, ok := memoryInst.Read(ctx, offset, 8)
  2140  				if !ok {
  2141  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2142  				}
  2143  				ce.pushValue(
  2144  					uint64(binary.LittleEndian.Uint16(data[2:]))<<32 | uint64(binary.LittleEndian.Uint16(data)),
  2145  				)
  2146  				ce.pushValue(
  2147  					uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])),
  2148  				)
  2149  			case wazeroir.V128LoadType32x2s:
  2150  				data, ok := memoryInst.Read(ctx, offset, 8)
  2151  				if !ok {
  2152  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2153  				}
  2154  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data))))
  2155  				ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:]))))
  2156  			case wazeroir.V128LoadType32x2u:
  2157  				data, ok := memoryInst.Read(ctx, offset, 8)
  2158  				if !ok {
  2159  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2160  				}
  2161  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data)))
  2162  				ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:])))
  2163  			case wazeroir.V128LoadType8Splat:
  2164  				v, ok := memoryInst.ReadByte(ctx, offset)
  2165  				if !ok {
  2166  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2167  				}
  2168  				v8 := uint64(v)<<56 | uint64(v)<<48 | uint64(v)<<40 | uint64(v)<<32 |
  2169  					uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v)
  2170  				ce.pushValue(v8)
  2171  				ce.pushValue(v8)
  2172  			case wazeroir.V128LoadType16Splat:
  2173  				v, ok := memoryInst.ReadUint16Le(ctx, offset)
  2174  				if !ok {
  2175  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2176  				}
  2177  				v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v)
  2178  				ce.pushValue(v4)
  2179  				ce.pushValue(v4)
  2180  			case wazeroir.V128LoadType32Splat:
  2181  				v, ok := memoryInst.ReadUint32Le(ctx, offset)
  2182  				if !ok {
  2183  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2184  				}
  2185  				vv := uint64(v)<<32 | uint64(v)
  2186  				ce.pushValue(vv)
  2187  				ce.pushValue(vv)
  2188  			case wazeroir.V128LoadType64Splat:
  2189  				lo, ok := memoryInst.ReadUint64Le(ctx, offset)
  2190  				if !ok {
  2191  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2192  				}
  2193  				ce.pushValue(lo)
  2194  				ce.pushValue(lo)
  2195  			case wazeroir.V128LoadType32zero:
  2196  				lo, ok := memoryInst.ReadUint32Le(ctx, offset)
  2197  				if !ok {
  2198  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2199  				}
  2200  				ce.pushValue(uint64(lo))
  2201  				ce.pushValue(0)
  2202  			case wazeroir.V128LoadType64zero:
  2203  				lo, ok := memoryInst.ReadUint64Le(ctx, offset)
  2204  				if !ok {
  2205  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2206  				}
  2207  				ce.pushValue(lo)
  2208  				ce.pushValue(0)
  2209  			}
  2210  			frame.pc++
  2211  		case wazeroir.OperationKindV128LoadLane:
  2212  			hi, lo := ce.popValue(), ce.popValue()
  2213  			offset := ce.popMemoryOffset(op)
  2214  			switch op.b1 {
  2215  			case 8:
  2216  				b, ok := memoryInst.ReadByte(ctx, offset)
  2217  				if !ok {
  2218  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2219  				}
  2220  				if op.b2 < 8 {
  2221  					s := op.b2 << 3
  2222  					lo = (lo & ^(0xff << s)) | uint64(b)<<s
  2223  				} else {
  2224  					s := (op.b2 - 8) << 3
  2225  					hi = (hi & ^(0xff << s)) | uint64(b)<<s
  2226  				}
  2227  			case 16:
  2228  				b, ok := memoryInst.ReadUint16Le(ctx, offset)
  2229  				if !ok {
  2230  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2231  				}
  2232  				if op.b2 < 4 {
  2233  					s := op.b2 << 4
  2234  					lo = (lo & ^(0xff_ff << s)) | uint64(b)<<s
  2235  				} else {
  2236  					s := (op.b2 - 4) << 4
  2237  					hi = (hi & ^(0xff_ff << s)) | uint64(b)<<s
  2238  				}
  2239  			case 32:
  2240  				b, ok := memoryInst.ReadUint32Le(ctx, offset)
  2241  				if !ok {
  2242  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2243  				}
  2244  				if op.b2 < 2 {
  2245  					s := op.b2 << 5
  2246  					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
  2247  				} else {
  2248  					s := (op.b2 - 2) << 5
  2249  					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
  2250  				}
  2251  			case 64:
  2252  				b, ok := memoryInst.ReadUint64Le(ctx, offset)
  2253  				if !ok {
  2254  					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2255  				}
  2256  				if op.b2 == 0 {
  2257  					lo = b
  2258  				} else {
  2259  					hi = b
  2260  				}
  2261  			}
  2262  			ce.pushValue(lo)
  2263  			ce.pushValue(hi)
  2264  			frame.pc++
  2265  		case wazeroir.OperationKindV128Store:
  2266  			hi, lo := ce.popValue(), ce.popValue()
  2267  			offset := ce.popMemoryOffset(op)
  2268  			if ok := memoryInst.WriteUint64Le(ctx, offset, lo); !ok {
  2269  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2270  			}
  2271  			if ok := memoryInst.WriteUint64Le(ctx, offset+8, hi); !ok {
  2272  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2273  			}
  2274  			frame.pc++
  2275  		case wazeroir.OperationKindV128StoreLane:
  2276  			hi, lo := ce.popValue(), ce.popValue()
  2277  			offset := ce.popMemoryOffset(op)
  2278  			var ok bool
  2279  			switch op.b1 {
  2280  			case 8:
  2281  				if op.b2 < 8 {
  2282  					ok = memoryInst.WriteByte(ctx, offset, byte(lo>>(op.b2*8)))
  2283  				} else {
  2284  					ok = memoryInst.WriteByte(ctx, offset, byte(hi>>((op.b2-8)*8)))
  2285  				}
  2286  			case 16:
  2287  				if op.b2 < 4 {
  2288  					ok = memoryInst.WriteUint16Le(ctx, offset, uint16(lo>>(op.b2*16)))
  2289  				} else {
  2290  					ok = memoryInst.WriteUint16Le(ctx, offset, uint16(hi>>((op.b2-4)*16)))
  2291  				}
  2292  			case 32:
  2293  				if op.b2 < 2 {
  2294  					ok = memoryInst.WriteUint32Le(ctx, offset, uint32(lo>>(op.b2*32)))
  2295  				} else {
  2296  					ok = memoryInst.WriteUint32Le(ctx, offset, uint32(hi>>((op.b2-2)*32)))
  2297  				}
  2298  			case 64:
  2299  				if op.b2 == 0 {
  2300  					ok = memoryInst.WriteUint64Le(ctx, offset, lo)
  2301  				} else {
  2302  					ok = memoryInst.WriteUint64Le(ctx, offset, hi)
  2303  				}
  2304  			}
  2305  			if !ok {
  2306  				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  2307  			}
  2308  			frame.pc++
  2309  		case wazeroir.OperationKindV128ReplaceLane:
  2310  			v := ce.popValue()
  2311  			hi, lo := ce.popValue(), ce.popValue()
  2312  			switch op.b1 {
  2313  			case wazeroir.ShapeI8x16:
  2314  				if op.b2 < 8 {
  2315  					s := op.b2 << 3
  2316  					lo = (lo & ^(0xff << s)) | uint64(byte(v))<<s
  2317  				} else {
  2318  					s := (op.b2 - 8) << 3
  2319  					hi = (hi & ^(0xff << s)) | uint64(byte(v))<<s
  2320  				}
  2321  			case wazeroir.ShapeI16x8:
  2322  				if op.b2 < 4 {
  2323  					s := op.b2 << 4
  2324  					lo = (lo & ^(0xff_ff << s)) | uint64(uint16(v))<<s
  2325  				} else {
  2326  					s := (op.b2 - 4) << 4
  2327  					hi = (hi & ^(0xff_ff << s)) | uint64(uint16(v))<<s
  2328  				}
  2329  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2330  				if op.b2 < 2 {
  2331  					s := op.b2 << 5
  2332  					lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
  2333  				} else {
  2334  					s := (op.b2 - 2) << 5
  2335  					hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
  2336  				}
  2337  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2338  				if op.b2 == 0 {
  2339  					lo = v
  2340  				} else {
  2341  					hi = v
  2342  				}
  2343  			}
  2344  			ce.pushValue(lo)
  2345  			ce.pushValue(hi)
  2346  			frame.pc++
  2347  		case wazeroir.OperationKindV128ExtractLane:
  2348  			hi, lo := ce.popValue(), ce.popValue()
  2349  			var v uint64
  2350  			switch op.b1 {
  2351  			case wazeroir.ShapeI8x16:
  2352  				var u8 byte
  2353  				if op.b2 < 8 {
  2354  					u8 = byte(lo >> (op.b2 * 8))
  2355  				} else {
  2356  					u8 = byte(hi >> ((op.b2 - 8) * 8))
  2357  				}
  2358  				if op.b3 {
  2359  					// sign-extend.
  2360  					v = uint64(uint32(int8(u8)))
  2361  				} else {
  2362  					v = uint64(u8)
  2363  				}
  2364  			case wazeroir.ShapeI16x8:
  2365  				var u16 uint16
  2366  				if op.b2 < 4 {
  2367  					u16 = uint16(lo >> (op.b2 * 16))
  2368  				} else {
  2369  					u16 = uint16(hi >> ((op.b2 - 4) * 16))
  2370  				}
  2371  				if op.b3 {
  2372  					// sign-extend.
  2373  					v = uint64(uint32(int16(u16)))
  2374  				} else {
  2375  					v = uint64(u16)
  2376  				}
  2377  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2378  				if op.b2 < 2 {
  2379  					v = uint64(uint32(lo >> (op.b2 * 32)))
  2380  				} else {
  2381  					v = uint64(uint32(hi >> ((op.b2 - 2) * 32)))
  2382  				}
  2383  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2384  				if op.b2 == 0 {
  2385  					v = lo
  2386  				} else {
  2387  					v = hi
  2388  				}
  2389  			}
  2390  			ce.pushValue(v)
  2391  			frame.pc++
  2392  		case wazeroir.OperationKindV128Splat:
  2393  			v := ce.popValue()
  2394  			var hi, lo uint64
  2395  			switch op.b1 {
  2396  			case wazeroir.ShapeI8x16:
  2397  				v8 := uint64(byte(v))<<56 | uint64(byte(v))<<48 | uint64(byte(v))<<40 | uint64(byte(v))<<32 |
  2398  					uint64(byte(v))<<24 | uint64(byte(v))<<16 | uint64(byte(v))<<8 | uint64(byte(v))
  2399  				hi, lo = v8, v8
  2400  			case wazeroir.ShapeI16x8:
  2401  				v4 := uint64(uint16(v))<<48 | uint64(uint16(v))<<32 | uint64(uint16(v))<<16 | uint64(uint16(v))
  2402  				hi, lo = v4, v4
  2403  			case wazeroir.ShapeI32x4, wazeroir.ShapeF32x4:
  2404  				v2 := uint64(uint32(v))<<32 | uint64(uint32(v))
  2405  				lo, hi = v2, v2
  2406  			case wazeroir.ShapeI64x2, wazeroir.ShapeF64x2:
  2407  				lo, hi = v, v
  2408  			}
  2409  			ce.pushValue(lo)
  2410  			ce.pushValue(hi)
  2411  			frame.pc++
  2412  		case wazeroir.OperationKindV128Swizzle:
  2413  			idxHi, idxLo := ce.popValue(), ce.popValue()
  2414  			baseHi, baseLo := ce.popValue(), ce.popValue()
  2415  			var newVal [16]byte
  2416  			for i := 0; i < 16; i++ {
  2417  				var id byte
  2418  				if i < 8 {
  2419  					id = byte(idxLo >> (i * 8))
  2420  				} else {
  2421  					id = byte(idxHi >> ((i - 8) * 8))
  2422  				}
  2423  				if id < 8 {
  2424  					newVal[i] = byte(baseLo >> (id * 8))
  2425  				} else if id < 16 {
  2426  					newVal[i] = byte(baseHi >> ((id - 8) * 8))
  2427  				}
  2428  			}
  2429  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2430  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2431  			frame.pc++
  2432  		case wazeroir.OperationKindV128Shuffle:
  2433  			xHi, xLo, yHi, yLo := ce.popValue(), ce.popValue(), ce.popValue(), ce.popValue()
  2434  			var newVal [16]byte
  2435  			for i, l := range op.us {
  2436  				if l < 8 {
  2437  					newVal[i] = byte(yLo >> (l * 8))
  2438  				} else if l < 16 {
  2439  					newVal[i] = byte(yHi >> ((l - 8) * 8))
  2440  				} else if l < 24 {
  2441  					newVal[i] = byte(xLo >> ((l - 16) * 8))
  2442  				} else if l < 32 {
  2443  					newVal[i] = byte(xHi >> ((l - 24) * 8))
  2444  				}
  2445  			}
  2446  			ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
  2447  			ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
  2448  			frame.pc++
  2449  		case wazeroir.OperationKindV128AnyTrue:
  2450  			hi, lo := ce.popValue(), ce.popValue()
  2451  			if hi != 0 || lo != 0 {
  2452  				ce.pushValue(1)
  2453  			} else {
  2454  				ce.pushValue(0)
  2455  			}
  2456  			frame.pc++
  2457  		case wazeroir.OperationKindV128AllTrue:
  2458  			hi, lo := ce.popValue(), ce.popValue()
  2459  			var ret bool
  2460  			switch op.b1 {
  2461  			case wazeroir.ShapeI8x16:
  2462  				ret = (uint8(lo) != 0) && (uint8(lo>>8) != 0) && (uint8(lo>>16) != 0) && (uint8(lo>>24) != 0) &&
  2463  					(uint8(lo>>32) != 0) && (uint8(lo>>40) != 0) && (uint8(lo>>48) != 0) && (uint8(lo>>56) != 0) &&
  2464  					(uint8(hi) != 0) && (uint8(hi>>8) != 0) && (uint8(hi>>16) != 0) && (uint8(hi>>24) != 0) &&
  2465  					(uint8(hi>>32) != 0) && (uint8(hi>>40) != 0) && (uint8(hi>>48) != 0) && (uint8(hi>>56) != 0)
  2466  			case wazeroir.ShapeI16x8:
  2467  				ret = (uint16(lo) != 0) && (uint16(lo>>16) != 0) && (uint16(lo>>32) != 0) && (uint16(lo>>48) != 0) &&
  2468  					(uint16(hi) != 0) && (uint16(hi>>16) != 0) && (uint16(hi>>32) != 0) && (uint16(hi>>48) != 0)
  2469  			case wazeroir.ShapeI32x4:
  2470  				ret = (uint32(lo) != 0) && (uint32(lo>>32) != 0) &&
  2471  					(uint32(hi) != 0) && (uint32(hi>>32) != 0)
  2472  			case wazeroir.ShapeI64x2:
  2473  				ret = (lo != 0) &&
  2474  					(hi != 0)
  2475  			}
  2476  			if ret {
  2477  				ce.pushValue(1)
  2478  			} else {
  2479  				ce.pushValue(0)
  2480  			}
  2481  			frame.pc++
  2482  		case wazeroir.OperationKindV128BitMask:
  2483  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitmask-extraction
  2484  			hi, lo := ce.popValue(), ce.popValue()
  2485  			var res uint64
  2486  			switch op.b1 {
  2487  			case wazeroir.ShapeI8x16:
  2488  				for i := 0; i < 8; i++ {
  2489  					if int8(lo>>(i*8)) < 0 {
  2490  						res |= 1 << i
  2491  					}
  2492  				}
  2493  				for i := 0; i < 8; i++ {
  2494  					if int8(hi>>(i*8)) < 0 {
  2495  						res |= 1 << (i + 8)
  2496  					}
  2497  				}
  2498  			case wazeroir.ShapeI16x8:
  2499  				for i := 0; i < 4; i++ {
  2500  					if int16(lo>>(i*16)) < 0 {
  2501  						res |= 1 << i
  2502  					}
  2503  				}
  2504  				for i := 0; i < 4; i++ {
  2505  					if int16(hi>>(i*16)) < 0 {
  2506  						res |= 1 << (i + 4)
  2507  					}
  2508  				}
  2509  			case wazeroir.ShapeI32x4:
  2510  				for i := 0; i < 2; i++ {
  2511  					if int32(lo>>(i*32)) < 0 {
  2512  						res |= 1 << i
  2513  					}
  2514  				}
  2515  				for i := 0; i < 2; i++ {
  2516  					if int32(hi>>(i*32)) < 0 {
  2517  						res |= 1 << (i + 2)
  2518  					}
  2519  				}
  2520  			case wazeroir.ShapeI64x2: // bit 0 of res = sign bit of the low lane, bit 1 = sign bit of the high lane.
  2521  				if int64(lo) < 0 {
  2522  					res |= 0b01
  2523  				}
  2524  				if int64(hi) < 0 { // int64 rather than int: int may be 32 bits wide, which would test bit 31 instead of bit 63.
  2525  					res |= 0b10
  2526  				}
  2527  			}
  2528  			ce.pushValue(res)
  2529  			frame.pc++
  2530  		case wazeroir.OperationKindV128And:
  2531  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2532  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2533  			ce.pushValue(x1Lo & x2Lo)
  2534  			ce.pushValue(x1Hi & x2Hi)
  2535  			frame.pc++
  2536  		case wazeroir.OperationKindV128Not:
  2537  			hi, lo := ce.popValue(), ce.popValue()
  2538  			ce.pushValue(^lo)
  2539  			ce.pushValue(^hi)
  2540  			frame.pc++
  2541  		case wazeroir.OperationKindV128Or:
  2542  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2543  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2544  			ce.pushValue(x1Lo | x2Lo)
  2545  			ce.pushValue(x1Hi | x2Hi)
  2546  			frame.pc++
  2547  		case wazeroir.OperationKindV128Xor:
  2548  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2549  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2550  			ce.pushValue(x1Lo ^ x2Lo)
  2551  			ce.pushValue(x1Hi ^ x2Hi)
  2552  			frame.pc++
  2553  		case wazeroir.OperationKindV128Bitselect:
  2554  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitwise-select
  2555  			cHi, cLo := ce.popValue(), ce.popValue()
  2556  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2557  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2558  			// v128.or(v128.and(v1, c), v128.and(v2, v128.not(c)))
  2559  			ce.pushValue((x1Lo & cLo) | (x2Lo & (^cLo)))
  2560  			ce.pushValue((x1Hi & cHi) | (x2Hi & (^cHi)))
  2561  			frame.pc++
  2562  		case wazeroir.OperationKindV128AndNot:
  2563  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2564  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2565  			ce.pushValue(x1Lo & (^x2Lo))
  2566  			ce.pushValue(x1Hi & (^x2Hi))
  2567  			frame.pc++
  2568  		case wazeroir.OperationKindV128Shl:
  2569  			s := ce.popValue()
  2570  			hi, lo := ce.popValue(), ce.popValue()
  2571  			switch op.b1 {
  2572  			case wazeroir.ShapeI8x16:
  2573  				s = s % 8
  2574  				lo = uint64(uint8(lo<<s)) |
  2575  					uint64(uint8((lo>>8)<<s))<<8 |
  2576  					uint64(uint8((lo>>16)<<s))<<16 |
  2577  					uint64(uint8((lo>>24)<<s))<<24 |
  2578  					uint64(uint8((lo>>32)<<s))<<32 |
  2579  					uint64(uint8((lo>>40)<<s))<<40 |
  2580  					uint64(uint8((lo>>48)<<s))<<48 |
  2581  					uint64(uint8((lo>>56)<<s))<<56
  2582  				hi = uint64(uint8(hi<<s)) |
  2583  					uint64(uint8((hi>>8)<<s))<<8 |
  2584  					uint64(uint8((hi>>16)<<s))<<16 |
  2585  					uint64(uint8((hi>>24)<<s))<<24 |
  2586  					uint64(uint8((hi>>32)<<s))<<32 |
  2587  					uint64(uint8((hi>>40)<<s))<<40 |
  2588  					uint64(uint8((hi>>48)<<s))<<48 |
  2589  					uint64(uint8((hi>>56)<<s))<<56
  2590  			case wazeroir.ShapeI16x8:
  2591  				s = s % 16
  2592  				lo = uint64(uint16(lo<<s)) |
  2593  					uint64(uint16((lo>>16)<<s))<<16 |
  2594  					uint64(uint16((lo>>32)<<s))<<32 |
  2595  					uint64(uint16((lo>>48)<<s))<<48
  2596  				hi = uint64(uint16(hi<<s)) |
  2597  					uint64(uint16((hi>>16)<<s))<<16 |
  2598  					uint64(uint16((hi>>32)<<s))<<32 |
  2599  					uint64(uint16((hi>>48)<<s))<<48
  2600  			case wazeroir.ShapeI32x4:
  2601  				s = s % 32
  2602  				lo = uint64(uint32(lo<<s)) | uint64(uint32((lo>>32)<<s))<<32
  2603  				hi = uint64(uint32(hi<<s)) | uint64(uint32((hi>>32)<<s))<<32
  2604  			case wazeroir.ShapeI64x2:
  2605  				s = s % 64
  2606  				lo = lo << s
  2607  				hi = hi << s
  2608  			}
  2609  			ce.pushValue(lo)
  2610  			ce.pushValue(hi)
  2611  			frame.pc++
  2612  		case wazeroir.OperationKindV128Shr:
  2613  			s := ce.popValue()
  2614  			hi, lo := ce.popValue(), ce.popValue()
  2615  			switch op.b1 {
  2616  			case wazeroir.ShapeI8x16:
  2617  				s = s % 8
  2618  				if op.b3 { // signed
  2619  					lo = uint64(uint8(int8(lo)>>s)) |
  2620  						uint64(uint8(int8(lo>>8)>>s))<<8 |
  2621  						uint64(uint8(int8(lo>>16)>>s))<<16 |
  2622  						uint64(uint8(int8(lo>>24)>>s))<<24 |
  2623  						uint64(uint8(int8(lo>>32)>>s))<<32 |
  2624  						uint64(uint8(int8(lo>>40)>>s))<<40 |
  2625  						uint64(uint8(int8(lo>>48)>>s))<<48 |
  2626  						uint64(uint8(int8(lo>>56)>>s))<<56
  2627  					hi = uint64(uint8(int8(hi)>>s)) |
  2628  						uint64(uint8(int8(hi>>8)>>s))<<8 |
  2629  						uint64(uint8(int8(hi>>16)>>s))<<16 |
  2630  						uint64(uint8(int8(hi>>24)>>s))<<24 |
  2631  						uint64(uint8(int8(hi>>32)>>s))<<32 |
  2632  						uint64(uint8(int8(hi>>40)>>s))<<40 |
  2633  						uint64(uint8(int8(hi>>48)>>s))<<48 |
  2634  						uint64(uint8(int8(hi>>56)>>s))<<56
  2635  				} else {
  2636  					lo = uint64(uint8(lo)>>s) |
  2637  						uint64(uint8(lo>>8)>>s)<<8 |
  2638  						uint64(uint8(lo>>16)>>s)<<16 |
  2639  						uint64(uint8(lo>>24)>>s)<<24 |
  2640  						uint64(uint8(lo>>32)>>s)<<32 |
  2641  						uint64(uint8(lo>>40)>>s)<<40 |
  2642  						uint64(uint8(lo>>48)>>s)<<48 |
  2643  						uint64(uint8(lo>>56)>>s)<<56
  2644  					hi = uint64(uint8(hi)>>s) |
  2645  						uint64(uint8(hi>>8)>>s)<<8 |
  2646  						uint64(uint8(hi>>16)>>s)<<16 |
  2647  						uint64(uint8(hi>>24)>>s)<<24 |
  2648  						uint64(uint8(hi>>32)>>s)<<32 |
  2649  						uint64(uint8(hi>>40)>>s)<<40 |
  2650  						uint64(uint8(hi>>48)>>s)<<48 |
  2651  						uint64(uint8(hi>>56)>>s)<<56
  2652  				}
  2653  			case wazeroir.ShapeI16x8:
  2654  				s = s % 16
  2655  				if op.b3 { // signed
  2656  					lo = uint64(uint16(int16(lo)>>s)) |
  2657  						uint64(uint16(int16(lo>>16)>>s))<<16 |
  2658  						uint64(uint16(int16(lo>>32)>>s))<<32 |
  2659  						uint64(uint16(int16(lo>>48)>>s))<<48
  2660  					hi = uint64(uint16(int16(hi)>>s)) |
  2661  						uint64(uint16(int16(hi>>16)>>s))<<16 |
  2662  						uint64(uint16(int16(hi>>32)>>s))<<32 |
  2663  						uint64(uint16(int16(hi>>48)>>s))<<48
  2664  				} else {
  2665  					lo = uint64(uint16(lo)>>s) |
  2666  						uint64(uint16(lo>>16)>>s)<<16 |
  2667  						uint64(uint16(lo>>32)>>s)<<32 |
  2668  						uint64(uint16(lo>>48)>>s)<<48
  2669  					hi = uint64(uint16(hi)>>s) |
  2670  						uint64(uint16(hi>>16)>>s)<<16 |
  2671  						uint64(uint16(hi>>32)>>s)<<32 |
  2672  						uint64(uint16(hi>>48)>>s)<<48
  2673  				}
  2674  			case wazeroir.ShapeI32x4:
  2675  				s = s % 32
  2676  				if op.b3 {
  2677  					lo = uint64(uint32(int32(lo)>>s)) | uint64(uint32(int32(lo>>32)>>s))<<32
  2678  					hi = uint64(uint32(int32(hi)>>s)) | uint64(uint32(int32(hi>>32)>>s))<<32
  2679  				} else {
  2680  					lo = uint64(uint32(lo)>>s) | uint64(uint32(lo>>32)>>s)<<32
  2681  					hi = uint64(uint32(hi)>>s) | uint64(uint32(hi>>32)>>s)<<32
  2682  				}
  2683  			case wazeroir.ShapeI64x2:
  2684  				s = s % 64
  2685  				if op.b3 { // signed
  2686  					lo = uint64(int64(lo) >> s)
  2687  					hi = uint64(int64(hi) >> s)
  2688  				} else {
  2689  					lo = lo >> s
  2690  					hi = hi >> s
  2691  				}
  2692  
  2693  			}
  2694  			ce.pushValue(lo)
  2695  			ce.pushValue(hi)
  2696  			frame.pc++
  2697  		case wazeroir.OperationKindV128Cmp:
  2698  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  2699  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  2700  			var result []bool
  2701  			switch op.b1 {
  2702  			case wazeroir.V128CmpTypeI8x16Eq:
  2703  				result = []bool{
  2704  					byte(x1Lo>>0) == byte(x2Lo>>0), byte(x1Lo>>8) == byte(x2Lo>>8),
  2705  					byte(x1Lo>>16) == byte(x2Lo>>16), byte(x1Lo>>24) == byte(x2Lo>>24),
  2706  					byte(x1Lo>>32) == byte(x2Lo>>32), byte(x1Lo>>40) == byte(x2Lo>>40),
  2707  					byte(x1Lo>>48) == byte(x2Lo>>48), byte(x1Lo>>56) == byte(x2Lo>>56),
  2708  					byte(x1Hi>>0) == byte(x2Hi>>0), byte(x1Hi>>8) == byte(x2Hi>>8),
  2709  					byte(x1Hi>>16) == byte(x2Hi>>16), byte(x1Hi>>24) == byte(x2Hi>>24),
  2710  					byte(x1Hi>>32) == byte(x2Hi>>32), byte(x1Hi>>40) == byte(x2Hi>>40),
  2711  					byte(x1Hi>>48) == byte(x2Hi>>48), byte(x1Hi>>56) == byte(x2Hi>>56),
  2712  				}
  2713  			case wazeroir.V128CmpTypeI8x16Ne:
  2714  				result = []bool{
  2715  					byte(x1Lo>>0) != byte(x2Lo>>0), byte(x1Lo>>8) != byte(x2Lo>>8),
  2716  					byte(x1Lo>>16) != byte(x2Lo>>16), byte(x1Lo>>24) != byte(x2Lo>>24),
  2717  					byte(x1Lo>>32) != byte(x2Lo>>32), byte(x1Lo>>40) != byte(x2Lo>>40),
  2718  					byte(x1Lo>>48) != byte(x2Lo>>48), byte(x1Lo>>56) != byte(x2Lo>>56),
  2719  					byte(x1Hi>>0) != byte(x2Hi>>0), byte(x1Hi>>8) != byte(x2Hi>>8),
  2720  					byte(x1Hi>>16) != byte(x2Hi>>16), byte(x1Hi>>24) != byte(x2Hi>>24),
  2721  					byte(x1Hi>>32) != byte(x2Hi>>32), byte(x1Hi>>40) != byte(x2Hi>>40),
  2722  					byte(x1Hi>>48) != byte(x2Hi>>48), byte(x1Hi>>56) != byte(x2Hi>>56),
  2723  				}
  2724  			case wazeroir.V128CmpTypeI8x16LtS:
  2725  				result = []bool{
  2726  					int8(x1Lo>>0) < int8(x2Lo>>0), int8(x1Lo>>8) < int8(x2Lo>>8),
  2727  					int8(x1Lo>>16) < int8(x2Lo>>16), int8(x1Lo>>24) < int8(x2Lo>>24),
  2728  					int8(x1Lo>>32) < int8(x2Lo>>32), int8(x1Lo>>40) < int8(x2Lo>>40),
  2729  					int8(x1Lo>>48) < int8(x2Lo>>48), int8(x1Lo>>56) < int8(x2Lo>>56),
  2730  					int8(x1Hi>>0) < int8(x2Hi>>0), int8(x1Hi>>8) < int8(x2Hi>>8),
  2731  					int8(x1Hi>>16) < int8(x2Hi>>16), int8(x1Hi>>24) < int8(x2Hi>>24),
  2732  					int8(x1Hi>>32) < int8(x2Hi>>32), int8(x1Hi>>40) < int8(x2Hi>>40),
  2733  					int8(x1Hi>>48) < int8(x2Hi>>48), int8(x1Hi>>56) < int8(x2Hi>>56),
  2734  				}
  2735  			case wazeroir.V128CmpTypeI8x16LtU:
  2736  				result = []bool{
  2737  					byte(x1Lo>>0) < byte(x2Lo>>0), byte(x1Lo>>8) < byte(x2Lo>>8),
  2738  					byte(x1Lo>>16) < byte(x2Lo>>16), byte(x1Lo>>24) < byte(x2Lo>>24),
  2739  					byte(x1Lo>>32) < byte(x2Lo>>32), byte(x1Lo>>40) < byte(x2Lo>>40),
  2740  					byte(x1Lo>>48) < byte(x2Lo>>48), byte(x1Lo>>56) < byte(x2Lo>>56),
  2741  					byte(x1Hi>>0) < byte(x2Hi>>0), byte(x1Hi>>8) < byte(x2Hi>>8),
  2742  					byte(x1Hi>>16) < byte(x2Hi>>16), byte(x1Hi>>24) < byte(x2Hi>>24),
  2743  					byte(x1Hi>>32) < byte(x2Hi>>32), byte(x1Hi>>40) < byte(x2Hi>>40),
  2744  					byte(x1Hi>>48) < byte(x2Hi>>48), byte(x1Hi>>56) < byte(x2Hi>>56),
  2745  				}
  2746  			case wazeroir.V128CmpTypeI8x16GtS:
  2747  				result = []bool{
  2748  					int8(x1Lo>>0) > int8(x2Lo>>0), int8(x1Lo>>8) > int8(x2Lo>>8),
  2749  					int8(x1Lo>>16) > int8(x2Lo>>16), int8(x1Lo>>24) > int8(x2Lo>>24),
  2750  					int8(x1Lo>>32) > int8(x2Lo>>32), int8(x1Lo>>40) > int8(x2Lo>>40),
  2751  					int8(x1Lo>>48) > int8(x2Lo>>48), int8(x1Lo>>56) > int8(x2Lo>>56),
  2752  					int8(x1Hi>>0) > int8(x2Hi>>0), int8(x1Hi>>8) > int8(x2Hi>>8),
  2753  					int8(x1Hi>>16) > int8(x2Hi>>16), int8(x1Hi>>24) > int8(x2Hi>>24),
  2754  					int8(x1Hi>>32) > int8(x2Hi>>32), int8(x1Hi>>40) > int8(x2Hi>>40),
  2755  					int8(x1Hi>>48) > int8(x2Hi>>48), int8(x1Hi>>56) > int8(x2Hi>>56),
  2756  				}
  2757  			case wazeroir.V128CmpTypeI8x16GtU:
  2758  				result = []bool{
  2759  					byte(x1Lo>>0) > byte(x2Lo>>0), byte(x1Lo>>8) > byte(x2Lo>>8),
  2760  					byte(x1Lo>>16) > byte(x2Lo>>16), byte(x1Lo>>24) > byte(x2Lo>>24),
  2761  					byte(x1Lo>>32) > byte(x2Lo>>32), byte(x1Lo>>40) > byte(x2Lo>>40),
  2762  					byte(x1Lo>>48) > byte(x2Lo>>48), byte(x1Lo>>56) > byte(x2Lo>>56),
  2763  					byte(x1Hi>>0) > byte(x2Hi>>0), byte(x1Hi>>8) > byte(x2Hi>>8),
  2764  					byte(x1Hi>>16) > byte(x2Hi>>16), byte(x1Hi>>24) > byte(x2Hi>>24),
  2765  					byte(x1Hi>>32) > byte(x2Hi>>32), byte(x1Hi>>40) > byte(x2Hi>>40),
  2766  					byte(x1Hi>>48) > byte(x2Hi>>48), byte(x1Hi>>56) > byte(x2Hi>>56),
  2767  				}
  2768  			case wazeroir.V128CmpTypeI8x16LeS:
  2769  				result = []bool{
  2770  					int8(x1Lo>>0) <= int8(x2Lo>>0), int8(x1Lo>>8) <= int8(x2Lo>>8),
  2771  					int8(x1Lo>>16) <= int8(x2Lo>>16), int8(x1Lo>>24) <= int8(x2Lo>>24),
  2772  					int8(x1Lo>>32) <= int8(x2Lo>>32), int8(x1Lo>>40) <= int8(x2Lo>>40),
  2773  					int8(x1Lo>>48) <= int8(x2Lo>>48), int8(x1Lo>>56) <= int8(x2Lo>>56),
  2774  					int8(x1Hi>>0) <= int8(x2Hi>>0), int8(x1Hi>>8) <= int8(x2Hi>>8),
  2775  					int8(x1Hi>>16) <= int8(x2Hi>>16), int8(x1Hi>>24) <= int8(x2Hi>>24),
  2776  					int8(x1Hi>>32) <= int8(x2Hi>>32), int8(x1Hi>>40) <= int8(x2Hi>>40),
  2777  					int8(x1Hi>>48) <= int8(x2Hi>>48), int8(x1Hi>>56) <= int8(x2Hi>>56),
  2778  				}
  2779  			case wazeroir.V128CmpTypeI8x16LeU:
  2780  				result = []bool{
  2781  					byte(x1Lo>>0) <= byte(x2Lo>>0), byte(x1Lo>>8) <= byte(x2Lo>>8),
  2782  					byte(x1Lo>>16) <= byte(x2Lo>>16), byte(x1Lo>>24) <= byte(x2Lo>>24),
  2783  					byte(x1Lo>>32) <= byte(x2Lo>>32), byte(x1Lo>>40) <= byte(x2Lo>>40),
  2784  					byte(x1Lo>>48) <= byte(x2Lo>>48), byte(x1Lo>>56) <= byte(x2Lo>>56),
  2785  					byte(x1Hi>>0) <= byte(x2Hi>>0), byte(x1Hi>>8) <= byte(x2Hi>>8),
  2786  					byte(x1Hi>>16) <= byte(x2Hi>>16), byte(x1Hi>>24) <= byte(x2Hi>>24),
  2787  					byte(x1Hi>>32) <= byte(x2Hi>>32), byte(x1Hi>>40) <= byte(x2Hi>>40),
  2788  					byte(x1Hi>>48) <= byte(x2Hi>>48), byte(x1Hi>>56) <= byte(x2Hi>>56),
  2789  				}
  2790  			case wazeroir.V128CmpTypeI8x16GeS:
  2791  				result = []bool{
  2792  					int8(x1Lo>>0) >= int8(x2Lo>>0), int8(x1Lo>>8) >= int8(x2Lo>>8),
  2793  					int8(x1Lo>>16) >= int8(x2Lo>>16), int8(x1Lo>>24) >= int8(x2Lo>>24),
  2794  					int8(x1Lo>>32) >= int8(x2Lo>>32), int8(x1Lo>>40) >= int8(x2Lo>>40),
  2795  					int8(x1Lo>>48) >= int8(x2Lo>>48), int8(x1Lo>>56) >= int8(x2Lo>>56),
  2796  					int8(x1Hi>>0) >= int8(x2Hi>>0), int8(x1Hi>>8) >= int8(x2Hi>>8),
  2797  					int8(x1Hi>>16) >= int8(x2Hi>>16), int8(x1Hi>>24) >= int8(x2Hi>>24),
  2798  					int8(x1Hi>>32) >= int8(x2Hi>>32), int8(x1Hi>>40) >= int8(x2Hi>>40),
  2799  					int8(x1Hi>>48) >= int8(x2Hi>>48), int8(x1Hi>>56) >= int8(x2Hi>>56),
  2800  				}
  2801  			case wazeroir.V128CmpTypeI8x16GeU:
  2802  				result = []bool{
  2803  					byte(x1Lo>>0) >= byte(x2Lo>>0), byte(x1Lo>>8) >= byte(x2Lo>>8),
  2804  					byte(x1Lo>>16) >= byte(x2Lo>>16), byte(x1Lo>>24) >= byte(x2Lo>>24),
  2805  					byte(x1Lo>>32) >= byte(x2Lo>>32), byte(x1Lo>>40) >= byte(x2Lo>>40),
  2806  					byte(x1Lo>>48) >= byte(x2Lo>>48), byte(x1Lo>>56) >= byte(x2Lo>>56),
  2807  					byte(x1Hi>>0) >= byte(x2Hi>>0), byte(x1Hi>>8) >= byte(x2Hi>>8),
  2808  					byte(x1Hi>>16) >= byte(x2Hi>>16), byte(x1Hi>>24) >= byte(x2Hi>>24),
  2809  					byte(x1Hi>>32) >= byte(x2Hi>>32), byte(x1Hi>>40) >= byte(x2Hi>>40),
  2810  					byte(x1Hi>>48) >= byte(x2Hi>>48), byte(x1Hi>>56) >= byte(x2Hi>>56),
  2811  				}
  2812  			case wazeroir.V128CmpTypeI16x8Eq:
  2813  				result = []bool{
  2814  					uint16(x1Lo>>0) == uint16(x2Lo>>0), uint16(x1Lo>>16) == uint16(x2Lo>>16),
  2815  					uint16(x1Lo>>32) == uint16(x2Lo>>32), uint16(x1Lo>>48) == uint16(x2Lo>>48),
  2816  					uint16(x1Hi>>0) == uint16(x2Hi>>0), uint16(x1Hi>>16) == uint16(x2Hi>>16),
  2817  					uint16(x1Hi>>32) == uint16(x2Hi>>32), uint16(x1Hi>>48) == uint16(x2Hi>>48),
  2818  				}
  2819  			case wazeroir.V128CmpTypeI16x8Ne:
  2820  				result = []bool{
  2821  					uint16(x1Lo>>0) != uint16(x2Lo>>0), uint16(x1Lo>>16) != uint16(x2Lo>>16),
  2822  					uint16(x1Lo>>32) != uint16(x2Lo>>32), uint16(x1Lo>>48) != uint16(x2Lo>>48),
  2823  					uint16(x1Hi>>0) != uint16(x2Hi>>0), uint16(x1Hi>>16) != uint16(x2Hi>>16),
  2824  					uint16(x1Hi>>32) != uint16(x2Hi>>32), uint16(x1Hi>>48) != uint16(x2Hi>>48),
  2825  				}
  2826  			case wazeroir.V128CmpTypeI16x8LtS:
  2827  				result = []bool{
  2828  					int16(x1Lo>>0) < int16(x2Lo>>0), int16(x1Lo>>16) < int16(x2Lo>>16),
  2829  					int16(x1Lo>>32) < int16(x2Lo>>32), int16(x1Lo>>48) < int16(x2Lo>>48),
  2830  					int16(x1Hi>>0) < int16(x2Hi>>0), int16(x1Hi>>16) < int16(x2Hi>>16),
  2831  					int16(x1Hi>>32) < int16(x2Hi>>32), int16(x1Hi>>48) < int16(x2Hi>>48),
  2832  				}
  2833  			case wazeroir.V128CmpTypeI16x8LtU:
  2834  				result = []bool{
  2835  					uint16(x1Lo>>0) < uint16(x2Lo>>0), uint16(x1Lo>>16) < uint16(x2Lo>>16),
  2836  					uint16(x1Lo>>32) < uint16(x2Lo>>32), uint16(x1Lo>>48) < uint16(x2Lo>>48),
  2837  					uint16(x1Hi>>0) < uint16(x2Hi>>0), uint16(x1Hi>>16) < uint16(x2Hi>>16),
  2838  					uint16(x1Hi>>32) < uint16(x2Hi>>32), uint16(x1Hi>>48) < uint16(x2Hi>>48),
  2839  				}
  2840  			case wazeroir.V128CmpTypeI16x8GtS:
  2841  				result = []bool{
  2842  					int16(x1Lo>>0) > int16(x2Lo>>0), int16(x1Lo>>16) > int16(x2Lo>>16),
  2843  					int16(x1Lo>>32) > int16(x2Lo>>32), int16(x1Lo>>48) > int16(x2Lo>>48),
  2844  					int16(x1Hi>>0) > int16(x2Hi>>0), int16(x1Hi>>16) > int16(x2Hi>>16),
  2845  					int16(x1Hi>>32) > int16(x2Hi>>32), int16(x1Hi>>48) > int16(x2Hi>>48),
  2846  				}
  2847  			case wazeroir.V128CmpTypeI16x8GtU:
  2848  				result = []bool{
  2849  					uint16(x1Lo>>0) > uint16(x2Lo>>0), uint16(x1Lo>>16) > uint16(x2Lo>>16),
  2850  					uint16(x1Lo>>32) > uint16(x2Lo>>32), uint16(x1Lo>>48) > uint16(x2Lo>>48),
  2851  					uint16(x1Hi>>0) > uint16(x2Hi>>0), uint16(x1Hi>>16) > uint16(x2Hi>>16),
  2852  					uint16(x1Hi>>32) > uint16(x2Hi>>32), uint16(x1Hi>>48) > uint16(x2Hi>>48),
  2853  				}
  2854  			case wazeroir.V128CmpTypeI16x8LeS:
  2855  				result = []bool{
  2856  					int16(x1Lo>>0) <= int16(x2Lo>>0), int16(x1Lo>>16) <= int16(x2Lo>>16),
  2857  					int16(x1Lo>>32) <= int16(x2Lo>>32), int16(x1Lo>>48) <= int16(x2Lo>>48),
  2858  					int16(x1Hi>>0) <= int16(x2Hi>>0), int16(x1Hi>>16) <= int16(x2Hi>>16),
  2859  					int16(x1Hi>>32) <= int16(x2Hi>>32), int16(x1Hi>>48) <= int16(x2Hi>>48),
  2860  				}
  2861  			case wazeroir.V128CmpTypeI16x8LeU:
  2862  				result = []bool{
  2863  					uint16(x1Lo>>0) <= uint16(x2Lo>>0), uint16(x1Lo>>16) <= uint16(x2Lo>>16),
  2864  					uint16(x1Lo>>32) <= uint16(x2Lo>>32), uint16(x1Lo>>48) <= uint16(x2Lo>>48),
  2865  					uint16(x1Hi>>0) <= uint16(x2Hi>>0), uint16(x1Hi>>16) <= uint16(x2Hi>>16),
  2866  					uint16(x1Hi>>32) <= uint16(x2Hi>>32), uint16(x1Hi>>48) <= uint16(x2Hi>>48),
  2867  				}
  2868  			case wazeroir.V128CmpTypeI16x8GeS:
  2869  				result = []bool{
  2870  					int16(x1Lo>>0) >= int16(x2Lo>>0), int16(x1Lo>>16) >= int16(x2Lo>>16),
  2871  					int16(x1Lo>>32) >= int16(x2Lo>>32), int16(x1Lo>>48) >= int16(x2Lo>>48),
  2872  					int16(x1Hi>>0) >= int16(x2Hi>>0), int16(x1Hi>>16) >= int16(x2Hi>>16),
  2873  					int16(x1Hi>>32) >= int16(x2Hi>>32), int16(x1Hi>>48) >= int16(x2Hi>>48),
  2874  				}
  2875  			case wazeroir.V128CmpTypeI16x8GeU:
  2876  				result = []bool{
  2877  					uint16(x1Lo>>0) >= uint16(x2Lo>>0), uint16(x1Lo>>16) >= uint16(x2Lo>>16),
  2878  					uint16(x1Lo>>32) >= uint16(x2Lo>>32), uint16(x1Lo>>48) >= uint16(x2Lo>>48),
  2879  					uint16(x1Hi>>0) >= uint16(x2Hi>>0), uint16(x1Hi>>16) >= uint16(x2Hi>>16),
  2880  					uint16(x1Hi>>32) >= uint16(x2Hi>>32), uint16(x1Hi>>48) >= uint16(x2Hi>>48),
  2881  				}
  2882  			case wazeroir.V128CmpTypeI32x4Eq:
  2883  				result = []bool{
  2884  					uint32(x1Lo>>0) == uint32(x2Lo>>0), uint32(x1Lo>>32) == uint32(x2Lo>>32),
  2885  					uint32(x1Hi>>0) == uint32(x2Hi>>0), uint32(x1Hi>>32) == uint32(x2Hi>>32),
  2886  				}
  2887  			case wazeroir.V128CmpTypeI32x4Ne:
  2888  				result = []bool{
  2889  					uint32(x1Lo>>0) != uint32(x2Lo>>0), uint32(x1Lo>>32) != uint32(x2Lo>>32),
  2890  					uint32(x1Hi>>0) != uint32(x2Hi>>0), uint32(x1Hi>>32) != uint32(x2Hi>>32),
  2891  				}
  2892  			case wazeroir.V128CmpTypeI32x4LtS:
  2893  				result = []bool{
  2894  					int32(x1Lo>>0) < int32(x2Lo>>0), int32(x1Lo>>32) < int32(x2Lo>>32),
  2895  					int32(x1Hi>>0) < int32(x2Hi>>0), int32(x1Hi>>32) < int32(x2Hi>>32),
  2896  				}
  2897  			case wazeroir.V128CmpTypeI32x4LtU:
  2898  				result = []bool{
  2899  					uint32(x1Lo>>0) < uint32(x2Lo>>0), uint32(x1Lo>>32) < uint32(x2Lo>>32),
  2900  					uint32(x1Hi>>0) < uint32(x2Hi>>0), uint32(x1Hi>>32) < uint32(x2Hi>>32),
  2901  				}
  2902  			case wazeroir.V128CmpTypeI32x4GtS:
  2903  				result = []bool{
  2904  					int32(x1Lo>>0) > int32(x2Lo>>0), int32(x1Lo>>32) > int32(x2Lo>>32),
  2905  					int32(x1Hi>>0) > int32(x2Hi>>0), int32(x1Hi>>32) > int32(x2Hi>>32),
  2906  				}
  2907  			case wazeroir.V128CmpTypeI32x4GtU:
  2908  				result = []bool{
  2909  					uint32(x1Lo>>0) > uint32(x2Lo>>0), uint32(x1Lo>>32) > uint32(x2Lo>>32),
  2910  					uint32(x1Hi>>0) > uint32(x2Hi>>0), uint32(x1Hi>>32) > uint32(x2Hi>>32),
  2911  				}
  2912  			case wazeroir.V128CmpTypeI32x4LeS:
  2913  				result = []bool{
  2914  					int32(x1Lo>>0) <= int32(x2Lo>>0), int32(x1Lo>>32) <= int32(x2Lo>>32),
  2915  					int32(x1Hi>>0) <= int32(x2Hi>>0), int32(x1Hi>>32) <= int32(x2Hi>>32),
  2916  				}
  2917  			case wazeroir.V128CmpTypeI32x4LeU:
  2918  				result = []bool{
  2919  					uint32(x1Lo>>0) <= uint32(x2Lo>>0), uint32(x1Lo>>32) <= uint32(x2Lo>>32),
  2920  					uint32(x1Hi>>0) <= uint32(x2Hi>>0), uint32(x1Hi>>32) <= uint32(x2Hi>>32),
  2921  				}
  2922  			case wazeroir.V128CmpTypeI32x4GeS:
  2923  				result = []bool{
  2924  					int32(x1Lo>>0) >= int32(x2Lo>>0), int32(x1Lo>>32) >= int32(x2Lo>>32),
  2925  					int32(x1Hi>>0) >= int32(x2Hi>>0), int32(x1Hi>>32) >= int32(x2Hi>>32),
  2926  				}
  2927  			case wazeroir.V128CmpTypeI32x4GeU:
  2928  				result = []bool{
  2929  					uint32(x1Lo>>0) >= uint32(x2Lo>>0), uint32(x1Lo>>32) >= uint32(x2Lo>>32),
  2930  					uint32(x1Hi>>0) >= uint32(x2Hi>>0), uint32(x1Hi>>32) >= uint32(x2Hi>>32),
  2931  				}
  2932  			case wazeroir.V128CmpTypeI64x2Eq:
  2933  				result = []bool{x1Lo == x2Lo, x1Hi == x2Hi}
  2934  			case wazeroir.V128CmpTypeI64x2Ne:
  2935  				result = []bool{x1Lo != x2Lo, x1Hi != x2Hi}
  2936  			case wazeroir.V128CmpTypeI64x2LtS:
  2937  				result = []bool{int64(x1Lo) < int64(x2Lo), int64(x1Hi) < int64(x2Hi)}
  2938  			case wazeroir.V128CmpTypeI64x2GtS:
  2939  				result = []bool{int64(x1Lo) > int64(x2Lo), int64(x1Hi) > int64(x2Hi)}
  2940  			case wazeroir.V128CmpTypeI64x2LeS:
  2941  				result = []bool{int64(x1Lo) <= int64(x2Lo), int64(x1Hi) <= int64(x2Hi)}
  2942  			case wazeroir.V128CmpTypeI64x2GeS:
  2943  				result = []bool{int64(x1Lo) >= int64(x2Lo), int64(x1Hi) >= int64(x2Hi)}
  2944  			case wazeroir.V128CmpTypeF32x4Eq:
  2945  				result = []bool{
  2946  					math.Float32frombits(uint32(x1Lo>>0)) == math.Float32frombits(uint32(x2Lo>>0)),
  2947  					math.Float32frombits(uint32(x1Lo>>32)) == math.Float32frombits(uint32(x2Lo>>32)),
  2948  					math.Float32frombits(uint32(x1Hi>>0)) == math.Float32frombits(uint32(x2Hi>>0)),
  2949  					math.Float32frombits(uint32(x1Hi>>32)) == math.Float32frombits(uint32(x2Hi>>32)),
  2950  				}
  2951  			case wazeroir.V128CmpTypeF32x4Ne:
  2952  				result = []bool{
  2953  					math.Float32frombits(uint32(x1Lo>>0)) != math.Float32frombits(uint32(x2Lo>>0)),
  2954  					math.Float32frombits(uint32(x1Lo>>32)) != math.Float32frombits(uint32(x2Lo>>32)),
  2955  					math.Float32frombits(uint32(x1Hi>>0)) != math.Float32frombits(uint32(x2Hi>>0)),
  2956  					math.Float32frombits(uint32(x1Hi>>32)) != math.Float32frombits(uint32(x2Hi>>32)),
  2957  				}
  2958  			case wazeroir.V128CmpTypeF32x4Lt:
  2959  				result = []bool{
  2960  					math.Float32frombits(uint32(x1Lo>>0)) < math.Float32frombits(uint32(x2Lo>>0)),
  2961  					math.Float32frombits(uint32(x1Lo>>32)) < math.Float32frombits(uint32(x2Lo>>32)),
  2962  					math.Float32frombits(uint32(x1Hi>>0)) < math.Float32frombits(uint32(x2Hi>>0)),
  2963  					math.Float32frombits(uint32(x1Hi>>32)) < math.Float32frombits(uint32(x2Hi>>32)),
  2964  				}
  2965  			case wazeroir.V128CmpTypeF32x4Gt:
  2966  				result = []bool{
  2967  					math.Float32frombits(uint32(x1Lo>>0)) > math.Float32frombits(uint32(x2Lo>>0)),
  2968  					math.Float32frombits(uint32(x1Lo>>32)) > math.Float32frombits(uint32(x2Lo>>32)),
  2969  					math.Float32frombits(uint32(x1Hi>>0)) > math.Float32frombits(uint32(x2Hi>>0)),
  2970  					math.Float32frombits(uint32(x1Hi>>32)) > math.Float32frombits(uint32(x2Hi>>32)),
  2971  				}
  2972  			case wazeroir.V128CmpTypeF32x4Le:
  2973  				result = []bool{
  2974  					math.Float32frombits(uint32(x1Lo>>0)) <= math.Float32frombits(uint32(x2Lo>>0)),
  2975  					math.Float32frombits(uint32(x1Lo>>32)) <= math.Float32frombits(uint32(x2Lo>>32)),
  2976  					math.Float32frombits(uint32(x1Hi>>0)) <= math.Float32frombits(uint32(x2Hi>>0)),
  2977  					math.Float32frombits(uint32(x1Hi>>32)) <= math.Float32frombits(uint32(x2Hi>>32)),
  2978  				}
  2979  			case wazeroir.V128CmpTypeF32x4Ge:
  2980  				result = []bool{
  2981  					math.Float32frombits(uint32(x1Lo>>0)) >= math.Float32frombits(uint32(x2Lo>>0)),
  2982  					math.Float32frombits(uint32(x1Lo>>32)) >= math.Float32frombits(uint32(x2Lo>>32)),
  2983  					math.Float32frombits(uint32(x1Hi>>0)) >= math.Float32frombits(uint32(x2Hi>>0)),
  2984  					math.Float32frombits(uint32(x1Hi>>32)) >= math.Float32frombits(uint32(x2Hi>>32)),
  2985  				}
  2986  			case wazeroir.V128CmpTypeF64x2Eq:
  2987  				result = []bool{
  2988  					math.Float64frombits(x1Lo) == math.Float64frombits(x2Lo),
  2989  					math.Float64frombits(x1Hi) == math.Float64frombits(x2Hi),
  2990  				}
  2991  			case wazeroir.V128CmpTypeF64x2Ne:
  2992  				result = []bool{
  2993  					math.Float64frombits(x1Lo) != math.Float64frombits(x2Lo),
  2994  					math.Float64frombits(x1Hi) != math.Float64frombits(x2Hi),
  2995  				}
  2996  			case wazeroir.V128CmpTypeF64x2Lt:
  2997  				result = []bool{
  2998  					math.Float64frombits(x1Lo) < math.Float64frombits(x2Lo),
  2999  					math.Float64frombits(x1Hi) < math.Float64frombits(x2Hi),
  3000  				}
  3001  			case wazeroir.V128CmpTypeF64x2Gt:
  3002  				result = []bool{
  3003  					math.Float64frombits(x1Lo) > math.Float64frombits(x2Lo),
  3004  					math.Float64frombits(x1Hi) > math.Float64frombits(x2Hi),
  3005  				}
  3006  			case wazeroir.V128CmpTypeF64x2Le:
  3007  				result = []bool{
  3008  					math.Float64frombits(x1Lo) <= math.Float64frombits(x2Lo),
  3009  					math.Float64frombits(x1Hi) <= math.Float64frombits(x2Hi),
  3010  				}
  3011  			case wazeroir.V128CmpTypeF64x2Ge:
  3012  				result = []bool{
  3013  					math.Float64frombits(x1Lo) >= math.Float64frombits(x2Lo),
  3014  					math.Float64frombits(x1Hi) >= math.Float64frombits(x2Hi),
  3015  				}
  3016  			}
  3017  
  3018  			var retLo, retHi uint64
  3019  			laneNum := len(result)
  3020  			switch laneNum {
  3021  			case 16:
  3022  				for i, b := range result {
  3023  					if b {
  3024  						if i < 8 {
  3025  							retLo |= 0xff << (i * 8)
  3026  						} else {
  3027  							retHi |= 0xff << ((i - 8) * 8)
  3028  						}
  3029  					}
  3030  				}
  3031  			case 8:
  3032  				for i, b := range result {
  3033  					if b {
  3034  						if i < 4 {
  3035  							retLo |= 0xffff << (i * 16)
  3036  						} else {
  3037  							retHi |= 0xffff << ((i - 4) * 16)
  3038  						}
  3039  					}
  3040  				}
  3041  			case 4:
  3042  				for i, b := range result {
  3043  					if b {
  3044  						if i < 2 {
  3045  							retLo |= 0xffff_ffff << (i * 32)
  3046  						} else {
  3047  							retHi |= 0xffff_ffff << ((i - 2) * 32)
  3048  						}
  3049  					}
  3050  				}
  3051  			case 2:
  3052  				if result[0] {
  3053  					retLo = ^uint64(0)
  3054  				}
  3055  				if result[1] {
  3056  					retHi = ^uint64(0)
  3057  				}
  3058  			}
  3059  
  3060  			ce.pushValue(retLo)
  3061  			ce.pushValue(retHi)
  3062  			frame.pc++
  3063  		case wazeroir.OperationKindV128AddSat:
  3064  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  3065  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  3066  
  3067  			var retLo, retHi uint64
  3068  
  3069  			// Lane-wise addition while saturating the overflowing values.
  3070  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-addition
  3071  			switch op.b1 {
  3072  			case wazeroir.ShapeI8x16:
  3073  				for i := 0; i < 16; i++ {
  3074  					var v, w byte
  3075  					if i < 8 {
  3076  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  3077  					} else {
  3078  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  3079  					}
  3080  
  3081  					var uv uint64
  3082  					if op.b3 { // signed
  3083  						if subbed := int64(int8(v)) + int64(int8(w)); subbed < math.MinInt8 {
  3084  							uv = uint64(byte(0x80))
  3085  						} else if subbed > math.MaxInt8 {
  3086  							uv = uint64(byte(0x7f))
  3087  						} else {
  3088  							uv = uint64(byte(int8(subbed)))
  3089  						}
  3090  					} else {
  3091  						if subbed := int64(v) + int64(w); subbed < 0 {
  3092  							uv = uint64(byte(0))
  3093  						} else if subbed > math.MaxUint8 {
  3094  							uv = uint64(byte(0xff))
  3095  						} else {
  3096  							uv = uint64(byte(subbed))
  3097  						}
  3098  					}
  3099  
  3100  					if i < 8 { // first 8 lanes are on lower 64bits.
  3101  						retLo |= uv << (i * 8)
  3102  					} else {
  3103  						retHi |= uv << ((i - 8) * 8)
  3104  					}
  3105  				}
  3106  			case wazeroir.ShapeI16x8:
  3107  				for i := 0; i < 8; i++ {
  3108  					var v, w uint16
  3109  					if i < 4 {
  3110  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  3111  					} else {
  3112  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  3113  					}
  3114  
  3115  					var uv uint64
  3116  					if op.b3 { // signed
  3117  						if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 {
  3118  							uv = uint64(uint16(0x8000))
  3119  						} else if added > math.MaxInt16 {
  3120  							uv = uint64(uint16(0x7fff))
  3121  						} else {
  3122  							uv = uint64(uint16(int16(added)))
  3123  						}
  3124  					} else {
  3125  						if added := int64(v) + int64(w); added < 0 {
  3126  							uv = uint64(uint16(0))
  3127  						} else if added > math.MaxUint16 {
  3128  							uv = uint64(uint16(0xffff))
  3129  						} else {
  3130  							uv = uint64(uint16(added))
  3131  						}
  3132  					}
  3133  
  3134  					if i < 4 { // first 4 lanes are on lower 64bits.
  3135  						retLo |= uv << (i * 16)
  3136  					} else {
  3137  						retHi |= uv << ((i - 4) * 16)
  3138  					}
  3139  				}
  3140  			}
  3141  
  3142  			ce.pushValue(retLo)
  3143  			ce.pushValue(retHi)
  3144  			frame.pc++
  3145  		case wazeroir.OperationKindV128SubSat:
  3146  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  3147  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  3148  
  3149  			var retLo, retHi uint64
  3150  
  3151  			// Lane-wise subtraction while saturating the overflowing values.
  3152  			// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-subtraction
  3153  			switch op.b1 {
  3154  			case wazeroir.ShapeI8x16:
  3155  				for i := 0; i < 16; i++ {
  3156  					var v, w byte
  3157  					if i < 8 {
  3158  						v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
  3159  					} else {
  3160  						v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
  3161  					}
  3162  
  3163  					var uv uint64
  3164  					if op.b3 { // signed
  3165  						if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 {
  3166  							uv = uint64(byte(0x80))
  3167  						} else if subbed > math.MaxInt8 {
  3168  							uv = uint64(byte(0x7f))
  3169  						} else {
  3170  							uv = uint64(byte(int8(subbed)))
  3171  						}
  3172  					} else {
  3173  						if subbed := int64(v) - int64(w); subbed < 0 {
  3174  							uv = uint64(byte(0))
  3175  						} else if subbed > math.MaxUint8 {
  3176  							uv = uint64(byte(0xff))
  3177  						} else {
  3178  							uv = uint64(byte(subbed))
  3179  						}
  3180  					}
  3181  
  3182  					if i < 8 {
  3183  						retLo |= uv << (i * 8)
  3184  					} else {
  3185  						retHi |= uv << ((i - 8) * 8)
  3186  					}
  3187  				}
  3188  			case wazeroir.ShapeI16x8:
  3189  				for i := 0; i < 8; i++ {
  3190  					var v, w uint16
  3191  					if i < 4 {
  3192  						v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
  3193  					} else {
  3194  						v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
  3195  					}
  3196  
  3197  					var uv uint64
  3198  					if op.b3 { // signed
  3199  						if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 {
  3200  							uv = uint64(uint16(0x8000))
  3201  						} else if subbed > math.MaxInt16 {
  3202  							uv = uint64(uint16(0x7fff))
  3203  						} else {
  3204  							uv = uint64(uint16(int16(subbed)))
  3205  						}
  3206  					} else {
  3207  						if subbed := int64(v) - int64(w); subbed < 0 {
  3208  							uv = uint64(uint16(0))
  3209  						} else if subbed > math.MaxUint16 {
  3210  							uv = uint64(uint16(0xffff))
  3211  						} else {
  3212  							uv = uint64(uint16(subbed))
  3213  						}
  3214  					}
  3215  
  3216  					if i < 4 {
  3217  						retLo |= uv << (i * 16)
  3218  					} else {
  3219  						retHi |= uv << ((i - 4) * 16)
  3220  					}
  3221  				}
  3222  			}
  3223  
  3224  			ce.pushValue(retLo)
  3225  			ce.pushValue(retHi)
  3226  			frame.pc++
  3227  		case wazeroir.OperationKindV128Mul:
  3228  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3229  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3230  			var retLo, retHi uint64
  3231  			switch op.b1 {
  3232  			case wazeroir.ShapeI16x8:
  3233  				retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) |
  3234  					(uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48)
  3235  				retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) |
  3236  					(uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48)
  3237  			case wazeroir.ShapeI32x4:
  3238  				retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32)
  3239  				retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32)
  3240  			case wazeroir.ShapeI64x2:
  3241  				retHi = x1hi * x2hi
  3242  				retLo = x1lo * x2lo
  3243  			case wazeroir.ShapeF32x4:
  3244  				retHi = mulFloat32bits(uint32(x1hi), uint32(x2hi)) | mulFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3245  				retLo = mulFloat32bits(uint32(x1lo), uint32(x2lo)) | mulFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3246  			case wazeroir.ShapeF64x2:
  3247  				retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi))
  3248  				retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo))
  3249  			}
  3250  			ce.pushValue(retLo)
  3251  			ce.pushValue(retHi)
  3252  			frame.pc++
  3253  		case wazeroir.OperationKindV128Div:
  3254  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3255  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3256  			var retLo, retHi uint64
  3257  			if op.b1 == wazeroir.ShapeF64x2 {
  3258  				retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi))
  3259  				retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo))
  3260  			} else {
  3261  				retHi = divFloat32bits(uint32(x1hi), uint32(x2hi)) | divFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3262  				retLo = divFloat32bits(uint32(x1lo), uint32(x2lo)) | divFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3263  			}
  3264  			ce.pushValue(retLo)
  3265  			ce.pushValue(retHi)
  3266  			frame.pc++
  3267  		case wazeroir.OperationKindV128Neg:
  3268  			hi, lo := ce.popValue(), ce.popValue()
  3269  			switch op.b1 {
  3270  			case wazeroir.ShapeI8x16:
  3271  				lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) |
  3272  					(uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) |
  3273  					(uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) |
  3274  					(uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56)
  3275  				hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) |
  3276  					(uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) |
  3277  					(uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) |
  3278  					(uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56)
  3279  			case wazeroir.ShapeI16x8:
  3280  				hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) |
  3281  					(uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48)
  3282  				lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) |
  3283  					(uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48)
  3284  			case wazeroir.ShapeI32x4:
  3285  				hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32)
  3286  				lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32)
  3287  			case wazeroir.ShapeI64x2:
  3288  				hi = -hi
  3289  				lo = -lo
  3290  			case wazeroir.ShapeF32x4:
  3291  				hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) |
  3292  					(uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32)
  3293  				lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) |
  3294  					(uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32)
  3295  			case wazeroir.ShapeF64x2:
  3296  				hi = math.Float64bits(-math.Float64frombits(hi))
  3297  				lo = math.Float64bits(-math.Float64frombits(lo))
  3298  			}
  3299  			ce.pushValue(lo)
  3300  			ce.pushValue(hi)
  3301  			frame.pc++
  3302  		case wazeroir.OperationKindV128Sqrt:
  3303  			hi, lo := ce.popValue(), ce.popValue()
  3304  			if op.b1 == wazeroir.ShapeF64x2 {
  3305  				hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi)))
  3306  				lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo)))
  3307  			} else {
  3308  				hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) |
  3309  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32)
  3310  				lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) |
  3311  					(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32)
  3312  			}
  3313  			ce.pushValue(lo)
  3314  			ce.pushValue(hi)
  3315  			frame.pc++
  3316  		case wazeroir.OperationKindV128Abs:
  3317  			hi, lo := ce.popValue(), ce.popValue()
  3318  			switch op.b1 {
  3319  			case wazeroir.ShapeI8x16:
  3320  				lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) |
  3321  					(uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) |
  3322  					(uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) |
  3323  					(uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56)
  3324  				hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) |
  3325  					(uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) |
  3326  					(uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) |
  3327  					(uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56)
  3328  			case wazeroir.ShapeI16x8:
  3329  				hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) |
  3330  					(uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48)
  3331  				lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) |
  3332  					(uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48)
  3333  			case wazeroir.ShapeI32x4:
  3334  				hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32)
  3335  				lo = uint64(i32Abs(uint32(lo))) | (uint64(i32Abs(uint32(lo>>32))) << 32)
  3336  			case wazeroir.ShapeI64x2:
  3337  				if int64(hi) < 0 {
  3338  					hi = -hi
  3339  				}
  3340  				if int64(lo) < 0 {
  3341  					lo = -lo
  3342  				}
  3343  			case wazeroir.ShapeF32x4:
  3344  				hi = hi &^ (1<<31 | 1<<63)
  3345  				lo = lo &^ (1<<31 | 1<<63)
  3346  			case wazeroir.ShapeF64x2:
  3347  				hi = hi &^ (1 << 63)
  3348  				lo = lo &^ (1 << 63)
  3349  			}
  3350  			ce.pushValue(lo)
  3351  			ce.pushValue(hi)
  3352  			frame.pc++
  3353  		case wazeroir.OperationKindV128Popcnt:
  3354  			hi, lo := ce.popValue(), ce.popValue()
  3355  			var retLo, retHi uint64
  3356  			for i := 0; i < 16; i++ {
  3357  				var v byte
  3358  				if i < 8 {
  3359  					v = byte(lo >> (i * 8))
  3360  				} else {
  3361  					v = byte(hi >> ((i - 8) * 8))
  3362  				}
  3363  
  3364  				var cnt uint64
  3365  				for i := 0; i < 8; i++ {
  3366  					if (v>>i)&0b1 != 0 {
  3367  						cnt++
  3368  					}
  3369  				}
  3370  
  3371  				if i < 8 {
  3372  					retLo |= cnt << (i * 8)
  3373  				} else {
  3374  					retHi |= cnt << ((i - 8) * 8)
  3375  				}
  3376  			}
  3377  			ce.pushValue(retLo)
  3378  			ce.pushValue(retHi)
  3379  			frame.pc++
  3380  		case wazeroir.OperationKindV128Min:
  3381  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3382  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3383  			var retLo, retHi uint64
  3384  			switch op.b1 {
  3385  			case wazeroir.ShapeI8x16:
  3386  				if op.b3 { // signed
  3387  					retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) |
  3388  						uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3389  						uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3390  						uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3391  					retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) |
  3392  						uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3393  						uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3394  						uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3395  				} else {
  3396  					retLo = uint64(i8MinU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) |
  3397  						uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3398  						uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3399  						uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3400  					retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) |
  3401  						uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3402  						uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3403  						uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3404  				}
  3405  			case wazeroir.ShapeI16x8:
  3406  				if op.b3 { // signed
  3407  					retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) |
  3408  						uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3409  						uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3410  						uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3411  					retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) |
  3412  						uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3413  						uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3414  						uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3415  				} else {
  3416  					retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) |
  3417  						uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3418  						uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3419  						uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3420  					retHi = uint64(i16MinU(uint16(x1hi), uint16(x2hi))) |
  3421  						uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3422  						uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3423  						uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3424  				}
  3425  			case wazeroir.ShapeI32x4:
  3426  				if op.b3 { // signed
  3427  					retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) |
  3428  						uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3429  					retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) |
  3430  						uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3431  				} else {
  3432  					retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) |
  3433  						uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3434  					retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) |
  3435  						uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3436  				}
  3437  			case wazeroir.ShapeF32x4:
  3438  				retHi = WasmCompatMin32bits(uint32(x1hi), uint32(x2hi)) |
  3439  					WasmCompatMin32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3440  				retLo = WasmCompatMin32bits(uint32(x1lo), uint32(x2lo)) |
  3441  					WasmCompatMin32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3442  			case wazeroir.ShapeF64x2:
  3443  				retHi = math.Float64bits(moremath.WasmCompatMin64(
  3444  					math.Float64frombits(x1hi),
  3445  					math.Float64frombits(x2hi),
  3446  				))
  3447  				retLo = math.Float64bits(moremath.WasmCompatMin64(
  3448  					math.Float64frombits(x1lo),
  3449  					math.Float64frombits(x2lo),
  3450  				))
  3451  			}
  3452  			ce.pushValue(retLo)
  3453  			ce.pushValue(retHi)
  3454  			frame.pc++
  3455  		case wazeroir.OperationKindV128Max:
  3456  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3457  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3458  			var retLo, retHi uint64
  3459  			switch op.b1 {
  3460  			case wazeroir.ShapeI8x16:
  3461  				if op.b3 { // signed
  3462  					retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) |
  3463  						uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3464  						uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3465  						uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3466  					retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) |
  3467  						uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3468  						uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3469  						uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3470  				} else {
  3471  					retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) |
  3472  						uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3473  						uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3474  						uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3475  					retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) |
  3476  						uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3477  						uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3478  						uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3479  				}
  3480  			case wazeroir.ShapeI16x8:
  3481  				if op.b3 { // signed
  3482  					retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) |
  3483  						uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3484  						uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3485  						uint64(i16MaxS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3486  					retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) |
  3487  						uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3488  						uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3489  						uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3490  				} else {
  3491  					retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) |
  3492  						uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3493  						uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3494  						uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3495  					retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) |
  3496  						uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3497  						uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3498  						uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3499  				}
  3500  			case wazeroir.ShapeI32x4:
  3501  				if op.b3 { // signed
  3502  					retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) |
  3503  						uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3504  					retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) |
  3505  						uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3506  				} else {
  3507  					retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) |
  3508  						uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
  3509  					retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) |
  3510  						uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
  3511  				}
  3512  			case wazeroir.ShapeF32x4:
  3513  				retHi = WasmCompatMax32bits(uint32(x1hi), uint32(x2hi)) |
  3514  					WasmCompatMax32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
  3515  				retLo = WasmCompatMax32bits(uint32(x1lo), uint32(x2lo)) |
  3516  					WasmCompatMax32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
  3517  			case wazeroir.ShapeF64x2:
  3518  				retHi = math.Float64bits(moremath.WasmCompatMax64(
  3519  					math.Float64frombits(x1hi),
  3520  					math.Float64frombits(x2hi),
  3521  				))
  3522  				retLo = math.Float64bits(moremath.WasmCompatMax64(
  3523  					math.Float64frombits(x1lo),
  3524  					math.Float64frombits(x2lo),
  3525  				))
  3526  			}
  3527  			ce.pushValue(retLo)
  3528  			ce.pushValue(retHi)
  3529  			frame.pc++
  3530  		case wazeroir.OperationKindV128AvgrU:
  3531  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3532  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3533  			var retLo, retHi uint64
  3534  			switch op.b1 {
  3535  			case wazeroir.ShapeI8x16:
  3536  				retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) |
  3537  					uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
  3538  					uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
  3539  					uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
  3540  				retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) |
  3541  					uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
  3542  					uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
  3543  					uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
  3544  			case wazeroir.ShapeI16x8:
  3545  				retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) |
  3546  					uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
  3547  					uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
  3548  					uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
  3549  				retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) |
  3550  					uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
  3551  					uint64(i16RoundingAverage(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
  3552  					uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
  3553  			}
  3554  			ce.pushValue(retLo)
  3555  			ce.pushValue(retHi)
  3556  			frame.pc++
  3557  		case wazeroir.OperationKindV128Pmin:
  3558  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3559  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3560  			var retLo, retHi uint64
  3561  			if op.b1 == wazeroir.ShapeF32x4 {
  3562  				if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) {
  3563  					retLo = x2lo & 0x00000000_ffffffff
  3564  				} else {
  3565  					retLo = x1lo & 0x00000000_ffffffff
  3566  				}
  3567  				if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) {
  3568  					retLo |= x2lo & 0xffffffff_00000000
  3569  				} else {
  3570  					retLo |= x1lo & 0xffffffff_00000000
  3571  				}
  3572  				if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) {
  3573  					retHi = x2hi & 0x00000000_ffffffff
  3574  				} else {
  3575  					retHi = x1hi & 0x00000000_ffffffff
  3576  				}
  3577  				if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) {
  3578  					retHi |= x2hi & 0xffffffff_00000000
  3579  				} else {
  3580  					retHi |= x1hi & 0xffffffff_00000000
  3581  				}
  3582  			} else {
  3583  				if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) {
  3584  					retLo = x2lo
  3585  				} else {
  3586  					retLo = x1lo
  3587  				}
  3588  				if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) {
  3589  					retHi = x2hi
  3590  				} else {
  3591  					retHi = x1hi
  3592  				}
  3593  			}
  3594  			ce.pushValue(retLo)
  3595  			ce.pushValue(retHi)
  3596  			frame.pc++
  3597  		case wazeroir.OperationKindV128Pmax:
  3598  			x2hi, x2lo := ce.popValue(), ce.popValue()
  3599  			x1hi, x1lo := ce.popValue(), ce.popValue()
  3600  			var retLo, retHi uint64
  3601  			if op.b1 == wazeroir.ShapeF32x4 {
  3602  				if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) {
  3603  					retLo = x2lo & 0x00000000_ffffffff
  3604  				} else {
  3605  					retLo = x1lo & 0x00000000_ffffffff
  3606  				}
  3607  				if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) {
  3608  					retLo |= x2lo & 0xffffffff_00000000
  3609  				} else {
  3610  					retLo |= x1lo & 0xffffffff_00000000
  3611  				}
  3612  				if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) {
  3613  					retHi = x2hi & 0x00000000_ffffffff
  3614  				} else {
  3615  					retHi = x1hi & 0x00000000_ffffffff
  3616  				}
  3617  				if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) {
  3618  					retHi |= x2hi & 0xffffffff_00000000
  3619  				} else {
  3620  					retHi |= x1hi & 0xffffffff_00000000
  3621  				}
  3622  			} else {
  3623  				if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) {
  3624  					retLo = x2lo
  3625  				} else {
  3626  					retLo = x1lo
  3627  				}
  3628  				if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) {
  3629  					retHi = x2hi
  3630  				} else {
  3631  					retHi = x1hi
  3632  				}
  3633  			}
  3634  			ce.pushValue(retLo)
  3635  			ce.pushValue(retHi)
  3636  			frame.pc++
  3637  		case wazeroir.OperationKindV128Ceil:
  3638  			hi, lo := ce.popValue(), ce.popValue()
  3639  			if op.b1 == wazeroir.ShapeF32x4 {
  3640  				lo = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo))))) |
  3641  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3642  				hi = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi))))) |
  3643  					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3644  			} else {
  3645  				lo = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(lo)))
  3646  				hi = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(hi)))
  3647  			}
  3648  			ce.pushValue(lo)
  3649  			ce.pushValue(hi)
  3650  			frame.pc++
  3651  		case wazeroir.OperationKindV128Floor:
  3652  			hi, lo := ce.popValue(), ce.popValue()
  3653  			if op.b1 == wazeroir.ShapeF32x4 {
  3654  				lo = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo))))) |
  3655  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3656  				hi = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi))))) |
  3657  					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3658  			} else {
  3659  				lo = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(lo)))
  3660  				hi = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(hi)))
  3661  			}
  3662  			ce.pushValue(lo)
  3663  			ce.pushValue(hi)
  3664  			frame.pc++
  3665  		case wazeroir.OperationKindV128Trunc:
  3666  			hi, lo := ce.popValue(), ce.popValue()
  3667  			if op.b1 == wazeroir.ShapeF32x4 {
  3668  				lo = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo))))) |
  3669  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3670  				hi = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi))))) |
  3671  					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3672  			} else {
  3673  				lo = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(lo)))
  3674  				hi = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(hi)))
  3675  			}
  3676  			ce.pushValue(lo)
  3677  			ce.pushValue(hi)
  3678  			frame.pc++
  3679  		case wazeroir.OperationKindV128Nearest:
  3680  			hi, lo := ce.popValue(), ce.popValue()
  3681  			if op.b1 == wazeroir.ShapeF32x4 {
  3682  				lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) |
  3683  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32)
  3684  				hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) |
  3685  					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32)
  3686  			} else {
  3687  				lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo)))
  3688  				hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi)))
  3689  			}
  3690  			ce.pushValue(lo)
  3691  			ce.pushValue(hi)
  3692  			frame.pc++
  3693  		case wazeroir.OperationKindV128Extend:
  3694  			hi, lo := ce.popValue(), ce.popValue()
  3695  			var origin uint64
  3696  			if op.b3 { // use lower 64 bits
  3697  				origin = lo
  3698  			} else {
  3699  				origin = hi
  3700  			}
  3701  
  3702  			signed := op.b2 == 1
  3703  
  3704  			var retHi, retLo uint64
  3705  			switch op.b1 {
  3706  			case wazeroir.ShapeI8x16:
  3707  				for i := 0; i < 8; i++ {
  3708  					v8 := byte(origin >> (i * 8))
  3709  
  3710  					var v16 uint16
  3711  					if signed {
  3712  						v16 = uint16(int8(v8))
  3713  					} else {
  3714  						v16 = uint16(v8)
  3715  					}
  3716  
  3717  					if i < 4 {
  3718  						retLo |= uint64(v16) << (i * 16)
  3719  					} else {
  3720  						retHi |= uint64(v16) << ((i - 4) * 16)
  3721  					}
  3722  				}
  3723  			case wazeroir.ShapeI16x8:
  3724  				for i := 0; i < 4; i++ {
  3725  					v16 := uint16(origin >> (i * 16))
  3726  
  3727  					var v32 uint32
  3728  					if signed {
  3729  						v32 = uint32(int16(v16))
  3730  					} else {
  3731  						v32 = uint32(v16)
  3732  					}
  3733  
  3734  					if i < 2 {
  3735  						retLo |= uint64(v32) << (i * 32)
  3736  					} else {
  3737  						retHi |= uint64(v32) << ((i - 2) * 32)
  3738  					}
  3739  				}
  3740  			case wazeroir.ShapeI32x4:
  3741  				v32Lo := uint32(origin)
  3742  				v32Hi := uint32(origin >> 32)
  3743  				if signed {
  3744  					retLo = uint64(int32(v32Lo))
  3745  					retHi = uint64(int32(v32Hi))
  3746  				} else {
  3747  					retLo = uint64(v32Lo)
  3748  					retHi = uint64(v32Hi)
  3749  				}
  3750  			}
  3751  			ce.pushValue(retLo)
  3752  			ce.pushValue(retHi)
  3753  			frame.pc++
  3754  		case wazeroir.OperationKindV128ExtMul:
  3755  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3756  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3757  			var x1, x2 uint64
  3758  			if op.b3 { // use lower 64 bits
  3759  				x1, x2 = x1Lo, x2Lo
  3760  			} else {
  3761  				x1, x2 = x1Hi, x2Hi
  3762  			}
  3763  
  3764  			signed := op.b2 == 1
  3765  
  3766  			var retLo, retHi uint64
  3767  			switch op.b1 {
  3768  			case wazeroir.ShapeI8x16:
  3769  				for i := 0; i < 8; i++ {
  3770  					v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8))
  3771  
  3772  					var v16 uint16
  3773  					if signed {
  3774  						v16 = uint16(int16(int8(v1)) * int16(int8(v2)))
  3775  					} else {
  3776  						v16 = uint16(v1) * uint16(v2)
  3777  					}
  3778  
  3779  					if i < 4 {
  3780  						retLo |= uint64(v16) << (i * 16)
  3781  					} else {
  3782  						retHi |= uint64(v16) << ((i - 4) * 16)
  3783  					}
  3784  				}
  3785  			case wazeroir.ShapeI16x8:
  3786  				for i := 0; i < 4; i++ {
  3787  					v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16))
  3788  
  3789  					var v32 uint32
  3790  					if signed {
  3791  						v32 = uint32(int32(int16(v1)) * int32(int16(v2)))
  3792  					} else {
  3793  						v32 = uint32(v1) * uint32(v2)
  3794  					}
  3795  
  3796  					if i < 2 {
  3797  						retLo |= uint64(v32) << (i * 32)
  3798  					} else {
  3799  						retHi |= uint64(v32) << ((i - 2) * 32)
  3800  					}
  3801  				}
  3802  			case wazeroir.ShapeI32x4:
  3803  				v1Lo, v2Lo := uint32(x1), uint32(x2)
  3804  				v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32)
  3805  				if signed {
  3806  					retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo)))
  3807  					retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi)))
  3808  				} else {
  3809  					retLo = uint64(v1Lo) * uint64(v2Lo)
  3810  					retHi = uint64(v1Hi) * uint64(v2Hi)
  3811  				}
  3812  			}
  3813  
  3814  			ce.pushValue(retLo)
  3815  			ce.pushValue(retHi)
  3816  			frame.pc++
  3817  		case wazeroir.OperationKindV128Q15mulrSatS:
  3818  			x2hi, x2Lo := ce.popValue(), ce.popValue()
  3819  			x1hi, x1Lo := ce.popValue(), ce.popValue()
  3820  			var retLo, retHi uint64
  3821  			for i := 0; i < 8; i++ {
  3822  				var v, w int16
  3823  				if i < 4 {
  3824  					v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16)))
  3825  				} else {
  3826  					v, w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16)))
  3827  				}
  3828  
  3829  				var uv uint64
  3830  				// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication
  3831  				if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 {
  3832  					uv = uint64(uint16(0x8000))
  3833  				} else if calc > math.MaxInt16 {
  3834  					uv = uint64(uint16(0x7fff))
  3835  				} else {
  3836  					uv = uint64(uint16(int16(calc)))
  3837  				}
  3838  
  3839  				if i < 4 {
  3840  					retLo |= uv << (i * 16)
  3841  				} else {
  3842  					retHi |= uv << ((i - 4) * 16)
  3843  				}
  3844  			}
  3845  
  3846  			ce.pushValue(retLo)
  3847  			ce.pushValue(retHi)
  3848  			frame.pc++
  3849  		case wazeroir.OperationKindV128ExtAddPairwise:
  3850  			hi, lo := ce.popValue(), ce.popValue()
  3851  
  3852  			signed := op.b3
  3853  
  3854  			var retLo, retHi uint64
  3855  			switch op.b1 {
  3856  			case wazeroir.ShapeI8x16:
  3857  				for i := 0; i < 8; i++ {
  3858  					var v1, v2 byte
  3859  					if i < 4 {
  3860  						v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8))
  3861  					} else {
  3862  						v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8))
  3863  					}
  3864  
  3865  					var v16 uint16
  3866  					if signed {
  3867  						v16 = uint16(int16(int8(v1)) + int16(int8(v2)))
  3868  					} else {
  3869  						v16 = uint16(v1) + uint16(v2)
  3870  					}
  3871  
  3872  					if i < 4 {
  3873  						retLo |= uint64(v16) << (i * 16)
  3874  					} else {
  3875  						retHi |= uint64(v16) << ((i - 4) * 16)
  3876  					}
  3877  				}
  3878  			case wazeroir.ShapeI16x8:
  3879  				for i := 0; i < 4; i++ {
  3880  					var v1, v2 uint16
  3881  					if i < 2 {
  3882  						v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16))
  3883  					} else {
  3884  						v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16))
  3885  					}
  3886  
  3887  					var v32 uint32
  3888  					if signed {
  3889  						v32 = uint32(int32(int16(v1)) + int32(int16(v2)))
  3890  					} else {
  3891  						v32 = uint32(v1) + uint32(v2)
  3892  					}
  3893  
  3894  					if i < 2 {
  3895  						retLo |= uint64(v32) << (i * 32)
  3896  					} else {
  3897  						retHi |= uint64(v32) << ((i - 2) * 32)
  3898  					}
  3899  				}
  3900  			}
  3901  			ce.pushValue(retLo)
  3902  			ce.pushValue(retHi)
  3903  			frame.pc++
  3904  		case wazeroir.OperationKindV128FloatPromote:
  3905  			_, toPromote := ce.popValue(), ce.popValue()
  3906  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote)))))
  3907  			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote >> 32)))))
  3908  			frame.pc++
  3909  		case wazeroir.OperationKindV128FloatDemote:
  3910  			hi, lo := ce.popValue(), ce.popValue()
  3911  			ce.pushValue(
  3912  				uint64(math.Float32bits(float32(math.Float64frombits(lo)))) |
  3913  					(uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32),
  3914  			)
  3915  			ce.pushValue(0)
  3916  			frame.pc++
  3917  		case wazeroir.OperationKindV128FConvertFromI:
  3918  			hi, lo := ce.popValue(), ce.popValue()
  3919  			v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32)
  3920  			signed := op.b3
  3921  
  3922  			var retLo, retHi uint64
  3923  			switch op.b1 { // Destination shape.
  3924  			case wazeroir.ShapeF32x4: // f32x4 from signed/unsigned i32x4
  3925  				if signed {
  3926  					retLo = uint64(math.Float32bits(float32(int32(v1)))) |
  3927  						(uint64(math.Float32bits(float32(int32(v2)))) << 32)
  3928  					retHi = uint64(math.Float32bits(float32(int32(v3)))) |
  3929  						(uint64(math.Float32bits(float32(int32(v4)))) << 32)
  3930  				} else {
  3931  					retLo = uint64(math.Float32bits(float32(v1))) |
  3932  						(uint64(math.Float32bits(float32(v2))) << 32)
  3933  					retHi = uint64(math.Float32bits(float32(v3))) |
  3934  						(uint64(math.Float32bits(float32(v4))) << 32)
  3935  				}
  3936  			case wazeroir.ShapeF64x2: // f64x2 from signed/unsigned i32x4
  3937  				if signed {
  3938  					retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2)))
  3939  				} else {
  3940  					retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2))
  3941  				}
  3942  			}
  3943  
  3944  			ce.pushValue(retLo)
  3945  			ce.pushValue(retHi)
  3946  			frame.pc++
  3947  		case wazeroir.OperationKindV128Narrow:
  3948  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  3949  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  3950  			signed := op.b3
  3951  
  3952  			var retLo, retHi uint64
  3953  			switch op.b1 {
  3954  			case wazeroir.ShapeI16x8: // signed/unsigned i16x8 to i8x16
  3955  				for i := 0; i < 8; i++ {
  3956  					var v16 uint16
  3957  					if i < 4 {
  3958  						v16 = uint16(x1Lo >> (i * 16))
  3959  					} else {
  3960  						v16 = uint16(x1Hi >> ((i - 4) * 16))
  3961  					}
  3962  
  3963  					var v byte
  3964  					if signed {
  3965  						if s := int16(v16); s > math.MaxInt8 {
  3966  							v = math.MaxInt8
  3967  						} else if s < math.MinInt8 {
  3968  							s = math.MinInt8
  3969  							v = byte(s)
  3970  						} else {
  3971  							v = byte(v16)
  3972  						}
  3973  					} else {
  3974  						if s := int16(v16); s > math.MaxUint8 {
  3975  							v = math.MaxUint8
  3976  						} else if s < 0 {
  3977  							v = 0
  3978  						} else {
  3979  							v = byte(v16)
  3980  						}
  3981  					}
  3982  					retLo |= uint64(v) << (i * 8)
  3983  				}
  3984  				for i := 0; i < 8; i++ {
  3985  					var v16 uint16
  3986  					if i < 4 {
  3987  						v16 = uint16(x2Lo >> (i * 16))
  3988  					} else {
  3989  						v16 = uint16(x2Hi >> ((i - 4) * 16))
  3990  					}
  3991  
  3992  					var v byte
  3993  					if signed {
  3994  						if s := int16(v16); s > math.MaxInt8 {
  3995  							v = math.MaxInt8
  3996  						} else if s < math.MinInt8 {
  3997  							s = math.MinInt8
  3998  							v = byte(s)
  3999  						} else {
  4000  							v = byte(v16)
  4001  						}
  4002  					} else {
  4003  						if s := int16(v16); s > math.MaxUint8 {
  4004  							v = math.MaxUint8
  4005  						} else if s < 0 {
  4006  							v = 0
  4007  						} else {
  4008  							v = byte(v16)
  4009  						}
  4010  					}
  4011  					retHi |= uint64(v) << (i * 8)
  4012  				}
  4013  			case wazeroir.ShapeI32x4: // signed/unsigned i32x4 to i16x8
  4014  				for i := 0; i < 4; i++ {
  4015  					var v32 uint32
  4016  					if i < 2 {
  4017  						v32 = uint32(x1Lo >> (i * 32))
  4018  					} else {
  4019  						v32 = uint32(x1Hi >> ((i - 2) * 32))
  4020  					}
  4021  
  4022  					var v uint16
  4023  					if signed {
  4024  						if s := int32(v32); s > math.MaxInt16 {
  4025  							v = math.MaxInt16
  4026  						} else if s < math.MinInt16 {
  4027  							s = math.MinInt16
  4028  							v = uint16(s)
  4029  						} else {
  4030  							v = uint16(v32)
  4031  						}
  4032  					} else {
  4033  						if s := int32(v32); s > math.MaxUint16 {
  4034  							v = math.MaxUint16
  4035  						} else if s < 0 {
  4036  							v = 0
  4037  						} else {
  4038  							v = uint16(v32)
  4039  						}
  4040  					}
  4041  					retLo |= uint64(v) << (i * 16)
  4042  				}
  4043  
  4044  				for i := 0; i < 4; i++ {
  4045  					var v32 uint32
  4046  					if i < 2 {
  4047  						v32 = uint32(x2Lo >> (i * 32))
  4048  					} else {
  4049  						v32 = uint32(x2Hi >> ((i - 2) * 32))
  4050  					}
  4051  
  4052  					var v uint16
  4053  					if signed {
  4054  						if s := int32(v32); s > math.MaxInt16 {
  4055  							v = math.MaxInt16
  4056  						} else if s < math.MinInt16 {
  4057  							s = math.MinInt16
  4058  							v = uint16(s)
  4059  						} else {
  4060  							v = uint16(v32)
  4061  						}
  4062  					} else {
  4063  						if s := int32(v32); s > math.MaxUint16 {
  4064  							v = math.MaxUint16
  4065  						} else if s < 0 {
  4066  							v = 0
  4067  						} else {
  4068  							v = uint16(v32)
  4069  						}
  4070  					}
  4071  					retHi |= uint64(v) << (i * 16)
  4072  				}
  4073  			}
  4074  			ce.pushValue(retLo)
  4075  			ce.pushValue(retHi)
  4076  			frame.pc++
  4077  		case wazeroir.OperationKindV128Dot:
  4078  			x2Hi, x2Lo := ce.popValue(), ce.popValue()
  4079  			x1Hi, x1Lo := ce.popValue(), ce.popValue()
  4080  			ce.pushValue(
  4081  				uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) |
  4082  					(uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32),
  4083  			)
  4084  			ce.pushValue(
  4085  				uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) |
  4086  					(uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32),
  4087  			)
  4088  			frame.pc++
  4089  		case wazeroir.OperationKindV128ITruncSatFromF:
  4090  			hi, lo := ce.popValue(), ce.popValue()
  4091  			signed := op.b3
  4092  			var retLo, retHi uint64
  4093  
  4094  			switch op.b1 {
  4095  			case wazeroir.ShapeF32x4: // f32x4 to i32x4
  4096  				for i, f64 := range [4]float64{
  4097  					math.Trunc(float64(math.Float32frombits(uint32(lo)))),
  4098  					math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))),
  4099  					math.Trunc(float64(math.Float32frombits(uint32(hi)))),
  4100  					math.Trunc(float64(math.Float32frombits(uint32(hi >> 32)))),
  4101  				} {
  4102  
  4103  					var v uint32
  4104  					if math.IsNaN(f64) {
  4105  						v = 0
  4106  					} else if signed {
  4107  						if f64 < math.MinInt32 {
  4108  							f64 = math.MinInt32
  4109  						} else if f64 > math.MaxInt32 {
  4110  							f64 = math.MaxInt32
  4111  						}
  4112  						v = uint32(int32(f64))
  4113  					} else {
  4114  						if f64 < 0 {
  4115  							f64 = 0
  4116  						} else if f64 > math.MaxUint32 {
  4117  							f64 = math.MaxUint32
  4118  						}
  4119  						v = uint32(f64)
  4120  					}
  4121  
  4122  					if i < 2 {
  4123  						retLo |= uint64(v) << (i * 32)
  4124  					} else {
  4125  						retHi |= uint64(v) << ((i - 2) * 32)
  4126  					}
  4127  				}
  4128  
  4129  			case wazeroir.ShapeF64x2: // f64x2 to i32x4
  4130  				for i, f := range [2]float64{
  4131  					math.Trunc(math.Float64frombits(lo)),
  4132  					math.Trunc(math.Float64frombits(hi)),
  4133  				} {
  4134  					var v uint32
  4135  					if math.IsNaN(f) {
  4136  						v = 0
  4137  					} else if signed {
  4138  						if f < math.MinInt32 {
  4139  							f = math.MinInt32
  4140  						} else if f > math.MaxInt32 {
  4141  							f = math.MaxInt32
  4142  						}
  4143  						v = uint32(int32(f))
  4144  					} else {
  4145  						if f < 0 {
  4146  							f = 0
  4147  						} else if f > math.MaxUint32 {
  4148  							f = math.MaxUint32
  4149  						}
  4150  						v = uint32(f)
  4151  					}
  4152  
  4153  					retLo |= uint64(v) << (i * 32)
  4154  				}
  4155  			}
  4156  
  4157  			ce.pushValue(retLo)
  4158  			ce.pushValue(retHi)
  4159  			frame.pc++
  4160  		}
  4161  	}
  4162  	ce.popFrame()
  4163  }
  4164  
  4165  // callerMemory returns the caller context memory.
  4166  func (ce *callEngine) callerMemory() *wasm.MemoryInstance {
  4167  	// Search through the call frame stack from the top until we find a non host function.
  4168  	for i := len(ce.frames) - 1; i >= 0; i-- {
  4169  		frame := ce.frames[i].f.source
  4170  		if !frame.IsHostFunction {
  4171  			return frame.Module.Memory
  4172  		}
  4173  	}
  4174  	return nil
  4175  }
  4176  
  4177  func WasmCompatMax32bits(v1, v2 uint32) uint64 {
  4178  	return uint64(math.Float32bits(moremath.WasmCompatMax32(
  4179  		math.Float32frombits(v1),
  4180  		math.Float32frombits(v2),
  4181  	)))
  4182  }
  4183  
  4184  func WasmCompatMin32bits(v1, v2 uint32) uint64 {
  4185  	return uint64(math.Float32bits(moremath.WasmCompatMin32(
  4186  		math.Float32frombits(v1),
  4187  		math.Float32frombits(v2),
  4188  	)))
  4189  }
  4190  
  4191  func addFloat32bits(v1, v2 uint32) uint64 {
  4192  	return uint64(math.Float32bits(math.Float32frombits(v1) + math.Float32frombits(v2)))
  4193  }
  4194  
  4195  func subFloat32bits(v1, v2 uint32) uint64 {
  4196  	return uint64(math.Float32bits(math.Float32frombits(v1) - math.Float32frombits(v2)))
  4197  }
  4198  
  4199  func mulFloat32bits(v1, v2 uint32) uint64 {
  4200  	return uint64(math.Float32bits(math.Float32frombits(v1) * math.Float32frombits(v2)))
  4201  }
  4202  
  4203  func divFloat32bits(v1, v2 uint32) uint64 {
  4204  	return uint64(math.Float32bits(math.Float32frombits(v1) / math.Float32frombits(v2)))
  4205  }
  4206  
  4207  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  4208  func flt32(z1, z2 float32) bool {
  4209  	if z1 != z1 || z2 != z2 {
  4210  		return false
  4211  	} else if z1 == z2 {
  4212  		return false
  4213  	} else if math.IsInf(float64(z1), 1) {
  4214  		return false
  4215  	} else if math.IsInf(float64(z1), -1) {
  4216  		return true
  4217  	} else if math.IsInf(float64(z2), 1) {
  4218  		return true
  4219  	} else if math.IsInf(float64(z2), -1) {
  4220  		return false
  4221  	}
  4222  	return z1 < z2
  4223  }
  4224  
  4225  // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
  4226  func flt64(z1, z2 float64) bool {
  4227  	if z1 != z1 || z2 != z2 {
  4228  		return false
  4229  	} else if z1 == z2 {
  4230  		return false
  4231  	} else if math.IsInf(z1, 1) {
  4232  		return false
  4233  	} else if math.IsInf(z1, -1) {
  4234  		return true
  4235  	} else if math.IsInf(z2, 1) {
  4236  		return true
  4237  	} else if math.IsInf(z2, -1) {
  4238  		return false
  4239  	}
  4240  	return z1 < z2
  4241  }
  4242  
  4243  func i8RoundingAverage(v1, v2 byte) byte {
  4244  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  4245  	return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2)
  4246  }
  4247  
  4248  func i16RoundingAverage(v1, v2 uint16) uint16 {
  4249  	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
  4250  	return uint16((uint32(v1) + uint32(v2) + 1) / 2)
  4251  }
  4252  
  4253  func i8Abs(v byte) byte {
  4254  	if i := int8(v); i < 0 {
  4255  		return byte(-i)
  4256  	} else {
  4257  		return byte(i)
  4258  	}
  4259  }
  4260  
  4261  func i8MaxU(v1, v2 byte) byte {
  4262  	if v1 < v2 {
  4263  		return v2
  4264  	} else {
  4265  		return v1
  4266  	}
  4267  }
  4268  
  4269  func i8MinU(v1, v2 byte) byte {
  4270  	if v1 > v2 {
  4271  		return v2
  4272  	} else {
  4273  		return v1
  4274  	}
  4275  }
  4276  
  4277  func i8MaxS(v1, v2 byte) byte {
  4278  	if int8(v1) < int8(v2) {
  4279  		return v2
  4280  	} else {
  4281  		return v1
  4282  	}
  4283  }
  4284  
  4285  func i8MinS(v1, v2 byte) byte {
  4286  	if int8(v1) > int8(v2) {
  4287  		return v2
  4288  	} else {
  4289  		return v1
  4290  	}
  4291  }
  4292  
  4293  func i16MaxU(v1, v2 uint16) uint16 {
  4294  	if v1 < v2 {
  4295  		return v2
  4296  	} else {
  4297  		return v1
  4298  	}
  4299  }
  4300  
  4301  func i16MinU(v1, v2 uint16) uint16 {
  4302  	if v1 > v2 {
  4303  		return v2
  4304  	} else {
  4305  		return v1
  4306  	}
  4307  }
  4308  
  4309  func i16MaxS(v1, v2 uint16) uint16 {
  4310  	if int16(v1) < int16(v2) {
  4311  		return v2
  4312  	} else {
  4313  		return v1
  4314  	}
  4315  }
  4316  
  4317  func i16MinS(v1, v2 uint16) uint16 {
  4318  	if int16(v1) > int16(v2) {
  4319  		return v2
  4320  	} else {
  4321  		return v1
  4322  	}
  4323  }
  4324  
  4325  func i32MaxU(v1, v2 uint32) uint32 {
  4326  	if v1 < v2 {
  4327  		return v2
  4328  	} else {
  4329  		return v1
  4330  	}
  4331  }
  4332  
  4333  func i32MinU(v1, v2 uint32) uint32 {
  4334  	if v1 > v2 {
  4335  		return v2
  4336  	} else {
  4337  		return v1
  4338  	}
  4339  }
  4340  
  4341  func i32MaxS(v1, v2 uint32) uint32 {
  4342  	if int32(v1) < int32(v2) {
  4343  		return v2
  4344  	} else {
  4345  		return v1
  4346  	}
  4347  }
  4348  
  4349  func i32MinS(v1, v2 uint32) uint32 {
  4350  	if int32(v1) > int32(v2) {
  4351  		return v2
  4352  	} else {
  4353  		return v1
  4354  	}
  4355  }
  4356  
  4357  func i16Abs(v uint16) uint16 {
  4358  	if i := int16(v); i < 0 {
  4359  		return uint16(-i)
  4360  	} else {
  4361  		return uint16(i)
  4362  	}
  4363  }
  4364  
  4365  func i32Abs(v uint32) uint32 {
  4366  	if i := int32(v); i < 0 {
  4367  		return uint32(-i)
  4368  	} else {
  4369  		return uint32(i)
  4370  	}
  4371  }
  4372  
  4373  func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, callCtx *wasm.CallContext, f *function, fnl experimental.FunctionListener) context.Context {
  4374  	ctx = fnl.Before(ctx, f.source.Definition, ce.peekValues(len(f.source.Type.Params)))
  4375  	ce.callNativeFunc(ctx, callCtx, f)
  4376  	// TODO: This doesn't get the error due to use of panic to propagate them.
  4377  	fnl.After(ctx, f.source.Definition, nil, ce.peekValues(len(f.source.Type.Results)))
  4378  	return ctx
  4379  }
  4380  
  4381  // popMemoryOffset takes a memory offset off the stack for use in load and store instructions.
  4382  // As the top of stack value is 64-bit, this ensures it is in range before returning it.
  4383  func (ce *callEngine) popMemoryOffset(op *interpreterOp) uint32 {
  4384  	// TODO: Document what 'us' is and why we expect to look at value 1.
  4385  	offset := op.us[1] + ce.popValue()
  4386  	if offset > math.MaxUint32 {
  4387  		panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
  4388  	}
  4389  	return uint32(offset)
  4390  }
  4391  
  4392  func (ce *callEngine) callGoFuncWithStack(ctx context.Context, callCtx *wasm.CallContext, f *function) {
  4393  	paramLen := f.source.Type.ParamNumInUint64
  4394  	resultLen := f.source.Type.ResultNumInUint64
  4395  	stackLen := paramLen
  4396  
  4397  	// In the interpreter engine, ce.stack may only have capacity to store
  4398  	// parameters. Grow when there are more results than parameters.
  4399  	if growLen := resultLen - paramLen; growLen > 0 {
  4400  		for i := 0; i < growLen; i++ {
  4401  			ce.stack = append(ce.stack, 0)
  4402  		}
  4403  		stackLen += growLen
  4404  	}
  4405  
  4406  	// Pass the stack elements to the go function.
  4407  	stack := ce.stack[len(ce.stack)-stackLen:]
  4408  	ce.callGoFunc(ctx, callCtx, f, stack)
  4409  
  4410  	// Shrink the stack when there were more parameters than results.
  4411  	if shrinkLen := paramLen - resultLen; shrinkLen > 0 {
  4412  		ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen]
  4413  	}
  4414  }